From e6dcd5999ab789ec5b10ed16b9421b9ba1f3227a Mon Sep 17 00:00:00 2001 From: Xu Yandong Date: Tue, 8 Apr 2025 15:41:41 +0800 Subject: [PATCH 1/9] address_space: region compare implement It is not necessary to strictly require the pointer address to be one, when the region is normal RAM memory. --- address_space/src/region.rs | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/address_space/src/region.rs b/address_space/src/region.rs index 6c1be857e..65b55da42 100644 --- a/address_space/src/region.rs +++ b/address_space/src/region.rs @@ -171,10 +171,18 @@ impl PartialEq for FlatRange { /// Implement PartialEq/Eq for comparison of Region. impl PartialEq for Region { fn eq(&self, other: &Region) -> bool { - Arc::as_ptr(&self.priority) == Arc::as_ptr(&other.priority) - && self.region_type() == other.region_type() - && Arc::as_ptr(&self.offset) == Arc::as_ptr(&other.offset) - && Arc::as_ptr(&self.size) == Arc::as_ptr(&other.size) + if other.region_type() == RegionType::Ram { + self.priority() == other.priority() + && self.name == other.name + && self.region_type == other.region_type + && self.offset() == other.offset() + && self.size() == other.size() + } else { + Arc::as_ptr(&self.priority) == Arc::as_ptr(&other.priority) + && self.region_type() == other.region_type() + && Arc::as_ptr(&self.offset) == Arc::as_ptr(&other.offset) + && Arc::as_ptr(&self.size) == Arc::as_ptr(&other.size) + } } } @@ -245,11 +253,15 @@ impl Region { mem_mapping: Option>, ops: Option, ) -> Region { + let offset = match &mem_mapping { + Some(map) => map.start_address().0, + None => 0, + }; Region { name: String::from(name), region_type, priority: Arc::new(AtomicI32::new(0)), - offset: Arc::new(Mutex::new(GuestAddress(0))), + offset: Arc::new(Mutex::new(GuestAddress(offset))), size: Arc::new(AtomicU64::new(size)), mem_mapping, ops, -- Gitee From 47f1a04f52d0dabd41f569df1818456340d27785 Mon Sep 17 00:00:00 2001 From: Xu Yandong Date: Wed, 9 Apr 2025 11:54:59 
+0800 Subject: [PATCH 2/9] machine_manager: rename MemZoneConfig to MemBackendObjConfig --- address_space/src/host_mmap.rs | 19 +++-- machine/src/lib.rs | 14 ++-- machine_manager/src/config/machine_config.rs | 87 ++++++++++---------- machine_manager/src/config/mod.rs | 4 +- 4 files changed, 64 insertions(+), 60 deletions(-) diff --git a/address_space/src/host_mmap.rs b/address_space/src/host_mmap.rs index d2ca9b8a2..bd81da9b7 100644 --- a/address_space/src/host_mmap.rs +++ b/address_space/src/host_mmap.rs @@ -24,7 +24,7 @@ use nix::sys::statfs::fstatfs; use nix::unistd::{mkstemp, sysconf, unlink, SysconfVar}; use crate::{AddressRange, GuestAddress, Region}; -use machine_manager::config::{HostMemPolicy, MachineMemConfig, MemZoneConfig}; +use machine_manager::config::{HostMemPolicy, MachineMemConfig, MemBackendObjConfig}; use util::unix::{do_mmap, host_page_size, mbind}; const MAX_PREALLOC_THREAD: i64 = 16; @@ -296,7 +296,7 @@ pub fn create_default_mem(mem_config: &MachineMemConfig, thread_num: u8) -> Resu /// /// * `mem_config` - The config of default memory. /// * `thread_num` - The num of mem preallocv threads, typically the number of vCPUs. -pub fn create_backend_mem(mem_config: &MemZoneConfig, thread_num: u8) -> Result { +pub fn create_backend_mem(mem_config: &MemBackendObjConfig, thread_num: u8) -> Result { let mut f_back: Option = None; if mem_config.memfd() { @@ -349,13 +349,16 @@ pub fn create_backend_mem(mem_config: &MemZoneConfig, thread_num: u8) -> Result< /// # Arguments /// /// * `mem_mappings` - The host virtual address of mapped memory information. -/// * `zone` - Memory zone config info. -fn set_host_memory_policy(mem_mappings: &Arc, zone: &MemZoneConfig) -> Result<()> { - if zone.host_numa_nodes.is_none() { +/// * `mb_config` - Memory backend config info. 
+fn set_host_memory_policy( + mem_mappings: &Arc, + mb_config: &MemBackendObjConfig, +) -> Result<()> { + if mb_config.host_numa_nodes.is_none() { return Ok(()); } let host_addr_start = mem_mappings.host_address(); - let nodes = zone.host_numa_nodes.as_ref().unwrap(); + let nodes = mb_config.host_numa_nodes.as_ref().unwrap(); let mut max_node = nodes[nodes.len() - 1] as usize; // Upper limit of max_node is MAX_NODES. @@ -367,7 +370,7 @@ fn set_host_memory_policy(mem_mappings: &Arc, zone: &MemZoneConf // It is kind of linux bug or feature which will cut off the last node. max_node += 1; - let policy = HostMemPolicy::from(zone.policy.clone()); + let policy = HostMemPolicy::from(mb_config.policy.clone()); if policy == HostMemPolicy::Default { max_node = 0; nmask = vec![0_u64; max_node]; @@ -380,7 +383,7 @@ fn set_host_memory_policy(mem_mappings: &Arc, zone: &MemZoneConf unsafe { mbind( host_addr_start, - zone.size, + mb_config.size, policy as u32, nmask, max_node as u64, diff --git a/machine/src/lib.rs b/machine/src/lib.rs index ef2ff1827..88efdfc4f 100644 --- a/machine/src/lib.rs +++ b/machine/src/lib.rs @@ -409,21 +409,21 @@ pub trait MachineOps: MachineLifecycle { let root = self.get_vm_ram(); let numa_nodes = self.get_numa_nodes(); - if numa_nodes.is_none() || mem_config.mem_zones.is_none() { + if numa_nodes.is_none() || mem_config.membackend_objs.is_none() { let default_mem = create_default_mem(mem_config, thread_num)?; root.add_subregion_not_update(default_mem, 0_u64)?; return Ok(()); } - let zones = mem_config.mem_zones.as_ref().unwrap(); + let mb_objs = mem_config.membackend_objs.as_ref().unwrap(); let mut offset = 0_u64; for node in numa_nodes.as_ref().unwrap().iter() { - for zone in zones.iter() { - if zone.id.eq(&node.1.mem_dev) { - let ram = create_backend_mem(zone, thread_num)?; + for mb_obj in mb_objs.iter() { + if mb_obj.id.eq(&node.1.mem_dev) { + let ram = create_backend_mem(mb_obj, thread_num)?; root.add_subregion_not_update(ram, offset)?; offset = 
offset - .checked_add(zone.size) - .with_context(|| "total zone size overflow")?; + .checked_add(mb_obj.size) + .with_context(|| "total mem backend size overflow")?; break; } } diff --git a/machine_manager/src/config/machine_config.rs b/machine_manager/src/config/machine_config.rs index 3d277ba15..1625b1068 100644 --- a/machine_manager/src/config/machine_config.rs +++ b/machine_manager/src/config/machine_config.rs @@ -86,7 +86,7 @@ impl From for HostMemPolicy { #[derive(Parser, Clone, Debug, Serialize, Deserialize)] #[command(no_binary_name(true))] -pub struct MemZoneConfig { +pub struct MemBackendObjConfig { #[arg(long, alias = "classtype", value_parser = ["memory-backend-ram", "memory-backend-file", "memory-backend-memfd"])] pub mem_type: String, #[arg(long, value_parser = valid_id)] @@ -112,7 +112,7 @@ pub struct MemZoneConfig { pub prealloc: bool, } -impl MemZoneConfig { +impl MemBackendObjConfig { pub fn memfd(&self) -> bool { self.mem_type.eq("memory-backend-memfd") } @@ -126,7 +126,7 @@ pub struct MachineMemConfig { pub dump_guest_core: bool, pub mem_share: bool, pub mem_prealloc: bool, - pub mem_zones: Option>, + pub membackend_objs: Option>, } impl Default for MachineMemConfig { @@ -137,7 +137,7 @@ impl Default for MachineMemConfig { dump_guest_core: true, mem_share: false, mem_prealloc: false, - mem_zones: None, + membackend_objs: None, } } } @@ -480,43 +480,44 @@ impl VmConfig { } impl VmConfig { - /// Convert memory zone cmdline to VM config + /// Convert memory backend cmdline to VM config /// /// # Arguments /// - /// * `mem_zone` - The memory zone cmdline string. - pub fn add_mem_zone(&mut self, mem_zone: &str) -> Result { - let zone_config = MemZoneConfig::try_parse_from(str_slip_to_clap(mem_zone, true, false))?; + /// * `mem_backend` - The memory backend cmdline string. 
+ pub fn add_mem_backend(&mut self, mem_backend: &str) -> Result { + let mb_config = + MemBackendObjConfig::try_parse_from(str_slip_to_clap(mem_backend, true, false))?; - if (zone_config.mem_path.is_none() && zone_config.mem_type.eq("memory-backend-file")) - || (zone_config.mem_path.is_some() && zone_config.mem_type.ne("memory-backend-file")) + if (mb_config.mem_path.is_none() && mb_config.mem_type.eq("memory-backend-file")) + || (mb_config.mem_path.is_some() && mb_config.mem_type.ne("memory-backend-file")) { - bail!("Object type: {} config path err", zone_config.mem_type); + bail!("Object type: {} config path err", mb_config.mem_type); } - if self.object.mem_object.contains_key(&zone_config.id) { - bail!("Object: {} has been added", zone_config.id); + if self.object.mem_object.contains_key(&mb_config.id) { + bail!("Object: {} has been added", mb_config.id); } self.object .mem_object - .insert(zone_config.id.clone(), zone_config.clone()); + .insert(mb_config.id.clone(), mb_config.clone()); - if zone_config.host_numa_nodes.is_none() { - return Ok(zone_config); + if mb_config.host_numa_nodes.is_none() { + return Ok(mb_config); } - if self.machine_config.mem_config.mem_zones.is_some() { + if self.machine_config.mem_config.membackend_objs.is_some() { self.machine_config .mem_config - .mem_zones + .membackend_objs .as_mut() .unwrap() - .push(zone_config.clone()); + .push(mb_config.clone()); } else { - self.machine_config.mem_config.mem_zones = Some(vec![zone_config.clone()]); + self.machine_config.mem_config.membackend_objs = Some(vec![mb_config.clone()]); } - Ok(zone_config) + Ok(mb_config) } } @@ -620,7 +621,7 @@ mod tests { mem_share: false, dump_guest_core: false, mem_prealloc: false, - mem_zones: None, + membackend_objs: None, }; let mut machine_config = MachineConfig { mach_type: MachineType::MicroVm, @@ -955,37 +956,37 @@ mod tests { } #[test] - fn test_add_mem_zone() { + fn test_add_mem_backend() { let mut vm_config = VmConfig::default(); - let zone_config_1 = 
vm_config - .add_mem_zone("memory-backend-ram,size=2G,id=mem1,host-nodes=1,policy=bind") + let mb_config_1 = vm_config + .add_mem_backend("memory-backend-ram,size=2G,id=mem1,host-nodes=1,policy=bind") .unwrap(); - assert_eq!(zone_config_1.id, "mem1"); - assert_eq!(zone_config_1.size, 2147483648); - assert_eq!(zone_config_1.host_numa_nodes, Some(vec![1])); - assert_eq!(zone_config_1.policy, "bind"); + assert_eq!(mb_config_1.id, "mem1"); + assert_eq!(mb_config_1.size, 2147483648); + assert_eq!(mb_config_1.host_numa_nodes, Some(vec![1])); + assert_eq!(mb_config_1.policy, "bind"); - let zone_config_2 = vm_config - .add_mem_zone("memory-backend-ram,size=2G,id=mem2,host-nodes=1-2,policy=default") + let mb_config_2 = vm_config + .add_mem_backend("memory-backend-ram,size=2G,id=mem2,host-nodes=1-2,policy=default") .unwrap(); - assert_eq!(zone_config_2.host_numa_nodes, Some(vec![1, 2])); + assert_eq!(mb_config_2.host_numa_nodes, Some(vec![1, 2])); - let zone_config_3 = vm_config - .add_mem_zone("memory-backend-ram,size=2M,id=mem3,share=on") + let mb_config_3 = vm_config + .add_mem_backend("memory-backend-ram,size=2M,id=mem3,share=on") .unwrap(); - assert_eq!(zone_config_3.size, 2 * 1024 * 1024); - assert!(zone_config_3.share); + assert_eq!(mb_config_3.size, 2 * 1024 * 1024); + assert!(mb_config_3.share); - let zone_config_4 = vm_config - .add_mem_zone("memory-backend-ram,size=2M,id=mem4") + let mb_config_4 = vm_config + .add_mem_backend("memory-backend-ram,size=2M,id=mem4") .unwrap(); - assert!(!zone_config_4.share); - assert!(!zone_config_4.memfd()); + assert!(!mb_config_4.share); + assert!(!mb_config_4.memfd()); - let zone_config_5 = vm_config - .add_mem_zone("memory-backend-memfd,size=2M,id=mem5") + let mb_config_5 = vm_config + .add_mem_backend("memory-backend-memfd,size=2M,id=mem5") .unwrap(); - assert!(zone_config_5.memfd()); + assert!(mb_config_5.memfd()); } #[test] diff --git a/machine_manager/src/config/mod.rs b/machine_manager/src/config/mod.rs index 
a3f96b04a..144589bfc 100644 --- a/machine_manager/src/config/mod.rs +++ b/machine_manager/src/config/mod.rs @@ -109,7 +109,7 @@ struct GlobalConfig { #[derive(Clone, Default, Debug, Serialize, Deserialize)] pub struct ObjectConfig { pub rng_object: HashMap, - pub mem_object: HashMap, + pub mem_object: HashMap, #[cfg(feature = "vnc_auth")] pub tls_object: HashMap, #[cfg(feature = "vnc_auth")] @@ -223,7 +223,7 @@ impl VmConfig { self.object.rng_object.insert(id, rng_cfg); } "memory-backend-ram" | "memory-backend-file" | "memory-backend-memfd" => { - self.add_mem_zone(object_args)?; + self.add_mem_backend(object_args)?; } #[cfg(feature = "vnc_auth")] "tls-creds-x509" => { -- Gitee From eaad3a3ed4c02ce770340eea8217c733f1486a92 Mon Sep 17 00:00:00 2001 From: Xu Yandong Date: Wed, 9 Apr 2025 12:12:52 +0800 Subject: [PATCH 3/9] machine_manager: file and memfd default share property to true --- address_space/src/host_mmap.rs | 2 +- machine/src/lib.rs | 2 +- machine_manager/src/config/machine_config.rs | 19 +++++++++++++++---- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/address_space/src/host_mmap.rs b/address_space/src/host_mmap.rs index bd81da9b7..cf6f8b17a 100644 --- a/address_space/src/host_mmap.rs +++ b/address_space/src/host_mmap.rs @@ -331,7 +331,7 @@ pub fn create_backend_mem(mem_config: &MemBackendObjConfig, thread_num: u8) -> R mem_config.size, f_back, mem_config.dump_guest_core, - mem_config.share, + mem_config.share(), false, )?); if mem_config.prealloc { diff --git a/machine/src/lib.rs b/machine/src/lib.rs index 88efdfc4f..eb4601da7 100644 --- a/machine/src/lib.rs +++ b/machine/src/lib.rs @@ -1747,7 +1747,7 @@ pub trait MachineOps: MachineLifecycle { ) })?; - if !mem_cfg.share { + if !mem_cfg.share() { bail!("Object for share config is not on"); } diff --git a/machine_manager/src/config/machine_config.rs b/machine_manager/src/config/machine_config.rs index 1625b1068..e843def56 100644 --- a/machine_manager/src/config/machine_config.rs +++ 
b/machine_manager/src/config/machine_config.rs @@ -106,8 +106,8 @@ pub struct MemBackendObjConfig { pub mem_path: Option, #[arg(long, default_value = "true", value_parser = parse_bool, action = ArgAction::Append)] pub dump_guest_core: bool, - #[arg(long, default_value = "off", value_parser = parse_bool, action = ArgAction::Append)] - pub share: bool, + #[arg(long, value_parser = parse_bool, action = ArgAction::Append)] + pub share: Option, #[arg(long, alias = "mem-prealloc", default_value = "false", value_parser = parse_bool, action = ArgAction::Append)] pub prealloc: bool, } @@ -116,6 +116,16 @@ impl MemBackendObjConfig { pub fn memfd(&self) -> bool { self.mem_type.eq("memory-backend-memfd") } + + pub fn share(&self) -> bool { + match self.share { + Some(share) => share, + None => matches!( + self.mem_type.as_str(), + "memory-backend-file" | "memory-backend-memfd" + ), + } + } } /// Config that contains machine's memory information config. @@ -975,17 +985,18 @@ mod tests { .add_mem_backend("memory-backend-ram,size=2M,id=mem3,share=on") .unwrap(); assert_eq!(mb_config_3.size, 2 * 1024 * 1024); - assert!(mb_config_3.share); + assert!(mb_config_3.share()); let mb_config_4 = vm_config .add_mem_backend("memory-backend-ram,size=2M,id=mem4") .unwrap(); - assert!(!mb_config_4.share); + assert!(!mb_config_4.share()); assert!(!mb_config_4.memfd()); let mb_config_5 = vm_config .add_mem_backend("memory-backend-memfd,size=2M,id=mem5") .unwrap(); + assert!(mb_config_5.share()); assert!(mb_config_5.memfd()); } -- Gitee From 7e59cd3225ebc540451dd00b6a3cc376fbf20e4b Mon Sep 17 00:00:00 2001 From: Xu Yandong Date: Wed, 9 Apr 2025 14:55:47 +0800 Subject: [PATCH 4/9] machine_manager: add maxmem option --- machine_manager/src/config/machine_config.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/machine_manager/src/config/machine_config.rs b/machine_manager/src/config/machine_config.rs index e843def56..9584c2015 100644 --- 
a/machine_manager/src/config/machine_config.rs +++ b/machine_manager/src/config/machine_config.rs @@ -132,6 +132,7 @@ impl MemBackendObjConfig { #[derive(Clone, Debug, Serialize, Deserialize)] pub struct MachineMemConfig { pub mem_size: u64, + pub max_size: u64, pub mem_path: Option, pub dump_guest_core: bool, pub mem_share: bool, @@ -143,6 +144,7 @@ impl Default for MachineMemConfig { fn default() -> Self { MachineMemConfig { mem_size: DEFAULT_MEMSIZE * M, + max_size: MAX_MEMSIZE, mem_path: None, dump_guest_core: true, mem_share: false, @@ -275,6 +277,8 @@ struct AccelConfig { struct MemSizeConfig { #[arg(long, alias = "classtype", value_parser = parse_size)] size: u64, + #[arg(long, default_value = "262144", value_parser = parse_size)] + maxmem: u64, } #[derive(Parser)] @@ -432,6 +436,10 @@ impl VmConfig { let mem_cfg = MemSizeConfig::try_parse_from(str_slip_to_clap(mem_config, !has_size_label, false))?; self.machine_config.mem_config.mem_size = mem_cfg.size; + self.machine_config.mem_config.max_size = mem_cfg.maxmem; + if mem_cfg.maxmem < mem_cfg.size { + bail!("maxmem must bigger than current memory size") + } Ok(()) } @@ -833,6 +841,8 @@ mod tests { assert!(mem_cfg_ret.is_ok()); let mem_size = vm_config.machine_config.mem_config.mem_size; assert_eq!(mem_size, 8 * 1024 * 1024); + let max_size = vm_config.machine_config.mem_config.max_size; + assert_eq!(max_size, 256 * 1024 * 1024 * 1024); let memory_cfg = "size=8m"; let mem_cfg_ret = vm_config.add_memory(memory_cfg); @@ -845,6 +855,13 @@ mod tests { assert!(mem_cfg_ret.is_ok()); let mem_size = vm_config.machine_config.mem_config.mem_size; assert_eq!(mem_size, 8 * 1024 * 1024 * 1024); + + let memory_cfg = "size=8G,maxmem=32G"; + let mem_cfg_ret = vm_config.add_memory(memory_cfg); + let mem_size = vm_config.machine_config.mem_config.mem_size; + let max_size = vm_config.machine_config.mem_config.max_size; + assert_eq!(mem_size, 8 * 1024 * 1024 * 1024); + assert_eq!(max_size, 32 * 1024 * 1024 * 1024); } #[test] -- 
Gitee From b89ceb5bcea1fcd9bd190e02d0abcaf3c7470174 Mon Sep 17 00:00:00 2001 From: Xu Yandong Date: Thu, 10 Apr 2025 13:33:25 +0800 Subject: [PATCH 5/9] machine_manager: add memory backend object --- machine_manager/Cargo.toml | 1 + machine_manager/src/config/machine_config.rs | 87 ++++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/machine_manager/Cargo.toml b/machine_manager/Cargo.toml index ec787bc51..b226af0f1 100644 --- a/machine_manager/Cargo.toml +++ b/machine_manager/Cargo.toml @@ -22,6 +22,7 @@ thiserror = "1.0" anyhow = "1.0" trace = { path = "../trace" } util = { path = "../util" } +nix = { version = "0.26.2", default-features = false, features = ["fs", "feature"]} clap = { version = "=4.1.4", default-features = false, features = ["std", "derive"] } [features] diff --git a/machine_manager/src/config/machine_config.rs b/machine_manager/src/config/machine_config.rs index 9584c2015..521ef5db9 100644 --- a/machine_manager/src/config/machine_config.rs +++ b/machine_manager/src/config/machine_config.rs @@ -10,10 +10,14 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
+use std::fs::{remove_file, File}; +use std::os::unix::io::FromRawFd; use std::str::FromStr; +use std::sync::Arc; use anyhow::{anyhow, bail, Context, Result}; use clap::{ArgAction, Parser}; +use nix::sys::memfd::{memfd_create, MemFdCreateFlag}; use serde::{Deserialize, Serialize}; use super::error::ConfigError; @@ -154,6 +158,89 @@ impl Default for MachineMemConfig { } } +pub const MEM_BACKEND_TYPE_ANON: u64 = 1; +pub const MEM_BACKEND_TYPE_MEMFD: u64 = 2; +pub const MEM_BACKEND_TYPE_FILE: u64 = 4; + +#[derive(Clone, Debug, Default)] +pub struct MemoryBackend { + pub mb_type: u64, + pub size: u64, + pub backend: Option>, + pub file_path: Option, + pub share: bool, +} + +impl MemoryBackend { + pub fn new(option: MemBackendObjConfig) -> Self { + let mut mb: MemoryBackend = Default::default(); + mb.mb_type = match option.mem_type.as_str() { + "memory-backend-memfd" => { + mb.file_path = Some(format!("stratovirt_memfd@{}", option.id)); + MEM_BACKEND_TYPE_MEMFD + } + "memory-backend-file" => { + mb.file_path = option.mem_path.clone(); + MEM_BACKEND_TYPE_FILE + } + _ => { + mb.file_path = None; + MEM_BACKEND_TYPE_ANON + } + }; + mb.backend = None; + mb.share = option.share(); + mb.size = option.size; + mb + } + + pub fn realize(&mut self) -> Result<()> { + match self.mb_type { + MEM_BACKEND_TYPE_MEMFD => { + let path_str = match self.file_path.as_ref() { + Some(path) => path.clone(), + None => bail!("memory-backend-memfd path absent"), + }; + let memfd = + memfd_create(&std::ffi::CString::new(path_str)?, MemFdCreateFlag::empty())?; + if memfd < 0 { + return Err(std::io::Error::last_os_error()) + .with_context(|| "Failed to create memfd"); + } + // SAFETY: The parameters memfd has checked upper. 
+ let memfile = unsafe { File::from_raw_fd(memfd) }; + memfile + .set_len(self.size) + .with_context(|| "Failed to set the length of memfd file")?; + self.backend = Some(Arc::new(memfile)); + } + MEM_BACKEND_TYPE_FILE => { + let path_str = match self.file_path.as_ref() { + Some(path) => path.clone(), + None => bail!("memory-backend-file path absent"), + }; + let path = std::path::Path::new(&path_str); + let unlink = !path.exists(); + let file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(path) + .with_context(|| format!("Failed to open file: {}", path_str))?; + if file.metadata().unwrap().len() < self.size { + file.set_len(self.size)?; + } + if unlink { + remove_file(path.as_os_str())?; + } + self.backend = Some(Arc::new(file)); + } + _ => {} + }; + Ok(()) + } +} + #[derive(Parser, Clone, Debug, Serialize, Deserialize, Default)] #[command(no_binary_name(true))] pub struct CpuConfig { -- Gitee From 1dd696b2242a2057422680231d519a86d1446798 Mon Sep 17 00:00:00 2001 From: Xu Yandong Date: Sat, 12 Apr 2025 08:26:26 +0800 Subject: [PATCH 6/9] virtio: add virtio-mem device The main goal of virtio-mem is to allow for dynamic resizing of virtual machine memory. --- machine/src/lib.rs | 52 ++ machine_manager/src/cmdline.rs | 2 + machine_manager/src/config/machine_config.rs | 7 + virtio/src/device/memory.rs | 844 +++++++++++++++++++ virtio/src/device/mod.rs | 1 + virtio/src/lib.rs | 35 +- 6 files changed, 940 insertions(+), 1 deletion(-) create mode 100644 virtio/src/device/memory.rs diff --git a/machine/src/lib.rs b/machine/src/lib.rs index eb4601da7..f066cdc80 100644 --- a/machine/src/lib.rs +++ b/machine/src/lib.rs @@ -738,6 +738,57 @@ pub trait MachineOps: MachineLifecycle { Ok(()) } + /// Add virtio memory device. + /// + /// # Arguments + /// + /// * `vm_config` - VM configuration. + /// * `cfg_args` - Device configuration args. 
+ fn add_virtio_mem(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { + let option = virtio::MemoryConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let memoption = vm_config + .object + .mem_object + .remove(&option.memdev) + .with_context(|| { + format!( + "Object for memory-backend-* {} config not found", + option.memdev + ) + })?; + + let max_size = vm_config.machine_config.mem_config.max_size; + let device = virtio::Memory::new_arc(option.clone(), memoption, max_size)?; + + let current_size = device.lock().unwrap().get_region_size() + + vm_config.machine_config.mem_config.current_size; + if current_size > max_size { + bail!("failed to add virtio-mem, current memory out of maxsize"); + } else { + vm_config.machine_config.mem_config.current_size = current_size; + } + + match option.classtype.as_str() { + "virtio-mem-device" => { + check_arg_nonexist!( + ("bus", option.bus), + ("addr", option.addr), + ("multifunction", option.multifunction) + ); + self.add_virtio_mmio_device(option.id.clone(), device) + .with_context(|| "Failed to add virtio mmio mem device")?; + } + _ => { + check_arg_exist!(("bus", option.bus), ("addr", option.addr)); + let bdf = PciBdf::new(option.bus.clone().unwrap(), option.addr.unwrap()); + let multi_func = option.multifunction.unwrap_or_default(); + self.add_virtio_pci_device(&option.id, &bdf, device, multi_func, false) + .with_context(|| "Failed to add pci mem device")?; + } + } + Ok(()) + } + /// Add virtio serial device. 
/// /// # Arguments @@ -2017,6 +2068,7 @@ pub trait MachineOps: MachineLifecycle { ("virtio-net-pci", add_virtio_pci_net, vm_config, cfg_args, false), ("pcie-root-port", add_pci_root_port, cfg_args), ("virtio-balloon-device" | "virtio-balloon-pci", add_virtio_balloon, vm_config, cfg_args), + ("virtio-mem-device" | "virtio-mem-pci", add_virtio_mem, vm_config, cfg_args), ("virtio-input-device" | "virtio-input-pci", add_virtio_input, cfg_args), ("virtio-serial-device" | "virtio-serial-pci", add_virtio_serial, vm_config, cfg_args), ("virtconsole" | "virtserialport", add_virtio_serial_port, vm_config, cfg_args), diff --git a/machine_manager/src/cmdline.rs b/machine_manager/src/cmdline.rs index 109221329..cb00ee5fd 100644 --- a/machine_manager/src/cmdline.rs +++ b/machine_manager/src/cmdline.rs @@ -247,6 +247,8 @@ pub fn create_args_parser<'a>() -> ArgParser<'a> { \n\t\tadd vhost pci vsock: -device vhost-vsock-pci,id=,guest-cid=,bus=,addr=<0x3>[,multifunction=on|off]; \ \n\t\tadd virtio mmio balloon: -device virtio-balloon-device[,deflate-on-oom=true|false][,free-page-reporting=true|false]; \ \n\t\tadd virtio pci balloon: -device virtio-balloon-pci,id=,bus=,addr=<0x4>[,deflate-on-oom=true|false][,free-page-reporting=true|false][,multifunction=on|off]; \ + \n\t\tadd virtio mmio mem: -device virtio-mem-device,id=,memdev=[,memaddr=<68719476736>][,requested-size=<68719476736>][,block-size=<4096>][,node=<0>]; \ + \n\t\tadd virtio pci mem: -device virtio-mem-pci,id=,bus=,addr=<0x4>,memdev=[,memaddr=<68719476736>][,requested-size=<68719476736>][,block-size=<4096>][,node=<0>][,multifunction=on|off]; \ \n\t\tadd virtio mmio rng: -device virtio-rng-device,rng=,max-bytes=<1234>,period=<1000>; \ \n\t\tadd virtio pci rng: -device virtio-rng-pci,id=,rng=,max-bytes=<1234>,period=<1000>,bus=,addr=<0x1>[,multifunction=on|off]; \ \n\t\tadd virtio mmio input: -device virtio-input-device,id=,evdev=; \ diff --git a/machine_manager/src/config/machine_config.rs 
b/machine_manager/src/config/machine_config.rs index 521ef5db9..12ea09540 100644 --- a/machine_manager/src/config/machine_config.rs +++ b/machine_manager/src/config/machine_config.rs @@ -137,6 +137,7 @@ impl MemBackendObjConfig { pub struct MachineMemConfig { pub mem_size: u64, pub max_size: u64, + pub current_size: u64, pub mem_path: Option, pub dump_guest_core: bool, pub mem_share: bool, @@ -149,6 +150,7 @@ impl Default for MachineMemConfig { MachineMemConfig { mem_size: DEFAULT_MEMSIZE * M, max_size: MAX_MEMSIZE, + current_size: DEFAULT_MEMSIZE * M, mem_path: None, dump_guest_core: true, mem_share: false, @@ -225,6 +227,7 @@ impl MemoryBackend { .read(true) .write(true) .create(true) + .truncate(false) .open(path) .with_context(|| format!("Failed to open file: {}", path_str))?; if file.metadata().unwrap().len() < self.size { @@ -527,6 +530,7 @@ impl VmConfig { if mem_cfg.maxmem < mem_cfg.size { bail!("maxmem must bigger than current memory size") } + self.machine_config.mem_config.current_size = mem_cfg.size; Ok(()) } @@ -727,6 +731,8 @@ mod tests { dump_guest_core: false, mem_prealloc: false, membackend_objs: None, + max_size: MAX_MEMSIZE, + current_size: MAX_MEMSIZE, }; let mut machine_config = MachineConfig { mach_type: MachineType::MicroVm, @@ -945,6 +951,7 @@ mod tests { let memory_cfg = "size=8G,maxmem=32G"; let mem_cfg_ret = vm_config.add_memory(memory_cfg); + assert!(mem_cfg_ret.is_ok()); let mem_size = vm_config.machine_config.mem_config.mem_size; let max_size = vm_config.machine_config.mem_config.max_size; assert_eq!(mem_size, 8 * 1024 * 1024 * 1024); diff --git a/virtio/src/device/memory.rs b/virtio/src/device/memory.rs new file mode 100644 index 000000000..780a6148b --- /dev/null +++ b/virtio/src/device/memory.rs @@ -0,0 +1,844 @@ +// Copyright (c) 2025 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use anyhow::{anyhow, bail, Context, Result}; +use clap::{ArgAction, Parser}; + +use address_space::{AddressSpace, GuestAddress, HostMemMapping, Region}; +use log::{error, info, warn}; +use machine_manager::config::{ + get_pci_df, parse_bool, valid_id, MemBackendObjConfig, MemoryBackend, DEFAULT_VIRTQUEUE_SIZE, +}; +use machine_manager::event_loop::{register_event_helper, unregister_event_helper}; +use std::mem::size_of; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::rc::Rc; +use std::sync::atomic::AtomicBool; +use std::sync::{Arc, Mutex}; +use std::vec::Vec; +use util::bitmap::Bitmap; +use util::byte_code::ByteCode; +use util::gen_base_func; +use util::loop_context::{ + read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, +}; +use util::unix::do_mmap; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; + +use crate::error::VirtioError; +use crate::{ + iov_read_object, iov_write_object, read_config_default, report_virtio_error, Queue, VirtioBase, + VirtioDevice, VirtioInterrupt, VirtioInterruptType, VIRTIO_F_RING_EVENT_IDX, + VIRTIO_F_VERSION_1, VIRTIO_TYPE_MEM, +}; + +const VIRTIO_MEM_F_ACPI_PXM: u32 = 0; +const VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE: u32 = 1; + +const QUEUE_NUM_MEM: usize = 1; + +const VIRTIO_MEM_REQ_PLUG: u16 = 0; +const VIRTIO_MEM_REQ_UNPLUG: u16 = 1; +const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2; +const VIRTIO_MEM_REQ_STATE: u16 = 3; + +const VIRTIO_MEM_RESP_ACK: u16 = 0; +const VIRTIO_MEM_RESP_NACK: u16 = 1; +const VIRTIO_MEM_RESP_BUSY: u16 = 2; +const VIRTIO_MEM_RESP_ERROR: u16 = 3; + +const VIRTIO_MEM_STATE_PLUGGED: u16 = 0; +const 
VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1; +const VIRTIO_MEM_STATE_MIXED: u16 = 2; + +const DEFAULT_MEM_BLOCK_SIZE: u64 = 33554432; // 32 MB +const DEFAULT_MEM_BLOCK_ALIGN_SIZE: u64 = 16384; // 16 KB + +const NUMA_NONE: u16 = 4097; +const INVALID_ADDR: u64 = 0; + +type ViomemDeviceTable = HashMap>>; +static VIOMEM_DEV_LIST: OnceLock>> = OnceLock::new(); +static DEFAULT_PLUGGABLE_ADDR_BASE: OnceLock>> = OnceLock::new(); + +#[derive(Copy, Clone, Default)] +struct PluggableAddrBase { + addr: u64, + auto_alloc: bool, +} + +fn alloc_base_addr( + max_size: u64, + maddr_cfg: Option, + region_size: u64, + block_size: u64, +) -> u64 { + let auto_alloc = maddr_cfg.is_none(); + let mut pluggable = DEFAULT_PLUGGABLE_ADDR_BASE + .get_or_init(|| { + Arc::new(Mutex::new(PluggableAddrBase { + addr: max_size, + auto_alloc, + })) + }) + .lock() + .unwrap(); + if auto_alloc != pluggable.auto_alloc { + error!("inconsistent maddr configuration options"); + return INVALID_ADDR; + } + + let base_addr = match maddr_cfg { + Some(maddr) => maddr, + None => pluggable.addr.div_ceil(block_size) * block_size, + }; + pluggable.addr = base_addr + region_size; + + base_addr +} + +#[repr(C)] +#[derive(Copy, Clone, Default)] +struct VirtioMemConfig { + /// size and the alignment in bytes of a memory block. + block_size: u64, + /// has no meaning without VIRTIO_MEM_F_ACPI_PXM. + node_id: u16, + /// reserved for future use. + padding: [u8; 6], + /// start guest physical address of device-managed memory region. + addr: u64, + /// the size of device-managed memory region in bytes. + region_size: u64, + /// the size of the usable device-managed memory region. + usable_region_size: u64, + /// the amount of plugged memory in bytes within the usable device-managed memory region. + plugged_size: u64, + /// the requested amount of plugged memory within the usable device-managed memory region. 
+ requested_size: u64, +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioMemReq { + req_type: u16, + padding: [u16; 3], + req_union: [u8; 16], +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioMemReqPlug { + addr: u64, + nb_blocks: u16, + padding: [u16; 3], +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioMemReqUnplug { + addr: u64, + nb_blocks: u16, + padding: [u16; 3], +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioMemReqState { + addr: u64, + nb_blocks: u16, + padding: [u16; 3], +} + +#[repr(C)] +#[derive(Copy, Clone, Default)] +struct VirtioMemResp { + resp_type: u16, + padding: [u16; 3], + state: VirtioMemRespState, +} + +#[repr(C)] +#[derive(Copy, Clone, Default)] +struct VirtioMemRespState { + state_type: u16, +} + +impl ByteCode for VirtioMemConfig {} +impl ByteCode for VirtioMemReq {} +impl ByteCode for VirtioMemReqPlug {} +impl ByteCode for VirtioMemReqUnplug {} +impl ByteCode for VirtioMemReqState {} +impl ByteCode for VirtioMemResp {} +impl ByteCode for VirtioMemRespState {} + +#[derive(Parser, Debug, Clone, Default)] +#[command(no_binary_name(true))] +pub struct MemoryConfig { + #[arg(long, value_parser = ["virtio-mem-device", "virtio-mem-pci"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub bus: Option, + #[arg(long, value_parser = get_pci_df)] + pub addr: Option<(u8, u8)>, + #[arg(long, value_parser = parse_bool, action = ArgAction::Append)] + pub multifunction: Option, + #[arg(long)] + pub memaddr: Option, + #[arg(long)] + pub memdev: String, + #[arg(long)] + pub block_size: Option, + #[arg(long)] + pub node: Option, + #[arg(long)] + pub requested_size: Option, + #[arg(long, default_value = "false", value_parser = parse_bool, action = ArgAction::Append)] + pub unplugged_inaccessible: bool, +} + +struct MemRegionState { + base_gpa: u64, + block_size: u64, + nr_blocks: u64, + plugged_size: u64, + 
plugged_regions: Bitmap, + /// The memory backend host address + host_addr: u64, +} + +impl MemRegionState { + fn new(addr: u64, region_size: u64, block_size: u64, host_addr: u64) -> Self { + assert!(block_size != 0); + assert!((region_size % block_size) == 0); + let nr_blocks = region_size / block_size; + Self { + base_gpa: addr, + block_size, + nr_blocks, + plugged_size: 0, + plugged_regions: Bitmap::new(nr_blocks as usize), + host_addr, + } + } + + fn get_regions(&self, first_gpa: u64, nb_blocks: u64) -> Vec { + let mut regs = Vec::new(); + for n in 0..nb_blocks { + let gpa = first_gpa + n * self.block_size; + let block_addr = self.host_addr + (gpa - self.base_gpa); + let block = Arc::new( + HostMemMapping::new( + GuestAddress(gpa), + Some(block_addr), + self.block_size, + None, + false, + true, + false, + ) + .unwrap(), + ); + + regs.push(Region::init_ram_region( + block, + format!("viomem@{}-{}", gpa, self.block_size).as_str(), + )); + } + regs + } + + fn valid_range(&self, gpa: u64, nb_blocks: u64) -> u16 { + // 1. check gpa above region base gpa + if gpa < self.base_gpa || nb_blocks == 0 { + return VIRTIO_MEM_RESP_ERROR; + } + + // 2. check gpa addr aligned with block size + let addr_offset = gpa - self.base_gpa; + let block_offset: u64 = if addr_offset % self.block_size == 0 { + addr_offset / self.block_size + } else { + return VIRTIO_MEM_RESP_ERROR; + }; + + // 3. gpa + nb_blocks no overflow + let (end_block, overflow) = block_offset.overflowing_add(nb_blocks); + + if overflow { + return VIRTIO_MEM_RESP_ERROR; + } + + // 4. 
check add mem segment in the region + if end_block > self.nr_blocks { + return VIRTIO_MEM_RESP_ERROR; + } + + VIRTIO_MEM_RESP_ACK + } + + fn top_plugged_range(&self) -> (u64, u16) { + let first_block = 0; + match self.plugged_regions.find_next_bit(first_block) { + Ok(begin) => match self.plugged_regions.find_next_zero(begin) { + Ok(end) => ( + self.base_gpa + begin as u64 * self.block_size, + (end - begin) as u16, + ), + Err(_) => (0, 0), + }, + Err(_) => (0, 0), + } + } + + fn check_range_unplugged(&self, gpa: u64, nb_blocks: u64) -> u16 { + let first_block = (gpa - self.base_gpa) / self.block_size; + let last_block = first_block + nb_blocks - 1; + match self.plugged_regions.find_next_bit(first_block as usize) { + Ok(found_block) => { + if found_block as u64 > last_block { + VIRTIO_MEM_RESP_ACK + } else { + VIRTIO_MEM_RESP_ERROR + } + } + Err(_) => VIRTIO_MEM_RESP_ERROR, + } + } + + fn check_range_plugged(&self, gpa: u64, nb_blocks: u64) -> u16 { + let first_block = (gpa - self.base_gpa) / self.block_size; + let last_block = first_block + nb_blocks - 1; + match self.plugged_regions.find_next_zero(first_block as usize) { + Ok(found_block) => { + if found_block as u64 > last_block { + VIRTIO_MEM_RESP_ACK + } else { + VIRTIO_MEM_RESP_ERROR + } + } + Err(_) => VIRTIO_MEM_RESP_ERROR, + } + } + + fn plug_range(&mut self, mem_space: Arc, gpa: u64, nb_blocks: usize) -> u16 { + let first_block = ((gpa - self.base_gpa) / self.block_size) as usize; + if self.valid_range(gpa, nb_blocks as u64) != VIRTIO_MEM_RESP_ACK { + error!("plug request region illegal"); + return VIRTIO_MEM_RESP_ERROR; + } + if self.check_range_unplugged(gpa, nb_blocks as u64) != VIRTIO_MEM_RESP_ACK { + error!("plug request region conflict"); + return VIRTIO_MEM_RESP_ERROR; + } + + for region in self.get_regions(gpa, nb_blocks as u64) { + let offset = region.offset().0; + warn!("add region offset {}", offset); + if mem_space.root().add_subregion(region, offset).is_err() { + error!("failed to add 
subregion"); + return VIRTIO_MEM_RESP_BUSY; + } + } + + if self + .plugged_regions + .set_range(first_block, nb_blocks) + .is_err() + { + error!("failed to set range"); + return VIRTIO_MEM_RESP_ERROR; + } + + VIRTIO_MEM_RESP_ACK + } + + fn unplug_range(&mut self, mem_space: Arc, gpa: u64, nb_blocks: usize) -> u16 { + let first_block = ((gpa - self.base_gpa) / self.block_size) as usize; + if self.valid_range(gpa, nb_blocks as u64) != VIRTIO_MEM_RESP_ACK { + error!("unplug request region illegal"); + return VIRTIO_MEM_RESP_ERROR; + } + if self.check_range_plugged(gpa, nb_blocks as u64) != VIRTIO_MEM_RESP_ACK { + warn!("unplug request region conflict"); + return VIRTIO_MEM_RESP_ERROR; + } + + for region in self.get_regions(gpa, nb_blocks as u64) { + warn!("del region offset {}", region.offset().0); + if mem_space.root().delete_subregion(®ion).is_err() { + error!("failed to delete subregion"); + return VIRTIO_MEM_RESP_ERROR; + } + } + + if self + .plugged_regions + .clear_range(first_block, nb_blocks) + .is_err() + { + error!("failed to delete subregion"); + return VIRTIO_MEM_RESP_ERROR; + } + + VIRTIO_MEM_RESP_ACK + } + + fn range_state(&self, gpa: u64, nb_blocks: u64) -> (u16, u16) { + let first_block = ((gpa - self.base_gpa) / self.block_size) as usize; + let last_block = first_block + nb_blocks as usize; + if self.valid_range(gpa, nb_blocks) != VIRTIO_MEM_RESP_ACK { + error!("plug request region illegal"); + return (VIRTIO_MEM_RESP_ERROR, 0); + } + + let bit = match self.plugged_regions.contain(first_block) { + Ok(bit) => bit, + Err(_) => return (VIRTIO_MEM_RESP_ERROR, 0), + }; + + if bit { + match self.plugged_regions.find_next_zero(first_block + 1) { + Ok(found_block) => { + if found_block >= last_block { + (VIRTIO_MEM_RESP_ACK, VIRTIO_MEM_STATE_PLUGGED) + } else { + (VIRTIO_MEM_RESP_ACK, VIRTIO_MEM_STATE_MIXED) + } + } + Err(_) => (VIRTIO_MEM_RESP_ERROR, 0), + } + } else { + match self.plugged_regions.find_next_bit(first_block + 1) { + Ok(found_block) => { + if 
found_block >= last_block { + (VIRTIO_MEM_RESP_ACK, VIRTIO_MEM_STATE_UNPLUGGED) + } else { + (VIRTIO_MEM_RESP_ACK, VIRTIO_MEM_STATE_MIXED) + } + } + Err(_) => (VIRTIO_MEM_RESP_ERROR, 0), + } + } + } +} + +struct MemoryHandler { + /// The guest request queue + pub(crate) queue: Arc>, + /// The eventfd used to notify the guest request queue event + pub(crate) queue_evt: Arc, + /// The function for interrupt triggering + pub(crate) interrupt_cb: Arc, + /// Configuration space of virtio mem device. + config: Arc>, + /// System address space. + pub(crate) mem_space: Arc, + /// Bit mask of features negotiated by the backend and the frontend + pub(crate) driver_features: u64, + /// Virtio mem device is broken or not. + pub(crate) device_broken: Arc, + /// Virtio mem Region list + pub(crate) regions: Arc>, +} + +impl MemoryHandler { + fn handle_plug_request(&self, req: &VirtioMemReqPlug) -> u16 { + info!("handle_plug_request: {:?}", req); + let gpa = req.addr; + let nb_blocks = req.nb_blocks as u64; + let mut locked_regions = self.regions.lock().unwrap(); + let mut config = self.config.lock().unwrap(); + let plug_size = nb_blocks * config.block_size; + if (plug_size + config.plugged_size) > config.requested_size || plug_size == 0 { + return VIRTIO_MEM_RESP_NACK; + } + let ack = locked_regions.plug_range(self.mem_space.clone(), gpa, nb_blocks as usize); + if ack != VIRTIO_MEM_RESP_ACK { + return ack; + } + config.plugged_size += plug_size; + + locked_regions.plugged_size += nb_blocks * locked_regions.block_size; + + VIRTIO_MEM_RESP_ACK + } + + fn handle_unplug_request(&self, req: &VirtioMemReqUnplug) -> u16 { + info!("handle_unplug_request: {:?}", req); + let gpa = req.addr; + let nb_blocks = req.nb_blocks as u64; + let mut config = self.config.lock().unwrap(); + let unplug_size = config.block_size * nb_blocks; + if (unplug_size + config.requested_size) > config.plugged_size || unplug_size == 0 { + return VIRTIO_MEM_RESP_NACK; + } + let mut locked_regions = 
self.regions.lock().unwrap(); + let ack = locked_regions.unplug_range(self.mem_space.clone(), gpa, nb_blocks as usize); + if ack != VIRTIO_MEM_RESP_ACK { + return ack; + } + config.plugged_size -= unplug_size; + locked_regions.plugged_size += nb_blocks * locked_regions.block_size; + + VIRTIO_MEM_RESP_ACK + } + + fn handle_state_request(&self, req: &VirtioMemReqState) -> (u16, u16) { + info!("handle_state_request: {:?}", req); + let gpa = req.addr; + let nb_blocks = req.nb_blocks as u64; + let locked_regions = self.regions.lock().unwrap(); + locked_regions.range_state(gpa, nb_blocks) + } + + fn handle_unplug_all_request(&self) -> u16 { + loop { + let (addr, nb_blocks) = self.regions.lock().unwrap().top_plugged_range(); + info!("find plugged memory region: ({}, {})", addr, nb_blocks); + if nb_blocks == 0 { + break; + } + let req = &VirtioMemReqUnplug { + addr, + nb_blocks, + ..Default::default() + }; + let ack = self.handle_unplug_request(req); + if ack != VIRTIO_MEM_RESP_ACK { + return ack; + } + } + VIRTIO_MEM_RESP_ACK + } + + pub fn process_queue(&self) -> Result<()> { + loop { + let mut locked_queue = self.queue.lock().unwrap(); + let elem = locked_queue + .vring + .pop_avail(&self.mem_space, self.driver_features) + .with_context(|| { + "Failed to pop avail ring element for process guest request queue" + })?; + if elem.desc_num == 0 { + break; + } + + let mut req = iov_read_object::( + &self.mem_space.clone(), + &elem.out_iovec, + locked_queue.vring.get_cache(), + )?; + + let mut send_response = |resp: VirtioMemResp| -> Result<()> { + iov_write_object( + &self.mem_space, + &elem.in_iovec, + locked_queue.vring.get_cache(), + resp, + )?; + + locked_queue + .vring + .add_used(elem.index, resp.as_bytes().len() as u32) + .with_context(|| { + format!( + "Failed to add used ring(guest request queue), index {}, len {}", + elem.index, + resp.as_bytes().len(), + ) + })?; + + if locked_queue.vring.should_notify(self.driver_features) { + 
(self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&locked_queue), false) + .with_context(|| { + VirtioError::InterruptTrigger( + "mem guest request queue", + VirtioInterruptType::Vring, + ) + })?; + } + + Ok(()) + }; + + match req.req_type { + VIRTIO_MEM_REQ_PLUG => { + let resp_type = self.handle_plug_request( + VirtioMemReqPlug::from_bytes(req.req_union.as_mut_slice()).unwrap(), + ); + let resp = VirtioMemResp { + resp_type, + ..Default::default() + }; + send_response(resp)?; + } + VIRTIO_MEM_REQ_UNPLUG => { + let resp = VirtioMemResp { + resp_type: self.handle_unplug_request( + VirtioMemReqUnplug::from_bytes(req.req_union.as_mut_slice()).unwrap(), + ), + ..Default::default() + }; + send_response(resp)?; + } + VIRTIO_MEM_REQ_UNPLUG_ALL => { + let resp = VirtioMemResp { + resp_type: self.handle_unplug_all_request(), + ..Default::default() + }; + send_response(resp)?; + } + VIRTIO_MEM_REQ_STATE => { + let (resp_type, state_type) = self.handle_state_request( + VirtioMemReqState::from_bytes(req.req_union.as_mut_slice()).unwrap(), + ); + let mut resp = VirtioMemResp { + resp_type, + ..Default::default() + }; + resp.state.state_type = state_type; + send_response(resp)?; + } + _ => { + bail!("virtio-mem: unknown request type {}", req.req_type); + } + } + } + Ok(()) + } +} + +impl EventNotifierHelper for MemoryHandler { + fn internal_notifiers(mh: Arc>) -> Vec { + let mut notifiers = Vec::new(); + let closure_mh = mh.clone(); + let handler: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let locked_closure_mh = closure_mh.lock().unwrap(); + if let Err(e) = locked_closure_mh.process_queue() { + error!("Failed to plug/unplug mem: {:?}", e); + report_virtio_error( + locked_closure_mh.interrupt_cb.clone(), + locked_closure_mh.driver_features, + &locked_closure_mh.device_broken, + ); + } + + None + }); + notifiers.push(EventNotifier::new( + NotifierOperation::AddShared, + mh.lock().unwrap().queue_evt.as_raw_fd(), + None, + EventSet::IN, + vec![handler], + )); + 
notifiers + } +} + +#[derive(Default)] +pub struct Memory { + /// Virtio device base property. + base: VirtioBase, + /// Virtio mem device id + id: String, + /// Configuration space of virtio mem device. + config: Arc>, + /// Memory + backend: Arc>, + /// unplugged-inaccessible + unplugged_inaccessible: bool, + /// Interrupt callback function. + interrupt_cb: Option>, +} + +impl Memory { + fn new_internal(option: MemoryConfig, memobj: MemBackendObjConfig, max_size: u64) -> Self { + info!("virtio-mem: new MemoryConfig {:?}", option); + let mut mem = Self { + base: VirtioBase::new(VIRTIO_TYPE_MEM, QUEUE_NUM_MEM, DEFAULT_VIRTQUEUE_SIZE), + id: option.id.clone(), + backend: Arc::new(Mutex::new(MemoryBackend::new(memobj))), + ..Default::default() + }; + + let mut config: std::sync::MutexGuard<'_, VirtioMemConfig> = mem.config.lock().unwrap(); + config.block_size = match option.block_size { + Some(block_size) => { + if block_size % DEFAULT_MEM_BLOCK_ALIGN_SIZE != 0 { + DEFAULT_MEM_BLOCK_ALIGN_SIZE + } else { + block_size + } + } + None => DEFAULT_MEM_BLOCK_SIZE, + }; + config.region_size = mem.backend.lock().unwrap().size; + config.addr = alloc_base_addr( + max_size, + option.memaddr, + config.region_size, + config.block_size, + ); + config.usable_region_size = config.region_size; + config.node_id = match option.node { + Some(node) => { + info!( + "virtio-mem not support ACPI NUMA, ignore node option(node={})", + node + ); + NUMA_NONE + } + None => NUMA_NONE, + }; + + config.plugged_size = 0; + config.requested_size = option.requested_size.unwrap_or(0); + drop(config); + + mem.unplugged_inaccessible = option.unplugged_inaccessible; + mem + } + + pub fn new_arc( + option: MemoryConfig, + memobj: MemBackendObjConfig, + max_size: u64, + ) -> Result>> { + let mem = Self::new_internal(option, memobj, max_size); + let id = mem.id.clone(); + let mem_arc = Arc::new(Mutex::new(mem)); + + Ok(mem_arc) + } + + pub fn get_region_size(&self) -> u64 { + 
self.config.lock().unwrap().region_size + } +} + +impl VirtioDevice for Memory { + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + + fn realize(&mut self) -> Result<()> { + if self.config.lock().unwrap().addr == INVALID_ADDR { + bail!("inconsistent maddr configuration options"); + } + + self.backend.lock().unwrap().realize()?; + self.init_config_features()?; + Ok(()) + } + + fn init_config_features(&mut self) -> Result<()> { + self.base.device_features = 1u64 << VIRTIO_F_VERSION_1 | 1u64 << VIRTIO_F_RING_EVENT_IDX; + + if self.config.lock().unwrap().node_id != NUMA_NONE { + self.base.device_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM; + } + if self.unplugged_inaccessible { + self.base.device_features |= 1u64 << VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE; + } + + Ok(()) + } + + fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()> { + let new_config = *self.config.lock().unwrap(); + let config_len = size_of::(); + let config = &new_config.as_bytes()[..config_len]; + read_config_default(config, offset, data) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { + warn!( + "virtio-mem write config: offset = {}, data = {:?}", + offset, data + ); + Ok(()) + } + + fn activate( + &mut self, + mem_space: Arc, + interrupt_cb: Arc, + queue_evts: Vec>, + ) -> Result<()> { + info!("virtio-mem@{} activate", self.id); + let queues = &self.base.queues; + if queues.len() != self.queue_num() { + return Err(anyhow!(VirtioError::IncorrectQueueNum( + self.queue_num(), + queues.len() + ))); + } + self.interrupt_cb = Some(interrupt_cb.clone()); + + let config = self.config.lock().unwrap(); + let backend = self.backend.lock().unwrap(); + let (host_addr, _) = match &backend.backend { + Some(file) => ( + do_mmap( + &Some(file.as_ref()), + config.region_size, + 0, + false, + backend.share, + false, + )?, + Some(FileBackend::new_common(file.clone())), + ), + None => ( + do_mmap(&None, config.region_size, 0, false, backend.share, false)?, + None, + 
), + }; + drop(backend); + + let handler = MemoryHandler { + queue: queues[0].clone(), + queue_evt: queue_evts[0].clone(), + interrupt_cb: interrupt_cb.clone(), + driver_features: self.base.driver_features, + config: self.config.clone(), + mem_space, + regions: Arc::new(Mutex::new(MemRegionState::new( + config.addr, + config.region_size, + config.block_size, + host_addr, + ))), + device_broken: self.base.broken.clone(), + }; + + let notifiers = EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))); + register_event_helper(notifiers, None, &mut self.base.deactivate_evts) + .with_context(|| "Failed to register mem guest request event notifier to MainLoop")?; + + Ok(()) + } + + fn deactivate(&mut self) -> Result<()> { + info!("virtio-mem@{} deactivate", self.id); + unregister_event_helper(None, &mut self.base.deactivate_evts) + } + + fn reset(&mut self) -> Result<()> { + Ok(()) + } +} diff --git a/virtio/src/device/mod.rs b/virtio/src/device/mod.rs index f8914b3a9..cc3a7ab66 100644 --- a/virtio/src/device/mod.rs +++ b/virtio/src/device/mod.rs @@ -15,6 +15,7 @@ pub mod block; #[cfg(feature = "virtio_gpu")] pub mod gpu; pub mod input; +pub mod memory; pub mod net; #[cfg(feature = "virtio_rng")] pub mod rng; diff --git a/virtio/src/lib.rs b/virtio/src/lib.rs index 2b48c9fb5..bfa0ef5db 100644 --- a/virtio/src/lib.rs +++ b/virtio/src/lib.rs @@ -37,6 +37,7 @@ pub use device::block::{Block, BlockState, VirtioBlkConfig, VirtioBlkDevConfig}; #[cfg(feature = "virtio_gpu")] pub use device::gpu::*; pub use device::input::*; +pub use device::memory::*; pub use device::net::*; #[cfg(feature = "virtio_rng")] pub use device::rng::{Rng, RngConfig, RngState}; @@ -66,7 +67,7 @@ use devices::pci::register_pcidevops_type; use devices::sysbus::register_sysbusdevops_type; use machine_manager::config::ConfigCheck; use migration_derive::ByteCode; -use util::aio::{mem_to_buf, Iovec}; +use util::aio::{iov_from_buf_direct, mem_to_buf, Iovec}; use util::byte_code::ByteCode; use 
util::num_ops::{read_u32, write_u32}; use util::AsAny; @@ -86,6 +87,7 @@ pub const VIRTIO_TYPE_SCSI: u32 = 8; pub const VIRTIO_TYPE_GPU: u32 = 16; pub const VIRTIO_TYPE_INPUT: u32 = 18; pub const VIRTIO_TYPE_VSOCK: u32 = 19; +pub const VIRTIO_TYPE_MEM: u32 = 24; pub const VIRTIO_TYPE_FS: u32 = 26; // The Status of Virtio Device. @@ -811,6 +813,37 @@ pub fn iov_read_object( Ok(obj) } +/// Write object typed `T` to iovec. +pub fn iov_write_object( + mem_space: &Arc, + iovec: &[ElemIovec], + cache: &Option, + obj: T, +) -> Result<()> { + let (in_size, ctrl_vec) = gpa_hva_iovec_map(iovec, mem_space, cache)?; + let obj_len = size_of::() as u64; + if in_size < obj_len { + bail!( + "Invalid length for object: get {}, expected {}", + in_size, + obj_len + ); + } + + // SAFETY: obj_len has checked above + unsafe { iov_from_buf_direct(&ctrl_vec, obj.as_bytes()) }.and_then(|size| { + if size as u64 != obj_len { + bail!( + "Expected send msg length is {}, actual send length {}.", + obj_len, + size + ) + }; + Ok(()) + })?; + Ok(()) +} + /// Read iovec to buf and return the read number of bytes. 
pub fn iov_to_buf( mem_space: &AddressSpace, -- Gitee From 8cb33ec270515afdd90505c6ebdb66ce70915a34 Mon Sep 17 00:00:00 2001 From: Xu Yandong Date: Sat, 12 Apr 2025 16:22:59 +0800 Subject: [PATCH 7/9] machine_manager: add set-viomem qmp api for virtio-mem device --- Cargo.lock | 1 + machine/src/micro_common/mod.rs | 32 ++++++++++- machine/src/standard_common/mod.rs | 30 +++++++++- machine_manager/src/machine.rs | 6 +- machine_manager/src/qmp/qmp_schema.rs | 26 +++++++++ machine_manager/src/qmp/qmp_socket.rs | 1 + virtio/src/device/memory.rs | 81 +++++++++++++++++++++++---- 7 files changed, 161 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 36c8f8418..10cfa1260 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1002,6 +1002,7 @@ dependencies = [ "hex", "libc", "log", + "nix 0.26.2", "once_cell", "regex", "serde", diff --git a/machine/src/micro_common/mod.rs b/machine/src/micro_common/mod.rs index 200028d56..8c6200462 100644 --- a/machine/src/micro_common/mod.rs +++ b/machine/src/micro_common/mod.rs @@ -57,8 +57,8 @@ use machine_manager::config::get_chardev_socket_path; #[cfg(target_arch = "x86_64")] use machine_manager::config::Param; use machine_manager::config::{ - parse_incoming_uri, str_slip_to_clap, ConfigCheck, DriveConfig, MigrateMode, NetDevcfg, - NetworkInterfaceConfig, VmConfig, + parse_incoming_uri, parse_size, str_slip_to_clap, ConfigCheck, DriveConfig, MigrateMode, + NetDevcfg, NetworkInterfaceConfig, VmConfig, }; use machine_manager::machine::{ DeviceInterface, MachineAddressInterface, MachineExternalInterface, MachineInterface, @@ -983,6 +983,34 @@ impl DeviceInterface for LightMachine { None, ) } + + fn set_viomem(&mut self, args: Box) -> Response { + let requested_size = match parse_size(&args.requested_size) { + Ok(rs) => rs, + Err(_) => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "Invalid set-viomem arguments".to_string(), + ), + None, + ) + } + }; + + match 
virtio::qmp_set_viomem(&args.id, requested_size) { + Ok(_) => Response::create_empty_response(), + Err(e) => { + error!( + "Failed to set viomem@{} requested size to {}, {:?}", + args.id, requested_size, e + ); + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ) + } + } + } } impl MigrateInterface for LightMachine { diff --git a/machine/src/standard_common/mod.rs b/machine/src/standard_common/mod.rs index 4f44cf878..5953d2339 100644 --- a/machine/src/standard_common/mod.rs +++ b/machine/src/standard_common/mod.rs @@ -62,7 +62,7 @@ use machine_manager::config::get_cameradev_config; #[cfg(target_arch = "aarch64")] use machine_manager::config::ShutdownAction; use machine_manager::config::{ - get_chardev_config, get_netdev_config, memory_unit_conversion, parse_incoming_uri, + get_chardev_config, get_netdev_config, memory_unit_conversion, parse_incoming_uri, parse_size, BootIndexInfo, ConfigCheck, DiskFormat, DriveConfig, ExBool, MigrateMode, NumaNode, NumaNodes, M, }; @@ -1580,6 +1580,34 @@ impl DeviceInterface for StdMachine { } } + fn set_viomem(&mut self, args: Box) -> Response { + let requested_size = match parse_size(&args.requested_size) { + Ok(rs) => rs, + Err(_) => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "Invalid set-viomem arguments".to_string(), + ), + None, + ) + } + }; + + match virtio::qmp_set_viomem(&args.id, requested_size) { + Ok(_) => Response::create_empty_response(), + Err(e) => { + error!( + "Failed to set viomem@{} requested size to {}, {:?}", + args.id, requested_size, e + ); + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ) + } + } + } + #[cfg(feature = "scream")] fn switch_audio_record(&self, authorized: String) -> Response { match authorized.as_str() { diff --git a/machine_manager/src/machine.rs b/machine_manager/src/machine.rs index 3b684513b..75df97678 100644 --- 
a/machine_manager/src/machine.rs +++ b/machine_manager/src/machine.rs @@ -26,7 +26,8 @@ use crate::qmp::qmp_schema::{ CharDevAddArgument, ChardevInfo, Cmd, CmdLine, CmdParameter, DeviceAddArgument, DeviceProps, Events, GicCap, HumanMonitorCmdArgument, IothreadInfo, KvmInfo, MachineInfo, MigrateCapabilities, NetDevAddArgument, PropList, QmpCommand, QmpErrorClass, QmpEvent, - QueryMemGpaArgument, QueryVcpuRegArgument, Target, TypeLists, UpdateRegionArgument, + QueryMemGpaArgument, QueryVcpuRegArgument, SetViomemArgument, Target, TypeLists, + UpdateRegionArgument, }; #[derive(Clone)] @@ -237,6 +238,9 @@ pub trait DeviceInterface { /// Query display of stratovirt. fn query_display_image(&self) -> Response; + /// Update a virtio-mem device. + fn set_viomem(&mut self, args: Box) -> Response; + /// Query state. fn query_workloads(&self) -> Response { Response::create_error_response( diff --git a/machine_manager/src/qmp/qmp_schema.rs b/machine_manager/src/qmp/qmp_schema.rs index 2342c0beb..095eb3fa0 100644 --- a/machine_manager/src/qmp/qmp_schema.rs +++ b/machine_manager/src/qmp/qmp_schema.rs @@ -97,6 +97,7 @@ define_qmp_command_enum!( blockdev_add("blockdev-add", Box), blockdev_del("blockdev-del", blockdev_del), balloon("balloon", balloon, default), + set_viomem("set-viomem", Box), query_mem("query-mem", query_mem, default), query_mem_gpa("query-mem-gpa", query_mem_gpa, default), query_balloon("query-balloon", query_balloon, default), @@ -707,6 +708,31 @@ pub struct cameradev_del { } generate_command_impl!(cameradev_del, Empty); +/// set-viomem +/// +/// # Arguments +/// +/// * `id` - The device's ID, must be unique. +/// * `requested-size` - new request size of the virtio-mem device. 
+/// +/// # Examples +/// +/// ```test +/// -> { "execute" : "set-viomem" , +/// "arguments" : { "id" : "viomem0", "requested-size" : "1G"} } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct set_viomem { + pub id: String, + #[serde(rename = "requested-size")] + pub requested_size: String, +} + +pub type SetViomemArgument = set_viomem; +generate_command_impl!(set_viomem, Empty); + /// query-hotpluggable-cpus /// /// Query which CPU types could be plugged. diff --git a/machine_manager/src/qmp/qmp_socket.rs b/machine_manager/src/qmp/qmp_socket.rs index a3187884e..3e1dabc32 100644 --- a/machine_manager/src/qmp/qmp_socket.rs +++ b/machine_manager/src/qmp/qmp_socket.rs @@ -484,6 +484,7 @@ fn qmp_command_exec( (netdev_add, netdev_add), (chardev_add, chardev_add), (cameradev_add, cameradev_add), + (set_viomem, set_viomem), (update_region, update_region), (human_monitor_command, human_monitor_command), (blockdev_snapshot_internal_sync, blockdev_snapshot_internal_sync), diff --git a/virtio/src/device/memory.rs b/virtio/src/device/memory.rs index 780a6148b..b78627187 100644 --- a/virtio/src/device/memory.rs +++ b/virtio/src/device/memory.rs @@ -10,8 +10,18 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
+use std::collections::HashMap; +use std::mem::size_of; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::rc::Rc; +use std::sync::atomic::AtomicBool; +use std::sync::{Arc, Mutex, OnceLock}; +use std::vec::Vec; + use anyhow::{anyhow, bail, Context, Result}; use clap::{ArgAction, Parser}; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; use address_space::{AddressSpace, GuestAddress, HostMemMapping, Region}; use log::{error, info, warn}; @@ -19,12 +29,6 @@ use machine_manager::config::{ get_pci_df, parse_bool, valid_id, MemBackendObjConfig, MemoryBackend, DEFAULT_VIRTQUEUE_SIZE, }; use machine_manager::event_loop::{register_event_helper, unregister_event_helper}; -use std::mem::size_of; -use std::os::unix::io::{AsRawFd, RawFd}; -use std::rc::Rc; -use std::sync::atomic::AtomicBool; -use std::sync::{Arc, Mutex}; -use std::vec::Vec; use util::bitmap::Bitmap; use util::byte_code::ByteCode; use util::gen_base_func; @@ -32,8 +36,6 @@ use util::loop_context::{ read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, }; use util::unix::do_mmap; -use vmm_sys_util::epoll::EventSet; -use vmm_sys_util::eventfd::EventFd; use crate::error::VirtioError; use crate::{ @@ -42,9 +44,6 @@ use crate::{ VIRTIO_F_VERSION_1, VIRTIO_TYPE_MEM, }; -const VIRTIO_MEM_F_ACPI_PXM: u32 = 0; -const VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE: u32 = 1; - const QUEUE_NUM_MEM: usize = 1; const VIRTIO_MEM_REQ_PLUG: u16 = 0; @@ -61,6 +60,9 @@ const VIRTIO_MEM_STATE_PLUGGED: u16 = 0; const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1; const VIRTIO_MEM_STATE_MIXED: u16 = 2; +const VIRTIO_MEM_F_ACPI_PXM: u32 = 0; +const VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE: u32 = 1; + const DEFAULT_MEM_BLOCK_SIZE: u64 = 33554432; // 32 MB const DEFAULT_MEM_BLOCK_ALIGN_SIZE: u64 = 16384; // 16 KB @@ -452,6 +454,8 @@ struct MemoryHandler { pub(crate) device_broken: Arc, /// Virtio mem Region list pub(crate) regions: Arc>, + /// The memory backend for the device. 
+ pub(crate) fb: Option, } impl MemoryHandler { @@ -723,6 +727,7 @@ impl Memory { let mem = Self::new_internal(option, memobj, max_size); let id = mem.id.clone(); let mem_arc = Arc::new(Mutex::new(mem)); + register_viomem_device(id, mem_arc.clone())?; Ok(mem_arc) } @@ -730,6 +735,34 @@ impl Memory { pub fn get_region_size(&self) -> u64 { self.config.lock().unwrap().region_size } + + fn update_request(&mut self, request_size: u64) -> Result<()> { + info!("qmp request size {}", request_size); + if request_size % self.config.lock().unwrap().block_size != 0 { + bail!("requested_size not aligned with device block size") + } + let old_requested_size = self.config.lock().unwrap().requested_size; + self.config.lock().unwrap().requested_size = request_size; + self.signal_config_change().with_context(|| { + self.config.lock().unwrap().requested_size = old_requested_size; + "Failed to notify about configuration change after setting request memory size" + })?; + + Ok(()) + } + + /// Notify configuration changes to VM. 
+ fn signal_config_change(&self) -> Result<()> { + if let Some(interrupt_cb) = &self.interrupt_cb { + interrupt_cb(&VirtioInterruptType::Config, None, false).with_context(|| { + VirtioError::InterruptTrigger("viomem", VirtioInterruptType::Config) + }) + } else { + Err(anyhow!(VirtioError::DeviceNotActivated( + "viomem".to_string() + ))) + } + } } impl VirtioDevice for Memory { @@ -791,7 +824,7 @@ impl VirtioDevice for Memory { let config = self.config.lock().unwrap(); let backend = self.backend.lock().unwrap(); - let (host_addr, _) = match &backend.backend { + let (host_addr, fb) = match &backend.backend { Some(file) => ( do_mmap( &Some(file.as_ref()), @@ -824,6 +857,7 @@ impl VirtioDevice for Memory { host_addr, ))), device_broken: self.base.broken.clone(), + fb, }; let notifiers = EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))); @@ -842,3 +876,26 @@ impl VirtioDevice for Memory { Ok(()) } } + +fn register_viomem_device(id: String, mem: Arc>) -> Result<()> { + VIOMEM_DEV_LIST + .get_or_init(|| Arc::new(Mutex::new(HashMap::new()))) + .lock() + .unwrap() + .insert(id, mem); + Ok(()) +} + +pub fn qmp_set_viomem(id: &String, request_size: u64) -> Result<()> { + if let Some(devlist) = VIOMEM_DEV_LIST.get() { + match devlist.lock().unwrap().get(id) { + Some(mem) => { + mem.lock().unwrap().update_request(request_size)?; + } + None => { + bail!("not found virtio-mem@{} device", id) + } + } + } + bail!("no virtio-mem device context") +} -- Gitee From 1172c6a46052c268ba3932a3a8567ac4859d55e9 Mon Sep 17 00:00:00 2001 From: Yandong Xu Date: Fri, 30 May 2025 13:08:49 +0000 Subject: [PATCH 8/9] machine_manager: add get-viomem qmp api for virtio-mem device --- Cargo.lock | 1 + machine/src/micro_common/mod.rs | 13 ++++ machine/src/standard_common/mod.rs | 13 ++++ machine_manager/src/machine.rs | 7 ++- machine_manager/src/qmp/qmp_schema.rs | 39 ++++++++++++ machine_manager/src/qmp/qmp_socket.rs | 1 + virtio/Cargo.toml | 1 + virtio/src/device/memory.rs | 85 
++++++++++++++++++++------- 8 files changed, 136 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 10cfa1260..454edf667 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1929,6 +1929,7 @@ dependencies = [ "migration", "migration_derive", "once_cell", + "serde", "serde_json", "thiserror", "trace", diff --git a/machine/src/micro_common/mod.rs b/machine/src/micro_common/mod.rs index 8c6200462..b5b05ee9b 100644 --- a/machine/src/micro_common/mod.rs +++ b/machine/src/micro_common/mod.rs @@ -1011,6 +1011,19 @@ impl DeviceInterface for LightMachine { } } } + + fn get_viomem(&self, args: Box) -> Response { + match virtio::qmp_get_viomem(&args.id) { + Ok(value) => Response::create_response(value, None), + Err(e) => { + error!("Failed to get viomem@{} information, {:?}", args.id, e); + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ) + } + } + } } impl MigrateInterface for LightMachine { diff --git a/machine/src/standard_common/mod.rs b/machine/src/standard_common/mod.rs index 5953d2339..b34401fc3 100644 --- a/machine/src/standard_common/mod.rs +++ b/machine/src/standard_common/mod.rs @@ -1608,6 +1608,19 @@ impl DeviceInterface for StdMachine { } } + fn get_viomem(&self, args: Box) -> Response { + match virtio::qmp_get_viomem(&args.id) { + Ok(value) => Response::create_response(value, None), + Err(e) => { + error!("Failed to get viomem@{} information, {:?}", args.id, e); + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ) + } + } + } + #[cfg(feature = "scream")] fn switch_audio_record(&self, authorized: String) -> Response { match authorized.as_str() { diff --git a/machine_manager/src/machine.rs b/machine_manager/src/machine.rs index 75df97678..f054becdd 100644 --- a/machine_manager/src/machine.rs +++ b/machine_manager/src/machine.rs @@ -24,7 +24,7 @@ use crate::qmp::qmp_response::{Response, Version}; use crate::qmp::qmp_schema::{ BlockDevAddArgument, 
BlockdevSnapshotInternalArgument, CameraDevAddArgument, CharDevAddArgument, ChardevInfo, Cmd, CmdLine, CmdParameter, DeviceAddArgument, DeviceProps, - Events, GicCap, HumanMonitorCmdArgument, IothreadInfo, KvmInfo, MachineInfo, + Events, GetViomemArgument, GicCap, HumanMonitorCmdArgument, IothreadInfo, KvmInfo, MachineInfo, MigrateCapabilities, NetDevAddArgument, PropList, QmpCommand, QmpErrorClass, QmpEvent, QueryMemGpaArgument, QueryVcpuRegArgument, SetViomemArgument, Target, TypeLists, UpdateRegionArgument, @@ -238,9 +238,12 @@ pub trait DeviceInterface { /// Query display of stratovirt. fn query_display_image(&self) -> Response; - /// Update a virtio-mem device. + /// Set requested-size of a virtio-mem device. fn set_viomem(&mut self, args: Box) -> Response; + /// Get information of a virtio-mem device. + fn get_viomem(&self, args: Box) -> Response; + /// Query state. fn query_workloads(&self) -> Response { Response::create_error_response( diff --git a/machine_manager/src/qmp/qmp_schema.rs b/machine_manager/src/qmp/qmp_schema.rs index 095eb3fa0..6e85bd427 100644 --- a/machine_manager/src/qmp/qmp_schema.rs +++ b/machine_manager/src/qmp/qmp_schema.rs @@ -98,6 +98,7 @@ define_qmp_command_enum!( blockdev_del("blockdev-del", blockdev_del), balloon("balloon", balloon, default), set_viomem("set-viomem", Box), + get_viomem("get-viomem", Box), query_mem("query-mem", query_mem, default), query_mem_gpa("query-mem-gpa", query_mem_gpa, default), query_balloon("query-balloon", query_balloon, default), @@ -733,6 +734,44 @@ pub struct set_viomem { pub type SetViomemArgument = set_viomem; generate_command_impl!(set_viomem, Empty); +/// get-viomem +/// +/// Query virtio-mem device status. +/// +/// # Arguments +/// +/// * `id` - The device's ID, must be unique. 
+/// +/// # Examples +/// +/// ```test +/// -> { "execute" : "get-viomem" , +/// "arguments" : { "id" : "viomem0"} } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct get_viomem { + pub id: String, +} + +pub type GetViomemArgument = get_viomem; +generate_command_impl!(get_viomem, ViomemInfo); + +#[allow(clippy::upper_case_acronyms)] +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct ViomemInfo { + pub node: u16, + #[serde(rename = "size")] + pub region_size: usize, + #[serde(rename = "block-size")] + pub block_size: usize, + #[serde(rename = "requested-size")] + pub requested_size: usize, + #[serde(rename = "plugged-size")] + pub plugged_size: usize, +} + /// query-hotpluggable-cpus /// /// Query which CPU types could be plugged. diff --git a/machine_manager/src/qmp/qmp_socket.rs b/machine_manager/src/qmp/qmp_socket.rs index 3e1dabc32..132a1c276 100644 --- a/machine_manager/src/qmp/qmp_socket.rs +++ b/machine_manager/src/qmp/qmp_socket.rs @@ -485,6 +485,7 @@ fn qmp_command_exec( (chardev_add, chardev_add), (cameradev_add, cameradev_add), (set_viomem, set_viomem), + (get_viomem, get_viomem), (update_region, update_region), (human_monitor_command, human_monitor_command), (blockdev_snapshot_internal_sync, blockdev_snapshot_internal_sync), diff --git a/virtio/Cargo.toml b/virtio/Cargo.toml index b8692b394..3d173235e 100644 --- a/virtio/Cargo.toml +++ b/virtio/Cargo.toml @@ -12,6 +12,7 @@ thiserror = "1.0" anyhow = "1.0" libc = "0.2" log = "0.4" +serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" vmm-sys-util = "0.12.1" once_cell = "1.18.0" diff --git a/virtio/src/device/memory.rs b/virtio/src/device/memory.rs index b78627187..ab0028541 100644 --- a/virtio/src/device/memory.rs +++ b/virtio/src/device/memory.rs @@ -20,6 +20,8 @@ use std::vec::Vec; use anyhow::{anyhow, bail, Context, Result}; use clap::{ArgAction, Parser}; +use serde::{Deserialize, 
Serialize}; +use serde_json::Value; use vmm_sys_util::epoll::EventSet; use vmm_sys_util::eventfd::EventFd; @@ -109,6 +111,20 @@ fn alloc_base_addr( base_addr } +#[allow(clippy::upper_case_acronyms)] +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +struct ViomemInfo { + pub node: u16, + #[serde(rename = "size")] + pub region_size: u64, + #[serde(rename = "block-size")] + pub block_size: u64, + #[serde(rename = "requested-size")] + pub requested_size: u64, + #[serde(rename = "plugged-size")] + pub plugged_size: u64, +} + #[repr(C)] #[derive(Copy, Clone, Default)] struct VirtioMemConfig { @@ -130,6 +146,25 @@ struct VirtioMemConfig { requested_size: u64, } +impl VirtioMemConfig { + pub(crate) fn qmp_query(&self) -> Value { + let node_id = if self.node_id == NUMA_NONE { + 0 + } else { + self.node_id + }; + + serde_json::to_value(ViomemInfo { + node: node_id, + region_size: self.region_size, + block_size: self.block_size, + requested_size: self.requested_size, + plugged_size: self.plugged_size, + }) + .unwrap() + } +} + #[repr(C)] #[derive(Copy, Clone, Debug, Default)] struct VirtioMemReq { @@ -454,8 +489,6 @@ struct MemoryHandler { pub(crate) device_broken: Arc, /// Virtio mem Region list pub(crate) regions: Arc>, - /// The memory backend for the device. 
- pub(crate) fb: Option, } impl MemoryHandler { @@ -738,6 +771,9 @@ impl Memory { fn update_request(&mut self, request_size: u64) -> Result<()> { info!("qmp request size {}", request_size); + if request_size > self.config.lock().unwrap().region_size { + bail!("request size out of the device region size") + } if request_size % self.config.lock().unwrap().block_size != 0 { bail!("requested_size not aligned with device block size") } @@ -824,22 +860,16 @@ impl VirtioDevice for Memory { let config = self.config.lock().unwrap(); let backend = self.backend.lock().unwrap(); - let (host_addr, fb) = match &backend.backend { - Some(file) => ( - do_mmap( - &Some(file.as_ref()), - config.region_size, - 0, - false, - backend.share, - false, - )?, - Some(FileBackend::new_common(file.clone())), - ), - None => ( - do_mmap(&None, config.region_size, 0, false, backend.share, false)?, - None, - ), + let host_addr = match &backend.backend { + Some(file) => do_mmap( + &Some(file.as_ref()), + config.region_size, + 0, + false, + backend.share, + false, + )?, + None => do_mmap(&None, config.region_size, 0, false, backend.share, false)?, }; drop(backend); @@ -857,7 +887,6 @@ impl VirtioDevice for Memory { host_addr, ))), device_broken: self.base.broken.clone(), - fb, }; let notifiers = EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))); @@ -889,13 +918,25 @@ fn register_viomem_device(id: String, mem: Arc>) -> Result<()> { pub fn qmp_set_viomem(id: &String, request_size: u64) -> Result<()> { if let Some(devlist) = VIOMEM_DEV_LIST.get() { match devlist.lock().unwrap().get(id) { - Some(mem) => { - mem.lock().unwrap().update_request(request_size)?; + Some(mem) => mem.lock().unwrap().update_request(request_size), + None => { + bail!("not found virtio-mem@{} device", id) } + } + } else { + bail!("no virtio-mem device context") + } +} + +pub fn qmp_get_viomem(id: &String) -> Result { + if let Some(devlist) = VIOMEM_DEV_LIST.get() { + match devlist.lock().unwrap().get(id) { + 
Some(mem) => Ok(mem.lock().unwrap().config.lock().unwrap().qmp_query()), None => { bail!("not found virtio-mem@{} device", id) } } + } else { + bail!("no virtio-mem device context") } - bail!("no virtio-mem device context") } -- Gitee From 460eb76ddd494e74406f33081b27e614438c297c Mon Sep 17 00:00:00 2001 From: Xu Yandong Date: Mon, 28 Jul 2025 20:13:51 +0800 Subject: [PATCH 9/9] docs: add documents for virtio-mem device --- docs/config_guidebook.md | 29 ++++++++++++++++++++++++++++- docs/qmp.md | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/docs/config_guidebook.md b/docs/config_guidebook.md index 44a3c46cb..3701a81f7 100644 --- a/docs/config_guidebook.md +++ b/docs/config_guidebook.md @@ -77,13 +77,15 @@ Default VM memory size is 256M. The supported VM memory size is among [128M, 512 ```shell # cmdline --m [size=][m|M|g|G] +-m [size=][m|M|g|G][,maxmem=[m|M|g|G]] -m 256m -m 256 -m 1G ``` +Note: The maxmem option is only used in memory hot-(un)plug scenarios. + #### 1.3.2 Memory Prealloc Memory Prealloc feature is used to preallocate VM physical memory in advance and create its page tables. Using this feature, the number of page faults will decrease, and the memory access performance of the VM will improve. @@ -1243,6 +1245,31 @@ Sample Configuration: Note: 1. Only host evdev passthrough supported. +### 2.23 virtio-mem +virtio-mem device allow dynamic resizing of virtual machine memory, provide a flexible, cross-architecture memory hot(un)plug solution. + +Five properties are supported for virtio-mem. +* id: unique device id. +* memdev: memory-backend id +* memaddr: the base GPA of the virtio-mem managed memory region. +* requested-size: the requested amount of plugged memory. +* block-size: the alignment size of a memory block. +* node: numa node id. + +For virtio-mem-pci, two more properties are required. +* bus: name of bus which to attach. +* addr: including slot number and function number. 
The first number represents slot number
+of device and the second one represents function number of it. As virtio pci mem device is a
+single function device, the function number should be set to zero.
+
+Sample Configuration:
+```shell
+# virtio mmio mem device
+-device virtio-mem-device,id=<viomem_id>,memdev=<mem_id>[,memaddr=<68719476736>][,requested-size=<68719476736>][,block-size=<4096>][,node=<0>]
+# virtio pci mem device
+-device virtio-mem-pci,id=<viomem_id>,bus=<bus_name>,addr=<0x4>,memdev=<mem_id>[,memaddr=<68719476736>][,requested-size=<68719476736>][,block-size=<4096>][,node=<0>][,multifunction=on|off]
+```
+
 ## 3. Trace
 Users can specify a configuration file which lists the traces that needs to be enabled, or specify the trace type that needs to be enabled. Setting both file and type is also allowed, so that traces with the specified type and traces listed in the file will all be enabled.
diff --git a/docs/qmp.md b/docs/qmp.md
index 5e5e25e9f..c1416db89 100644
--- a/docs/qmp.md
+++ b/docs/qmp.md
@@ -413,6 +413,40 @@ Get memory size of guest.
 <- { "return": { "actual": 2147483648 } }
 ```
 
+## virtio-mem device management
+
+With the following QMP commands you can set the requested memory size of a virtio-mem device and query the device's status.
+
+### set-viomem
+
+Set the requested memory size of a virtio-mem device.
+
+#### Arguments
+
+* `id` : The device's ID.
+* `requested-size` : the new requested size of the virtio-mem device.
+
+#### Example
+
+```json
+-> { "execute": "set-viomem", "arguments": { "id" : "viomem0", "requested-size": "1G" } }
+<- { "return": {} }
+```
+
+### get-viomem
+
+Get status information of a virtio-mem device.
+
+#### Arguments
+* `id`: The device's ID.
+
+#### Example
+
+```json
+-> { "execute": "get-viomem", "arguments" : { "id" : "viomem0"} }
+<- { "return": { "node" : 0, "size": 2147483648, "block-size": 67108864, "plugged-size": 0, "requested-size": 0 } }
+```
+
 ## Migration
 
 ### migrate
-- 
Gitee