diff --git a/Cargo.lock b/Cargo.lock index 36c8f841879cdb9a30fb1911130c24957782e5ce..454edf66728df6a508fe193e2dc5a073ffef72c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1002,6 +1002,7 @@ dependencies = [ "hex", "libc", "log", + "nix 0.26.2", "once_cell", "regex", "serde", @@ -1928,6 +1929,7 @@ dependencies = [ "migration", "migration_derive", "once_cell", + "serde", "serde_json", "thiserror", "trace", diff --git a/address_space/src/host_mmap.rs b/address_space/src/host_mmap.rs index d2ca9b8a2f7f54e9d62781992c4f3021de5b3acc..cf6f8b17a93b312bd0216bad9c1cb97b2afffa82 100644 --- a/address_space/src/host_mmap.rs +++ b/address_space/src/host_mmap.rs @@ -24,7 +24,7 @@ use nix::sys::statfs::fstatfs; use nix::unistd::{mkstemp, sysconf, unlink, SysconfVar}; use crate::{AddressRange, GuestAddress, Region}; -use machine_manager::config::{HostMemPolicy, MachineMemConfig, MemZoneConfig}; +use machine_manager::config::{HostMemPolicy, MachineMemConfig, MemBackendObjConfig}; use util::unix::{do_mmap, host_page_size, mbind}; const MAX_PREALLOC_THREAD: i64 = 16; @@ -296,7 +296,7 @@ pub fn create_default_mem(mem_config: &MachineMemConfig, thread_num: u8) -> Resu /// /// * `mem_config` - The config of default memory. /// * `thread_num` - The num of mem preallocv threads, typically the number of vCPUs. -pub fn create_backend_mem(mem_config: &MemZoneConfig, thread_num: u8) -> Result { +pub fn create_backend_mem(mem_config: &MemBackendObjConfig, thread_num: u8) -> Result { let mut f_back: Option = None; if mem_config.memfd() { @@ -331,7 +331,7 @@ pub fn create_backend_mem(mem_config: &MemZoneConfig, thread_num: u8) -> Result< mem_config.size, f_back, mem_config.dump_guest_core, - mem_config.share, + mem_config.share(), false, )?); if mem_config.prealloc { @@ -349,13 +349,16 @@ pub fn create_backend_mem(mem_config: &MemZoneConfig, thread_num: u8) -> Result< /// # Arguments /// /// * `mem_mappings` - The host virtual address of mapped memory information. -/// * `zone` - Memory zone config info. -fn set_host_memory_policy(mem_mappings: &Arc, zone: &MemZoneConfig) -> Result<()> { - if zone.host_numa_nodes.is_none() { +/// * `mb_config` - Memory backend config info. +fn set_host_memory_policy( + mem_mappings: &Arc, + mb_config: &MemBackendObjConfig, +) -> Result<()> { + if mb_config.host_numa_nodes.is_none() { return Ok(()); } let host_addr_start = mem_mappings.host_address(); - let nodes = zone.host_numa_nodes.as_ref().unwrap(); + let nodes = mb_config.host_numa_nodes.as_ref().unwrap(); let mut max_node = nodes[nodes.len() - 1] as usize; // Upper limit of max_node is MAX_NODES. @@ -367,7 +370,7 @@ fn set_host_memory_policy(mem_mappings: &Arc, zone: &MemZoneConf // It is kind of linux bug or feature which will cut off the last node. max_node += 1; - let policy = HostMemPolicy::from(zone.policy.clone()); + let policy = HostMemPolicy::from(mb_config.policy.clone()); if policy == HostMemPolicy::Default { max_node = 0; nmask = vec![0_u64; max_node]; @@ -380,7 +383,7 @@ fn set_host_memory_policy(mem_mappings: &Arc, zone: &MemZoneConf unsafe { mbind( host_addr_start, - zone.size, + mb_config.size, policy as u32, nmask, max_node as u64, diff --git a/address_space/src/region.rs b/address_space/src/region.rs index 6c1be857eb6e25792f83ff9c99174dd234c86471..65b55da42b2f359375beb3547807334a7eba9cae 100644 --- a/address_space/src/region.rs +++ b/address_space/src/region.rs @@ -171,10 +171,18 @@ impl PartialEq for FlatRange { /// Implement PartialEq/Eq for comparison of Region. 
impl PartialEq for Region { fn eq(&self, other: &Region) -> bool { - Arc::as_ptr(&self.priority) == Arc::as_ptr(&other.priority) - && self.region_type() == other.region_type() - && Arc::as_ptr(&self.offset) == Arc::as_ptr(&other.offset) - && Arc::as_ptr(&self.size) == Arc::as_ptr(&other.size) + if other.region_type() == RegionType::Ram { + self.priority() == other.priority() + && self.name == other.name + && self.region_type == other.region_type + && self.offset() == other.offset() + && self.size() == other.size() + } else { + Arc::as_ptr(&self.priority) == Arc::as_ptr(&other.priority) + && self.region_type() == other.region_type() + && Arc::as_ptr(&self.offset) == Arc::as_ptr(&other.offset) + && Arc::as_ptr(&self.size) == Arc::as_ptr(&other.size) + } } } @@ -245,11 +253,15 @@ impl Region { mem_mapping: Option>, ops: Option, ) -> Region { + let offset = match &mem_mapping { + Some(map) => map.start_address().0, + None => 0, + }; Region { name: String::from(name), region_type, priority: Arc::new(AtomicI32::new(0)), - offset: Arc::new(Mutex::new(GuestAddress(0))), + offset: Arc::new(Mutex::new(GuestAddress(offset))), size: Arc::new(AtomicU64::new(size)), mem_mapping, ops, diff --git a/docs/config_guidebook.md b/docs/config_guidebook.md index 44a3c46cb9275eb324f2ca22226942df7f7bbb80..3701a81f7b554206381acf8fa4b622fb9c19ae94 100644 --- a/docs/config_guidebook.md +++ b/docs/config_guidebook.md @@ -77,13 +77,15 @@ Default VM memory size is 256M. The supported VM memory size is among [128M, 512 ```shell # cmdline --m [size=][m|M|g|G] +-m [size=][m|M|g|G][,maxmem=[m|M|g|G]] -m 256m -m 256 -m 1G ``` +Note: The maxmem option is only used in memory hot-(un)plug scenarios. + #### 1.3.2 Memory Prealloc Memory Prealloc feature is used to preallocate VM physical memory in advance and create its page tables. Using this feature, the number of page faults will decrease, and the memory access performance of the VM will improve. @@ -1243,6 +1245,31 @@ Sample Configuration: Note: 1. Only host evdev passthrough supported. +### 2.23 virtio-mem +The virtio-mem device allows dynamic resizing of virtual machine memory, providing a flexible, cross-architecture memory hot(un)plug solution. + +Six properties are supported for virtio-mem. +* id: unique device id. +* memdev: id of the memory backend object. +* memaddr: the base GPA of the virtio-mem managed memory region. +* requested-size: the requested amount of plugged memory. +* block-size: the size and alignment of a memory block. +* node: NUMA node id. + +For virtio-mem-pci, two more properties are required. +* bus: name of the bus to attach to. +* addr: slot number and function number. The first number is the slot number +of the device and the second one is its function number. As the virtio pci mem device is a +single-function device, the function number should be set to zero. + +Sample Configuration: +```shell +# virtio mmio mem device +-device virtio-mem-device,id=,memdev=[,memaddr=<68719476736>][,requested-size=<68719476736>][,block-size=<4096>][,node=<0>] +# virtio pci mem device +-device virtio-mem-pci,id=,bus=,addr=<0x4>,memdev=[,memaddr=<68719476736>][,requested-size=<68719476736>][,block-size=<4096>][,node=<0>][,multifunction=on|off] +``` + ## 3. Trace Users can specify a configuration file which lists the traces that needs to be enabled, or specify the trace type that needs to be enabled. Setting both file and type is also allowed, so that traces with the specified type and traces listed in the file will all be enabled.
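As a cross-reference for the new memory options in this patch, the following is a minimal sketch of one complete invocation that combines `maxmem`, a memory backend object and a virtio-mem-pci device. The id values (`mem0`, `viomem0`), the bus name `pcie.0` and the sizes are illustrative assumptions rather than values required by the code; as in the samples above, `requested-size` on the `-device` line is given in raw bytes, while `-m` and the object `size` accept unit suffixes.

```shell
# Illustrative sketch (assumed names/sizes): 4G of boot memory, hot-pluggable
# headroom up to 12G, a 2G memfd-backed object, and a virtio-mem-pci device
# starting with 1G (1073741824 bytes) requested.
-m 4G,maxmem=12G \
-object memory-backend-memfd,id=mem0,size=2G \
-device virtio-mem-pci,id=viomem0,bus=pcie.0,addr=0x4,memdev=mem0,requested-size=1073741824
```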
diff --git a/docs/qmp.md b/docs/qmp.md index 5e5e25e9f6e1cb09f6933f9b32022acd36b0b36b..c1416db8909437419b3161520f098f7b2ee7ea67 100644 --- a/docs/qmp.md +++ b/docs/qmp.md @@ -413,6 +413,40 @@ Get memory size of guest. <- { "return": { "actual": 2147483648 } } ``` +## virtio-mem device management + +With QMP commands you can set the requested memory size of a virtio-mem device and query its current state. + +### set-viomem + +Set the requested memory size of a virtio-mem device. + +#### Arguments + +* `id` : The device's ID. +* `requested-size` : the new requested size of the virtio-mem device. + +#### Example + +```json +-> { "execute": "set-viomem", "arguments": { "id" : "viomem0", "requested-size": "1G" } } +<- { "return": {} } +``` + +### get-viomem + +Get state information of a virtio-mem device. + +#### Arguments +* `id`: The device's ID. + +#### Example + +```json +-> { "execute": "get-viomem", "arguments" : { "id" : "viomem0"} } +<- { "return": { "node" : 0, "size": 21474883648, "block-size": 67108864, "plugged-size": 0, "requested-size": 0 } } +``` + ## Migration ### migrate diff --git a/machine/src/lib.rs b/machine/src/lib.rs index ef2ff1827c3f2a142a1f43c03fc03ac4f0afb628..f066cdc80afeff5cf7a54a17d56e74589317f95b 100644 --- a/machine/src/lib.rs +++ b/machine/src/lib.rs @@ -409,21 +409,21 @@ pub trait MachineOps: MachineLifecycle { let root = self.get_vm_ram(); let numa_nodes = self.get_numa_nodes(); - if numa_nodes.is_none() || mem_config.mem_zones.is_none() { + if numa_nodes.is_none() || mem_config.membackend_objs.is_none() { let default_mem = create_default_mem(mem_config, thread_num)?; root.add_subregion_not_update(default_mem, 0_u64)?; return Ok(()); } - let zones = mem_config.mem_zones.as_ref().unwrap(); + let mb_objs = mem_config.membackend_objs.as_ref().unwrap(); let mut offset = 0_u64; for node in numa_nodes.as_ref().unwrap().iter() { - for zone in zones.iter() { - if zone.id.eq(&node.1.mem_dev) { - let ram = create_backend_mem(zone, thread_num)?; + for mb_obj in mb_objs.iter() { + if mb_obj.id.eq(&node.1.mem_dev) { + let ram = create_backend_mem(mb_obj, thread_num)?; root.add_subregion_not_update(ram, offset)?; offset = offset - .checked_add(zone.size) - .with_context(|| "total zone size overflow")?; + .checked_add(mb_obj.size) + .with_context(|| "total mem backend size overflow")?; break; } } @@ -738,6 +738,57 @@ pub trait MachineOps: MachineLifecycle { Ok(()) } + /// Add virtio memory device. + /// + /// # Arguments + /// + /// * `vm_config` - VM configuration. + /// * `cfg_args` - Device configuration args.
+ fn add_virtio_mem(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { + let option = virtio::MemoryConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let memoption = vm_config + .object + .mem_object + .remove(&option.memdev) + .with_context(|| { + format!( + "Object for memory-backend-* {} config not found", + option.memdev + ) + })?; + + let max_size = vm_config.machine_config.mem_config.max_size; + let device = virtio::Memory::new_arc(option.clone(), memoption, max_size)?; + + let current_size = device.lock().unwrap().get_region_size() + + vm_config.machine_config.mem_config.current_size; + if current_size > max_size { + bail!("failed to add virtio-mem, current memory out of maxsize"); + } else { + vm_config.machine_config.mem_config.current_size = current_size; + } + + match option.classtype.as_str() { + "virtio-mem-device" => { + check_arg_nonexist!( + ("bus", option.bus), + ("addr", option.addr), + ("multifunction", option.multifunction) + ); + self.add_virtio_mmio_device(option.id.clone(), device) + .with_context(|| "Failed to add virtio mmio mem device")?; + } + _ => { + check_arg_exist!(("bus", option.bus), ("addr", option.addr)); + let bdf = PciBdf::new(option.bus.clone().unwrap(), option.addr.unwrap()); + let multi_func = option.multifunction.unwrap_or_default(); + self.add_virtio_pci_device(&option.id, &bdf, device, multi_func, false) + .with_context(|| "Failed to add pci mem device")?; + } + } + Ok(()) + } + /// Add virtio serial device. /// /// # Arguments @@ -1747,7 +1798,7 @@ pub trait MachineOps: MachineLifecycle { ) })?; - if !mem_cfg.share { + if !mem_cfg.share() { bail!("Object for share config is not on"); } @@ -2017,6 +2068,7 @@ pub trait MachineOps: MachineLifecycle { ("virtio-net-pci", add_virtio_pci_net, vm_config, cfg_args, false), ("pcie-root-port", add_pci_root_port, cfg_args), ("virtio-balloon-device" | "virtio-balloon-pci", add_virtio_balloon, vm_config, cfg_args), + ("virtio-mem-device" | "virtio-mem-pci", add_virtio_mem, vm_config, cfg_args), ("virtio-input-device" | "virtio-input-pci", add_virtio_input, cfg_args), ("virtio-serial-device" | "virtio-serial-pci", add_virtio_serial, vm_config, cfg_args), ("virtconsole" | "virtserialport", add_virtio_serial_port, vm_config, cfg_args), diff --git a/machine/src/micro_common/mod.rs b/machine/src/micro_common/mod.rs index 200028d568d0b0f827435933e7fc2315d0603953..b5b05ee9b0e1d5623c4ed825e40503f7885864ce 100644 --- a/machine/src/micro_common/mod.rs +++ b/machine/src/micro_common/mod.rs @@ -57,8 +57,8 @@ use machine_manager::config::get_chardev_socket_path; #[cfg(target_arch = "x86_64")] use machine_manager::config::Param; use machine_manager::config::{ - parse_incoming_uri, str_slip_to_clap, ConfigCheck, DriveConfig, MigrateMode, NetDevcfg, - NetworkInterfaceConfig, VmConfig, + parse_incoming_uri, parse_size, str_slip_to_clap, ConfigCheck, DriveConfig, MigrateMode, + NetDevcfg, NetworkInterfaceConfig, VmConfig, }; use machine_manager::machine::{ DeviceInterface, MachineAddressInterface, MachineExternalInterface, MachineInterface, @@ -983,6 +983,47 @@ impl DeviceInterface for LightMachine { None, ) } + + fn set_viomem(&mut self, args: Box) -> Response { + let requested_size = match parse_size(&args.requested_size) { + Ok(rs) => rs, + Err(_) => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "Invalid set-viomem arguments".to_string(), + ), + None, + ) + } + }; + + match virtio::qmp_set_viomem(&args.id, requested_size) { + Ok(_) => 
Response::create_empty_response(), + Err(e) => { + error!( + "Failed to set viomem@{} requested size to {}, {:?}", + args.id, requested_size, e + ); + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ) + } + } + } + + fn get_viomem(&self, args: Box) -> Response { + match virtio::qmp_get_viomem(&args.id) { + Ok(value) => Response::create_response(value, None), + Err(e) => { + error!("Failed to get viomem@{} information, {:?}", args.id, e); + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ) + } + } + } } impl MigrateInterface for LightMachine { diff --git a/machine/src/standard_common/mod.rs b/machine/src/standard_common/mod.rs index 4f44cf878f73c50e967d0d526b7685b55efeed74..b34401fc3c71b126682773b01a75f633727644f3 100644 --- a/machine/src/standard_common/mod.rs +++ b/machine/src/standard_common/mod.rs @@ -62,7 +62,7 @@ use machine_manager::config::get_cameradev_config; #[cfg(target_arch = "aarch64")] use machine_manager::config::ShutdownAction; use machine_manager::config::{ - get_chardev_config, get_netdev_config, memory_unit_conversion, parse_incoming_uri, + get_chardev_config, get_netdev_config, memory_unit_conversion, parse_incoming_uri, parse_size, BootIndexInfo, ConfigCheck, DiskFormat, DriveConfig, ExBool, MigrateMode, NumaNode, NumaNodes, M, }; @@ -1580,6 +1580,47 @@ impl DeviceInterface for StdMachine { } } + fn set_viomem(&mut self, args: Box) -> Response { + let requested_size = match parse_size(&args.requested_size) { + Ok(rs) => rs, + Err(_) => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "Invalid set-viomem arguments".to_string(), + ), + None, + ) + } + }; + + match virtio::qmp_set_viomem(&args.id, requested_size) { + Ok(_) => Response::create_empty_response(), + Err(e) => { + error!( + "Failed to set viomem@{} requested size to {}, {:?}", + args.id, requested_size, e + ); + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ) + } + } + } + + fn get_viomem(&self, args: Box) -> Response { + match virtio::qmp_get_viomem(&args.id) { + Ok(value) => Response::create_response(value, None), + Err(e) => { + error!("Failed to get viomem@{} information, {:?}", args.id, e); + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ) + } + } + } + #[cfg(feature = "scream")] fn switch_audio_record(&self, authorized: String) -> Response { match authorized.as_str() { diff --git a/machine_manager/Cargo.toml b/machine_manager/Cargo.toml index ec787bc510c1f4eb388d618034d58f700841d431..b226af0f1c9548b5b63c084cd2f48d9f0f1bce59 100644 --- a/machine_manager/Cargo.toml +++ b/machine_manager/Cargo.toml @@ -22,6 +22,7 @@ thiserror = "1.0" anyhow = "1.0" trace = { path = "../trace" } util = { path = "../util" } +nix = { version = "0.26.2", default-features = false, features = ["fs", "feature"]} clap = { version = "=4.1.4", default-features = false, features = ["std", "derive"] } [features] diff --git a/machine_manager/src/cmdline.rs b/machine_manager/src/cmdline.rs index 1092213299684aa96b5d123eefccce1a78d3db1d..cb00ee5fd4741f2dbff6fd7986c6d673149ef9c4 100644 --- a/machine_manager/src/cmdline.rs +++ b/machine_manager/src/cmdline.rs @@ -247,6 +247,8 @@ pub fn create_args_parser<'a>() -> ArgParser<'a> { \n\t\tadd vhost pci vsock: -device vhost-vsock-pci,id=,guest-cid=,bus=,addr=<0x3>[,multifunction=on|off]; \ \n\t\tadd virtio mmio balloon: -device 
virtio-balloon-device[,deflate-on-oom=true|false][,free-page-reporting=true|false]; \ \n\t\tadd virtio pci balloon: -device virtio-balloon-pci,id=,bus=,addr=<0x4>[,deflate-on-oom=true|false][,free-page-reporting=true|false][,multifunction=on|off]; \ + \n\t\tadd virtio mmio mem: -device virtio-mem-device,id=,memdev=[,memaddr=<68719476736>][,requested-size=<68719476736>][,block-size=<4096>][,node=<0>]; \ + \n\t\tadd virtio pci mem: -device virtio-mem-pci,id=,bus=,addr=<0x4>,memdev=[,memaddr=<68719476736>][,requested-size=<68719476736>][,block-size=<4096>][,node=<0>][,multifunction=on|off]; \ \n\t\tadd virtio mmio rng: -device virtio-rng-device,rng=,max-bytes=<1234>,period=<1000>; \ \n\t\tadd virtio pci rng: -device virtio-rng-pci,id=,rng=,max-bytes=<1234>,period=<1000>,bus=,addr=<0x1>[,multifunction=on|off]; \ \n\t\tadd virtio mmio input: -device virtio-input-device,id=,evdev=; \ diff --git a/machine_manager/src/config/machine_config.rs b/machine_manager/src/config/machine_config.rs index 3d277ba155d3c22e708aaa0d44a60c05a19ce36a..12ea0954091a679561c88e5de6cb40c9a92d0fe5 100644 --- a/machine_manager/src/config/machine_config.rs +++ b/machine_manager/src/config/machine_config.rs @@ -10,10 +10,14 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. +use std::fs::{remove_file, File}; +use std::os::unix::io::FromRawFd; use std::str::FromStr; +use std::sync::Arc; use anyhow::{anyhow, bail, Context, Result}; use clap::{ArgAction, Parser}; +use nix::sys::memfd::{memfd_create, MemFdCreateFlag}; use serde::{Deserialize, Serialize}; use super::error::ConfigError; @@ -86,7 +90,7 @@ impl From for HostMemPolicy { #[derive(Parser, Clone, Debug, Serialize, Deserialize)] #[command(no_binary_name(true))] -pub struct MemZoneConfig { +pub struct MemBackendObjConfig { #[arg(long, alias = "classtype", value_parser = ["memory-backend-ram", "memory-backend-file", "memory-backend-memfd"])] pub mem_type: String, #[arg(long, value_parser = valid_id)] @@ -106,42 +110,140 @@ pub struct MemZoneConfig { pub mem_path: Option, #[arg(long, default_value = "true", value_parser = parse_bool, action = ArgAction::Append)] pub dump_guest_core: bool, - #[arg(long, default_value = "off", value_parser = parse_bool, action = ArgAction::Append)] - pub share: bool, + #[arg(long, value_parser = parse_bool, action = ArgAction::Append)] + pub share: Option, #[arg(long, alias = "mem-prealloc", default_value = "false", value_parser = parse_bool, action = ArgAction::Append)] pub prealloc: bool, } -impl MemZoneConfig { +impl MemBackendObjConfig { pub fn memfd(&self) -> bool { self.mem_type.eq("memory-backend-memfd") } + + pub fn share(&self) -> bool { + match self.share { + Some(share) => share, + None => matches!( + self.mem_type.as_str(), + "memory-backend-file" | "memory-backend-memfd" + ), + } + } } /// Config that contains machine's memory information config. 
#[derive(Clone, Debug, Serialize, Deserialize)] pub struct MachineMemConfig { pub mem_size: u64, + pub max_size: u64, + pub current_size: u64, pub mem_path: Option, pub dump_guest_core: bool, pub mem_share: bool, pub mem_prealloc: bool, - pub mem_zones: Option>, + pub membackend_objs: Option>, } impl Default for MachineMemConfig { fn default() -> Self { MachineMemConfig { mem_size: DEFAULT_MEMSIZE * M, + max_size: MAX_MEMSIZE, + current_size: DEFAULT_MEMSIZE * M, mem_path: None, dump_guest_core: true, mem_share: false, mem_prealloc: false, - mem_zones: None, + membackend_objs: None, } } } +pub const MEM_BACKEND_TYPE_ANON: u64 = 1; +pub const MEM_BACKEND_TYPE_MEMFD: u64 = 2; +pub const MEM_BACKEND_TYPE_FILE: u64 = 4; + +#[derive(Clone, Debug, Default)] +pub struct MemoryBackend { + pub mb_type: u64, + pub size: u64, + pub backend: Option>, + pub file_path: Option, + pub share: bool, +} + +impl MemoryBackend { + pub fn new(option: MemBackendObjConfig) -> Self { + let mut mb: MemoryBackend = Default::default(); + mb.mb_type = match option.mem_type.as_str() { + "memory-backend-memfd" => { + mb.file_path = Some(format!("stratovirt_memfd@{}", option.id)); + MEM_BACKEND_TYPE_MEMFD + } + "memory-backend-file" => { + mb.file_path = option.mem_path.clone(); + MEM_BACKEND_TYPE_FILE + } + _ => { + mb.file_path = None; + MEM_BACKEND_TYPE_ANON + } + }; + mb.backend = None; + mb.share = option.share(); + mb.size = option.size; + mb + } + + pub fn realize(&mut self) -> Result<()> { + match self.mb_type { + MEM_BACKEND_TYPE_MEMFD => { + let path_str = match self.file_path.as_ref() { + Some(path) => path.clone(), + None => bail!("memory-backend-memfd path absent"), + }; + let memfd = + memfd_create(&std::ffi::CString::new(path_str)?, MemFdCreateFlag::empty())?; + if memfd < 0 { + return Err(std::io::Error::last_os_error()) + .with_context(|| "Failed to create memfd"); + } + // SAFETY: The parameters memfd has checked upper. 
+ let memfile = unsafe { File::from_raw_fd(memfd) }; + memfile + .set_len(self.size) + .with_context(|| "Failed to set the length of memfd file")?; + self.backend = Some(Arc::new(memfile)); + } + MEM_BACKEND_TYPE_FILE => { + let path_str = match self.file_path.as_ref() { + Some(path) => path.clone(), + None => bail!("memory-backend-file path absent"), + }; + let path = std::path::Path::new(&path_str); + let unlink = !path.exists(); + let file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(path) + .with_context(|| format!("Failed to open file: {}", path_str))?; + if file.metadata().unwrap().len() < self.size { + file.set_len(self.size)?; + } + if unlink { + remove_file(path.as_os_str())?; + } + self.backend = Some(Arc::new(file)); + } + _ => {} + }; + Ok(()) + } +} + #[derive(Parser, Clone, Debug, Serialize, Deserialize, Default)] #[command(no_binary_name(true))] pub struct CpuConfig { @@ -265,6 +367,8 @@ struct AccelConfig { struct MemSizeConfig { #[arg(long, alias = "classtype", value_parser = parse_size)] size: u64, + #[arg(long, default_value = "262144", value_parser = parse_size)] + maxmem: u64, } #[derive(Parser)] @@ -422,6 +526,11 @@ impl VmConfig { let mem_cfg = MemSizeConfig::try_parse_from(str_slip_to_clap(mem_config, !has_size_label, false))?; self.machine_config.mem_config.mem_size = mem_cfg.size; + self.machine_config.mem_config.max_size = mem_cfg.maxmem; + if mem_cfg.maxmem < mem_cfg.size { + bail!("maxmem must bigger than current memory size") + } + self.machine_config.mem_config.current_size = mem_cfg.size; Ok(()) } @@ -480,43 +589,44 @@ impl VmConfig { } impl VmConfig { - /// Convert memory zone cmdline to VM config + /// Convert memory backend cmdline to VM config /// /// # Arguments /// - /// * `mem_zone` - The memory zone cmdline string. - pub fn add_mem_zone(&mut self, mem_zone: &str) -> Result { - let zone_config = MemZoneConfig::try_parse_from(str_slip_to_clap(mem_zone, true, false))?; + /// * `mem_backend` - The memory backend cmdline string. 
+ pub fn add_mem_backend(&mut self, mem_backend: &str) -> Result { + let mb_config = + MemBackendObjConfig::try_parse_from(str_slip_to_clap(mem_backend, true, false))?; - if (zone_config.mem_path.is_none() && zone_config.mem_type.eq("memory-backend-file")) - || (zone_config.mem_path.is_some() && zone_config.mem_type.ne("memory-backend-file")) + if (mb_config.mem_path.is_none() && mb_config.mem_type.eq("memory-backend-file")) + || (mb_config.mem_path.is_some() && mb_config.mem_type.ne("memory-backend-file")) { - bail!("Object type: {} config path err", zone_config.mem_type); + bail!("Object type: {} config path err", mb_config.mem_type); } - if self.object.mem_object.contains_key(&zone_config.id) { - bail!("Object: {} has been added", zone_config.id); + if self.object.mem_object.contains_key(&mb_config.id) { + bail!("Object: {} has been added", mb_config.id); } self.object .mem_object - .insert(zone_config.id.clone(), zone_config.clone()); + .insert(mb_config.id.clone(), mb_config.clone()); - if zone_config.host_numa_nodes.is_none() { - return Ok(zone_config); + if mb_config.host_numa_nodes.is_none() { + return Ok(mb_config); } - if self.machine_config.mem_config.mem_zones.is_some() { + if self.machine_config.mem_config.membackend_objs.is_some() { self.machine_config .mem_config - .mem_zones + .membackend_objs .as_mut() .unwrap() - .push(zone_config.clone()); + .push(mb_config.clone()); } else { - self.machine_config.mem_config.mem_zones = Some(vec![zone_config.clone()]); + self.machine_config.mem_config.membackend_objs = Some(vec![mb_config.clone()]); } - Ok(zone_config) + Ok(mb_config) } } @@ -620,7 +730,9 @@ mod tests { mem_share: false, dump_guest_core: false, mem_prealloc: false, - mem_zones: None, + membackend_objs: None, + max_size: MAX_MEMSIZE, + current_size: MAX_MEMSIZE, }; let mut machine_config = MachineConfig { mach_type: MachineType::MicroVm, @@ -822,6 +934,8 @@ mod tests { assert!(mem_cfg_ret.is_ok()); let mem_size = vm_config.machine_config.mem_config.mem_size; assert_eq!(mem_size, 8 * 1024 * 1024); + let max_size = vm_config.machine_config.mem_config.max_size; + assert_eq!(max_size, 256 * 1024 * 1024 * 1024); let memory_cfg = "size=8m"; let mem_cfg_ret = vm_config.add_memory(memory_cfg); @@ -834,6 +948,14 @@ mod tests { assert!(mem_cfg_ret.is_ok()); let mem_size = vm_config.machine_config.mem_config.mem_size; assert_eq!(mem_size, 8 * 1024 * 1024 * 1024); + + let memory_cfg = "size=8G,maxmem=32G"; + let mem_cfg_ret = vm_config.add_memory(memory_cfg); + assert!(mem_cfg_ret.is_ok()); + let mem_size = vm_config.machine_config.mem_config.mem_size; + let max_size = vm_config.machine_config.mem_config.max_size; + assert_eq!(mem_size, 8 * 1024 * 1024 * 1024); + assert_eq!(max_size, 32 * 1024 * 1024 * 1024); } #[test] @@ -955,37 +1077,38 @@ mod tests { } #[test] - fn test_add_mem_zone() { + fn test_add_mem_backend() { let mut vm_config = VmConfig::default(); - let zone_config_1 = vm_config - .add_mem_zone("memory-backend-ram,size=2G,id=mem1,host-nodes=1,policy=bind") + let mb_config_1 = vm_config + .add_mem_backend("memory-backend-ram,size=2G,id=mem1,host-nodes=1,policy=bind") .unwrap(); - assert_eq!(zone_config_1.id, "mem1"); - assert_eq!(zone_config_1.size, 2147483648); - assert_eq!(zone_config_1.host_numa_nodes, Some(vec![1])); - assert_eq!(zone_config_1.policy, "bind"); + assert_eq!(mb_config_1.id, "mem1"); + assert_eq!(mb_config_1.size, 2147483648); + assert_eq!(mb_config_1.host_numa_nodes, Some(vec![1])); + assert_eq!(mb_config_1.policy, "bind"); - let zone_config_2 = 
vm_config - .add_mem_zone("memory-backend-ram,size=2G,id=mem2,host-nodes=1-2,policy=default") + let mb_config_2 = vm_config + .add_mem_backend("memory-backend-ram,size=2G,id=mem2,host-nodes=1-2,policy=default") .unwrap(); - assert_eq!(zone_config_2.host_numa_nodes, Some(vec![1, 2])); + assert_eq!(mb_config_2.host_numa_nodes, Some(vec![1, 2])); - let zone_config_3 = vm_config - .add_mem_zone("memory-backend-ram,size=2M,id=mem3,share=on") + let mb_config_3 = vm_config + .add_mem_backend("memory-backend-ram,size=2M,id=mem3,share=on") .unwrap(); - assert_eq!(zone_config_3.size, 2 * 1024 * 1024); - assert!(zone_config_3.share); + assert_eq!(mb_config_3.size, 2 * 1024 * 1024); + assert!(mb_config_3.share()); - let zone_config_4 = vm_config - .add_mem_zone("memory-backend-ram,size=2M,id=mem4") + let mb_config_4 = vm_config + .add_mem_backend("memory-backend-ram,size=2M,id=mem4") .unwrap(); - assert!(!zone_config_4.share); - assert!(!zone_config_4.memfd()); + assert!(!mb_config_4.share()); + assert!(!mb_config_4.memfd()); - let zone_config_5 = vm_config - .add_mem_zone("memory-backend-memfd,size=2M,id=mem5") + let mb_config_5 = vm_config + .add_mem_backend("memory-backend-memfd,size=2M,id=mem5") .unwrap(); - assert!(zone_config_5.memfd()); + assert!(mb_config_5.share()); + assert!(mb_config_5.memfd()); } #[test] diff --git a/machine_manager/src/config/mod.rs b/machine_manager/src/config/mod.rs index a3f96b04a1bc49ab386ee7a050794c5606428947..144589bfcfd64dc553a934c541273a29b61f7f99 100644 --- a/machine_manager/src/config/mod.rs +++ b/machine_manager/src/config/mod.rs @@ -109,7 +109,7 @@ struct GlobalConfig { #[derive(Clone, Default, Debug, Serialize, Deserialize)] pub struct ObjectConfig { pub rng_object: HashMap, - pub mem_object: HashMap, + pub mem_object: HashMap, #[cfg(feature = "vnc_auth")] pub tls_object: HashMap, #[cfg(feature = "vnc_auth")] @@ -223,7 +223,7 @@ impl VmConfig { self.object.rng_object.insert(id, rng_cfg); } "memory-backend-ram" | "memory-backend-file" | "memory-backend-memfd" => { - self.add_mem_zone(object_args)?; + self.add_mem_backend(object_args)?; } #[cfg(feature = "vnc_auth")] "tls-creds-x509" => { diff --git a/machine_manager/src/machine.rs b/machine_manager/src/machine.rs index 3b684513ba3cecba2fadefd0d075cea7a79f8e29..f054becdd504f06fe4146d796a08a6d593d7ce21 100644 --- a/machine_manager/src/machine.rs +++ b/machine_manager/src/machine.rs @@ -24,9 +24,10 @@ use crate::qmp::qmp_response::{Response, Version}; use crate::qmp::qmp_schema::{ BlockDevAddArgument, BlockdevSnapshotInternalArgument, CameraDevAddArgument, CharDevAddArgument, ChardevInfo, Cmd, CmdLine, CmdParameter, DeviceAddArgument, DeviceProps, - Events, GicCap, HumanMonitorCmdArgument, IothreadInfo, KvmInfo, MachineInfo, + Events, GetViomemArgument, GicCap, HumanMonitorCmdArgument, IothreadInfo, KvmInfo, MachineInfo, MigrateCapabilities, NetDevAddArgument, PropList, QmpCommand, QmpErrorClass, QmpEvent, - QueryMemGpaArgument, QueryVcpuRegArgument, Target, TypeLists, UpdateRegionArgument, + QueryMemGpaArgument, QueryVcpuRegArgument, SetViomemArgument, Target, TypeLists, + UpdateRegionArgument, }; #[derive(Clone)] @@ -237,6 +238,12 @@ pub trait DeviceInterface { /// Query display of stratovirt. fn query_display_image(&self) -> Response; + /// Set requested-size of a virtio-mem device. + fn set_viomem(&mut self, args: Box) -> Response; + + /// Get information of a virtio-mem device. + fn get_viomem(&self, args: Box) -> Response; + /// Query state. 
fn query_workloads(&self) -> Response { Response::create_error_response( diff --git a/machine_manager/src/qmp/qmp_schema.rs b/machine_manager/src/qmp/qmp_schema.rs index 2342c0bebeadd6316b6c73728003e02e038d6893..6e85bd42743da418f97ac7e007214430b8588a11 100644 --- a/machine_manager/src/qmp/qmp_schema.rs +++ b/machine_manager/src/qmp/qmp_schema.rs @@ -97,6 +97,8 @@ define_qmp_command_enum!( blockdev_add("blockdev-add", Box), blockdev_del("blockdev-del", blockdev_del), balloon("balloon", balloon, default), + set_viomem("set-viomem", Box), + get_viomem("get-viomem", Box), query_mem("query-mem", query_mem, default), query_mem_gpa("query-mem-gpa", query_mem_gpa, default), query_balloon("query-balloon", query_balloon, default), @@ -707,6 +709,69 @@ pub struct cameradev_del { } generate_command_impl!(cameradev_del, Empty); +/// set-viomem +/// +/// # Arguments +/// +/// * `id` - The device's ID, must be unique. +/// * `requested-size` - new request size of the virtio-mem device. +/// +/// # Examples +/// +/// ```test +/// -> { "execute" : "set-viomem" , +/// "arguments" : { "id" : "viomem0", "requested-size" : "1G"} } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct set_viomem { + pub id: String, + #[serde(rename = "requested-size")] + pub requested_size: String, +} + +pub type SetViomemArgument = set_viomem; +generate_command_impl!(set_viomem, Empty); + +/// get-viomem +/// +/// Query virtio-mem device status. +/// +/// # Arguments +/// +/// * `id` - The device's ID, must be unique. +/// +/// # Examples +/// +/// ```test +/// -> { "execute" : "get-viomem" , +/// "arguments" : { "id" : "viomem0"} } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct get_viomem { + pub id: String, +} + +pub type GetViomemArgument = get_viomem; +generate_command_impl!(get_viomem, ViomemInfo); + +#[allow(clippy::upper_case_acronyms)] +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct ViomemInfo { + pub node: u16, + #[serde(rename = "size")] + pub region_size: usize, + #[serde(rename = "block-size")] + pub block_size: usize, + #[serde(rename = "requested-size")] + pub requested_size: usize, + #[serde(rename = "plugged-size")] + pub plugged_size: usize, +} + /// query-hotpluggable-cpus /// /// Query which CPU types could be plugged. 
diff --git a/machine_manager/src/qmp/qmp_socket.rs b/machine_manager/src/qmp/qmp_socket.rs index a3187884e443192583826a8ebe04c14d1aff055b..132a1c2769b12429c245746ea71aa21ad6c23951 100644 --- a/machine_manager/src/qmp/qmp_socket.rs +++ b/machine_manager/src/qmp/qmp_socket.rs @@ -484,6 +484,8 @@ fn qmp_command_exec( (netdev_add, netdev_add), (chardev_add, chardev_add), (cameradev_add, cameradev_add), + (set_viomem, set_viomem), + (get_viomem, get_viomem), (update_region, update_region), (human_monitor_command, human_monitor_command), (blockdev_snapshot_internal_sync, blockdev_snapshot_internal_sync), diff --git a/virtio/Cargo.toml b/virtio/Cargo.toml index b8692b39491280351478e519422301dedbe962d7..3d173235eafa8fcceb0c4d389ee568ae046f981e 100644 --- a/virtio/Cargo.toml +++ b/virtio/Cargo.toml @@ -12,6 +12,7 @@ thiserror = "1.0" anyhow = "1.0" libc = "0.2" log = "0.4" +serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" vmm-sys-util = "0.12.1" once_cell = "1.18.0" diff --git a/virtio/src/device/memory.rs b/virtio/src/device/memory.rs new file mode 100644 index 0000000000000000000000000000000000000000..ab002854139266f6e3b805efdde81228f00acbd5 --- /dev/null +++ b/virtio/src/device/memory.rs @@ -0,0 +1,942 @@ +// Copyright (c) 2025 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
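+
+//! Virtio-mem device.
+//!
+//! The device owns a contiguous memory region whose size comes from the
+//! associated memory backend object (anonymous, memfd or file backed) and
+//! whose base GPA is either set by the `memaddr` option or allocated above
+//! the configured `maxmem`. The host publishes `requested_size` (changed at
+//! runtime through the `set-viomem` QMP command); the guest driver then
+//! sends PLUG, UNPLUG, UNPLUG_ALL and STATE requests over the single request
+//! virtqueue to adjust the plugged amount in `block_size` units, and plugged
+//! blocks are mapped into the guest address space as RAM subregions.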
+ +use std::collections::HashMap; +use std::mem::size_of; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::rc::Rc; +use std::sync::atomic::AtomicBool; +use std::sync::{Arc, Mutex, OnceLock}; +use std::vec::Vec; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::{ArgAction, Parser}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; + +use address_space::{AddressSpace, GuestAddress, HostMemMapping, Region}; +use log::{error, info, warn}; +use machine_manager::config::{ + get_pci_df, parse_bool, valid_id, MemBackendObjConfig, MemoryBackend, DEFAULT_VIRTQUEUE_SIZE, +}; +use machine_manager::event_loop::{register_event_helper, unregister_event_helper}; +use util::bitmap::Bitmap; +use util::byte_code::ByteCode; +use util::gen_base_func; +use util::loop_context::{ + read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, +}; +use util::unix::do_mmap; + +use crate::error::VirtioError; +use crate::{ + iov_read_object, iov_write_object, read_config_default, report_virtio_error, Queue, VirtioBase, + VirtioDevice, VirtioInterrupt, VirtioInterruptType, VIRTIO_F_RING_EVENT_IDX, + VIRTIO_F_VERSION_1, VIRTIO_TYPE_MEM, +}; + +const QUEUE_NUM_MEM: usize = 1; + +const VIRTIO_MEM_REQ_PLUG: u16 = 0; +const VIRTIO_MEM_REQ_UNPLUG: u16 = 1; +const VIRTIO_MEM_REQ_UNPLUG_ALL: u16 = 2; +const VIRTIO_MEM_REQ_STATE: u16 = 3; + +const VIRTIO_MEM_RESP_ACK: u16 = 0; +const VIRTIO_MEM_RESP_NACK: u16 = 1; +const VIRTIO_MEM_RESP_BUSY: u16 = 2; +const VIRTIO_MEM_RESP_ERROR: u16 = 3; + +const VIRTIO_MEM_STATE_PLUGGED: u16 = 0; +const VIRTIO_MEM_STATE_UNPLUGGED: u16 = 1; +const VIRTIO_MEM_STATE_MIXED: u16 = 2; + +const VIRTIO_MEM_F_ACPI_PXM: u32 = 0; +const VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE: u32 = 1; + +const DEFAULT_MEM_BLOCK_SIZE: u64 = 33554432; // 32 MB +const DEFAULT_MEM_BLOCK_ALIGN_SIZE: u64 = 16384; // 16 KB + +const NUMA_NONE: u16 = 4097; +const INVALID_ADDR: u64 = 0; + +type ViomemDeviceTable = HashMap>>; +static VIOMEM_DEV_LIST: OnceLock>> = OnceLock::new(); +static DEFAULT_PLUGGABLE_ADDR_BASE: OnceLock>> = OnceLock::new(); + +#[derive(Copy, Clone, Default)] +struct PluggableAddrBase { + addr: u64, + auto_alloc: bool, +} + +fn alloc_base_addr( + max_size: u64, + maddr_cfg: Option, + region_size: u64, + block_size: u64, +) -> u64 { + let auto_alloc = maddr_cfg.is_none(); + let mut pluggable = DEFAULT_PLUGGABLE_ADDR_BASE + .get_or_init(|| { + Arc::new(Mutex::new(PluggableAddrBase { + addr: max_size, + auto_alloc, + })) + }) + .lock() + .unwrap(); + if auto_alloc != pluggable.auto_alloc { + error!("inconsistent maddr configuration options"); + return INVALID_ADDR; + } + + let base_addr = match maddr_cfg { + Some(maddr) => maddr, + None => pluggable.addr.div_ceil(block_size) * block_size, + }; + pluggable.addr = base_addr + region_size; + + base_addr +} + +#[allow(clippy::upper_case_acronyms)] +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +struct ViomemInfo { + pub node: u16, + #[serde(rename = "size")] + pub region_size: u64, + #[serde(rename = "block-size")] + pub block_size: u64, + #[serde(rename = "requested-size")] + pub requested_size: u64, + #[serde(rename = "plugged-size")] + pub plugged_size: u64, +} + +#[repr(C)] +#[derive(Copy, Clone, Default)] +struct VirtioMemConfig { + /// size and the alignment in bytes of a memory block. + block_size: u64, + /// has no meaning without VIRTIO_MEM_F_ACPI_PXM. + node_id: u16, + /// reserved for future use. 
+ padding: [u8; 6], + /// start guest physical address of device-managed memory region. + addr: u64, + /// the size of device-managed memory region in bytes. + region_size: u64, + /// the size of the usable device-managed memory region. + usable_region_size: u64, + /// the amount of plugged memory in bytes within the usable device-managed memory region. + plugged_size: u64, + /// the requested amount of plugged memory within the usable device-managed memory region. + requested_size: u64, +} + +impl VirtioMemConfig { + pub(crate) fn qmp_query(&self) -> Value { + let node_id = if self.node_id == NUMA_NONE { + 0 + } else { + self.node_id + }; + + serde_json::to_value(ViomemInfo { + node: node_id, + region_size: self.region_size, + block_size: self.block_size, + requested_size: self.requested_size, + plugged_size: self.plugged_size, + }) + .unwrap() + } +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioMemReq { + req_type: u16, + padding: [u16; 3], + req_union: [u8; 16], +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioMemReqPlug { + addr: u64, + nb_blocks: u16, + padding: [u16; 3], +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioMemReqUnplug { + addr: u64, + nb_blocks: u16, + padding: [u16; 3], +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioMemReqState { + addr: u64, + nb_blocks: u16, + padding: [u16; 3], +} + +#[repr(C)] +#[derive(Copy, Clone, Default)] +struct VirtioMemResp { + resp_type: u16, + padding: [u16; 3], + state: VirtioMemRespState, +} + +#[repr(C)] +#[derive(Copy, Clone, Default)] +struct VirtioMemRespState { + state_type: u16, +} + +impl ByteCode for VirtioMemConfig {} +impl ByteCode for VirtioMemReq {} +impl ByteCode for VirtioMemReqPlug {} +impl ByteCode for VirtioMemReqUnplug {} +impl ByteCode for VirtioMemReqState {} +impl ByteCode for VirtioMemResp {} +impl ByteCode for VirtioMemRespState {} + +#[derive(Parser, Debug, Clone, Default)] +#[command(no_binary_name(true))] +pub struct MemoryConfig { + #[arg(long, value_parser = ["virtio-mem-device", "virtio-mem-pci"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub bus: Option, + #[arg(long, value_parser = get_pci_df)] + pub addr: Option<(u8, u8)>, + #[arg(long, value_parser = parse_bool, action = ArgAction::Append)] + pub multifunction: Option, + #[arg(long)] + pub memaddr: Option, + #[arg(long)] + pub memdev: String, + #[arg(long)] + pub block_size: Option, + #[arg(long)] + pub node: Option, + #[arg(long)] + pub requested_size: Option, + #[arg(long, default_value = "false", value_parser = parse_bool, action = ArgAction::Append)] + pub unplugged_inaccessible: bool, +} + +struct MemRegionState { + base_gpa: u64, + block_size: u64, + nr_blocks: u64, + plugged_size: u64, + plugged_regions: Bitmap, + /// The memory backend host address + host_addr: u64, +} + +impl MemRegionState { + fn new(addr: u64, region_size: u64, block_size: u64, host_addr: u64) -> Self { + assert!(block_size != 0); + assert!((region_size % block_size) == 0); + let nr_blocks = region_size / block_size; + Self { + base_gpa: addr, + block_size, + nr_blocks, + plugged_size: 0, + plugged_regions: Bitmap::new(nr_blocks as usize), + host_addr, + } + } + + fn get_regions(&self, first_gpa: u64, nb_blocks: u64) -> Vec { + let mut regs = Vec::new(); + for n in 0..nb_blocks { + let gpa = first_gpa + n * self.block_size; + let block_addr = self.host_addr + (gpa - self.base_gpa); + let block = Arc::new( + HostMemMapping::new( + 
GuestAddress(gpa), + Some(block_addr), + self.block_size, + None, + false, + true, + false, + ) + .unwrap(), + ); + + regs.push(Region::init_ram_region( + block, + format!("viomem@{}-{}", gpa, self.block_size).as_str(), + )); + } + regs + } + + fn valid_range(&self, gpa: u64, nb_blocks: u64) -> u16 { + // 1. check gpa above region base gpa + if gpa < self.base_gpa || nb_blocks == 0 { + return VIRTIO_MEM_RESP_ERROR; + } + + // 2. check gpa addr aligned with block size + let addr_offset = gpa - self.base_gpa; + let block_offset: u64 = if addr_offset % self.block_size == 0 { + addr_offset / self.block_size + } else { + return VIRTIO_MEM_RESP_ERROR; + }; + + // 3. gpa + nb_blocks no overflow + let (end_block, overflow) = block_offset.overflowing_add(nb_blocks); + + if overflow { + return VIRTIO_MEM_RESP_ERROR; + } + + // 4. check add mem segment in the region + if end_block > self.nr_blocks { + return VIRTIO_MEM_RESP_ERROR; + } + + VIRTIO_MEM_RESP_ACK + } + + fn top_plugged_range(&self) -> (u64, u16) { + let first_block = 0; + match self.plugged_regions.find_next_bit(first_block) { + Ok(begin) => match self.plugged_regions.find_next_zero(begin) { + Ok(end) => ( + self.base_gpa + begin as u64 * self.block_size, + (end - begin) as u16, + ), + Err(_) => (0, 0), + }, + Err(_) => (0, 0), + } + } + + fn check_range_unplugged(&self, gpa: u64, nb_blocks: u64) -> u16 { + let first_block = (gpa - self.base_gpa) / self.block_size; + let last_block = first_block + nb_blocks - 1; + match self.plugged_regions.find_next_bit(first_block as usize) { + Ok(found_block) => { + if found_block as u64 > last_block { + VIRTIO_MEM_RESP_ACK + } else { + VIRTIO_MEM_RESP_ERROR + } + } + Err(_) => VIRTIO_MEM_RESP_ERROR, + } + } + + fn check_range_plugged(&self, gpa: u64, nb_blocks: u64) -> u16 { + let first_block = (gpa - self.base_gpa) / self.block_size; + let last_block = first_block + nb_blocks - 1; + match self.plugged_regions.find_next_zero(first_block as usize) { + Ok(found_block) => { + if found_block as u64 > last_block { + VIRTIO_MEM_RESP_ACK + } else { + VIRTIO_MEM_RESP_ERROR + } + } + Err(_) => VIRTIO_MEM_RESP_ERROR, + } + } + + fn plug_range(&mut self, mem_space: Arc, gpa: u64, nb_blocks: usize) -> u16 { + let first_block = ((gpa - self.base_gpa) / self.block_size) as usize; + if self.valid_range(gpa, nb_blocks as u64) != VIRTIO_MEM_RESP_ACK { + error!("plug request region illegal"); + return VIRTIO_MEM_RESP_ERROR; + } + if self.check_range_unplugged(gpa, nb_blocks as u64) != VIRTIO_MEM_RESP_ACK { + error!("plug request region conflict"); + return VIRTIO_MEM_RESP_ERROR; + } + + for region in self.get_regions(gpa, nb_blocks as u64) { + let offset = region.offset().0; + warn!("add region offset {}", offset); + if mem_space.root().add_subregion(region, offset).is_err() { + error!("failed to add subregion"); + return VIRTIO_MEM_RESP_BUSY; + } + } + + if self + .plugged_regions + .set_range(first_block, nb_blocks) + .is_err() + { + error!("failed to set range"); + return VIRTIO_MEM_RESP_ERROR; + } + + VIRTIO_MEM_RESP_ACK + } + + fn unplug_range(&mut self, mem_space: Arc, gpa: u64, nb_blocks: usize) -> u16 { + let first_block = ((gpa - self.base_gpa) / self.block_size) as usize; + if self.valid_range(gpa, nb_blocks as u64) != VIRTIO_MEM_RESP_ACK { + error!("unplug request region illegal"); + return VIRTIO_MEM_RESP_ERROR; + } + if self.check_range_plugged(gpa, nb_blocks as u64) != VIRTIO_MEM_RESP_ACK { + warn!("unplug request region conflict"); + return VIRTIO_MEM_RESP_ERROR; + } + + for region in 
self.get_regions(gpa, nb_blocks as u64) { + warn!("del region offset {}", region.offset().0); + if mem_space.root().delete_subregion(®ion).is_err() { + error!("failed to delete subregion"); + return VIRTIO_MEM_RESP_ERROR; + } + } + + if self + .plugged_regions + .clear_range(first_block, nb_blocks) + .is_err() + { + error!("failed to delete subregion"); + return VIRTIO_MEM_RESP_ERROR; + } + + VIRTIO_MEM_RESP_ACK + } + + fn range_state(&self, gpa: u64, nb_blocks: u64) -> (u16, u16) { + let first_block = ((gpa - self.base_gpa) / self.block_size) as usize; + let last_block = first_block + nb_blocks as usize; + if self.valid_range(gpa, nb_blocks) != VIRTIO_MEM_RESP_ACK { + error!("plug request region illegal"); + return (VIRTIO_MEM_RESP_ERROR, 0); + } + + let bit = match self.plugged_regions.contain(first_block) { + Ok(bit) => bit, + Err(_) => return (VIRTIO_MEM_RESP_ERROR, 0), + }; + + if bit { + match self.plugged_regions.find_next_zero(first_block + 1) { + Ok(found_block) => { + if found_block >= last_block { + (VIRTIO_MEM_RESP_ACK, VIRTIO_MEM_STATE_PLUGGED) + } else { + (VIRTIO_MEM_RESP_ACK, VIRTIO_MEM_STATE_MIXED) + } + } + Err(_) => (VIRTIO_MEM_RESP_ERROR, 0), + } + } else { + match self.plugged_regions.find_next_bit(first_block + 1) { + Ok(found_block) => { + if found_block >= last_block { + (VIRTIO_MEM_RESP_ACK, VIRTIO_MEM_STATE_UNPLUGGED) + } else { + (VIRTIO_MEM_RESP_ACK, VIRTIO_MEM_STATE_MIXED) + } + } + Err(_) => (VIRTIO_MEM_RESP_ERROR, 0), + } + } + } +} + +struct MemoryHandler { + /// The guest request queue + pub(crate) queue: Arc>, + /// The eventfd used to notify the guest request queue event + pub(crate) queue_evt: Arc, + /// The function for interrupt triggering + pub(crate) interrupt_cb: Arc, + /// Configuration space of virtio mem device. + config: Arc>, + /// System address space. + pub(crate) mem_space: Arc, + /// Bit mask of features negotiated by the backend and the frontend + pub(crate) driver_features: u64, + /// Virtio mem device is broken or not. 
+ pub(crate) device_broken: Arc, + /// Virtio mem Region list + pub(crate) regions: Arc>, +} + +impl MemoryHandler { + fn handle_plug_request(&self, req: &VirtioMemReqPlug) -> u16 { + info!("handle_plug_request: {:?}", req); + let gpa = req.addr; + let nb_blocks = req.nb_blocks as u64; + let mut locked_regions = self.regions.lock().unwrap(); + let mut config = self.config.lock().unwrap(); + let plug_size = nb_blocks * config.block_size; + if (plug_size + config.plugged_size) > config.requested_size || plug_size == 0 { + return VIRTIO_MEM_RESP_NACK; + } + let ack = locked_regions.plug_range(self.mem_space.clone(), gpa, nb_blocks as usize); + if ack != VIRTIO_MEM_RESP_ACK { + return ack; + } + config.plugged_size += plug_size; + + locked_regions.plugged_size += nb_blocks * locked_regions.block_size; + + VIRTIO_MEM_RESP_ACK + } + + fn handle_unplug_request(&self, req: &VirtioMemReqUnplug) -> u16 { + info!("handle_unplug_request: {:?}", req); + let gpa = req.addr; + let nb_blocks = req.nb_blocks as u64; + let mut config = self.config.lock().unwrap(); + let unplug_size = config.block_size * nb_blocks; + if (unplug_size + config.requested_size) > config.plugged_size || unplug_size == 0 { + return VIRTIO_MEM_RESP_NACK; + } + let mut locked_regions = self.regions.lock().unwrap(); + let ack = locked_regions.unplug_range(self.mem_space.clone(), gpa, nb_blocks as usize); + if ack != VIRTIO_MEM_RESP_ACK { + return ack; + } + config.plugged_size -= unplug_size; + locked_regions.plugged_size += nb_blocks * locked_regions.block_size; + + VIRTIO_MEM_RESP_ACK + } + + fn handle_state_request(&self, req: &VirtioMemReqState) -> (u16, u16) { + info!("handle_state_request: {:?}", req); + let gpa = req.addr; + let nb_blocks = req.nb_blocks as u64; + let locked_regions = self.regions.lock().unwrap(); + locked_regions.range_state(gpa, nb_blocks) + } + + fn handle_unplug_all_request(&self) -> u16 { + loop { + let (addr, nb_blocks) = self.regions.lock().unwrap().top_plugged_range(); + info!("find plugged memory region: ({}, {})", addr, nb_blocks); + if nb_blocks == 0 { + break; + } + let req = &VirtioMemReqUnplug { + addr, + nb_blocks, + ..Default::default() + }; + let ack = self.handle_unplug_request(req); + if ack != VIRTIO_MEM_RESP_ACK { + return ack; + } + } + VIRTIO_MEM_RESP_ACK + } + + pub fn process_queue(&self) -> Result<()> { + loop { + let mut locked_queue = self.queue.lock().unwrap(); + let elem = locked_queue + .vring + .pop_avail(&self.mem_space, self.driver_features) + .with_context(|| { + "Failed to pop avail ring element for process guest request queue" + })?; + if elem.desc_num == 0 { + break; + } + + let mut req = iov_read_object::( + &self.mem_space.clone(), + &elem.out_iovec, + locked_queue.vring.get_cache(), + )?; + + let mut send_response = |resp: VirtioMemResp| -> Result<()> { + iov_write_object( + &self.mem_space, + &elem.in_iovec, + locked_queue.vring.get_cache(), + resp, + )?; + + locked_queue + .vring + .add_used(elem.index, resp.as_bytes().len() as u32) + .with_context(|| { + format!( + "Failed to add used ring(guest request queue), index {}, len {}", + elem.index, + resp.as_bytes().len(), + ) + })?; + + if locked_queue.vring.should_notify(self.driver_features) { + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&locked_queue), false) + .with_context(|| { + VirtioError::InterruptTrigger( + "mem guest request queue", + VirtioInterruptType::Vring, + ) + })?; + } + + Ok(()) + }; + + match req.req_type { + VIRTIO_MEM_REQ_PLUG => { + let resp_type = self.handle_plug_request( + 
VirtioMemReqPlug::from_bytes(req.req_union.as_mut_slice()).unwrap(), + ); + let resp = VirtioMemResp { + resp_type, + ..Default::default() + }; + send_response(resp)?; + } + VIRTIO_MEM_REQ_UNPLUG => { + let resp = VirtioMemResp { + resp_type: self.handle_unplug_request( + VirtioMemReqUnplug::from_bytes(req.req_union.as_mut_slice()).unwrap(), + ), + ..Default::default() + }; + send_response(resp)?; + } + VIRTIO_MEM_REQ_UNPLUG_ALL => { + let resp = VirtioMemResp { + resp_type: self.handle_unplug_all_request(), + ..Default::default() + }; + send_response(resp)?; + } + VIRTIO_MEM_REQ_STATE => { + let (resp_type, state_type) = self.handle_state_request( + VirtioMemReqState::from_bytes(req.req_union.as_mut_slice()).unwrap(), + ); + let mut resp = VirtioMemResp { + resp_type, + ..Default::default() + }; + resp.state.state_type = state_type; + send_response(resp)?; + } + _ => { + bail!("virtio-mem: unknown request type {}", req.req_type); + } + } + } + Ok(()) + } +} + +impl EventNotifierHelper for MemoryHandler { + fn internal_notifiers(mh: Arc>) -> Vec { + let mut notifiers = Vec::new(); + let closure_mh = mh.clone(); + let handler: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let locked_closure_mh = closure_mh.lock().unwrap(); + if let Err(e) = locked_closure_mh.process_queue() { + error!("Failed to plug/unplug mem: {:?}", e); + report_virtio_error( + locked_closure_mh.interrupt_cb.clone(), + locked_closure_mh.driver_features, + &locked_closure_mh.device_broken, + ); + } + + None + }); + notifiers.push(EventNotifier::new( + NotifierOperation::AddShared, + mh.lock().unwrap().queue_evt.as_raw_fd(), + None, + EventSet::IN, + vec![handler], + )); + notifiers + } +} + +#[derive(Default)] +pub struct Memory { + /// Virtio device base property. + base: VirtioBase, + /// Virtio mem device id + id: String, + /// Configuration space of virtio mem device. + config: Arc>, + /// Memory + backend: Arc>, + /// unplugged-inaccessible + unplugged_inaccessible: bool, + /// Interrupt callback function. 
+ interrupt_cb: Option>, +} + +impl Memory { + fn new_internal(option: MemoryConfig, memobj: MemBackendObjConfig, max_size: u64) -> Self { + info!("virtio-mem: new MemoryConfig {:?}", option); + let mut mem = Self { + base: VirtioBase::new(VIRTIO_TYPE_MEM, QUEUE_NUM_MEM, DEFAULT_VIRTQUEUE_SIZE), + id: option.id.clone(), + backend: Arc::new(Mutex::new(MemoryBackend::new(memobj))), + ..Default::default() + }; + + let mut config: std::sync::MutexGuard<'_, VirtioMemConfig> = mem.config.lock().unwrap(); + config.block_size = match option.block_size { + Some(block_size) => { + if block_size % DEFAULT_MEM_BLOCK_ALIGN_SIZE != 0 { + DEFAULT_MEM_BLOCK_ALIGN_SIZE + } else { + block_size + } + } + None => DEFAULT_MEM_BLOCK_SIZE, + }; + config.region_size = mem.backend.lock().unwrap().size; + config.addr = alloc_base_addr( + max_size, + option.memaddr, + config.region_size, + config.block_size, + ); + config.usable_region_size = config.region_size; + config.node_id = match option.node { + Some(node) => { + info!( + "virtio-mem not support ACPI NUMA, ignore node option(node={})", + node + ); + NUMA_NONE + } + None => NUMA_NONE, + }; + + config.plugged_size = 0; + config.requested_size = option.requested_size.unwrap_or(0); + drop(config); + + mem.unplugged_inaccessible = option.unplugged_inaccessible; + mem + } + + pub fn new_arc( + option: MemoryConfig, + memobj: MemBackendObjConfig, + max_size: u64, + ) -> Result>> { + let mem = Self::new_internal(option, memobj, max_size); + let id = mem.id.clone(); + let mem_arc = Arc::new(Mutex::new(mem)); + register_viomem_device(id, mem_arc.clone())?; + + Ok(mem_arc) + } + + pub fn get_region_size(&self) -> u64 { + self.config.lock().unwrap().region_size + } + + fn update_request(&mut self, request_size: u64) -> Result<()> { + info!("qmp request size {}", request_size); + if request_size > self.config.lock().unwrap().region_size { + bail!("request size out of the device region size") + } + if request_size % self.config.lock().unwrap().block_size != 0 { + bail!("requested_size not aligned with device block size") + } + let old_requested_size = self.config.lock().unwrap().requested_size; + self.config.lock().unwrap().requested_size = request_size; + self.signal_config_change().with_context(|| { + self.config.lock().unwrap().requested_size = old_requested_size; + "Failed to notify about configuration change after setting request memory size" + })?; + + Ok(()) + } + + /// Notify configuration changes to VM. 
+ fn signal_config_change(&self) -> Result<()> { + if let Some(interrupt_cb) = &self.interrupt_cb { + interrupt_cb(&VirtioInterruptType::Config, None, false).with_context(|| { + VirtioError::InterruptTrigger("viomem", VirtioInterruptType::Config) + }) + } else { + Err(anyhow!(VirtioError::DeviceNotActivated( + "viomem".to_string() + ))) + } + } +} + +impl VirtioDevice for Memory { + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + + fn realize(&mut self) -> Result<()> { + if self.config.lock().unwrap().addr == INVALID_ADDR { + bail!("inconsistent maddr configuration options"); + } + + self.backend.lock().unwrap().realize()?; + self.init_config_features()?; + Ok(()) + } + + fn init_config_features(&mut self) -> Result<()> { + self.base.device_features = 1u64 << VIRTIO_F_VERSION_1 | 1u64 << VIRTIO_F_RING_EVENT_IDX; + + if self.config.lock().unwrap().node_id != NUMA_NONE { + self.base.device_features |= 1u64 << VIRTIO_MEM_F_ACPI_PXM; + } + if self.unplugged_inaccessible { + self.base.device_features |= 1u64 << VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE; + } + + Ok(()) + } + + fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()> { + let new_config = *self.config.lock().unwrap(); + let config_len = size_of::(); + let config = &new_config.as_bytes()[..config_len]; + read_config_default(config, offset, data) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { + warn!( + "virtio-mem write config: offset = {}, data = {:?}", + offset, data + ); + Ok(()) + } + + fn activate( + &mut self, + mem_space: Arc, + interrupt_cb: Arc, + queue_evts: Vec>, + ) -> Result<()> { + info!("virtio-mem@{} activate", self.id); + let queues = &self.base.queues; + if queues.len() != self.queue_num() { + return Err(anyhow!(VirtioError::IncorrectQueueNum( + self.queue_num(), + queues.len() + ))); + } + self.interrupt_cb = Some(interrupt_cb.clone()); + + let config = self.config.lock().unwrap(); + let backend = self.backend.lock().unwrap(); + let host_addr = match &backend.backend { + Some(file) => do_mmap( + &Some(file.as_ref()), + config.region_size, + 0, + false, + backend.share, + false, + )?, + None => do_mmap(&None, config.region_size, 0, false, backend.share, false)?, + }; + drop(backend); + + let handler = MemoryHandler { + queue: queues[0].clone(), + queue_evt: queue_evts[0].clone(), + interrupt_cb: interrupt_cb.clone(), + driver_features: self.base.driver_features, + config: self.config.clone(), + mem_space, + regions: Arc::new(Mutex::new(MemRegionState::new( + config.addr, + config.region_size, + config.block_size, + host_addr, + ))), + device_broken: self.base.broken.clone(), + }; + + let notifiers = EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))); + register_event_helper(notifiers, None, &mut self.base.deactivate_evts) + .with_context(|| "Failed to register mem guest request event notifier to MainLoop")?; + + Ok(()) + } + + fn deactivate(&mut self) -> Result<()> { + info!("virtio-mem@{} deactivate", self.id); + unregister_event_helper(None, &mut self.base.deactivate_evts) + } + + fn reset(&mut self) -> Result<()> { + Ok(()) + } +} + +fn register_viomem_device(id: String, mem: Arc>) -> Result<()> { + VIOMEM_DEV_LIST + .get_or_init(|| Arc::new(Mutex::new(HashMap::new()))) + .lock() + .unwrap() + .insert(id, mem); + Ok(()) +} + +pub fn qmp_set_viomem(id: &String, request_size: u64) -> Result<()> { + if let Some(devlist) = VIOMEM_DEV_LIST.get() { + match devlist.lock().unwrap().get(id) { + Some(mem) => 
mem.lock().unwrap().update_request(request_size), + None => { + bail!("not found virtio-mem@{} device", id) + } + } + } else { + bail!("no virtio-mem device context") + } +} + +pub fn qmp_get_viomem(id: &String) -> Result { + if let Some(devlist) = VIOMEM_DEV_LIST.get() { + match devlist.lock().unwrap().get(id) { + Some(mem) => Ok(mem.lock().unwrap().config.lock().unwrap().qmp_query()), + None => { + bail!("not found virtio-mem@{} device", id) + } + } + } else { + bail!("no virtio-mem device context") + } +} diff --git a/virtio/src/device/mod.rs b/virtio/src/device/mod.rs index f8914b3a9974b28a0028be294763eb8e55f1e90a..cc3a7ab66b701617f3cb49eda47f9a34337d816a 100644 --- a/virtio/src/device/mod.rs +++ b/virtio/src/device/mod.rs @@ -15,6 +15,7 @@ pub mod block; #[cfg(feature = "virtio_gpu")] pub mod gpu; pub mod input; +pub mod memory; pub mod net; #[cfg(feature = "virtio_rng")] pub mod rng; diff --git a/virtio/src/lib.rs b/virtio/src/lib.rs index 2b48c9fb5f291aae5c4b1626ae743581413a036e..bfa0ef5db08e847178b8bb100fe63aa758f34fb9 100644 --- a/virtio/src/lib.rs +++ b/virtio/src/lib.rs @@ -37,6 +37,7 @@ pub use device::block::{Block, BlockState, VirtioBlkConfig, VirtioBlkDevConfig}; #[cfg(feature = "virtio_gpu")] pub use device::gpu::*; pub use device::input::*; +pub use device::memory::*; pub use device::net::*; #[cfg(feature = "virtio_rng")] pub use device::rng::{Rng, RngConfig, RngState}; @@ -66,7 +67,7 @@ use devices::pci::register_pcidevops_type; use devices::sysbus::register_sysbusdevops_type; use machine_manager::config::ConfigCheck; use migration_derive::ByteCode; -use util::aio::{mem_to_buf, Iovec}; +use util::aio::{iov_from_buf_direct, mem_to_buf, Iovec}; use util::byte_code::ByteCode; use util::num_ops::{read_u32, write_u32}; use util::AsAny; @@ -86,6 +87,7 @@ pub const VIRTIO_TYPE_SCSI: u32 = 8; pub const VIRTIO_TYPE_GPU: u32 = 16; pub const VIRTIO_TYPE_INPUT: u32 = 18; pub const VIRTIO_TYPE_VSOCK: u32 = 19; +pub const VIRTIO_TYPE_MEM: u32 = 24; pub const VIRTIO_TYPE_FS: u32 = 26; // The Status of Virtio Device. @@ -811,6 +813,37 @@ pub fn iov_read_object( Ok(obj) } +/// Write object typed `T` to iovec. +pub fn iov_write_object( + mem_space: &Arc, + iovec: &[ElemIovec], + cache: &Option, + obj: T, +) -> Result<()> { + let (in_size, ctrl_vec) = gpa_hva_iovec_map(iovec, mem_space, cache)?; + let obj_len = size_of::() as u64; + if in_size < obj_len { + bail!( + "Invalid length for object: get {}, expected {}", + in_size, + obj_len + ); + } + + // SAFETY: obj_len has checked above + unsafe { iov_from_buf_direct(&ctrl_vec, obj.as_bytes()) }.and_then(|size| { + if size as u64 != obj_len { + bail!( + "Expected send msg length is {}, actual send length {}.", + obj_len, + size + ) + }; + Ok(()) + })?; + Ok(()) +} + /// Read iovec to buf and return the read number of bytes. pub fn iov_to_buf( mem_space: &AddressSpace,
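Finally, a hedged end-to-end illustration of the two QMP commands added by this patch. The device id `viomem0` and the returned numbers are assumptions for illustration; the reply fields follow the `ViomemInfo` schema, and `plugged-size` only converges toward `requested-size` once the guest virtio-mem driver has processed the configuration change.

```json
-> { "execute": "set-viomem", "arguments": { "id": "viomem0", "requested-size": "2G" } }
<- { "return": {} }

-> { "execute": "get-viomem", "arguments": { "id": "viomem0" } }
<- { "return": { "node": 0, "size": 2147483648, "block-size": 33554432, "requested-size": 2147483648, "plugged-size": 2147483648 } }
```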