From 2ef47902bb7d6c5c35abb070a8dadd13fe9abdc2 Mon Sep 17 00:00:00 2001 From: sts Date: Thu, 27 Jun 2024 05:53:18 +0000 Subject: [PATCH 1/8] =?UTF-8?q?KVM=20=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. kvm-bindings库和kvm-ioctls库的RISCV支持 2. 添加最小化的kvm模型 --- .gitignore | 3 + Cargo.lock | 51 + Cargo.toml | 24 + kvm-bindings | 1 + kvm-ioctls/Cargo.lock | 41 + kvm-ioctls/Cargo.toml | 11 + kvm-ioctls/src/cap.rs | 155 + kvm-ioctls/src/ioctls/device.rs | 319 ++ kvm-ioctls/src/ioctls/mod.rs | 97 + kvm-ioctls/src/ioctls/system.rs | 847 +++++ kvm-ioctls/src/ioctls/vcpu.rs | 2830 +++++++++++++++++ kvm-ioctls/src/ioctls/vm.rs | 2253 +++++++++++++ kvm-ioctls/src/kvm_ioctls.rs | 302 ++ kvm-ioctls/src/lib.rs | 26 + license/LICENSE | 127 + ...Third_Party_Open_Source_Software_Notice.md | 358 +++ src/main.rs | 257 ++ 17 files changed, 7702 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 160000 kvm-bindings create mode 100644 kvm-ioctls/Cargo.lock create mode 100644 kvm-ioctls/Cargo.toml create mode 100644 kvm-ioctls/src/cap.rs create mode 100644 kvm-ioctls/src/ioctls/device.rs create mode 100644 kvm-ioctls/src/ioctls/mod.rs create mode 100644 kvm-ioctls/src/ioctls/system.rs create mode 100644 kvm-ioctls/src/ioctls/vcpu.rs create mode 100644 kvm-ioctls/src/ioctls/vm.rs create mode 100644 kvm-ioctls/src/kvm_ioctls.rs create mode 100644 kvm-ioctls/src/lib.rs create mode 100644 license/LICENSE create mode 100644 license/Third_Party_Open_Source_Software_Notice.md create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..ba5370112 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +target +**/*.rs.bk + diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 000000000..0f0ab1612 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,51 @@ +# This file is automatically @generated by Cargo. 
+# It is not intended for manual editing. +version = 3 + +[[package]] +name = "StratoVirt" +version = "0.2.0" +dependencies = [ + "kvm-bindings", + "kvm-ioctls", + "libc", + "vmm-sys-util", +] + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + +[[package]] +name = "kvm-bindings" +version = "0.8.2" +dependencies = [ + "vmm-sys-util", +] + +[[package]] +name = "kvm-ioctls" +version = "0.1.0" +dependencies = [ + "kvm-bindings", + "libc", + "vmm-sys-util", +] + +[[package]] +name = "libc" +version = "0.2.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49" + +[[package]] +name = "vmm-sys-util" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1435039746e20da4f8d507a72ee1b916f7b4b05af7a91c093d2c6561934ede" +dependencies = [ + "bitflags", + "libc", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000..fb83af7ee --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "StratoVirt" +version = "0.2.0" +authors = ["Huawei StratoVirt Team"] +edition = "2018" +description = "a lightweight hypervisor with low memory overhead and fast booting speed" +license = "Mulan PSL v2" + +[dependencies] +libc = ">=0.2.39" +vmm-sys-util = ">=0.10.0" +kvm-ioctls = { path = "kvm-ioctls" } +kvm-bindings = { path = "kvm-bindings", features = ["fam-wrappers"]} + +[[bin]] +name = "stratovirt" +path = "src/main.rs" + +[profile.dev] +panic = "abort" + +[profile.release] +panic = "abort" +lto = true diff --git a/kvm-bindings b/kvm-bindings new file mode 160000 index 000000000..b4ee5ac90 --- /dev/null +++ b/kvm-bindings @@ -0,0 +1 @@ +Subproject commit b4ee5ac9052f94dc841b7c260fdb81139d78e827 diff --git a/kvm-ioctls/Cargo.lock b/kvm-ioctls/Cargo.lock new file mode 100644 index 
000000000..065743fac --- /dev/null +++ b/kvm-ioctls/Cargo.lock @@ -0,0 +1,41 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "kvm-bindings" +version = "0.8.2" +dependencies = [ + "vmm-sys-util", +] + +[[package]] +name = "kvm-ioctls" +version = "0.1.0" +dependencies = [ + "kvm-bindings", + "libc", + "vmm-sys-util", +] + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "vmm-sys-util" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1435039746e20da4f8d507a72ee1b916f7b4b05af7a91c093d2c6561934ede" +dependencies = [ + "bitflags", + "libc", +] diff --git a/kvm-ioctls/Cargo.toml b/kvm-ioctls/Cargo.toml new file mode 100644 index 000000000..dc98e4e39 --- /dev/null +++ b/kvm-ioctls/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "kvm-ioctls" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +libc = "0.2" +kvm-bindings = { path = "../kvm-bindings", features = ["fam-wrappers"]} +vmm-sys-util = ">=0.10.0" \ No newline at end of file diff --git a/kvm-ioctls/src/cap.rs b/kvm-ioctls/src/cap.rs new file mode 100644 index 000000000..919b54124 --- /dev/null +++ b/kvm-ioctls/src/cap.rs @@ -0,0 +1,155 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use kvm_bindings::*; + +/// Capabilities exposed by KVM. +/// +/// The capabilities list can be used in conjunction with +/// [Kvm::check_extension()](struct.Kvm.html#method.check_extension) to check if a particular +/// capability is available. +/// +/// The list of capabilities is based on the the KVM_CAP_* defines from the +/// [Linux KVM header](https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/kvm.h). +#[derive(Clone, Copy, Debug)] +#[repr(u32)] +// We are allowing docs to be missing here because this enum is a wrapper +// over auto-generated code. +#[allow(missing_docs)] +pub enum Cap { + Irqchip = KVM_CAP_IRQCHIP, + Hlt = KVM_CAP_HLT, + MmuShadowCacheControl = KVM_CAP_MMU_SHADOW_CACHE_CONTROL, + UserMemory = KVM_CAP_USER_MEMORY, + SetTssAddr = KVM_CAP_SET_TSS_ADDR, + Vapic = KVM_CAP_VAPIC, + ExtCpuid = KVM_CAP_EXT_CPUID, + Clocksource = KVM_CAP_CLOCKSOURCE, + NrVcpus = KVM_CAP_NR_VCPUS, + NrMemslots = KVM_CAP_NR_MEMSLOTS, + Pit = KVM_CAP_PIT, + NopIoDelay = KVM_CAP_NOP_IO_DELAY, + PvMmu = KVM_CAP_PV_MMU, + MpState = KVM_CAP_MP_STATE, + CoalescedMmio = KVM_CAP_COALESCED_MMIO, + SyncMmu = KVM_CAP_SYNC_MMU, + Iommu = KVM_CAP_IOMMU, + DestroyMemoryRegionWorks = KVM_CAP_DESTROY_MEMORY_REGION_WORKS, + UserNmi = KVM_CAP_USER_NMI, + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + SetGuestDebug = KVM_CAP_SET_GUEST_DEBUG, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + ReinjectControl = KVM_CAP_REINJECT_CONTROL, + IrqRouting = KVM_CAP_IRQ_ROUTING, + IrqInjectStatus = KVM_CAP_IRQ_INJECT_STATUS, + AssignDevIrq = KVM_CAP_ASSIGN_DEV_IRQ, + JoinMemoryRegionsWorks = KVM_CAP_JOIN_MEMORY_REGIONS_WORKS, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + Mce = KVM_CAP_MCE, + Irqfd = KVM_CAP_IRQFD, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + 
Pit2 = KVM_CAP_PIT2, + SetBootCpuId = KVM_CAP_SET_BOOT_CPU_ID, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + PitState2 = KVM_CAP_PIT_STATE2, + Ioeventfd = KVM_CAP_IOEVENTFD, + SetIdentityMapAddr = KVM_CAP_SET_IDENTITY_MAP_ADDR, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + XenHvm = KVM_CAP_XEN_HVM, + AdjustClock = KVM_CAP_ADJUST_CLOCK, + InternalErrorData = KVM_CAP_INTERNAL_ERROR_DATA, + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + VcpuEvents = KVM_CAP_VCPU_EVENTS, + S390Psw = KVM_CAP_S390_PSW, + PpcSegstate = KVM_CAP_PPC_SEGSTATE, + Hyperv = KVM_CAP_HYPERV, + HypervVapic = KVM_CAP_HYPERV_VAPIC, + HypervSpin = KVM_CAP_HYPERV_SPIN, + PciSegment = KVM_CAP_PCI_SEGMENT, + PpcPairedSingles = KVM_CAP_PPC_PAIRED_SINGLES, + IntrShadow = KVM_CAP_INTR_SHADOW, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + Debugregs = KVM_CAP_DEBUGREGS, + X86RobustSinglestep = KVM_CAP_X86_ROBUST_SINGLESTEP, + PpcOsi = KVM_CAP_PPC_OSI, + PpcUnsetIrq = KVM_CAP_PPC_UNSET_IRQ, + EnableCap = KVM_CAP_ENABLE_CAP, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + Xsave = KVM_CAP_XSAVE, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + Xcrs = KVM_CAP_XCRS, + PpcGetPvinfo = KVM_CAP_PPC_GET_PVINFO, + PpcIrqLevel = KVM_CAP_PPC_IRQ_LEVEL, + AsyncPf = KVM_CAP_ASYNC_PF, + TscControl = KVM_CAP_TSC_CONTROL, + GetTscKhz = KVM_CAP_GET_TSC_KHZ, + PpcBookeSregs = KVM_CAP_PPC_BOOKE_SREGS, + SpaprTce = KVM_CAP_SPAPR_TCE, + PpcSmt = KVM_CAP_PPC_SMT, + PpcRma = KVM_CAP_PPC_RMA, + MaxVcpus = KVM_CAP_MAX_VCPUS, + MaxVcpuId = KVM_CAP_MAX_VCPU_ID, + PpcHior = KVM_CAP_PPC_HIOR, + PpcPapr = KVM_CAP_PPC_PAPR, + SwTlb = KVM_CAP_SW_TLB, + OneReg = KVM_CAP_ONE_REG, + S390Gmap = KVM_CAP_S390_GMAP, + TscDeadlineTimer = KVM_CAP_TSC_DEADLINE_TIMER, + S390Ucontrol = KVM_CAP_S390_UCONTROL, + SyncRegs = KVM_CAP_SYNC_REGS, + Pci23 = KVM_CAP_PCI_2_3, + KvmclockCtrl = KVM_CAP_KVMCLOCK_CTRL, + 
SignalMsi = KVM_CAP_SIGNAL_MSI, + PpcGetSmmuInfo = KVM_CAP_PPC_GET_SMMU_INFO, + S390Cow = KVM_CAP_S390_COW, + PpcAllocHtab = KVM_CAP_PPC_ALLOC_HTAB, + ReadonlyMem = KVM_CAP_READONLY_MEM, + IrqfdResample = KVM_CAP_IRQFD_RESAMPLE, + PpcBookeWatchdog = KVM_CAP_PPC_BOOKE_WATCHDOG, + PpcHtabFd = KVM_CAP_PPC_HTAB_FD, + S390CssSupport = KVM_CAP_S390_CSS_SUPPORT, + PpcEpr = KVM_CAP_PPC_EPR, + ArmPsci = KVM_CAP_ARM_PSCI, + ArmSetDeviceAddr = KVM_CAP_ARM_SET_DEVICE_ADDR, + DeviceCtrl = KVM_CAP_DEVICE_CTRL, + IrqMpic = KVM_CAP_IRQ_MPIC, + PpcRtas = KVM_CAP_PPC_RTAS, + IrqXics = KVM_CAP_IRQ_XICS, + ArmEl132bit = KVM_CAP_ARM_EL1_32BIT, + SpaprMultitce = KVM_CAP_SPAPR_MULTITCE, + ExtEmulCpuid = KVM_CAP_EXT_EMUL_CPUID, + HypervTime = KVM_CAP_HYPERV_TIME, + IoapicPolarityIgnored = KVM_CAP_IOAPIC_POLARITY_IGNORED, + EnableCapVm = KVM_CAP_ENABLE_CAP_VM, + S390Irqchip = KVM_CAP_S390_IRQCHIP, + IoeventfdNoLength = KVM_CAP_IOEVENTFD_NO_LENGTH, + VmAttributes = KVM_CAP_VM_ATTRIBUTES, + ArmPsci02 = KVM_CAP_ARM_PSCI_0_2, + PpcFixupHcall = KVM_CAP_PPC_FIXUP_HCALL, + PpcEnableHcall = KVM_CAP_PPC_ENABLE_HCALL, + CheckExtensionVm = KVM_CAP_CHECK_EXTENSION_VM, + S390UserSigp = KVM_CAP_S390_USER_SIGP, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + SplitIrqchip = KVM_CAP_SPLIT_IRQCHIP, + ImmediateExit = KVM_CAP_IMMEDIATE_EXIT, + ArmVmIPASize = KVM_CAP_ARM_VM_IPA_SIZE, + MsiDevid = KVM_CAP_MSI_DEVID, + HypervSynic = KVM_CAP_HYPERV_SYNIC, + HypervSynic2 = KVM_CAP_HYPERV_SYNIC2, + DebugHwBps = KVM_CAP_GUEST_DEBUG_HW_BPS, + DebugHwWps = KVM_CAP_GUEST_DEBUG_HW_WPS, +} diff --git a/kvm-ioctls/src/ioctls/device.rs b/kvm-ioctls/src/ioctls/device.rs new file mode 100644 index 000000000..66695703d --- /dev/null +++ b/kvm-ioctls/src/ioctls/device.rs @@ -0,0 +1,319 @@ +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR MIT + +use std::fs::File; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; + +use crate::ioctls::Result; +use crate::kvm_ioctls::{KVM_GET_DEVICE_ATTR, KVM_HAS_DEVICE_ATTR, KVM_SET_DEVICE_ATTR}; +use kvm_bindings::kvm_device_attr; +use vmm_sys_util::errno; +use vmm_sys_util::ioctl::{ioctl_with_mut_ref, ioctl_with_ref}; + +/// Wrapper over the file descriptor obtained when creating an emulated device in the kernel. +#[derive(Debug)] +pub struct DeviceFd { + fd: File, +} + +impl DeviceFd { + /// Tests whether a device supports a particular attribute. + /// + /// See the documentation for `KVM_HAS_DEVICE_ATTR`. + /// # Arguments + /// + /// * `device_attr` - The device attribute to be tested. `addr` field is ignored. + pub fn has_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> { + // SAFETY: We are calling this function with a Device fd, and we trust the kernel. + let ret = unsafe { ioctl_with_ref(self, KVM_HAS_DEVICE_ATTR(), device_attr) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Sets a specified piece of device configuration and/or state. + /// + /// See the documentation for `KVM_SET_DEVICE_ATTR`. + /// # Arguments + /// + /// * `device_attr` - The device attribute to be set. 
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::{ + /// kvm_device_type_KVM_DEV_TYPE_VFIO, + /// KVM_DEV_VFIO_GROUP, KVM_DEV_VFIO_GROUP_ADD, KVM_CREATE_DEVICE_TEST + /// }; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// let mut device = kvm_bindings::kvm_create_device { + /// type_: kvm_device_type_KVM_DEV_TYPE_VFIO, + /// fd: 0, + /// flags: KVM_CREATE_DEVICE_TEST, + /// }; + /// + /// let device_fd = vm + /// .create_device(&mut device) + /// .expect("Cannot create KVM device"); + /// + /// let dist_attr = kvm_bindings::kvm_device_attr { + /// group: KVM_DEV_VFIO_GROUP, + /// attr: u64::from(KVM_DEV_VFIO_GROUP_ADD), + /// addr: 0x0, + /// flags: 0, + /// }; + /// + /// if (device_fd.has_device_attr(&dist_attr).is_ok()) { + /// device_fd.set_device_attr(&dist_attr).unwrap(); + /// } + /// ``` + pub fn set_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> { + // SAFETY: We are calling this function with a Device fd, and we trust the kernel. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_DEVICE_ATTR(), device_attr) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Gets a specified piece of device configuration and/or state. + /// + /// See the documentation for `KVM_GET_DEVICE_ATTR`. + /// + /// # Arguments + /// + /// * `device_attr` - The device attribute to be get. + /// Note: This argument serves as both input and output. + /// When calling this function, the user should explicitly provide + /// valid values for the `group` and the `attr` field of the + /// `kvm_device_attr` structure, and a valid userspace address + /// (i.e. the `addr` field) to access the returned device attribute + /// data. + /// + /// # Returns + /// + /// * Returns the last occured `errno` wrapped in an `Err`. 
+ /// * `device_attr` - The `addr` field of the `device_attr` structure will point to + /// the device attribute data. + /// + /// # Examples + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// // As on x86_64, `get_device_attr` is not necessarily needed. Therefore here + /// // the code example is only for AArch64. + /// #[cfg(any(target_arch = "aarch64"))] + /// { + /// use kvm_bindings::{ + /// kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, + /// KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + /// }; + /// + /// // Create a GIC device. + /// let mut gic_device = kvm_bindings::kvm_create_device { + /// type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, + /// fd: 0, + /// flags: 0, + /// }; + /// let device_fd = match vm.create_device(&mut gic_device) { + /// Ok(fd) => fd, + /// Err(_) => { + /// gic_device.type_ = kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2; + /// vm.create_device(&mut gic_device) + /// .expect("Cannot create KVM vGIC device") + /// } + /// }; + /// + /// let mut data: u32 = 0; + /// let mut gic_attr = kvm_bindings::kvm_device_attr::default(); + /// gic_attr.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS; + /// gic_attr.addr = &mut data as *const u32 as u64; + /// + /// device_fd.get_device_attr(&mut gic_attr).unwrap(); + /// } + /// ``` + pub fn get_device_attr(&self, device_attr: &mut kvm_device_attr) -> Result<()> { + // SAFETY: We are calling this function with a Device fd, and we trust the kernel. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEVICE_ATTR(), device_attr) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } +} + +/// Helper function for creating a new device. 
+pub fn new_device(dev_fd: File) -> DeviceFd { + DeviceFd { fd: dev_fd } +} + +impl AsRawFd for DeviceFd { + fn as_raw_fd(&self) -> RawFd { + self.fd.as_raw_fd() + } +} + +impl FromRawFd for DeviceFd { + /// # Safety + /// + /// This function is unsafe as the primitives currently returned have the contract that + /// they are the sole owner of the file descriptor they are wrapping. Usage of this function + /// could accidentally allow violating this contract which can cause memory unsafety in code + /// that relies on it being true. + unsafe fn from_raw_fd(fd: RawFd) -> Self { + DeviceFd { + fd: File::from_raw_fd(fd), + } + } +} + +#[cfg(test)] +mod tests { + #![allow(clippy::undocumented_unsafe_blocks)] + use super::*; + use crate::ioctls::system::Kvm; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + use kvm_bindings::{ + kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, kvm_device_type_KVM_DEV_TYPE_VFIO, + KVM_DEV_VFIO_GROUP, KVM_DEV_VFIO_GROUP_ADD, + }; + #[cfg(target_arch = "aarch64")] + use kvm_bindings::{ + KVM_DEV_ARM_VGIC_CTRL_INIT, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_VFIO_GROUP, + KVM_DEV_VFIO_GROUP_ADD, + }; + + use kvm_bindings::KVM_CREATE_DEVICE_TEST; + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn test_create_device() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + + let mut gic_device = kvm_bindings::kvm_create_device { + type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, + fd: 0, + flags: KVM_CREATE_DEVICE_TEST, + }; + // This fails on x86_64 because there is no VGIC there. + assert!(vm.create_device(&mut gic_device).is_err()); + + gic_device.type_ = kvm_device_type_KVM_DEV_TYPE_VFIO; + + let device_fd = vm + .create_device(&mut gic_device) + .expect("Cannot create KVM device"); + + // Following lines to re-construct device_fd are used to test + // DeviceFd::from_raw_fd() and DeviceFd::as_raw_fd(). 
+ let raw_fd = unsafe { libc::dup(device_fd.as_raw_fd()) }; + assert!(raw_fd >= 0); + let device_fd = unsafe { DeviceFd::from_raw_fd(raw_fd) }; + + let dist_attr = kvm_bindings::kvm_device_attr { + group: KVM_DEV_VFIO_GROUP, + attr: u64::from(KVM_DEV_VFIO_GROUP_ADD), + addr: 0x0, + flags: 0, + }; + + let mut dist_attr_mut = dist_attr; + + // We are just creating a test device. Creating a real device would make the CI dependent + // on host configuration (like having /dev/vfio). We expect this to fail. + assert!(device_fd.has_device_attr(&dist_attr).is_err()); + assert!(device_fd.get_device_attr(&mut dist_attr_mut).is_err()); + assert!(device_fd.set_device_attr(&dist_attr).is_err()); + assert_eq!(errno::Error::last().errno(), 25); + } + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_create_device() { + use crate::ioctls::vm::{create_gic_device, set_supported_nr_irqs}; + use kvm_bindings::{ + kvm_device_type_KVM_DEV_TYPE_FSL_MPIC_20, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + }; + use vmm_sys_util::errno::Error; + + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + + let mut gic_device = kvm_bindings::kvm_create_device { + type_: kvm_device_type_KVM_DEV_TYPE_FSL_MPIC_20, + fd: 0, + flags: KVM_CREATE_DEVICE_TEST, + }; + // This fails on aarch64 as it does not use MPIC (MultiProcessor Interrupt Controller), + // it uses the VGIC. + assert!(vm.create_device(&mut gic_device).is_err()); + + let device_fd = create_gic_device(&vm, 0); + + // GICv3 on arm/aarch64 requires an online vCPU prior to setting device attributes, + // see: https://www.kernel.org/doc/html/latest/virt/kvm/devices/arm-vgic-v3.html + vm.create_vcpu(0).unwrap(); + + // Following lines to re-construct device_fd are used to test + // DeviceFd::from_raw_fd() and DeviceFd::as_raw_fd(). 
+ let raw_fd = unsafe { libc::dup(device_fd.as_raw_fd()) }; + assert!(raw_fd >= 0); + let device_fd = unsafe { DeviceFd::from_raw_fd(raw_fd) }; + + // Set some attribute that does not apply to VGIC, expect the test to fail. + let dist_attr = kvm_bindings::kvm_device_attr { + group: KVM_DEV_VFIO_GROUP, + attr: u64::from(KVM_DEV_VFIO_GROUP_ADD), + addr: 0x0, + flags: 0, + }; + assert!(device_fd.has_device_attr(&dist_attr).is_err()); + + // Set maximum supported number of IRQs of the vGIC device to 128. + set_supported_nr_irqs(&device_fd, 128); + + // Following attribute works with VGIC, they should be accepted. + let dist_attr = kvm_bindings::kvm_device_attr { + group: KVM_DEV_ARM_VGIC_GRP_CTRL, + attr: u64::from(KVM_DEV_ARM_VGIC_CTRL_INIT), + addr: 0x0, + flags: 0, + }; + + assert!(device_fd.has_device_attr(&dist_attr).is_ok()); + assert!(device_fd.set_device_attr(&dist_attr).is_ok()); + + // Test `get_device_attr`. Here we try to extract the maximum supported number of IRQs. + // This value should be saved in the address provided to the ioctl. + let mut data: u32 = 0; + + let mut gic_attr = kvm_bindings::kvm_device_attr { + group: KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + addr: data as u64, + ..Default::default() + }; + + // Without properly providing the address to where the + // value will be stored, the ioctl fails with EFAULT. + let res = device_fd.get_device_attr(&mut gic_attr); + assert_eq!(res, Err(Error::new(libc::EFAULT))); + + gic_attr.addr = &mut data as *const u32 as u64; + assert!(device_fd.get_device_attr(&mut gic_attr).is_ok()); + // The maximum supported number of IRQs should be 128, same as the value + // when we initialize the GIC. + assert_eq!(data, 128); + } +} diff --git a/kvm-ioctls/src/ioctls/mod.rs b/kvm-ioctls/src/ioctls/mod.rs new file mode 100644 index 000000000..9079acc78 --- /dev/null +++ b/kvm-ioctls/src/ioctls/mod.rs @@ -0,0 +1,97 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR MIT +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::os::unix::io::AsRawFd; +use std::ptr::null_mut; + +use kvm_bindings::kvm_run; +use vmm_sys_util::errno; + +/// Wrappers over KVM device ioctls. +pub mod device; +/// Wrappers over KVM system ioctls. +pub mod system; +/// Wrappers over KVM VCPU ioctls. +pub mod vcpu; +/// Wrappers over KVM Virtual Machine ioctls. +pub mod vm; + +/// A specialized `Result` type for KVM ioctls. +/// +/// This typedef is generally used to avoid writing out errno::Error directly and +/// is otherwise a direct mapping to Result. +pub type Result = std::result::Result; + +/// Safe wrapper over the `kvm_run` struct. +/// +/// The wrapper is needed for sending the pointer to `kvm_run` between +/// threads as raw pointers do not implement `Send` and `Sync`. +#[derive(Debug)] +pub struct KvmRunWrapper { + kvm_run_ptr: *mut u8, + // This field is need so we can `munmap` the memory mapped to hold `kvm_run`. + mmap_size: usize, +} + +// SAFETY: Send and Sync aren't automatically inherited for the raw address pointer. +// Accessing that pointer is only done through the stateless interface which +// allows the object to be shared by multiple threads without a decrease in +// safety. +unsafe impl Send for KvmRunWrapper {} +// SAFETY: See above. +unsafe impl Sync for KvmRunWrapper {} + +impl KvmRunWrapper { + /// Maps the first `size` bytes of the given `fd`. + /// + /// # Arguments + /// * `fd` - File descriptor to mmap from. + /// * `size` - Size of memory region in bytes. + pub fn mmap_from_fd(fd: &dyn AsRawFd, size: usize) -> Result { + // SAFETY: This is safe because we are creating a mapping in a place not already used by + // any other area in this process. 
+ let addr = unsafe { + libc::mmap( + null_mut(), + size, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_SHARED, + fd.as_raw_fd(), + 0, + ) + }; + if addr == libc::MAP_FAILED { + return Err(errno::Error::last()); + } + + Ok(KvmRunWrapper { + kvm_run_ptr: addr as *mut u8, + mmap_size: size, + }) + } + + /// Returns a mutable reference to `kvm_run`. + #[allow(clippy::mut_from_ref)] + pub fn as_mut_ref(&self) -> &mut kvm_run { + #[allow(clippy::cast_ptr_alignment)] + // SAFETY: Safe because we know we mapped enough memory to hold the kvm_run struct because + // the kernel told us how large it was. + unsafe { + &mut *(self.kvm_run_ptr as *mut kvm_run) + } + } +} + +impl Drop for KvmRunWrapper { + fn drop(&mut self) { + // SAFETY: This is safe because we mmap the area at kvm_run_ptr ourselves, + // and nobody else is holding a reference to it. + unsafe { + libc::munmap(self.kvm_run_ptr as *mut libc::c_void, self.mmap_size); + } + } +} diff --git a/kvm-ioctls/src/ioctls/system.rs b/kvm-ioctls/src/ioctls/system.rs new file mode 100644 index 000000000..5dacb0367 --- /dev/null +++ b/kvm-ioctls/src/ioctls/system.rs @@ -0,0 +1,847 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. 
+use libc::{open, O_CLOEXEC, O_RDWR}; +use std::ffi::CStr; +use std::fs::File; +use std::os::raw::{c_char, c_ulong}; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; + +use crate::cap::Cap; +use crate::ioctls::vm::{new_vmfd, VmFd}; +use crate::ioctls::Result; +use crate::kvm_ioctls::*; +#[cfg(any(target_arch = "aarch64"))] +use kvm_bindings::KVM_VM_TYPE_ARM_IPA_SIZE_MASK; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use kvm_bindings::{CpuId, MsrList, KVM_MAX_CPUID_ENTRIES, KVM_MAX_MSR_ENTRIES}; +use vmm_sys_util::errno; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use vmm_sys_util::ioctl::ioctl_with_mut_ptr; +use vmm_sys_util::ioctl::{ioctl, ioctl_with_val}; + +/// Wrapper over KVM system ioctls. +#[derive(Debug)] +pub struct Kvm { + kvm: File, +} + +impl Kvm { + /// Opens `/dev/kvm` and returns a `Kvm` object on success. + /// + /// # Example + /// + /// ``` + /// use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// ``` + #[allow(clippy::new_ret_no_self)] + pub fn new() -> Result { + // Open `/dev/kvm` using `O_CLOEXEC` flag. + let fd = Self::open_with_cloexec(true)?; + // SAFETY: Safe because we verify that the fd is valid in `open_with_cloexec` and we own + // the fd. + Ok(unsafe { Self::from_raw_fd(fd) }) + } + + /// Opens the KVM device at `kvm_path` and returns a `Kvm` object on success. + /// + /// # Arguments + /// + /// * `kvm_path`: path to the KVM device. Usually it is `/dev/kvm`. + /// + /// # Example + /// + /// ``` + /// use kvm_ioctls::Kvm; + /// use std::ffi::CString; + /// let kvm_path = CString::new("/dev/kvm").unwrap(); + /// let kvm = Kvm::new_with_path(&kvm_path).unwrap(); + /// ``` + #[allow(clippy::new_ret_no_self)] + pub fn new_with_path

(kvm_path: P) -> Result + where + P: AsRef, + { + // Open `kvm_path` using `O_CLOEXEC` flag. + let fd = Self::open_with_cloexec_at(kvm_path, true)?; + // SAFETY: Safe because we verify that the fd is valid in `open_with_cloexec_at` + // and we own the fd. + Ok(unsafe { Self::from_raw_fd(fd) }) + } + + /// Opens `/dev/kvm` and returns the fd number on success. + /// + /// One usecase for this method is opening `/dev/kvm` before exec-ing into a + /// process with seccomp filters enabled that blacklist the `sys_open` syscall. + /// For this usecase `open_with_cloexec` must be called with the `close_on_exec` + /// parameter set to false. + /// + /// # Arguments + /// + /// * `close_on_exec`: If true opens `/dev/kvm` using the `O_CLOEXEC` flag. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// # use std::os::unix::io::FromRawFd; + /// let kvm_fd = Kvm::open_with_cloexec(false).unwrap(); + /// // The `kvm_fd` can now be passed to another process where we can use + /// // `from_raw_fd` for creating a `Kvm` object: + /// let kvm = unsafe { Kvm::from_raw_fd(kvm_fd) }; + /// ``` + pub fn open_with_cloexec(close_on_exec: bool) -> Result { + // SAFETY: Safe because we give a constant nul-terminated string. + let kvm_path = unsafe { CStr::from_bytes_with_nul_unchecked(b"/dev/kvm\0") }; + Self::open_with_cloexec_at(kvm_path, close_on_exec) + } + + /// Opens the KVM device at `kvm_path` and returns the fd number on success. + /// Same as [open_with_cloexec()](struct.Kvm.html#method.open_with_cloexec) + /// except this method opens `kvm_path` instead of `/dev/kvm`. + /// + /// # Arguments + /// + /// * `kvm_path`: path to the KVM device. Usually it is `/dev/kvm`. + /// * `close_on_exec`: If true opens `kvm_path` using the `O_CLOEXEC` flag. 
+ /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// # use std::ffi::CString; + /// # use std::os::unix::io::FromRawFd; + /// let kvm_path = CString::new("/dev/kvm").unwrap(); + /// let kvm_fd = Kvm::open_with_cloexec_at(kvm_path, false).unwrap(); + /// // The `kvm_fd` can now be passed to another process where we can use + /// // `from_raw_fd` for creating a `Kvm` object: + /// let kvm = unsafe { Kvm::from_raw_fd(kvm_fd) }; + /// ``` + pub fn open_with_cloexec_at

(path: P, close_on_exec: bool) -> Result + where + P: AsRef, + { + let open_flags = O_RDWR | if close_on_exec { O_CLOEXEC } else { 0 }; + // SAFETY: Safe because we verify the result. + let ret = unsafe { open(path.as_ref().as_ptr() as *const c_char, open_flags) }; + if ret < 0 { + Err(errno::Error::last()) + } else { + Ok(ret) + } + } + + /// Returns the KVM API version. + /// + /// See the documentation for `KVM_GET_API_VERSION`. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// assert_eq!(kvm.get_api_version(), 12); + /// ``` + pub fn get_api_version(&self) -> i32 { + // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of + // the ones defined by kernel. + unsafe { ioctl(self, KVM_GET_API_VERSION()) } + } + + /// AArch64 specific call to get the host Intermediate Physical Address space limit. + /// + /// Returns 0 if the capability is not available and an integer >= 32 otherwise. + #[cfg(target_arch = "aarch64")] + pub fn get_host_ipa_limit(&self) -> i32 { + self.check_extension_int(Cap::ArmVmIPASize) + } + + /// AArch64 specific call to get the number of supported hardware breakpoints. + /// + /// Returns 0 if the capability is not available and a positive integer otherwise. + #[cfg(target_arch = "aarch64")] + pub fn get_guest_debug_hw_bps(&self) -> i32 { + self.check_extension_int(Cap::DebugHwBps) + } + + /// AArch64 specific call to get the number of supported hardware watchpoints. + /// + /// Returns 0 if the capability is not available and a positive integer otherwise. + #[cfg(target_arch = "aarch64")] + pub fn get_guest_debug_hw_wps(&self) -> i32 { + self.check_extension_int(Cap::DebugHwWps) + } + + /// Wrapper over `KVM_CHECK_EXTENSION`. + /// + /// Returns 0 if the capability is not available and a positive integer otherwise. + /// See the documentation for `KVM_CHECK_EXTENSION`. + /// + /// # Arguments + /// + /// * `c` - KVM capability to check. 
+ /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// use kvm_ioctls::Cap; + /// + /// let kvm = Kvm::new().unwrap(); + /// assert!(kvm.check_extension_int(Cap::MaxVcpuId) > 0); + /// ``` + pub fn check_extension_int(&self, c: Cap) -> i32 { + // SAFETY: Safe because we know that our file is a KVM fd and that the extension is one of + // the ones defined by kernel. + unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) } + } + + /// Checks if a particular `Cap` is available. + /// + /// Returns true if the capability is supported and false otherwise. + /// See the documentation for `KVM_CHECK_EXTENSION`. + /// + /// # Arguments + /// + /// * `c` - KVM capability to check. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// use kvm_ioctls::Cap; + /// + /// let kvm = Kvm::new().unwrap(); + /// // Check if `KVM_CAP_USER_MEMORY` is supported. + /// assert!(kvm.check_extension(Cap::UserMemory)); + /// ``` + pub fn check_extension(&self, c: Cap) -> bool { + self.check_extension_int(c) > 0 + } + + /// Returns the size of the memory mapping required to use the vcpu's `kvm_run` structure. + /// + /// See the documentation for `KVM_GET_VCPU_MMAP_SIZE`. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// assert!(kvm.get_vcpu_mmap_size().unwrap() > 0); + /// ``` + pub fn get_vcpu_mmap_size(&self) -> Result { + // SAFETY: Safe because we know that our file is a KVM fd and we verify the return result. + let res = unsafe { ioctl(self, KVM_GET_VCPU_MMAP_SIZE()) }; + if res > 0 { + Ok(res as usize) + } else { + Err(errno::Error::last()) + } + } + + /// Gets the recommended number of VCPUs per VM. + /// + /// See the documentation for `KVM_CAP_NR_VCPUS`. + /// Default to 4 when `KVM_CAP_NR_VCPUS` is not implemented. 
+ /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// // We expect the number of vCPUs to be > 0 as per KVM API documentation. + /// assert!(kvm.get_nr_vcpus() > 0); + /// ``` + pub fn get_nr_vcpus(&self) -> usize { + let x = self.check_extension_int(Cap::NrVcpus); + if x > 0 { + x as usize + } else { + 4 + } + } + + /// Returns the maximum allowed memory slots per VM. + /// + /// KVM reports the number of available memory slots (`KVM_CAP_NR_MEMSLOTS`) + /// using the extension interface. Both x86 and s390 implement this, ARM + /// and powerpc do not yet enable it. + /// Default to 32 when `KVM_CAP_NR_MEMSLOTS` is not implemented. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// assert!(kvm.get_nr_memslots() > 0); + /// ``` + pub fn get_nr_memslots(&self) -> usize { + let x = self.check_extension_int(Cap::NrMemslots); + if x > 0 { + x as usize + } else { + 32 + } + } + + /// Gets the recommended maximum number of VCPUs per VM. + /// + /// See the documentation for `KVM_CAP_MAX_VCPUS`. + /// Returns [get_nr_vcpus()](struct.Kvm.html#method.get_nr_vcpus) when + /// `KVM_CAP_MAX_VCPUS` is not implemented. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// assert!(kvm.get_max_vcpus() > 0); + /// ``` + pub fn get_max_vcpus(&self) -> usize { + match self.check_extension_int(Cap::MaxVcpus) { + 0 => self.get_nr_vcpus(), + x => x as usize, + } + } + + /// Gets the Maximum VCPU ID per VM. 
+ /// + /// See the documentation for `KVM_CAP_MAX_VCPU_ID` + /// Returns [get_max_vcpus()](struct.Kvm.html#method.get_max_vcpus) when + /// `KVM_CAP_MAX_VCPU_ID` is not implemented + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// assert!(kvm.get_max_vcpu_id() > 0); + /// ``` + pub fn get_max_vcpu_id(&self) -> usize { + match self.check_extension_int(Cap::MaxVcpuId) { + 0 => self.get_max_vcpus(), + x => x as usize, + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn get_cpuid(&self, kind: u64, num_entries: usize) -> Result { + if num_entries > KVM_MAX_CPUID_ENTRIES { + // Returns the same error the underlying `ioctl` would have sent. + return Err(errno::Error::new(libc::ENOMEM)); + } + + let mut cpuid = CpuId::new(num_entries).map_err(|_| errno::Error::new(libc::ENOMEM))?; + // SAFETY: The kernel is trusted not to write beyond the bounds of the memory + // allocated for the struct. The limit is read from nent, which is set to the allocated + // size(num_entries) above. + let ret = unsafe { ioctl_with_mut_ptr(self, kind, cpuid.as_mut_fam_struct_ptr()) }; + if ret < 0 { + return Err(errno::Error::last()); + } + + Ok(cpuid) + } + + /// X86 specific call to get the system emulated CPUID values. + /// + /// See the documentation for `KVM_GET_EMULATED_CPUID`. + /// + /// # Arguments + /// + /// * `num_entries` - Maximum number of CPUID entries. This function can return less than + /// this when the hardware does not support so many CPUID entries. + /// + /// Returns Error `errno::Error(libc::ENOMEM)` when the input `num_entries` is greater than + /// `KVM_MAX_CPUID_ENTRIES`. 
+ /// + /// # Example + /// + /// ``` + /// extern crate kvm_bindings; + /// use kvm_bindings::KVM_MAX_CPUID_ENTRIES; + /// use kvm_ioctls::Kvm; + /// + /// let kvm = Kvm::new().unwrap(); + /// let mut cpuid = kvm.get_emulated_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + /// let cpuid_entries = cpuid.as_mut_slice(); + /// assert!(cpuid_entries.len() <= KVM_MAX_CPUID_ENTRIES); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_emulated_cpuid(&self, num_entries: usize) -> Result { + self.get_cpuid(KVM_GET_EMULATED_CPUID(), num_entries) + } + + /// X86 specific call to get the system supported CPUID values. + /// + /// See the documentation for `KVM_GET_SUPPORTED_CPUID`. + /// + /// # Arguments + /// + /// * `num_entries` - Maximum number of CPUID entries. This function can return less than + /// this when the hardware does not support so many CPUID entries. + /// + /// Returns Error `errno::Error(libc::ENOMEM)` when the input `num_entries` is greater than + /// `KVM_MAX_CPUID_ENTRIES`. + /// + /// # Example + /// + /// ``` + /// extern crate kvm_bindings; + /// use kvm_bindings::KVM_MAX_CPUID_ENTRIES; + /// use kvm_ioctls::Kvm; + /// + /// let kvm = Kvm::new().unwrap(); + /// let mut cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + /// let cpuid_entries = cpuid.as_mut_slice(); + /// assert!(cpuid_entries.len() <= KVM_MAX_CPUID_ENTRIES); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_supported_cpuid(&self, num_entries: usize) -> Result { + self.get_cpuid(KVM_GET_SUPPORTED_CPUID(), num_entries) + } + + /// X86 specific call to get list of supported MSRS + /// + /// See the documentation for `KVM_GET_MSR_INDEX_LIST`. 
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use kvm_ioctls::Kvm;
+ ///
+ /// let kvm = Kvm::new().unwrap();
+ /// let msr_index_list = kvm.get_msr_index_list().unwrap();
+ /// ```
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ pub fn get_msr_index_list(&self) -> Result {
+ let mut msr_list =
+ MsrList::new(KVM_MAX_MSR_ENTRIES).map_err(|_| errno::Error::new(libc::ENOMEM))?;
+
+ // SAFETY: The kernel is trusted not to write beyond the bounds of the memory
+ // allocated for the struct. The limit is read from nmsrs, which is set to the allocated
+ // size (KVM_MAX_MSR_ENTRIES) above.
+ let ret = unsafe {
+ ioctl_with_mut_ptr(
+ self,
+ KVM_GET_MSR_INDEX_LIST(),
+ msr_list.as_mut_fam_struct_ptr(),
+ )
+ };
+ if ret < 0 {
+ return Err(errno::Error::last());
+ }
+
+ // The ioctl will also update the internal `nmsrs` with the actual count.
+ Ok(msr_list)
+ }
+
+ /// Creates a VM fd using the KVM fd.
+ ///
+ /// See the documentation for `KVM_CREATE_VM`.
+ /// A call to this function will also initialize the size of the vcpu mmap area using the
+ /// `KVM_GET_VCPU_MMAP_SIZE` ioctl.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use kvm_ioctls::Kvm;
+ /// let kvm = Kvm::new().unwrap();
+ /// let vm = kvm.create_vm().unwrap();
+ /// // Check that the VM mmap size is the same reported by `KVM_GET_VCPU_MMAP_SIZE`.
+ /// assert!(vm.run_size() == kvm.get_vcpu_mmap_size().unwrap());
+ /// ```
+ #[cfg(not(any(target_arch = "aarch64")))]
+ pub fn create_vm(&self) -> Result {
+ self.create_vm_with_type(0) // Create using default VM type
+ }
+
+ /// AArch64 specific create_vm to create a VM fd using the KVM fd using the host's maximum IPA size.
+ ///
+ /// See the arm64 section of KVM documentation for `KVM_CREATE_VM`.
+ /// A call to this function will also initialize the size of the vcpu mmap area using the
+ /// `KVM_GET_VCPU_MMAP_SIZE` ioctl.
+ /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// // Check that the VM mmap size is the same reported by `KVM_GET_VCPU_MMAP_SIZE`. + /// assert!(vm.run_size() == kvm.get_vcpu_mmap_size().unwrap()); + /// ``` + #[cfg(any(target_arch = "aarch64"))] + pub fn create_vm(&self) -> Result { + let mut ipa_size = 0; // Create using default VM type + if self.check_extension(Cap::ArmVmIPASize) { + ipa_size = self.get_host_ipa_limit(); + } + self.create_vm_with_type(ipa_size as u64) + } + + /// AArch64 specific function to create a VM fd using the KVM fd with flexible IPA size. + /// + /// See the arm64 section of KVM documentation for `KVM_CREATE_VM`. + /// A call to this function will also initialize the size of the vcpu mmap area using the + /// `KVM_GET_VCPU_MMAP_SIZE` ioctl. + /// + /// Note: `Cap::ArmVmIPASize` should be checked using `check_extension` before calling + /// this function to determine if the host machine supports the IPA size capability. + /// + /// # Arguments + /// + /// * `ipa_size` - Guest VM IPA size, 32 <= ipa_size <= Host_IPA_Limit. + /// The value of `Host_IPA_Limit` may be different between hardware + /// implementations and can be extracted by calling `get_host_ipa_limit`. + /// Possible values can be found in documentation of registers `TCR_EL2` + /// and `VTCR_EL2`. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::{Kvm, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// // Check if the ArmVmIPASize cap is supported. + /// if kvm.check_extension(Cap::ArmVmIPASize) { + /// let host_ipa_limit = kvm.get_host_ipa_limit(); + /// let vm = kvm.create_vm_with_ipa_size(host_ipa_limit as u32).unwrap(); + /// // Check that the VM mmap size is the same reported by `KVM_GET_VCPU_MMAP_SIZE`. 
+ /// assert!(vm.run_size() == kvm.get_vcpu_mmap_size().unwrap()); + /// } + /// ``` + #[cfg(any(target_arch = "aarch64"))] + pub fn create_vm_with_ipa_size(&self, ipa_size: u32) -> Result { + self.create_vm_with_type((ipa_size & KVM_VM_TYPE_ARM_IPA_SIZE_MASK).into()) + } + + /// Creates a VM fd using the KVM fd of a specific type. + /// + /// See the documentation for `KVM_CREATE_VM`. + /// A call to this function will also initialize the size of the vcpu mmap area using the + /// `KVM_GET_VCPU_MMAP_SIZE` ioctl. + /// + /// * `vm_type` - Platform and architecture specific platform VM type. A value of 0 is the equivalent + /// to using the default VM type. + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm_with_type(0).unwrap(); + /// // Check that the VM mmap size is the same reported by `KVM_GET_VCPU_MMAP_SIZE`. + /// assert!(vm.run_size() == kvm.get_vcpu_mmap_size().unwrap()); + /// ``` + pub fn create_vm_with_type(&self, vm_type: u64) -> Result { + // SAFETY: Safe because we know `self.kvm` is a real KVM fd as this module is the only one + // that create Kvm objects. + let ret = unsafe { ioctl_with_val(&self.kvm, KVM_CREATE_VM(), vm_type) }; + if ret >= 0 { + // SAFETY: Safe because we verify the value of ret and we are the owners of the fd. + let vm_file = unsafe { File::from_raw_fd(ret) }; + let run_mmap_size = self.get_vcpu_mmap_size()?; + Ok(new_vmfd(vm_file, run_mmap_size)) + } else { + Err(errno::Error::last()) + } + } + + /// Creates a VmFd object from a VM RawFd. + /// + /// # Arguments + /// + /// * `fd` - the RawFd used for creating the VmFd object. + /// + /// # Safety + /// + /// This function is unsafe as the primitives currently returned have the contract that + /// they are the sole owner of the file descriptor they are wrapping. 
Usage of this function + /// could accidentally allow violating this contract which can cause memory unsafety in code + /// that relies on it being true. + /// + /// The caller of this method must make sure the fd is valid and nothing else uses it. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use std::os::unix::io::AsRawFd; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let rawfd = unsafe { libc::dup(vm.as_raw_fd()) }; + /// assert!(rawfd >= 0); + /// let vm = unsafe { kvm.create_vmfd_from_rawfd(rawfd).unwrap() }; + /// ``` + pub unsafe fn create_vmfd_from_rawfd(&self, fd: RawFd) -> Result { + let run_mmap_size = self.get_vcpu_mmap_size()?; + Ok(new_vmfd(File::from_raw_fd(fd), run_mmap_size)) + } +} + +impl AsRawFd for Kvm { + fn as_raw_fd(&self) -> RawFd { + self.kvm.as_raw_fd() + } +} + +impl FromRawFd for Kvm { + /// Creates a new Kvm object assuming `fd` represents an existing open file descriptor + /// associated with `/dev/kvm`. + /// + /// For usage examples check [open_with_cloexec()](struct.Kvm.html#method.open_with_cloexec). + /// + /// # Arguments + /// + /// * `fd` - File descriptor for `/dev/kvm`. + /// + /// # Safety + /// + /// This function is unsafe as the primitives currently returned have the contract that + /// they are the sole owner of the file descriptor they are wrapping. Usage of this function + /// could accidentally allow violating this contract which can cause memory unsafety in code + /// that relies on it being true. + /// + /// The caller of this method must make sure the fd is valid and nothing else uses it. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// # use std::os::unix::io::FromRawFd; + /// let kvm_fd = Kvm::open_with_cloexec(true).unwrap(); + /// // Safe because we verify that the fd is valid in `open_with_cloexec` and we own the fd. 
+ /// let kvm = unsafe { Kvm::from_raw_fd(kvm_fd) }; + /// ``` + unsafe fn from_raw_fd(fd: RawFd) -> Self { + Kvm { + kvm: File::from_raw_fd(fd), + } + } +} + +#[cfg(test)] +mod tests { + #![allow(clippy::undocumented_unsafe_blocks)] + use super::*; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + use kvm_bindings::KVM_MAX_CPUID_ENTRIES; + use libc::{fcntl, FD_CLOEXEC, F_GETFD}; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + use vmm_sys_util::fam::FamStruct; + + #[test] + fn test_kvm_new() { + Kvm::new().unwrap(); + } + + #[test] + fn test_kvm_new_with_path() { + let kvm_path = unsafe { CStr::from_bytes_with_nul_unchecked(b"/dev/kvm\0") }; + Kvm::new_with_path(kvm_path).unwrap(); + } + + #[test] + fn test_open_with_cloexec() { + let fd = Kvm::open_with_cloexec(false).unwrap(); + let flags = unsafe { fcntl(fd, F_GETFD, 0) }; + assert_eq!(flags & FD_CLOEXEC, 0); + let fd = Kvm::open_with_cloexec(true).unwrap(); + let flags = unsafe { fcntl(fd, F_GETFD, 0) }; + assert_eq!(flags & FD_CLOEXEC, FD_CLOEXEC); + } + + #[test] + fn test_open_with_cloexec_at() { + let kvm_path = std::ffi::CString::new("/dev/kvm").unwrap(); + let fd = Kvm::open_with_cloexec_at(&kvm_path, false).unwrap(); + let flags = unsafe { fcntl(fd, F_GETFD, 0) }; + assert_eq!(flags & FD_CLOEXEC, 0); + let fd = Kvm::open_with_cloexec_at(&kvm_path, true).unwrap(); + let flags = unsafe { fcntl(fd, F_GETFD, 0) }; + assert_eq!(flags & FD_CLOEXEC, FD_CLOEXEC); + } + + #[test] + fn test_kvm_api_version() { + let kvm = Kvm::new().unwrap(); + assert_eq!(kvm.get_api_version(), 12); + assert!(kvm.check_extension(Cap::UserMemory)); + } + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_get_host_ipa_limit() { + let kvm = Kvm::new().unwrap(); + let host_ipa_limit = kvm.get_host_ipa_limit(); + + if host_ipa_limit > 0 { + assert!(host_ipa_limit >= 32); + } else { + // if unsupported, the return value should be 0. 
+ assert_eq!(host_ipa_limit, 0); + } + } + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_guest_debug_hw_capacity() { + let kvm = Kvm::new().unwrap(); + // The number of supported breakpoints and watchpoints may vary on + // different platforms. + // It could be 0 if no supported, or any positive integer otherwise. + assert!(kvm.get_guest_debug_hw_bps() >= 0); + assert!(kvm.get_guest_debug_hw_wps() >= 0); + } + + #[test] + fn test_kvm_getters() { + let kvm = Kvm::new().unwrap(); + + // vCPU related getters + let nr_vcpus = kvm.get_nr_vcpus(); + assert!(nr_vcpus >= 4); + + assert!(kvm.get_max_vcpus() >= nr_vcpus); + + // Memory related getters + assert!(kvm.get_vcpu_mmap_size().unwrap() > 0); + assert!(kvm.get_nr_memslots() >= 32); + } + + #[test] + fn test_create_vm() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + + // Test create_vmfd_from_rawfd() + let rawfd = unsafe { libc::dup(vm.as_raw_fd()) }; + assert!(rawfd >= 0); + let vm = unsafe { kvm.create_vmfd_from_rawfd(rawfd).unwrap() }; + + assert_eq!(vm.run_size(), kvm.get_vcpu_mmap_size().unwrap()); + } + + #[test] + fn test_create_vm_with_type() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm_with_type(0).unwrap(); + + // Test create_vmfd_from_rawfd() + let rawfd = unsafe { libc::dup(vm.as_raw_fd()) }; + assert!(rawfd >= 0); + let vm = unsafe { kvm.create_vmfd_from_rawfd(rawfd).unwrap() }; + + assert_eq!(vm.run_size(), kvm.get_vcpu_mmap_size().unwrap()); + } + + #[test] + #[cfg(any(target_arch = "aarch64"))] + fn test_create_vm_with_ipa_size() { + let kvm = Kvm::new().unwrap(); + if kvm.check_extension(Cap::ArmVmIPASize) { + let host_ipa_limit = kvm.get_host_ipa_limit(); + // Here we test with the maximum value that the host supports to both test the + // discoverability of supported IPA sizes and likely some other values than 40. + kvm.create_vm_with_ipa_size(host_ipa_limit as u32).unwrap(); + // Test invalid input values + // Case 1: IPA size is smaller than 32. 
+ assert!(kvm.create_vm_with_ipa_size(31).is_err()); + // Case 2: IPA size is bigger than Host_IPA_Limit. + assert!(kvm + .create_vm_with_ipa_size((host_ipa_limit + 1) as u32) + .is_err()); + } else { + // Unsupported, we can't provide an IPA size. Only KVM type=0 works. + assert!(kvm.create_vm_with_type(0).is_err()); + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[test] + fn test_get_supported_cpuid() { + let kvm = Kvm::new().unwrap(); + let mut cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + let cpuid_entries = cpuid.as_mut_slice(); + assert!(!cpuid_entries.is_empty()); + assert!(cpuid_entries.len() <= KVM_MAX_CPUID_ENTRIES); + + // Test case for more than MAX entries + let cpuid_err = kvm.get_emulated_cpuid(KVM_MAX_CPUID_ENTRIES + 1_usize); + assert!(cpuid_err.is_err()); + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn test_get_emulated_cpuid() { + let kvm = Kvm::new().unwrap(); + let mut cpuid = kvm.get_emulated_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + let cpuid_entries = cpuid.as_mut_slice(); + assert!(!cpuid_entries.is_empty()); + assert!(cpuid_entries.len() <= KVM_MAX_CPUID_ENTRIES); + + // Test case for more than MAX entries + let cpuid_err = kvm.get_emulated_cpuid(KVM_MAX_CPUID_ENTRIES + 1_usize); + assert!(cpuid_err.is_err()); + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[test] + fn test_cpuid_clone() { + let kvm = Kvm::new().unwrap(); + + // Test from_raw_fd() + let rawfd = unsafe { libc::dup(kvm.as_raw_fd()) }; + assert!(rawfd >= 0); + let kvm = unsafe { Kvm::from_raw_fd(rawfd) }; + + let cpuid_1 = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + let _ = CpuId::new(cpuid_1.as_fam_struct_ref().len()).unwrap(); + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn get_msr_index_list() { + let kvm = Kvm::new().unwrap(); + let msr_list = kvm.get_msr_index_list().unwrap(); + assert!(msr_list.as_slice().len() >= 2); + } + + 
#[test] + fn test_bad_kvm_fd() { + let badf_errno = libc::EBADF; + + let faulty_kvm = Kvm { + kvm: unsafe { File::from_raw_fd(-2) }, + }; + + assert_eq!( + faulty_kvm.get_vcpu_mmap_size().unwrap_err().errno(), + badf_errno + ); + assert_eq!(faulty_kvm.get_nr_vcpus(), 4); + assert_eq!(faulty_kvm.get_nr_memslots(), 32); + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + assert_eq!( + faulty_kvm.get_emulated_cpuid(4).err().unwrap().errno(), + badf_errno + ); + assert_eq!( + faulty_kvm.get_supported_cpuid(4).err().unwrap().errno(), + badf_errno + ); + + assert_eq!( + faulty_kvm.get_msr_index_list().err().unwrap().errno(), + badf_errno + ); + } + assert_eq!(faulty_kvm.create_vm().err().unwrap().errno(), badf_errno); + } +} diff --git a/kvm-ioctls/src/ioctls/vcpu.rs b/kvm-ioctls/src/ioctls/vcpu.rs new file mode 100644 index 000000000..18cf31d11 --- /dev/null +++ b/kvm-ioctls/src/ioctls/vcpu.rs @@ -0,0 +1,2830 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use kvm_bindings::*; +use libc::EINVAL; +use std::fs::File; +use std::os::unix::io::{AsRawFd, RawFd}; + +use crate::ioctls::{KvmRunWrapper, Result}; +use crate::kvm_ioctls::*; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use kvm_bindings::{CpuId, Msrs, KVM_MAX_CPUID_ENTRIES}; +use vmm_sys_util::errno; +use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ref}; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use vmm_sys_util::ioctl::{ioctl_with_mut_ptr, ioctl_with_ptr, ioctl_with_val}; + +/// Reasons for vCPU exits. +/// +/// The exit reasons are mapped to the `KVM_EXIT_*` defines in the +/// [Linux KVM header](https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/kvm.h). 
+#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] +#[derive(Debug)] +pub enum VcpuExit<'a> { + /// An out port instruction was run on the given port with the given data. + IoOut(u16 /* port */, &'a [u8] /* data */), + /// An in port instruction was run on the given port. + /// + /// The given slice should be filled in before [run()](struct.VcpuFd.html#method.run) + /// is called again. + IoIn(u16 /* port */, &'a mut [u8] /* data */), + /// A read instruction was run against the given MMIO address. + /// + /// The given slice should be filled in before [run()](struct.VcpuFd.html#method.run) + /// is called again. + MmioRead(u64 /* address */, &'a mut [u8]), + /// A write instruction was run against the given MMIO address with the given data. + MmioWrite(u64 /* address */, &'a [u8]), + /// Corresponds to KVM_EXIT_UNKNOWN. + Unknown, + /// Corresponds to KVM_EXIT_EXCEPTION. + Exception, + /// Corresponds to KVM_EXIT_HYPERCALL. + Hypercall, + /// Corresponds to KVM_EXIT_DEBUG. + /// + /// Provides architecture specific information for the debug event. + Debug(kvm_debug_exit_arch), + /// Corresponds to KVM_EXIT_HLT. + Hlt, + /// Corresponds to KVM_EXIT_IRQ_WINDOW_OPEN. + IrqWindowOpen, + /// Corresponds to KVM_EXIT_SHUTDOWN. + Shutdown, + /// Corresponds to KVM_EXIT_FAIL_ENTRY. + FailEntry( + u64, /* hardware_entry_failure_reason */ + u32, /* cpu */ + ), + /// Corresponds to KVM_EXIT_INTR. + Intr, + /// Corresponds to KVM_EXIT_SET_TPR. + SetTpr, + /// Corresponds to KVM_EXIT_TPR_ACCESS. + TprAccess, + /// Corresponds to KVM_EXIT_S390_SIEIC. + S390Sieic, + /// Corresponds to KVM_EXIT_S390_RESET. + S390Reset, + /// Corresponds to KVM_EXIT_DCR. + Dcr, + /// Corresponds to KVM_EXIT_NMI. + Nmi, + /// Corresponds to KVM_EXIT_INTERNAL_ERROR. + InternalError, + /// Corresponds to KVM_EXIT_OSI. + Osi, + /// Corresponds to KVM_EXIT_PAPR_HCALL. + PaprHcall, + /// Corresponds to KVM_EXIT_S390_UCONTROL. 
+ S390Ucontrol, + /// Corresponds to KVM_EXIT_WATCHDOG. + Watchdog, + /// Corresponds to KVM_EXIT_S390_TSCH. + S390Tsch, + /// Corresponds to KVM_EXIT_EPR. + Epr, + /// Corresponds to KVM_EXIT_SYSTEM_EVENT. + SystemEvent(u32 /* type */, &'a [u64] /* flags */), + /// Corresponds to KVM_EXIT_S390_STSI. + S390Stsi, + /// Corresponds to KVM_EXIT_IOAPIC_EOI. + IoapicEoi(u8 /* vector */), + /// Corresponds to KVM_EXIT_HYPERV. + Hyperv, + /// Corresponds to an exit reason that is unknown from the current version + /// of the kvm-ioctls crate. Let the consumer decide about what to do with + /// it. + Unsupported(u32), +} +#[cfg(target_arch = "riscv64")] +#[derive(Debug)] +pub enum VcpuExit<'a> { + /// An out port instruction was run on the given port with the given data. + IoOut(u16 /* port */, &'a [u8] /* data */), + /// An in port instruction was run on the given port. + /// + /// The given slice should be filled in before [run()](struct.VcpuFd.html#method.run) + /// is called again. + IoIn(u16 /* port */, &'a mut [u8] /* data */), + /// A read instruction was run against the given MMIO address. + /// + /// The given slice should be filled in before [run()](struct.VcpuFd.html#method.run) + /// is called again. + MmioRead(u64 /* address */, &'a mut [u8]), + /// A write instruction was run against the given MMIO address with the given data. + MmioWrite(u64 /* address */, &'a [u8]), + /// Corresponds to KVM_EXIT_UNKNOWN. + Unknown, + /// Corresponds to KVM_EXIT_EXCEPTION. + Exception, + /// Corresponds to KVM_EXIT_HYPERCALL. + Hypercall, + /// Corresponds to KVM_EXIT_DEBUG. + /// + /// Provides architecture specific information for the debug event. + Debug(kvm_debug_exit_arch), + /// Corresponds to KVM_EXIT_HLT. + Hlt, + /// Corresponds to KVM_EXIT_IRQ_WINDOW_OPEN. + IrqWindowOpen, + /// Corresponds to KVM_EXIT_SHUTDOWN. + Shutdown, + /// Corresponds to KVM_EXIT_FAIL_ENTRY. 
+ FailEntry( + u64, /* hardware_entry_failure_reason */ + u32, /* cpu */ + ), + /// Corresponds to KVM_EXIT_INTR. + Intr, + /// Corresponds to KVM_EXIT_SET_TPR. + SetTpr, + /// Corresponds to KVM_EXIT_TPR_ACCESS. + TprAccess, + /// Corresponds to KVM_EXIT_S390_SIEIC. + S390Sieic, + /// Corresponds to KVM_EXIT_S390_RESET. + S390Reset, + /// Corresponds to KVM_EXIT_DCR. + Dcr, + /// Corresponds to KVM_EXIT_NMI. + Nmi, + /// Corresponds to KVM_EXIT_INTERNAL_ERROR. + InternalError, + /// Corresponds to KVM_EXIT_OSI. + Osi, + /// Corresponds to KVM_EXIT_PAPR_HCALL. + PaprHcall, + /// Corresponds to KVM_EXIT_S390_UCONTROL. + S390Ucontrol, + /// Corresponds to KVM_EXIT_WATCHDOG. + Watchdog, + /// Corresponds to KVM_EXIT_S390_TSCH. + S390Tsch, + /// Corresponds to KVM_EXIT_EPR. + Epr, + /// Corresponds to KVM_EXIT_SYSTEM_EVENT. + SystemEvent(u32 /* type */, u64 /* flags */), + /// Corresponds to KVM_EXIT_S390_STSI. + S390Stsi, + /// Corresponds to KVM_EXIT_IOAPIC_EOI. + IoapicEoi(u8 /* vector */), + /// Corresponds to KVM_EXIT_HYPERV. + Hyperv, + /// Corresponds to an exit reason that is unknown from the current version + /// of the kvm-ioctls crate. Let the consumer decide about what to do with + /// it. + Unsupported(u32), +} + +/// Wrapper over KVM vCPU ioctls. +#[derive(Debug)] +pub struct VcpuFd { + vcpu: File, + kvm_run_ptr: KvmRunWrapper, +} + +/// KVM Sync Registers used to tell KVM which registers to sync +#[repr(u32)] +#[derive(Debug, Copy, Clone)] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +pub enum SyncReg { + /// General purpose registers, + Register = KVM_SYNC_X86_REGS, + + /// System registers + SystemRegister = KVM_SYNC_X86_SREGS, + + /// CPU events + VcpuEvents = KVM_SYNC_X86_EVENTS, +} + +impl VcpuFd { + /// Returns the vCPU general purpose registers. + /// + /// The registers are returned in a `kvm_regs` structure as defined in the + /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). 
+ /// See documentation for `KVM_GET_REGS`. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] + /// let regs = vcpu.get_regs().unwrap(); + /// ``` + #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] + pub fn get_regs(&self) -> Result { + let mut regs = kvm_regs::default(); + // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only + // read the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_REGS(), &mut regs) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(regs) + } + + /// Sets a specified piece of cpu configuration and/or state. + /// + /// See the documentation for `KVM_SET_DEVICE_ATTR` in + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt) + /// # Arguments + /// + /// * `device_attr` - The cpu attribute to be set. 
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::{ + /// KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT + /// }; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// let dist_attr = kvm_bindings::kvm_device_attr { + /// group: KVM_ARM_VCPU_PMU_V3_CTRL, + /// attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT), + /// addr: 0x0, + /// flags: 0, + /// }; + /// + /// if (vcpu.has_device_attr(&dist_attr).is_ok()) { + /// vcpu.set_device_attr(&dist_attr).unwrap(); + /// } + /// ``` + #[cfg(target_arch = "aarch64")] + pub fn set_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> { + // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_DEVICE_ATTR(), device_attr) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Tests whether a cpu supports a particular attribute. + /// + /// See the documentation for `KVM_HAS_DEVICE_ATTR` in + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt) + /// # Arguments + /// + /// * `device_attr` - The cpu attribute to be tested. `addr` field is ignored. 
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::{ + /// KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT + /// }; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// let dist_attr = kvm_bindings::kvm_device_attr { + /// group: KVM_ARM_VCPU_PMU_V3_CTRL, + /// attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT), + /// addr: 0x0, + /// flags: 0, + /// }; + /// + /// vcpu.has_device_attr(&dist_attr); + /// ``` + #[cfg(target_arch = "aarch64")] + pub fn has_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> { + // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel. + let ret = unsafe { ioctl_with_ref(self, KVM_HAS_DEVICE_ATTR(), device_attr) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl. + /// + /// # Arguments + /// + /// * `regs` - general purpose registers. For details check the `kvm_regs` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] + /// { + /// // Get the current vCPU registers. + /// let mut regs = vcpu.get_regs().unwrap(); + /// // Set a new value for the Instruction Pointer. 
+ /// regs.rip = 0x100; + /// vcpu.set_regs(®s).unwrap(); + /// } + /// ``` + #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] + pub fn set_regs(&self, regs: &kvm_regs) -> Result<()> { + // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only + // read the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_REGS(), regs) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Returns the vCPU special registers. + /// + /// The registers are returned in a `kvm_sregs` structure as defined in the + /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// See documentation for `KVM_GET_SREGS`. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] + /// let sregs = vcpu.get_sregs().unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_sregs(&self) -> Result { + let mut regs = kvm_sregs::default(); + // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only + // write the correct amount of memory to our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS(), &mut regs) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(regs) + } + + /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl. + /// + /// # Arguments + /// + /// * `sregs` - Special registers. For details check the `kvm_sregs` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). 
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let vcpu = vm.create_vcpu(0).unwrap();
+    /// #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
+    /// {
+    ///     let mut sregs = vcpu.get_sregs().unwrap();
+    ///     // Update the code segment (cs).
+    ///     sregs.cs.base = 0;
+    ///     sregs.cs.selector = 0;
+    ///     vcpu.set_sregs(&sregs).unwrap();
+    /// }
+    /// ```
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    pub fn set_sregs(&self, sregs: &kvm_sregs) -> Result<()> {
+        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
+        // read the correct amount of memory from our pointer, and we verify the return result.
+        let ret = unsafe { ioctl_with_ref(self, KVM_SET_SREGS(), sregs) };
+        if ret != 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(())
+    }
+
+    /// Returns the floating point state (FPU) from the vCPU.
+    ///
+    /// The state is returned in a `kvm_fpu` structure as defined in the
+    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    /// See the documentation for `KVM_GET_FPU`.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let vcpu = vm.create_vcpu(0).unwrap();
+    /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    /// let fpu = vcpu.get_fpu().unwrap();
+    /// ```
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    pub fn get_fpu(&self) -> Result<kvm_fpu> {
+        let mut fpu = kvm_fpu::default();
+        // SAFETY: Here we trust the kernel not to read past the end of the kvm_fpu struct.
+        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_FPU(), &mut fpu) };
+        if ret != 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(fpu)
+    }
+
+    /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
+ /// + /// # Arguments + /// + /// * `fpu` - FPU configuration. For details check the `kvm_fpu` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::kvm_fpu; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// { + /// let KVM_FPU_CWD: u16 = 0x37f; + /// let fpu = kvm_fpu { + /// fcw: KVM_FPU_CWD, + /// ..Default::default() + /// }; + /// vcpu.set_fpu(&fpu).unwrap(); + /// } + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_fpu(&self, fpu: &kvm_fpu) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_fpu struct. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_FPU(), fpu) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// X86 specific call to setup the CPUID registers. + /// + /// See the documentation for `KVM_SET_CPUID2`. + /// + /// # Arguments + /// + /// * `cpuid` - CPUID registers. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_bindings::KVM_MAX_CPUID_ENTRIES; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let mut kvm_cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// // Update the CPUID entries to disable the EPB feature. 
+ /// const ECX_EPB_SHIFT: u32 = 3; + /// { + /// let entries = kvm_cpuid.as_mut_slice(); + /// for entry in entries.iter_mut() { + /// match entry.function { + /// 6 => entry.ecx &= !(1 << ECX_EPB_SHIFT), + /// _ => (), + /// } + /// } + /// } + /// + /// vcpu.set_cpuid2(&kvm_cpuid).unwrap(); + /// ``` + /// + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_cpuid2(&self, cpuid: &CpuId) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_cpuid2 struct. + let ret = unsafe { ioctl_with_ptr(self, KVM_SET_CPUID2(), cpuid.as_fam_struct_ptr()) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// X86 specific call to retrieve the CPUID registers. + /// + /// It requires knowledge of how many `kvm_cpuid_entry2` entries there are to get. + /// See the documentation for `KVM_GET_CPUID2` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `num_entries` - Number of CPUID entries to be read. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_bindings::KVM_MAX_CPUID_ENTRIES; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let cpuid = vcpu.get_cpuid2(KVM_MAX_CPUID_ENTRIES).unwrap(); + /// ``` + /// + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_cpuid2(&self, num_entries: usize) -> Result { + if num_entries > KVM_MAX_CPUID_ENTRIES { + // Returns the same error the underlying `ioctl` would have sent. + return Err(errno::Error::new(libc::ENOMEM)); + } + + let mut cpuid = CpuId::new(num_entries).map_err(|_| errno::Error::new(libc::ENOMEM))?; + let ret = + // SAFETY: Here we trust the kernel not to read past the end of the kvm_cpuid2 struct. 
+ unsafe { ioctl_with_mut_ptr(self, KVM_GET_CPUID2(), cpuid.as_mut_fam_struct_ptr()) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(cpuid) + } + + /// + /// See the documentation for `KVM_ENABLE_CAP`. + /// + /// # Arguments + /// + /// * kvm_enable_cap - KVM capability structure. For details check the `kvm_enable_cap` + /// structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_bindings::{kvm_enable_cap, KVM_MAX_CPUID_ENTRIES, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP}; + /// # use kvm_ioctls::{Kvm, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut cap: kvm_enable_cap = Default::default(); + /// if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") { + /// // KVM_CAP_HYPERV_SYNIC needs KVM_CAP_SPLIT_IRQCHIP enabled + /// cap.cap = KVM_CAP_SPLIT_IRQCHIP; + /// cap.args[0] = 24; + /// vm.enable_cap(&cap).unwrap(); + /// + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// if kvm.check_extension(Cap::HypervSynic) { + /// let mut cap: kvm_enable_cap = Default::default(); + /// cap.cap = KVM_CAP_HYPERV_SYNIC; + /// vcpu.enable_cap(&cap).unwrap(); + /// } + /// } + /// ``` + /// + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> { + // SAFETY: The ioctl is safe because we allocated the struct and we know the + // kernel will write exactly the size of the struct. + let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), cap) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller). + /// + /// The state is returned in a `kvm_lapic_state` structure as defined in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). 
+ /// See the documentation for `KVM_GET_LAPIC`. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// // For `get_lapic` to work, you first need to create a IRQ chip before creating the vCPU. + /// vm.create_irq_chip().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let lapic = vcpu.get_lapic().unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_lapic(&self) -> Result { + let mut klapic = kvm_lapic_state::default(); + + // SAFETY: The ioctl is unsafe unless you trust the kernel not to write past the end of the + // local_apic struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_LAPIC(), &mut klapic) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(klapic) + } + + /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller). + /// + /// See the documentation for `KVM_SET_LAPIC`. + /// + /// # Arguments + /// + /// * `klapic` - LAPIC state. For details check the `kvm_lapic_state` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// use std::io::Write; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// // For `get_lapic` to work, you first need to create a IRQ chip before creating the vCPU. + /// vm.create_irq_chip().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let mut lapic = vcpu.get_lapic().unwrap(); + /// + /// // Write to APIC_ICR offset the value 2. + /// let apic_icr_offset = 0x300; + /// let write_value: &[u8] = &[2, 0, 0, 0]; + /// let mut apic_icr_slice = + /// unsafe { &mut *(&mut lapic.regs[apic_icr_offset..] 
as *mut [i8] as *mut [u8]) }; + /// apic_icr_slice.write(write_value).unwrap(); + /// + /// // Update the value of LAPIC. + /// vcpu.set_lapic(&lapic).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()> { + // SAFETY: The ioctl is safe because the kernel will only read from the klapic struct. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_LAPIC(), klapic) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Returns the model-specific registers (MSR) for this vCPU. + /// + /// It emulates `KVM_GET_MSRS` ioctl's behavior by returning the number of MSRs + /// successfully read upon success or the last error number in case of failure. + /// The MSRs are returned in the `msr` method argument. + /// + /// # Arguments + /// + /// * `msrs` - MSRs (input/output). For details check the `kvm_msrs` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::{kvm_msr_entry, Msrs}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// // Configure the struct to say which entries we want to get. + /// let mut msrs = Msrs::from_entries(&[ + /// kvm_msr_entry { + /// index: 0x0000_0174, + /// ..Default::default() + /// }, + /// kvm_msr_entry { + /// index: 0x0000_0175, + /// ..Default::default() + /// }, + /// ]) + /// .unwrap(); + /// let read = vcpu.get_msrs(&mut msrs).unwrap(); + /// assert_eq!(read, 2); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_msrs(&self, msrs: &mut Msrs) -> Result { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_msrs struct. 
+ let ret = unsafe { ioctl_with_mut_ptr(self, KVM_GET_MSRS(), msrs.as_mut_fam_struct_ptr()) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(ret as usize) + } + + /// Setup the model-specific registers (MSR) for this vCPU. + /// Returns the number of MSR entries actually written. + /// + /// See the documentation for `KVM_SET_MSRS`. + /// + /// # Arguments + /// + /// * `msrs` - MSRs. For details check the `kvm_msrs` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::{kvm_msr_entry, Msrs}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// // Configure the entries we want to set. + /// let mut msrs = Msrs::from_entries(&[kvm_msr_entry { + /// index: 0x0000_0174, + /// ..Default::default() + /// }]) + /// .unwrap(); + /// let written = vcpu.set_msrs(&msrs).unwrap(); + /// assert_eq!(written, 1); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_msrs(&self, msrs: &Msrs) -> Result { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_msrs struct. + let ret = unsafe { ioctl_with_ptr(self, KVM_SET_MSRS(), msrs.as_fam_struct_ptr()) }; + // KVM_SET_MSRS actually returns the number of msr entries written. + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(ret as usize) + } + + /// Returns the vcpu's current "multiprocessing state". + /// + /// See the documentation for `KVM_GET_MP_STATE` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_mp_state` - multiprocessing state to be read. 
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let vcpu = vm.create_vcpu(0).unwrap();
+    /// let mp_state = vcpu.get_mp_state().unwrap();
+    /// ```
+    #[cfg(any(
+        target_arch = "x86",
+        target_arch = "x86_64",
+        target_arch = "arm",
+        target_arch = "aarch64",
+        target_arch = "s390",
+        target_arch = "riscv64"
+    ))]
+    pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
+        let mut mp_state = Default::default();
+        // SAFETY: Here we trust the kernel not to read past the end of the kvm_mp_state struct.
+        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE(), &mut mp_state) };
+        if ret != 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(mp_state)
+    }
+
+    /// Sets the vcpu's current "multiprocessing state".
+    ///
+    /// See the documentation for `KVM_SET_MP_STATE` in the
+    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    ///
+    /// # Arguments
+    ///
+    /// * `kvm_mp_state` - multiprocessing state to be written.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::Kvm;
+    /// let kvm = Kvm::new().unwrap();
+    /// let vm = kvm.create_vm().unwrap();
+    /// let vcpu = vm.create_vcpu(0).unwrap();
+    /// let mp_state = Default::default();
+    /// // Your `mp_state` manipulation here.
+    /// vcpu.set_mp_state(mp_state).unwrap();
+    /// ```
+    #[cfg(any(
+        target_arch = "x86",
+        target_arch = "x86_64",
+        target_arch = "arm",
+        target_arch = "aarch64",
+        target_arch = "s390",
+        target_arch = "riscv64"
+    ))]
+    pub fn set_mp_state(&self, mp_state: kvm_mp_state) -> Result<()> {
+        // SAFETY: Here we trust the kernel not to read past the end of the kvm_mp_state struct.
+        let ret = unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE(), &mp_state) };
+        if ret != 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(())
+    }
+
+    /// X86 specific call that returns the vcpu's current "xsave struct".
+ /// + /// See the documentation for `KVM_GET_XSAVE` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_xsave` - xsave struct to be read. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let xsave = vcpu.get_xsave().unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_xsave(&self) -> Result { + let mut xsave = Default::default(); + // SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XSAVE(), &mut xsave) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(xsave) + } + + /// X86 specific call that sets the vcpu's current "xsave struct". + /// + /// See the documentation for `KVM_SET_XSAVE` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_xsave` - xsave struct to be written. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let xsave = Default::default(); + /// // Your `xsave` manipulation here. + /// vcpu.set_xsave(&xsave).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_xsave(&self, xsave: &kvm_xsave) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_XSAVE(), xsave) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// X86 specific call that returns the vcpu's current "xcrs". 
+ /// + /// See the documentation for `KVM_GET_XCRS` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_xcrs` - xcrs to be read. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let xcrs = vcpu.get_xcrs().unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_xcrs(&self) -> Result { + let mut xcrs = Default::default(); + // SAFETY: Here we trust the kernel not to read past the end of the kvm_xcrs struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XCRS(), &mut xcrs) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(xcrs) + } + + /// X86 specific call that sets the vcpu's current "xcrs". + /// + /// See the documentation for `KVM_SET_XCRS` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_xcrs` - xcrs to be written. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let xcrs = Default::default(); + /// // Your `xcrs` manipulation here. + /// vcpu.set_xcrs(&xcrs).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_xcrs struct. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_XCRS(), xcrs) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// X86 specific call that returns the vcpu's current "debug registers". 
+ /// + /// See the documentation for `KVM_GET_DEBUGREGS` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_debugregs` - debug registers to be read. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let debug_regs = vcpu.get_debug_regs().unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_debug_regs(&self) -> Result { + let mut debug_regs = Default::default(); + // SAFETY: Here we trust the kernel not to read past the end of the kvm_debugregs struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS(), &mut debug_regs) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(debug_regs) + } + + /// X86 specific call that sets the vcpu's current "debug registers". + /// + /// See the documentation for `KVM_SET_DEBUGREGS` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_debugregs` - debug registers to be written. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let debug_regs = Default::default(); + /// // Your `debug_regs` manipulation here. + /// vcpu.set_debug_regs(&debug_regs).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_debug_regs(&self, debug_regs: &kvm_debugregs) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_debugregs struct. 
+ let ret = unsafe { ioctl_with_ref(self, KVM_SET_DEBUGREGS(), debug_regs) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Returns currently pending exceptions, interrupts, and NMIs as well as related + /// states of the vcpu. + /// + /// See the documentation for `KVM_GET_VCPU_EVENTS` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_vcpu_events` - vcpu events to be read. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// if kvm.check_extension(Cap::VcpuEvents) { + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let vcpu_events = vcpu.get_vcpu_events().unwrap(); + /// } + /// ``` + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + pub fn get_vcpu_events(&self) -> Result { + let mut vcpu_events = Default::default(); + // SAFETY: Here we trust the kernel not to read past the end of the kvm_vcpu_events struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS(), &mut vcpu_events) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(vcpu_events) + } + + /// Sets pending exceptions, interrupts, and NMIs as well as related states of the vcpu. + /// + /// See the documentation for `KVM_SET_VCPU_EVENTS` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `kvm_vcpu_events` - vcpu events to be written. 
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// if kvm.check_extension(Cap::VcpuEvents) { + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let vcpu_events = Default::default(); + /// // Your `vcpu_events` manipulation here. + /// vcpu.set_vcpu_events(&vcpu_events).unwrap(); + /// } + /// ``` + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + + pub fn set_vcpu_events(&self, vcpu_events: &kvm_vcpu_events) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_vcpu_events struct. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_VCPU_EVENTS(), vcpu_events) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Sets the type of CPU to be exposed to the guest and optional features. + /// + /// This initializes an ARM vCPU to the specified type with the specified features + /// and resets the values of all of its registers to defaults. See the documentation for + /// `KVM_ARM_VCPU_INIT`. + /// + /// # Arguments + /// + /// * `kvi` - information about preferred CPU target type and recommended features for it. + /// For details check the `kvm_vcpu_init` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). 
+ /// + /// # Example + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// use kvm_bindings::kvm_vcpu_init; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// let mut kvi = kvm_vcpu_init::default(); + /// vm.get_preferred_target(&mut kvi).unwrap(); + /// vcpu.vcpu_init(&kvi).unwrap(); + /// ``` + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + pub fn vcpu_init(&self, kvi: &kvm_vcpu_init) -> Result<()> { + // SAFETY: This is safe because we allocated the struct and we know the kernel will read + // exactly the size of the struct. + let ret = unsafe { ioctl_with_ref(self, KVM_ARM_VCPU_INIT(), kvi) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Returns the guest registers that are supported for the + /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. + /// + /// # Arguments + /// + /// * `reg_list` - list of registers (input/output). For details check the `kvm_reg_list` + /// structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::RegList; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// // KVM_GET_REG_LIST demands that the vcpus be initalized. 
+ /// let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); + /// vm.get_preferred_target(&mut kvi).unwrap(); + /// vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu"); + /// + /// let mut reg_list = RegList::new(500).unwrap(); + /// vcpu.get_reg_list(&mut reg_list).unwrap(); + /// assert!(reg_list.as_fam_struct_ref().n > 0); + /// ``` + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + pub fn get_reg_list(&self, reg_list: &mut RegList) -> Result<()> { + let ret = + // SAFETY: This is safe because we allocated the struct and we trust the kernel will read + // exactly the size of the struct. + unsafe { ioctl_with_mut_ref(self, KVM_GET_REG_LIST(), reg_list.as_mut_fam_struct()) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Sets processor-specific debug registers and configures the vcpu for handling + /// certain guest debug events using the `KVM_SET_GUEST_DEBUG` ioctl. + /// + /// # Arguments + /// + /// * `debug_struct` - control bitfields and debug registers, depending on the specific architecture. + /// For details check the `kvm_guest_debug` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). 
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::{ + /// # KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_USE_SW_BP, kvm_guest_debug_arch, kvm_guest_debug + /// # }; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] + /// { + /// let debug_struct = kvm_guest_debug { + /// // Configure the vcpu so that a KVM_DEBUG_EXIT would be generated + /// // when encountering a software breakpoint during execution + /// control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP, + /// pad: 0, + /// // Reset all arch-specific debug registers + /// arch: Default::default(), + /// }; + /// + /// vcpu.set_guest_debug(&debug_struct).unwrap(); + /// } + /// ``` + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "s390", + target_arch = "ppc" + ))] + pub fn set_guest_debug(&self, debug_struct: &kvm_guest_debug) -> Result<()> { + // SAFETY: Safe because we allocated the structure and we trust the kernel. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_GUEST_DEBUG(), debug_struct) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Sets the value of one register for this vCPU. + /// + /// The id of the register is encoded as specified in the kernel documentation + /// for `KVM_SET_ONE_REG`. + /// + /// # Arguments + /// + /// * `reg_id` - ID of the register for which we are setting the value. + /// * `data` - value for the specified register. 
+ #[cfg(any(target_arch = "arm", target_arch = "aarch64", target_arch = "riscv64"))] + pub fn set_one_reg(&self, reg_id: u64, data: u128) -> Result<()> { + let data_ptr = &data as *const _; + println!("in set reg,id is {:x}, data is {:x}", reg_id, data); + let onereg = kvm_one_reg { + id: reg_id, + addr: data_ptr as u64, + }; + // SAFETY: This is safe because we allocated the struct and we know the kernel will read + // exactly the size of the struct. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG(), &onereg) }; + if ret < 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Returns the value of the specified vCPU register. + /// + /// The id of the register is encoded as specified in the kernel documentation + /// for `KVM_GET_ONE_REG`. + /// + /// # Arguments + /// + /// * `reg_id` - ID of the register. + #[cfg(any(target_arch = "arm", target_arch = "aarch64", target_arch = "riscv64"))] + pub fn get_one_reg(&self, reg_id: u64) -> Result { + let mut reg_value = 0; + let mut onereg = kvm_one_reg { + id: reg_id, + addr: &mut reg_value as *mut _ as u64, + }; + // SAFETY: This is safe because we allocated the struct and we know the kernel will read + // exactly the size of the struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_ONE_REG(), &mut onereg) }; + if ret < 0 { + return Err(errno::Error::last()); + } + println!("finish get one reg"); + Ok(reg_value) + } + + /// This sets external interrupt for a virtual CPU and it will receive once it is ready. + /// + /// See the documentation for `KVM_INTERRUPT` in the + /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). 
+    #[cfg(target_arch = "riscv64")]
+    pub fn set_interrupt(&self) -> Result<()> {
+        let interrupt = kvm_interrupt {
+            irq: KVM_INTERRUPT_SET as u32,
+        };
+        // SAFETY: Safe because we allocated the struct and we know the kernel will read
+        // exactly the size of the struct.
+        let ret = unsafe { ioctl_with_ref(self, KVM_INTERRUPT(), &interrupt) };
+        if ret != 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(())
+    }
+
+    /// This clears pending external interrupt for a virtual CPU.
+    ///
+    /// See the documentation for `KVM_INTERRUPT` in the
+    /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    #[cfg(target_arch = "riscv64")]
+    pub fn unset_interrupt(&self) -> Result<()> {
+        let interrupt = kvm_interrupt {
+            irq: KVM_INTERRUPT_UNSET as u32,
+        };
+        // SAFETY: Safe because we allocated the struct and we know the kernel will read
+        // exactly the size of the struct.
+        let ret = unsafe { ioctl_with_ref(self, KVM_INTERRUPT(), &interrupt) };
+        if ret != 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(())
+    }
+
+    /// Notify the guest about the vCPU being paused.
+    ///
+    /// See the documentation for `KVM_KVMCLOCK_CTRL` in the
+    /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    pub fn kvmclock_ctrl(&self) -> Result<()> {
+        // SAFETY: Safe because we know that our file is a KVM fd and that the request
+        // is one of the ones defined by kernel.
+        let ret = unsafe { ioctl(self, KVM_KVMCLOCK_CTRL()) };
+        if ret != 0 {
+            return Err(errno::Error::last());
+        }
+        Ok(())
+    }
+
+    /// Triggers the running of the current virtual CPU returning an exit reason.
+    ///
+    /// See documentation for `KVM_RUN`.
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use std::io::Write; + /// # use std::ptr::null_mut; + /// # use std::slice; + /// # use kvm_ioctls::{Kvm, VcpuExit}; + /// # use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES}; + /// # let kvm = Kvm::new().unwrap(); + /// # let vm = kvm.create_vm().unwrap(); + /// // This is a dummy example for running on x86 based on https://lwn.net/Articles/658511/. + /// #[cfg(target_arch = "x86_64")] + /// { + /// let mem_size = 0x4000; + /// let guest_addr: u64 = 0x1000; + /// let load_addr: *mut u8 = unsafe { + /// libc::mmap( + /// null_mut(), + /// mem_size, + /// libc::PROT_READ | libc::PROT_WRITE, + /// libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE, + /// -1, + /// 0, + /// ) as *mut u8 + /// }; + /// + /// let mem_region = kvm_userspace_memory_region { + /// slot: 0, + /// guest_phys_addr: guest_addr, + /// memory_size: mem_size as u64, + /// userspace_addr: load_addr as u64, + /// flags: 0, + /// }; + /// unsafe { vm.set_user_memory_region(mem_region).unwrap() }; + /// + /// // Dummy x86 code that just calls halt. + /// let x86_code = [0xf4 /* hlt */]; + /// + /// // Write the code in the guest memory. This will generate a dirty page. + /// unsafe { + /// let mut slice = slice::from_raw_parts_mut(load_addr, mem_size); + /// slice.write(&x86_code).unwrap(); + /// } + /// + /// let vcpu_fd = vm.create_vcpu(0).unwrap(); + /// + /// let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap(); + /// vcpu_sregs.cs.base = 0; + /// vcpu_sregs.cs.selector = 0; + /// vcpu_fd.set_sregs(&vcpu_sregs).unwrap(); + /// + /// let mut vcpu_regs = vcpu_fd.get_regs().unwrap(); + /// // Set the Instruction Pointer to the guest address where we loaded the code. 
+ /// vcpu_regs.rip = guest_addr; + /// vcpu_regs.rax = 2; + /// vcpu_regs.rbx = 3; + /// vcpu_regs.rflags = 2; + /// vcpu_fd.set_regs(&vcpu_regs).unwrap(); + /// + /// loop { + /// match vcpu_fd.run().expect("run failed") { + /// VcpuExit::Hlt => { + /// break; + /// } + /// exit_reason => panic!("unexpected exit reason: {:?}", exit_reason), + /// } + /// } + /// } + /// ``` + pub fn run(&self) -> Result { + // SAFETY: Safe because we know that our file is a vCPU fd and we verify the return result. + println!("begin KVM_RUN"); + let ret = unsafe { ioctl(self, KVM_RUN()) }; + println!("finish KVM_RUN"); + if ret == 0 { + println!("come to ret 0 in run()"); + let run = self.kvm_run_ptr.as_mut_ref(); + match run.exit_reason { + // make sure you treat all possible exit reasons from include/uapi/linux/kvm.h corresponding + // when upgrading to a different kernel version + KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown), + KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception), + KVM_EXIT_IO => { + let run_start = run as *mut kvm_run as *mut u8; + // SAFETY: Safe because the exit_reason (which comes from the kernel) told us + // which union field to use. + let io = unsafe { run.__bindgen_anon_1.io }; + let port = io.port; + let data_size = io.count as usize * io.size as usize; + // SAFETY: The data_offset is defined by the kernel to be some number of bytes + // into the kvm_run stucture, which we have fully mmap'd. + let data_ptr = unsafe { run_start.offset(io.data_offset as isize) }; + // SAFETY: The slice's lifetime is limited to the lifetime of this vCPU, which is equal + // to the mmap of the `kvm_run` struct that this is slicing from. 
+ let data_slice = unsafe { + std::slice::from_raw_parts_mut::(data_ptr as *mut u8, data_size) + }; + match u32::from(io.direction) { + KVM_EXIT_IO_IN => Ok(VcpuExit::IoIn(port, data_slice)), + KVM_EXIT_IO_OUT => Ok(VcpuExit::IoOut(port, data_slice)), + _ => Err(errno::Error::new(EINVAL)), + } + } + KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall), + KVM_EXIT_DEBUG => { + // SAFETY: Safe because the exit_reason (which comes from the kernel) told us + // which union field to use. + let debug = unsafe { run.__bindgen_anon_1.debug }; + Ok(VcpuExit::Debug(debug.arch)) + } + KVM_EXIT_HLT => Ok(VcpuExit::Hlt), + KVM_EXIT_MMIO => { + // SAFETY: Safe because the exit_reason (which comes from the kernel) told us + // which union field to use. + let mmio = unsafe { &mut run.__bindgen_anon_1.mmio }; + let addr = mmio.phys_addr; + let len = mmio.len as usize; + let data_slice = &mut mmio.data[..len]; + if mmio.is_write != 0 { + Ok(VcpuExit::MmioWrite(addr, data_slice)) + } else { + Ok(VcpuExit::MmioRead(addr, data_slice)) + } + } + KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen), + KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown), + KVM_EXIT_FAIL_ENTRY => { + // SAFETY: Safe because the exit_reason (which comes from the kernel) told us + // which union field to use. 
+ let fail_entry = unsafe { &mut run.__bindgen_anon_1.fail_entry }; + Ok(VcpuExit::FailEntry( + fail_entry.hardware_entry_failure_reason, + fail_entry.cpu, + )) + } + KVM_EXIT_INTR => Ok(VcpuExit::Intr), + KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr), + KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess), + KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic), + KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset), + KVM_EXIT_DCR => Ok(VcpuExit::Dcr), + KVM_EXIT_NMI => Ok(VcpuExit::Nmi), + KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError), + KVM_EXIT_OSI => Ok(VcpuExit::Osi), + KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall), + KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol), + KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog), + KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch), + KVM_EXIT_EPR => Ok(VcpuExit::Epr), + KVM_EXIT_SYSTEM_EVENT => { + // SAFETY: Safe because the exit_reason (which comes from the kernel) told us + // which union field to use. + let system_event = unsafe { &mut run.__bindgen_anon_1.system_event }; + #[cfg(target_arch = "x86_64")] + { + let ndata = system_event.ndata; + let data = + unsafe { &system_event.__bindgen_anon_1.data[0..ndata as usize] }; + Ok(VcpuExit::SystemEvent(system_event.type_, data)) + } + #[cfg(target_arch = "riscv64")] + { + Ok(VcpuExit::SystemEvent( + system_event.type_, + system_event.flags, + )) + } + } + KVM_EXIT_S390_STSI => Ok(VcpuExit::S390Stsi), + KVM_EXIT_IOAPIC_EOI => { + // SAFETY: Safe because the exit_reason (which comes from the kernel) told us + // which union field to use. 
+ let eoi = unsafe { &mut run.__bindgen_anon_1.eoi }; + Ok(VcpuExit::IoapicEoi(eoi.vector)) + } + KVM_EXIT_HYPERV => Ok(VcpuExit::Hyperv), + r => Ok(VcpuExit::Unsupported(r)), + } + } else { + println!("run not 0"); + Err(errno::Error::last()) + } + } + + /// Returns a mutable reference to the kvm_run structure + pub fn get_kvm_run(&mut self) -> &mut kvm_run { + self.kvm_run_ptr.as_mut_ref() + } + + /// Sets the `immediate_exit` flag on the `kvm_run` struct associated with this vCPU to `val`. + pub fn set_kvm_immediate_exit(&self, val: u8) { + let kvm_run = self.kvm_run_ptr.as_mut_ref(); + kvm_run.immediate_exit = val; + } + + /// Returns the vCPU TSC frequency in KHz or an error if the host has unstable TSC. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let tsc_khz = vcpu.get_tsc_khz().unwrap(); + /// ``` + /// + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_tsc_khz(&self) -> Result { + // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of + // the ones defined by kernel. + let ret = unsafe { ioctl(self, KVM_GET_TSC_KHZ()) }; + if ret >= 0 { + Ok(ret as u32) + } else { + Err(errno::Error::new(ret)) + } + } + + /// Sets the specified vCPU TSC frequency. + /// + /// # Arguments + /// + /// * `freq` - The frequency unit is KHz as per the KVM API documentation + /// for `KVM_SET_TSC_KHZ`. 
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Cap, Kvm}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// if kvm.check_extension(Cap::GetTscKhz) && kvm.check_extension(Cap::TscControl) { + /// vcpu.set_tsc_khz(1000).unwrap(); + /// } + /// ``` + /// + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_tsc_khz(&self, freq: u32) -> Result<()> { + // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of + // the ones defined by kernel. + let ret = unsafe { ioctl_with_val(self, KVM_SET_TSC_KHZ(), freq as u64) }; + if ret < 0 { + Err(errno::Error::last()) + } else { + Ok(()) + } + } + + /// Translates a virtual address according to the vCPU's current address translation mode. + /// + /// The physical address is returned in a `kvm_translation` structure as defined in the + /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// See documentation for `KVM_TRANSLATE`. + /// + /// # Arguments + /// + /// * `gva` - The virtual address to translate. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// let tr = vcpu.translate_gva(0x10000).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn translate_gva(&self, gva: u64) -> Result { + let mut tr = kvm_translation { + linear_address: gva, + ..Default::default() + }; + + // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only + // write the correct amount of memory to our pointer, and we verify the return result. 
+ let ret = unsafe { ioctl_with_mut_ref(self, KVM_TRANSLATE(), &mut tr) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(tr) + } + + /// Enable the given [`SyncReg`] to be copied to userspace on the next exit + /// + /// # Arguments + /// + /// * `reg` - The [`SyncReg`] to copy out of the guest + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// vcpu.set_sync_valid_reg(SyncReg::Register); + /// vcpu.set_sync_valid_reg(SyncReg::SystemRegister); + /// vcpu.set_sync_valid_reg(SyncReg::VcpuEvents); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_sync_valid_reg(&mut self, reg: SyncReg) { + let mut kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); + kvm_run.kvm_valid_regs |= reg as u64; + } + + /// Tell KVM to copy the given [`SyncReg`] into the guest on the next entry + /// + /// # Arguments + /// + /// * `reg` - The [`SyncReg`] to copy into the guest + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// vcpu.set_sync_dirty_reg(SyncReg::Register); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_sync_dirty_reg(&mut self, reg: SyncReg) { + let mut kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); + kvm_run.kvm_dirty_regs |= reg as u64; + } + + /// Disable the given [`SyncReg`] to be copied to userspace on the next exit + /// + /// # Arguments + /// + /// * `reg` - The [`SyncReg`] to not copy out of the guest + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = 
kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// vcpu.clear_sync_valid_reg(SyncReg::Register); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn clear_sync_valid_reg(&mut self, reg: SyncReg) { + let mut kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); + kvm_run.kvm_valid_regs &= !(reg as u64); + } + + /// Tell KVM to not copy the given [`SyncReg`] into the guest on the next entry + /// + /// # Arguments + /// + /// * `reg` - The [`SyncReg`] to not copy out into the guest + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// vcpu.clear_sync_dirty_reg(SyncReg::Register); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn clear_sync_dirty_reg(&mut self, reg: SyncReg) { + let mut kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); + kvm_run.kvm_dirty_regs &= !(reg as u64); + } + + /// Get the [`kvm_sync_regs`] from the VM + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// if kvm.check_extension(Cap::SyncRegs) { + /// vcpu.set_sync_valid_reg(SyncReg::Register); + /// vcpu.run(); + /// let guest_rax = vcpu.sync_regs().regs.rax; + /// } + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn sync_regs(&self) -> kvm_sync_regs { + let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); + + // SAFETY: Accessing this union field could be out of bounds if the `kvm_run` + // allocation isn't large enough. 
The `kvm_run` region is set using + // `get_vcpu_map_size`, so this region is in bounds + unsafe { kvm_run.s.regs } + } + + /// Get a mutable reference to the [`kvm_sync_regs`] from the VM + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// if kvm.check_extension(Cap::SyncRegs) { + /// vcpu.set_sync_valid_reg(SyncReg::Register); + /// vcpu.run(); + /// // Set the guest RAX to 0xdeadbeef + /// vcpu.sync_regs_mut().regs.rax = 0xdeadbeef; + /// vcpu.set_sync_dirty_reg(SyncReg::Register); + /// vcpu.run(); + /// } + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn sync_regs_mut(&mut self) -> &mut kvm_sync_regs { + let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); + + // SAFETY: Accessing this union field could be out of bounds if the `kvm_run` + // allocation isn't large enough. The `kvm_run` region is set using + // `get_vcpu_map_size`, so this region is in bounds + unsafe { &mut kvm_run.s.regs } + } +} + +/// Helper function to create a new `VcpuFd`. +/// +/// This should not be exported as a public function because the preferred way is to use +/// `create_vcpu` from `VmFd`. The function cannot be part of the `VcpuFd` implementation because +/// then it would be exported with the public `VcpuFd` interface. 
+pub fn new_vcpu(vcpu: File, kvm_run_ptr: KvmRunWrapper) -> VcpuFd { + VcpuFd { vcpu, kvm_run_ptr } +} + +impl AsRawFd for VcpuFd { + fn as_raw_fd(&self) -> RawFd { + self.vcpu.as_raw_fd() + } +} + +#[cfg(test)] +mod tests { + #![allow(clippy::undocumented_unsafe_blocks)] + extern crate byteorder; + + use super::*; + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + use crate::cap::Cap; + use crate::ioctls::system::Kvm; + + // Helper function for memory mapping `size` bytes of anonymous memory. + // Panics if the mmap fails. + fn mmap_anonymous(size: usize) -> *mut u8 { + use std::ptr::null_mut; + + let addr = unsafe { + libc::mmap( + null_mut(), + size, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE, + -1, + 0, + ) + }; + if addr == libc::MAP_FAILED { + panic!("mmap failed."); + } + + addr as *mut u8 + } + + #[test] + fn test_create_vcpu() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + + assert!(vm.create_vcpu(0).is_ok()); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_get_cpuid() { + let kvm = Kvm::new().unwrap(); + if kvm.check_extension(Cap::ExtCpuid) { + let vm = kvm.create_vm().unwrap(); + let cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + let ncpuids = cpuid.as_slice().len(); + assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES); + let nr_vcpus = kvm.get_nr_vcpus(); + for cpu_idx in 0..nr_vcpus { + let vcpu = vm.create_vcpu(cpu_idx as u64).unwrap(); + vcpu.set_cpuid2(&cpuid).unwrap(); + let retrieved_cpuid = vcpu.get_cpuid2(ncpuids).unwrap(); + // Only check the first few leafs as some (e.g. 13) are reserved. 
+ assert_eq!(cpuid.as_slice()[..3], retrieved_cpuid.as_slice()[..3]); + } + } + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_get_cpuid_fail_num_entries_too_high() { + let kvm = Kvm::new().unwrap(); + if kvm.check_extension(Cap::ExtCpuid) { + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + let err_cpuid = vcpu.get_cpuid2(KVM_MAX_CPUID_ENTRIES + 1_usize).err(); + assert_eq!(err_cpuid.unwrap().errno(), libc::ENOMEM); + } + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_get_cpuid_fail_num_entries_too_small() { + let kvm = Kvm::new().unwrap(); + if kvm.check_extension(Cap::ExtCpuid) { + let vm = kvm.create_vm().unwrap(); + let cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + let ncpuids = cpuid.as_slice().len(); + assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES); + let nr_vcpus = kvm.get_nr_vcpus(); + for cpu_idx in 0..nr_vcpus { + let vcpu = vm.create_vcpu(cpu_idx as u64).unwrap(); + vcpu.set_cpuid2(&cpuid).unwrap(); + let err = vcpu.get_cpuid2(ncpuids - 1_usize).err(); + assert_eq!(err.unwrap().errno(), libc::E2BIG); + } + } + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_set_cpuid() { + let kvm = Kvm::new().unwrap(); + if kvm.check_extension(Cap::ExtCpuid) { + let vm = kvm.create_vm().unwrap(); + let mut cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + let ncpuids = cpuid.as_slice().len(); + assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES); + let vcpu = vm.create_vcpu(0).unwrap(); + + // Setting Manufacturer ID + { + let entries = cpuid.as_mut_slice(); + for entry in entries.iter_mut() { + if entry.function == 0 { + // " KVMKVMKVM " + entry.ebx = 0x4b4d564b; + entry.ecx = 0x564b4d56; + entry.edx = 0x4d; + } + } + } + vcpu.set_cpuid2(&cpuid).unwrap(); + let cpuid_0 = vcpu.get_cpuid2(ncpuids).unwrap(); + for entry in cpuid_0.as_slice() { + if entry.function == 0 { + assert_eq!(entry.ebx, 0x4b4d564b); + assert_eq!(entry.ecx, 0x564b4d56); + assert_eq!(entry.edx, 0x4d); + } + } + + // 
Disabling Intel SHA extensions. + const EBX_SHA_SHIFT: u32 = 29; + let mut ebx_sha_off = 0u32; + { + let entries = cpuid.as_mut_slice(); + for entry in entries.iter_mut() { + if entry.function == 7 && entry.ecx == 0 { + entry.ebx &= !(1 << EBX_SHA_SHIFT); + ebx_sha_off = entry.ebx; + } + } + } + vcpu.set_cpuid2(&cpuid).unwrap(); + let cpuid_1 = vcpu.get_cpuid2(ncpuids).unwrap(); + for entry in cpuid_1.as_slice() { + if entry.function == 7 && entry.ecx == 0 { + assert_eq!(entry.ebx, ebx_sha_off); + } + } + } + } + + #[cfg(target_arch = "x86_64")] + #[allow(non_snake_case)] + #[test] + fn test_fpu() { + // as per https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/fpu/internal.h + let KVM_FPU_CWD: usize = 0x37f; + let KVM_FPU_MXCSR: usize = 0x1f80; + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + let mut fpu: kvm_fpu = kvm_fpu { + fcw: KVM_FPU_CWD as u16, + mxcsr: KVM_FPU_MXCSR as u32, + ..Default::default() + }; + + fpu.fcw = KVM_FPU_CWD as u16; + fpu.mxcsr = KVM_FPU_MXCSR as u32; + + vcpu.set_fpu(&fpu).unwrap(); + assert_eq!(vcpu.get_fpu().unwrap().fcw, KVM_FPU_CWD as u16); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn lapic_test() { + use std::io::Cursor; + // We might get read of byteorder if we replace mem::transmute with something safer. + use self::byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; + // As per https://github.com/torvalds/linux/arch/x86/kvm/lapic.c + // Try to write and read the APIC_ICR (0x300) register which is non-read only and + // one can simply write to it. + let kvm = Kvm::new().unwrap(); + assert!(kvm.check_extension(Cap::Irqchip)); + let vm = kvm.create_vm().unwrap(); + // The get_lapic ioctl will fail if there is no irqchip created beforehand. 
+ assert!(vm.create_irq_chip().is_ok()); + let vcpu = vm.create_vcpu(0).unwrap(); + let mut klapic: kvm_lapic_state = vcpu.get_lapic().unwrap(); + + let reg_offset = 0x300; + let value = 2_u32; + //try to write and read the APIC_ICR 0x300 + let write_slice = + unsafe { &mut *(&mut klapic.regs[reg_offset..] as *mut [i8] as *mut [u8]) }; + let mut writer = Cursor::new(write_slice); + writer.write_u32::(value).unwrap(); + vcpu.set_lapic(&klapic).unwrap(); + klapic = vcpu.get_lapic().unwrap(); + let read_slice = unsafe { &*(&klapic.regs[reg_offset..] as *const [i8] as *const [u8]) }; + let mut reader = Cursor::new(read_slice); + assert_eq!(reader.read_u32::().unwrap(), value); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn msrs_test() { + use vmm_sys_util::fam::FamStruct; + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + + // Set the following MSRs. + let msrs_to_set = [ + kvm_msr_entry { + index: 0x0000_0174, + data: 0x0, + ..Default::default() + }, + kvm_msr_entry { + index: 0x0000_0175, + data: 0x1, + ..Default::default() + }, + ]; + let msrs_wrapper = Msrs::from_entries(&msrs_to_set).unwrap(); + vcpu.set_msrs(&msrs_wrapper).unwrap(); + + // Now test that GET_MSRS returns the same. + // Configure the struct to say which entries we want. + let mut returned_kvm_msrs = Msrs::from_entries(&[ + kvm_msr_entry { + index: 0x0000_0174, + ..Default::default() + }, + kvm_msr_entry { + index: 0x0000_0175, + ..Default::default() + }, + ]) + .unwrap(); + let nmsrs = vcpu.get_msrs(&mut returned_kvm_msrs).unwrap(); + + // Verify the lengths match. + assert_eq!(nmsrs, msrs_to_set.len()); + assert_eq!(nmsrs, returned_kvm_msrs.as_fam_struct_ref().len()); + + // Verify the contents match. 
+ let returned_kvm_msr_entries = returned_kvm_msrs.as_slice(); + for (i, entry) in returned_kvm_msr_entries.iter().enumerate() { + assert_eq!(entry, &msrs_to_set[i]); + } + } + + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64", + target_arch = "s390" + ))] + #[test] + fn mpstate_test() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + let mp_state = vcpu.get_mp_state().unwrap(); + vcpu.set_mp_state(mp_state).unwrap(); + let other_mp_state = vcpu.get_mp_state().unwrap(); + assert_eq!(mp_state, other_mp_state); + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[test] + fn xsave_test() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + let xsave = vcpu.get_xsave().unwrap(); + vcpu.set_xsave(&xsave).unwrap(); + let other_xsave = vcpu.get_xsave().unwrap(); + assert_eq!(&xsave.region[..], &other_xsave.region[..]); + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[test] + fn xcrs_test() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + let xcrs = vcpu.get_xcrs().unwrap(); + vcpu.set_xcrs(&xcrs).unwrap(); + let other_xcrs = vcpu.get_xcrs().unwrap(); + assert_eq!(xcrs, other_xcrs); + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[test] + fn debugregs_test() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + let debugregs = vcpu.get_debug_regs().unwrap(); + vcpu.set_debug_regs(&debugregs).unwrap(); + let other_debugregs = vcpu.get_debug_regs().unwrap(); + assert_eq!(debugregs, other_debugregs); + } + + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + #[test] + fn vcpu_events_test() { + let kvm = Kvm::new().unwrap(); + if 
kvm.check_extension(Cap::VcpuEvents) { + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + let vcpu_events = vcpu.get_vcpu_events().unwrap(); + vcpu.set_vcpu_events(&vcpu_events).unwrap(); + let other_vcpu_events = vcpu.get_vcpu_events().unwrap(); + assert_eq!(vcpu_events, other_vcpu_events); + } + } + + #[cfg(target_arch = "aarch64")] + #[test] + fn test_run_code() { + use std::io::Write; + + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + #[rustfmt::skip] + let code = [ + 0x40, 0x20, 0x80, 0x52, /* mov w0, #0x102 */ + 0x00, 0x01, 0x00, 0xb9, /* str w0, [x8]; test physical memory write */ + 0x81, 0x60, 0x80, 0x52, /* mov w1, #0x304 */ + 0x02, 0x00, 0x80, 0x52, /* mov w2, #0x0 */ + 0x20, 0x01, 0x40, 0xb9, /* ldr w0, [x9]; test MMIO read */ + 0x1f, 0x18, 0x14, 0x71, /* cmp w0, #0x506 */ + 0x20, 0x00, 0x82, 0x1a, /* csel w0, w1, w2, eq */ + 0x20, 0x01, 0x00, 0xb9, /* str w0, [x9]; test MMIO write */ + 0x00, 0x80, 0xb0, 0x52, /* mov w0, #0x84000000 */ + 0x00, 0x00, 0x1d, 0x32, /* orr w0, w0, #0x08 */ + 0x02, 0x00, 0x00, 0xd4, /* hvc #0x0 */ + 0x00, 0x00, 0x00, 0x14, /* b ; shouldn't get here, but if so loop forever */ + ]; + + let mem_size = 0x20000; + let load_addr = mmap_anonymous(mem_size); + let guest_addr: u64 = 0x10000; + let slot: u32 = 0; + let mem_region = kvm_userspace_memory_region { + slot, + guest_phys_addr: guest_addr, + memory_size: mem_size as u64, + userspace_addr: load_addr as u64, + flags: KVM_MEM_LOG_DIRTY_PAGES, + }; + unsafe { + vm.set_user_memory_region(mem_region).unwrap(); + } + + unsafe { + // Get a mutable slice of `mem_size` from `load_addr`. + // This is safe because we mapped it before. 
+ let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size); + slice.write_all(&code).unwrap(); + } + + let vcpu_fd = vm.create_vcpu(0).unwrap(); + let mut kvi = kvm_bindings::kvm_vcpu_init::default(); + vm.get_preferred_target(&mut kvi).unwrap(); + kvi.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2; + vcpu_fd.vcpu_init(&kvi).unwrap(); + + let core_reg_base: u64 = 0x6030_0000_0010_0000; + let mmio_addr: u64 = guest_addr + mem_size as u64; + + // Set the PC to the guest address where we loaded the code. + vcpu_fd + .set_one_reg(core_reg_base + 2 * 32, guest_addr as u128) + .unwrap(); + + // Set x8 and x9 to the addresses the guest test code needs + vcpu_fd + .set_one_reg(core_reg_base + 2 * 8, guest_addr as u128 + 0x10000) + .unwrap(); + vcpu_fd + .set_one_reg(core_reg_base + 2 * 9, mmio_addr as u128) + .unwrap(); + + loop { + match vcpu_fd.run().expect("run failed") { + VcpuExit::MmioRead(addr, data) => { + assert_eq!(addr, mmio_addr); + assert_eq!(data.len(), 4); + data[3] = 0x0; + data[2] = 0x0; + data[1] = 0x5; + data[0] = 0x6; + } + VcpuExit::MmioWrite(addr, data) => { + assert_eq!(addr, mmio_addr); + assert_eq!(data.len(), 4); + assert_eq!(data[3], 0x0); + assert_eq!(data[2], 0x0); + assert_eq!(data[1], 0x3); + assert_eq!(data[0], 0x4); + // The code snippet dirties one page at guest_addr + 0x10000. + // The code page should not be dirty, as it's not written by the guest. 
+ let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap(); + let dirty_pages: u32 = dirty_pages_bitmap + .into_iter() + .map(|page| page.count_ones()) + .sum(); + assert_eq!(dirty_pages, 1); + } + VcpuExit::SystemEvent(type_, data) => { + assert_eq!(type_, KVM_SYSTEM_EVENT_SHUTDOWN); + assert_eq!(data[0], 0); + break; + } + r => panic!("unexpected exit reason: {:?}", r), + } + } + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_run_code() { + use std::io::Write; + + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + // This example is based on https://lwn.net/Articles/658511/ + #[rustfmt::skip] + let code = [ + 0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */ + 0x00, 0xd8, /* add %bl, %al */ + 0x04, b'0', /* add $'0', %al */ + 0xee, /* out %al, %dx */ + 0xec, /* in %dx, %al */ + 0xc6, 0x06, 0x00, 0x80, 0x00, /* movl $0, (0x8000); This generates a MMIO Write.*/ + 0x8a, 0x16, 0x00, 0x80, /* movl (0x8000), %dl; This generates a MMIO Read.*/ + 0xc6, 0x06, 0x00, 0x20, 0x00, /* movl $0, (0x2000); Dirty one page in guest mem. */ + 0xf4, /* hlt */ + ]; + let expected_rips: [u64; 3] = [0x1003, 0x1005, 0x1007]; + + let mem_size = 0x4000; + let load_addr = mmap_anonymous(mem_size); + let guest_addr: u64 = 0x1000; + let slot: u32 = 0; + let mem_region = kvm_userspace_memory_region { + slot, + guest_phys_addr: guest_addr, + memory_size: mem_size as u64, + userspace_addr: load_addr as u64, + flags: KVM_MEM_LOG_DIRTY_PAGES, + }; + unsafe { + vm.set_user_memory_region(mem_region).unwrap(); + } + + unsafe { + // Get a mutable slice of `mem_size` from `load_addr`. + // This is safe because we mapped it before. 
+ let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size); + slice.write_all(&code).unwrap(); + } + + let vcpu_fd = vm.create_vcpu(0).unwrap(); + + let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap(); + assert_ne!(vcpu_sregs.cs.base, 0); + assert_ne!(vcpu_sregs.cs.selector, 0); + vcpu_sregs.cs.base = 0; + vcpu_sregs.cs.selector = 0; + vcpu_fd.set_sregs(&vcpu_sregs).unwrap(); + + let mut vcpu_regs = vcpu_fd.get_regs().unwrap(); + // Set the Instruction Pointer to the guest address where we loaded the code. + vcpu_regs.rip = guest_addr; + vcpu_regs.rax = 2; + vcpu_regs.rbx = 3; + vcpu_regs.rflags = 2; + vcpu_fd.set_regs(&vcpu_regs).unwrap(); + + let mut debug_struct = kvm_guest_debug { + control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP, + pad: 0, + arch: kvm_guest_debug_arch { + debugreg: [0, 0, 0, 0, 0, 0, 0, 0], + }, + }; + vcpu_fd.set_guest_debug(&debug_struct).unwrap(); + + let mut instr_idx = 0; + loop { + match vcpu_fd.run().expect("run failed") { + VcpuExit::IoIn(addr, data) => { + assert_eq!(addr, 0x3f8); + assert_eq!(data.len(), 1); + } + VcpuExit::IoOut(addr, data) => { + assert_eq!(addr, 0x3f8); + assert_eq!(data.len(), 1); + assert_eq!(data[0], b'5'); + } + VcpuExit::MmioRead(addr, data) => { + assert_eq!(addr, 0x8000); + assert_eq!(data.len(), 1); + } + VcpuExit::MmioWrite(addr, data) => { + assert_eq!(addr, 0x8000); + assert_eq!(data.len(), 1); + assert_eq!(data[0], 0); + } + VcpuExit::Debug(debug) => { + if instr_idx == expected_rips.len() - 1 { + // Disabling debugging/single-stepping + debug_struct.control = 0; + vcpu_fd.set_guest_debug(&debug_struct).unwrap(); + } else if instr_idx >= expected_rips.len() { + unreachable!(); + } + let vcpu_regs = vcpu_fd.get_regs().unwrap(); + assert_eq!(vcpu_regs.rip, expected_rips[instr_idx]); + assert_eq!(debug.exception, 1); + assert_eq!(debug.pc, expected_rips[instr_idx]); + // Check first 15 bits of DR6 + let mask = (1 << 16) - 1; + assert_eq!(debug.dr6 & mask, 0b100111111110000); + // Bit 10 in 
DR7 is always 1 + assert_eq!(debug.dr7, 1 << 10); + instr_idx += 1; + } + VcpuExit::Hlt => { + // The code snippet dirties 2 pages: + // * one when the code itself is loaded in memory; + // * and one more from the `movl` that writes to address 0x8000 + let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap(); + let dirty_pages: u32 = dirty_pages_bitmap + .into_iter() + .map(|page| page.count_ones()) + .sum(); + assert_eq!(dirty_pages, 2); + break; + } + r => panic!("unexpected exit reason: {:?}", r), + } + } + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_faulty_vcpu_fd() { + use std::os::unix::io::FromRawFd; + + let badf_errno = libc::EBADF; + + let faulty_vcpu_fd = VcpuFd { + vcpu: unsafe { File::from_raw_fd(-2) }, + kvm_run_ptr: KvmRunWrapper { + kvm_run_ptr: mmap_anonymous(10), + mmap_size: 10, + }, + }; + + assert_eq!(faulty_vcpu_fd.get_regs().unwrap_err().errno(), badf_errno); + assert_eq!( + faulty_vcpu_fd + .set_regs(&unsafe { std::mem::zeroed() }) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!(faulty_vcpu_fd.get_sregs().unwrap_err().errno(), badf_errno); + assert_eq!( + faulty_vcpu_fd + .set_sregs(&unsafe { std::mem::zeroed() }) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!(faulty_vcpu_fd.get_fpu().unwrap_err().errno(), badf_errno); + assert_eq!( + faulty_vcpu_fd + .set_fpu(&unsafe { std::mem::zeroed() }) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .set_cpuid2( + &Kvm::new() + .unwrap() + .get_supported_cpuid(KVM_MAX_CPUID_ENTRIES) + .unwrap() + ) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd.get_cpuid2(1).err().unwrap().errno(), + badf_errno + ); + // `kvm_lapic_state` does not implement debug by default so we cannot + // use unwrap_err here. 
+ assert!(faulty_vcpu_fd.get_lapic().is_err()); + assert_eq!( + faulty_vcpu_fd + .set_lapic(&unsafe { std::mem::zeroed() }) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .get_msrs(&mut Msrs::new(1).unwrap()) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .set_msrs(&Msrs::new(1).unwrap()) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd.get_mp_state().unwrap_err().errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .set_mp_state(kvm_mp_state::default()) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd.get_xsave().err().unwrap().errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .set_xsave(&kvm_xsave::default()) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!(faulty_vcpu_fd.get_xcrs().unwrap_err().errno(), badf_errno); + assert_eq!( + faulty_vcpu_fd + .set_xcrs(&kvm_xcrs::default()) + .err() + .unwrap() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd.get_debug_regs().unwrap_err().errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .set_debug_regs(&kvm_debugregs::default()) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd.get_vcpu_events().unwrap_err().errno(), + badf_errno + ); + assert_eq!( + faulty_vcpu_fd + .set_vcpu_events(&kvm_vcpu_events::default()) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!(faulty_vcpu_fd.run().unwrap_err().errno(), badf_errno); + assert_eq!( + faulty_vcpu_fd.kvmclock_ctrl().unwrap_err().errno(), + badf_errno + ); + assert!(faulty_vcpu_fd.get_tsc_khz().is_err()); + assert!(faulty_vcpu_fd.set_tsc_khz(1000000).is_err()); + assert!(faulty_vcpu_fd.translate_gva(u64::MAX).is_err()); + } + + #[test] + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + fn test_get_preferred_target() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + + let mut kvi: 
kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); + assert!(vcpu.vcpu_init(&kvi).is_err()); + + vm.get_preferred_target(&mut kvi) + .expect("Cannot get preferred target"); + assert!(vcpu.vcpu_init(&kvi).is_ok()); + } + + #[test] + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + fn test_set_one_reg() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + + let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); + vm.get_preferred_target(&mut kvi) + .expect("Cannot get preferred target"); + vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu"); + let data: u128 = 0; + let reg_id: u64 = 0; + + assert!(vcpu.set_one_reg(reg_id, data).is_err()); + // Exercising KVM_SET_ONE_REG by trying to alter the data inside the PSTATE register (which is a + // specific aarch64 register). + const PSTATE_REG_ID: u64 = 0x6030_0000_0010_0042; + vcpu.set_one_reg(PSTATE_REG_ID, data) + .expect("Failed to set pstate register"); + } + + #[test] + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + fn test_get_one_reg() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + + let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); + vm.get_preferred_target(&mut kvi) + .expect("Cannot get preferred target"); + vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu"); + + // PSR (Processor State Register) bits. + // Taken from arch/arm64/include/uapi/asm/ptrace.h. 
+ const PSR_MODE_EL1H: u64 = 0x0000_0005; + const PSR_F_BIT: u64 = 0x0000_0040; + const PSR_I_BIT: u64 = 0x0000_0080; + const PSR_A_BIT: u64 = 0x0000_0100; + const PSR_D_BIT: u64 = 0x0000_0200; + const PSTATE_FAULT_BITS_64: u64 = + PSR_MODE_EL1H | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT; + let data: u128 = PSTATE_FAULT_BITS_64 as u128; + const PSTATE_REG_ID: u64 = 0x6030_0000_0010_0042; + vcpu.set_one_reg(PSTATE_REG_ID, data) + .expect("Failed to set pstate register"); + + assert_eq!( + vcpu.get_one_reg(PSTATE_REG_ID) + .expect("Failed to get pstate register"), + PSTATE_FAULT_BITS_64 as u128 + ); + } + + #[test] + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + fn test_get_reg_list() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + + let mut reg_list = RegList::new(1).unwrap(); + // KVM_GET_REG_LIST demands that the vcpus be initalized, so we expect this to fail. + let err = vcpu.get_reg_list(&mut reg_list).unwrap_err(); + assert!(err.errno() == libc::ENOEXEC); + + let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); + vm.get_preferred_target(&mut kvi) + .expect("Cannot get preferred target"); + vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu"); + + // KVM_GET_REG_LIST offers us a number of registers for which we have + // not allocated memory, so the first time it fails. + let err = vcpu.get_reg_list(&mut reg_list).unwrap_err(); + assert!(err.errno() == libc::E2BIG); + assert!(reg_list.as_mut_fam_struct().n > 0); + + // We make use of the number of registers returned to allocate memory and + // try one more time. 
+ let mut reg_list = RegList::new(reg_list.as_mut_fam_struct().n as usize).unwrap(); + assert!(vcpu.get_reg_list(&mut reg_list).is_ok()); + } + + #[test] + fn test_get_kvm_run() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let mut vcpu = vm.create_vcpu(0).unwrap(); + vcpu.kvm_run_ptr.as_mut_ref().immediate_exit = 1; + assert_eq!(vcpu.get_kvm_run().immediate_exit, 1); + } + + #[test] + fn test_set_kvm_immediate_exit() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + assert_eq!(vcpu.kvm_run_ptr.as_mut_ref().immediate_exit, 0); + vcpu.set_kvm_immediate_exit(1); + assert_eq!(vcpu.kvm_run_ptr.as_mut_ref().immediate_exit, 1); + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_enable_cap() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let mut cap = kvm_enable_cap { + // KVM_CAP_HYPERV_SYNIC needs KVM_CAP_SPLIT_IRQCHIP enabled + cap: KVM_CAP_SPLIT_IRQCHIP, + ..Default::default() + }; + cap.args[0] = 24; + vm.enable_cap(&cap).unwrap(); + + let vcpu = vm.create_vcpu(0).unwrap(); + if kvm.check_extension(Cap::HypervSynic) { + let cap = kvm_enable_cap { + cap: KVM_CAP_HYPERV_SYNIC, + ..Default::default() + }; + vcpu.enable_cap(&cap).unwrap(); + } + } + #[cfg(target_arch = "x86_64")] + #[test] + fn test_get_tsc_khz() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + + if !kvm.check_extension(Cap::GetTscKhz) { + assert!(vcpu.get_tsc_khz().is_err()) + } else { + assert!(vcpu.get_tsc_khz().unwrap() > 0); + } + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_set_tsc_khz() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + let freq = vcpu.get_tsc_khz().unwrap(); + + if !(kvm.check_extension(Cap::GetTscKhz) && kvm.check_extension(Cap::TscControl)) { + assert!(vcpu.set_tsc_khz(0).is_err()); + } else { + 
assert!(vcpu.set_tsc_khz(freq - 500000).is_ok()); + assert_eq!(vcpu.get_tsc_khz().unwrap(), freq - 500000); + assert!(vcpu.set_tsc_khz(freq + 500000).is_ok()); + assert_eq!(vcpu.get_tsc_khz().unwrap(), freq + 500000); + } + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_sync_regs() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let mut vcpu = vm.create_vcpu(0).unwrap(); + + // Test setting each valid register + let sync_regs = [ + SyncReg::Register, + SyncReg::SystemRegister, + SyncReg::VcpuEvents, + ]; + for reg in &sync_regs { + vcpu.set_sync_valid_reg(*reg); + assert_eq!(vcpu.kvm_run_ptr.as_mut_ref().kvm_valid_regs, *reg as u64); + vcpu.clear_sync_valid_reg(*reg); + assert_eq!(vcpu.kvm_run_ptr.as_mut_ref().kvm_valid_regs, 0); + } + + // Test that multiple valid SyncRegs can be set at the same time + vcpu.set_sync_valid_reg(SyncReg::Register); + vcpu.set_sync_valid_reg(SyncReg::SystemRegister); + vcpu.set_sync_valid_reg(SyncReg::VcpuEvents); + assert_eq!( + vcpu.kvm_run_ptr.as_mut_ref().kvm_valid_regs, + SyncReg::Register as u64 | SyncReg::SystemRegister as u64 | SyncReg::VcpuEvents as u64 + ); + + // Test setting each dirty register + let sync_regs = [ + SyncReg::Register, + SyncReg::SystemRegister, + SyncReg::VcpuEvents, + ]; + + for reg in &sync_regs { + vcpu.set_sync_dirty_reg(*reg); + assert_eq!(vcpu.kvm_run_ptr.as_mut_ref().kvm_dirty_regs, *reg as u64); + vcpu.clear_sync_dirty_reg(*reg); + assert_eq!(vcpu.kvm_run_ptr.as_mut_ref().kvm_dirty_regs, 0); + } + + // Test that multiple dirty SyncRegs can be set at the same time + vcpu.set_sync_dirty_reg(SyncReg::Register); + vcpu.set_sync_dirty_reg(SyncReg::SystemRegister); + vcpu.set_sync_dirty_reg(SyncReg::VcpuEvents); + assert_eq!( + vcpu.kvm_run_ptr.as_mut_ref().kvm_dirty_regs, + SyncReg::Register as u64 | SyncReg::SystemRegister as u64 | SyncReg::VcpuEvents as u64 + ); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_sync_regs_with_run() { + use std::io::Write; + 
+ let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + if kvm.check_extension(Cap::SyncRegs) { + // This example is based on https://lwn.net/Articles/658511/ + #[rustfmt::skip] + let code = [ + 0xff, 0xc0, /* inc eax */ + 0xf4, /* hlt */ + ]; + + let mem_size = 0x4000; + let load_addr = mmap_anonymous(mem_size); + let guest_addr: u64 = 0x1000; + let slot: u32 = 0; + let mem_region = kvm_userspace_memory_region { + slot, + guest_phys_addr: guest_addr, + memory_size: mem_size as u64, + userspace_addr: load_addr as u64, + flags: KVM_MEM_LOG_DIRTY_PAGES, + }; + unsafe { + vm.set_user_memory_region(mem_region).unwrap(); + } + + unsafe { + // Get a mutable slice of `mem_size` from `load_addr`. + // This is safe because we mapped it before. + let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size); + slice.write_all(&code).unwrap(); + } + + let mut vcpu = vm.create_vcpu(0).unwrap(); + + let orig_sregs = vcpu.get_sregs().unwrap(); + + let mut sync_regs = vcpu.sync_regs_mut(); + + // Initialize the sregs in sync_regs to be the original sregs + sync_regs.sregs = orig_sregs; + sync_regs.sregs.cs.base = 0; + sync_regs.sregs.cs.selector = 0; + + // Set up the guest to attempt to `inc rax` + sync_regs.regs.rip = guest_addr; + sync_regs.regs.rax = 0x8000; + sync_regs.regs.rflags = 2; + + // Initialize the sync_reg flags + vcpu.set_sync_valid_reg(SyncReg::Register); + vcpu.set_sync_valid_reg(SyncReg::SystemRegister); + vcpu.set_sync_valid_reg(SyncReg::VcpuEvents); + vcpu.set_sync_dirty_reg(SyncReg::Register); + vcpu.set_sync_dirty_reg(SyncReg::SystemRegister); + vcpu.set_sync_dirty_reg(SyncReg::VcpuEvents); + + // hlt is the only expected return from guest execution + assert!(matches!(vcpu.run().expect("run failed"), VcpuExit::Hlt)); + + let regs = vcpu.get_regs().unwrap(); + + let sync_regs = vcpu.sync_regs(); + assert_eq!(regs, sync_regs.regs); + assert_eq!(sync_regs.regs.rax, 0x8001); + } + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn 
test_translate_gva() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + assert!(vcpu.translate_gva(0x10000).is_ok()); + assert_eq!(vcpu.translate_gva(0x10000).unwrap().valid, 1); + assert_eq!( + vcpu.translate_gva(0x10000).unwrap().physical_address, + 0x10000 + ); + assert!(vcpu.translate_gva(u64::MAX).is_ok()); + assert_eq!(vcpu.translate_gva(u64::MAX).unwrap().valid, 0); + } + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_vcpu_attr() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let vcpu = vm.create_vcpu(0).unwrap(); + + let dist_attr = kvm_bindings::kvm_device_attr { + group: KVM_ARM_VCPU_PMU_V3_CTRL, + attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT), + addr: 0x0, + flags: 0, + }; + + assert!(vcpu.has_device_attr(&dist_attr).is_err()); + assert!(vcpu.set_device_attr(&dist_attr).is_err()); + let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); + vm.get_preferred_target(&mut kvi) + .expect("Cannot get preferred target"); + kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2 | 1 << KVM_ARM_VCPU_PMU_V3; + assert!(vcpu.vcpu_init(&kvi).is_ok()); + assert!(vcpu.has_device_attr(&dist_attr).is_ok()); + assert!(vcpu.set_device_attr(&dist_attr).is_ok()); + } +} diff --git a/kvm-ioctls/src/ioctls/vm.rs b/kvm-ioctls/src/ioctls/vm.rs new file mode 100644 index 000000000..876ced3d8 --- /dev/null +++ b/kvm-ioctls/src/ioctls/vm.rs @@ -0,0 +1,2253 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. 
+ +use kvm_bindings::*; +use std::fs::File; +use std::os::raw::c_void; +use std::os::raw::{c_int, c_ulong}; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; + +use crate::cap::Cap; +use crate::ioctls::device::new_device; +use crate::ioctls::device::DeviceFd; +use crate::ioctls::vcpu::new_vcpu; +use crate::ioctls::vcpu::VcpuFd; +use crate::ioctls::{KvmRunWrapper, Result}; +use crate::kvm_ioctls::*; +use vmm_sys_util::errno; +use vmm_sys_util::eventfd::EventFd; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use vmm_sys_util::ioctl::ioctl_with_mut_ptr; +use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ref, ioctl_with_val}; + +/// An address either in programmable I/O space or in memory mapped I/O space. +/// +/// The `IoEventAddress` is used for specifying the type when registering an event +/// in [register_ioevent](struct.VmFd.html#method.register_ioevent). +#[derive(Debug)] +pub enum IoEventAddress { + /// Representation of an programmable I/O address. + Pio(u64), + /// Representation of an memory mapped I/O address. + Mmio(u64), +} + +/// Helper structure for disabling datamatch. +/// +/// The structure can be used as a parameter to +/// [`register_ioevent`](struct.VmFd.html#method.register_ioevent) +/// to disable filtering of events based on the datamatch flag. For details check the +/// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). +#[derive(Debug)] +pub struct NoDatamatch; +impl From for u64 { + fn from(_: NoDatamatch) -> u64 { + 0 + } +} + +/// Wrapper over KVM VM ioctls. +#[derive(Debug)] +pub struct VmFd { + vm: File, + run_size: usize, +} + +impl VmFd { + /// Creates/modifies a guest physical memory slot. + /// + /// See the documentation for `KVM_SET_USER_MEMORY_REGION`. + /// + /// # Arguments + /// + /// * `user_memory_region` - Guest physical memory slot. 
For details check the + /// `kvm_userspace_memory_region` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Safety + /// + /// This function is unsafe because there is no guarantee `userspace_addr` points to a valid + /// memory region, nor the memory region lives as long as the kernel needs it to. + /// + /// The caller of this method must make sure that: + /// - the raw pointer (`userspace_addr`) points to valid memory + /// - the regions provided to KVM are not overlapping other memory regions. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// extern crate kvm_bindings; + /// + /// use kvm_bindings::kvm_userspace_memory_region; + /// use kvm_ioctls::Kvm; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mem_region = kvm_userspace_memory_region { + /// slot: 0, + /// guest_phys_addr: 0x10000 as u64, + /// memory_size: 0x10000 as u64, + /// userspace_addr: 0x0 as u64, + /// flags: 0, + /// }; + /// unsafe { + /// vm.set_user_memory_region(mem_region).unwrap(); + /// }; + /// ``` + pub unsafe fn set_user_memory_region( + &self, + user_memory_region: kvm_userspace_memory_region, + ) -> Result<()> { + let ret = ioctl_with_ref(self, KVM_SET_USER_MEMORY_REGION(), &user_memory_region); + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Sets the address of the three-page region in the VM's address space. + /// + /// See the documentation for `KVM_SET_TSS_ADDR`. + /// + /// # Arguments + /// + /// * `offset` - Physical address of a three-page region in the guest's physical address space. 
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// vm.set_tss_address(0xfffb_d000).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_tss_address(&self, offset: usize) -> Result<()> { + // SAFETY: Safe because we know that our file is a VM fd and we verify the return result. + let ret = unsafe { ioctl_with_val(self, KVM_SET_TSS_ADDR(), offset as c_ulong) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Sets the address of the one-page region in the VM's address space. + /// + /// See the documentation for `KVM_SET_IDENTITY_MAP_ADDR`. + /// + /// # Arguments + /// + /// * `address` - Physical address of a one-page region in the guest's physical address space. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// vm.set_identity_map_address(0xfffb_c000).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_identity_map_address(&self, address: u64) -> Result<()> { + // SAFETY: Safe because we know that our file is a VM fd and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_IDENTITY_MAP_ADDR(), &address) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Creates an in-kernel interrupt controller. + /// + /// See the documentation for `KVM_CREATE_IRQCHIP`. 
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// vm.create_irq_chip().unwrap(); + /// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + /// { + /// use kvm_bindings::{ + /// kvm_create_device, kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, KVM_CREATE_DEVICE_TEST, + /// }; + /// let mut gic_device = kvm_bindings::kvm_create_device { + /// type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, + /// fd: 0, + /// flags: KVM_CREATE_DEVICE_TEST, + /// }; + /// if vm.create_device(&mut gic_device).is_ok() { + /// vm.create_irq_chip().unwrap(); + /// } + /// } + /// ``` + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + pub fn create_irq_chip(&self) -> Result<()> { + // SAFETY: Safe because we know that our file is a VM fd and we verify the return result. + let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP()) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// X86 specific call to retrieve the state of a kernel interrupt controller. + /// + /// See the documentation for `KVM_GET_IRQCHIP` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `irqchip` - `kvm_irqchip` (input/output) to be read. 
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_bindings; + /// # extern crate kvm_ioctls; + /// # use kvm_bindings::{kvm_irqchip, KVM_IRQCHIP_PIC_MASTER}; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// vm.create_irq_chip().unwrap(); + /// let mut irqchip = kvm_irqchip::default(); + /// irqchip.chip_id = KVM_IRQCHIP_PIC_MASTER; + /// vm.get_irqchip(&mut irqchip).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_irqchip(&self, irqchip: &mut kvm_irqchip) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_irqchip struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), irqchip) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// X86 specific call to set the state of a kernel interrupt controller. + /// + /// See the documentation for `KVM_SET_IRQCHIP` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `irqchip` - `kvm_irqchip` (input/output) to be written. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_bindings; + /// # extern crate kvm_ioctls; + /// # use kvm_bindings::{kvm_irqchip, KVM_IRQCHIP_PIC_MASTER}; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// vm.create_irq_chip().unwrap(); + /// let mut irqchip = kvm_irqchip::default(); + /// irqchip.chip_id = KVM_IRQCHIP_PIC_MASTER; + /// // Your `irqchip` manipulation here. + /// vm.set_irqchip(&mut irqchip).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_irqchip(&self, irqchip: &kvm_irqchip) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_irqchip struct. 
+ let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), irqchip) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Creates a PIT as per the `KVM_CREATE_PIT2` ioctl. + /// + /// # Arguments + /// + /// * pit_config - PIT configuration. For details check the `kvm_pit_config` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// use kvm_bindings::kvm_pit_config; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let pit_config = kvm_pit_config::default(); + /// vm.create_pit2(pit_config).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn create_pit2(&self, pit_config: kvm_pit_config) -> Result<()> { + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_PIT2(), &pit_config) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// X86 specific call to retrieve the state of the in-kernel PIT model. + /// + /// See the documentation for `KVM_GET_PIT2` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `pitstate` - `kvm_pit_state2` to be read. 
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_bindings; + /// # extern crate kvm_ioctls; + /// # use kvm_bindings::kvm_pit_config; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// let pit_config = kvm_pit_config::default(); + /// vm.create_pit2(pit_config).unwrap(); + /// let pitstate = vm.get_pit2().unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_pit2(&self) -> Result { + let mut pitstate = Default::default(); + // SAFETY: Here we trust the kernel not to read past the end of the kvm_pit_state2 struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_PIT2(), &mut pitstate) }; + if ret == 0 { + Ok(pitstate) + } else { + Err(errno::Error::last()) + } + } + + /// X86 specific call to set the state of the in-kernel PIT model. + /// + /// See the documentation for `KVM_SET_PIT2` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `pitstate` - `kvm_pit_state2` to be written. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_bindings; + /// # extern crate kvm_ioctls; + /// # use kvm_bindings::{kvm_pit_config, kvm_pit_state2}; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// let pit_config = kvm_pit_config::default(); + /// vm.create_pit2(pit_config).unwrap(); + /// let mut pitstate = kvm_pit_state2::default(); + /// // Your `pitstate` manipulation here. + /// vm.set_pit2(&mut pitstate).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_pit2(&self, pitstate: &kvm_pit_state2) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_pit_state2 struct. 
+ let ret = unsafe { ioctl_with_ref(self, KVM_SET_PIT2(), pitstate) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// X86 specific call to retrieve the current timestamp of kvmclock. + /// + /// See the documentation for `KVM_GET_CLOCK` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `clock` - `kvm_clock_data` to be read. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let clock = vm.get_clock().unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_clock(&self) -> Result { + let mut clock = Default::default(); + // SAFETY: Here we trust the kernel not to read past the end of the kvm_clock_data struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_CLOCK(), &mut clock) }; + if ret == 0 { + Ok(clock) + } else { + Err(errno::Error::last()) + } + } + + /// X86 specific call to set the current timestamp of kvmclock. + /// + /// See the documentation for `KVM_SET_CLOCK` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `clock` - `kvm_clock_data` to be written. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_bindings; + /// # extern crate kvm_ioctls; + /// # use kvm_bindings::kvm_clock_data; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut clock = kvm_clock_data::default(); + /// vm.set_clock(&mut clock).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn set_clock(&self, clock: &kvm_clock_data) -> Result<()> { + // SAFETY: Here we trust the kernel not to read past the end of the kvm_clock_data struct. 
+ let ret = unsafe { ioctl_with_ref(self, KVM_SET_CLOCK(), clock) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Directly injects a MSI message as per the `KVM_SIGNAL_MSI` ioctl. + /// + /// See the documentation for `KVM_SIGNAL_MSI`. + /// + /// This ioctl returns > 0 when the MSI is successfully delivered and 0 + /// when the guest blocked the MSI. + /// + /// # Arguments + /// + /// * kvm_msi - MSI message configuration. For details check the `kvm_msi` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// # Example + /// + /// In this example, the important function signal_msi() calling into + /// the actual ioctl is commented out. The reason is that MSI vectors are + /// not chosen from the HW side (VMM). The guest OS (or anything that runs + /// inside the VM) is supposed to allocate the MSI vectors, and usually + /// communicate back through PCI configuration space. Sending a random MSI + /// vector through this signal_msi() function will always result in a + /// failure, which is why it needs to be commented out. + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// use kvm_bindings::kvm_msi; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let msi = kvm_msi::default(); + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// vm.create_irq_chip().unwrap(); + /// //vm.signal_msi(msi).unwrap(); + /// ``` + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + pub fn signal_msi(&self, msi: kvm_msi) -> Result { + // SAFETY: Safe because we allocated the structure and we know the kernel + // will read exactly the size of the structure. 
+ let ret = unsafe { ioctl_with_ref(self, KVM_SIGNAL_MSI(), &msi) }; + if ret >= 0 { + Ok(ret) + } else { + Err(errno::Error::last()) + } + } + + /// Sets the GSI routing table entries, overwriting any previously set + /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl. + /// + /// See the documentation for `KVM_SET_GSI_ROUTING`. + /// + /// Returns an io::Error when the table could not be updated. + /// + /// # Arguments + /// + /// * kvm_irq_routing - IRQ routing configuration. Describe all routes + /// associated with GSI entries. For details check + /// the `kvm_irq_routing` and `kvm_irq_routing_entry` + /// structures in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// use kvm_bindings::kvm_irq_routing; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// vm.create_irq_chip().unwrap(); + /// + /// let irq_routing = kvm_irq_routing::default(); + /// vm.set_gsi_routing(&irq_routing).unwrap(); + /// ``` + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + pub fn set_gsi_routing(&self, irq_routing: &kvm_irq_routing) -> Result<()> { + // SAFETY: Safe because we allocated the structure and we know the kernel + // will read exactly the size of the structure. + let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING(), irq_routing) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Registers an event to be signaled whenever a certain address is written to. + /// + /// See the documentation for `KVM_IOEVENTFD`. + /// + /// # Arguments + /// + /// * `fd` - `EventFd` which will be signaled. When signaling, the usual `vmexit` to userspace + /// is prevented. + /// * `addr` - Address being written to. 
+ /// * `datamatch` - Limits signaling `fd` to only the cases where the value being written is + /// equal to this parameter. The size of `datamatch` is important and it must + /// match the expected size of the guest's write. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// extern crate libc; + /// extern crate vmm_sys_util; + /// # use kvm_ioctls::{IoEventAddress, Kvm, NoDatamatch}; + /// use libc::{eventfd, EFD_NONBLOCK}; + /// use vmm_sys_util::eventfd::EventFd; + /// let kvm = Kvm::new().unwrap(); + /// let vm_fd = kvm.create_vm().unwrap(); + /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap(); + /// vm_fd + /// .register_ioevent(&evtfd, &IoEventAddress::Pio(0xf4), NoDatamatch) + /// .unwrap(); + /// vm_fd + /// .register_ioevent(&evtfd, &IoEventAddress::Mmio(0x1000), NoDatamatch) + /// .unwrap(); + /// ``` + pub fn register_ioevent>( + &self, + fd: &EventFd, + addr: &IoEventAddress, + datamatch: T, + ) -> Result<()> { + let mut flags = 0; + if std::mem::size_of::() > 0 { + flags |= 1 << kvm_ioeventfd_flag_nr_datamatch + } + if let IoEventAddress::Pio(_) = *addr { + flags |= 1 << kvm_ioeventfd_flag_nr_pio + } + + let ioeventfd = kvm_ioeventfd { + datamatch: datamatch.into(), + len: std::mem::size_of::() as u32, + addr: match addr { + IoEventAddress::Pio(ref p) => *p, + IoEventAddress::Mmio(ref m) => *m, + }, + fd: fd.as_raw_fd(), + flags, + ..Default::default() + }; + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD(), &ioeventfd) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Unregisters an event from a certain address it has been previously registered to. + /// + /// See the documentation for `KVM_IOEVENTFD`. + /// + /// # Arguments + /// + /// * `fd` - FD which will be unregistered. 
+ /// * `addr` - Address being written to. + /// + /// # Safety + /// + /// This function is unsafe because it relies on RawFd. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// extern crate libc; + /// extern crate vmm_sys_util; + /// # use kvm_ioctls::{IoEventAddress, Kvm, NoDatamatch}; + /// use libc::EFD_NONBLOCK; + /// use vmm_sys_util::eventfd::EventFd; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm_fd = kvm.create_vm().unwrap(); + /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap(); + /// let pio_addr = IoEventAddress::Pio(0xf4); + /// let mmio_addr = IoEventAddress::Mmio(0x1000); + /// vm_fd + /// .register_ioevent(&evtfd, &pio_addr, NoDatamatch) + /// .unwrap(); + /// vm_fd + /// .register_ioevent(&evtfd, &mmio_addr, 0x1234u32) + /// .unwrap(); + /// vm_fd + /// .unregister_ioevent(&evtfd, &pio_addr, NoDatamatch) + /// .unwrap(); + /// vm_fd + /// .unregister_ioevent(&evtfd, &mmio_addr, 0x1234u32) + /// .unwrap(); + /// ``` + pub fn unregister_ioevent>( + &self, + fd: &EventFd, + addr: &IoEventAddress, + datamatch: T, + ) -> Result<()> { + let mut flags = 1 << kvm_ioeventfd_flag_nr_deassign; + if std::mem::size_of::() > 0 { + flags |= 1 << kvm_ioeventfd_flag_nr_datamatch + } + if let IoEventAddress::Pio(_) = *addr { + flags |= 1 << kvm_ioeventfd_flag_nr_pio + } + + let ioeventfd = kvm_ioeventfd { + datamatch: datamatch.into(), + len: std::mem::size_of::() as u32, + addr: match addr { + IoEventAddress::Pio(ref p) => *p, + IoEventAddress::Mmio(ref m) => *m, + }, + fd: fd.as_raw_fd(), + flags, + ..Default::default() + }; + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD(), &ioeventfd) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Gets the bitmap of pages dirtied since the last call of this function. 
+ /// + /// Leverages the dirty page logging feature in KVM. As a side-effect, this also resets the + /// bitmap inside the kernel. For the dirty log to be available, you have to set the flag + /// `KVM_MEM_LOG_DIRTY_PAGES` when creating guest memory regions. + /// + /// Check the documentation for `KVM_GET_DIRTY_LOG`. + /// + /// # Arguments + /// + /// * `slot` - Guest memory slot identifier. + /// * `memory_size` - Size of the memory region. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use std::io::Write; + /// # use std::ptr::null_mut; + /// # use std::slice; + /// # use kvm_ioctls::{Kvm, VcpuExit}; + /// # use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES}; + /// # let kvm = Kvm::new().unwrap(); + /// # let vm = kvm.create_vm().unwrap(); + /// // This example is based on https://lwn.net/Articles/658511/. + /// let mem_size = 0x4000; + /// let guest_addr: u64 = 0x1000; + /// let load_addr: *mut u8 = unsafe { + /// libc::mmap( + /// null_mut(), + /// mem_size, + /// libc::PROT_READ | libc::PROT_WRITE, + /// libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE, + /// -1, + /// 0, + /// ) as *mut u8 + /// }; + /// + /// // Initialize a guest memory region using the flag `KVM_MEM_LOG_DIRTY_PAGES`. + /// let mem_region = kvm_userspace_memory_region { + /// slot: 0, + /// guest_phys_addr: guest_addr, + /// memory_size: mem_size as u64, + /// userspace_addr: load_addr as u64, + /// flags: KVM_MEM_LOG_DIRTY_PAGES, + /// }; + /// unsafe { vm.set_user_memory_region(mem_region).unwrap() }; + /// + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// // ASM code that just forces a MMIO Write. 
+ /// let asm_code = [0xc6, 0x06, 0x00, 0x80, 0x00]; + /// #[cfg(target_arch = "aarch64")] + /// let asm_code = [ + /// 0x01, 0x00, 0x00, 0x10, /* adr x1, */ + /// 0x22, 0x10, 0x00, 0xb9, /* str w2, [x1, #16]; write to this page */ + /// 0x02, 0x00, 0x00, 0xb9, /* str w2, [x0]; force MMIO exit */ + /// 0x00, 0x00, 0x00, + /// 0x14, /* b ; shouldn't get here, but if so loop forever */ + /// ]; + /// + /// // Write the code in the guest memory. This will generate a dirty page. + /// unsafe { + /// let mut slice = slice::from_raw_parts_mut(load_addr, mem_size); + /// slice.write(&asm_code).unwrap(); + /// } + /// + /// let vcpu_fd = vm.create_vcpu(0).unwrap(); + /// + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// { + /// // x86_64 specific registry setup. + /// let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap(); + /// vcpu_sregs.cs.base = 0; + /// vcpu_sregs.cs.selector = 0; + /// vcpu_fd.set_sregs(&vcpu_sregs).unwrap(); + /// + /// let mut vcpu_regs = vcpu_fd.get_regs().unwrap(); + /// // Set the Instruction Pointer to the guest address where we loaded the code. + /// vcpu_regs.rip = guest_addr; + /// vcpu_regs.rax = 2; + /// vcpu_regs.rbx = 3; + /// vcpu_regs.rflags = 2; + /// vcpu_fd.set_regs(&vcpu_regs).unwrap(); + /// } + /// + /// #[cfg(target_arch = "aarch64")] + /// { + /// // aarch64 specific registry setup. 
+ /// let mut kvi = kvm_bindings::kvm_vcpu_init::default(); + /// vm.get_preferred_target(&mut kvi).unwrap(); + /// vcpu_fd.vcpu_init(&kvi).unwrap(); + /// + /// let core_reg_base: u64 = 0x6030_0000_0010_0000; + /// let mmio_addr: u64 = guest_addr + mem_size as u64; + /// vcpu_fd.set_one_reg(core_reg_base + 2 * 32, guest_addr as u128); // set PC + /// vcpu_fd.set_one_reg(core_reg_base + 2 * 0, mmio_addr as u128); // set X0 + /// } + /// + /// loop { + /// match vcpu_fd.run().expect("run failed") { + /// VcpuExit::MmioWrite(addr, data) => { + /// // On x86_64, the code snippet dirties 1 page when loading the code in memory + /// // while on aarch64 the dirty bit comes from writing to guest_addr (current PC). + /// let dirty_pages_bitmap = vm.get_dirty_log(0, mem_size).unwrap(); + /// let dirty_pages = dirty_pages_bitmap + /// .into_iter() + /// .map(|page| page.count_ones()) + /// .fold(0, |dirty_page_count, i| dirty_page_count + i); + /// assert_eq!(dirty_pages, 1); + /// break; + /// } + /// exit_reason => panic!("unexpected exit reason: {:?}", exit_reason), + /// } + /// } + /// ``` + pub fn get_dirty_log(&self, slot: u32, memory_size: usize) -> Result> { + // Compute the length of the bitmap needed for all dirty pages in one memory slot. + // One memory page is `page_size` bytes and `KVM_GET_DIRTY_LOG` returns one dirty bit for + // each page. + // SAFETY: We trust the sysconf libc function and we're calling it with a correct parameter. + let page_size = match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } { + -1 => return Err(errno::Error::last()), + ps => ps as usize, + }; + + // For ease of access we are saving the bitmap in a u64 vector. We are using ceil to + // make sure we count all dirty pages even when `memory_size` is not a multiple of + // `page_size * 64`. 
+ let div_ceil = |dividend, divisor| (dividend + divisor - 1) / divisor; + let bitmap_size = div_ceil(memory_size, page_size * 64); + let mut bitmap = vec![0u64; bitmap_size]; + let dirtylog = kvm_dirty_log { + slot, + padding1: 0, + __bindgen_anon_1: kvm_dirty_log__bindgen_ty_1 { + dirty_bitmap: bitmap.as_mut_ptr() as *mut c_void, + }, + }; + // SAFETY: Safe because we know that our file is a VM fd, and we know that the amount of + // memory we allocated for the bitmap is at least one bit per page. + let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG(), &dirtylog) }; + if ret == 0 { + Ok(bitmap) + } else { + Err(errno::Error::last()) + } + } + + /// Registers an event that will, when signaled, trigger the `gsi` IRQ. + /// + /// # Arguments + /// + /// * `fd` - `EventFd` to be signaled. + /// * `gsi` - IRQ to be triggered. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate libc; + /// # extern crate vmm_sys_util; + /// # use kvm_ioctls::Kvm; + /// # use libc::EFD_NONBLOCK; + /// # use vmm_sys_util::eventfd::EventFd; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap(); + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// { + /// vm.create_irq_chip().unwrap(); + /// vm.register_irqfd(&evtfd, 0).unwrap(); + /// } + /// ``` + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + pub fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> Result<()> { + let irqfd = kvm_irqfd { + fd: fd.as_raw_fd() as u32, + gsi, + ..Default::default() + }; + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. 
+ let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Registers an event that will, when signaled, assert the `gsi` IRQ. + /// If the irqchip is resampled by the guest, the IRQ is de-asserted, + /// and `resamplefd` is notified. + /// + /// # Arguments + /// + /// * `fd` - `EventFd` to be signaled. + /// * `resamplefd` - `EventFd`to be notified on resample. + /// * `gsi` - IRQ to be triggered. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate libc; + /// # extern crate vmm_sys_util; + /// # use kvm_ioctls::Kvm; + /// # use libc::EFD_NONBLOCK; + /// # use vmm_sys_util::eventfd::EventFd; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap(); + /// let resamplefd = EventFd::new(EFD_NONBLOCK).unwrap(); + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// { + /// vm.create_irq_chip().unwrap(); + /// vm.register_irqfd_with_resample(&evtfd, &resamplefd, 0) + /// .unwrap(); + /// } + /// ``` + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + pub fn register_irqfd_with_resample( + &self, + fd: &EventFd, + resamplefd: &EventFd, + gsi: u32, + ) -> Result<()> { + let irqfd = kvm_irqfd { + fd: fd.as_raw_fd() as u32, + resamplefd: resamplefd.as_raw_fd() as u32, + gsi, + flags: KVM_IRQFD_FLAG_RESAMPLE, + ..Default::default() + }; + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ. 
+ /// + /// # Arguments + /// + /// * `fd` - `EventFd` to be signaled. + /// * `gsi` - IRQ to be triggered. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate libc; + /// # extern crate vmm_sys_util; + /// # use kvm_ioctls::Kvm; + /// # use libc::EFD_NONBLOCK; + /// # use vmm_sys_util::eventfd::EventFd; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap(); + /// let resamplefd = EventFd::new(EFD_NONBLOCK).unwrap(); + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// { + /// vm.create_irq_chip().unwrap(); + /// vm.register_irqfd(&evtfd, 0).unwrap(); + /// vm.unregister_irqfd(&evtfd, 0).unwrap(); + /// vm.register_irqfd_with_resample(&evtfd, &resamplefd, 0) + /// .unwrap(); + /// vm.unregister_irqfd(&evtfd, 0).unwrap(); + /// } + /// ``` + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + pub fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> Result<()> { + let irqfd = kvm_irqfd { + fd: fd.as_raw_fd() as u32, + gsi, + flags: KVM_IRQFD_FLAG_DEASSIGN, + ..Default::default() + }; + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise. + /// + /// # Arguments + /// + /// * `irq` - IRQ to be set. + /// * `active` - Level of the IRQ input. 
+ /// + /// # Errors + /// + /// Returns an io::Error when the irq field is invalid + /// + /// # Examples + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate libc; + /// # extern crate vmm_sys_util; + /// # use kvm_ioctls::{Kvm, VmFd}; + /// # use libc::EFD_NONBLOCK; + /// # use vmm_sys_util::eventfd::EventFd; + /// fn arch_setup(vm_fd: &VmFd) { + /// // Arch-specific setup: + /// // For x86 architectures, it simply means calling vm.create_irq_chip().unwrap(). + /// # #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// # vm_fd.create_irq_chip().unwrap(); + /// // For Arm architectures, the IRQ controllers need to be setup first. + /// // Details please refer to the kernel documentation. + /// // https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt + /// # #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] { + /// # vm_fd.create_vcpu(0).unwrap(); + /// # // ... rest of setup for Arm goes here + /// # } + /// } + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// arch_setup(&vm); + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// { + /// vm.set_irq_line(4, true); + /// // ... + /// } + /// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + /// { + /// vm.set_irq_line(0x01_00_0020, true); + /// // .... + /// } + /// ``` + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> { + let mut irq_level = kvm_irq_level::default(); + irq_level.__bindgen_anon_1.irq = irq; + irq_level.level = u32::from(active); + + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. 
+ let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE(), &irq_level) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Creates a new KVM vCPU file descriptor and maps the memory corresponding + /// its `kvm_run` structure. + /// + /// See the documentation for `KVM_CREATE_VCPU`. + /// + /// # Arguments + /// + /// * `id` - The vCPU ID. + /// + /// # Errors + /// + /// Returns an io::Error when the VM fd is invalid or the vCPU memory cannot + /// be mapped correctly. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// // Create one vCPU with the ID=0. + /// let vcpu = vm.create_vcpu(0); + /// ``` + pub fn create_vcpu(&self, id: u64) -> Result { + #[allow(clippy::cast_lossless)] + // SAFETY: Safe because we know that vm is a VM fd and we verify the return result. + let vcpu_fd = unsafe { ioctl_with_val(&self.vm, KVM_CREATE_VCPU(), id as c_ulong) }; + if vcpu_fd < 0 { + return Err(errno::Error::last()); + } + + // Wrap the vCPU now in case the following ? returns early. + // SAFETY: This is safe because we verified the value of the fd and we own the fd. + let vcpu = unsafe { File::from_raw_fd(vcpu_fd) }; + + let kvm_run_ptr = KvmRunWrapper::mmap_from_fd(&vcpu, self.run_size)?; + + Ok(new_vcpu(vcpu, kvm_run_ptr)) + } + + /// Creates a VcpuFd object from a vcpu RawFd. + /// + /// # Arguments + /// + /// * `fd` - the RawFd used for creating the VcpuFd object. + /// + /// # Safety + /// + /// This function is unsafe as the primitives currently returned have the contract that + /// they are the sole owner of the file descriptor they are wrapping. Usage of this function + /// could accidentally allow violating this contract which can cause memory unsafety in code + /// that relies on it being true. + /// + /// The caller of this method must make sure the fd is valid and nothing else uses it. 
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use std::os::unix::io::AsRawFd; + /// # use kvm_ioctls::Kvm; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// // Create one vCPU with the ID=0. + /// let vcpu = vm.create_vcpu(0).unwrap(); + /// let rawfd = unsafe { libc::dup(vcpu.as_raw_fd()) }; + /// assert!(rawfd >= 0); + /// let vcpu = unsafe { vm.create_vcpu_from_rawfd(rawfd).unwrap() }; + /// ``` + pub unsafe fn create_vcpu_from_rawfd(&self, fd: RawFd) -> Result { + let vcpu = File::from_raw_fd(fd); + let kvm_run_ptr = KvmRunWrapper::mmap_from_fd(&vcpu, self.run_size)?; + Ok(new_vcpu(vcpu, kvm_run_ptr)) + } + + /// Creates an emulated device in the kernel. + /// + /// See the documentation for `KVM_CREATE_DEVICE`. + /// + /// # Arguments + /// + /// * `device`: device configuration. For details check the `kvm_create_device` structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// use kvm_bindings::{ + /// kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, + /// kvm_device_type_KVM_DEV_TYPE_VFIO, KVM_CREATE_DEVICE_TEST, + /// }; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// // Creating a device with the KVM_CREATE_DEVICE_TEST flag to check + /// // whether the device type is supported. This will not create the device. + /// // To create the device the flag needs to be removed. 
+ /// let mut device = kvm_bindings::kvm_create_device { + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// type_: kvm_device_type_KVM_DEV_TYPE_VFIO, + /// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + /// type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, + /// fd: 0, + /// flags: KVM_CREATE_DEVICE_TEST, + /// }; + /// // On ARM, creating VGICv3 may fail due to hardware dependency. + /// // Retry to create VGICv2 in that case. + /// let device_fd = vm.create_device(&mut device).unwrap_or_else(|_| { + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// panic!("Cannot create VFIO device."); + /// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + /// { + /// device.type_ = kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2; + /// vm.create_device(&mut device) + /// .expect("Cannot create vGIC device") + /// } + /// }); + /// ``` + pub fn create_device(&self, device: &mut kvm_create_device) -> Result { + // SAFETY: Safe because we are calling this with the VM fd and we trust the kernel. + let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_DEVICE(), device) }; + if ret == 0 { + // SAFETY: We validated the return of the function creating the fd and we trust the + // kernel. + Ok(new_device(unsafe { File::from_raw_fd(device.fd as i32) })) + } else { + Err(errno::Error::last()) + } + } + + /// Returns the preferred CPU target type which can be emulated by KVM on underlying host. + /// + /// The preferred CPU target is returned in the `kvi` parameter. + /// See documentation for `KVM_ARM_PREFERRED_TARGET`. + /// + /// # Arguments + /// * `kvi` - CPU target configuration (out). For details check the `kvm_vcpu_init` + /// structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). 
+ /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use kvm_ioctls::Kvm; + /// use kvm_bindings::kvm_vcpu_init; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut kvi = kvm_vcpu_init::default(); + /// vm.get_preferred_target(&mut kvi).unwrap(); + /// ``` + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + pub fn get_preferred_target(&self, kvi: &mut kvm_vcpu_init) -> Result<()> { + // SAFETY: The ioctl is safe because we allocated the struct and we know the + // kernel will write exactly the size of the struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_ARM_PREFERRED_TARGET(), kvi) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Enable the specified capability as per the `KVM_ENABLE_CAP` ioctl. + /// + /// See the documentation for `KVM_ENABLE_CAP`. + /// + /// Returns an io::Error when the capability could not be enabled. + /// + /// # Arguments + /// + /// * kvm_enable_cap - KVM capability structure. For details check the `kvm_enable_cap` + /// structure in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// extern crate kvm_bindings; + /// + /// # use kvm_ioctls::Kvm; + /// use kvm_bindings::{kvm_enable_cap, KVM_CAP_SPLIT_IRQCHIP}; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut cap: kvm_enable_cap = Default::default(); + /// // This example cannot enable an arm/aarch64 capability since there + /// // is no capability available for these architectures. + /// if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") { + /// cap.cap = KVM_CAP_SPLIT_IRQCHIP; + /// // As per the KVM documentation, KVM_CAP_SPLIT_IRQCHIP only emulates + /// // the local APIC in kernel, expecting that a userspace IOAPIC will + /// // be implemented by the VMM. 
+ /// // Along with this capability, the user needs to specify the number + /// // of pins reserved for the userspace IOAPIC. This number needs to be + /// // provided through the first argument of the capability structure, as + /// // specified in KVM documentation: + /// // args[0] - number of routes reserved for userspace IOAPICs + /// // + /// // Because an IOAPIC supports 24 pins, that's the reason why this test + /// // picked this number as reference. + /// cap.args[0] = 24; + /// vm.enable_cap(&cap).unwrap(); + /// } + /// ``` + #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] + pub fn enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> { + // SAFETY: The ioctl is safe because we allocated the struct and we know the + // kernel will write exactly the size of the struct. + let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), cap) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Get the `kvm_run` size. + pub fn run_size(&self) -> usize { + self.run_size + } + + /// Wrapper over `KVM_CHECK_EXTENSION`. + /// + /// Returns 0 if the capability is not available and a positive integer otherwise. + fn check_extension_int(&self, c: Cap) -> i32 { + // SAFETY: Safe because we know that our file is a VM fd and that the extension is one of + // the ones defined by kernel. + unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) } + } + + /// Checks if a particular `Cap` is available. + /// + /// Returns true if the capability is supported and false otherwise. + /// See the documentation for `KVM_CHECK_EXTENSION`. + /// + /// # Arguments + /// + /// * `c` - VM capability to check. + /// + /// # Example + /// + /// ``` + /// # use kvm_ioctls::Kvm; + /// use kvm_ioctls::Cap; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// // Check if `KVM_CAP_MP_STATE` is supported. 
+ /// assert!(vm.check_extension(Cap::MpState)); + /// ``` + pub fn check_extension(&self, c: Cap) -> bool { + self.check_extension_int(c) > 0 + } + + /// Issues platform-specific memory encryption commands to manage encrypted VMs if + /// the platform supports creating those encrypted VMs. + /// + /// Currently, this ioctl is used for issuing Secure Encrypted Virtualization + /// (SEV) commands on AMD Processors. + /// + /// See the documentation for `KVM_MEMORY_ENCRYPT_OP` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// For SEV-specific functionality, prefer safe wrapper: + /// - [`encrypt_op_sev`](Self::encrypt_op_sev) + /// + /// # Safety + /// + /// This function is unsafe because there is no guarantee `T` is valid in this context, how + /// much data kernel will read from memory and where it will write data on error. + /// + /// # Arguments + /// + /// * `op` - an opaque platform specific structure. + /// + /// # Example + #[cfg_attr(has_sev, doc = "```rust")] + #[cfg_attr(not(has_sev), doc = "```rust,no_run")] + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// use kvm_bindings::bindings::kvm_sev_cmd; + /// # use kvm_ioctls::Kvm; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// // Initialize the SEV platform context. + /// let mut init: kvm_sev_cmd = Default::default(); + /// unsafe { vm.encrypt_op(&mut init).unwrap() }; + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub unsafe fn encrypt_op(&self, op: *mut T) -> Result<()> { + let ret = ioctl_with_mut_ptr(self, KVM_MEMORY_ENCRYPT_OP(), op); + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Issue common lifecycle events of SEV guests, such as launching, running, snapshotting, + /// migrating and decommissioning via `KVM_MEMORY_ENCRYPT_OP` ioctl. 
+ /// + /// Kernel documentation states that this ioctl can be used for testing whether SEV is enabled + /// by sending `NULL`. To do that, pass [`std::ptr::null_mut`](std::ptr::null_mut) to [`encrypt_op`](Self::encrypt_op). + /// + /// See the documentation for Secure Encrypted Virtualization (SEV). + /// + /// # Arguments + /// + /// * `op` - SEV-specific structure. For details check the + /// [Secure Encrypted Virtualization (SEV) doc](https://www.kernel.org/doc/Documentation/virtual/kvm/amd-memory-encryption.rst). + /// + /// # Example + #[cfg_attr(has_sev, doc = "```rust")] + #[cfg_attr(not(has_sev), doc = "```rust,no_run")] + /// # extern crate kvm_ioctls; + /// # extern crate kvm_bindings; + /// # use std::{os::raw::c_void, ptr::null_mut}; + /// use kvm_bindings::bindings::kvm_sev_cmd; + /// # use kvm_ioctls::Kvm; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// + /// // Check whether SEV is enabled, optional. + /// assert!(unsafe { vm.encrypt_op(null_mut() as *mut c_void) }.is_ok()); + /// + /// // Initialize the SEV platform context. + /// let mut init: kvm_sev_cmd = Default::default(); + /// vm.encrypt_op_sev(&mut init).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn encrypt_op_sev(&self, op: &mut kvm_sev_cmd) -> Result<()> { + // SAFETY: Safe because we know that kernel will only read the correct amount of memory + // from our pointer and we know where it will write it (op.error). + unsafe { self.encrypt_op(op) } + } + + /// Register a guest memory region which may contain encrypted data. + /// + /// It is used in the SEV-enabled guest. + /// + /// See the documentation for `KVM_MEMORY_ENCRYPT_REG_REGION` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `memory_region` - Guest physical memory region. 
+ /// + /// # Example + #[cfg_attr(has_sev, doc = "```rust")] + #[cfg_attr(not(has_sev), doc = "```rust,no_run")] + /// # extern crate kvm_bindings; + /// # extern crate kvm_ioctls; + /// # extern crate libc; + /// # use std::{fs::OpenOptions, ptr::null_mut}; + /// # use std::os::unix::io::AsRawFd; + /// use kvm_bindings::bindings::{kvm_enc_region, kvm_sev_cmd, kvm_sev_launch_start, sev_cmd_id_KVM_SEV_LAUNCH_START}; + /// # use kvm_ioctls::Kvm; + /// use libc; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let sev = OpenOptions::new() + /// .read(true) + /// .write(true) + /// .open("/dev/sev") + /// .unwrap(); + /// + /// // Initialize the SEV platform context. + /// let mut init: kvm_sev_cmd = Default::default(); + /// assert!(vm.encrypt_op_sev(&mut init).is_ok()); + /// + /// // Create the memory encryption context. + /// let start_data: kvm_sev_launch_start = Default::default(); + /// let mut start = kvm_sev_cmd { + /// id: sev_cmd_id_KVM_SEV_LAUNCH_START, + /// data: &start_data as *const kvm_sev_launch_start as _, + /// sev_fd: sev.as_raw_fd() as _, + /// ..Default::default() + /// }; + /// assert!(vm.encrypt_op_sev(&mut start).is_ok()); + /// + /// let addr = unsafe { + /// libc::mmap( + /// null_mut(), + /// 4096, + /// libc::PROT_READ | libc::PROT_WRITE, + /// libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, + /// -1, + /// 0, + /// ) + /// }; + /// assert_ne!(addr, libc::MAP_FAILED); + /// + /// let memory_region = kvm_enc_region { + /// addr: addr as _, + /// size: 4096, + /// }; + /// vm.register_enc_memory_region(&memory_region).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn register_enc_memory_region(&self, memory_region: &kvm_enc_region) -> Result<()> { + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. 
+ let ret = unsafe { ioctl_with_ref(self, KVM_MEMORY_ENCRYPT_REG_REGION(), memory_region) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + /// Unregister a guest memory region registered with + /// [`register_enc_memory_region`](Self::register_enc_memory_region). + /// + /// It is used in the SEV-enabled guest. + /// + /// See the documentation for `KVM_MEMORY_ENCRYPT_UNREG_REGION` in the + /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// + /// # Arguments + /// + /// * `memory_region` - Guest physical memory region. + /// + /// # Example + #[cfg_attr(has_sev, doc = "```rust")] + #[cfg_attr(not(has_sev), doc = "```rust,no_run")] + /// # extern crate kvm_bindings; + /// # extern crate kvm_ioctls; + /// # extern crate libc; + /// # use std::{fs::OpenOptions, ptr::null_mut}; + /// # use std::os::unix::io::AsRawFd; + /// use kvm_bindings::bindings::{kvm_enc_region, kvm_sev_cmd, kvm_sev_launch_start, sev_cmd_id_KVM_SEV_LAUNCH_START}; + /// # use kvm_ioctls::Kvm; + /// use libc; + /// + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let sev = OpenOptions::new() + /// .read(true) + /// .write(true) + /// .open("/dev/sev") + /// .unwrap(); + /// + /// // Initialize the SEV platform context. + /// let mut init: kvm_sev_cmd = Default::default(); + /// assert!(vm.encrypt_op_sev(&mut init).is_ok()); + /// + /// // Create the memory encryption context. 
+ /// let start_data: kvm_sev_launch_start = Default::default(); + /// let mut start = kvm_sev_cmd { + /// id: sev_cmd_id_KVM_SEV_LAUNCH_START, + /// data: &start_data as *const kvm_sev_launch_start as _, + /// sev_fd: sev.as_raw_fd() as _, + /// ..Default::default() + /// }; + /// assert!(vm.encrypt_op_sev(&mut start).is_ok()); + /// + /// let addr = unsafe { + /// libc::mmap( + /// null_mut(), + /// 4096, + /// libc::PROT_READ | libc::PROT_WRITE, + /// libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, + /// -1, + /// 0, + /// ) + /// }; + /// assert_ne!(addr, libc::MAP_FAILED); + /// + /// let memory_region = kvm_enc_region { + /// addr: addr as _, + /// size: 4096, + /// }; + /// vm.register_enc_memory_region(&memory_region).unwrap(); + /// vm.unregister_enc_memory_region(&memory_region).unwrap(); + /// ``` + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn unregister_enc_memory_region(&self, memory_region: &kvm_enc_region) -> Result<()> { + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_MEMORY_ENCRYPT_UNREG_REGION(), memory_region) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } +} + +/// Helper function to create a new `VmFd`. +/// +/// This should not be exported as a public function because the preferred way is to use +/// `create_vm` from `Kvm`. The function cannot be part of the `VmFd` implementation because +/// then it would be exported with the public `VmFd` interface. +pub fn new_vmfd(vm: File, run_size: usize) -> VmFd { + VmFd { vm, run_size } +} + +impl AsRawFd for VmFd { + fn as_raw_fd(&self) -> RawFd { + self.vm.as_raw_fd() + } +} + +/// Create a dummy GIC device. +/// +/// # Arguments +/// +/// * `vm` - The vm file descriptor. +/// * `flags` - Flags to be passed to `KVM_CREATE_DEVICE`. 
+#[cfg(test)] +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +pub(crate) fn create_gic_device(vm: &VmFd, flags: u32) -> DeviceFd { + let mut gic_device = kvm_bindings::kvm_create_device { + type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, + fd: 0, + flags, + }; + match vm.create_device(&mut gic_device) { + Ok(fd) => fd, + Err(_) => { + gic_device.type_ = kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2; + vm.create_device(&mut gic_device) + .expect("Cannot create KVM vGIC device") + } + } +} + +/// Set supported number of IRQs for vGIC. +/// +/// # Arguments +/// +/// * `vgic` - The vGIC file descriptor. +/// * `nr_irqs` - Number of IRQs. +#[cfg(test)] +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +pub(crate) fn set_supported_nr_irqs(vgic: &DeviceFd, nr_irqs: u32) { + let vgic_attr = kvm_bindings::kvm_device_attr { + group: kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + attr: 0, + addr: &nr_irqs as *const u32 as u64, + flags: 0, + }; + assert!(vgic.set_device_attr(&vgic_attr).is_ok()); +} + +/// Request the initialization of the vGIC. +/// +/// # Arguments +/// +/// * `vgic` - The vGIC file descriptor. 
+#[cfg(test)] +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +pub(crate) fn request_gic_init(vgic: &DeviceFd) { + let vgic_attr = kvm_bindings::kvm_device_attr { + group: kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, + attr: u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_CTRL_INIT), + addr: 0, + flags: 0, + }; + assert!(vgic.set_device_attr(&vgic_attr).is_ok()); +} + +#[cfg(test)] +mod tests { + #![allow(clippy::undocumented_unsafe_blocks)] + use super::*; + use crate::Kvm; + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + use std::{fs::OpenOptions, ptr::null_mut}; + + use libc::EFD_NONBLOCK; + + #[test] + fn test_set_invalid_memory() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let invalid_mem_region = kvm_userspace_memory_region { + slot: 0, + guest_phys_addr: 0, + memory_size: 0, + userspace_addr: 0, + flags: 0, + }; + assert!(unsafe { vm.set_user_memory_region(invalid_mem_region) }.is_err()); + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn test_set_tss_address() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + assert!(vm.set_tss_address(0xfffb_d000).is_ok()); + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn test_set_identity_map_address() { + let kvm = Kvm::new().unwrap(); + if kvm.check_extension(Cap::SetIdentityMapAddr) { + let vm = kvm.create_vm().unwrap(); + assert!(vm.set_identity_map_address(0xfffb_c000).is_ok()); + vm.create_vcpu(0).unwrap(); + // Setting the identity map after creating a vCPU must fail. 
+ assert!(vm.set_identity_map_address(0xfffb_c000).is_err()); + } + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn test_irq_chip() { + use Cap; + + let kvm = Kvm::new().unwrap(); + assert!(kvm.check_extension(Cap::Irqchip)); + let vm = kvm.create_vm().unwrap(); + assert!(vm.create_irq_chip().is_ok()); + + let mut irqchip = kvm_irqchip { + chip_id: KVM_IRQCHIP_PIC_MASTER, + ..Default::default() + }; + // Set the irq_base to a non-default value to check that set & get work. + irqchip.chip.pic.irq_base = 10; + assert!(vm.set_irqchip(&irqchip).is_ok()); + + // We initialize a dummy irq chip (`other_irqchip`) in which the + // function `get_irqchip` returns its result. + let mut other_irqchip = kvm_irqchip { + chip_id: KVM_IRQCHIP_PIC_MASTER, + ..Default::default() + }; + assert!(vm.get_irqchip(&mut other_irqchip).is_ok()); + + // Safe because we know that the irqchip type is PIC. + unsafe { assert_eq!(irqchip.chip.pic, other_irqchip.chip.pic) }; + } + + #[test] + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + fn test_irq_chip() { + use Cap; + + let kvm = Kvm::new().unwrap(); + assert!(kvm.check_extension(Cap::Irqchip)); + + let vm = kvm.create_vm().unwrap(); + + // On ARM/arm64, a GICv2 is created. It's better to check ahead whether GICv2 + // can be emulated or not. 
+ let mut gic_device = kvm_bindings::kvm_create_device { + type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, + fd: 0, + flags: KVM_CREATE_DEVICE_TEST, + }; + + let vgic_v2_supported = vm.create_device(&mut gic_device).is_ok(); + assert_eq!(vm.create_irq_chip().is_ok(), vgic_v2_supported); + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn test_pit2() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + assert!(vm.create_pit2(kvm_pit_config::default()).is_ok()); + + let pit2 = vm.get_pit2().unwrap(); + vm.set_pit2(&pit2).unwrap(); + let mut other_pit2 = vm.get_pit2().unwrap(); + // Load time will differ, let's overwrite it so we can test equality. + other_pit2.channels[0].count_load_time = pit2.channels[0].count_load_time; + other_pit2.channels[1].count_load_time = pit2.channels[1].count_load_time; + other_pit2.channels[2].count_load_time = pit2.channels[2].count_load_time; + assert_eq!(pit2, other_pit2); + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[test] + fn test_clock() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + + // Get current time. + let orig = vm.get_clock().unwrap(); + + // Reset time. + let fudged = kvm_clock_data { + clock: 10, + ..Default::default() + }; + vm.set_clock(&fudged).unwrap(); + + // Get new time. + let new = vm.get_clock().unwrap(); + + // Verify new time has progressed but is smaller than orig time. 
+ assert!(fudged.clock < new.clock); + assert!(new.clock < orig.clock); + } + + #[test] + fn test_register_ioevent() { + assert_eq!(std::mem::size_of::<NoDatamatch>(), 0); + + let kvm = Kvm::new().unwrap(); + let vm_fd = kvm.create_vm().unwrap(); + let evtfd = EventFd::new(EFD_NONBLOCK).unwrap(); + assert!(vm_fd + .register_ioevent(&evtfd, &IoEventAddress::Pio(0xf4), NoDatamatch) + .is_ok()); + assert!(vm_fd + .register_ioevent(&evtfd, &IoEventAddress::Mmio(0x1000), NoDatamatch) + .is_ok()); + assert!(vm_fd + .register_ioevent(&evtfd, &IoEventAddress::Pio(0xc1), 0x7fu8) + .is_ok()); + assert!(vm_fd + .register_ioevent(&evtfd, &IoEventAddress::Pio(0xc2), 0x1337u16) + .is_ok()); + assert!(vm_fd + .register_ioevent(&evtfd, &IoEventAddress::Pio(0xc4), 0xdead_beefu32) + .is_ok()); + assert!(vm_fd + .register_ioevent(&evtfd, &IoEventAddress::Pio(0xc8), 0xdead_beef_dead_beefu64) + .is_ok()); + } + + #[test] + fn test_unregister_ioevent() { + assert_eq!(std::mem::size_of::<NoDatamatch>(), 0); + + let kvm = Kvm::new().unwrap(); + let vm_fd = kvm.create_vm().unwrap(); + let evtfd = EventFd::new(EFD_NONBLOCK).unwrap(); + let pio_addr = IoEventAddress::Pio(0xf4); + let mmio_addr = IoEventAddress::Mmio(0x1000); + + // First try to unregister addresses which have not been registered. + assert!(vm_fd + .unregister_ioevent(&evtfd, &pio_addr, NoDatamatch) + .is_err()); + assert!(vm_fd + .unregister_ioevent(&evtfd, &mmio_addr, NoDatamatch) + .is_err()); + + // Now register the addresses + assert!(vm_fd + .register_ioevent(&evtfd, &pio_addr, NoDatamatch) + .is_ok()); + assert!(vm_fd + .register_ioevent(&evtfd, &mmio_addr, 0x1337u16) + .is_ok()); + + // Try again unregistering the addresses. This time it should work + // since they have been previously registered. 
+ assert!(vm_fd + .unregister_ioevent(&evtfd, &pio_addr, NoDatamatch) + .is_ok()); + assert!(vm_fd + .unregister_ioevent(&evtfd, &mmio_addr, 0x1337u16) + .is_ok()); + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn test_register_unregister_irqfd() { + let kvm = Kvm::new().unwrap(); + let vm_fd = kvm.create_vm().unwrap(); + let evtfd1 = EventFd::new(EFD_NONBLOCK).unwrap(); + let evtfd2 = EventFd::new(EFD_NONBLOCK).unwrap(); + let evtfd3 = EventFd::new(EFD_NONBLOCK).unwrap(); + let evtfd4 = EventFd::new(EFD_NONBLOCK).unwrap(); + let resamplefd = EventFd::new(EFD_NONBLOCK).unwrap(); + + assert!(vm_fd.create_irq_chip().is_ok()); + + assert!(vm_fd.register_irqfd(&evtfd1, 4).is_ok()); + assert!(vm_fd.register_irqfd(&evtfd2, 8).is_ok()); + assert!(vm_fd.register_irqfd(&evtfd3, 4).is_ok()); + assert!(vm_fd.unregister_irqfd(&evtfd2, 8).is_ok()); + // KVM irqfd doesn't report failure on this case:( + assert!(vm_fd.unregister_irqfd(&evtfd2, 8).is_ok()); + + // Duplicated eventfd registration. + // On x86_64 this fails as the event fd was already matched with a GSI. 
+ assert!(vm_fd.register_irqfd(&evtfd3, 4).is_err()); + assert!(vm_fd.register_irqfd(&evtfd3, 5).is_err()); + // KVM irqfd doesn't report failure on this case:( + assert!(vm_fd.unregister_irqfd(&evtfd3, 5).is_ok()); + + if vm_fd.check_extension(Cap::IrqfdResample) { + assert!(vm_fd + .register_irqfd_with_resample(&evtfd4, &resamplefd, 6) + .is_ok()); + assert!(vm_fd.unregister_irqfd(&evtfd4, 6).is_ok()); + } else { + assert!(vm_fd + .register_irqfd_with_resample(&evtfd4, &resamplefd, 6) + .is_err()); + } + } + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_register_unregister_irqfd() { + let kvm = Kvm::new().unwrap(); + let vm_fd = kvm.create_vm().unwrap(); + let evtfd1 = EventFd::new(EFD_NONBLOCK).unwrap(); + let evtfd2 = EventFd::new(EFD_NONBLOCK).unwrap(); + let evtfd3 = EventFd::new(EFD_NONBLOCK).unwrap(); + let evtfd4 = EventFd::new(EFD_NONBLOCK).unwrap(); + let resamplefd = EventFd::new(EFD_NONBLOCK).unwrap(); + + // Create the vGIC device. + let vgic_fd = create_gic_device(&vm_fd, 0); + + // GICv3 on arm/aarch64 requires an online vCPU prior to setting device attributes, + // see: https://www.kernel.org/doc/html/latest/virt/kvm/devices/arm-vgic-v3.html + vm_fd.create_vcpu(0).unwrap(); + + // Set supported number of IRQs. + set_supported_nr_irqs(&vgic_fd, 128); + // Request the initialization of the vGIC. + request_gic_init(&vgic_fd); + + assert!(vm_fd.register_irqfd(&evtfd1, 4).is_ok()); + assert!(vm_fd.register_irqfd(&evtfd2, 8).is_ok()); + assert!(vm_fd.register_irqfd(&evtfd3, 4).is_ok()); + assert!(vm_fd.unregister_irqfd(&evtfd2, 8).is_ok()); + // KVM irqfd doesn't report failure on this case:( + assert!(vm_fd.unregister_irqfd(&evtfd2, 8).is_ok()); + + // Duplicated eventfd registration. + // On aarch64, this fails because setting up the interrupt controller is mandatory before + // registering any IRQ. 
+ assert!(vm_fd.register_irqfd(&evtfd3, 4).is_err()); + assert!(vm_fd.register_irqfd(&evtfd3, 5).is_err()); + // KVM irqfd doesn't report failure on this case:( + assert!(vm_fd.unregister_irqfd(&evtfd3, 5).is_ok()); + + if vm_fd.check_extension(Cap::IrqfdResample) { + assert!(vm_fd + .register_irqfd_with_resample(&evtfd4, &resamplefd, 6) + .is_ok()); + assert!(vm_fd.unregister_irqfd(&evtfd4, 6).is_ok()); + } else { + assert!(vm_fd + .register_irqfd_with_resample(&evtfd4, &resamplefd, 6) + .is_err()); + } + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn test_set_irq_line() { + let kvm = Kvm::new().unwrap(); + let vm_fd = kvm.create_vm().unwrap(); + + assert!(vm_fd.create_irq_chip().is_ok()); + + assert!(vm_fd.set_irq_line(4, true).is_ok()); + assert!(vm_fd.set_irq_line(4, false).is_ok()); + assert!(vm_fd.set_irq_line(4, true).is_ok()); + } + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_set_irq_line() { + let kvm = Kvm::new().unwrap(); + let vm_fd = kvm.create_vm().unwrap(); + // Create a vcpu for test case 2 of the KVM_IRQ_LINE API on aarch64. + vm_fd.create_vcpu(0).unwrap(); + + // Create the vGIC device. + let vgic_fd = create_gic_device(&vm_fd, 0); + // Set supported number of IRQs. + set_supported_nr_irqs(&vgic_fd, 128); + // Request the initialization of the vGIC. + request_gic_init(&vgic_fd); + + // On arm/aarch64, irq field is interpreted like this: + // bits: | 31 ... 24 | 23 ... 16 | 15 ... 0 | + // field: | irq_type | vcpu_index | irq_id | + // The irq_type field has the following values: + // - irq_type[0]: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ + // - irq_type[1]: in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.) (the vcpu_index field is ignored) + // - irq_type[2]: in-kernel GIC: PPI, irq_id between 16 and 31 (incl.) 
+ // Hence, using irq_type = 1, irq_id = 32 (decimal), the irq field in hex is: 0x01_00_0020 + assert!(vm_fd.set_irq_line(0x01_00_0020, true).is_ok()); + assert!(vm_fd.set_irq_line(0x01_00_0020, false).is_ok()); + assert!(vm_fd.set_irq_line(0x01_00_0020, true).is_ok()); + + // Case 2: using irq_type = 2, vcpu_index = 0, irq_id = 16 (decimal), the irq field in hex is: 0x02_00_0010 + assert!(vm_fd.set_irq_line(0x02_00_0010, true).is_ok()); + assert!(vm_fd.set_irq_line(0x02_00_0010, false).is_ok()); + assert!(vm_fd.set_irq_line(0x02_00_0010, true).is_ok()); + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn test_faulty_vm_fd() { + let badf_errno = libc::EBADF; + + let faulty_vm_fd = VmFd { + vm: unsafe { File::from_raw_fd(-2) }, + run_size: 0, + }; + + let invalid_mem_region = kvm_userspace_memory_region { + slot: 0, + guest_phys_addr: 0, + memory_size: 0, + userspace_addr: 0, + flags: 0, + }; + + assert_eq!( + unsafe { + faulty_vm_fd + .set_user_memory_region(invalid_mem_region) + .unwrap_err() + .errno() + }, + badf_errno + ); + assert_eq!( + faulty_vm_fd.set_tss_address(0).unwrap_err().errno(), + badf_errno + ); + assert_eq!( + faulty_vm_fd.create_irq_chip().unwrap_err().errno(), + badf_errno + ); + assert_eq!( + faulty_vm_fd + .create_pit2(kvm_pit_config::default()) + .unwrap_err() + .errno(), + badf_errno + ); + let event_fd = EventFd::new(EFD_NONBLOCK).unwrap(); + assert_eq!( + faulty_vm_fd + .register_ioevent(&event_fd, &IoEventAddress::Pio(0), 0u64) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vm_fd + .get_irqchip(&mut kvm_irqchip::default()) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vm_fd + .set_irqchip(&kvm_irqchip::default()) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!(faulty_vm_fd.get_clock().unwrap_err().errno(), badf_errno); + assert_eq!( + faulty_vm_fd + .set_clock(&kvm_clock_data::default()) + .unwrap_err() + .errno(), + badf_errno + ); + 
assert_eq!(faulty_vm_fd.get_pit2().unwrap_err().errno(), badf_errno); + assert_eq!( + faulty_vm_fd + .set_pit2(&kvm_pit_state2::default()) + .unwrap_err() + .errno(), + badf_errno + ); + assert_eq!( + faulty_vm_fd + .register_irqfd(&event_fd, 0) + .unwrap_err() + .errno(), + badf_errno + ); + + assert_eq!( + faulty_vm_fd.create_vcpu(0).err().unwrap().errno(), + badf_errno + ); + + assert_eq!( + faulty_vm_fd.get_dirty_log(0, 0).unwrap_err().errno(), + badf_errno + ); + } + + #[test] + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + fn test_get_preferred_target() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); + assert!(vm.get_preferred_target(&mut kvi).is_ok()); + } + + /// As explained in the example code related to signal_msi(), sending + /// a random MSI vector will always fail because no vector has been + /// previously allocated from the guest itself. + #[test] + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" + ))] + fn test_signal_msi_failure() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let msi = kvm_msi::default(); + assert!(vm.signal_msi(msi).is_err()); + } + + #[test] + #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] + fn test_enable_cap_failure() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let cap: kvm_enable_cap = Default::default(); + // Providing the `kvm_enable_cap` structure filled with default() should + // always result in a failure as it is not a valid capability. 
+ assert!(vm.enable_cap(&cap).is_err()); + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn test_enable_split_irqchip_cap() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let mut cap = kvm_enable_cap { + cap: KVM_CAP_SPLIT_IRQCHIP, + ..Default::default() + }; + // As per the KVM documentation, KVM_CAP_SPLIT_IRQCHIP only emulates + // the local APIC in kernel, expecting that a userspace IOAPIC will + // be implemented by the VMM. + // Along with this capability, the user needs to specify the number + // of pins reserved for the userspace IOAPIC. This number needs to be + // provided through the first argument of the capability structure, as + // specified in KVM documentation: + // args[0] - number of routes reserved for userspace IOAPICs + // + // Because an IOAPIC supports 24 pins, that's the reason why this test + // picked this number as reference. + cap.args[0] = 24; + assert!(vm.enable_cap(&cap).is_ok()); + } + + #[test] + fn test_set_gsi_routing() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") { + let irq_routing = kvm_irq_routing::default(); + // Expect failure for x86 since the irqchip is not created yet. 
+ assert!(vm.set_gsi_routing(&irq_routing).is_err()); + vm.create_irq_chip().unwrap(); + } + let irq_routing = kvm_irq_routing::default(); + assert!(vm.set_gsi_routing(&irq_routing).is_ok()); + } + + #[test] + fn test_create_vcpu_different_ids() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + + // Fails when an arbitrarily large value + let err = vm.create_vcpu(65537_u64).err(); + assert_eq!(err.unwrap().errno(), libc::EINVAL); + + // Fails when input `id` = `max_vcpu_id` + let max_vcpu_id = kvm.get_max_vcpu_id(); + let vcpu = vm.create_vcpu((max_vcpu_id - 1) as u64); + assert!(vcpu.is_ok()); + let vcpu_err = vm.create_vcpu(max_vcpu_id as u64).err(); + assert_eq!(vcpu_err.unwrap().errno(), libc::EINVAL); + } + + #[test] + fn test_check_extension() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + assert!(vm.check_extension(Cap::MpState)); + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[cfg_attr(not(has_sev), ignore)] + fn test_encrypt_op_sev() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + + let mut init: kvm_sev_cmd = Default::default(); + assert!(vm.encrypt_op_sev(&mut init).is_ok()); + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[cfg_attr(not(has_sev), ignore)] + fn test_register_unregister_enc_memory_region() { + let sev = OpenOptions::new() + .read(true) + .write(true) + .open("/dev/sev") + .unwrap(); + + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + + // Perform SEV launch sequence according to + // https://www.kernel.org/doc/Documentation/virtual/kvm/amd-memory-encryption.rst + + let mut init: kvm_sev_cmd = Default::default(); + assert!(vm.encrypt_op_sev(&mut init).is_ok()); + + let start_data: kvm_sev_launch_start = Default::default(); + let mut start = kvm_sev_cmd { + id: sev_cmd_id_KVM_SEV_LAUNCH_START, + data: &start_data as *const kvm_sev_launch_start as _, + sev_fd: sev.as_raw_fd() as _, + 
..Default::default() + }; + assert!(vm.encrypt_op_sev(&mut start).is_ok()); + + let addr = unsafe { + libc::mmap( + null_mut(), + 4096, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, + -1, + 0, + ) + }; + assert_ne!(addr, libc::MAP_FAILED); + + assert_eq!( + vm.register_enc_memory_region(&Default::default()) + .unwrap_err() + .errno(), + libc::EINVAL + ); + assert_eq!( + vm.unregister_enc_memory_region(&Default::default()) + .unwrap_err() + .errno(), + libc::EINVAL + ); + + let memory_region = kvm_enc_region { + addr: addr as _, + size: 4096, + }; + assert_eq!( + vm.unregister_enc_memory_region(&memory_region) + .unwrap_err() + .errno(), + libc::EINVAL + ); + assert!(vm.register_enc_memory_region(&memory_region).is_ok()); + assert!(vm.unregister_enc_memory_region(&memory_region).is_ok()); + } +} diff --git a/kvm-ioctls/src/kvm_ioctls.rs b/kvm-ioctls/src/kvm_ioctls.rs new file mode 100644 index 000000000..f7252a7fb --- /dev/null +++ b/kvm-ioctls/src/kvm_ioctls.rs @@ -0,0 +1,302 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Declares necessary ioctls specific to their platform. + +use kvm_bindings::*; + +// Ioctls for /dev/kvm. 
+ +ioctl_io_nr!(KVM_GET_API_VERSION, KVMIO, 0x00); +ioctl_io_nr!(KVM_CREATE_VM, KVMIO, 0x01); +ioctl_io_nr!(KVM_CHECK_EXTENSION, KVMIO, 0x03); +ioctl_io_nr!(KVM_GET_VCPU_MMAP_SIZE, KVMIO, 0x04); +/* Available with KVM_CAP_EXT_CPUID */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iowr_nr!(KVM_GET_SUPPORTED_CPUID, KVMIO, 0x05, kvm_cpuid2); +/* Available with KVM_CAP_EXT_EMUL_CPUID */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iowr_nr!(KVM_GET_EMULATED_CPUID, KVMIO, 0x09, kvm_cpuid2); + +// Ioctls for VM fds. + +ioctl_io_nr!(KVM_CREATE_VCPU, KVMIO, 0x41); +ioctl_iow_nr!(KVM_GET_DIRTY_LOG, KVMIO, 0x42, kvm_dirty_log); +/* Available with KVM_CAP_USER_MEMORY */ +ioctl_iow_nr!( + KVM_SET_USER_MEMORY_REGION, + KVMIO, + 0x46, + kvm_userspace_memory_region +); +/* Available with KVM_CAP_SET_TSS_ADDR */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_io_nr!(KVM_SET_TSS_ADDR, KVMIO, 0x47); +/* Available with KVM_CAP_SET_IDENTITY_MAP_ADDR */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iow_nr!(KVM_SET_IDENTITY_MAP_ADDR, KVMIO, 0x48, u64); +/* Available with KVM_CAP_IRQCHIP */ +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64", + target_arch = "s390" +))] +ioctl_io_nr!(KVM_CREATE_IRQCHIP, KVMIO, 0x60); +/* Available with KVM_CAP_IRQCHIP */ +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" +))] +ioctl_iow_nr!(KVM_IRQ_LINE, KVMIO, 0x61, kvm_irq_level); +/* Available with KVM_CAP_IRQ_ROUTING */ +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" +))] +ioctl_iow_nr!(KVM_SET_GSI_ROUTING, KVMIO, 0x6a, kvm_irq_routing); +/* Available with KVM_CAP_IRQFD */ +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64", + target_arch = "s390" +))] +ioctl_iow_nr!(KVM_IRQFD, KVMIO, 0x76, 
kvm_irqfd); +/* Available with KVM_CAP_PIT2 */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iow_nr!(KVM_CREATE_PIT2, KVMIO, 0x77, kvm_pit_config); +/* Available with KVM_CAP_IOEVENTFD */ +ioctl_iow_nr!(KVM_IOEVENTFD, KVMIO, 0x79, kvm_ioeventfd); +/* Available with KVM_CAP_IRQCHIP */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iowr_nr!(KVM_GET_IRQCHIP, KVMIO, 0x62, kvm_irqchip); +/* Available with KVM_CAP_IRQCHIP */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_ior_nr!(KVM_SET_IRQCHIP, KVMIO, 0x63, kvm_irqchip); +/* Available with KVM_CAP_ADJUST_CLOCK */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iow_nr!(KVM_SET_CLOCK, KVMIO, 0x7b, kvm_clock_data); +/* Available with KVM_CAP_ADJUST_CLOCK */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_ior_nr!(KVM_GET_CLOCK, KVMIO, 0x7c, kvm_clock_data); +/* Available with KVM_CAP_PIT_STATE2 */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_ior_nr!(KVM_GET_PIT2, KVMIO, 0x9f, kvm_pit_state2); +/* Available with KVM_CAP_PIT_STATE2 */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iow_nr!(KVM_SET_PIT2, KVMIO, 0xa0, kvm_pit_state2); +/* KVM_MEMORY_ENCRYPT_OP. Takes opaque platform dependent type: i.e. TDX or SEV */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong); +/* Available on SEV-enabled guests. */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_ior_nr!(KVM_MEMORY_ENCRYPT_REG_REGION, KVMIO, 0xbb, kvm_enc_region); +/* Available on SEV-enabled guests. */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_ior_nr!(KVM_MEMORY_ENCRYPT_UNREG_REGION, KVMIO, 0xbc, kvm_enc_region); + +// Ioctls for VCPU fds. 
+ +ioctl_io_nr!(KVM_RUN, KVMIO, 0x80); +#[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] +ioctl_ior_nr!(KVM_GET_REGS, KVMIO, 0x81, kvm_regs); +#[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] +ioctl_iow_nr!(KVM_SET_REGS, KVMIO, 0x82, kvm_regs); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "powerpc", + target_arch = "powerpc64" +))] +ioctl_ior_nr!(KVM_GET_SREGS, KVMIO, 0x83, kvm_sregs); +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "powerpc", + target_arch = "powerpc64" +))] +ioctl_iow_nr!(KVM_SET_SREGS, KVMIO, 0x84, kvm_sregs); +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iowr_nr!(KVM_TRANSLATE, KVMIO, 0x85, kvm_translation); +ioctl_iow_nr!(KVM_INTERRUPT, KVMIO, 0x86, kvm_interrupt); +/* Available with KVM_CAP_GET_MSR_FEATURES */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iowr_nr!(KVM_GET_MSR_INDEX_LIST, KVMIO, 0x02, kvm_msr_list); +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iowr_nr!(KVM_GET_MSRS, KVMIO, 0x88, kvm_msrs); +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iow_nr!(KVM_SET_MSRS, KVMIO, 0x89, kvm_msrs); +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_ior_nr!(KVM_GET_FPU, KVMIO, 0x8c, kvm_fpu); +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iow_nr!(KVM_SET_FPU, KVMIO, 0x8d, kvm_fpu); +/* Available with KVM_CAP_IRQCHIP */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_ior_nr!(KVM_GET_LAPIC, KVMIO, 0x8e, kvm_lapic_state); +/* Available with KVM_CAP_IRQCHIP */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iow_nr!(KVM_SET_LAPIC, KVMIO, 0x8f, kvm_lapic_state); +/* Available with KVM_CAP_EXT_CPUID */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iow_nr!(KVM_SET_CPUID2, KVMIO, 0x90, kvm_cpuid2); +/* Available with KVM_CAP_EXT_CPUID */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 
+ioctl_iowr_nr!(KVM_GET_CPUID2, KVMIO, 0x91, kvm_cpuid2); +/* Available with KVM_CAP_MP_STATE */ +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64", + target_arch = "s390", + target_arch = "riscv64" +))] +ioctl_ior_nr!(KVM_GET_MP_STATE, KVMIO, 0x98, kvm_mp_state); +/* Available with KVM_CAP_MP_STATE */ +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64", + target_arch = "s390", + target_arch = "riscv64" +))] +ioctl_iow_nr!(KVM_SET_MP_STATE, KVMIO, 0x99, kvm_mp_state); +/* Available with KVM_CAP_VCPU_EVENTS */ +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" +))] +ioctl_ior_nr!(KVM_GET_VCPU_EVENTS, KVMIO, 0x9f, kvm_vcpu_events); +/* Available with KVM_CAP_VCPU_EVENTS */ +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" +))] +ioctl_iow_nr!(KVM_SET_VCPU_EVENTS, KVMIO, 0xa0, kvm_vcpu_events); +/* Available with KVM_CAP_DEBUGREGS */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_ior_nr!(KVM_GET_DEBUGREGS, KVMIO, 0xa1, kvm_debugregs); +/* Available with KVM_CAP_DEBUGREGS */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iow_nr!(KVM_SET_DEBUGREGS, KVMIO, 0xa2, kvm_debugregs); +/* Available with KVM_CAP_XSAVE */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_ior_nr!(KVM_GET_XSAVE, KVMIO, 0xa4, kvm_xsave); +/* Available with KVM_CAP_XSAVE */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iow_nr!(KVM_SET_XSAVE, KVMIO, 0xa5, kvm_xsave); +/* Available with KVM_CAP_XCRS */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_ior_nr!(KVM_GET_XCRS, KVMIO, 0xa6, kvm_xcrs); +/* Available with KVM_CAP_XCRS */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_iow_nr!(KVM_SET_XCRS, KVMIO, 0xa7, kvm_xcrs); +/* Available with KVM_CAP_KVMCLOCK_CTRL */ 
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_io_nr!(KVM_KVMCLOCK_CTRL, KVMIO, 0xad); + +/* Available with KVM_CAP_TSC_CONTROL */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_io_nr!(KVM_SET_TSC_KHZ, KVMIO, 0xa2); +/* Available with KVM_CAP_GET_TSC_KHZ */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +ioctl_io_nr!(KVM_GET_TSC_KHZ, KVMIO, 0xa3); + +/* Available with KVM_CAP_ENABLE_CAP */ +#[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] +ioctl_iow_nr!(KVM_ENABLE_CAP, KVMIO, 0xa3, kvm_enable_cap); +/* Available with KVM_CAP_SIGNAL_MSI */ +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" +))] +ioctl_iow_nr!(KVM_SIGNAL_MSI, KVMIO, 0xa5, kvm_msi); +/* Available with KVM_CAP_ONE_REG */ +#[cfg(any(target_arch = "arm", target_arch = "aarch64", target_arch = "riscv64"))] +ioctl_iow_nr!(KVM_GET_ONE_REG, KVMIO, 0xab, kvm_one_reg); +#[cfg(any(target_arch = "arm", target_arch = "aarch64", target_arch = "riscv64"))] +ioctl_iow_nr!(KVM_SET_ONE_REG, KVMIO, 0xac, kvm_one_reg); +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +ioctl_iow_nr!(KVM_ARM_VCPU_INIT, KVMIO, 0xae, kvm_vcpu_init); +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +ioctl_ior_nr!(KVM_ARM_PREFERRED_TARGET, KVMIO, 0xaf, kvm_vcpu_init); +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +ioctl_iowr_nr!(KVM_GET_REG_LIST, KVMIO, 0xb0, kvm_reg_list); + +/* Available with KVM_CAP_SET_GUEST_DEBUG */ +ioctl_iow_nr!(KVM_SET_GUEST_DEBUG, KVMIO, 0x9b, kvm_guest_debug); + +// Device ioctls. 
+ +/* Available with KVM_CAP_DEVICE_CTRL */ +ioctl_iowr_nr!(KVM_CREATE_DEVICE, KVMIO, 0xe0, kvm_create_device); +/* Available with KVM_CAP_DEVICE_CTRL */ +ioctl_iow_nr!(KVM_SET_DEVICE_ATTR, KVMIO, 0xe1, kvm_device_attr); +/* Available with KVM_CAP_DEVICE_CTRL */ +ioctl_iow_nr!(KVM_GET_DEVICE_ATTR, KVMIO, 0xe2, kvm_device_attr); +/* Available with KVM_CAP_DEVICE_CTRL */ +ioctl_iow_nr!(KVM_HAS_DEVICE_ATTR, KVMIO, 0xe3, kvm_device_attr); + +#[cfg(test)] +mod tests { + #![allow(clippy::undocumented_unsafe_blocks)] + use std::fs::File; + use std::os::unix::io::FromRawFd; + + use libc::{c_char, open, O_RDWR}; + use vmm_sys_util::ioctl::{ioctl, ioctl_with_val}; + + use super::*; + const KVM_PATH: &str = "/dev/kvm\0"; + + #[test] + fn get_version() { + let sys_fd = unsafe { open(KVM_PATH.as_ptr() as *const c_char, O_RDWR) }; + assert!(sys_fd >= 0); + + let ret = unsafe { ioctl(&File::from_raw_fd(sys_fd), KVM_GET_API_VERSION()) }; + assert_eq!(ret as u32, KVM_API_VERSION); + } + + #[test] + fn create_vm_fd() { + let sys_fd = unsafe { open(KVM_PATH.as_ptr() as *const c_char, O_RDWR) }; + assert!(sys_fd >= 0); + + let vm_fd = unsafe { ioctl(&File::from_raw_fd(sys_fd), KVM_CREATE_VM()) }; + assert!(vm_fd >= 0); + } + + #[test] + fn check_vm_extension() { + let sys_fd = unsafe { open(KVM_PATH.as_ptr() as *const c_char, O_RDWR) }; + assert!(sys_fd >= 0); + + let has_user_memory = unsafe { + ioctl_with_val( + &File::from_raw_fd(sys_fd), + KVM_CHECK_EXTENSION(), + KVM_CAP_USER_MEMORY.into(), + ) + }; + assert_eq!(has_user_memory, 1); + } +} diff --git a/kvm-ioctls/src/lib.rs b/kvm-ioctls/src/lib.rs new file mode 100644 index 000000000..2f3a0b0da --- /dev/null +++ b/kvm-ioctls/src/lib.rs @@ -0,0 +1,26 @@ +extern crate kvm_bindings; +extern crate libc; +#[macro_use] +extern crate vmm_sys_util; + +#[macro_use] +mod kvm_ioctls; +mod cap; +mod ioctls; + +pub use cap::Cap; +pub use ioctls::device::DeviceFd; +pub use ioctls::system::Kvm; +pub use ioctls::vcpu::{VcpuExit, VcpuFd}; + +pub 
use ioctls::vm::{IoEventAddress, NoDatamatch, VmFd}; +// The following example is used to verify that our public +// structures are exported properly. +/// # Example +/// +/// ``` +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// use kvm_ioctls::{Error, KvmRunWrapper}; +/// ``` +pub use ioctls::KvmRunWrapper; +pub use vmm_sys_util::errno::Error; diff --git a/license/LICENSE b/license/LICENSE new file mode 100644 index 000000000..9e32cdef1 --- /dev/null +++ b/license/LICENSE @@ -0,0 +1,127 @@ + 木兰宽松许可证, 第2版 + + 木兰宽松许可证, 第2版 + 2020年1月 http://license.coscl.org.cn/MulanPSL2 + + + 您对“软件”的复制、使用、修改及分发受木兰宽松许可证,第2版(“本许可证”)的如下条款的约束: + + 0. 定义 + + “软件”是指由“贡献”构成的许可在“本许可证”下的程序和相关文档的集合。 + + “贡献”是指由任一“贡献者”许可在“本许可证”下的受版权法保护的作品。 + + “贡献者”是指将受版权法保护的作品许可在“本许可证”下的自然人或“法人实体”。 + + “法人实体”是指提交贡献的机构及其“关联实体”。 + + “关联实体”是指,对“本许可证”下的行为方而言,控制、受控制或与其共同受控制的机构,此处的控制是指有受控方或共同受控方至少50%直接或间接的投票权、资金或其他有价证券。 + + 1. 授予版权许可 + + 每个“贡献者”根据“本许可证”授予您永久性的、全球性的、免费的、非独占的、不可撤销的版权许可,您可以复制、使用、修改、分发其“贡献”,不论修改与否。 + + 2. 授予专利许可 + + 每个“贡献者”根据“本许可证”授予您永久性的、全球性的、免费的、非独占的、不可撤销的(根据本条规定撤销除外)专利许可,供您制造、委托制造、使用、许诺销售、销售、进口其“贡献”或以其他方式转移其“贡献”。前述专利许可仅限于“贡献者”现在或将来拥有或控制的其“贡献”本身或其“贡献”与许可“贡献”时的“软件”结合而将必然会侵犯的专利权利要求,不包括对“贡献”的修改或包含“贡献”的其他结合。如果您或您的“关联实体”直接或间接地,就“软件”或其中的“贡献”对任何人发起专利侵权诉讼(包括反诉或交叉诉讼)或其他专利维权行动,指控其侵犯专利权,则“本许可证”授予您对“软件”的专利许可自您提起诉讼或发起维权行动之日终止。 + + 3. 无商标许可 + + “本许可证”不提供对“贡献者”的商品名称、商标、服务标志或产品名称的商标许可,但您为满足第4条规定的声明义务而必须使用除外。 + + 4. 分发限制 + + 您可以在任何媒介中将“软件”以源程序形式或可执行形式重新分发,不论修改与否,但您必须向接收者提供“本许可证”的副本,并保留“软件”中的版权、商标、专利及免责声明。 + + 5. 免责声明与责任限制 + + “软件”及其中的“贡献”在提供时不带任何明示或默示的担保。在任何情况下,“贡献者”或版权所有者不对任何人因使用“软件”或其中的“贡献”而引发的任何直接或间接损失承担责任,不论因何种原因导致或者基于何种法律理论,即使其曾被建议有此种损失的可能性。 + + 6. 
语言 + “本许可证”以中英文双语表述,中英文版本具有同等法律效力。如果中英文版本存在任何冲突不一致,以中文版为准。 + + 条款结束 + + 如何将木兰宽松许可证,第2版,应用到您的软件 + + 如果您希望将木兰宽松许可证,第2版,应用到您的新软件,为了方便接收者查阅,建议您完成如下三步: + + 1, 请您补充如下声明中的空白,包括软件名、软件的首次发表年份以及您作为版权人的名字; + + 2, 请您在软件包的一级目录下创建以“LICENSE”为名的文件,将整个许可证文本放入该文件中; + + 3, 请将如下声明文本放入每个源文件的头部注释中。 + + Copyright (c) [Year] [name of copyright holder] + [Software Name] is licensed under Mulan PSL v2. + You can use this software according to the terms and conditions of the Mulan PSL v2. + You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 + THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + See the Mulan PSL v2 for more details. + + + Mulan Permissive Software License,Version 2 + + Mulan Permissive Software License,Version 2 (Mulan PSL v2) + January 2020 http://license.coscl.org.cn/MulanPSL2 + + Your reproduction, use, modification and distribution of the Software shall be subject to Mulan PSL v2 (this License) with the following terms and conditions: + + 0. Definition + + Software means the program and related documents which are licensed under this License and comprise all Contribution(s). + + Contribution means the copyrightable work licensed by a particular Contributor under this License. + + Contributor means the Individual or Legal Entity who licenses its copyrightable work under this License. + + Legal Entity means the entity making a Contribution and all its Affiliates. + + Affiliates means entities that control, are controlled by, or are under common control with the acting entity under this License, ‘control’ means direct or indirect ownership of at least fifty percent (50%) of the voting power, capital or other securities of controlled or commonly controlled entity. + + 1. 
Grant of Copyright License + + Subject to the terms and conditions of this License, each Contributor hereby grants to you a perpetual, worldwide, royalty-free, non-exclusive, irrevocable copyright license to reproduce, use, modify, or distribute its Contribution, with modification or not. + + 2. Grant of Patent License + + Subject to the terms and conditions of this License, each Contributor hereby grants to you a perpetual, worldwide, royalty-free, non-exclusive, irrevocable (except for revocation under this Section) patent license to make, have made, use, offer for sale, sell, import or otherwise transfer its Contribution, where such patent license is only limited to the patent claims owned or controlled by such Contributor now or in future which will be necessarily infringed by its Contribution alone, or by combination of the Contribution with the Software to which the Contribution was contributed. The patent license shall not apply to any modification of the Contribution, and any other combination which includes the Contribution. If you or your Affiliates directly or indirectly institute patent litigation (including a cross claim or counterclaim in a litigation) or other patent enforcement activities against any individual or entity by alleging that the Software or any Contribution in it infringes patents, then any patent license granted to you under this License for the Software shall terminate as of the date such litigation or activity is filed or taken. + + 3. No Trademark License + + No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, except as required to fulfill notice requirements in Section 4. + + 4. Distribution Restriction + + You may distribute the Software in any medium with or without modification, whether in source or executable forms, provided that you provide recipients with a copy of this License and retain copyright, patent, trademark and disclaimer statements in the Software. 
+ + 5. Disclaimer of Warranty and Limitation of Liability + + THE SOFTWARE AND CONTRIBUTION IN IT ARE PROVIDED WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED. IN NO EVENT SHALL ANY CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE TO YOU FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO ANY DIRECT, OR INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM YOUR USE OR INABILITY TO USE THE SOFTWARE OR THE CONTRIBUTION IN IT, NO MATTER HOW IT’S CAUSED OR BASED ON WHICH LEGAL THEORY, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + + 6. Language + + THIS LICENSE IS WRITTEN IN BOTH CHINESE AND ENGLISH, AND THE CHINESE VERSION AND ENGLISH VERSION SHALL HAVE THE SAME LEGAL EFFECT. IN THE CASE OF DIVERGENCE BETWEEN THE CHINESE AND ENGLISH VERSIONS, THE CHINESE VERSION SHALL PREVAIL. + + END OF THE TERMS AND CONDITIONS + + How to Apply the Mulan Permissive Software License,Version 2 (Mulan PSL v2) to Your Software + + To apply the Mulan PSL v2 to your work, for easy identification by recipients, you are suggested to complete following three steps: + + i Fill in the blanks in following statement, including insert your software name, the year of the first publication of your software, and your name identified as the copyright owner; + + ii Create a file named “LICENSE” which contains the whole context of this License in the first directory of your software package; + + iii Attach the statement to the appropriate annotated syntax at the beginning of each source file. + + + Copyright (c) [Year] [name of copyright holder] + [Software Name] is licensed under Mulan PSL v2. + You can use this software according to the terms and conditions of the Mulan PSL v2. + You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 + THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ See the Mulan PSL v2 for more details. diff --git a/license/Third_Party_Open_Source_Software_Notice.md b/license/Third_Party_Open_Source_Software_Notice.md new file mode 100644 index 000000000..51a17b2af --- /dev/null +++ b/license/Third_Party_Open_Source_Software_Notice.md @@ -0,0 +1,358 @@ +THIRD PARTY OPEN SOURCE SOFTWARE NOTICE +Please note we provide an open source software notice for the third party open source software along with this software and/or this software component contributed by Huawei (in the following just “this SOFTWARE”). The open source software licenses are granted by the respective right holders. + +Warranty Disclaimer +THE OPEN SOURCE SOFTWARE IN THIS SOFTWARE IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE APPLICABLE LICENSES FOR MORE DETAILS. + +Copyright Notice and License Texts +Software: libc 0.2.71 +Copyright notice: +Copyright (c) 2014-2020 The Rust Project Developers +License: MIT or Apache License Version 2.0 + +Copyright (C) + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +---------------------------------------------------------------- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Software: log 0.4.8 +Copyright notice: +Copyright (c) 2014 The Rust Project Developers +Copyright 2014-2015 The Rust Project Developers +Copyright 2015 The Rust Project Developers +License: MIT or Apache License Version 2.0 +Please see above. + +Software: byteorder 1.3.4 +Copyright notice: +Copyright (c) 2015 Andrew Gallant +License: MIT or Unlicense +Please see above. + +---------------------------------------------------------------- + +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to + +Software: serde 1.0.114 +Copyright notice: +Copyright (c) David Tolnay +Copyright (c) Erick Tryzelaar +License: MIT or Apache License Version 2.0 +Please see above. + +Software: serde_json 1.0.55 +Copyright notice: +Copyright (c) David Tolnay +Copyright (c) Erick Tryzelaar +License: MIT or Apache License Version 2.0 +Please see above. + +Software: error-chain 0.12.4 +Copyright notice: +Copyright (c) 2017 The Error-Chain Project Developers +License: MIT or Apache License Version 2.0 +Please see above. + +Software: vmm-sys-util 0.6.1 +Copyright notice: +Copyright 2019 Intel Corporation. All Rights Reserved. +Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +Copyright 2017 The Chromium OS Authors. All rights reserved. +Copyright (C) 2019 Alibaba Cloud Computing. All rights reserved. +Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Copyright 2018 The Chromium OS Authors. All rights reserved. +Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +License: Apache License Version 2.0 or BSD 3-Clause +Please see above. + +---------------------------------------------------------------- + +Copyright (c) . All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + 1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + 2. 
Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + 3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Software: kvm-ioctls 0.6.0 +Copyright notice: +Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +Copyright 2017 The Chromium OS Authors. All rights reserved. +Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +License: MIT or Apache License Version 2.0 +Please see above. + +Software: kvm-bindings 0.3.0 +Copyright notice: +Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +License: The APACHE 2.0 License +Please see above. 
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 000000000..a447ce483 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,257 @@ +use std::sync::Arc; +use kvm_ioctls::{Kvm, VcpuExit}; +use std::mem::size_of; +use libc; +use kvm_bindings::{kvm_userspace_memory_region,user_regs_struct, kvm_riscv_core,}; +use kvm_bindings::{KVM_REG_RISCV, KVM_REG_SIZE_U64, KVM_REG_RISCV_CORE}; + +#[macro_export] +macro_rules! __offset_of { + ($type_name: ty, $field:ident) => {{ + let tmp = core::mem::MaybeUninit::<$type_name>::uninit(); + let outer = tmp.as_ptr(); + + let inner = unsafe { core::ptr::addr_of!((*outer).$field) } as *const u8; + unsafe { + inner.offset_from(outer as *const u8) as usize + } + }}; +} + +#[macro_export] +macro_rules! offset_of { + ($type_name: ty, $field: ident) => { + $crate::__offset_of!($type_name, $field) + }; + ($type_name: ty, $field: ident, $($sub_type_name: ty, $sub_field: ident), +) => { + $crate::__offset_of!($type_name, $field) + offset_of!($($sub_type_name, $sub_field), +) + }; +} + +#[allow(dead_code)] +pub enum RISCVCoreRegs { + PC, + RA, + SP, + GP, + TP, + T0, + T1, + T2, + S0, + S1, + A0, + A1, + A2, + A3, + A4, + A5, + A6, + A7, + S2, + S3, + S4, + S5, + S6, + S7, + S8, + S9, + S10, + S11, + T3, + T4, + T5, + T6, + MODE, +} + +impl Into for RISCVCoreRegs { + fn into(self) -> u64 { + let reg_offset = match self { + RISCVCoreRegs::PC => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, pc) + } + RISCVCoreRegs::RA => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, ra) + } + RISCVCoreRegs::SP => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, sp) + } + RISCVCoreRegs::GP => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, gp) + } + RISCVCoreRegs::TP => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, tp) + } + RISCVCoreRegs::T0 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t0) + } + RISCVCoreRegs::T1 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t1) + } + RISCVCoreRegs::T2 => { + 
offset_of!(kvm_riscv_core, regs, user_regs_struct, t2) + } + RISCVCoreRegs::S0 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s0) + } + RISCVCoreRegs::S1 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s1) + } + RISCVCoreRegs::A0 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a0) + } + RISCVCoreRegs::A1 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a1) + } + RISCVCoreRegs::A2 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a2) + } + RISCVCoreRegs::A3 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a3) + } + RISCVCoreRegs::A4 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a4) + } + RISCVCoreRegs::A5 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a5) + } + RISCVCoreRegs::A6 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a6) + } + RISCVCoreRegs::A7 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a7) + } + RISCVCoreRegs::S2 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s2) + } + RISCVCoreRegs::S3 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s3) + } + RISCVCoreRegs::S4 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s4) + } + RISCVCoreRegs::S5 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s5) + } + RISCVCoreRegs::S6 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s6) + } + RISCVCoreRegs::S7 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s7) + } + RISCVCoreRegs::S8 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s8) + } + RISCVCoreRegs::S9 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s9) + } + RISCVCoreRegs::S10 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s10) + } + RISCVCoreRegs::S11 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s11) + } + RISCVCoreRegs::T3 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t3) + } + RISCVCoreRegs::T4 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t4) + } + RISCVCoreRegs::T5 => { + 
offset_of!(kvm_riscv_core, regs, user_regs_struct, t5) + } + RISCVCoreRegs::T6 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t6) + } + RISCVCoreRegs::MODE => { + offset_of!(kvm_riscv_core, mode) + } + }; + + println!("reg_offset is {reg_offset}"); + KVM_REG_RISCV as u64 + | KVM_REG_SIZE_U64 as u64 + | u64::from(KVM_REG_RISCV_CORE) + | (reg_offset / size_of::()) as u64 + } +} +fn main() { + let mem_size = 0x400000; + let guest_addr = 0x80002000; + + let asm_code: &[u8] = &[ + 0x13, 0x05, 0x80, 0x3f, + 0x0c, 0x61, + 0x01, 0xa0, + ]; + + // 1. create vm + let kvm = Kvm::new().expect("Failed to open /dev/kvm"); + let vm_fd = Arc::new(kvm.create_vm().expect("Failed to create a vm")); + + // 2. init memory + let host_addr: *mut u8 = unsafe { + libc::mmap( + std::ptr::null_mut() as *mut libc::c_void, + mem_size as libc::size_t, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, + -1, + 0, + ) as *mut u8 + }; + + let kvm_region = kvm_userspace_memory_region { + slot: 0, + guest_phys_addr: guest_addr, + memory_size: mem_size as u64, + userspace_addr: host_addr as u64, + flags: 0, + }; + + unsafe { + vm_fd + .set_user_memory_region(kvm_region) + .expect("Failed to set memory region to KVM") + }; + // copy asm code + unsafe { + let mut slice = std::slice::from_raw_parts_mut(host_addr, mem_size); + let mut index = 0; + slice.fill_with(|| { + index += 1; + asm_code.get(index-1).copied().unwrap_or(0) + }); + } + + // 3. Create Vcpu, init register + let vcpu_fd = vm_fd.create_vcpu(0).expect("Failed to create vcpu"); + println!("vm is created!"); + { + vcpu_fd.set_one_reg(RISCVCoreRegs::PC.into(), guest_addr as u128); + } + println!("vm is run!"); + + // 4. 
catch Exception + loop { + match vcpu_fd.run().expect("Vcpu run failed") { + VcpuExit::IoIn(addr, data) => { + println!("VmExit IoIn, addr is 0x{:x}, data is {}",addr,data[0]) + } + VcpuExit::IoOut(addr, data) => { + println!("VmExit IoOut, addr is 0x{:x}, data is {}",addr,data[0]) + } + VcpuExit::MmioRead(addr, data) => { + println!("VmExit MMIO read, addr is 0x{:x}", addr) + } + VcpuExit::MmioWrite(addr, data) => { + println!("VmExit MMIO write, addr is 0x{:x}", addr) + } + VcpuExit::Hlt => { + println!("VmExit Hlt") + } + r => panic!("Unknown Exit type: {:?}", r) + } + } +} -- Gitee From b094db1546f00fa00d2f8a47ab2e3ec6bdc91348 Mon Sep 17 00:00:00 2001 From: sts Date: Thu, 27 Jun 2024 14:08:00 +0000 Subject: [PATCH 2/8] =?UTF-8?q?=E5=86=85=E5=AD=98=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/helper/byte_code.rs | 43 ++++++++++++++ src/helper/mod.rs | 1 + src/main.rs | 46 ++++----------- src/memory/guest_memory.rs | 111 +++++++++++++++++++++++++++++++++++++ src/memory/host_mmap.rs | 59 ++++++++++++++++++++ src/memory/mod.rs | 56 +++++++++++++++++++ 6 files changed, 280 insertions(+), 36 deletions(-) create mode 100644 src/helper/byte_code.rs create mode 100644 src/helper/mod.rs create mode 100644 src/memory/guest_memory.rs create mode 100644 src/memory/host_mmap.rs create mode 100644 src/memory/mod.rs diff --git a/src/helper/byte_code.rs b/src/helper/byte_code.rs new file mode 100644 index 000000000..0bc342c7e --- /dev/null +++ b/src/helper/byte_code.rs @@ -0,0 +1,43 @@ +use std::mem::size_of; +use std::slice::{ from_raw_parts, from_raw_parts_mut }; + +pub trait ByteCode: Default + Copy + Send + Sync { + fn as_bytes(&self) -> &[u8] { + unsafe { from_raw_parts(self as *const Self as *const u8, size_of::()) } + } + + fn as_mut_bytes(&mut self) ->&mut [u8] { + unsafe { from_raw_parts_mut(self as *mut Self as *mut u8, size_of::()) } + } + + fn from_bytes(data: &[u8]) -> Option<&Self> { + if 
data.len() != size_of::() { + return None; + } + + let obj_array = unsafe { from_raw_parts::(data.as_ptr() as *const _, data.len()) }; + Some(&obj_array[0]) + } + + fn from_mut_bytes(data: &mut [u8]) -> Option<&mut Self> { + if data.len() != size_of::() { + return None; + } + + let obj_array = unsafe { from_raw_parts_mut::(data.as_mut_ptr() as *mut _, data.len()) }; + Some(&mut obj_array[0]) + } +} + + +impl ByteCode for usize {} +impl ByteCode for u8 {} +impl ByteCode for u16 {} +impl ByteCode for u32 {} +impl ByteCode for u64 {} +impl ByteCode for isize {} +impl ByteCode for i8 {} +impl ByteCode for i16 {} +impl ByteCode for i32 {} +impl ByteCode for i64 {} + diff --git a/src/helper/mod.rs b/src/helper/mod.rs new file mode 100644 index 000000000..e3e7dbe68 --- /dev/null +++ b/src/helper/mod.rs @@ -0,0 +1 @@ +pub mod byte_code; diff --git a/src/main.rs b/src/main.rs index a447ce483..cccd8f5cb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,12 @@ +mod helper; +mod memory; + use std::sync::Arc; use kvm_ioctls::{Kvm, VcpuExit}; use std::mem::size_of; -use libc; -use kvm_bindings::{kvm_userspace_memory_region,user_regs_struct, kvm_riscv_core,}; +use kvm_bindings::{user_regs_struct, kvm_riscv_core,}; use kvm_bindings::{KVM_REG_RISCV, KVM_REG_SIZE_U64, KVM_REG_RISCV_CORE}; +use memory::GuestMemory; #[macro_export] macro_rules! __offset_of { @@ -191,40 +194,11 @@ fn main() { let vm_fd = Arc::new(kvm.create_vm().expect("Failed to create a vm")); // 2. 
init memory - let host_addr: *mut u8 = unsafe { - libc::mmap( - std::ptr::null_mut() as *mut libc::c_void, - mem_size as libc::size_t, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, - -1, - 0, - ) as *mut u8 - }; - - let kvm_region = kvm_userspace_memory_region { - slot: 0, - guest_phys_addr: guest_addr, - memory_size: mem_size as u64, - userspace_addr: host_addr as u64, - flags: 0, - }; - - unsafe { - vm_fd - .set_user_memory_region(kvm_region) - .expect("Failed to set memory region to KVM") - }; - // copy asm code - unsafe { - let mut slice = std::slice::from_raw_parts_mut(host_addr, mem_size); - let mut index = 0; - slice.fill_with(|| { - index += 1; - asm_code.get(index-1).copied().unwrap_or(0) - }); - } - + let guest_memory = GuestMemory::new(&vm_fd, mem_size).expect("Failed to init guest memory"); + let asm_code_len = asm_code.len() as u64; + guest_memory + .write(&mut asm_code[..].as_ref(), guest_addr, asm_code_len) + .expect("Failed to load asm code to memory"); // 3. 
Create Vcpu, init register let vcpu_fd = vm_fd.create_vcpu(0).expect("Failed to create vcpu"); println!("vm is created!"); diff --git a/src/memory/guest_memory.rs b/src/memory/guest_memory.rs new file mode 100644 index 000000000..8a121690f --- /dev/null +++ b/src/memory/guest_memory.rs @@ -0,0 +1,111 @@ +use std::sync::Arc; +use kvm_bindings::kvm_userspace_memory_region; +use kvm_ioctls::VmFd; + +use super::host_mmap::HostMemMapping; +use super::{Error, LayoutEntryType, MEM_LAYOUT, Result}; + +use crate::helper::byte_code::ByteCode; + +#[derive(Clone)] +pub struct GuestMemory { + host_mmaps: Vec>, +} + +impl GuestMemory { + pub fn new(vm_fd: &Arc, mem_size: u64) -> Result { + let ranges = Self::arch_ram_ranges(mem_size); + + let mut host_mmaps = Vec::new(); + for (index, range) in ranges.iter().enumerate() { + println!("mmap start 0x{:x}, size 0x{:x}", range.0, range.1); + let host_mmap = Arc::new(HostMemMapping::new(range.0, range.1)?); + host_mmaps.push(host_mmap.clone()); + + let kvm_region = kvm_userspace_memory_region { + slot: index as u32, + guest_phys_addr: host_mmap.guest_addr(), + memory_size: host_mmap.size(), + userspace_addr: host_mmap.host_addr(), + flags: 0 + }; + unsafe { + vm_fd + .set_user_memory_region(kvm_region) + .map_err(Error::KvmSetMR)?; + } + } + Ok(GuestMemory{ host_mmaps }) + } + + pub fn arch_ram_ranges(mem_size: u64) -> Vec<(u64, u64)> { + let mut ranges = Vec::<(u64, u64)>::new(); + + let gap_start = MEM_LAYOUT[LayoutEntryType::MemRAM as usize].0; + let gap_size = MEM_LAYOUT[LayoutEntryType::MemRAM as usize].1; + let gap_end = MEM_LAYOUT[LayoutEntryType::MemRAM as usize].0 + + MEM_LAYOUT[LayoutEntryType::MemRAM as usize].1; + ranges.push((gap_start, mem_size)); + + if mem_size > gap_size { + ranges.push((gap_end, mem_size - gap_size)); + } + ranges + } + + pub fn find_host_mmap(&self, addr: u64, size: u64) -> Result> { + for host_mmap in &self.host_mmaps { + if addr >= host_mmap.guest_addr() + && addr < host_mmap.guest_addr() + 
host_mmap.size() + { + if addr + size >= host_mmap.guest_addr() + host_mmap.size() { + return Err(Error::Overflow( + addr - host_mmap.guest_addr(), + size, + host_mmap.size() + )) + } + return Ok(host_mmap.clone()); + } + } + return Err(Error::HostMmapNotFound(addr)); + } + + pub fn read(&self, dst: &mut dyn std::io::Write, addr: u64, count: u64) -> Result<()> { + let host_mmap = self.find_host_mmap(addr, count)?; + let offset = addr - host_mmap.guest_addr(); + let host_addr = host_mmap.host_addr(); + + let slice = unsafe { + std::slice::from_raw_parts((host_addr + offset) as *const u8, count as usize) + }; + dst.write_all(slice).map_err(Error::IoError)?; + Ok(()) + } + + pub fn write(&self, src: &mut dyn std::io::Read, addr: u64, count: u64) -> Result<()> { + let host_mmap = self.find_host_mmap(addr, count)?; + let offset = addr - host_mmap.guest_addr(); + let host_addr = host_mmap.host_addr(); + + let slice = unsafe { + std::slice::from_raw_parts_mut((host_addr + offset) as *mut u8, count as usize) + }; + src.read_exact(slice).map_err(Error::IoError)?; + Ok(()) + } + + pub fn write_object(&self, data: &T, addr: u64) -> Result<()> { + self.write(&mut data.as_bytes(), addr, std::mem::size_of::() as u64) + } + + pub fn read_object(&self, addr: u64) -> Result { + let mut obj = T::default(); + self.read( + &mut obj.as_mut_bytes(), + addr, + std::mem::size_of::() as u64 + )?; + Ok(obj) + } +} diff --git a/src/memory/host_mmap.rs b/src/memory/host_mmap.rs new file mode 100644 index 000000000..7281b5673 --- /dev/null +++ b/src/memory/host_mmap.rs @@ -0,0 +1,59 @@ +use super::{Error, Result}; + +pub struct HostMemMapping { + // guest physical address of memmapping + guest_addr: u64, + // memmapping size + size: u64, + // host virtual address of memmapping + host_addr: u64, +} + +impl HostMemMapping { + pub fn new(guest_addr: u64, size: u64) -> Result { + let host_addr = unsafe { + let hva = libc::mmap( + std::ptr::null_mut() as *mut libc::c_void, + size as libc::size_t, + 
libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, + -1, + 0 + ); + if hva == libc::MAP_FAILED { + return Err(Error::Mmap(std::io::Error::last_os_error())); + } + hva + }; + + + Ok(HostMemMapping { + guest_addr, + size, + host_addr: host_addr as u64, + }) + } + + pub fn size(&self) -> u64 { + self.size + } + + pub fn guest_addr(&self) -> u64 { + self.guest_addr + } + + pub fn host_addr(&self) -> u64 { + self.host_addr + } +} + +impl Drop for HostMemMapping { + fn drop(&mut self) { + unsafe { + libc::munmap( + self.host_addr as *mut libc::c_void, + self.size as libc::size_t + ); + } + } +} diff --git a/src/memory/mod.rs b/src/memory/mod.rs new file mode 100644 index 000000000..a6a33fa6a --- /dev/null +++ b/src/memory/mod.rs @@ -0,0 +1,56 @@ +mod host_mmap; +mod guest_memory; + +pub use guest_memory::GuestMemory; + +#[derive(Debug)] +pub enum Error { + Overflow(u64, u64, u64), + HostMmapNotFound(u64), + Mmap(std::io::Error), + IoError(std::io::Error), + KvmSetMR(kvm_ioctls::Error), +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Error::Overflow(offset, count, size) => write!( + f, + "Failed to read/write memory, offset in host_mmap 0x{:x}, count {}, host_mmap size {}", + offset, count, size + ), + Error::HostMmapNotFound(addr) => write!( + f, + "Failed to find matched HostMemMaping, addr 0x{:x}", + addr + ), + Error::Mmap(ref e) => write!( + f, + "Failed to mmap, error is {}", e + ), + Error::IoError(ref e) => write!( + f, + "IO Error occurs when read/write memory, error is {}",e + ), + Error::KvmSetMR(ref e) => write!( + f, + "Failed to set memory region to KVM, error is {}", e + ), + } + } +} + +pub type Result = std::result::Result; + +#[repr(usize)] +pub enum LayoutEntryType { + MemRAM = 0_usize, + Mmio, + IoApic, + LocalApic, +} + +pub const MEM_LAYOUT: &[(u64, u64)] = &[ + (0x8000_0000, 0xC000_0000) +]; -- Gitee From d21e2faa19f1a68f23453e0375a4fc58c8c278d2 
Mon Sep 17 00:00:00 2001 From: sts Date: Tue, 16 Jul 2024 07:30:05 +0000 Subject: [PATCH 3/8] =?UTF-8?q?CPU=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cpu/mod.rs | 243 +++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 220 ++++---------------------------------------- 2 files changed, 259 insertions(+), 204 deletions(-) create mode 100644 src/cpu/mod.rs diff --git a/src/cpu/mod.rs b/src/cpu/mod.rs new file mode 100644 index 000000000..b69452613 --- /dev/null +++ b/src/cpu/mod.rs @@ -0,0 +1,243 @@ +use kvm_bindings::{user_regs_struct, kvm_riscv_core, KVM_REG_RISCV, KVM_REG_SIZE_U64, KVM_REG_RISCV_CORE}; +use kvm_ioctls::{VmFd, VcpuFd, VcpuExit}; +use std::mem::size_of; +use std::thread; +use std::sync::Arc; + +#[macro_export] +macro_rules! __offset_of { + ($type_name: ty, $field: ident) => {{ + let tmp = std::mem::MaybeUninit::<$type_name>::uninit(); + let outer = tmp.as_ptr(); + + let inner = unsafe { core::ptr::addr_of!((*outer).$field) } as *const u8; + unsafe { + inner.offset_from( outer as *const u8 ) as usize + } + }} +} + +#[macro_export] +macro_rules! 
offset_of { + ($type_name: ty, $field: ident) => { + $crate::__offset_of!($type_name, $field) + }; + ($type_name: ty, $field: ident, $($sub_type_name: ty, $sub_field: ident), +) => { + $crate::__offset_of!($type_name, $field) + offset_of!($($sub_type_name, $sub_field), +) + }; +} + +pub enum RISCVCoreRegs { + PC, + RA, + SP, + GP, + TP, + T0, + T1, + T2, + S0, + S1, + A0, + A1, + A2, + A3, + A4, + A5, + A6, + A7, + S2, + S3, + S4, + S5, + S6, + S7, + S8, + S9, + S10, + S11, + T3, + T4, + T5, + T6, + MODE, +} + +impl Into for RISCVCoreRegs { + fn into(self) -> u64 { + let reg_offset = match self { + RISCVCoreRegs::PC => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, pc) + } + RISCVCoreRegs::RA => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, ra) + } + RISCVCoreRegs::SP => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, sp) + } + RISCVCoreRegs::GP => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, gp) + } + RISCVCoreRegs::TP => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, tp) + } + RISCVCoreRegs::T0 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t0) + } + RISCVCoreRegs::T1 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t0) + } + RISCVCoreRegs::T2 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t2) + } + RISCVCoreRegs::S0 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s0) + } + RISCVCoreRegs::S1 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s1) + } + RISCVCoreRegs::A0 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a0) + } + RISCVCoreRegs::A1 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a1) + } + RISCVCoreRegs::A2 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a2) + } + RISCVCoreRegs::A3 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a3) + } + RISCVCoreRegs::A4 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a4) + } + RISCVCoreRegs::A5 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a5) + } + RISCVCoreRegs::A6 
=> { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a6) + } + RISCVCoreRegs::A7 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a7) + } + RISCVCoreRegs::S2 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s2) + } + RISCVCoreRegs::S3 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s3) + } + RISCVCoreRegs::S4 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s4) + } + RISCVCoreRegs::S5 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s5) + } + RISCVCoreRegs::S6 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s6) + } + RISCVCoreRegs::S7 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s7) + } + RISCVCoreRegs::S8 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s8) + } + RISCVCoreRegs::S9 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s9) + } + RISCVCoreRegs::S10 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s10) + } + RISCVCoreRegs::S11 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s11) + } + RISCVCoreRegs::T3 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t3) + } + RISCVCoreRegs::T4 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t4) + } + RISCVCoreRegs::T5 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t5) + } + RISCVCoreRegs::T6 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t6) + } + RISCVCoreRegs::MODE => { + offset_of!(kvm_riscv_core, mode) + } + }; + KVM_REG_RISCV as u64 + | KVM_REG_SIZE_U64 as u64 + | u64::from(KVM_REG_RISCV_CORE) + | (reg_offset / size_of::()) as u64 + } +} + +pub struct CPU { + pub id: u8, + fd: VcpuFd, +} + +impl CPU { + pub fn new(vm_fd: &Arc, vcpu_id: u8) -> Self { + let vcpu_fd = vm_fd.create_vcpu(vcpu_id as u64).expect(&format!("Failed to create Vcpu{}",vcpu_id)); + Self { + id: vcpu_id, + fd: vcpu_fd, + } + } + + pub fn set_register(&self, reg: RISCVCoreRegs, value: u128) { + self.fd.set_one_reg(reg.into(), value); + } + + pub fn get_register(&self, reg: RISCVCoreRegs) -> u64{ + 
self.fd.get_one_reg(reg.into()).unwrap() as u64 + } + + pub fn kvm_vcpu_exec(&self) -> bool { + println!("Vcpu{}, PC = 0x{:x}",self.id, self.fd.get_one_reg(RISCVCoreRegs::PC.into()).unwrap()); + match self.fd.run().unwrap() { + VcpuExit::IoIn(addr, data) => { + println!("Vcpu{} VmExit IO in: addr 0x{:x}, data is {}", + self.id, addr, data[0] + ); + } + VcpuExit::IoOut(addr, data) => { + println!("Vcpu{} VmExit OUT in: addr 0x{:x}, data is {}", + self.id, addr, data[0] + ); + } + VcpuExit::MmioRead(addr, data) => { + println!("Vcpu{} VmExit MMIO read: addr 0x{:x}", + self.id, addr + ); + } + VcpuExit::MmioWrite(addr, data) => { + println!("Vcpu{} VmExit MMIO write: addr 0x{:x}", + self.id, addr + ); + } + VcpuExit::Hlt => { + println!("KVM_EXIT_HLT"); + return false; + } + r => panic!("Unexprcted exit reason: {:?}", r) + } + true + } + + pub fn start(arc_cpu: Arc) -> std::thread::JoinHandle<()> { + let cpu_id = arc_cpu.id; + thread::Builder::new() + .name(format!("CPU {}/KVM", cpu_id)) + .spawn(move || { + loop { + if !arc_cpu.kvm_vcpu_exec(){ + break; + } + } + println!("Vcpu{} exit", cpu_id); + }).expect(&format!("Failed to create thread for CPU {}/KVM", cpu_id)) + } +} diff --git a/src/main.rs b/src/main.rs index cccd8f5cb..3263ba3b3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,192 +1,22 @@ mod helper; mod memory; +mod cpu; +use cpu::{CPU, RISCVCoreRegs}; use std::sync::Arc; -use kvm_ioctls::{Kvm, VcpuExit}; -use std::mem::size_of; -use kvm_bindings::{user_regs_struct, kvm_riscv_core,}; -use kvm_bindings::{KVM_REG_RISCV, KVM_REG_SIZE_U64, KVM_REG_RISCV_CORE}; +use kvm_ioctls::Kvm; use memory::GuestMemory; +use std::time::Duration; +use std::thread; -#[macro_export] -macro_rules! 
__offset_of { - ($type_name: ty, $field:ident) => {{ - let tmp = core::mem::MaybeUninit::<$type_name>::uninit(); - let outer = tmp.as_ptr(); - - let inner = unsafe { core::ptr::addr_of!((*outer).$field) } as *const u8; - unsafe { - inner.offset_from(outer as *const u8) as usize - } - }}; -} - -#[macro_export] -macro_rules! offset_of { - ($type_name: ty, $field: ident) => { - $crate::__offset_of!($type_name, $field) - }; - ($type_name: ty, $field: ident, $($sub_type_name: ty, $sub_field: ident), +) => { - $crate::__offset_of!($type_name, $field) + offset_of!($($sub_type_name, $sub_field), +) - }; -} - -#[allow(dead_code)] -pub enum RISCVCoreRegs { - PC, - RA, - SP, - GP, - TP, - T0, - T1, - T2, - S0, - S1, - A0, - A1, - A2, - A3, - A4, - A5, - A6, - A7, - S2, - S3, - S4, - S5, - S6, - S7, - S8, - S9, - S10, - S11, - T3, - T4, - T5, - T6, - MODE, -} - -impl Into for RISCVCoreRegs { - fn into(self) -> u64 { - let reg_offset = match self { - RISCVCoreRegs::PC => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, pc) - } - RISCVCoreRegs::RA => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, ra) - } - RISCVCoreRegs::SP => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, sp) - } - RISCVCoreRegs::GP => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, gp) - } - RISCVCoreRegs::TP => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, tp) - } - RISCVCoreRegs::T0 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t0) - } - RISCVCoreRegs::T1 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t1) - } - RISCVCoreRegs::T2 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t2) - } - RISCVCoreRegs::S0 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s0) - } - RISCVCoreRegs::S1 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s1) - } - RISCVCoreRegs::A0 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a0) - } - RISCVCoreRegs::A1 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a1) - } - 
RISCVCoreRegs::A2 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a2) - } - RISCVCoreRegs::A3 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a3) - } - RISCVCoreRegs::A4 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a4) - } - RISCVCoreRegs::A5 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a5) - } - RISCVCoreRegs::A6 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a6) - } - RISCVCoreRegs::A7 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a7) - } - RISCVCoreRegs::S2 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s2) - } - RISCVCoreRegs::S3 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s3) - } - RISCVCoreRegs::S4 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s4) - } - RISCVCoreRegs::S5 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s5) - } - RISCVCoreRegs::S6 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s6) - } - RISCVCoreRegs::S7 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s7) - } - RISCVCoreRegs::S8 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s8) - } - RISCVCoreRegs::S9 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s9) - } - RISCVCoreRegs::S10 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s10) - } - RISCVCoreRegs::S11 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s11) - } - RISCVCoreRegs::T3 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t3) - } - RISCVCoreRegs::T4 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t4) - } - RISCVCoreRegs::T5 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t5) - } - RISCVCoreRegs::T6 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t6) - } - RISCVCoreRegs::MODE => { - offset_of!(kvm_riscv_core, mode) - } - }; - - println!("reg_offset is {reg_offset}"); - KVM_REG_RISCV as u64 - | KVM_REG_SIZE_U64 as u64 - | u64::from(KVM_REG_RISCV_CORE) - | (reg_offset / size_of::()) as u64 - } -} fn main() { let mem_size = 
0x400000; - let guest_addr = 0x80002000; + let guest_addr_0 = 0x80002000; - let asm_code: &[u8] = &[ + let asm_code_0: &[u8] = &[ 0x13, 0x05, 0x80, 0x3f, 0x0c, 0x61, - 0x01, 0xa0, + 0x01, 0xa0 ]; // 1. create vm @@ -195,37 +25,19 @@ fn main() { // 2. init memory let guest_memory = GuestMemory::new(&vm_fd, mem_size).expect("Failed to init guest memory"); - let asm_code_len = asm_code.len() as u64; + let asm_code_len = asm_code_0.len() as u64; guest_memory - .write(&mut asm_code[..].as_ref(), guest_addr, asm_code_len) + .write(&mut asm_code_0[..].as_ref(), guest_addr_0, asm_code_len) .expect("Failed to load asm code to memory"); // 3. Create Vcpu, init register - let vcpu_fd = vm_fd.create_vcpu(0).expect("Failed to create vcpu"); + let cpu0 = CPU::new(&vm_fd, 0); println!("vm is created!"); - { - vcpu_fd.set_one_reg(RISCVCoreRegs::PC.into(), guest_addr as u128); - } + cpu0.set_register(RISCVCoreRegs::PC, guest_addr_0 as u128); println!("vm is run!"); // 4. catch Exception - loop { - match vcpu_fd.run().expect("Vcpu run failed") { - VcpuExit::IoIn(addr, data) => { - println!("VmExit IoIn, addr is 0x{:x}, data is {}",addr,data[0]) - } - VcpuExit::IoOut(addr, data) => { - println!("VmExit IoOut, addr is 0x{:x}, data is {}",addr,data[0]) - } - VcpuExit::MmioRead(addr, data) => { - println!("VmExit MMIO read, addr is 0x{:x}", addr) - } - VcpuExit::MmioWrite(addr, data) => { - println!("VmExit MMIO write, addr is 0x{:x}", addr) - } - VcpuExit::Hlt => { - println!("VmExit Hlt") - } - r => panic!("Unknown Exit type: {:?}", r) - } - } + let arc_cpu0 = Arc::new(cpu0); + let cpu_task0 = CPU::start(arc_cpu0.clone()); + cpu_task0.join().expect(&format!("Failed to join thread task for cpu {}", arc_cpu0.id)); + } -- Gitee From 3cd3b17d5d5a66242e073af1a3e8c1d919af6f46 Mon Sep 17 00:00:00 2001 From: sts Date: Sat, 3 Aug 2024 13:21:48 +0000 Subject: [PATCH 4/8] =?UTF-8?q?BootLoader=20=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- kvm-bindings | 2 +- src/bootloader/mod.rs | 116 +++++++++++++++++++++++++++++++++ src/cpu/mod.rs | 130 ++++++++++++++++++++++++++++++++++++- src/device/mod.rs | 12 ++++ src/kvm/mod.rs | 16 +++++ src/main.rs | 50 +++++++------- src/memory/guest_memory.rs | 11 ++++ src/memory/mod.rs | 7 +- 8 files changed, 312 insertions(+), 32 deletions(-) create mode 100644 src/bootloader/mod.rs create mode 100644 src/device/mod.rs create mode 100644 src/kvm/mod.rs diff --git a/kvm-bindings b/kvm-bindings index b4ee5ac90..878160941 160000 --- a/kvm-bindings +++ b/kvm-bindings @@ -1 +1 @@ -Subproject commit b4ee5ac9052f94dc841b7c260fdb81139d78e827 +Subproject commit 87816094111da7f005107d5b888f1b2661916743 diff --git a/src/bootloader/mod.rs b/src/bootloader/mod.rs new file mode 100644 index 000000000..f7c2e490b --- /dev/null +++ b/src/bootloader/mod.rs @@ -0,0 +1,116 @@ + +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::fs::File; +use std::path::PathBuf; +use std::sync::Arc; + +use kvm_ioctls::VmFd; + +use crate::cpu::{CPU, CPUBootConfig}; +use crate::memory::GuestMemory; +use crate::device::FDT_MAX_SIZE; +use super::kvm::load_boot_source; + +const RISCV64_KERNEL_OFFSET: u64 = 0x20_0000; +const FDT_ALIGN: u64 = 0x40_0000; +const INITRD_ALIGN: u64 = 0x8; + +/// Boot loader config used for aarch64. +#[derive(Default, Debug)] +pub struct Riscv64BootLoaderConfig { + /// Path of kernel image. 
+ pub kernel: PathBuf, + /// Path of initrd image. + pub initrd: PathBuf, + /// Start address of guest memory. + pub mem_start: u64, +} + +/// The start address for `kernel image`, `initrd image` and `dtb` in guest memory. +pub struct Riscv64BootLoader { + /// Start address for `kernel` execute binary in guest memory. + pub kernel_start: u64, + /// Start address for `initrd image` in guest memory. + pub initrd_start: u64, + /// Initrd file size, 0 means no initrd file. + pub initrd_size: u64, + /// Start address for `dtb` in guest memory. + pub dtb_start: u64, +} + +/// Load PE(vmlinux.bin) linux kernel and other boot source to Guest Memory. +/// +/// # Steps +/// +/// 1. Prepare for linux kernel boot env, return guest memory layout. +/// 2. According guest memory layout, load linux kernel to guest memory. +/// 3. According guest memory layout, load initrd image to guest memory. +/// +/// # Arguments +/// +/// * `config` - boot source config, contains kernel, initrd. +/// * `sys_mem` - guest memory. +/// +/// # Errors +/// +/// Load kernel, initrd to guest memory failed. Boot source is broken or +/// guest memory is abnormal. 
+pub fn load_kernel( + config: &Riscv64BootLoaderConfig, + sys_mem: &Arc, +) -> Riscv64BootLoader { + let kernel_start = config.mem_start + RISCV64_KERNEL_OFFSET; + let mut kernel_image = File::open(&config.kernel).expect("Failed to open kernel file"); + let kernel_size = kernel_image.metadata().unwrap().len(); + let kernel_end = kernel_start + kernel_size; + sys_mem + .write(&mut kernel_image, kernel_start, kernel_size) + .expect("Failed to load kernel image to memory"); + + // align FDT ALIGN + let dtb_addr = (kernel_end + (FDT_ALIGN - 1)) & (!(FDT_ALIGN - 1)); + if dtb_addr + u64::from(FDT_MAX_SIZE) >= sys_mem.memory_end_address() { + panic!("no memory to load DTB") + } + let mut initrd_image = File::open(&config.initrd).expect("Failed to open initrd file"); + let initrd_size = initrd_image.metadata().unwrap().len(); + + let initrd_start = dtb_addr + u64::from(FDT_MAX_SIZE); + if initrd_start + u64::from(initrd_size) >= sys_mem.memory_end_address() { + panic!("no memory to load initrd image") + } + + sys_mem + .write(&mut initrd_image, initrd_start, initrd_size) + .expect("Failed to load initrd to memory"); + + Riscv64BootLoader { + kernel_start, + initrd_start, + initrd_size, + dtb_start: dtb_addr, + } +} + +pub fn kvm_load_kernel(guest_memory: &Arc, vcpu : &mut CPU, vm_fd: &Arc) -> Riscv64BootLoader{ + let layout = load_boot_source(guest_memory); + let cpu_boot_cfg = CPUBootConfig { + fdt_addr: layout.dtb_start, + kernel_addr: layout.kernel_start, + }; + vcpu.set_boot_config(&cpu_boot_cfg); + + + layout +} diff --git a/src/cpu/mod.rs b/src/cpu/mod.rs index b69452613..60dd317a0 100644 --- a/src/cpu/mod.rs +++ b/src/cpu/mod.rs @@ -1,4 +1,8 @@ -use kvm_bindings::{user_regs_struct, kvm_riscv_core, KVM_REG_RISCV, KVM_REG_SIZE_U64, KVM_REG_RISCV_CORE}; +use kvm_bindings::{ + user_regs_struct, kvm_riscv_core,kvm_riscv_config,kvm_riscv_timer, + KVM_REG_RISCV, KVM_REG_RISCV_CORE, KVM_REG_SIZE_U64, KVM_REG_RISCV_CONFIG, KVM_REG_RISCV_TIMER, + KVM_MP_STATE_STOPPED, 
KVM_SYSTEM_EVENT_RESET, +}; use kvm_ioctls::{VmFd, VcpuFd, VcpuExit}; use std::mem::size_of; use std::thread; @@ -27,6 +31,11 @@ macro_rules! offset_of { }; } +pub struct CPUBootConfig { + pub fdt_addr: u64, + pub kernel_addr: u64, +} + pub enum RISCVCoreRegs { PC, RA, @@ -172,10 +181,89 @@ impl Into for RISCVCoreRegs { | (reg_offset / size_of::()) as u64 } } +#[allow(non_camel_case_types)] +#[allow(dead_code)] +pub enum Riscv64ConfigRegs{ + ISA, + ZICBOM_BLOCK_SIZE, + MVENDORID, + MARCHID, + MIMPID, + ZICBOZ_BLOCK_SIZE, + SATP_MODE, +} +#[allow(clippy::zero_ptr)] +impl Into for Riscv64ConfigRegs{ + fn into(self) -> u64 { + let reg_offset = match self { + Riscv64ConfigRegs::ISA => { + offset_of!(kvm_riscv_config, isa) + } + Riscv64ConfigRegs::ZICBOM_BLOCK_SIZE => { + offset_of!(kvm_riscv_config, zicbom_block_size) + } + Riscv64ConfigRegs::MVENDORID => { + offset_of!(kvm_riscv_config, mvendorid) + } + Riscv64ConfigRegs::MARCHID => { + offset_of!(kvm_riscv_config, marchid) + } + Riscv64ConfigRegs::MIMPID => { + offset_of!(kvm_riscv_config, mimpid) + } + Riscv64ConfigRegs::ZICBOZ_BLOCK_SIZE => { + offset_of!(kvm_riscv_config, zicboz_block_size) + } + Riscv64ConfigRegs::SATP_MODE => { + offset_of!(kvm_riscv_config, satp_mode) + } + }; + KVM_REG_RISCV as u64 + | KVM_REG_SIZE_U64 as u64 + | u64::from(KVM_REG_RISCV_CONFIG) + | (reg_offset / size_of::()) as u64 + } +} +#[allow(non_camel_case_types)] +#[allow(dead_code)] +pub enum Riscv64Timer{ + FREQUENCY, + TIME, + COMPARE, + STATE, +} +#[allow(clippy::zero_ptr)] +impl Into for Riscv64Timer{ + fn into(self) -> u64 { + let reg_offset = match self { + Riscv64Timer::FREQUENCY => { + offset_of!(kvm_riscv_timer, frequency) + } + Riscv64Timer::TIME => { + offset_of!(kvm_riscv_timer, time) + } + Riscv64Timer::COMPARE => { + offset_of!(kvm_riscv_timer, compare) + } + Riscv64Timer::STATE => { + offset_of!(kvm_riscv_timer, state) + } + }; + KVM_REG_RISCV as u64 + | KVM_REG_SIZE_U64 as u64 + | u64::from(KVM_REG_RISCV_TIMER) + | 
(reg_offset / size_of::()) as u64 + } +} pub struct CPU { pub id: u8, fd: VcpuFd, + boot_ip: u64, + fdt_addr: u64, + pub isa: u64, + pub frequency: u64, + pub satp_mode: u64 } impl CPU { @@ -184,9 +272,25 @@ impl CPU { Self { id: vcpu_id, fd: vcpu_fd, + boot_ip: 0, + fdt_addr: 0, + isa: 0, + frequency: 0, + satp_mode: 0, } } + pub fn set_boot_config( + &mut self, + boot_config: &CPUBootConfig + ){ + self.boot_ip = boot_config.kernel_addr; + self.fdt_addr = boot_config.fdt_addr; + self.isa = self.fd.get_one_reg(Riscv64ConfigRegs::ISA.into()).unwrap() as u64; + self.satp_mode = self.fd.get_one_reg(Riscv64ConfigRegs::SATP_MODE.into()).unwrap() as u64; + self.frequency = self.fd.get_one_reg(Riscv64Timer::FREQUENCY.into()).unwrap() as u64; + } + pub fn set_register(&self, reg: RISCVCoreRegs, value: u128) { self.fd.set_one_reg(reg.into(), value); } @@ -194,6 +298,19 @@ impl CPU { pub fn get_register(&self, reg: RISCVCoreRegs) -> u64{ self.fd.get_one_reg(reg.into()).unwrap() as u64 } + pub fn reset_vcpu(&self) { + let mp_state = self.fd.get_mp_state().unwrap(); + if mp_state.mp_state == KVM_MP_STATE_STOPPED { + return; + } + self.fd.set_one_reg(RISCVCoreRegs::PC.into(), self.boot_ip as u128) + .expect("Failed to set PC register"); + self.fd.set_one_reg(RISCVCoreRegs::A0.into(), self.id as u128) + .expect("Failed to set hartid to a0"); + self.fd.set_one_reg(RISCVCoreRegs::A1.into(), self.fdt_addr as u128) + .expect("Failed to set fdt to a1"); + } + pub fn kvm_vcpu_exec(&self) -> bool { println!("Vcpu{}, PC = 0x{:x}",self.id, self.fd.get_one_reg(RISCVCoreRegs::PC.into()).unwrap()); @@ -217,7 +334,13 @@ impl CPU { println!("Vcpu{} VmExit MMIO write: addr 0x{:x}", self.id, addr ); - } + } + VcpuExit::SystemEvent(event_type, _ndata) => { + match event_type { + KVM_SYSTEM_EVENT_RESET => {}, + _ => return false + } + } VcpuExit::Hlt => { println!("KVM_EXIT_HLT"); return false; @@ -232,6 +355,7 @@ impl CPU { thread::Builder::new() .name(format!("CPU {}/KVM", cpu_id)) .spawn(move 
|| { + arc_cpu.reset_vcpu(); loop { if !arc_cpu.kvm_vcpu_exec(){ break; @@ -241,3 +365,5 @@ impl CPU { }).expect(&format!("Failed to create thread for CPU {}/KVM", cpu_id)) } } + + diff --git a/src/device/mod.rs b/src/device/mod.rs new file mode 100644 index 000000000..ffe8ab2dd --- /dev/null +++ b/src/device/mod.rs @@ -0,0 +1,12 @@ +use kvm_ioctls::VmFd; + +use crate::{bootloader::Riscv64BootLoader, cpu::CPU, memory::GuestMemory}; +use std::sync::{Arc, Mutex}; + +pub const FDT_MAX_SIZE: u32 = 0x1_0000; +pub fn kvm_setup_fireware(layout : &Riscv64BootLoader) { + + let cmdline = "console=ttyS0 panic=1 reboot=k root=/dev/ram rdinit=/bin/sh"; + let initrd_range = (layout.initrd_start, layout.initrd_size); + let fdt_addr = layout.dtb_start; +} diff --git a/src/kvm/mod.rs b/src/kvm/mod.rs new file mode 100644 index 000000000..43d727b7c --- /dev/null +++ b/src/kvm/mod.rs @@ -0,0 +1,16 @@ +use std::path::PathBuf; +use std::sync::Arc; +use kvm_ioctls::VmFd; + +use crate::memory::{GuestMemory, LayoutEntryType, MEM_LAYOUT}; +use crate::{bootloader::load_kernel, bootloader::Riscv64BootLoader as BootLoader, bootloader::Riscv64BootLoaderConfig as BootLoaderConfig}; +pub fn load_boot_source(guest_memory: &Arc) -> BootLoader { + let initrd_path = PathBuf::from("/tmp/initrd.img"); + let boot_cfg = BootLoaderConfig { + kernel: PathBuf::from("/tmp/vmlinux.bin"), + initrd: initrd_path, + mem_start: MEM_LAYOUT[LayoutEntryType::MemRAM as usize].0, + }; + load_kernel(&boot_cfg, &guest_memory) +} + diff --git a/src/main.rs b/src/main.rs index 3263ba3b3..a1056b73a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,43 +1,41 @@ mod helper; mod memory; mod cpu; +mod kvm; +mod bootloader; +mod device; -use cpu::{CPU, RISCVCoreRegs}; +use cpu::CPU; +use bootloader::kvm_load_kernel; +use device::kvm_setup_fireware; use std::sync::Arc; use kvm_ioctls::Kvm; use memory::GuestMemory; -use std::time::Duration; -use std::thread; fn main() { - let mem_size = 0x400000; - let guest_addr_0 = 0x80002000; + 
let mem_size = 512 * 1024 * 1024; - let asm_code_0: &[u8] = &[ - 0x13, 0x05, 0x80, 0x3f, - 0x0c, 0x61, - 0x01, 0xa0 - ]; - - // 1. create vm + // 1. Open /dev/kvm and create a VM. let kvm = Kvm::new().expect("Failed to open /dev/kvm"); let vm_fd = Arc::new(kvm.create_vm().expect("Failed to create a vm")); - // 2. init memory + // 2. Initialize Guest Memory. let guest_memory = GuestMemory::new(&vm_fd, mem_size).expect("Failed to init guest memory"); - let asm_code_len = asm_code_0.len() as u64; - guest_memory - .write(&mut asm_code_0[..].as_ref(), guest_addr_0, asm_code_len) - .expect("Failed to load asm code to memory"); - // 3. Create Vcpu, init register - let cpu0 = CPU::new(&vm_fd, 0); - println!("vm is created!"); - cpu0.set_register(RISCVCoreRegs::PC, guest_addr_0 as u128); - println!("vm is run!"); - // 4. catch Exception - let arc_cpu0 = Arc::new(cpu0); - let cpu_task0 = CPU::start(arc_cpu0.clone()); - cpu_task0.join().expect(&format!("Failed to join thread task for cpu {}", arc_cpu0.id)); + // 3. Init vCPU. + let vcpu_count = 1_u32; + let arc_memory = Arc::new(guest_memory); + let mut vcpu = CPU::new(&vm_fd, 0); + // 4. load boot source and realize vCPU0. + let layout = kvm_load_kernel(&arc_memory,&mut vcpu, &vm_fd); + let vcpus = vec![Arc::new(vcpu)]; + kvm_setup_fireware(&layout); + println!("fireware set up !"); + + // 9. Run vCPU0. 
+ let cpu_task_0 = CPU::start(vcpus[0].clone()); + println!("task created !"); + println!("Start to run linux kernel!"); + cpu_task_0.join().expect("Failed to wait cpu task 0"); } diff --git a/src/memory/guest_memory.rs b/src/memory/guest_memory.rs index 8a121690f..96fffab33 100644 --- a/src/memory/guest_memory.rs +++ b/src/memory/guest_memory.rs @@ -108,4 +108,15 @@ impl GuestMemory { )?; Ok(obj) } + pub fn memory_end_address(&self) -> u64 { + let mut end_address = 0; + for host_mmap in self.host_mmaps.iter() { + let addr = host_mmap.guest_addr() + host_mmap.size(); + if addr > end_address { + end_address = addr; + } + } + + end_address + } } diff --git a/src/memory/mod.rs b/src/memory/mod.rs index a6a33fa6a..571da36ec 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -45,12 +45,13 @@ pub type Result = std::result::Result; #[repr(usize)] pub enum LayoutEntryType { - MemRAM = 0_usize, + IrqChip = 0_usize, Mmio, - IoApic, - LocalApic, + MemRAM, } pub const MEM_LAYOUT: &[(u64, u64)] = &[ + (0x0800_0000, 0x0800_0000), + (0x1000_0000, 0x2000_0000), (0x8000_0000, 0xC000_0000) ]; -- Gitee From d7191a71dcf2bab8e629ea4d49785570de2ffa59 Mon Sep 17 00:00:00 2001 From: sts Date: Fri, 9 Aug 2024 12:33:23 +0000 Subject: [PATCH 5/8] =?UTF-8?q?PLIC=E5=B9=B3=E5=8F=B0=E7=BA=A7=E4=B8=AD?= =?UTF-8?q?=E6=96=AD=E6=8E=A7=E5=88=B6=E5=99=A8=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .cargo/config | 24 +++ src/cpu/mod.rs | 25 ++- src/device/fdt.rs | 110 ++++++++++++ src/device/mod.rs | 12 +- src/device/plic.rs | 381 +++++++++++++++++++++++++++++++++++++++++ src/device_tree/mod.rs | 186 ++++++++++++++++++++ src/main.rs | 9 +- 7 files changed, 733 insertions(+), 14 deletions(-) create mode 100644 .cargo/config create mode 100644 src/device/fdt.rs create mode 100644 src/device/plic.rs create mode 100644 src/device_tree/mod.rs diff --git a/.cargo/config b/.cargo/config new file mode 100644 index 000000000..385060853 
--- /dev/null +++ b/.cargo/config @@ -0,0 +1,24 @@ +# Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +# +# StratoVirt is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan +# PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +[build] + +[target.'cfg(any(target_arch="aarch64"))'] +rustflags = [ + "-C", "link-arg=-lgcc", + "-C", "link-arg=-lfdt", +] +[target.'cfg(any(target_arch="riscv64"))'] +rustflags = [ + "-C", "link-arg=-lgcc", + "-C", "link-arg=-lfdt", +] diff --git a/src/cpu/mod.rs b/src/cpu/mod.rs index 60dd317a0..700282f86 100644 --- a/src/cpu/mod.rs +++ b/src/cpu/mod.rs @@ -6,7 +6,8 @@ use kvm_bindings::{ use kvm_ioctls::{VmFd, VcpuFd, VcpuExit}; use std::mem::size_of; use std::thread; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; +use crate::device::PlicState; #[macro_export] macro_rules! 
__offset_of { @@ -258,25 +259,30 @@ impl Into for Riscv64Timer{ pub struct CPU { pub id: u8, - fd: VcpuFd, + pub fd: Arc, + pub nr_vcpus: u8, boot_ip: u64, fdt_addr: u64, pub isa: u64, pub frequency: u64, - pub satp_mode: u64 + pub satp_mode: u64, + pub plic: Option>> } impl CPU { - pub fn new(vm_fd: &Arc, vcpu_id: u8) -> Self { + pub fn new(vm_fd: &Arc, vcpu_id: u8, nr_vcpus: u8) -> Self { let vcpu_fd = vm_fd.create_vcpu(vcpu_id as u64).expect(&format!("Failed to create Vcpu{}",vcpu_id)); + let vcpu_fd = Arc::new(vcpu_fd); Self { id: vcpu_id, fd: vcpu_fd, + nr_vcpus, boot_ip: 0, fdt_addr: 0, isa: 0, frequency: 0, satp_mode: 0, + plic: None, } } @@ -294,6 +300,9 @@ impl CPU { pub fn set_register(&self, reg: RISCVCoreRegs, value: u128) { self.fd.set_one_reg(reg.into(), value); } + pub fn set_plic(&mut self, plic: Arc>) { + self.plic = Some(plic.clone()); + } pub fn get_register(&self, reg: RISCVCoreRegs) -> u64{ self.fd.get_one_reg(reg.into()).unwrap() as u64 @@ -350,14 +359,14 @@ impl CPU { true } - pub fn start(arc_cpu: Arc) -> std::thread::JoinHandle<()> { - let cpu_id = arc_cpu.id; + pub fn start(arc_cpu: Arc>) -> std::thread::JoinHandle<()> { + let cpu_id = arc_cpu.lock().unwrap().id; thread::Builder::new() .name(format!("CPU {}/KVM", cpu_id)) .spawn(move || { - arc_cpu.reset_vcpu(); + arc_cpu.lock().unwrap().reset_vcpu(); loop { - if !arc_cpu.kvm_vcpu_exec(){ + if !arc_cpu.lock().unwrap().kvm_vcpu_exec(){ break; } } diff --git a/src/device/fdt.rs b/src/device/fdt.rs new file mode 100644 index 000000000..e71ef8abe --- /dev/null +++ b/src/device/fdt.rs @@ -0,0 +1,110 @@ +use crate::device_tree::*; +use crate::memory::{LayoutEntryType, MEM_LAYOUT}; +use crate::cpu::CPU; +use crate::memory::GuestMemory; +use super::plic::PlicState; +use std::sync::{Mutex, Arc}; + +pub fn generate_fdt( + sys_mem: &Arc, + initrd_range: (u64, u64), + cmdline: &str, + plic: Arc>, + cpu: Arc>, + fdt_addr: u64, +) { + let mut fdt = vec![0; FDT_MAX_SIZE as usize]; + + 
create_device_tree(&mut fdt); + set_property_string(&mut fdt, "/", "compatible", "linux,dummy-virt"); + set_property_u32(&mut fdt, "/", "#address-cells", 0x2); + set_property_u32(&mut fdt, "/", "#size-cells", 0x2); + + generate_chosen_node(&mut fdt, cmdline, initrd_range.0, initrd_range.1); + generate_memory_node(&mut fdt, sys_mem); + generate_cpu_node(&mut fdt, cpu); + plic.lock().unwrap().generate_fdt_node(&mut fdt); + generate_devices_node(&mut fdt); + + let fdt_len = fdt.len() as u64; + sys_mem + .write(&mut fdt.as_slice(), fdt_addr, fdt_len) + .expect("Failed to load fdt to memory"); + + dump_dtb(&fdt, "/tmp/stratovirt.dtb"); +} + +fn generate_memory_node(fdt: &mut Vec, sys_mem: &Arc) { + let mem_base = MEM_LAYOUT[LayoutEntryType::MemRAM as usize].0; + let mem_size = MEM_LAYOUT[LayoutEntryType::MemRAM as usize].1; + let node = "/memory"; + add_sub_node(fdt, node); + set_property_string(fdt, node, "device_type", "memory"); + set_property_array_u64(fdt, node, "reg", &[mem_base, mem_size as u64]); +} + + +fn generate_cpu_node(fdt: &mut Vec, cpu: Arc>) { + let node = "/cpus"; + add_sub_node(fdt, node); + set_property_u32(fdt, node, "#address-cells", 0x01); + set_property_u32(fdt, node, "#size-cells", 0x00); + let state_guard = cpu.lock().unwrap(); + set_property_u32(fdt, node, "timebase-frequency", state_guard.frequency as u32); + + for num in 0..state_guard.nr_vcpus { + let node = format!("/cpus/cpu@{:x}", num); + add_sub_node(fdt, &node); + set_property_string(fdt, &node, "device_type", "cpu"); + set_property_string(fdt, &node, "compatible", "riscv"); + let mmu_type = match state_guard.satp_mode { + 10 => "riscv,sv57", + 9 => "riscv,sv48", + 8 => "riscv,sv39", + _ => "riscv,none", + }; + set_property_string(fdt, &node, "mmu-type", mmu_type); + let valid_isa_order = "IEMAFDQCLBJTPVNSUHKORWXYZG"; + let mut cpu_isa = String::from("rv64"); + for i in 0..valid_isa_order.len() { + let index = valid_isa_order.as_bytes()[i] as u32 - 65; + if state_guard.isa & (1 << 
index) != 0 { + let char_to_add = ((index as u8) + b'a') as char; + cpu_isa.push(char_to_add); + } + } + set_property_string(fdt, &node, "riscv,isa", &cpu_isa); + set_property_u32(fdt, &node, "reg", num as u32); + set_property_string(fdt, &node, "status", "okay"); + + let node = format!("/cpus/cpu@{:x}/interrupt-controller", num); + add_sub_node(fdt, &node); + set_property_string(fdt, &node, "compatible", "riscv,cpu-intc"); + set_property_u32(fdt, &node, "#interrupt-cells", 0x01); + set_property(fdt, &node, "interrupt-controller", None); + set_property_u32( + fdt, + &node, + "phandle", + u32::from(num) + CPU_PHANDLE_START, + ); + } +} + +fn generate_devices_node(fdt: &mut Vec) { + let node = "/smb"; + add_sub_node(fdt, node); + set_property_string(fdt, node, "compatible", "simple-bus"); + set_property_u32(fdt, node, "#address-cells", 0x02); + set_property_u32(fdt, node, "#size-cells", 0x02); + set_property_u32(fdt, node, "interrupt-parent", PHANDLE_PLIC); + set_property(fdt, node, "ranges", None); +} + + +fn generate_chosen_node(fdt: &mut Vec, cmdline: &str, initrd_addr: u64, initrd_size: u64) { + let node = "/chosen"; + add_sub_node(fdt, node); + set_property_string(fdt, node, "bootargs", cmdline); + set_property_string(fdt, node, "stdout-path", "serial0"); +} diff --git a/src/device/mod.rs b/src/device/mod.rs index ffe8ab2dd..d4639b77a 100644 --- a/src/device/mod.rs +++ b/src/device/mod.rs @@ -1,12 +1,20 @@ +pub mod plic; +mod fdt; use kvm_ioctls::VmFd; use crate::{bootloader::Riscv64BootLoader, cpu::CPU, memory::GuestMemory}; use std::sync::{Arc, Mutex}; +pub use plic::*; pub const FDT_MAX_SIZE: u32 = 0x1_0000; -pub fn kvm_setup_fireware(layout : &Riscv64BootLoader) { - +pub fn kvm_setup_fireware(guest_memory: &Arc, vcpus : &Vec>>, layout : &Riscv64BootLoader) { + let plic = PlicState::new(&vcpus, vcpus[0].lock().unwrap().nr_vcpus as u32); + for vcpu in vcpus.iter(){ + vcpu.lock().unwrap().set_plic(plic.clone()); + + } let cmdline = "console=ttyS0 panic=1 
reboot=k root=/dev/ram rdinit=/bin/sh"; let initrd_range = (layout.initrd_start, layout.initrd_size); let fdt_addr = layout.dtb_start; + fdt::generate_fdt(guest_memory, initrd_range, cmdline, plic.clone(), vcpus[0].clone(), fdt_addr); } diff --git a/src/device/plic.rs b/src/device/plic.rs new file mode 100644 index 000000000..21bc732c3 --- /dev/null +++ b/src/device/plic.rs @@ -0,0 +1,381 @@ +use crate::device_tree::*; +use crate::memory::{LayoutEntryType, MEM_LAYOUT}; +use crate::cpu::CPU; +use kvm_ioctls::{VcpuFd, VmFd}; +use std::sync::{Arc, Mutex}; + +const MAX_DEVICES: u32 = 1024; +const MAX_CONTEXTS: u32 = 15872; + +// interrupt source priority regs +const PRIORITY_BASE: u64 = 0; +const PRIORITY_PER_ID: u32 = 4; +// interrupt enable bit +const ENABLE_BASE: u64 = 0x2000; +const ENABLE_PER_HART: u32 = 0x80; +// interrupt threshold and claim/complete reg +const CONTEXT_BASE: u64 = 0x200000; +const CONTEXT_PER_HART: u32 = 0x1000; +const CONTEXT_THRESHOLD: u32 = 0; +const CONTEXT_CLAIM: u32 = 4; +const REG_SIZE: u64 = 0x1000000; + +struct PlicContext { + num: u32, + vcpu: Arc, + irq_priority_threshold: u8, + irq_enable: [u32; (MAX_DEVICES / 32) as usize], + irq_pending: [u32; (MAX_DEVICES / 32) as usize], + irq_pending_priority: [u8; MAX_DEVICES as usize], + irq_claimed: [u32; (MAX_DEVICES / 32) as usize], + irq_autoclear: [u32; (MAX_DEVICES / 32) as usize], +} + +pub struct PlicState { + ready: bool, + num_irq: u32, + num_irq_word: u32, + max_prio: u32, + num_context: u32, + contexts: Vec, + irq_priority: [u8; (MAX_DEVICES) as usize], + irq_level: [u32; (MAX_DEVICES / 32) as usize], +} + +impl PlicState { + pub fn new(vcpus: &Vec>>, vcpu_nr: u32) -> Arc> { + let mut contexts: Vec = Vec::new(); + for i in 0..vcpu_nr * 2 { + let context = PlicContext { + num: i as u32, + vcpu: vcpus[(i / 2) as usize].lock().unwrap().fd.clone(), + irq_priority_threshold: 0 as u8, + irq_enable: [0; (MAX_DEVICES / 32) as usize], + irq_pending: [0; (MAX_DEVICES / 32) as usize], + 
irq_pending_priority: [0; MAX_DEVICES as usize], + irq_claimed: [0; (MAX_DEVICES / 32) as usize], + irq_autoclear: [0; (MAX_DEVICES / 32) as usize], + }; + contexts.push(context); + } + let state = PlicState { + ready: true, + num_irq: MAX_DEVICES, + num_irq_word: MAX_DEVICES / 32, + max_prio: (1 << PRIORITY_PER_ID) - 1, + num_context: vcpu_nr * 2, + contexts, + irq_priority: [0; MAX_DEVICES as usize], + irq_level: [0; (MAX_DEVICES / 32) as usize], + }; + return Arc::new(Mutex::new(state)); + } + + pub fn generate_fdt_node(&self, fdt: &mut Vec) { + let node = format!( + "/interrupt-controller@{:x}", + MEM_LAYOUT[LayoutEntryType::IrqChip as usize].0 + ); + add_sub_node(fdt, &node); + set_property_string(fdt, &node, "compatible", "riscv,plic0"); + set_property_array_u32( + fdt, + &node, + "reg", + &[ + 0x0, + MEM_LAYOUT[LayoutEntryType::IrqChip as usize].0 as u32, + 0x0, + MEM_LAYOUT[LayoutEntryType::IrqChip as usize].1 as u32, + ], + ); + set_property_u32(fdt, &node, "#interrupt-cells", 1); + set_property(fdt, &node, "interrupt-controller", None); + set_property_u32(fdt, &node, "riscv,max-priority", (1 << PRIORITY_PER_ID) - 1); + set_property_u32(fdt, &node, "riscv,ndev", MAX_DEVICES - 1); + set_property_u32(fdt, &node, "phandle", PHANDLE_PLIC); + let mut interrupt_extend: Vec = Vec::new(); + for i in 0..self.num_context / 2 { + interrupt_extend.push(CPU_PHANDLE_START + i); + interrupt_extend.push(0xffffffff); + interrupt_extend.push(CPU_PHANDLE_START + i); + interrupt_extend.push(0x9); + } + set_property_array_u32(fdt, &node, "interrupts-extended", &interrupt_extend[..]); + } + fn __plic_context_best_pending_irq(&self, context_idx: u32) -> u32 { + let mut best_irq_prio: u8 = 0; + let mut irq: u32 = 0; + let mut best_irq: u32 = 0; + for i in 0..self.num_irq_word { + if self.contexts[context_idx as usize].irq_pending[i as usize] == 0 { + continue; + } + for j in 0..32 { + irq = i * 32 + j; + if self.num_irq <= irq + || (self.contexts[context_idx as 
usize].irq_pending[i as usize] & (1 << j)) == 0 + || (self.contexts[context_idx as usize].irq_claimed[i as usize] & (1 << j)) != 0 + { + continue; + } + if best_irq == 0 + || best_irq_prio + < self.contexts[context_idx as usize].irq_pending_priority[irq as usize] + { + best_irq = irq; + best_irq_prio = + self.contexts[context_idx as usize].irq_pending_priority[irq as usize]; + } + } + } + best_irq + } + + fn __plic_context_irq_update(&self, context_idx: u32) { + let irq: u32 = self.__plic_context_best_pending_irq(context_idx); + if irq == 0 { + self.contexts[context_idx as usize] + .vcpu + .unset_interrupt() + .expect("Failed to unset interrupt"); + } else { + self.contexts[context_idx as usize] + .vcpu + .set_interrupt() + .expect("Failed to set interrupt"); + } + } + + fn __plic_context_irq_claim(&mut self, context_idx: u32) -> u32 { + let best_irq: u32 = self.__plic_context_best_pending_irq(context_idx); + let best_irq_word: u32 = best_irq / 32; + let best_irq_mask: u32 = 1 << (best_irq % 32); + + self.contexts[context_idx as usize] + .vcpu + .unset_interrupt() + .expect("Failed to unset interrupt in irq claim!"); + + if best_irq != 0 { + if self.contexts[context_idx as usize].irq_autoclear[best_irq_word as usize] + & best_irq_mask + != 0 + { + self.contexts[context_idx as usize].irq_pending[best_irq_word as usize] &= + !best_irq_mask; + self.contexts[context_idx as usize].irq_pending_priority[best_irq as usize] = 0; + self.contexts[context_idx as usize].irq_claimed[best_irq_word as usize] &= + !best_irq_mask; + self.contexts[context_idx as usize].irq_autoclear[best_irq_word as usize] &= + !best_irq_mask; + } else { + self.contexts[context_idx as usize].irq_claimed[best_irq_word as usize] |= + best_irq_mask; + } + } + self.__plic_context_irq_update(context_idx); + best_irq + } + + pub fn plic__irq_trig(&mut self, irq: u32, level: bool, edge: bool) { + let mut irq_marked: bool = false; + let (irq_prio, irq_word): (u8, u8); + let irq_mask: u32; + + if !self.ready 
{ + return; + } + if irq < 0 || self.num_irq <= irq { + return; + } + irq_prio = self.irq_priority[irq as usize]; + irq_word = (irq / 32) as u8; + irq_mask = 1 << (irq % 32); + + if level { + self.irq_level[irq_word as usize] |= irq_mask; + } else { + self.irq_level[irq_word as usize] &= !irq_mask; + } + + for i in 0..self.num_context { + let mut context = &mut self.contexts[i as usize]; + if (context.irq_enable[irq_word as usize] & irq_mask) != 0 { + if level { + context.irq_pending[irq_word as usize] |= irq_mask; + context.irq_pending_priority[irq as usize] = irq_prio; + if edge { + context.irq_autoclear[irq_word as usize] |= irq_mask; + } + } else { + context.irq_pending[irq_word as usize] &= !irq_mask; + context.irq_pending_priority[irq as usize] = 0; + context.irq_claimed[irq_word as usize] &= !irq_mask; + context.irq_autoclear[irq_word as usize] &= !irq_mask; + } + self.__plic_context_irq_update(i); + irq_marked = true; + } + if irq_marked { + break; + } + } + } + + fn plic__priority_read(&self, offset: u64) -> u32 { + let irq: u32 = (offset >> 2) as u32; + if irq == 0 || irq >= self.num_irq { + return 0; + } + self.irq_priority[irq as usize] as u32 + } + + fn plic__priority_write(&mut self, offset: u64, data: u32) { + let irq: u32 = (offset >> 2) as u32; + + if irq == 0 || irq >= self.num_irq { + return; + } + let val: u32 = data & ((1 << PRIORITY_PER_ID) - 1); + self.irq_priority[irq as usize] = data as u8; + } + + fn plic__context_enable_read(&mut self, context_idx: u32, offset: u64) -> u32 { + let irq_word: u32 = (offset >> 2) as u32; + if self.num_irq_word < irq_word { + return 0; + } + self.contexts[context_idx as usize].irq_enable[irq_word as usize] + } + + fn plic__context_enable_write(&mut self, context_idx: u32, offset: u64, data: u32) { + let mut irq_prio: u8; + let mut irq: u32 = 0; + let mut irq_mask: u32 = 0; + let mut old_val: u32 = 0; + let mut new_val: u32 = 0; + let mut xor_val: u32 = 0; + let irq_word: u32 = (offset >> 2) as u32; + if 
self.num_irq_word < irq_word { + return; + } + old_val = self.contexts[context_idx as usize].irq_enable[irq_word as usize]; + new_val = data; + if irq_word == 0 { + new_val &= !0x1; + } + self.contexts[context_idx as usize].irq_enable[irq_word as usize] = new_val; + xor_val = old_val ^ new_val; + for i in 0..32 { + irq = irq_word * 32 + i; + irq_mask = 1 << i; + irq_prio = self.irq_priority[irq as usize]; + if (xor_val & irq_mask) == 0 { + continue; + } + if (new_val & irq_mask) != 0 && (self.irq_level[irq_word as usize] & irq_mask) != 0 { + self.contexts[context_idx as usize].irq_pending[irq_word as usize] |= irq_mask; + self.contexts[context_idx as usize].irq_pending_priority[irq as usize] = irq_prio; + } else if (new_val & irq_mask) == 0 { + self.contexts[context_idx as usize].irq_pending[irq_word as usize] &= !irq_mask; + self.contexts[context_idx as usize].irq_pending_priority[irq as usize] = 0; + self.contexts[context_idx as usize].irq_claimed[irq_word as usize] &= !irq_mask; + } + } + self.__plic_context_irq_update(context_idx); + } + + fn plic__context_read(&mut self, context_idx: u32, offset: u64) -> u32 { + match offset { + 0 => self.contexts[context_idx as usize].irq_priority_threshold as u32, + 4 => self.__plic_context_irq_claim(context_idx), + _ => 0, + } + } + + fn plic__context_write(&mut self, context_idx: u32, offset: u64, data: u32) { + let mut val: u32; + let mut irq_word: u32; + let mut irq_mask: u32; + let mut irq_update: bool = false; + match offset { + 0 => { + val = data & ((1 << PRIORITY_PER_ID) - 1); + if data <= self.max_prio { + self.contexts[context_idx as usize].irq_priority_threshold = data as u8; + } else { + irq_update = true; + } + } + 4 => { + irq_word = data / 32; + irq_mask = 1 << (data % 32); + if data < self.num_irq + && (self.contexts[context_idx as usize].irq_enable[irq_word as usize] + & irq_mask) + != 0 + { + self.contexts[context_idx as usize].irq_claimed[irq_word as usize] &= !irq_mask; + irq_update = true; + } + } + _ 
=> { + irq_update = true; + } + } + if irq_update { + self.__plic_context_irq_update(context_idx); + } + } + pub fn mmio_write(&mut self, addr: u64, data: u32) { + let mut address = addr & !0x3; + address -= MEM_LAYOUT[LayoutEntryType::IrqChip as usize].0; + if PRIORITY_BASE <= address && address < ENABLE_BASE { + self.plic__priority_write(address, data); + }else if ENABLE_BASE <= address && address < CONTEXT_BASE { + let cntx = ((address - ENABLE_BASE) as u32) / ENABLE_PER_HART; + address -= (cntx * ENABLE_PER_HART) as u64 + ENABLE_BASE; + if cntx < self.num_context { + self.plic__context_enable_write(cntx, address, data); + } + }else if CONTEXT_BASE <= address && address < REG_SIZE { + let cntx = ((address - CONTEXT_BASE)as u32) / CONTEXT_PER_HART; + address -= (cntx * CONTEXT_PER_HART)as u64 + CONTEXT_BASE; + if cntx < self.num_context { + self.plic__context_write(cntx, address, data); + } + } + } + + pub fn mmio_read(&mut self, addr: u64) -> u32{ + let mut data: u32 = 0; + let mut address = addr & !0x3; + address -= MEM_LAYOUT[LayoutEntryType::IrqChip as usize].0; + if PRIORITY_BASE <= address && address < ENABLE_BASE { + return self.plic__priority_read(address); + }else if ENABLE_BASE <= address && address < CONTEXT_BASE { + let cntx = ((address - ENABLE_BASE) as u32) / ENABLE_PER_HART; + address -= (cntx * ENABLE_PER_HART) as u64 + ENABLE_BASE; + if cntx < self.num_context { + return self.plic__context_enable_read(cntx, address); + } + }else if CONTEXT_BASE <= address && address < REG_SIZE { + let cntx = ((address - CONTEXT_BASE)as u32) / CONTEXT_PER_HART; + address -= (cntx * CONTEXT_PER_HART)as u64 + CONTEXT_BASE; + if cntx < self.num_context { + return self.plic__context_read(cntx, address); + } + } + 0 + } +} + +pub fn judge_plic_addr(addr: u64) -> Option { + let base: u64 = MEM_LAYOUT[LayoutEntryType::IrqChip as usize].0; + if (base..base + REG_SIZE).contains(&addr) { + Some(addr) + }else { + None + } +} diff --git a/src/device_tree/mod.rs 
b/src/device_tree/mod.rs new file mode 100644 index 000000000..b88a0537b --- /dev/null +++ b/src/device_tree/mod.rs @@ -0,0 +1,186 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use libc::{c_char, c_int, c_void}; +use std::ffi::CString; + +pub const PHANDLE_RESERVED: u32 = 0; +pub const PHANDLE_PLIC: u32 = 1; +pub const CPU_PHANDLE_START: u32 = 4; + +pub const FDT_MAX_SIZE: u32 = 0x1_0000; + +extern "C" { + fn fdt_create(buf: *mut c_void, bufsize: c_int) -> c_int; + fn fdt_finish_reservemap(fdt: *mut c_void) -> c_int; + fn fdt_begin_node(fdt: *mut c_void, name: *const c_char) -> c_int; + fn fdt_end_node(fdt: *mut c_void) -> c_int; + fn fdt_finish(fdt: *const c_void) -> c_int; + fn fdt_open_into(fdt: *const c_void, buf: *mut c_void, size: c_int) -> c_int; + + fn fdt_path_offset(fdt: *const c_void, path: *const c_char) -> c_int; + fn fdt_add_subnode(fdt: *mut c_void, offset: c_int, name: *const c_char) -> c_int; + fn fdt_setprop( + fdt: *mut c_void, + offset: c_int, + name: *const c_char, + val: *const c_void, + len: c_int, + ) -> c_int; +} + +pub fn create_device_tree(fdt: &mut Vec) { + let mut ret = unsafe { fdt_create(fdt.as_mut_ptr() as *mut c_void, FDT_MAX_SIZE as c_int) }; + if ret < 0 { + panic!("Failed to fdt_create, return {}.", ret); + } + + ret = unsafe { fdt_finish_reservemap(fdt.as_mut_ptr() as *mut c_void) }; + if ret < 0 { + panic!("Failed to fdt_finish_reservemap, return {}.", ret); + } + + let c_str = CString::new("").unwrap(); + ret = 
unsafe { fdt_begin_node(fdt.as_mut_ptr() as *mut c_void, c_str.as_ptr()) }; + if ret < 0 { + panic!("Failed to fdt_begin_node, return {}.", ret); + } + + ret = unsafe { fdt_end_node(fdt.as_mut_ptr() as *mut c_void) }; + if ret < 0 { + panic!("Failed to fdt_end_node, return {}.", ret); + } + + ret = unsafe { fdt_finish(fdt.as_mut_ptr() as *mut c_void) }; + if ret < 0 { + panic!("Failed to fdt_finish, return {}.", ret); + } + + ret = unsafe { + fdt_open_into( + fdt.as_ptr() as *mut c_void, + fdt.as_mut_ptr() as *mut c_void, + FDT_MAX_SIZE as c_int, + ) + }; + if ret < 0 { + panic!("Failed to fdt_open_into, return {}.", ret); + } +} + +pub fn add_sub_node(fdt: &mut Vec, node_path: &str) { + let names: Vec<&str> = node_path.split('/').collect(); + if names.len() < 2 { + panic!("Failed to add sub node, node_path: {} invalid.", node_path); + } + + let node_name = names[names.len() - 1]; + let pare_name = names[0..names.len() - 1].join("/"); + + let c_str = if pare_name.is_empty() { + CString::new("/").unwrap() + } else { + CString::new(pare_name).unwrap() + }; + + let offset = unsafe { fdt_path_offset(fdt.as_ptr() as *const c_void, c_str.as_ptr()) }; + if offset < 0 { + panic!("Failed to fdt_path_offset, return {}.", offset); + } + + let c_str = CString::new(node_name).unwrap(); + let ret = unsafe { fdt_add_subnode(fdt.as_mut_ptr() as *mut c_void, offset, c_str.as_ptr()) }; + if ret < 0 { + panic!("Failed to fdt_add_subnode, return {}.", ret); + } +} + +pub fn set_property(fdt: &mut Vec, node_path: &str, prop: &str, val: Option<&[u8]>) { + let c_str = CString::new(node_path).unwrap(); + let offset = unsafe { fdt_path_offset(fdt.as_ptr() as *const c_void, c_str.as_ptr()) }; + if offset < 0 { + panic!("Failed to fdt_path_offset, return {}.", offset); + } + + let (ptr, len) = if let Some(val) = val { + (val.as_ptr() as *const c_void, val.len() as i32) + } else { + (std::ptr::null::(), 0) + }; + + let c_str = CString::new(prop).unwrap(); + let ret = unsafe { + fdt_setprop( + 
fdt.as_mut_ptr() as *mut c_void, + offset, + c_str.as_ptr(), + ptr, + len, + ) + }; + if ret < 0 { + panic!("Failed to fdt_setprop, return {}.", ret); + } +} + +pub fn set_property_string(fdt: &mut Vec, node_path: &str, prop: &str, val: &str) { + set_property( + fdt, + node_path, + prop, + Some(&([val.as_bytes(), &[0_u8]].concat())), + ) +} + +pub fn set_property_u32(fdt: &mut Vec, node_path: &str, prop: &str, val: u32) { + set_property(fdt, node_path, prop, Some(&val.to_be_bytes())) +} + +pub fn set_property_u64(fdt: &mut Vec, node_path: &str, prop: &str, val: u64) { + set_property(fdt, node_path, prop, Some(&val.to_be_bytes())) +} + +pub fn set_property_array_u32(fdt: &mut Vec, node_path: &str, prop: &str, array: &[u32]) { + let mut bytes: Vec = Vec::new(); + for &val in array { + bytes.append(&mut val.to_be_bytes().to_vec()); + } + set_property(fdt, node_path, prop, Some(&bytes)) +} + +pub fn set_property_array_u64(fdt: &mut Vec, node_path: &str, prop: &str, array: &[u64]) { + let mut bytes: Vec = Vec::new(); + for &val in array { + bytes.append(&mut val.to_be_bytes().to_vec()); + } + set_property(fdt, node_path, prop, Some(&bytes)) +} + +pub fn dump_dtb(fdt: &[u8], file_path: &str) { + use std::fs::File; + use std::io::Write; + + let mut f = File::create(file_path).unwrap(); + for i in fdt.iter() { + f.write_all(&[*i]).expect("Unable to write data"); + } +} + +/// Trait for devices to be added to the Flattened Device Tree. +pub trait CompileFDT { + /// function to generate fdt node + /// + /// # Arguments + /// + /// * `fdt` - the fdt slice to be extended.
+ fn generate_fdt_node(&self, fdt: &mut Vec); +} diff --git a/src/main.rs b/src/main.rs index a1056b73a..eb2ee8b7a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,11 +4,12 @@ mod cpu; mod kvm; mod bootloader; mod device; +mod device_tree; use cpu::CPU; use bootloader::kvm_load_kernel; use device::kvm_setup_fireware; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use kvm_ioctls::Kvm; use memory::GuestMemory; @@ -25,12 +26,12 @@ fn main() { // 3. Init vCPU. let vcpu_count = 1_u32; let arc_memory = Arc::new(guest_memory); - let mut vcpu = CPU::new(&vm_fd, 0); + let mut vcpu = CPU::new(&vm_fd, 0, vcpu_count as u8); // 4. load boot source and realize vCPU0. let layout = kvm_load_kernel(&arc_memory,&mut vcpu, &vm_fd); - let vcpus = vec![Arc::new(vcpu)]; - kvm_setup_fireware(&layout); + let vcpus = vec![Arc::new(Mutex::new(vcpu))]; + kvm_setup_fireware(&arc_memory, &vcpus, &layout); println!("fireware set up !"); // 9. Run vCPU0. -- Gitee From a32848b836c698116f3375d8b4f9d2d3b64e92f8 Mon Sep 17 00:00:00 2001 From: sts Date: Fri, 9 Aug 2024 13:59:25 +0000 Subject: [PATCH 6/8] =?UTF-8?q?=E4=B8=B2=E5=8F=A3=E5=92=8CEpoll=E5=AE=9E?= =?UTF-8?q?=E7=8E=B0=EF=BC=8C=E5=8F=AF=E8=B5=B7Linux?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 16 +- Cargo.toml | 2 +- kvm-ioctls/src/ioctls/vcpu.rs | 6 +- src/cpu/mod.rs | 104 ++++++++--- src/device/fdt.rs | 21 +++ src/device/mod.rs | 32 +++- src/device/serial.rs | 330 ++++++++++++++++++++++++++++++++++ src/helper/checksum.rs | 35 ++++ src/helper/epoll.rs | 96 ++++++++++ src/helper/mod.rs | 3 + src/helper/num_ops.rs | 108 +++++++++++ src/main.rs | 4 +- src/memory/guest_memory.rs | 7 +- 13 files changed, 722 insertions(+), 42 deletions(-) create mode 100644 src/device/serial.rs create mode 100644 src/helper/checksum.rs create mode 100644 src/helper/epoll.rs create mode 100644 src/helper/num_ops.rs diff --git a/Cargo.lock b/Cargo.lock index 0f0ab1612..4e6bbb5f4 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -9,7 +9,7 @@ dependencies = [ "kvm-bindings", "kvm-ioctls", "libc", - "vmm-sys-util", + "vmm-sys-util 0.7.0", ] [[package]] @@ -22,7 +22,7 @@ checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" name = "kvm-bindings" version = "0.8.2" dependencies = [ - "vmm-sys-util", + "vmm-sys-util 0.12.1", ] [[package]] @@ -31,7 +31,7 @@ version = "0.1.0" dependencies = [ "kvm-bindings", "libc", - "vmm-sys-util", + "vmm-sys-util 0.12.1", ] [[package]] @@ -40,6 +40,16 @@ version = "0.2.71" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49" +[[package]] +name = "vmm-sys-util" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1cdd1d72e262bbfb014de65ada24c1ac50e10a2e3b1e8ec052df188c2ee5dfa" +dependencies = [ + "bitflags", + "libc", +] + [[package]] name = "vmm-sys-util" version = "0.12.1" diff --git a/Cargo.toml b/Cargo.toml index fb83af7ee..7749e5c67 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ license = "Mulan PSL v2" [dependencies] libc = ">=0.2.39" -vmm-sys-util = ">=0.10.0" +vmm-sys-util = "0.7.0" kvm-ioctls = { path = "kvm-ioctls" } kvm-bindings = { path = "kvm-bindings", features = ["fam-wrappers"]} diff --git a/kvm-ioctls/src/ioctls/vcpu.rs b/kvm-ioctls/src/ioctls/vcpu.rs index 18cf31d11..f20a22efc 100644 --- a/kvm-ioctls/src/ioctls/vcpu.rs +++ b/kvm-ioctls/src/ioctls/vcpu.rs @@ -1411,11 +1411,11 @@ impl VcpuFd { /// ``` pub fn run(&self) -> Result { // SAFETY: Safe because we know that our file is a vCPU fd and we verify the return result. 
- println!("begin KVM_RUN"); + // println!("begin KVM_RUN"); let ret = unsafe { ioctl(self, KVM_RUN()) }; - println!("finish KVM_RUN"); + // println!("finish KVM_RUN"); if ret == 0 { - println!("come to ret 0 in run()"); + // println!("come to ret 0 in run()"); let run = self.kvm_run_ptr.as_mut_ref(); match run.exit_reason { // make sure you treat all possible exit reasons from include/uapi/linux/kvm.h corresponding diff --git a/src/cpu/mod.rs b/src/cpu/mod.rs index 700282f86..6ce02d247 100644 --- a/src/cpu/mod.rs +++ b/src/cpu/mod.rs @@ -5,9 +5,11 @@ use kvm_bindings::{ }; use kvm_ioctls::{VmFd, VcpuFd, VcpuExit}; use std::mem::size_of; +use crate::helper::byte_code::ByteCode; use std::thread; use std::sync::{Arc, Mutex}; -use crate::device::PlicState; +use crate::device::{PlicState, Serial, judge_serial_addr, judge_plic_addr as judge_interrupt_controller_addr}; +use crate::GuestMemory; #[macro_export] macro_rules! __offset_of { @@ -261,28 +263,32 @@ pub struct CPU { pub id: u8, pub fd: Arc, pub nr_vcpus: u8, + pub sys_mem: Arc, boot_ip: u64, fdt_addr: u64, pub isa: u64, pub frequency: u64, pub satp_mode: u64, - pub plic: Option>> + pub plic: Option>>, + pub serial: Option>> } impl CPU { - pub fn new(vm_fd: &Arc, vcpu_id: u8, nr_vcpus: u8) -> Self { + pub fn new(vm_fd: &Arc, sys_mem: Arc,vcpu_id: u8, nr_vcpus: u8) -> Self { let vcpu_fd = vm_fd.create_vcpu(vcpu_id as u64).expect(&format!("Failed to create Vcpu{}",vcpu_id)); let vcpu_fd = Arc::new(vcpu_fd); Self { id: vcpu_id, fd: vcpu_fd, nr_vcpus, + sys_mem, boot_ip: 0, fdt_addr: 0, isa: 0, frequency: 0, satp_mode: 0, plic: None, + serial: None } } @@ -303,6 +309,9 @@ impl CPU { pub fn set_plic(&mut self, plic: Arc>) { self.plic = Some(plic.clone()); } + pub fn set_serial(&mut self, serial: Arc>) { + self.serial = Some(serial.clone()); + } pub fn get_register(&self, reg: RISCVCoreRegs) -> u64{ self.fd.get_one_reg(reg.into()).unwrap() as u64 @@ -322,27 +331,70 @@ impl CPU { pub fn kvm_vcpu_exec(&self) -> bool { - 
println!("Vcpu{}, PC = 0x{:x}",self.id, self.fd.get_one_reg(RISCVCoreRegs::PC.into()).unwrap()); - match self.fd.run().unwrap() { + // println!("current PC: 0x{:x}", self.fd.get_one_reg(Riscv64CoreRegs::PC.into()).unwrap()); + match self.fd.run().expect("Unhandled error in vcpu emulation!") { VcpuExit::IoIn(addr, data) => { - println!("Vcpu{} VmExit IO in: addr 0x{:x}, data is {}", - self.id, addr, data[0] - ); - } + if let Some(offset) = judge_serial_addr(addr as u64) { + data[0] = self.serial.as_ref().unwrap().lock().unwrap().read(offset); + } + } VcpuExit::IoOut(addr, data) => { - println!("Vcpu{} VmExit OUT in: addr 0x{:x}, data is {}", - self.id, addr, data[0] - ); - } - VcpuExit::MmioRead(addr, data) => { - println!("Vcpu{} VmExit MMIO read: addr 0x{:x}", - self.id, addr - ); - } - VcpuExit::MmioWrite(addr, data) => { - println!("Vcpu{} VmExit MMIO write: addr 0x{:x}", - self.id, addr - ); + if let Some(offset) = judge_serial_addr(addr as u64) { + if self + .serial + .as_ref() + .unwrap() + .lock() + .unwrap() + .write(offset, data[0]) + .is_err() + { + println!("Failed to write data for serial, offset: {}", offset); + } + } + } + VcpuExit::MmioRead(addr, mut data) => { + if let Some(offset) = judge_serial_addr(addr as u64) { + data[0] = self.serial.as_ref().unwrap().lock().unwrap().read(offset); + } else if let Some(addr) = judge_interrupt_controller_addr(addr){ + let mut ic_guard = self.plic.as_ref().unwrap().lock().unwrap(); + let res: u32 = ic_guard.mmio_read(addr); + drop(ic_guard); + let res_bytes = res.as_bytes(); + for i in 0..res_bytes.len() { + data[i] = res_bytes[i]; + } + } else { + let data_len = data.len() as u64; + self.sys_mem + .read(&mut data, addr as u64, data_len) + .expect("Invalid mmio read."); + } + } + VcpuExit::MmioWrite(addr, mut data) => { + if let Some(offset) = judge_serial_addr(addr as u64) { + if self + .serial + .as_ref() + .unwrap() + .lock() + .unwrap() + .write(offset, data[0]) + .is_err() + { + println!("Failed to write 
data for serial, offset: {}", offset); + } + } else if let Some(addr) = judge_interrupt_controller_addr(addr as u64){ + let res: & u32 = u32::from_bytes(&data).unwrap(); + let mut ic_guard = self.plic.as_ref().unwrap().lock().unwrap(); + ic_guard.mmio_write(addr, *res); + drop(ic_guard); + } else { + let data_len = data.len() as u64; + self.sys_mem + .write(&mut data, addr as u64, data_len) + .expect("Invalid mmio write."); + } } VcpuExit::SystemEvent(event_type, _ndata) => { match event_type { @@ -354,8 +406,14 @@ impl CPU { println!("KVM_EXIT_HLT"); return false; } - r => panic!("Unexprcted exit reason: {:?}", r) + VcpuExit::Shutdown => { + println!("Guest shutdown"); + + return false; + } + r => panic!("Unexpected exit reason: {:?}", r), } + true } diff --git a/src/device/fdt.rs b/src/device/fdt.rs index e71ef8abe..2d991d566 100644 --- a/src/device/fdt.rs +++ b/src/device/fdt.rs @@ -5,6 +5,7 @@ use crate::memory::GuestMemory; use super::plic::PlicState; use std::sync::{Mutex, Arc}; +use super::{MMIO_SERIAL_ADDR, MMIO_SERIAL_ADDR_SIZE, MMIO_SERIAL_IRQ}; pub fn generate_fdt( sys_mem: &Arc, initrd_range: (u64, u64), @@ -21,10 +22,17 @@ pub fn generate_fdt( set_property_u32(&mut fdt, "/", "#size-cells", 0x2); generate_chosen_node(&mut fdt, cmdline, initrd_range.0, initrd_range.1); + println!("chosen fdt node complete!"); generate_memory_node(&mut fdt, sys_mem); + println!("mem node complete!"); generate_cpu_node(&mut fdt, cpu); + println!("cpu node complete!"); plic.lock().unwrap().generate_fdt_node(&mut fdt); + println!("plic node complete!"); generate_devices_node(&mut fdt); + println!("serial node complete!"); + generate_aliases_node(&mut fdt); + println!("aliases node complete!"); let fdt_len = fdt.len() as u64; sys_mem @@ -99,6 +107,13 @@ fn generate_devices_node(fdt: &mut Vec) { set_property_u32(fdt, node, "#size-cells", 0x02); set_property_u32(fdt, node, "interrupt-parent", PHANDLE_PLIC); set_property(fdt, node, "ranges", None); + + let node = 
format!("/smb/U6_16550A@{:x}", MMIO_SERIAL_ADDR); + add_sub_node(fdt, &node); + set_property_string(fdt, &node, "compatible", "ns16550a"); + set_property_array_u64(fdt, &node, "reg", &[MMIO_SERIAL_ADDR, 8]); + set_property_u32(fdt, &node, "interrupts", MMIO_SERIAL_IRQ); + set_property_u32(fdt, &node, "clock-frequency", 0x1c2000); } @@ -108,3 +123,9 @@ fn generate_chosen_node(fdt: &mut Vec, cmdline: &str, initrd_addr: u64, init set_property_string(fdt, node, "bootargs", cmdline); set_property_string(fdt, node, "stdout-path", "serial0"); } +fn generate_aliases_node(fdt: &mut Vec) { + let node = "/aliases"; + add_sub_node(fdt, node); + let serial0 = format!("/smb/U6_16550A@{:x}", MMIO_SERIAL_ADDR); + set_property_string(fdt, node, "serial0", &serial0); +} diff --git a/src/device/mod.rs b/src/device/mod.rs index d4639b77a..5babf48d0 100644 --- a/src/device/mod.rs +++ b/src/device/mod.rs @@ -1,20 +1,44 @@ pub mod plic; mod fdt; +mod serial; use kvm_ioctls::VmFd; use crate::{bootloader::Riscv64BootLoader, cpu::CPU, memory::GuestMemory}; use std::sync::{Arc, Mutex}; pub use plic::*; +pub use serial::*; pub const FDT_MAX_SIZE: u32 = 0x1_0000; -pub fn kvm_setup_fireware(guest_memory: &Arc, vcpus : &Vec>>, layout : &Riscv64BootLoader) { - let plic = PlicState::new(&vcpus, vcpus[0].lock().unwrap().nr_vcpus as u32); +#[derive(Debug)] +pub enum Error { + Overflow(usize, usize), + IoError(std::io::Error), + +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Error::Overflow(len, max_len) => write!(f, "The received buffer {} overflow, max_len: {}", len, max_len ), + Error::IoError(ref e) => { write!(f, "IO errors occurs when read/write memory, error is {}", e) } + } + + } + +} +pub type Result = std::result::Result; + +pub fn kvm_setup_fireware(guest_memory: &Arc, vcpus : &Vec>>, vm_fd: Arc, layout : &Riscv64BootLoader) { + let nr_cpu = vcpus[0].lock().unwrap().nr_vcpus as u32; + let plic = 
PlicState::new(&vcpus, nr_cpu); + let serial = Serial::new(&vm_fd, plic.clone()); for vcpu in vcpus.iter(){ + vcpu.lock().unwrap().set_serial(serial.clone()); vcpu.lock().unwrap().set_plic(plic.clone()); - } let cmdline = "console=ttyS0 panic=1 reboot=k root=/dev/ram rdinit=/bin/sh"; let initrd_range = (layout.initrd_start, layout.initrd_size); let fdt_addr = layout.dtb_start; fdt::generate_fdt(guest_memory, initrd_range, cmdline, plic.clone(), vcpus[0].clone(), fdt_addr); -} +} + diff --git a/src/device/serial.rs b/src/device/serial.rs new file mode 100644 index 000000000..ba8e4f92d --- /dev/null +++ b/src/device/serial.rs @@ -0,0 +1,330 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use std::collections::VecDeque; +use std::io; +use std::os::unix::io::RawFd; +use std::sync::{Arc, Mutex}; +use std::thread; + +use kvm_ioctls::VmFd; +use vmm_sys_util::{epoll::EventSet, eventfd::EventFd, terminal::Terminal}; + +use super::{Error, Result, PlicState}; +use crate::memory::{MEM_LAYOUT, LayoutEntryType}; +use crate::helper::epoll::{EpollContext, EventNotifier}; + +pub const MMIO_SERIAL_IRQ: u32 = 1; +pub const MMIO_SERIAL_ADDR: u64 = MEM_LAYOUT[LayoutEntryType::Mmio as usize].0; +pub const MMIO_SERIAL_ADDR_SIZE: u64 = MEM_LAYOUT[LayoutEntryType::Mmio as usize].1; + + +const UART_IER_RDI: u8 = 0x01; +const UART_IER_THRI: u8 = 0x02; +const UART_IIR_NO_INT: u8 = 0x01; +const UART_IIR_THRI: u8 = 0x02; +const UART_IIR_RDI: u8 = 0x04; +const _UART_IIR_ID: u8 = 0x06; + +const UART_LCR_DLAB: u8 = 0x80; +const UART_LSR_DR: u8 = 0x01; +const _UART_LSR_OE: u8 = 0x02; +const _UART_LSR_BI: u8 = 0x10; +const UART_LSR_THRE: u8 = 0x20; +const UART_LSR_TEMT: u8 = 0x40; + +const UART_MCR_OUT2: u8 = 0x08; +const UART_MCR_LOOP: u8 = 0x10; +const UART_MSR_CTS: u8 = 0x10; +const UART_MSR_DSR: u8 = 0x20; +const UART_MSR_DCD: u8 = 0x80; + +const RECEIVER_BUFF_SIZE: usize = 1024; + +pub fn judge_serial_addr(addr: u64) -> Option { + if (MMIO_SERIAL_ADDR..MMIO_SERIAL_ADDR + MMIO_SERIAL_ADDR_SIZE).contains(&addr) { + Some(addr - MMIO_SERIAL_ADDR) + } else { + None + } +} + +/// Contain registers and operation methods of serial. +pub struct Serial { + /// Receiver buffer register. + rbr: VecDeque, + /// Interrupt enable register. + ier: u8, + /// interrupt identification register. + iir: u8, + /// Line control register. + lcr: u8, + /// Modem control register. + mcr: u8, + /// Line status register. + lsr: u8, + /// Modem status register. + msr: u8, + /// Scratch register. + scr: u8, + /// Used to set baud rate. + div: u16, + /// Transmitter holding register. + thr_pending: u32, + /// Interrupt event file descriptor. + interrupt_evt: EventFd, + /// Operation methods. 
+ output: Box, + /// serial interrupt control + serial_ctrl: Arc>, +} + +impl Serial { + /// Create a new `Serial` instance with default parameters. + pub fn new(vm_fd: &VmFd, serial_ctrl: Arc>) -> Arc> { + std::io::stdin() + .lock() + .set_raw_mode() + .expect("Failed to set raw mode to stdin"); + + let evt_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + // vm_fd + // .register_irqfd(&evt_fd, MMIO_SERIAL_IRQ) + // .expect("Failed to register irq fd for serial"); + let serial = Arc::new(Mutex::new(Serial { + rbr: VecDeque::new(), + ier: 0, + iir: UART_IIR_NO_INT, + lcr: 0x03, // 8 bits + mcr: UART_MCR_OUT2, + lsr: UART_LSR_TEMT | UART_LSR_THRE, + msr: UART_MSR_DCD | UART_MSR_DSR | UART_MSR_CTS, + scr: 0, + div: 0x0c, + thr_pending: 0, + interrupt_evt: evt_fd, + output: Box::new(std::io::stdout()), + serial_ctrl: serial_ctrl.clone() + })); + + let serial_clone = serial.clone(); + let mut epoll = EpollContext::new(); + let handler: Box = Box::new(move |event, _| { + if event == EventSet::IN && serial_clone.lock().unwrap().stdin_exce().is_err() { + println!("Failed to excecute the stdin"); + } + }); + + let notifier = EventNotifier::new( + libc::STDIN_FILENO, + EventSet::IN, + Arc::new(Mutex::new(handler)), + ); + + epoll.add_event(notifier); + + let _ = thread::Builder::new() + .name("serial".to_string()) + .spawn(move || loop { + if !epoll.run() { + break; + } + }); + + serial + } + + /// Update interrupt identification register, + /// this method would be called when the interrupt identification changes. 
+ fn update_iir(&mut self) -> Result<()> { + let mut iir = UART_IIR_NO_INT; + + if self.ier & UART_IER_RDI != 0 && self.lsr & UART_LSR_DR != 0 { + iir &= !UART_IIR_NO_INT; + iir |= UART_IIR_RDI; + } else if self.ier & UART_IER_THRI != 0 && self.thr_pending > 0 { + iir &= !UART_IIR_NO_INT; + iir |= UART_IIR_THRI; + } + + self.iir = iir; + + if iir != UART_IIR_NO_INT { + self.serial_ctrl.lock().unwrap().plic__irq_trig(MMIO_SERIAL_IRQ, true, false);; + } + + Ok(()) + } + + fn receive(&mut self, data: &[u8]) -> Result<()> { + if self.mcr & UART_MCR_LOOP == 0 { + if self.rbr.len() >= RECEIVER_BUFF_SIZE { + return Err(Error::Overflow(self.rbr.len(), RECEIVER_BUFF_SIZE)); + } + + self.rbr.extend(data); + self.lsr |= UART_LSR_DR; + + self.update_iir()?; + } + + Ok(()) + } + + fn stdin_exce(&mut self) -> Result<()> { + let mut out = [0_u8; 64]; + if let Ok(count) = std::io::stdin().lock().read_raw(&mut out) { + for i in 0..count { + if out[i] == 0x01 && i + 1 < count && out[i + 1] == b'x'{ + } + } + self.receive(&out[..count]) + } else { + Ok(()) + } + } + + /// Read one byte data from a certain register selected by `offset`. + /// + /// # Arguments + /// + /// * `offset` - Used to select a register. + /// + /// # Errors + /// + /// Return Error if fail to update iir. 
+ pub fn read(&mut self, offset: u64) -> u8 { + let mut ret: u8 = 0; + + match offset { + 0 => { + if self.lcr & UART_LCR_DLAB != 0 { + ret = self.div as u8; + } else { + if !self.rbr.is_empty() { + ret = self.rbr.pop_front().unwrap_or_default(); + } + if self.rbr.is_empty() { + self.lsr &= !UART_LSR_DR; + } + + if self.update_iir().is_err() { + println!( + "Failed to update iir for reading the register {} of serial", + offset + ); + } + } + } + 1 => { + if self.lcr & UART_LCR_DLAB != 0 { + ret = (self.div >> 8) as u8; + } else { + ret = self.ier + } + } + 2 => { + ret = self.iir | 0xc0; + self.thr_pending = 0; + self.iir = UART_IIR_NO_INT + } + 3 => { + ret = self.lcr; + } + 4 => { + ret = self.mcr; + } + 5 => { + ret = self.lsr; + } + 6 => { + if self.mcr & UART_MCR_LOOP != 0 { + ret = (self.mcr & 0x0c) << 4; + ret |= (self.mcr & 0x02) << 3; + ret |= (self.mcr & 0x01) << 5; + } else { + ret = self.msr; + } + } + 7 => { + ret = self.scr; + } + _ => {} + } + + ret + } + + /// Write one byte data to a certain register selected by `offset`. + /// + /// # Arguments + /// + /// * `offset` - Used to select a register. + /// * `data` - A u8-type data, which will be written to the register. + /// + /// # Errors + /// + /// Return Error if + /// * fail to get output file descriptor. + /// * fail to write serial. + /// * fail to flush serial. 
+ pub fn write(&mut self, offset: u64, data: u8) -> Result<()> { + match offset { + 0 => { + if self.lcr & UART_LCR_DLAB != 0 { + self.div = (self.div & 0xff00) | u16::from(data); + } else { + self.thr_pending = 1; + + if self.mcr & UART_MCR_LOOP != 0 { + // loopback mode + if self.rbr.len() >= RECEIVER_BUFF_SIZE { + return Err(Error::Overflow(self.rbr.len(), RECEIVER_BUFF_SIZE)); + } + + self.rbr.push_back(data); + self.lsr |= UART_LSR_DR; + } else { + self.output.write_all(&[data]).map_err(Error::IoError)?; + self.output.flush().map_err(Error::IoError)?; + } + + self.update_iir()?; + } + } + 1 => { + if self.lcr & UART_LCR_DLAB != 0 { + self.div = (self.div & 0x00ff) | (u16::from(data) << 8); + } else { + let changed = (self.ier ^ data) & 0x0f; + self.ier = data & 0x0f; + + if changed != 0 { + self.update_iir()?; + } + } + } + 3 => { + self.lcr = data; + } + 4 => { + self.mcr = data; + } + 7 => { + self.scr = data; + } + _ => {} + } + + Ok(()) + } +} diff --git a/src/helper/checksum.rs b/src/helper/checksum.rs new file mode 100644 index 000000000..3755c157a --- /dev/null +++ b/src/helper/checksum.rs @@ -0,0 +1,35 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use super::byte_code::ByteCode; + +pub fn checksum(slice: &[u8]) -> u8 { + let mut sum: u32 = 0; + + for byte in slice.iter() { + sum += u32::from(*byte); + sum &= 0xff; + } + + (sum & 0xff) as u8 +} + +pub fn obj_checksum(t: &T) -> u8 { + let mut sum: u32 = 0; + + for byte in t.as_bytes().iter() { + sum += u32::from(*byte); + sum &= 0xff; + } + + (sum & 0xff) as u8 +} diff --git a/src/helper/epoll.rs b/src/helper/epoll.rs new file mode 100644 index 000000000..8114167ab --- /dev/null +++ b/src/helper/epoll.rs @@ -0,0 +1,96 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +extern crate vmm_sys_util; + +use std::collections::BTreeMap; +use std::os::unix::io::RawFd; +use std::sync::{Arc, Mutex}; + +use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet}; + +const READY_EVENT_MAX: usize = 128; + +pub type NotifierCallback = dyn Fn(EventSet, RawFd) + Send + Sync; + +/// Epoll Event Notifier Entry. +pub struct EventNotifier { + /// Raw file descriptor + pub raw_fd: RawFd, + /// The types of events for which we use this fd + pub event: EventSet, + /// Event Handler List, one fd event may have many handlers + pub handler: Arc>>, +} + +impl EventNotifier { + pub fn new(raw_fd: i32, event: EventSet, handler: Arc>>) -> Self { + EventNotifier { + raw_fd, + event, + handler, + } + } +} + +/// Epoll Context +pub struct EpollContext { + /// Epoll file descriptor. 
+ epoll: Epoll, + /// The Event handler + events: Arc>>>, +} + +impl EpollContext { + pub fn new() -> Self { + EpollContext { + epoll: Epoll::new().unwrap(), + events: Arc::new(Mutex::new(BTreeMap::new())), + } + } + + pub fn add_event(&mut self, event: EventNotifier) { + let mut events = self.events.lock().unwrap(); + let raw_fd = event.raw_fd; + events.insert(raw_fd, Box::new(event)); + + let event = events.get(&raw_fd).unwrap(); + self.epoll + .ctl( + ControlOperation::Add, + raw_fd, + EpollEvent::new(event.event, &**event as *const _ as u64), + ) + .unwrap(); + } + + pub fn run(&self) -> bool { + let mut ready_events = vec![EpollEvent::default(); READY_EVENT_MAX]; + + let ev_count = match self.epoll.wait(READY_EVENT_MAX, -1, &mut ready_events[..]) { + Ok(ev_count) => ev_count, + Err(e) if e.raw_os_error() == Some(libc::EINTR) => 0, + Err(_e) => return false, + }; + + for ready_event in ready_events.iter().take(ev_count) { + let event = unsafe { + let event_ptr = ready_event.data() as *const EventNotifier; + &*event_ptr as &EventNotifier + }; + let handler = event.handler.lock().unwrap(); + handler(ready_event.event_set(), event.raw_fd); + } + + true + } +} diff --git a/src/helper/mod.rs b/src/helper/mod.rs index e3e7dbe68..22c89b112 100644 --- a/src/helper/mod.rs +++ b/src/helper/mod.rs @@ -1 +1,4 @@ pub mod byte_code; +pub mod checksum; +pub mod epoll; +pub mod num_ops; diff --git a/src/helper/num_ops.rs b/src/helper/num_ops.rs new file mode 100644 index 000000000..a847d2072 --- /dev/null +++ b/src/helper/num_ops.rs @@ -0,0 +1,108 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +//! This module implements some operations of Rust primitive types. + +/// Calculate the aligned-up u64 value. +/// +/// # Arguments +/// +/// * `origin` - the origin value. +/// * `align` - the alignment. +/// +/// # Examples +/// +/// ```rust +/// extern crate util; +/// use util::num_ops::round_up; +/// +/// let value = round_up(1003 as u64, 4 as u64); +/// assert!(value == Some(1004)); +/// ``` +pub fn round_up(origin: u64, align: u64) -> Option { + match origin % align { + 0 => Some(origin), + diff => origin.checked_add(align - diff), + } +} + +/// Calculate the aligned-down u64 value. +/// +/// # Arguments +/// +/// * `origin` - the origin value. +/// * `align` - the alignment. +/// +/// # Examples +/// +/// ```rust +/// extern crate util; +/// use util::num_ops::round_down; +/// +/// let value = round_down(1003 as u64, 4 as u64); +/// assert!(value == Some(1000)); +/// ``` +pub fn round_down(origin: u64, align: u64) -> Option { + match origin % align { + 0 => Some(origin), + diff => origin.checked_sub(diff), + } +} + +/// Get the first half or second half of u64. +/// +/// # Arguments +/// +/// * `value` - The origin value to get u32 from. +/// * `page` - Value is 0 or 1, determines which half to return. +/// +/// # Examples +/// +/// ```rust +/// extern crate util; +/// use util::num_ops::read_u32; +/// +/// let value = read_u32(0x2000_1000_0000, 1); +/// assert!(value == 0x2000); +/// ``` +pub fn read_u32(value: u64, page: u32) -> u32 { + match page { + 0 => value as u32, + 1 => (value >> 32) as u32, + _ => 0_u32, + } +} + +/// Write the given u32 to the first or second half in u64, +/// returns the u64 value. 
+/// +/// # Arguments +/// +/// * `value` - The origin u32 value. +/// * `page` - Value is 0 or 1, determines which half to write. +/// +/// # Examples +/// +/// ```rust +/// extern crate util; +/// use util::num_ops::write_u32; +/// +/// let value = write_u32(0x1000_0000, 1); +/// assert!(value == 0x1000_0000_0000_0000); +/// ``` +pub fn write_u32(value: u32, page: u32) -> u64 { + match page { + 0 => u64::from(value), + 1 => u64::from(value) << 32, + _ => 0_u64, + } +} diff --git a/src/main.rs b/src/main.rs index eb2ee8b7a..889583e2d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -26,12 +26,12 @@ fn main() { // 3. Init vCPU. let vcpu_count = 1_u32; let arc_memory = Arc::new(guest_memory); - let mut vcpu = CPU::new(&vm_fd, 0, vcpu_count as u8); + let mut vcpu = CPU::new(&vm_fd, arc_memory.clone(), 0, vcpu_count as u8); // 4. load boot source and realize vCPU0. let layout = kvm_load_kernel(&arc_memory,&mut vcpu, &vm_fd); let vcpus = vec![Arc::new(Mutex::new(vcpu))]; - kvm_setup_fireware(&arc_memory, &vcpus, &layout); + kvm_setup_fireware(&arc_memory, &vcpus, vm_fd.clone(), &layout); println!("fireware set up !"); // 9. Run vCPU0. 
diff --git a/src/memory/guest_memory.rs b/src/memory/guest_memory.rs index 96fffab33..4f923f230 100644 --- a/src/memory/guest_memory.rs +++ b/src/memory/guest_memory.rs @@ -43,13 +43,8 @@ impl GuestMemory { let gap_start = MEM_LAYOUT[LayoutEntryType::MemRAM as usize].0; let gap_size = MEM_LAYOUT[LayoutEntryType::MemRAM as usize].1; - let gap_end = MEM_LAYOUT[LayoutEntryType::MemRAM as usize].0 + - MEM_LAYOUT[LayoutEntryType::MemRAM as usize].1; - ranges.push((gap_start, mem_size)); + ranges.push((gap_start, gap_size)); - if mem_size > gap_size { - ranges.push((gap_end, mem_size - gap_size)); - } ranges } -- Gitee From d86f62b8ddaa429dfe775f8b385a2b57b8260acf Mon Sep 17 00:00:00 2001 From: sts Date: Mon, 26 Aug 2024 00:39:01 +0000 Subject: [PATCH 7/8] =?UTF-8?q?=E5=BA=93=E7=89=88=E6=9C=AC=E8=B0=83?= =?UTF-8?q?=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1:调整kvm-ioctls的版本到官方edu要求的0.6.0 2:调整kvm-bindings版本到官方edu要求的0.3.0 --- kvm-ioctls/.cargo/config | 3 + kvm-ioctls/.gitignore | 6 + kvm-ioctls/.gitmodules | 3 + kvm-ioctls/CHANGELOG.md | 112 ++ kvm-ioctls/CODEOWNERS | 3 + kvm-ioctls/Cargo.lock | 41 - kvm-ioctls/Cargo.toml | 20 +- kvm-ioctls/LICENSE-APACHE | 202 +++ kvm-ioctls/LICENSE-MIT | 24 + kvm-ioctls/README.md | 47 + kvm-ioctls/THIRD-PARTY | 27 + kvm-ioctls/coverage_config_aarch64.json | 5 + kvm-ioctls/coverage_config_x86_64.json | 5 + kvm-ioctls/src/cap.rs | 7 +- kvm-ioctls/src/ioctls/device.rs | 44 +- kvm-ioctls/src/ioctls/mod.rs | 15 +- kvm-ioctls/src/ioctls/system.rs | 343 ++--- kvm-ioctls/src/ioctls/vcpu.rs | 1136 ++++------------- kvm-ioctls/src/ioctls/vm.rs | 737 +++-------- kvm-ioctls/src/kvm_ioctls.rs | 25 +- kvm-ioctls/src/lib.rs | 213 +++- src/arch/aarch64/boot_loader/mod.rs | 117 ++ src/arch/aarch64/cpu/mod.rs | 244 ++++ src/arch/aarch64/device/fdt.rs | 126 ++ src/arch/aarch64/device/gicv3.rs | 269 ++++ src/arch/aarch64/device/mod.rs | 32 + src/arch/aarch64/device/serial.rs | 4 + 
src/arch/aarch64/device_tree/mod.rs | 192 +++ src/{ => arch/aarch64}/kvm/mod.rs | 10 +- src/arch/aarch64/memory/mod.rs | 27 + src/arch/aarch64/mod.rs | 6 + src/arch/mod.rs | 60 + .../riscv64/boot_loader}/mod.rs | 10 +- src/arch/riscv64/cpu/mod.rs | 326 +++++ src/{ => arch/riscv64}/device/fdt.rs | 28 +- src/arch/riscv64/device/mod.rs | 33 + src/{ => arch/riscv64}/device/plic.rs | 9 +- src/arch/riscv64/device/serial.rs | 28 + src/{ => arch/riscv64}/device_tree/mod.rs | 0 src/arch/riscv64/kvm/mod.rs | 20 + src/arch/riscv64/memory/mod.rs | 23 + src/arch/riscv64/mod.rs | 6 + src/arch/x86_64/boot_loader/bootparam.rs | 213 ++++ src/arch/x86_64/boot_loader/gdt.rs | 145 +++ src/arch/x86_64/boot_loader/loader.rs | 127 ++ src/arch/x86_64/boot_loader/mod.rs | 77 ++ src/arch/x86_64/boot_loader/mptable.rs | 345 +++++ src/arch/x86_64/cpu/mod.rs | 355 ++++++ src/arch/x86_64/device/mod.rs | 15 + src/arch/x86_64/device/serial.rs | 3 + src/arch/x86_64/helper/cpuid.rs | 31 + src/arch/x86_64/helper/mod.rs | 1 + src/arch/x86_64/kvm/mod.rs | 46 + src/arch/x86_64/memory/mod.rs | 30 + src/arch/x86_64/mod.rs | 6 + src/cpu/mod.rs | 408 ++---- src/device/mod.rs | 56 +- src/device/serial.rs | 42 +- src/helper/byte_code.rs | 40 +- src/helper/mod.rs | 34 + src/main.rs | 56 +- src/memory/guest_memory.rs | 129 +- src/memory/host_mmap.rs | 49 +- src/memory/mod.rs | 45 +- 64 files changed, 4498 insertions(+), 2343 deletions(-) create mode 100644 kvm-ioctls/.cargo/config create mode 100644 kvm-ioctls/.gitignore create mode 100644 kvm-ioctls/.gitmodules create mode 100644 kvm-ioctls/CHANGELOG.md create mode 100644 kvm-ioctls/CODEOWNERS delete mode 100644 kvm-ioctls/Cargo.lock create mode 100644 kvm-ioctls/LICENSE-APACHE create mode 100644 kvm-ioctls/LICENSE-MIT create mode 100644 kvm-ioctls/README.md create mode 100644 kvm-ioctls/THIRD-PARTY create mode 100644 kvm-ioctls/coverage_config_aarch64.json create mode 100644 kvm-ioctls/coverage_config_x86_64.json create mode 100644 
src/arch/aarch64/boot_loader/mod.rs create mode 100644 src/arch/aarch64/cpu/mod.rs create mode 100644 src/arch/aarch64/device/fdt.rs create mode 100644 src/arch/aarch64/device/gicv3.rs create mode 100644 src/arch/aarch64/device/mod.rs create mode 100644 src/arch/aarch64/device/serial.rs create mode 100644 src/arch/aarch64/device_tree/mod.rs rename src/{ => arch/aarch64}/kvm/mod.rs (57%) create mode 100644 src/arch/aarch64/memory/mod.rs create mode 100644 src/arch/aarch64/mod.rs create mode 100644 src/arch/mod.rs rename src/{bootloader => arch/riscv64/boot_loader}/mod.rs (96%) create mode 100644 src/arch/riscv64/cpu/mod.rs rename src/{ => arch/riscv64}/device/fdt.rs (84%) create mode 100644 src/arch/riscv64/device/mod.rs rename src/{ => arch/riscv64}/device/plic.rs (97%) create mode 100644 src/arch/riscv64/device/serial.rs rename src/{ => arch/riscv64}/device_tree/mod.rs (100%) create mode 100644 src/arch/riscv64/kvm/mod.rs create mode 100644 src/arch/riscv64/memory/mod.rs create mode 100644 src/arch/riscv64/mod.rs create mode 100644 src/arch/x86_64/boot_loader/bootparam.rs create mode 100644 src/arch/x86_64/boot_loader/gdt.rs create mode 100644 src/arch/x86_64/boot_loader/loader.rs create mode 100644 src/arch/x86_64/boot_loader/mod.rs create mode 100644 src/arch/x86_64/boot_loader/mptable.rs create mode 100644 src/arch/x86_64/cpu/mod.rs create mode 100644 src/arch/x86_64/device/mod.rs create mode 100644 src/arch/x86_64/device/serial.rs create mode 100644 src/arch/x86_64/helper/cpuid.rs create mode 100644 src/arch/x86_64/helper/mod.rs create mode 100644 src/arch/x86_64/kvm/mod.rs create mode 100644 src/arch/x86_64/memory/mod.rs create mode 100644 src/arch/x86_64/mod.rs diff --git a/kvm-ioctls/.cargo/config b/kvm-ioctls/.cargo/config new file mode 100644 index 000000000..ba63e46b3 --- /dev/null +++ b/kvm-ioctls/.cargo/config @@ -0,0 +1,3 @@ +[target.aarch64-unknown-linux-musl] +rustflags = [ "-C", "target-feature=+crt-static", "-C", "link-arg=-lgcc" ] + diff --git 
a/kvm-ioctls/.gitignore b/kvm-ioctls/.gitignore new file mode 100644 index 000000000..ac6209e0e --- /dev/null +++ b/kvm-ioctls/.gitignore @@ -0,0 +1,6 @@ +Cargo.lock +/target +.idea +**/*.rs.bk +**/.pytest_cache/ +**/__pycache__/* diff --git a/kvm-ioctls/.gitmodules b/kvm-ioctls/.gitmodules new file mode 100644 index 000000000..bda97eb35 --- /dev/null +++ b/kvm-ioctls/.gitmodules @@ -0,0 +1,3 @@ +[submodule "rust-vmm-ci"] + path = rust-vmm-ci + url = https://github.com/rust-vmm/rust-vmm-ci.git diff --git a/kvm-ioctls/CHANGELOG.md b/kvm-ioctls/CHANGELOG.md new file mode 100644 index 000000000..f6044e828 --- /dev/null +++ b/kvm-ioctls/CHANGELOG.md @@ -0,0 +1,112 @@ +# v0.6.0 + +## Added +- Support for the vcpu ioctls: `KVM_SET_GUEST_DEBUG`, `KVM_KVMCLOCK_CTRL`, and + `KVM_GET_REG_LIST`. +- Support for the vm ioctl `KVM_GET_DEVICE_ATTR`. +- Support for the device ioctl `KVM_HAS_DEVICE_ATTR`. +- Support for `VcpuExit::Debug`. +- Support for enabling vcpu capabilities using `Vcpu::enable_cap`. +- Support for checking Hyper-V (`HypervSynic` and `HypervSynic2`), MSI + (`MsiDevid`), and IPA Size (`ArmVmIPASize`) capabilities. + using `kvm.check_extension`. +- Support for checking the VM capabilities via `Vm::check_extension`. +- Create a VM with flexible IPA size using `Kvm::create_vm_with_ipa_size`. + +## Removed +- Removed `Kvm::new_with_fd_number`. The same functionality is offered by the + `Kvm` [FromRawFd](https://doc.rust-lang.org/std/os/unix/io/trait.FromRawFd.html) + trait implementation. + +## Changed +- The VM ioctl `unregister_ioevent` now correctly unregisters the events that + correspond to the data match passed as a parameter. +- The `SystemEvent` Vcpu Exit now also contains the relevant type and flags. +- Updated `get_dirty_log` such that it does not assume the page size is 4K, + but instead reads it using `libc::sysconf`. + +# v0.5.0 + +## Added +- Support for the vcpu ioctls `KVM_GET/SET_VCPU_EVENTS` and `KVM_GET_DIRTY_LOG` + on `aarch64`. 
+- Support for the vcpu ioctl `KVM_IRQ_LINE`. + +# v0.4.0 + +## Added +- Support for unregistering ioeventfds through `KVM_IOEVENTFD`. + +## Changed +- Functions working with event FDs now require + vmm_sys_util::eventfd::EventFd in their interface instead of + RawFd. +- Functions working with FAM structs kvm_msr_list and kvm_msrs, were + changed to work with their respective safe counterparts MsrList and + respectively Msrs. +- Now exporting kvm_ioctls::Error type definition so that users of this + crate can create their own wrapping errors without having to know the + Error type used internally by this crate. +- No longer exporting kvm_ioctls::Result. Users of this crate should + not have to use kvm_ioctls::Result outside the crate. +- kvm_ioctls::Error now works with errno::Error instead of io::Error. + +## Removed +- CpuId safe wrapper over FAM struct kvm_cpuid2. The safe wrapper is + now provided by the kvm_bindings crate starting with v0.2.0. +- KVM_MAX_MSR_ENTRIES and MAX_KVM_CPUID_ENTRIES. Equivalent constants + are provided by the kvm_bindings crate starting with v0.2.0. 
+ +# v0.3.0 + +## Added +- Support for setting vcpu `kvm_immediate_exit` flag +- Support for the vcpu ioctl `KVM_GET_CPUID2` +- Support for the vcpu ioctl `KVM_GET_MP_STATE` +- Support for the vcpu ioctl `KVM_SET_MP_STATE` +- Support for the vcpu ioctl `KVM_GET_VCPU_EVENTS` +- Support for the vcpu ioctl `KVM_SET_VCPU_EVENTS` +- Support for the vcpu ioctl `KVM_GET_DEBUGREGS` +- Support for the vcpu ioctl `KVM_SET_DEBUGREGS` +- Support for the vcpu ioctl `KVM_GET_XSAVE` +- Support for the vcpu ioctl `KVM_SET_XSAVE` +- Support for the vcpu ioctl `KVM_GET_XCRS` +- Support for the vcpu ioctl `KVM_SET_XCRS` +- Support for the vm ioctl `KVM_GET_IRQCHIP` +- Support for the vm ioctl `KVM_SET_IRQCHIP` +- Support for the vm ioctl `KVM_GET_CLOCK` +- Support for the vm ioctl `KVM_SET_CLOCK` +- Support for the vm ioctl `KVM_GET_PIT2` +- Support for the vm ioctl `KVM_SET_PIT2` +- Support for the vcpu ioctl `KVM_GET_ONE_REG` + +## Changed +- Function offering support for `KVM_SET_MSRS` also returns the number + of MSR entries successfully written. + +# v0.2.0 + +## Added +- Add support for `KVM_ENABLE_CAP`. +- Add support for `KVM_SIGNAL_MSI`. + +## Fixed +- Fix bug in KvmRunWrapper. The memory for kvm_run struct was not unmapped + after the KvmRunWrapper object got out of scope. +- Return proper value when receiving the EOI KVM exit. +- Mark set_user_memory_region as unsafe. + +# v0.1.0 + +First release of the kvm-ioctls crate. + +The kvm-ioctls crate provides safe wrappers over the KVM API, a set of ioctls +used for creating and configuring Virtual Machines (VMs) on Linux. +The ioctls are accessible through four structures: +- Kvm - wrappers over system ioctls +- VmFd - wrappers over VM ioctls +- VcpuFd - wrappers over vCPU ioctls +- DeviceFd - wrappers over device ioctls + +The kvm-ioctls can be used on x86_64 and aarch64. Right now the aarch64 +support is considered experimental. 
diff --git a/kvm-ioctls/CODEOWNERS b/kvm-ioctls/CODEOWNERS new file mode 100644 index 000000000..b2c60d335 --- /dev/null +++ b/kvm-ioctls/CODEOWNERS @@ -0,0 +1,3 @@ +# These owners will be the default owners for everything in +# the repo. +* @acatangiu @aghecenco @andreeaflorescu @sameo diff --git a/kvm-ioctls/Cargo.lock b/kvm-ioctls/Cargo.lock deleted file mode 100644 index 065743fac..000000000 --- a/kvm-ioctls/Cargo.lock +++ /dev/null @@ -1,41 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "kvm-bindings" -version = "0.8.2" -dependencies = [ - "vmm-sys-util", -] - -[[package]] -name = "kvm-ioctls" -version = "0.1.0" -dependencies = [ - "kvm-bindings", - "libc", - "vmm-sys-util", -] - -[[package]] -name = "libc" -version = "0.2.155" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" - -[[package]] -name = "vmm-sys-util" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1435039746e20da4f8d507a72ee1b916f7b4b05af7a91c093d2c6561934ede" -dependencies = [ - "bitflags", - "libc", -] diff --git a/kvm-ioctls/Cargo.toml b/kvm-ioctls/Cargo.toml index dc98e4e39..32636ed1c 100644 --- a/kvm-ioctls/Cargo.toml +++ b/kvm-ioctls/Cargo.toml @@ -1,11 +1,17 @@ [package] name = "kvm-ioctls" -version = "0.1.0" -edition = "2018" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +version = "0.6.0" +authors = ["Amazon Firecracker Team "] +description = "Safe wrappers over KVM ioctls" +repository = "https://github.com/rust-vmm/kvm-ioctls" +readme = "README.md" +keywords = ["kvm"] +license = "Apache-2.0 OR MIT" 
[dependencies] -libc = "0.2" -kvm-bindings = { path = "../kvm-bindings", features = ["fam-wrappers"]} -vmm-sys-util = ">=0.10.0" \ No newline at end of file +libc = ">=0.2.39" +kvm-bindings = { path = "../kvm-bindings", features = ["fam-wrappers"] } +vmm-sys-util = ">=0.2.1" + +[dev-dependencies] +byteorder = ">=1.2.1" diff --git a/kvm-ioctls/LICENSE-APACHE b/kvm-ioctls/LICENSE-APACHE new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/kvm-ioctls/LICENSE-APACHE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/kvm-ioctls/LICENSE-MIT b/kvm-ioctls/LICENSE-MIT new file mode 100644 index 000000000..5c6a64610 --- /dev/null +++ b/kvm-ioctls/LICENSE-MIT @@ -0,0 +1,24 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
+ diff --git a/kvm-ioctls/README.md b/kvm-ioctls/README.md new file mode 100644 index 000000000..862117fe7 --- /dev/null +++ b/kvm-ioctls/README.md @@ -0,0 +1,47 @@ +[![Build Status](https://badge.buildkite.com/9e0e6c88972a3248a0908506d6946624da84e4e18c0870c4d0.svg)](https://buildkite.com/rust-vmm/kvm-ioctls-ci) +![crates.io](https://img.shields.io/crates/v/kvm-ioctls.svg) + +# kvm-ioctls + +The kvm-ioctls crate provides safe wrappers over the +[KVM API](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt), a set +of ioctls used for creating and configuring Virtual Machines (VMs) on Linux. +The ioctls are accessible through four structures: +- `Kvm` - wrappers over system ioctls +- `VmFd` - wrappers over VM ioctls +- `VcpuFd` - wrappers over vCPU ioctls +- `DeviceFd` - wrappers over device ioctls + +For further details check the +[KVM API](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt) as well +as the code documentation. + +## Supported Platforms + +The kvm-ioctls can be used on x86_64 and aarch64. Right now the aarch64 support +is considered experimental. For a production ready version, please check the +progress in the corresponding +[GitHub issue](https://github.com/rust-vmm/kvm-ioctls/issues/8). + +## Running the tests + +Our Continuous Integration (CI) pipeline is implemented on top of +[Buildkite](https://buildkite.com/). +For the complete list of tests, check our +[CI pipeline](https://buildkite.com/rust-vmm/kvm-ioctls-ci). + +Each individual test runs in a container. To reproduce a test locally, you can +use the dev-container on both x86 and arm64. + +```bash +docker run --device=/dev/kvm \ + -it \ + --security-opt seccomp=unconfined \ + --volume $(pwd)/kvm-ioctls:/kvm-ioctls \ + rustvmm/dev:v5 +cd kvm-ioctls/ +cargo test +``` + +For more details about the integration tests that are run for `kvm-ioctls`, +check the [rust-vmm-ci](https://github.com/rust-vmm/rust-vmm-ci) readme. 
diff --git a/kvm-ioctls/THIRD-PARTY b/kvm-ioctls/THIRD-PARTY new file mode 100644 index 000000000..8bafca303 --- /dev/null +++ b/kvm-ioctls/THIRD-PARTY @@ -0,0 +1,27 @@ +// Copyright 2017 The Chromium OS Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/kvm-ioctls/coverage_config_aarch64.json b/kvm-ioctls/coverage_config_aarch64.json new file mode 100644 index 000000000..27587e41a --- /dev/null +++ b/kvm-ioctls/coverage_config_aarch64.json @@ -0,0 +1,5 @@ +{ + "coverage_score": 77.1, + "exclude_path": "", + "crate_features": "" +} diff --git a/kvm-ioctls/coverage_config_x86_64.json b/kvm-ioctls/coverage_config_x86_64.json new file mode 100644 index 000000000..debc8fb40 --- /dev/null +++ b/kvm-ioctls/coverage_config_x86_64.json @@ -0,0 +1,5 @@ +{ + "coverage_score": 91.2, + "exclude_path": "", + "crate_features": "" +} diff --git a/kvm-ioctls/src/cap.rs b/kvm-ioctls/src/cap.rs index 919b54124..483c87f50 100644 --- a/kvm-ioctls/src/cap.rs +++ b/kvm-ioctls/src/cap.rs @@ -15,6 +15,7 @@ use kvm_bindings::*; /// /// The list of capabilities is based on the the KVM_CAP_* defines from the /// [Linux KVM header](https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/kvm.h). +/// #[derive(Clone, Copy, Debug)] #[repr(u32)] // We are allowing docs to be missing here because this enum is a wrapper @@ -44,7 +45,8 @@ pub enum Cap { target_arch = "x86", target_arch = "x86_64", target_arch = "arm", - target_arch = "aarch64" + target_arch = "aarch64", + target_arch = "s390" ))] SetGuestDebug = KVM_CAP_SET_GUEST_DEBUG, #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -102,7 +104,6 @@ pub enum Cap { PpcSmt = KVM_CAP_PPC_SMT, PpcRma = KVM_CAP_PPC_RMA, MaxVcpus = KVM_CAP_MAX_VCPUS, - MaxVcpuId = KVM_CAP_MAX_VCPU_ID, PpcHior = KVM_CAP_PPC_HIOR, PpcPapr = KVM_CAP_PPC_PAPR, SwTlb = KVM_CAP_SW_TLB, @@ -150,6 +151,4 @@ pub enum Cap { MsiDevid = KVM_CAP_MSI_DEVID, HypervSynic = KVM_CAP_HYPERV_SYNIC, HypervSynic2 = KVM_CAP_HYPERV_SYNIC2, - DebugHwBps = KVM_CAP_GUEST_DEBUG_HW_BPS, - DebugHwWps = KVM_CAP_GUEST_DEBUG_HW_WPS, } diff --git a/kvm-ioctls/src/ioctls/device.rs b/kvm-ioctls/src/ioctls/device.rs index 66695703d..7d64bb9e2 100644 --- a/kvm-ioctls/src/ioctls/device.rs +++ 
b/kvm-ioctls/src/ioctls/device.rs @@ -4,14 +4,13 @@ use std::fs::File; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; -use crate::ioctls::Result; -use crate::kvm_ioctls::{KVM_GET_DEVICE_ATTR, KVM_HAS_DEVICE_ATTR, KVM_SET_DEVICE_ATTR}; +use ioctls::Result; use kvm_bindings::kvm_device_attr; +use kvm_ioctls::{KVM_GET_DEVICE_ATTR, KVM_HAS_DEVICE_ATTR, KVM_SET_DEVICE_ATTR}; use vmm_sys_util::errno; use vmm_sys_util::ioctl::{ioctl_with_mut_ref, ioctl_with_ref}; /// Wrapper over the file descriptor obtained when creating an emulated device in the kernel. -#[derive(Debug)] pub struct DeviceFd { fd: File, } @@ -23,8 +22,8 @@ impl DeviceFd { /// # Arguments /// /// * `device_attr` - The device attribute to be tested. `addr` field is ignored. + /// pub fn has_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> { - // SAFETY: We are calling this function with a Device fd, and we trust the kernel. let ret = unsafe { ioctl_with_ref(self, KVM_HAS_DEVICE_ATTR(), device_attr) }; if ret != 0 { return Err(errno::Error::last()); @@ -73,8 +72,8 @@ impl DeviceFd { /// device_fd.set_device_attr(&dist_attr).unwrap(); /// } /// ``` + /// pub fn set_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> { - // SAFETY: We are calling this function with a Device fd, and we trust the kernel. let ret = unsafe { ioctl_with_ref(self, KVM_SET_DEVICE_ATTR(), device_attr) }; if ret != 0 { return Err(errno::Error::last()); @@ -116,8 +115,8 @@ impl DeviceFd { /// #[cfg(any(target_arch = "aarch64"))] /// { /// use kvm_bindings::{ - /// kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, - /// KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + /// KVM_DEV_ARM_VGIC_GRP_NR_IRQS, kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, + /// kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, /// }; /// /// // Create a GIC device. 
@@ -127,12 +126,12 @@ impl DeviceFd { /// flags: 0, /// }; /// let device_fd = match vm.create_device(&mut gic_device) { - /// Ok(fd) => fd, - /// Err(_) => { - /// gic_device.type_ = kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2; - /// vm.create_device(&mut gic_device) - /// .expect("Cannot create KVM vGIC device") - /// } + /// Ok(fd) => fd, + /// Err(_) => { + /// gic_device.type_ = kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2; + /// vm.create_device(&mut gic_device) + /// .expect("Cannot create KVM vGIC device") + /// } /// }; /// /// let mut data: u32 = 0; @@ -143,8 +142,8 @@ impl DeviceFd { /// device_fd.get_device_attr(&mut gic_attr).unwrap(); /// } /// ``` + /// pub fn get_device_attr(&self, device_attr: &mut kvm_device_attr) -> Result<()> { - // SAFETY: We are calling this function with a Device fd, and we trust the kernel. let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEVICE_ATTR(), device_attr) }; if ret != 0 { return Err(errno::Error::last()); @@ -180,9 +179,8 @@ impl FromRawFd for DeviceFd { #[cfg(test)] mod tests { - #![allow(clippy::undocumented_unsafe_blocks)] use super::*; - use crate::ioctls::system::Kvm; + use ioctls::system::Kvm; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use kvm_bindings::{ kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, kvm_device_type_KVM_DEV_TYPE_VFIO, @@ -242,7 +240,7 @@ mod tests { #[test] #[cfg(target_arch = "aarch64")] fn test_create_device() { - use crate::ioctls::vm::{create_gic_device, set_supported_nr_irqs}; + use ioctls::vm::{create_gic_device, set_supported_nr_irqs}; use kvm_bindings::{ kvm_device_type_KVM_DEV_TYPE_FSL_MPIC_20, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, }; @@ -262,10 +260,6 @@ mod tests { let device_fd = create_gic_device(&vm, 0); - // GICv3 on arm/aarch64 requires an online vCPU prior to setting device attributes, - // see: https://www.kernel.org/doc/html/latest/virt/kvm/devices/arm-vgic-v3.html - vm.create_vcpu(0).unwrap(); - // Following lines to re-construct device_fd are used to test // 
DeviceFd::from_raw_fd() and DeviceFd::as_raw_fd(). let raw_fd = unsafe { libc::dup(device_fd.as_raw_fd()) }; @@ -299,11 +293,9 @@ mod tests { // This value should be saved in the address provided to the ioctl. let mut data: u32 = 0; - let mut gic_attr = kvm_bindings::kvm_device_attr { - group: KVM_DEV_ARM_VGIC_GRP_NR_IRQS, - addr: data as u64, - ..Default::default() - }; + let mut gic_attr = kvm_bindings::kvm_device_attr::default(); + gic_attr.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS; + gic_attr.addr = data as u64; // Without properly providing the address to where the // value will be stored, the ioctl fails with EFAULT. diff --git a/kvm-ioctls/src/ioctls/mod.rs b/kvm-ioctls/src/ioctls/mod.rs index 9079acc78..e28fc0fb1 100644 --- a/kvm-ioctls/src/ioctls/mod.rs +++ b/kvm-ioctls/src/ioctls/mod.rs @@ -30,19 +30,17 @@ pub type Result = std::result::Result; /// /// The wrapper is needed for sending the pointer to `kvm_run` between /// threads as raw pointers do not implement `Send` and `Sync`. -#[derive(Debug)] pub struct KvmRunWrapper { kvm_run_ptr: *mut u8, // This field is need so we can `munmap` the memory mapped to hold `kvm_run`. mmap_size: usize, } -// SAFETY: Send and Sync aren't automatically inherited for the raw address pointer. +// Send and Sync aren't automatically inherited for the raw address pointer. // Accessing that pointer is only done through the stateless interface which // allows the object to be shared by multiple threads without a decrease in // safety. unsafe impl Send for KvmRunWrapper {} -// SAFETY: See above. unsafe impl Sync for KvmRunWrapper {} impl KvmRunWrapper { @@ -52,8 +50,8 @@ impl KvmRunWrapper { /// * `fd` - File descriptor to mmap from. /// * `size` - Size of memory region in bytes. pub fn mmap_from_fd(fd: &dyn AsRawFd, size: usize) -> Result { - // SAFETY: This is safe because we are creating a mapping in a place not already used by - // any other area in this process. 
+ // This is safe because we are creating a mapping in a place not already used by any other + // area in this process. let addr = unsafe { libc::mmap( null_mut(), @@ -75,11 +73,12 @@ impl KvmRunWrapper { } /// Returns a mutable reference to `kvm_run`. + /// #[allow(clippy::mut_from_ref)] pub fn as_mut_ref(&self) -> &mut kvm_run { + // Safe because we know we mapped enough memory to hold the kvm_run struct because the + // kernel told us how large it was. #[allow(clippy::cast_ptr_alignment)] - // SAFETY: Safe because we know we mapped enough memory to hold the kvm_run struct because - // the kernel told us how large it was. unsafe { &mut *(self.kvm_run_ptr as *mut kvm_run) } @@ -88,7 +87,7 @@ impl KvmRunWrapper { impl Drop for KvmRunWrapper { fn drop(&mut self) { - // SAFETY: This is safe because we mmap the area at kvm_run_ptr ourselves, + // This is safe because we mmap the area at kvm_run_ptr ourselves, // and nobody else is holding a reference to it. unsafe { libc::munmap(self.kvm_run_ptr as *mut libc::c_void, self.mmap_size); diff --git a/kvm-ioctls/src/ioctls/system.rs b/kvm-ioctls/src/ioctls/system.rs index 5dacb0367..18d7d57e5 100644 --- a/kvm-ioctls/src/ioctls/system.rs +++ b/kvm-ioctls/src/ioctls/system.rs @@ -5,26 +5,24 @@ // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. 
use libc::{open, O_CLOEXEC, O_RDWR}; -use std::ffi::CStr; use std::fs::File; use std::os::raw::{c_char, c_ulong}; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; -use crate::cap::Cap; -use crate::ioctls::vm::{new_vmfd, VmFd}; -use crate::ioctls::Result; -use crate::kvm_ioctls::*; +use cap::Cap; +use ioctls::vm::{new_vmfd, VmFd}; +use ioctls::Result; #[cfg(any(target_arch = "aarch64"))] use kvm_bindings::KVM_VM_TYPE_ARM_IPA_SIZE_MASK; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use kvm_bindings::{CpuId, MsrList, KVM_MAX_CPUID_ENTRIES, KVM_MAX_MSR_ENTRIES}; +use kvm_bindings::{CpuId, MsrList, KVM_MAX_MSR_ENTRIES}; +use kvm_ioctls::*; use vmm_sys_util::errno; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use vmm_sys_util::ioctl::ioctl_with_mut_ptr; use vmm_sys_util::ioctl::{ioctl, ioctl_with_val}; /// Wrapper over KVM system ioctls. -#[derive(Debug)] pub struct Kvm { kvm: File, } @@ -38,38 +36,12 @@ impl Kvm { /// use kvm_ioctls::Kvm; /// let kvm = Kvm::new().unwrap(); /// ``` + /// #[allow(clippy::new_ret_no_self)] pub fn new() -> Result { // Open `/dev/kvm` using `O_CLOEXEC` flag. let fd = Self::open_with_cloexec(true)?; - // SAFETY: Safe because we verify that the fd is valid in `open_with_cloexec` and we own - // the fd. - Ok(unsafe { Self::from_raw_fd(fd) }) - } - - /// Opens the KVM device at `kvm_path` and returns a `Kvm` object on success. - /// - /// # Arguments - /// - /// * `kvm_path`: path to the KVM device. Usually it is `/dev/kvm`. - /// - /// # Example - /// - /// ``` - /// use kvm_ioctls::Kvm; - /// use std::ffi::CString; - /// let kvm_path = CString::new("/dev/kvm").unwrap(); - /// let kvm = Kvm::new_with_path(&kvm_path).unwrap(); - /// ``` - #[allow(clippy::new_ret_no_self)] - pub fn new_with_path

(kvm_path: P) -> Result - where - P: AsRef, - { - // Open `kvm_path` using `O_CLOEXEC` flag. - let fd = Self::open_with_cloexec_at(kvm_path, true)?; - // SAFETY: Safe because we verify that the fd is valid in `open_with_cloexec_at` - // and we own the fd. + // Safe because we verify that the fd is valid in `open_with_cloexec` and we own the fd. Ok(unsafe { Self::from_raw_fd(fd) }) } @@ -94,40 +66,11 @@ impl Kvm { /// // `from_raw_fd` for creating a `Kvm` object: /// let kvm = unsafe { Kvm::from_raw_fd(kvm_fd) }; /// ``` - pub fn open_with_cloexec(close_on_exec: bool) -> Result { - // SAFETY: Safe because we give a constant nul-terminated string. - let kvm_path = unsafe { CStr::from_bytes_with_nul_unchecked(b"/dev/kvm\0") }; - Self::open_with_cloexec_at(kvm_path, close_on_exec) - } - - /// Opens the KVM device at `kvm_path` and returns the fd number on success. - /// Same as [open_with_cloexec()](struct.Kvm.html#method.open_with_cloexec) - /// except this method opens `kvm_path` instead of `/dev/kvm`. - /// - /// # Arguments /// - /// * `kvm_path`: path to the KVM device. Usually it is `/dev/kvm`. - /// * `close_on_exec`: If true opens `kvm_path` using the `O_CLOEXEC` flag. - /// - /// # Example - /// - /// ``` - /// # use kvm_ioctls::Kvm; - /// # use std::ffi::CString; - /// # use std::os::unix::io::FromRawFd; - /// let kvm_path = CString::new("/dev/kvm").unwrap(); - /// let kvm_fd = Kvm::open_with_cloexec_at(kvm_path, false).unwrap(); - /// // The `kvm_fd` can now be passed to another process where we can use - /// // `from_raw_fd` for creating a `Kvm` object: - /// let kvm = unsafe { Kvm::from_raw_fd(kvm_fd) }; - /// ``` - pub fn open_with_cloexec_at

(path: P, close_on_exec: bool) -> Result - where - P: AsRef, - { + pub fn open_with_cloexec(close_on_exec: bool) -> Result { let open_flags = O_RDWR | if close_on_exec { O_CLOEXEC } else { 0 }; - // SAFETY: Safe because we verify the result. - let ret = unsafe { open(path.as_ref().as_ptr() as *const c_char, open_flags) }; + // Safe because we give a constant nul-terminated string and verify the result. + let ret = unsafe { open("/dev/kvm\0".as_ptr() as *const c_char, open_flags) }; if ret < 0 { Err(errno::Error::last()) } else { @@ -146,57 +89,27 @@ impl Kvm { /// let kvm = Kvm::new().unwrap(); /// assert_eq!(kvm.get_api_version(), 12); /// ``` + /// pub fn get_api_version(&self) -> i32 { - // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of - // the ones defined by kernel. + // Safe because we know that our file is a KVM fd and that the request is one of the ones + // defined by kernel. unsafe { ioctl(self, KVM_GET_API_VERSION()) } } /// AArch64 specific call to get the host Intermediate Physical Address space limit. /// - /// Returns 0 if the capability is not available and an integer >= 32 otherwise. - #[cfg(target_arch = "aarch64")] + /// Returns 0 if the capability is not available and an integer larger than 32 otherwise. + #[cfg(any(target_arch = "aarch64"))] pub fn get_host_ipa_limit(&self) -> i32 { self.check_extension_int(Cap::ArmVmIPASize) } - /// AArch64 specific call to get the number of supported hardware breakpoints. - /// - /// Returns 0 if the capability is not available and a positive integer otherwise. - #[cfg(target_arch = "aarch64")] - pub fn get_guest_debug_hw_bps(&self) -> i32 { - self.check_extension_int(Cap::DebugHwBps) - } - - /// AArch64 specific call to get the number of supported hardware watchpoints. - /// - /// Returns 0 if the capability is not available and a positive integer otherwise. 
- #[cfg(target_arch = "aarch64")] - pub fn get_guest_debug_hw_wps(&self) -> i32 { - self.check_extension_int(Cap::DebugHwWps) - } - /// Wrapper over `KVM_CHECK_EXTENSION`. /// /// Returns 0 if the capability is not available and a positive integer otherwise. - /// See the documentation for `KVM_CHECK_EXTENSION`. - /// - /// # Arguments - /// - /// * `c` - KVM capability to check. - /// - /// # Example - /// - /// ``` - /// # use kvm_ioctls::Kvm; - /// use kvm_ioctls::Cap; - /// - /// let kvm = Kvm::new().unwrap(); - /// assert!(kvm.check_extension_int(Cap::MaxVcpuId) > 0); - /// ``` - pub fn check_extension_int(&self, c: Cap) -> i32 { - // SAFETY: Safe because we know that our file is a KVM fd and that the extension is one of - // the ones defined by kernel. + fn check_extension_int(&self, c: Cap) -> i32 { + // Safe because we know that our file is a KVM fd and that the extension is one of the ones + // defined by kernel. unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) } } @@ -219,6 +132,7 @@ impl Kvm { /// // Check if `KVM_CAP_USER_MEMORY` is supported. /// assert!(kvm.check_extension(Cap::UserMemory)); /// ``` + /// pub fn check_extension(&self, c: Cap) -> bool { self.check_extension_int(c) > 0 } @@ -234,8 +148,9 @@ impl Kvm { /// let kvm = Kvm::new().unwrap(); /// assert!(kvm.get_vcpu_mmap_size().unwrap() > 0); /// ``` + /// pub fn get_vcpu_mmap_size(&self) -> Result { - // SAFETY: Safe because we know that our file is a KVM fd and we verify the return result. + // Safe because we know that our file is a KVM fd and we verify the return result. let res = unsafe { ioctl(self, KVM_GET_VCPU_MMAP_SIZE()) }; if res > 0 { Ok(res as usize) @@ -257,6 +172,7 @@ impl Kvm { /// // We expect the number of vCPUs to be > 0 as per KVM API documentation. 
/// assert!(kvm.get_nr_vcpus() > 0); /// ``` + /// pub fn get_nr_vcpus(&self) -> usize { let x = self.check_extension_int(Cap::NrVcpus); if x > 0 { @@ -280,6 +196,7 @@ impl Kvm { /// let kvm = Kvm::new().unwrap(); /// assert!(kvm.get_nr_memslots() > 0); /// ``` + /// pub fn get_nr_memslots(&self) -> usize { let x = self.check_extension_int(Cap::NrMemslots); if x > 0 { @@ -302,6 +219,7 @@ impl Kvm { /// let kvm = Kvm::new().unwrap(); /// assert!(kvm.get_max_vcpus() > 0); /// ``` + /// pub fn get_max_vcpus(&self) -> usize { match self.check_extension_int(Cap::MaxVcpus) { 0 => self.get_nr_vcpus(), @@ -309,38 +227,16 @@ impl Kvm { } } - /// Gets the Maximum VCPU ID per VM. - /// - /// See the documentation for `KVM_CAP_MAX_VCPU_ID` - /// Returns [get_max_vcpus()](struct.Kvm.html#method.get_max_vcpus) when - /// `KVM_CAP_MAX_VCPU_ID` is not implemented - /// - /// # Example - /// - /// ``` - /// # use kvm_ioctls::Kvm; - /// let kvm = Kvm::new().unwrap(); - /// assert!(kvm.get_max_vcpu_id() > 0); - /// ``` - pub fn get_max_vcpu_id(&self) -> usize { - match self.check_extension_int(Cap::MaxVcpuId) { - 0 => self.get_max_vcpus(), - x => x as usize, - } - } - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - fn get_cpuid(&self, kind: u64, num_entries: usize) -> Result { - if num_entries > KVM_MAX_CPUID_ENTRIES { - // Returns the same error the underlying `ioctl` would have sent. - return Err(errno::Error::new(libc::ENOMEM)); - } + fn get_cpuid(&self, kind: u64, max_entries_count: usize) -> Result { + let mut cpuid = CpuId::new(max_entries_count); - let mut cpuid = CpuId::new(num_entries).map_err(|_| errno::Error::new(libc::ENOMEM))?; - // SAFETY: The kernel is trusted not to write beyond the bounds of the memory - // allocated for the struct. The limit is read from nent, which is set to the allocated - // size(num_entries) above. - let ret = unsafe { ioctl_with_mut_ptr(self, kind, cpuid.as_mut_fam_struct_ptr()) }; + let ret = unsafe { + // ioctl is unsafe. 
The kernel is trusted not to write beyond the bounds of the memory + // allocated for the struct. The limit is read from nent, which is set to the allocated + // size(max_entries_count) above. + ioctl_with_mut_ptr(self, kind, cpuid.as_mut_fam_struct_ptr()) + }; if ret < 0 { return Err(errno::Error::last()); } @@ -354,12 +250,9 @@ impl Kvm { /// /// # Arguments /// - /// * `num_entries` - Maximum number of CPUID entries. This function can return less than + /// * `max_entries_count` - Maximum number of CPUID entries. This function can return less than /// this when the hardware does not support so many CPUID entries. /// - /// Returns Error `errno::Error(libc::ENOMEM)` when the input `num_entries` is greater than - /// `KVM_MAX_CPUID_ENTRIES`. - /// /// # Example /// /// ``` @@ -372,9 +265,10 @@ impl Kvm { /// let cpuid_entries = cpuid.as_mut_slice(); /// assert!(cpuid_entries.len() <= KVM_MAX_CPUID_ENTRIES); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn get_emulated_cpuid(&self, num_entries: usize) -> Result { - self.get_cpuid(KVM_GET_EMULATED_CPUID(), num_entries) + pub fn get_emulated_cpuid(&self, max_entries_count: usize) -> Result { + self.get_cpuid(KVM_GET_EMULATED_CPUID(), max_entries_count) } /// X86 specific call to get the system supported CPUID values. @@ -383,12 +277,9 @@ impl Kvm { /// /// # Arguments /// - /// * `num_entries` - Maximum number of CPUID entries. This function can return less than + /// * `max_entries_count` - Maximum number of CPUID entries. This function can return less than /// this when the hardware does not support so many CPUID entries. /// - /// Returns Error `errno::Error(libc::ENOMEM)` when the input `num_entries` is greater than - /// `KVM_MAX_CPUID_ENTRIES`. 
- /// /// # Example /// /// ``` @@ -397,13 +288,14 @@ impl Kvm { /// use kvm_ioctls::Kvm; /// /// let kvm = Kvm::new().unwrap(); - /// let mut cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); + /// let mut cpuid = kvm.get_emulated_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); /// let cpuid_entries = cpuid.as_mut_slice(); /// assert!(cpuid_entries.len() <= KVM_MAX_CPUID_ENTRIES); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn get_supported_cpuid(&self, num_entries: usize) -> Result { - self.get_cpuid(KVM_GET_SUPPORTED_CPUID(), num_entries) + pub fn get_supported_cpuid(&self, max_entries_count: usize) -> Result { + self.get_cpuid(KVM_GET_SUPPORTED_CPUID(), max_entries_count) } /// X86 specific call to get list of supported MSRS @@ -420,13 +312,12 @@ impl Kvm { /// ``` #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn get_msr_index_list(&self) -> Result { - let mut msr_list = - MsrList::new(KVM_MAX_MSR_ENTRIES).map_err(|_| errno::Error::new(libc::ENOMEM))?; + let mut msr_list = MsrList::new(KVM_MAX_MSR_ENTRIES); - // SAFETY: The kernel is trusted not to write beyond the bounds of the memory - // allocated for the struct. The limit is read from nmsrs, which is set to the allocated - // size (MAX_KVM_MSR_ENTRIES) above. let ret = unsafe { + // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory + // allocated for the struct. The limit is read from nmsrs, which is set to the allocated + // size (MAX_KVM_MSR_ENTRIES) above. ioctl_with_mut_ptr( self, KVM_GET_MSR_INDEX_LIST(), @@ -456,33 +347,19 @@ impl Kvm { /// // Check that the VM mmap size is the same reported by `KVM_GET_VCPU_MMAP_SIZE`. 
/// assert!(vm.run_size() == kvm.get_vcpu_mmap_size().unwrap()); /// ``` - #[cfg(not(any(target_arch = "aarch64")))] - pub fn create_vm(&self) -> Result { - self.create_vm_with_type(0) // Create using default VM type - } - - /// AArch64 specific create_vm to create a VM fd using the KVM fd using the host's maximum IPA size. /// - /// See the arm64 section of KVM documentation for `KVM_CREATE_VM`. - /// A call to this function will also initialize the size of the vcpu mmap area using the - /// `KVM_GET_VCPU_MMAP_SIZE` ioctl. - /// - /// # Example - /// - /// ``` - /// # use kvm_ioctls::Kvm; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// // Check that the VM mmap size is the same reported by `KVM_GET_VCPU_MMAP_SIZE`. - /// assert!(vm.run_size() == kvm.get_vcpu_mmap_size().unwrap()); - /// ``` - #[cfg(any(target_arch = "aarch64"))] pub fn create_vm(&self) -> Result { - let mut ipa_size = 0; // Create using default VM type - if self.check_extension(Cap::ArmVmIPASize) { - ipa_size = self.get_host_ipa_limit(); + // Safe because we know `self.kvm` is a real KVM fd as this module is the only one that + // create Kvm objects. + let ret = unsafe { ioctl(&self.kvm, KVM_CREATE_VM()) }; + if ret >= 0 { + // Safe because we verify the value of ret and we are the owners of the fd. + let vm_file = unsafe { File::from_raw_fd(ret) }; + let run_mmap_size = self.get_vcpu_mmap_size()?; + Ok(new_vmfd(vm_file, run_mmap_size)) + } else { + Err(errno::Error::last()) } - self.create_vm_with_type(ipa_size as u64) } /// AArch64 specific function to create a VM fd using the KVM fd with flexible IPA size. 
@@ -515,34 +392,20 @@ impl Kvm { /// assert!(vm.run_size() == kvm.get_vcpu_mmap_size().unwrap()); /// } /// ``` + /// #[cfg(any(target_arch = "aarch64"))] pub fn create_vm_with_ipa_size(&self, ipa_size: u32) -> Result { - self.create_vm_with_type((ipa_size & KVM_VM_TYPE_ARM_IPA_SIZE_MASK).into()) - } - - /// Creates a VM fd using the KVM fd of a specific type. - /// - /// See the documentation for `KVM_CREATE_VM`. - /// A call to this function will also initialize the size of the vcpu mmap area using the - /// `KVM_GET_VCPU_MMAP_SIZE` ioctl. - /// - /// * `vm_type` - Platform and architecture specific platform VM type. A value of 0 is the equivalent - /// to using the default VM type. - /// # Example - /// - /// ``` - /// # use kvm_ioctls::Kvm; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm_with_type(0).unwrap(); - /// // Check that the VM mmap size is the same reported by `KVM_GET_VCPU_MMAP_SIZE`. - /// assert!(vm.run_size() == kvm.get_vcpu_mmap_size().unwrap()); - /// ``` - pub fn create_vm_with_type(&self, vm_type: u64) -> Result { - // SAFETY: Safe because we know `self.kvm` is a real KVM fd as this module is the only one - // that create Kvm objects. - let ret = unsafe { ioctl_with_val(&self.kvm, KVM_CREATE_VM(), vm_type) }; + // Safe because we know `self.kvm` is a real KVM fd as this module is the only one that + // create Kvm objects. + let ret = unsafe { + ioctl_with_val( + &self.kvm, + KVM_CREATE_VM(), + (ipa_size & KVM_VM_TYPE_ARM_IPA_SIZE_MASK).into(), + ) + }; if ret >= 0 { - // SAFETY: Safe because we verify the value of ret and we are the owners of the fd. + // Safe because we verify the value of ret and we are the owners of the fd. 
let vm_file = unsafe { File::from_raw_fd(ret) }; let run_mmap_size = self.get_vcpu_mmap_size()?; Ok(new_vmfd(vm_file, run_mmap_size)) @@ -578,6 +441,7 @@ impl Kvm { /// assert!(rawfd >= 0); /// let vm = unsafe { kvm.create_vmfd_from_rawfd(rawfd).unwrap() }; /// ``` + /// pub unsafe fn create_vmfd_from_rawfd(&self, fd: RawFd) -> Result { let run_mmap_size = self.get_vcpu_mmap_size()?; Ok(new_vmfd(File::from_raw_fd(fd), run_mmap_size)) @@ -618,6 +482,7 @@ impl FromRawFd for Kvm { /// // Safe because we verify that the fd is valid in `open_with_cloexec` and we own the fd. /// let kvm = unsafe { Kvm::from_raw_fd(kvm_fd) }; /// ``` + /// unsafe fn from_raw_fd(fd: RawFd) -> Self { Kvm { kvm: File::from_raw_fd(fd), @@ -627,11 +492,9 @@ impl FromRawFd for Kvm { #[cfg(test)] mod tests { - #![allow(clippy::undocumented_unsafe_blocks)] use super::*; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use kvm_bindings::KVM_MAX_CPUID_ENTRIES; - use libc::{fcntl, FD_CLOEXEC, F_GETFD}; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use vmm_sys_util::fam::FamStruct; @@ -640,33 +503,6 @@ mod tests { Kvm::new().unwrap(); } - #[test] - fn test_kvm_new_with_path() { - let kvm_path = unsafe { CStr::from_bytes_with_nul_unchecked(b"/dev/kvm\0") }; - Kvm::new_with_path(kvm_path).unwrap(); - } - - #[test] - fn test_open_with_cloexec() { - let fd = Kvm::open_with_cloexec(false).unwrap(); - let flags = unsafe { fcntl(fd, F_GETFD, 0) }; - assert_eq!(flags & FD_CLOEXEC, 0); - let fd = Kvm::open_with_cloexec(true).unwrap(); - let flags = unsafe { fcntl(fd, F_GETFD, 0) }; - assert_eq!(flags & FD_CLOEXEC, FD_CLOEXEC); - } - - #[test] - fn test_open_with_cloexec_at() { - let kvm_path = std::ffi::CString::new("/dev/kvm").unwrap(); - let fd = Kvm::open_with_cloexec_at(&kvm_path, false).unwrap(); - let flags = unsafe { fcntl(fd, F_GETFD, 0) }; - assert_eq!(flags & FD_CLOEXEC, 0); - let fd = Kvm::open_with_cloexec_at(&kvm_path, true).unwrap(); - let flags = unsafe { fcntl(fd, F_GETFD, 
0) }; - assert_eq!(flags & FD_CLOEXEC, FD_CLOEXEC); - } - #[test] fn test_kvm_api_version() { let kvm = Kvm::new().unwrap(); @@ -675,7 +511,7 @@ mod tests { } #[test] - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64"))] fn test_get_host_ipa_limit() { let kvm = Kvm::new().unwrap(); let host_ipa_limit = kvm.get_host_ipa_limit(); @@ -688,17 +524,6 @@ mod tests { } } - #[test] - #[cfg(target_arch = "aarch64")] - fn test_guest_debug_hw_capacity() { - let kvm = Kvm::new().unwrap(); - // The number of supported breakpoints and watchpoints may vary on - // different platforms. - // It could be 0 if no supported, or any positive integer otherwise. - assert!(kvm.get_guest_debug_hw_bps() >= 0); - assert!(kvm.get_guest_debug_hw_wps() >= 0); - } - #[test] fn test_kvm_getters() { let kvm = Kvm::new().unwrap(); @@ -727,19 +552,6 @@ mod tests { assert_eq!(vm.run_size(), kvm.get_vcpu_mmap_size().unwrap()); } - #[test] - fn test_create_vm_with_type() { - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm_with_type(0).unwrap(); - - // Test create_vmfd_from_rawfd() - let rawfd = unsafe { libc::dup(vm.as_raw_fd()) }; - assert!(rawfd >= 0); - let vm = unsafe { kvm.create_vmfd_from_rawfd(rawfd).unwrap() }; - - assert_eq!(vm.run_size(), kvm.get_vcpu_mmap_size().unwrap()); - } - #[test] #[cfg(any(target_arch = "aarch64"))] fn test_create_vm_with_ipa_size() { @@ -757,8 +569,8 @@ mod tests { .create_vm_with_ipa_size((host_ipa_limit + 1) as u32) .is_err()); } else { - // Unsupported, we can't provide an IPA size. Only KVM type=0 works. - assert!(kvm.create_vm_with_type(0).is_err()); + // Unsupported, here we can test with the default value 40. 
+ assert!(kvm.create_vm_with_ipa_size(40).is_err()); } } @@ -770,10 +582,6 @@ mod tests { let cpuid_entries = cpuid.as_mut_slice(); assert!(!cpuid_entries.is_empty()); assert!(cpuid_entries.len() <= KVM_MAX_CPUID_ENTRIES); - - // Test case for more than MAX entries - let cpuid_err = kvm.get_emulated_cpuid(KVM_MAX_CPUID_ENTRIES + 1_usize); - assert!(cpuid_err.is_err()); } #[test] @@ -784,10 +592,6 @@ mod tests { let cpuid_entries = cpuid.as_mut_slice(); assert!(!cpuid_entries.is_empty()); assert!(cpuid_entries.len() <= KVM_MAX_CPUID_ENTRIES); - - // Test case for more than MAX entries - let cpuid_err = kvm.get_emulated_cpuid(KVM_MAX_CPUID_ENTRIES + 1_usize); - assert!(cpuid_err.is_err()); } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -801,7 +605,8 @@ mod tests { let kvm = unsafe { Kvm::from_raw_fd(rawfd) }; let cpuid_1 = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); - let _ = CpuId::new(cpuid_1.as_fam_struct_ref().len()).unwrap(); + let cpuid_2 = CpuId::new(cpuid_1.as_fam_struct_ref().len()); + assert!(cpuid_1 != cpuid_2); } #[test] @@ -817,7 +622,7 @@ mod tests { let badf_errno = libc::EBADF; let faulty_kvm = Kvm { - kvm: unsafe { File::from_raw_fd(-2) }, + kvm: unsafe { File::from_raw_fd(-1) }, }; assert_eq!( diff --git a/kvm-ioctls/src/ioctls/vcpu.rs b/kvm-ioctls/src/ioctls/vcpu.rs index f20a22efc..edd69272e 100644 --- a/kvm-ioctls/src/ioctls/vcpu.rs +++ b/kvm-ioctls/src/ioctls/vcpu.rs @@ -10,99 +10,20 @@ use libc::EINVAL; use std::fs::File; use std::os::unix::io::{AsRawFd, RawFd}; -use crate::ioctls::{KvmRunWrapper, Result}; -use crate::kvm_ioctls::*; +use ioctls::{KvmRunWrapper, Result}; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use kvm_bindings::{CpuId, Msrs, KVM_MAX_CPUID_ENTRIES}; +use kvm_bindings::{CpuId, Msrs}; +use kvm_ioctls::*; use vmm_sys_util::errno; use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ref}; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use 
vmm_sys_util::ioctl::{ioctl_with_mut_ptr, ioctl_with_ptr, ioctl_with_val}; +use vmm_sys_util::ioctl::{ioctl_with_mut_ptr, ioctl_with_ptr}; /// Reasons for vCPU exits. /// /// The exit reasons are mapped to the `KVM_EXIT_*` defines in the /// [Linux KVM header](https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/kvm.h). -#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] -#[derive(Debug)] -pub enum VcpuExit<'a> { - /// An out port instruction was run on the given port with the given data. - IoOut(u16 /* port */, &'a [u8] /* data */), - /// An in port instruction was run on the given port. - /// - /// The given slice should be filled in before [run()](struct.VcpuFd.html#method.run) - /// is called again. - IoIn(u16 /* port */, &'a mut [u8] /* data */), - /// A read instruction was run against the given MMIO address. - /// - /// The given slice should be filled in before [run()](struct.VcpuFd.html#method.run) - /// is called again. - MmioRead(u64 /* address */, &'a mut [u8]), - /// A write instruction was run against the given MMIO address with the given data. - MmioWrite(u64 /* address */, &'a [u8]), - /// Corresponds to KVM_EXIT_UNKNOWN. - Unknown, - /// Corresponds to KVM_EXIT_EXCEPTION. - Exception, - /// Corresponds to KVM_EXIT_HYPERCALL. - Hypercall, - /// Corresponds to KVM_EXIT_DEBUG. - /// - /// Provides architecture specific information for the debug event. - Debug(kvm_debug_exit_arch), - /// Corresponds to KVM_EXIT_HLT. - Hlt, - /// Corresponds to KVM_EXIT_IRQ_WINDOW_OPEN. - IrqWindowOpen, - /// Corresponds to KVM_EXIT_SHUTDOWN. - Shutdown, - /// Corresponds to KVM_EXIT_FAIL_ENTRY. - FailEntry( - u64, /* hardware_entry_failure_reason */ - u32, /* cpu */ - ), - /// Corresponds to KVM_EXIT_INTR. - Intr, - /// Corresponds to KVM_EXIT_SET_TPR. - SetTpr, - /// Corresponds to KVM_EXIT_TPR_ACCESS. - TprAccess, - /// Corresponds to KVM_EXIT_S390_SIEIC. - S390Sieic, - /// Corresponds to KVM_EXIT_S390_RESET. 
- S390Reset, - /// Corresponds to KVM_EXIT_DCR. - Dcr, - /// Corresponds to KVM_EXIT_NMI. - Nmi, - /// Corresponds to KVM_EXIT_INTERNAL_ERROR. - InternalError, - /// Corresponds to KVM_EXIT_OSI. - Osi, - /// Corresponds to KVM_EXIT_PAPR_HCALL. - PaprHcall, - /// Corresponds to KVM_EXIT_S390_UCONTROL. - S390Ucontrol, - /// Corresponds to KVM_EXIT_WATCHDOG. - Watchdog, - /// Corresponds to KVM_EXIT_S390_TSCH. - S390Tsch, - /// Corresponds to KVM_EXIT_EPR. - Epr, - /// Corresponds to KVM_EXIT_SYSTEM_EVENT. - SystemEvent(u32 /* type */, &'a [u64] /* flags */), - /// Corresponds to KVM_EXIT_S390_STSI. - S390Stsi, - /// Corresponds to KVM_EXIT_IOAPIC_EOI. - IoapicEoi(u8 /* vector */), - /// Corresponds to KVM_EXIT_HYPERV. - Hyperv, - /// Corresponds to an exit reason that is unknown from the current version - /// of the kvm-ioctls crate. Let the consumer decide about what to do with - /// it. - Unsupported(u32), -} -#[cfg(target_arch = "riscv64")] +/// #[derive(Debug)] pub enum VcpuExit<'a> { /// An out port instruction was run on the given port with the given data. @@ -126,9 +47,7 @@ pub enum VcpuExit<'a> { /// Corresponds to KVM_EXIT_HYPERCALL. Hypercall, /// Corresponds to KVM_EXIT_DEBUG. - /// - /// Provides architecture specific information for the debug event. - Debug(kvm_debug_exit_arch), + Debug, /// Corresponds to KVM_EXIT_HLT. Hlt, /// Corresponds to KVM_EXIT_IRQ_WINDOW_OPEN. @@ -136,10 +55,7 @@ pub enum VcpuExit<'a> { /// Corresponds to KVM_EXIT_SHUTDOWN. Shutdown, /// Corresponds to KVM_EXIT_FAIL_ENTRY. - FailEntry( - u64, /* hardware_entry_failure_reason */ - u32, /* cpu */ - ), + FailEntry, /// Corresponds to KVM_EXIT_INTR. Intr, /// Corresponds to KVM_EXIT_SET_TPR. @@ -176,34 +92,14 @@ pub enum VcpuExit<'a> { IoapicEoi(u8 /* vector */), /// Corresponds to KVM_EXIT_HYPERV. Hyperv, - /// Corresponds to an exit reason that is unknown from the current version - /// of the kvm-ioctls crate. Let the consumer decide about what to do with - /// it. 
- Unsupported(u32), } /// Wrapper over KVM vCPU ioctls. -#[derive(Debug)] pub struct VcpuFd { vcpu: File, kvm_run_ptr: KvmRunWrapper, } -/// KVM Sync Registers used to tell KVM which registers to sync -#[repr(u32)] -#[derive(Debug, Copy, Clone)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -pub enum SyncReg { - /// General purpose registers, - Register = KVM_SYNC_X86_REGS, - - /// System registers - SystemRegister = KVM_SYNC_X86_SREGS, - - /// CPU events - VcpuEvents = KVM_SYNC_X86_EVENTS, -} - impl VcpuFd { /// Returns the vCPU general purpose registers. /// @@ -222,11 +118,12 @@ impl VcpuFd { /// #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] /// let regs = vcpu.get_regs().unwrap(); /// ``` + /// #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] pub fn get_regs(&self) -> Result { - let mut regs = kvm_regs::default(); - // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only - // read the correct amount of memory from our pointer, and we verify the return result. + // Safe because we know that our file is a vCPU fd, we know the kernel will only read the + // correct amount of memory from our pointer, and we verify the return result. + let mut regs = unsafe { std::mem::zeroed() }; let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_REGS(), &mut regs) }; if ret != 0 { return Err(errno::Error::last()); @@ -234,88 +131,6 @@ impl VcpuFd { Ok(regs) } - /// Sets a specified piece of cpu configuration and/or state. - /// - /// See the documentation for `KVM_SET_DEVICE_ATTR` in - /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt) - /// # Arguments - /// - /// * `device_attr` - The cpu attribute to be set. 
- /// - /// # Example - /// - /// ```rust - /// # extern crate kvm_ioctls; - /// # extern crate kvm_bindings; - /// # use kvm_ioctls::Kvm; - /// # use kvm_bindings::{ - /// KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT - /// }; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// let vcpu = vm.create_vcpu(0).unwrap(); - /// - /// let dist_attr = kvm_bindings::kvm_device_attr { - /// group: KVM_ARM_VCPU_PMU_V3_CTRL, - /// attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT), - /// addr: 0x0, - /// flags: 0, - /// }; - /// - /// if (vcpu.has_device_attr(&dist_attr).is_ok()) { - /// vcpu.set_device_attr(&dist_attr).unwrap(); - /// } - /// ``` - #[cfg(target_arch = "aarch64")] - pub fn set_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> { - // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel. - let ret = unsafe { ioctl_with_ref(self, KVM_SET_DEVICE_ATTR(), device_attr) }; - if ret != 0 { - return Err(errno::Error::last()); - } - Ok(()) - } - - /// Tests whether a cpu supports a particular attribute. - /// - /// See the documentation for `KVM_HAS_DEVICE_ATTR` in - /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt) - /// # Arguments - /// - /// * `device_attr` - The cpu attribute to be tested. `addr` field is ignored. 
- /// - /// # Example - /// - /// ```rust - /// # extern crate kvm_ioctls; - /// # extern crate kvm_bindings; - /// # use kvm_ioctls::Kvm; - /// # use kvm_bindings::{ - /// KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT - /// }; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// let vcpu = vm.create_vcpu(0).unwrap(); - /// - /// let dist_attr = kvm_bindings::kvm_device_attr { - /// group: KVM_ARM_VCPU_PMU_V3_CTRL, - /// attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT), - /// addr: 0x0, - /// flags: 0, - /// }; - /// - /// vcpu.has_device_attr(&dist_attr); - /// ``` - #[cfg(target_arch = "aarch64")] - pub fn has_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> { - // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel. - let ret = unsafe { ioctl_with_ref(self, KVM_HAS_DEVICE_ATTR(), device_attr) }; - if ret != 0 { - return Err(errno::Error::last()); - } - Ok(()) - } - /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl. /// /// # Arguments @@ -332,8 +147,7 @@ impl VcpuFd { /// let vm = kvm.create_vm().unwrap(); /// let vcpu = vm.create_vcpu(0).unwrap(); /// - /// #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] - /// { + /// #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] { /// // Get the current vCPU registers. /// let mut regs = vcpu.get_regs().unwrap(); /// // Set a new value for the Instruction Pointer. @@ -341,10 +155,11 @@ impl VcpuFd { /// vcpu.set_regs(®s).unwrap(); /// } /// ``` + /// #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] pub fn set_regs(&self, regs: &kvm_regs) -> Result<()> { - // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only - // read the correct amount of memory from our pointer, and we verify the return result. 
+ // Safe because we know that our file is a vCPU fd, we know the kernel will only read the + // correct amount of memory from our pointer, and we verify the return result. let ret = unsafe { ioctl_with_ref(self, KVM_SET_REGS(), regs) }; if ret != 0 { return Err(errno::Error::last()); @@ -369,11 +184,13 @@ impl VcpuFd { /// #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] /// let sregs = vcpu.get_sregs().unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn get_sregs(&self) -> Result { + // Safe because we know that our file is a vCPU fd, we know the kernel will only write the + // correct amount of memory to our pointer, and we verify the return result. let mut regs = kvm_sregs::default(); - // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only - // write the correct amount of memory to our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS(), &mut regs) }; if ret != 0 { return Err(errno::Error::last()); @@ -396,8 +213,7 @@ impl VcpuFd { /// let kvm = Kvm::new().unwrap(); /// let vm = kvm.create_vm().unwrap(); /// let vcpu = vm.create_vcpu(0).unwrap(); - /// #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] - /// { + /// #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] { /// let mut sregs = vcpu.get_sregs().unwrap(); /// // Update the code segment (cs). /// sregs.cs.base = 0; @@ -405,10 +221,11 @@ impl VcpuFd { /// vcpu.set_sregs(&sregs).unwrap(); /// } /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn set_sregs(&self, sregs: &kvm_sregs) -> Result<()> { - // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only - // read the correct amount of memory from our pointer, and we verify the return result. 
+ // Safe because we know that our file is a vCPU fd, we know the kernel will only read the + // correct amount of memory from our pointer, and we verify the return result. let ret = unsafe { ioctl_with_ref(self, KVM_SET_SREGS(), sregs) }; if ret != 0 { return Err(errno::Error::last()); @@ -433,11 +250,15 @@ impl VcpuFd { /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] /// let fpu = vcpu.get_fpu().unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn get_fpu(&self) -> Result { let mut fpu = kvm_fpu::default(); - // SAFETY: Here we trust the kernel not to read past the end of the kvm_fpu struct. - let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_FPU(), &mut fpu) }; + + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_fpu struct. + ioctl_with_mut_ref(self, KVM_GET_FPU(), &mut fpu) + }; if ret != 0 { return Err(errno::Error::last()); } @@ -461,8 +282,7 @@ impl VcpuFd { /// let kvm = Kvm::new().unwrap(); /// let vm = kvm.create_vm().unwrap(); /// let vcpu = vm.create_vcpu(0).unwrap(); - /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - /// { + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { /// let KVM_FPU_CWD: u16 = 0x37f; /// let fpu = kvm_fpu { /// fcw: KVM_FPU_CWD, @@ -471,10 +291,13 @@ impl VcpuFd { /// vcpu.set_fpu(&fpu).unwrap(); /// } /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn set_fpu(&self, fpu: &kvm_fpu) -> Result<()> { - // SAFETY: Here we trust the kernel not to read past the end of the kvm_fpu struct. - let ret = unsafe { ioctl_with_ref(self, KVM_SET_FPU(), fpu) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_fpu struct. 
+ ioctl_with_ref(self, KVM_SET_FPU(), fpu) + }; if ret < 0 { return Err(errno::Error::last()); } @@ -518,8 +341,10 @@ impl VcpuFd { /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn set_cpuid2(&self, cpuid: &CpuId) -> Result<()> { - // SAFETY: Here we trust the kernel not to read past the end of the kvm_cpuid2 struct. - let ret = unsafe { ioctl_with_ptr(self, KVM_SET_CPUID2(), cpuid.as_fam_struct_ptr()) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_cpuid2 struct. + ioctl_with_ptr(self, KVM_SET_CPUID2(), cpuid.as_fam_struct_ptr()) + }; if ret < 0 { return Err(errno::Error::last()); } @@ -551,15 +376,11 @@ impl VcpuFd { /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn get_cpuid2(&self, num_entries: usize) -> Result { - if num_entries > KVM_MAX_CPUID_ENTRIES { - // Returns the same error the underlying `ioctl` would have sent. - return Err(errno::Error::new(libc::ENOMEM)); - } - - let mut cpuid = CpuId::new(num_entries).map_err(|_| errno::Error::new(libc::ENOMEM))?; - let ret = - // SAFETY: Here we trust the kernel not to read past the end of the kvm_cpuid2 struct. - unsafe { ioctl_with_mut_ptr(self, KVM_GET_CPUID2(), cpuid.as_mut_fam_struct_ptr()) }; + let mut cpuid = CpuId::new(num_entries); + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_cpuid2 struct. + ioctl_with_mut_ptr(self, KVM_GET_CPUID2(), cpuid.as_mut_fam_struct_ptr()) + }; if ret != 0 { return Err(errno::Error::last()); } @@ -602,7 +423,7 @@ impl VcpuFd { /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> { - // SAFETY: The ioctl is safe because we allocated the struct and we know the + // The ioctl is safe because we allocated the struct and we know the // kernel will write exactly the size of the struct. 
let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), cap) }; if ret == 0 { @@ -630,13 +451,16 @@ impl VcpuFd { /// let vcpu = vm.create_vcpu(0).unwrap(); /// let lapic = vcpu.get_lapic().unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn get_lapic(&self) -> Result { let mut klapic = kvm_lapic_state::default(); - // SAFETY: The ioctl is unsafe unless you trust the kernel not to write past the end of the - // local_apic struct. - let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_LAPIC(), &mut klapic) }; + let ret = unsafe { + // The ioctl is unsafe unless you trust the kernel not to write past the end of the + // local_apic struct. + ioctl_with_mut_ref(self, KVM_GET_LAPIC(), &mut klapic) + }; if ret < 0 { return Err(errno::Error::last()); } @@ -673,12 +497,15 @@ impl VcpuFd { /// apic_icr_slice.write(write_value).unwrap(); /// /// // Update the value of LAPIC. - /// vcpu.set_lapic(&lapic).unwrap(); + ///vcpu.set_lapic(&lapic).unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()> { - // SAFETY: The ioctl is safe because the kernel will only read from the klapic struct. - let ret = unsafe { ioctl_with_ref(self, KVM_SET_LAPIC(), klapic) }; + let ret = unsafe { + // The ioctl is safe because the kernel will only read from the klapic struct. + ioctl_with_ref(self, KVM_SET_LAPIC(), klapic) + }; if ret < 0 { return Err(errno::Error::last()); } @@ -716,15 +543,17 @@ impl VcpuFd { /// index: 0x0000_0175, /// ..Default::default() /// }, - /// ]) - /// .unwrap(); + /// ]); /// let read = vcpu.get_msrs(&mut msrs).unwrap(); /// assert_eq!(read, 2); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn get_msrs(&self, msrs: &mut Msrs) -> Result { - // SAFETY: Here we trust the kernel not to read past the end of the kvm_msrs struct. 
- let ret = unsafe { ioctl_with_mut_ptr(self, KVM_GET_MSRS(), msrs.as_mut_fam_struct_ptr()) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_msrs struct. + ioctl_with_mut_ptr(self, KVM_GET_MSRS(), msrs.as_mut_fam_struct_ptr()) + }; if ret < 0 { return Err(errno::Error::last()); } @@ -752,18 +581,22 @@ impl VcpuFd { /// let vcpu = vm.create_vcpu(0).unwrap(); /// /// // Configure the entries we want to set. - /// let mut msrs = Msrs::from_entries(&[kvm_msr_entry { - /// index: 0x0000_0174, - /// ..Default::default() - /// }]) - /// .unwrap(); + /// let mut msrs = Msrs::from_entries(&[ + /// kvm_msr_entry { + /// index: 0x0000_0174, + /// ..Default::default() + /// }, + /// ]); /// let written = vcpu.set_msrs(&msrs).unwrap(); /// assert_eq!(written, 1); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn set_msrs(&self, msrs: &Msrs) -> Result { - // SAFETY: Here we trust the kernel not to read past the end of the kvm_msrs struct. - let ret = unsafe { ioctl_with_ptr(self, KVM_SET_MSRS(), msrs.as_fam_struct_ptr()) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_msrs struct. + ioctl_with_ptr(self, KVM_SET_MSRS(), msrs.as_fam_struct_ptr()) + }; // KVM_SET_MSRS actually returns the number of msr entries written. if ret < 0 { return Err(errno::Error::last()); @@ -790,6 +623,7 @@ impl VcpuFd { /// let vcpu = vm.create_vcpu(0).unwrap(); /// let mp_state = vcpu.get_mp_state().unwrap(); /// ``` + /// #[cfg(any( target_arch = "x86", target_arch = "x86_64", @@ -800,8 +634,10 @@ impl VcpuFd { ))] pub fn get_mp_state(&self) -> Result { let mut mp_state = Default::default(); - // SAFETY: Here we trust the kernel not to read past the end of the kvm_mp_state struct. - let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE(), &mut mp_state) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_mp_state struct. 
+ ioctl_with_mut_ref(self, KVM_GET_MP_STATE(), &mut mp_state) + }; if ret != 0 { return Err(errno::Error::last()); } @@ -829,6 +665,7 @@ impl VcpuFd { /// // Your `mp_state` manipulation here. /// vcpu.set_mp_state(mp_state).unwrap(); /// ``` + /// #[cfg(any( target_arch = "x86", target_arch = "x86_64", @@ -838,8 +675,10 @@ impl VcpuFd { target_arch = "riscv64" ))] pub fn set_mp_state(&self, mp_state: kvm_mp_state) -> Result<()> { - // SAFETY: Here we trust the kernel not to read past the end of the kvm_mp_state struct. - let ret = unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE(), &mp_state) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_mp_state struct. + ioctl_with_ref(self, KVM_SET_MP_STATE(), &mp_state) + }; if ret != 0 { return Err(errno::Error::last()); } @@ -865,11 +704,14 @@ impl VcpuFd { /// let vcpu = vm.create_vcpu(0).unwrap(); /// let xsave = vcpu.get_xsave().unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn get_xsave(&self) -> Result { let mut xsave = Default::default(); - // SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct. - let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XSAVE(), &mut xsave) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_xsave struct. + ioctl_with_mut_ref(self, KVM_GET_XSAVE(), &mut xsave) + }; if ret != 0 { return Err(errno::Error::last()); } @@ -897,10 +739,13 @@ impl VcpuFd { /// // Your `xsave` manipulation here. /// vcpu.set_xsave(&xsave).unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn set_xsave(&self, xsave: &kvm_xsave) -> Result<()> { - // SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct. - let ret = unsafe { ioctl_with_ref(self, KVM_SET_XSAVE(), xsave) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_xsave struct. 
+ ioctl_with_ref(self, KVM_SET_XSAVE(), xsave) + }; if ret != 0 { return Err(errno::Error::last()); } @@ -926,11 +771,14 @@ impl VcpuFd { /// let vcpu = vm.create_vcpu(0).unwrap(); /// let xcrs = vcpu.get_xcrs().unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn get_xcrs(&self) -> Result { let mut xcrs = Default::default(); - // SAFETY: Here we trust the kernel not to read past the end of the kvm_xcrs struct. - let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XCRS(), &mut xcrs) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_xcrs struct. + ioctl_with_mut_ref(self, KVM_GET_XCRS(), &mut xcrs) + }; if ret != 0 { return Err(errno::Error::last()); } @@ -958,10 +806,13 @@ impl VcpuFd { /// // Your `xcrs` manipulation here. /// vcpu.set_xcrs(&xcrs).unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()> { - // SAFETY: Here we trust the kernel not to read past the end of the kvm_xcrs struct. - let ret = unsafe { ioctl_with_ref(self, KVM_SET_XCRS(), xcrs) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_xcrs struct. + ioctl_with_ref(self, KVM_SET_XCRS(), xcrs) + }; if ret != 0 { return Err(errno::Error::last()); } @@ -987,11 +838,14 @@ impl VcpuFd { /// let vcpu = vm.create_vcpu(0).unwrap(); /// let debug_regs = vcpu.get_debug_regs().unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn get_debug_regs(&self) -> Result { let mut debug_regs = Default::default(); - // SAFETY: Here we trust the kernel not to read past the end of the kvm_debugregs struct. - let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS(), &mut debug_regs) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_debugregs struct. 
+ ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS(), &mut debug_regs) + }; if ret != 0 { return Err(errno::Error::last()); } @@ -1019,10 +873,13 @@ impl VcpuFd { /// // Your `debug_regs` manipulation here. /// vcpu.set_debug_regs(&debug_regs).unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn set_debug_regs(&self, debug_regs: &kvm_debugregs) -> Result<()> { - // SAFETY: Here we trust the kernel not to read past the end of the kvm_debugregs struct. - let ret = unsafe { ioctl_with_ref(self, KVM_SET_DEBUGREGS(), debug_regs) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_debugregs struct. + ioctl_with_ref(self, KVM_SET_DEBUGREGS(), debug_regs) + }; if ret != 0 { return Err(errno::Error::last()); } @@ -1051,6 +908,7 @@ impl VcpuFd { /// let vcpu_events = vcpu.get_vcpu_events().unwrap(); /// } /// ``` + /// #[cfg(any( target_arch = "x86", target_arch = "x86_64", @@ -1059,8 +917,10 @@ impl VcpuFd { ))] pub fn get_vcpu_events(&self) -> Result { let mut vcpu_events = Default::default(); - // SAFETY: Here we trust the kernel not to read past the end of the kvm_vcpu_events struct. - let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS(), &mut vcpu_events) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_vcpu_events struct. + ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS(), &mut vcpu_events) + }; if ret != 0 { return Err(errno::Error::last()); } @@ -1090,6 +950,7 @@ impl VcpuFd { /// vcpu.set_vcpu_events(&vcpu_events).unwrap(); /// } /// ``` + /// #[cfg(any( target_arch = "x86", target_arch = "x86_64", @@ -1098,8 +959,10 @@ impl VcpuFd { ))] pub fn set_vcpu_events(&self, vcpu_events: &kvm_vcpu_events) -> Result<()> { - // SAFETY: Here we trust the kernel not to read past the end of the kvm_vcpu_events struct. 
- let ret = unsafe { ioctl_with_ref(self, KVM_SET_VCPU_EVENTS(), vcpu_events) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_vcpu_events struct. + ioctl_with_ref(self, KVM_SET_VCPU_EVENTS(), vcpu_events) + }; if ret != 0 { return Err(errno::Error::last()); } @@ -1132,9 +995,10 @@ impl VcpuFd { /// vm.get_preferred_target(&mut kvi).unwrap(); /// vcpu.vcpu_init(&kvi).unwrap(); /// ``` + /// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] pub fn vcpu_init(&self, kvi: &kvm_vcpu_init) -> Result<()> { - // SAFETY: This is safe because we allocated the struct and we know the kernel will read + // This is safe because we allocated the struct and we know the kernel will read // exactly the size of the struct. let ret = unsafe { ioctl_with_ref(self, KVM_ARM_VCPU_INIT(), kvi) }; if ret < 0 { @@ -1168,15 +1032,14 @@ impl VcpuFd { /// vm.get_preferred_target(&mut kvi).unwrap(); /// vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu"); /// - /// let mut reg_list = RegList::new(500).unwrap(); + /// let mut reg_list = RegList::new(500); /// vcpu.get_reg_list(&mut reg_list).unwrap(); /// assert!(reg_list.as_fam_struct_ref().n > 0); /// ``` + /// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] pub fn get_reg_list(&self, reg_list: &mut RegList) -> Result<()> { let ret = - // SAFETY: This is safe because we allocated the struct and we trust the kernel will read - // exactly the size of the struct. 
unsafe { ioctl_with_mut_ref(self, KVM_GET_REG_LIST(), reg_list.as_mut_fam_struct()) }; if ret < 0 { return Err(errno::Error::last()); @@ -1206,29 +1069,30 @@ impl VcpuFd { /// let vm = kvm.create_vm().unwrap(); /// let vcpu = vm.create_vcpu(0).unwrap(); /// - /// #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] - /// { + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { /// let debug_struct = kvm_guest_debug { /// // Configure the vcpu so that a KVM_DEBUG_EXIT would be generated /// // when encountering a software breakpoint during execution /// control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP, /// pad: 0, - /// // Reset all arch-specific debug registers - /// arch: Default::default(), + /// // Reset all x86-specific debug registers + /// arch: kvm_guest_debug_arch { + /// debugreg: [0, 0, 0, 0, 0, 0, 0, 0], + /// }, /// }; /// /// vcpu.set_guest_debug(&debug_struct).unwrap(); /// } /// ``` + /// #[cfg(any( target_arch = "x86", target_arch = "x86_64", - target_arch = "aarch64", + target_arch = "arm64", target_arch = "s390", target_arch = "ppc" ))] pub fn set_guest_debug(&self, debug_struct: &kvm_guest_debug) -> Result<()> { - // SAFETY: Safe because we allocated the structure and we trust the kernel. let ret = unsafe { ioctl_with_ref(self, KVM_SET_GUEST_DEBUG(), debug_struct) }; if ret < 0 { return Err(errno::Error::last()); @@ -1245,15 +1109,15 @@ impl VcpuFd { /// /// * `reg_id` - ID of the register for which we are setting the value. /// * `data` - value for the specified register. 
+ /// #[cfg(any(target_arch = "arm", target_arch = "aarch64", target_arch = "riscv64"))] - pub fn set_one_reg(&self, reg_id: u64, data: u128) -> Result<()> { - let data_ptr = &data as *const _; - println!("in set reg,id is {:x}, data is {:x}", reg_id, data); + pub fn set_one_reg(&self, reg_id: u64, data: u64) -> Result<()> { + let data_ref = &data as *const u64; let onereg = kvm_one_reg { id: reg_id, - addr: data_ptr as u64, + addr: data_ref as u64, }; - // SAFETY: This is safe because we allocated the struct and we know the kernel will read + // This is safe because we allocated the struct and we know the kernel will read // exactly the size of the struct. let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG(), &onereg) }; if ret < 0 { @@ -1270,62 +1134,61 @@ impl VcpuFd { /// # Arguments /// /// * `reg_id` - ID of the register. + /// #[cfg(any(target_arch = "arm", target_arch = "aarch64", target_arch = "riscv64"))] - pub fn get_one_reg(&self, reg_id: u64) -> Result { + pub fn get_one_reg(&self, reg_id: u64) -> Result { let mut reg_value = 0; let mut onereg = kvm_one_reg { id: reg_id, - addr: &mut reg_value as *mut _ as u64, + addr: &mut reg_value as *mut u64 as u64, }; - // SAFETY: This is safe because we allocated the struct and we know the kernel will read - // exactly the size of the struct. + let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_ONE_REG(), &mut onereg) }; if ret < 0 { return Err(errno::Error::last()); } - println!("finish get one reg"); Ok(reg_value) } - /// This sets external interrupt for a virtual CPU and it will receive once it is ready. + /// This sets pending external interrupt for a virtual CPU /// - /// See the documentation for `KVM_INTERRUPT` in the - /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). 
+ /// see documentation for 'KVM_INTERRUPT' in the + /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt) #[cfg(any(target_arch = "riscv64"))] pub fn set_interrupt(&self) -> Result<()> { let interrupt = kvm_interrupt { - irq: KVM_INTERRUPT_SET as u32, + irq: KVM_INTERRUPT_SET as u32, }; - let ret = unsafe { ioctl_with_ref(self, KVM_INTERRUPT(), &interrupt) }; - if ret != 0 { + let ret = unsafe { ioctl_with_ref(self, KVM_INTERRUPT(), &interrupt) }; + if ret != 0 { return Err(errno::Error::last()); } Ok(()) } - /// This clears pending external interrupt for a virtual CPU. + /// This unsets pending external interrupt for a virtual CPU /// - /// See the documentation for `KVM_INTERRUPT` in the - /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// see documentation for 'KVM_INTERRUPT' in the + /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt) #[cfg(any(target_arch = "riscv64"))] pub fn unset_interrupt(&self) -> Result<()> { let interrupt = kvm_interrupt { - irq: KVM_INTERRUPT_UNSET as u32, + irq: KVM_INTERRUPT_UNSET as u32, }; - let ret = unsafe { ioctl_with_ref(self, KVM_INTERRUPT(), &interrupt) }; - if ret != 0 { + let ret = unsafe { ioctl_with_ref(self, KVM_INTERRUPT(), &interrupt) }; + if ret != 0 { return Err(errno::Error::last()); } Ok(()) } - /// Notify the guest about the vCPU being paused. /// /// See the documentation for `KVM_KVMCLOCK_CTRL` in the /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn kvmclock_ctrl(&self) -> Result<()> { - // SAFETY: Safe because we know that our file is a KVM fd and that the request + // Safe because we know that our file is a KVM fd and that the request // is one of the ones defined by kernel. 
let ret = unsafe { ioctl(self, KVM_KVMCLOCK_CTRL()) }; if ret != 0 { @@ -1351,8 +1214,7 @@ impl VcpuFd { /// # let kvm = Kvm::new().unwrap(); /// # let vm = kvm.create_vm().unwrap(); /// // This is a dummy example for running on x86 based on https://lwn.net/Articles/658511/. - /// #[cfg(target_arch = "x86_64")] - /// { + /// #[cfg(target_arch = "x86_64")] { /// let mem_size = 0x4000; /// let guest_addr: u64 = 0x1000; /// let load_addr: *mut u8 = unsafe { @@ -1376,7 +1238,9 @@ impl VcpuFd { /// unsafe { vm.set_user_memory_region(mem_region).unwrap() }; /// /// // Dummy x86 code that just calls halt. - /// let x86_code = [0xf4 /* hlt */]; + /// let x86_code = [ + /// 0xf4, /* hlt */ + /// ]; /// /// // Write the code in the guest memory. This will generate a dirty page. /// unsafe { @@ -1409,13 +1273,11 @@ impl VcpuFd { /// } /// } /// ``` + /// pub fn run(&self) -> Result { - // SAFETY: Safe because we know that our file is a vCPU fd and we verify the return result. - // println!("begin KVM_RUN"); + // Safe because we know that our file is a vCPU fd and we verify the return result. let ret = unsafe { ioctl(self, KVM_RUN()) }; - // println!("finish KVM_RUN"); if ret == 0 { - // println!("come to ret 0 in run()"); let run = self.kvm_run_ptr.as_mut_ref(); match run.exit_reason { // make sure you treat all possible exit reasons from include/uapi/linux/kvm.h corresponding @@ -1424,15 +1286,15 @@ impl VcpuFd { KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception), KVM_EXIT_IO => { let run_start = run as *mut kvm_run as *mut u8; - // SAFETY: Safe because the exit_reason (which comes from the kernel) told us - // which union field to use. + // Safe because the exit_reason (which comes from the kernel) told us which + // union field to use. 
let io = unsafe { run.__bindgen_anon_1.io }; let port = io.port; let data_size = io.count as usize * io.size as usize; - // SAFETY: The data_offset is defined by the kernel to be some number of bytes - // into the kvm_run stucture, which we have fully mmap'd. + // The data_offset is defined by the kernel to be some number of bytes into the + // kvm_run stucture, which we have fully mmap'd. let data_ptr = unsafe { run_start.offset(io.data_offset as isize) }; - // SAFETY: The slice's lifetime is limited to the lifetime of this vCPU, which is equal + // The slice's lifetime is limited to the lifetime of this vCPU, which is equal // to the mmap of the `kvm_run` struct that this is slicing from. let data_slice = unsafe { std::slice::from_raw_parts_mut::(data_ptr as *mut u8, data_size) @@ -1444,16 +1306,11 @@ impl VcpuFd { } } KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall), - KVM_EXIT_DEBUG => { - // SAFETY: Safe because the exit_reason (which comes from the kernel) told us - // which union field to use. - let debug = unsafe { run.__bindgen_anon_1.debug }; - Ok(VcpuExit::Debug(debug.arch)) - } + KVM_EXIT_DEBUG => Ok(VcpuExit::Debug), KVM_EXIT_HLT => Ok(VcpuExit::Hlt), KVM_EXIT_MMIO => { - // SAFETY: Safe because the exit_reason (which comes from the kernel) told us - // which union field to use. + // Safe because the exit_reason (which comes from the kernel) told us which + // union field to use. let mmio = unsafe { &mut run.__bindgen_anon_1.mmio }; let addr = mmio.phys_addr; let len = mmio.len as usize; @@ -1466,15 +1323,7 @@ impl VcpuFd { } KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen), KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown), - KVM_EXIT_FAIL_ENTRY => { - // SAFETY: Safe because the exit_reason (which comes from the kernel) told us - // which union field to use. 
- let fail_entry = unsafe { &mut run.__bindgen_anon_1.fail_entry }; - Ok(VcpuExit::FailEntry( - fail_entry.hardware_entry_failure_reason, - fail_entry.cpu, - )) - } + KVM_EXIT_FAIL_ENTRY => Ok(VcpuExit::FailEntry), KVM_EXIT_INTR => Ok(VcpuExit::Intr), KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr), KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess), @@ -1490,289 +1339,34 @@ impl VcpuFd { KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch), KVM_EXIT_EPR => Ok(VcpuExit::Epr), KVM_EXIT_SYSTEM_EVENT => { - // SAFETY: Safe because the exit_reason (which comes from the kernel) told us - // which union field to use. + // Safe because the exit_reason (which comes from the kernel) told us which + // union field to use. let system_event = unsafe { &mut run.__bindgen_anon_1.system_event }; - #[cfg(target_arch = "x86_64")] - { - let ndata = system_event.ndata; - let data = - unsafe { &system_event.__bindgen_anon_1.data[0..ndata as usize] }; - Ok(VcpuExit::SystemEvent(system_event.type_, data)) - } - #[cfg(target_arch = "riscv64")] - { - Ok(VcpuExit::SystemEvent( - system_event.type_, - system_event.flags, - )) - } + Ok(VcpuExit::SystemEvent( + system_event.type_, + system_event.flags, + )) } KVM_EXIT_S390_STSI => Ok(VcpuExit::S390Stsi), KVM_EXIT_IOAPIC_EOI => { - // SAFETY: Safe because the exit_reason (which comes from the kernel) told us - // which union field to use. + // Safe because the exit_reason (which comes from the kernel) told us which + // union field to use. let eoi = unsafe { &mut run.__bindgen_anon_1.eoi }; Ok(VcpuExit::IoapicEoi(eoi.vector)) } KVM_EXIT_HYPERV => Ok(VcpuExit::Hyperv), - r => Ok(VcpuExit::Unsupported(r)), + r => panic!("unknown kvm exit reason: {}", r), } } else { - println!("run not 0"); Err(errno::Error::last()) } } - /// Returns a mutable reference to the kvm_run structure - pub fn get_kvm_run(&mut self) -> &mut kvm_run { - self.kvm_run_ptr.as_mut_ref() - } - /// Sets the `immediate_exit` flag on the `kvm_run` struct associated with this vCPU to `val`. 
pub fn set_kvm_immediate_exit(&self, val: u8) { let kvm_run = self.kvm_run_ptr.as_mut_ref(); kvm_run.immediate_exit = val; } - - /// Returns the vCPU TSC frequency in KHz or an error if the host has unstable TSC. - /// - /// # Example - /// - /// ```rust - /// # extern crate kvm_ioctls; - /// # use kvm_ioctls::Kvm; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// let vcpu = vm.create_vcpu(0).unwrap(); - /// let tsc_khz = vcpu.get_tsc_khz().unwrap(); - /// ``` - /// - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn get_tsc_khz(&self) -> Result { - // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of - // the ones defined by kernel. - let ret = unsafe { ioctl(self, KVM_GET_TSC_KHZ()) }; - if ret >= 0 { - Ok(ret as u32) - } else { - Err(errno::Error::new(ret)) - } - } - - /// Sets the specified vCPU TSC frequency. - /// - /// # Arguments - /// - /// * `freq` - The frequency unit is KHz as per the KVM API documentation - /// for `KVM_SET_TSC_KHZ`. - /// - /// # Example - /// - /// ```rust - /// # extern crate kvm_ioctls; - /// # use kvm_ioctls::{Cap, Kvm}; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// let vcpu = vm.create_vcpu(0).unwrap(); - /// if kvm.check_extension(Cap::GetTscKhz) && kvm.check_extension(Cap::TscControl) { - /// vcpu.set_tsc_khz(1000).unwrap(); - /// } - /// ``` - /// - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn set_tsc_khz(&self, freq: u32) -> Result<()> { - // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of - // the ones defined by kernel. - let ret = unsafe { ioctl_with_val(self, KVM_SET_TSC_KHZ(), freq as u64) }; - if ret < 0 { - Err(errno::Error::last()) - } else { - Ok(()) - } - } - - /// Translates a virtual address according to the vCPU's current address translation mode. 
- /// - /// The physical address is returned in a `kvm_translation` structure as defined in the - /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). - /// See documentation for `KVM_TRANSLATE`. - /// - /// # Arguments - /// - /// * `gva` - The virtual address to translate. - /// - /// # Example - /// - /// ```rust - /// # extern crate kvm_ioctls; - /// # use kvm_ioctls::Kvm; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// let vcpu = vm.create_vcpu(0).unwrap(); - /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - /// let tr = vcpu.translate_gva(0x10000).unwrap(); - /// ``` - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn translate_gva(&self, gva: u64) -> Result { - let mut tr = kvm_translation { - linear_address: gva, - ..Default::default() - }; - - // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only - // write the correct amount of memory to our pointer, and we verify the return result. 
- let ret = unsafe { ioctl_with_mut_ref(self, KVM_TRANSLATE(), &mut tr) }; - if ret != 0 { - return Err(errno::Error::last()); - } - Ok(tr) - } - - /// Enable the given [`SyncReg`] to be copied to userspace on the next exit - /// - /// # Arguments - /// - /// * `reg` - The [`SyncReg`] to copy out of the guest - /// - /// # Example - /// - /// ```rust - /// # extern crate kvm_ioctls; - /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// let mut vcpu = vm.create_vcpu(0).unwrap(); - /// vcpu.set_sync_valid_reg(SyncReg::Register); - /// vcpu.set_sync_valid_reg(SyncReg::SystemRegister); - /// vcpu.set_sync_valid_reg(SyncReg::VcpuEvents); - /// ``` - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn set_sync_valid_reg(&mut self, reg: SyncReg) { - let mut kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); - kvm_run.kvm_valid_regs |= reg as u64; - } - - /// Tell KVM to copy the given [`SyncReg`] into the guest on the next entry - /// - /// # Arguments - /// - /// * `reg` - The [`SyncReg`] to copy into the guest - /// - /// # Example - /// - /// ```rust - /// # extern crate kvm_ioctls; - /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// let mut vcpu = vm.create_vcpu(0).unwrap(); - /// vcpu.set_sync_dirty_reg(SyncReg::Register); - /// ``` - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn set_sync_dirty_reg(&mut self, reg: SyncReg) { - let mut kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); - kvm_run.kvm_dirty_regs |= reg as u64; - } - - /// Disable the given [`SyncReg`] to be copied to userspace on the next exit - /// - /// # Arguments - /// - /// * `reg` - The [`SyncReg`] to not copy out of the guest - /// - /// # Example - /// - /// ```rust - /// # extern crate kvm_ioctls; - /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; - /// let kvm = Kvm::new().unwrap(); - /// let vm = 
kvm.create_vm().unwrap(); - /// let mut vcpu = vm.create_vcpu(0).unwrap(); - /// vcpu.clear_sync_valid_reg(SyncReg::Register); - /// ``` - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn clear_sync_valid_reg(&mut self, reg: SyncReg) { - let mut kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); - kvm_run.kvm_valid_regs &= !(reg as u64); - } - - /// Tell KVM to not copy the given [`SyncReg`] into the guest on the next entry - /// - /// # Arguments - /// - /// * `reg` - The [`SyncReg`] to not copy out into the guest - /// - /// # Example - /// - /// ```rust - /// # extern crate kvm_ioctls; - /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// let mut vcpu = vm.create_vcpu(0).unwrap(); - /// vcpu.clear_sync_dirty_reg(SyncReg::Register); - /// ``` - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn clear_sync_dirty_reg(&mut self, reg: SyncReg) { - let mut kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); - kvm_run.kvm_dirty_regs &= !(reg as u64); - } - - /// Get the [`kvm_sync_regs`] from the VM - /// - /// # Example - /// - /// ```rust - /// # extern crate kvm_ioctls; - /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// let mut vcpu = vm.create_vcpu(0).unwrap(); - /// if kvm.check_extension(Cap::SyncRegs) { - /// vcpu.set_sync_valid_reg(SyncReg::Register); - /// vcpu.run(); - /// let guest_rax = vcpu.sync_regs().regs.rax; - /// } - /// ``` - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn sync_regs(&self) -> kvm_sync_regs { - let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); - - // SAFETY: Accessing this union field could be out of bounds if the `kvm_run` - // allocation isn't large enough. 
The `kvm_run` region is set using - // `get_vcpu_map_size`, so this region is in bounds - unsafe { kvm_run.s.regs } - } - - /// Get a mutable reference to the [`kvm_sync_regs`] from the VM - /// - /// # Example - /// - /// ```rust - /// # extern crate kvm_ioctls; - /// # use kvm_ioctls::{Kvm, SyncReg, Cap}; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// let mut vcpu = vm.create_vcpu(0).unwrap(); - /// if kvm.check_extension(Cap::SyncRegs) { - /// vcpu.set_sync_valid_reg(SyncReg::Register); - /// vcpu.run(); - /// // Set the guest RAX to 0xdeadbeef - /// vcpu.sync_regs_mut().regs.rax = 0xdeadbeef; - /// vcpu.set_sync_dirty_reg(SyncReg::Register); - /// vcpu.run(); - /// } - /// ``` - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn sync_regs_mut(&mut self) -> &mut kvm_sync_regs { - let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref(); - - // SAFETY: Accessing this union field could be out of bounds if the `kvm_run` - // allocation isn't large enough. The `kvm_run` region is set using - // `get_vcpu_map_size`, so this region is in bounds - unsafe { &mut kvm_run.s.regs } - } } /// Helper function to create a new `VcpuFd`. @@ -1792,18 +1386,17 @@ impl AsRawFd for VcpuFd { #[cfg(test)] mod tests { - #![allow(clippy::undocumented_unsafe_blocks)] extern crate byteorder; use super::*; + use ioctls::system::Kvm; #[cfg(any( target_arch = "x86", target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64" ))] - use crate::cap::Cap; - use crate::ioctls::system::Kvm; + use Cap; // Helper function for memory mapping `size` bytes of anonymous memory. // Panics if the mmap fails. 
@@ -1846,7 +1439,7 @@ mod tests { assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES); let nr_vcpus = kvm.get_nr_vcpus(); for cpu_idx in 0..nr_vcpus { - let vcpu = vm.create_vcpu(cpu_idx as u64).unwrap(); + let vcpu = vm.create_vcpu(cpu_idx as u8).unwrap(); vcpu.set_cpuid2(&cpuid).unwrap(); let retrieved_cpuid = vcpu.get_cpuid2(ncpuids).unwrap(); // Only check the first few leafs as some (e.g. 13) are reserved. @@ -1855,37 +1448,6 @@ mod tests { } } - #[cfg(target_arch = "x86_64")] - #[test] - fn test_get_cpuid_fail_num_entries_too_high() { - let kvm = Kvm::new().unwrap(); - if kvm.check_extension(Cap::ExtCpuid) { - let vm = kvm.create_vm().unwrap(); - let vcpu = vm.create_vcpu(0).unwrap(); - let err_cpuid = vcpu.get_cpuid2(KVM_MAX_CPUID_ENTRIES + 1_usize).err(); - assert_eq!(err_cpuid.unwrap().errno(), libc::ENOMEM); - } - } - - #[cfg(target_arch = "x86_64")] - #[test] - fn test_get_cpuid_fail_num_entries_too_small() { - let kvm = Kvm::new().unwrap(); - if kvm.check_extension(Cap::ExtCpuid) { - let vm = kvm.create_vm().unwrap(); - let cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap(); - let ncpuids = cpuid.as_slice().len(); - assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES); - let nr_vcpus = kvm.get_nr_vcpus(); - for cpu_idx in 0..nr_vcpus { - let vcpu = vm.create_vcpu(cpu_idx as u64).unwrap(); - vcpu.set_cpuid2(&cpuid).unwrap(); - let err = vcpu.get_cpuid2(ncpuids - 1_usize).err(); - assert_eq!(err.unwrap().errno(), libc::E2BIG); - } - } - } - #[cfg(target_arch = "x86_64")] #[test] fn test_set_cpuid() { @@ -1901,21 +1463,27 @@ mod tests { { let entries = cpuid.as_mut_slice(); for entry in entries.iter_mut() { - if entry.function == 0 { - // " KVMKVMKVM " - entry.ebx = 0x4b4d564b; - entry.ecx = 0x564b4d56; - entry.edx = 0x4d; + match entry.function { + 0 => { + // " KVMKVMKVM " + entry.ebx = 0x4b4d564b; + entry.ecx = 0x564b4d56; + entry.edx = 0x4d; + } + _ => (), } } } vcpu.set_cpuid2(&cpuid).unwrap(); let cpuid_0 = vcpu.get_cpuid2(ncpuids).unwrap(); for entry in 
cpuid_0.as_slice() { - if entry.function == 0 { - assert_eq!(entry.ebx, 0x4b4d564b); - assert_eq!(entry.ecx, 0x564b4d56); - assert_eq!(entry.edx, 0x4d); + match entry.function { + 0 => { + assert_eq!(entry.ebx, 0x4b4d564b); + assert_eq!(entry.ecx, 0x564b4d56); + assert_eq!(entry.edx, 0x4d); + } + _ => (), } } @@ -1925,17 +1493,27 @@ mod tests { { let entries = cpuid.as_mut_slice(); for entry in entries.iter_mut() { - if entry.function == 7 && entry.ecx == 0 { - entry.ebx &= !(1 << EBX_SHA_SHIFT); - ebx_sha_off = entry.ebx; + match entry.function { + 7 => { + if entry.ecx == 0 { + entry.ebx &= !(1 << EBX_SHA_SHIFT); + ebx_sha_off = entry.ebx; + } + } + _ => (), } } } vcpu.set_cpuid2(&cpuid).unwrap(); let cpuid_1 = vcpu.get_cpuid2(ncpuids).unwrap(); for entry in cpuid_1.as_slice() { - if entry.function == 7 && entry.ecx == 0 { - assert_eq!(entry.ebx, ebx_sha_off); + match entry.function { + 7 => { + if entry.ecx == 0 { + assert_eq!(entry.ebx, ebx_sha_off); + } + } + _ => (), } } } @@ -1982,7 +1560,7 @@ mod tests { let mut klapic: kvm_lapic_state = vcpu.get_lapic().unwrap(); let reg_offset = 0x300; - let value = 2_u32; + let value = 2 as u32; //try to write and read the APIC_ICR 0x300 let write_slice = unsafe { &mut *(&mut klapic.regs[reg_offset..] as *mut [i8] as *mut [u8]) }; @@ -2016,7 +1594,7 @@ mod tests { ..Default::default() }, ]; - let msrs_wrapper = Msrs::from_entries(&msrs_to_set).unwrap(); + let msrs_wrapper = Msrs::from_entries(&msrs_to_set); vcpu.set_msrs(&msrs_wrapper).unwrap(); // Now test that GET_MSRS returns the same. @@ -2030,13 +1608,12 @@ mod tests { index: 0x0000_0175, ..Default::default() }, - ]) - .unwrap(); + ]); let nmsrs = vcpu.get_msrs(&mut returned_kvm_msrs).unwrap(); // Verify the lengths match. assert_eq!(nmsrs, msrs_to_set.len()); - assert_eq!(nmsrs, returned_kvm_msrs.as_fam_struct_ref().len()); + assert_eq!(nmsrs, returned_kvm_msrs.as_fam_struct_ref().len() as usize); // Verify the contents match. 
let returned_kvm_msr_entries = returned_kvm_msrs.as_slice(); @@ -2160,7 +1737,7 @@ mod tests { // Get a mutable slice of `mem_size` from `load_addr`. // This is safe because we mapped it before. let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size); - slice.write_all(&code).unwrap(); + slice.write(&code).unwrap(); } let vcpu_fd = vm.create_vcpu(0).unwrap(); @@ -2174,15 +1751,15 @@ mod tests { // Set the PC to the guest address where we loaded the code. vcpu_fd - .set_one_reg(core_reg_base + 2 * 32, guest_addr as u128) + .set_one_reg(core_reg_base + 2 * 32, guest_addr) .unwrap(); // Set x8 and x9 to the addresses the guest test code needs vcpu_fd - .set_one_reg(core_reg_base + 2 * 8, guest_addr as u128 + 0x10000) + .set_one_reg(core_reg_base + 2 * 8, guest_addr + 0x10000) .unwrap(); vcpu_fd - .set_one_reg(core_reg_base + 2 * 9, mmio_addr as u128) + .set_one_reg(core_reg_base + 2 * 9, mmio_addr) .unwrap(); loop { @@ -2205,15 +1782,15 @@ mod tests { // The code snippet dirties one page at guest_addr + 0x10000. // The code page should not be dirty, as it's not written by the guest. 
let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap(); - let dirty_pages: u32 = dirty_pages_bitmap + let dirty_pages = dirty_pages_bitmap .into_iter() .map(|page| page.count_ones()) - .sum(); + .fold(0, |dirty_page_count, i| dirty_page_count + i); assert_eq!(dirty_pages, 1); } - VcpuExit::SystemEvent(type_, data) => { + VcpuExit::SystemEvent(type_, flags) => { assert_eq!(type_, KVM_SYSTEM_EVENT_SHUTDOWN); - assert_eq!(data[0], 0); + assert_eq!(flags, 0); break; } r => panic!("unexpected exit reason: {:?}", r), @@ -2312,23 +1889,18 @@ mod tests { assert_eq!(data.len(), 1); assert_eq!(data[0], 0); } - VcpuExit::Debug(debug) => { + VcpuExit::Debug => { if instr_idx == expected_rips.len() - 1 { // Disabling debugging/single-stepping debug_struct.control = 0; vcpu_fd.set_guest_debug(&debug_struct).unwrap(); - } else if instr_idx >= expected_rips.len() { - unreachable!(); + } else { + if instr_idx >= expected_rips.len() { + assert!(false); + } } let vcpu_regs = vcpu_fd.get_regs().unwrap(); assert_eq!(vcpu_regs.rip, expected_rips[instr_idx]); - assert_eq!(debug.exception, 1); - assert_eq!(debug.pc, expected_rips[instr_idx]); - // Check first 15 bits of DR6 - let mask = (1 << 16) - 1; - assert_eq!(debug.dr6 & mask, 0b100111111110000); - // Bit 10 in DR7 is always 1 - assert_eq!(debug.dr7, 1 << 10); instr_idx += 1; } VcpuExit::Hlt => { @@ -2356,7 +1928,7 @@ mod tests { let badf_errno = libc::EBADF; let faulty_vcpu_fd = VcpuFd { - vcpu: unsafe { File::from_raw_fd(-2) }, + vcpu: unsafe { File::from_raw_fd(-1) }, kvm_run_ptr: KvmRunWrapper { kvm_run_ptr: mmap_anonymous(10), mmap_size: 10, @@ -2415,16 +1987,13 @@ mod tests { ); assert_eq!( faulty_vcpu_fd - .get_msrs(&mut Msrs::new(1).unwrap()) + .get_msrs(&mut Msrs::new(1)) .unwrap_err() .errno(), badf_errno ); assert_eq!( - faulty_vcpu_fd - .set_msrs(&Msrs::new(1).unwrap()) - .unwrap_err() - .errno(), + faulty_vcpu_fd.set_msrs(&Msrs::new(1)).unwrap_err().errno(), badf_errno ); assert_eq!( @@ -2485,9 +2054,6 @@ mod 
tests { faulty_vcpu_fd.kvmclock_ctrl().unwrap_err().errno(), badf_errno ); - assert!(faulty_vcpu_fd.get_tsc_khz().is_err()); - assert!(faulty_vcpu_fd.set_tsc_khz(1000000).is_err()); - assert!(faulty_vcpu_fd.translate_gva(u64::MAX).is_err()); } #[test] @@ -2516,7 +2082,7 @@ mod tests { vm.get_preferred_target(&mut kvi) .expect("Cannot get preferred target"); vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu"); - let data: u128 = 0; + let data: u64 = 0; let reg_id: u64 = 0; assert!(vcpu.set_one_reg(reg_id, data).is_err()); @@ -2548,7 +2114,7 @@ mod tests { const PSR_D_BIT: u64 = 0x0000_0200; const PSTATE_FAULT_BITS_64: u64 = PSR_MODE_EL1H | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT; - let data: u128 = PSTATE_FAULT_BITS_64 as u128; + let data: u64 = PSTATE_FAULT_BITS_64; const PSTATE_REG_ID: u64 = 0x6030_0000_0010_0042; vcpu.set_one_reg(PSTATE_REG_ID, data) .expect("Failed to set pstate register"); @@ -2556,7 +2122,7 @@ mod tests { assert_eq!( vcpu.get_one_reg(PSTATE_REG_ID) .expect("Failed to get pstate register"), - PSTATE_FAULT_BITS_64 as u128 + PSTATE_FAULT_BITS_64 ); } @@ -2567,7 +2133,7 @@ mod tests { let vm = kvm.create_vm().unwrap(); let vcpu = vm.create_vcpu(0).unwrap(); - let mut reg_list = RegList::new(1).unwrap(); + let mut reg_list = RegList::new(1); // KVM_GET_REG_LIST demands that the vcpus be initalized, so we expect this to fail. let err = vcpu.get_reg_list(&mut reg_list).unwrap_err(); assert!(err.errno() == libc::ENOEXEC); @@ -2585,21 +2151,12 @@ mod tests { // We make use of the number of registers returned to allocate memory and // try one more time. 
- let mut reg_list = RegList::new(reg_list.as_mut_fam_struct().n as usize).unwrap(); + let mut reg_list = RegList::new(reg_list.as_mut_fam_struct().n as usize); assert!(vcpu.get_reg_list(&mut reg_list).is_ok()); } #[test] - fn test_get_kvm_run() { - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); - let mut vcpu = vm.create_vcpu(0).unwrap(); - vcpu.kvm_run_ptr.as_mut_ref().immediate_exit = 1; - assert_eq!(vcpu.get_kvm_run().immediate_exit, 1); - } - - #[test] - fn test_set_kvm_immediate_exit() { + fn set_kvm_immediate_exit() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let vcpu = vm.create_vcpu(0).unwrap(); @@ -2613,218 +2170,17 @@ mod tests { fn test_enable_cap() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); - let mut cap = kvm_enable_cap { - // KVM_CAP_HYPERV_SYNIC needs KVM_CAP_SPLIT_IRQCHIP enabled - cap: KVM_CAP_SPLIT_IRQCHIP, - ..Default::default() - }; + let mut cap: kvm_enable_cap = Default::default(); + // KVM_CAP_HYPERV_SYNIC needs KVM_CAP_SPLIT_IRQCHIP enabled + cap.cap = KVM_CAP_SPLIT_IRQCHIP; cap.args[0] = 24; vm.enable_cap(&cap).unwrap(); let vcpu = vm.create_vcpu(0).unwrap(); if kvm.check_extension(Cap::HypervSynic) { - let cap = kvm_enable_cap { - cap: KVM_CAP_HYPERV_SYNIC, - ..Default::default() - }; + let mut cap: kvm_enable_cap = Default::default(); + cap.cap = KVM_CAP_HYPERV_SYNIC; vcpu.enable_cap(&cap).unwrap(); } } - #[cfg(target_arch = "x86_64")] - #[test] - fn test_get_tsc_khz() { - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); - let vcpu = vm.create_vcpu(0).unwrap(); - - if !kvm.check_extension(Cap::GetTscKhz) { - assert!(vcpu.get_tsc_khz().is_err()) - } else { - assert!(vcpu.get_tsc_khz().unwrap() > 0); - } - } - - #[cfg(target_arch = "x86_64")] - #[test] - fn test_set_tsc_khz() { - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); - let vcpu = vm.create_vcpu(0).unwrap(); - let freq = vcpu.get_tsc_khz().unwrap(); - - if 
!(kvm.check_extension(Cap::GetTscKhz) && kvm.check_extension(Cap::TscControl)) { - assert!(vcpu.set_tsc_khz(0).is_err()); - } else { - assert!(vcpu.set_tsc_khz(freq - 500000).is_ok()); - assert_eq!(vcpu.get_tsc_khz().unwrap(), freq - 500000); - assert!(vcpu.set_tsc_khz(freq + 500000).is_ok()); - assert_eq!(vcpu.get_tsc_khz().unwrap(), freq + 500000); - } - } - - #[cfg(target_arch = "x86_64")] - #[test] - fn test_sync_regs() { - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); - let mut vcpu = vm.create_vcpu(0).unwrap(); - - // Test setting each valid register - let sync_regs = [ - SyncReg::Register, - SyncReg::SystemRegister, - SyncReg::VcpuEvents, - ]; - for reg in &sync_regs { - vcpu.set_sync_valid_reg(*reg); - assert_eq!(vcpu.kvm_run_ptr.as_mut_ref().kvm_valid_regs, *reg as u64); - vcpu.clear_sync_valid_reg(*reg); - assert_eq!(vcpu.kvm_run_ptr.as_mut_ref().kvm_valid_regs, 0); - } - - // Test that multiple valid SyncRegs can be set at the same time - vcpu.set_sync_valid_reg(SyncReg::Register); - vcpu.set_sync_valid_reg(SyncReg::SystemRegister); - vcpu.set_sync_valid_reg(SyncReg::VcpuEvents); - assert_eq!( - vcpu.kvm_run_ptr.as_mut_ref().kvm_valid_regs, - SyncReg::Register as u64 | SyncReg::SystemRegister as u64 | SyncReg::VcpuEvents as u64 - ); - - // Test setting each dirty register - let sync_regs = [ - SyncReg::Register, - SyncReg::SystemRegister, - SyncReg::VcpuEvents, - ]; - - for reg in &sync_regs { - vcpu.set_sync_dirty_reg(*reg); - assert_eq!(vcpu.kvm_run_ptr.as_mut_ref().kvm_dirty_regs, *reg as u64); - vcpu.clear_sync_dirty_reg(*reg); - assert_eq!(vcpu.kvm_run_ptr.as_mut_ref().kvm_dirty_regs, 0); - } - - // Test that multiple dirty SyncRegs can be set at the same time - vcpu.set_sync_dirty_reg(SyncReg::Register); - vcpu.set_sync_dirty_reg(SyncReg::SystemRegister); - vcpu.set_sync_dirty_reg(SyncReg::VcpuEvents); - assert_eq!( - vcpu.kvm_run_ptr.as_mut_ref().kvm_dirty_regs, - SyncReg::Register as u64 | SyncReg::SystemRegister as u64 | 
SyncReg::VcpuEvents as u64 - ); - } - - #[cfg(target_arch = "x86_64")] - #[test] - fn test_sync_regs_with_run() { - use std::io::Write; - - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); - if kvm.check_extension(Cap::SyncRegs) { - // This example is based on https://lwn.net/Articles/658511/ - #[rustfmt::skip] - let code = [ - 0xff, 0xc0, /* inc eax */ - 0xf4, /* hlt */ - ]; - - let mem_size = 0x4000; - let load_addr = mmap_anonymous(mem_size); - let guest_addr: u64 = 0x1000; - let slot: u32 = 0; - let mem_region = kvm_userspace_memory_region { - slot, - guest_phys_addr: guest_addr, - memory_size: mem_size as u64, - userspace_addr: load_addr as u64, - flags: KVM_MEM_LOG_DIRTY_PAGES, - }; - unsafe { - vm.set_user_memory_region(mem_region).unwrap(); - } - - unsafe { - // Get a mutable slice of `mem_size` from `load_addr`. - // This is safe because we mapped it before. - let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size); - slice.write_all(&code).unwrap(); - } - - let mut vcpu = vm.create_vcpu(0).unwrap(); - - let orig_sregs = vcpu.get_sregs().unwrap(); - - let mut sync_regs = vcpu.sync_regs_mut(); - - // Initialize the sregs in sync_regs to be the original sregs - sync_regs.sregs = orig_sregs; - sync_regs.sregs.cs.base = 0; - sync_regs.sregs.cs.selector = 0; - - // Set up the guest to attempt to `inc rax` - sync_regs.regs.rip = guest_addr; - sync_regs.regs.rax = 0x8000; - sync_regs.regs.rflags = 2; - - // Initialize the sync_reg flags - vcpu.set_sync_valid_reg(SyncReg::Register); - vcpu.set_sync_valid_reg(SyncReg::SystemRegister); - vcpu.set_sync_valid_reg(SyncReg::VcpuEvents); - vcpu.set_sync_dirty_reg(SyncReg::Register); - vcpu.set_sync_dirty_reg(SyncReg::SystemRegister); - vcpu.set_sync_dirty_reg(SyncReg::VcpuEvents); - - // hlt is the only expected return from guest execution - assert!(matches!(vcpu.run().expect("run failed"), VcpuExit::Hlt)); - - let regs = vcpu.get_regs().unwrap(); - - let sync_regs = vcpu.sync_regs(); - 
assert_eq!(regs, sync_regs.regs); - assert_eq!(sync_regs.regs.rax, 0x8001); - } - } - - #[test] - #[cfg(target_arch = "x86_64")] - fn test_translate_gva() { - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); - let vcpu = vm.create_vcpu(0).unwrap(); - assert!(vcpu.translate_gva(0x10000).is_ok()); - assert_eq!(vcpu.translate_gva(0x10000).unwrap().valid, 1); - assert_eq!( - vcpu.translate_gva(0x10000).unwrap().physical_address, - 0x10000 - ); - assert!(vcpu.translate_gva(u64::MAX).is_ok()); - assert_eq!(vcpu.translate_gva(u64::MAX).unwrap().valid, 0); - } - - #[test] - #[cfg(target_arch = "aarch64")] - fn test_vcpu_attr() { - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); - let vcpu = vm.create_vcpu(0).unwrap(); - - let dist_attr = kvm_bindings::kvm_device_attr { - group: KVM_ARM_VCPU_PMU_V3_CTRL, - attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT), - addr: 0x0, - flags: 0, - }; - - assert!(vcpu.has_device_attr(&dist_attr).is_err()); - assert!(vcpu.set_device_attr(&dist_attr).is_err()); - let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); - vm.get_preferred_target(&mut kvi) - .expect("Cannot get preferred target"); - kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2 | 1 << KVM_ARM_VCPU_PMU_V3; - assert!(vcpu.vcpu_init(&kvi).is_ok()); - assert!(vcpu.has_device_attr(&dist_attr).is_ok()); - assert!(vcpu.set_device_attr(&dist_attr).is_ok()); - } } diff --git a/kvm-ioctls/src/ioctls/vm.rs b/kvm-ioctls/src/ioctls/vm.rs index 876ced3d8..1de0c000a 100644 --- a/kvm-ioctls/src/ioctls/vm.rs +++ b/kvm-ioctls/src/ioctls/vm.rs @@ -11,24 +11,22 @@ use std::os::raw::c_void; use std::os::raw::{c_int, c_ulong}; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; -use crate::cap::Cap; -use crate::ioctls::device::new_device; -use crate::ioctls::device::DeviceFd; -use crate::ioctls::vcpu::new_vcpu; -use crate::ioctls::vcpu::VcpuFd; -use crate::ioctls::{KvmRunWrapper, Result}; -use crate::kvm_ioctls::*; +use 
cap::Cap; +use ioctls::device::new_device; +use ioctls::device::DeviceFd; +use ioctls::vcpu::new_vcpu; +use ioctls::vcpu::VcpuFd; +use ioctls::{KvmRunWrapper, Result}; +use kvm_ioctls::*; use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use vmm_sys_util::ioctl::ioctl_with_mut_ptr; use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ref, ioctl_with_val}; /// An address either in programmable I/O space or in memory mapped I/O space. /// /// The `IoEventAddress` is used for specifying the type when registering an event /// in [register_ioevent](struct.VmFd.html#method.register_ioevent). -#[derive(Debug)] +/// pub enum IoEventAddress { /// Representation of an programmable I/O address. Pio(u64), @@ -42,16 +40,15 @@ pub enum IoEventAddress { /// [`register_ioevent`](struct.VmFd.html#method.register_ioevent) /// to disable filtering of events based on the datamatch flag. For details check the /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). -#[derive(Debug)] +/// pub struct NoDatamatch; -impl From for u64 { - fn from(_: NoDatamatch) -> u64 { +impl Into for NoDatamatch { + fn into(self) -> u64 { 0 } } /// Wrapper over KVM VM ioctls. 
-#[derive(Debug)] pub struct VmFd { vm: File, run_size: usize, @@ -83,22 +80,23 @@ impl VmFd { /// # extern crate kvm_ioctls; /// extern crate kvm_bindings; /// - /// use kvm_bindings::kvm_userspace_memory_region; /// use kvm_ioctls::Kvm; + /// use kvm_bindings::kvm_userspace_memory_region; /// /// let kvm = Kvm::new().unwrap(); /// let vm = kvm.create_vm().unwrap(); /// let mem_region = kvm_userspace_memory_region { - /// slot: 0, - /// guest_phys_addr: 0x10000 as u64, - /// memory_size: 0x10000 as u64, - /// userspace_addr: 0x0 as u64, - /// flags: 0, - /// }; + /// slot: 0, + /// guest_phys_addr: 0x10000 as u64, + /// memory_size: 0x10000 as u64, + /// userspace_addr: 0x0 as u64, + /// flags: 0, + /// }; /// unsafe { /// vm.set_user_memory_region(mem_region).unwrap(); /// }; /// ``` + /// pub unsafe fn set_user_memory_region( &self, user_memory_region: kvm_userspace_memory_region, @@ -128,9 +126,10 @@ impl VmFd { /// let vm = kvm.create_vm().unwrap(); /// vm.set_tss_address(0xfffb_d000).unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn set_tss_address(&self, offset: usize) -> Result<()> { - // SAFETY: Safe because we know that our file is a VM fd and we verify the return result. + // Safe because we know that our file is a VM fd and we verify the return result. let ret = unsafe { ioctl_with_val(self, KVM_SET_TSS_ADDR(), offset as c_ulong) }; if ret == 0 { Ok(()) @@ -139,34 +138,6 @@ impl VmFd { } } - /// Sets the address of the one-page region in the VM's address space. - /// - /// See the documentation for `KVM_SET_IDENTITY_MAP_ADDR`. - /// - /// # Arguments - /// - /// * `address` - Physical address of a one-page region in the guest's physical address space. 
- /// - /// # Example - /// - /// ```rust - /// # extern crate kvm_ioctls; - /// # use kvm_ioctls::Kvm; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// vm.set_identity_map_address(0xfffb_c000).unwrap(); - /// ``` - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn set_identity_map_address(&self, address: u64) -> Result<()> { - // SAFETY: Safe because we know that our file is a VM fd and we verify the return result. - let ret = unsafe { ioctl_with_ref(self, KVM_SET_IDENTITY_MAP_ADDR(), &address) }; - if ret == 0 { - Ok(()) - } else { - Err(errno::Error::last()) - } - } - /// Creates an in-kernel interrupt controller. /// /// See the documentation for `KVM_CREATE_IRQCHIP`. @@ -182,11 +153,9 @@ impl VmFd { /// /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] /// vm.create_irq_chip().unwrap(); - /// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] - /// { - /// use kvm_bindings::{ - /// kvm_create_device, kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, KVM_CREATE_DEVICE_TEST, - /// }; + /// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] { + /// use kvm_bindings::{kvm_create_device, + /// kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, KVM_CREATE_DEVICE_TEST}; /// let mut gic_device = kvm_bindings::kvm_create_device { /// type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, /// fd: 0, @@ -197,6 +166,7 @@ impl VmFd { /// } /// } /// ``` + /// #[cfg(any( target_arch = "x86", target_arch = "x86_64", @@ -204,7 +174,7 @@ impl VmFd { target_arch = "aarch64" ))] pub fn create_irq_chip(&self) -> Result<()> { - // SAFETY: Safe because we know that our file is a VM fd and we verify the return result. + // Safe because we know that our file is a VM fd and we verify the return result. 
let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP()) }; if ret == 0 { Ok(()) @@ -237,10 +207,13 @@ impl VmFd { /// irqchip.chip_id = KVM_IRQCHIP_PIC_MASTER; /// vm.get_irqchip(&mut irqchip).unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn get_irqchip(&self, irqchip: &mut kvm_irqchip) -> Result<()> { - // SAFETY: Here we trust the kernel not to read past the end of the kvm_irqchip struct. - let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), irqchip) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_irqchip struct. + ioctl_with_mut_ref(self, KVM_GET_IRQCHIP(), irqchip) + }; if ret == 0 { Ok(()) } else { @@ -273,10 +246,13 @@ impl VmFd { /// // Your `irqchip` manipulation here. /// vm.set_irqchip(&mut irqchip).unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn set_irqchip(&self, irqchip: &kvm_irqchip) -> Result<()> { - // SAFETY: Here we trust the kernel not to read past the end of the kvm_irqchip struct. - let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP(), irqchip) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_irqchip struct. + ioctl_with_ref(self, KVM_SET_IRQCHIP(), irqchip) + }; if ret == 0 { Ok(()) } else { @@ -303,10 +279,11 @@ impl VmFd { /// let pit_config = kvm_pit_config::default(); /// vm.create_pit2(pit_config).unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn create_pit2(&self, pit_config: kvm_pit_config) -> Result<()> { - // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read - // the correct amount of memory from our pointer, and we verify the return result. + // Safe because we know that our file is a VM fd, we know the kernel will only read the + // correct amount of memory from our pointer, and we verify the return result. 
let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_PIT2(), &pit_config) }; if ret == 0 { Ok(()) @@ -338,11 +315,14 @@ impl VmFd { /// vm.create_pit2(pit_config).unwrap(); /// let pitstate = vm.get_pit2().unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn get_pit2(&self) -> Result { let mut pitstate = Default::default(); - // SAFETY: Here we trust the kernel not to read past the end of the kvm_pit_state2 struct. - let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_PIT2(), &mut pitstate) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_pit_state2 struct. + ioctl_with_mut_ref(self, KVM_GET_PIT2(), &mut pitstate) + }; if ret == 0 { Ok(pitstate) } else { @@ -375,10 +355,13 @@ impl VmFd { /// // Your `pitstate` manipulation here. /// vm.set_pit2(&mut pitstate).unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn set_pit2(&self, pitstate: &kvm_pit_state2) -> Result<()> { - // SAFETY: Here we trust the kernel not to read past the end of the kvm_pit_state2 struct. - let ret = unsafe { ioctl_with_ref(self, KVM_SET_PIT2(), pitstate) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_pit_state2 struct. + ioctl_with_ref(self, KVM_SET_PIT2(), pitstate) + }; if ret == 0 { Ok(()) } else { @@ -404,11 +387,14 @@ impl VmFd { /// let vm = kvm.create_vm().unwrap(); /// let clock = vm.get_clock().unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn get_clock(&self) -> Result { let mut clock = Default::default(); - // SAFETY: Here we trust the kernel not to read past the end of the kvm_clock_data struct. - let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_CLOCK(), &mut clock) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_clock_data struct. 
+ ioctl_with_mut_ref(self, KVM_GET_CLOCK(), &mut clock) + }; if ret == 0 { Ok(clock) } else { @@ -437,10 +423,13 @@ impl VmFd { /// let mut clock = kvm_clock_data::default(); /// vm.set_clock(&mut clock).unwrap(); /// ``` + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn set_clock(&self, clock: &kvm_clock_data) -> Result<()> { - // SAFETY: Here we trust the kernel not to read past the end of the kvm_clock_data struct. - let ret = unsafe { ioctl_with_ref(self, KVM_SET_CLOCK(), clock) }; + let ret = unsafe { + // Here we trust the kernel not to read past the end of the kvm_clock_data struct. + ioctl_with_ref(self, KVM_SET_CLOCK(), clock) + }; if ret == 0 { Ok(()) } else { @@ -482,6 +471,7 @@ impl VmFd { /// vm.create_irq_chip().unwrap(); /// //vm.signal_msi(msi).unwrap(); /// ``` + /// #[cfg(any( target_arch = "x86", target_arch = "x86_64", @@ -489,7 +479,7 @@ impl VmFd { target_arch = "aarch64" ))] pub fn signal_msi(&self, msi: kvm_msi) -> Result { - // SAFETY: Safe because we allocated the structure and we know the kernel + // Safe because we allocated the structure and we know the kernel // will read exactly the size of the structure. let ret = unsafe { ioctl_with_ref(self, KVM_SIGNAL_MSI(), &msi) }; if ret >= 0 { @@ -529,6 +519,7 @@ impl VmFd { /// let irq_routing = kvm_irq_routing::default(); /// vm.set_gsi_routing(&irq_routing).unwrap(); /// ``` + /// #[cfg(any( target_arch = "x86", target_arch = "x86_64", @@ -536,7 +527,7 @@ impl VmFd { target_arch = "aarch64" ))] pub fn set_gsi_routing(&self, irq_routing: &kvm_irq_routing) -> Result<()> { - // SAFETY: Safe because we allocated the structure and we know the kernel + // Safe because we allocated the structure and we know the kernel // will read exactly the size of the structure. 
let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING(), irq_routing) }; if ret == 0 { @@ -572,12 +563,13 @@ impl VmFd { /// let vm_fd = kvm.create_vm().unwrap(); /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap(); /// vm_fd - /// .register_ioevent(&evtfd, &IoEventAddress::Pio(0xf4), NoDatamatch) - /// .unwrap(); + /// .register_ioevent(&evtfd, &IoEventAddress::Pio(0xf4), NoDatamatch) + /// .unwrap(); /// vm_fd - /// .register_ioevent(&evtfd, &IoEventAddress::Mmio(0x1000), NoDatamatch) - /// .unwrap(); + /// .register_ioevent(&evtfd, &IoEventAddress::Mmio(0x1000), NoDatamatch) + /// .unwrap(); /// ``` + /// pub fn register_ioevent>( &self, fd: &EventFd, @@ -596,15 +588,15 @@ impl VmFd { datamatch: datamatch.into(), len: std::mem::size_of::() as u32, addr: match addr { - IoEventAddress::Pio(ref p) => *p, + IoEventAddress::Pio(ref p) => *p as u64, IoEventAddress::Mmio(ref m) => *m, }, fd: fd.as_raw_fd(), flags, ..Default::default() }; - // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read - // the correct amount of memory from our pointer, and we verify the return result. + // Safe because we know that our file is a VM fd, we know the kernel will only read the + // correct amount of memory from our pointer, and we verify the return result. 
let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD(), &ioeventfd) }; if ret == 0 { Ok(()) @@ -642,18 +634,19 @@ impl VmFd { /// let pio_addr = IoEventAddress::Pio(0xf4); /// let mmio_addr = IoEventAddress::Mmio(0x1000); /// vm_fd - /// .register_ioevent(&evtfd, &pio_addr, NoDatamatch) - /// .unwrap(); + /// .register_ioevent(&evtfd, &pio_addr, NoDatamatch) + /// .unwrap(); /// vm_fd - /// .register_ioevent(&evtfd, &mmio_addr, 0x1234u32) - /// .unwrap(); + /// .register_ioevent(&evtfd, &mmio_addr, 0x1234u32) + /// .unwrap(); /// vm_fd - /// .unregister_ioevent(&evtfd, &pio_addr, NoDatamatch) - /// .unwrap(); + /// .unregister_ioevent(&evtfd, &pio_addr, NoDatamatch) + /// .unwrap(); /// vm_fd - /// .unregister_ioevent(&evtfd, &mmio_addr, 0x1234u32) - /// .unwrap(); + /// .unregister_ioevent(&evtfd, &mmio_addr, 0x1234u32) + /// .unwrap(); /// ``` + /// pub fn unregister_ioevent>( &self, fd: &EventFd, @@ -672,15 +665,15 @@ impl VmFd { datamatch: datamatch.into(), len: std::mem::size_of::() as u32, addr: match addr { - IoEventAddress::Pio(ref p) => *p, + IoEventAddress::Pio(ref p) => *p as u64, IoEventAddress::Mmio(ref m) => *m, }, fd: fd.as_raw_fd(), flags, ..Default::default() }; - // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read - // the correct amount of memory from our pointer, and we verify the return result. + // Safe because we know that our file is a VM fd, we know the kernel will only read the + // correct amount of memory from our pointer, and we verify the return result. let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD(), &ioeventfd) }; if ret == 0 { Ok(()) @@ -740,14 +733,15 @@ impl VmFd { /// /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] /// // ASM code that just forces a MMIO Write. 
- /// let asm_code = [0xc6, 0x06, 0x00, 0x80, 0x00]; + /// let asm_code = [ + /// 0xc6, 0x06, 0x00, 0x80, 0x00, + /// ]; /// #[cfg(target_arch = "aarch64")] /// let asm_code = [ /// 0x01, 0x00, 0x00, 0x10, /* adr x1, */ /// 0x22, 0x10, 0x00, 0xb9, /* str w2, [x1, #16]; write to this page */ /// 0x02, 0x00, 0x00, 0xb9, /* str w2, [x0]; force MMIO exit */ - /// 0x00, 0x00, 0x00, - /// 0x14, /* b ; shouldn't get here, but if so loop forever */ + /// 0x00, 0x00, 0x00, 0x14, /* b ; shouldn't get here, but if so loop forever */ /// ]; /// /// // Write the code in the guest memory. This will generate a dirty page. @@ -784,8 +778,8 @@ impl VmFd { /// /// let core_reg_base: u64 = 0x6030_0000_0010_0000; /// let mmio_addr: u64 = guest_addr + mem_size as u64; - /// vcpu_fd.set_one_reg(core_reg_base + 2 * 32, guest_addr as u128); // set PC - /// vcpu_fd.set_one_reg(core_reg_base + 2 * 0, mmio_addr as u128); // set X0 + /// vcpu_fd.set_one_reg(core_reg_base + 2 * 32, guest_addr); // set PC + /// vcpu_fd.set_one_reg(core_reg_base + 2 * 0, mmio_addr); // set X0 /// } /// /// loop { @@ -795,9 +789,9 @@ impl VmFd { /// // while on aarch64 the dirty bit comes from writing to guest_addr (current PC). /// let dirty_pages_bitmap = vm.get_dirty_log(0, mem_size).unwrap(); /// let dirty_pages = dirty_pages_bitmap - /// .into_iter() - /// .map(|page| page.count_ones()) - /// .fold(0, |dirty_page_count, i| dirty_page_count + i); + /// .into_iter() + /// .map(|page| page.count_ones()) + /// .fold(0, |dirty_page_count, i| dirty_page_count + i); /// assert_eq!(dirty_pages, 1); /// break; /// } @@ -805,11 +799,11 @@ impl VmFd { /// } /// } /// ``` + /// pub fn get_dirty_log(&self, slot: u32, memory_size: usize) -> Result> { // Compute the length of the bitmap needed for all dirty pages in one memory slot. // One memory page is `page_size` bytes and `KVM_GET_DIRTY_LOG` returns one dirty bit for // each page. 
- // SAFETY: We trust the sysconf libc function and we're calling it with a correct parameter. let page_size = match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } { -1 => return Err(errno::Error::last()), ps => ps as usize, @@ -828,8 +822,8 @@ impl VmFd { dirty_bitmap: bitmap.as_mut_ptr() as *mut c_void, }, }; - // SAFETY: Safe because we know that our file is a VM fd, and we know that the amount of - // memory we allocated for the bitmap is at least one bit per page. + // Safe because we know that our file is a VM fd, and we know that the amount of memory + // we allocated for the bitmap is at least one bit per page. let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG(), &dirtylog) }; if ret == 0 { Ok(bitmap) @@ -857,85 +851,26 @@ impl VmFd { /// let kvm = Kvm::new().unwrap(); /// let vm = kvm.create_vm().unwrap(); /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap(); - /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - /// { + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { /// vm.create_irq_chip().unwrap(); /// vm.register_irqfd(&evtfd, 0).unwrap(); /// } /// ``` - #[cfg(any( - target_arch = "x86", - target_arch = "x86_64", - target_arch = "arm", - target_arch = "aarch64" - ))] - pub fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> Result<()> { - let irqfd = kvm_irqfd { - fd: fd.as_raw_fd() as u32, - gsi, - ..Default::default() - }; - // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read - // the correct amount of memory from our pointer, and we verify the return result. - let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) }; - if ret == 0 { - Ok(()) - } else { - Err(errno::Error::last()) - } - } - - /// Registers an event that will, when signaled, assert the `gsi` IRQ. - /// If the irqchip is resampled by the guest, the IRQ is de-asserted, - /// and `resamplefd` is notified. - /// - /// # Arguments /// - /// * `fd` - `EventFd` to be signaled. 
- /// * `resamplefd` - `EventFd`to be notified on resample. - /// * `gsi` - IRQ to be triggered. - /// - /// # Example - /// - /// ```rust - /// # extern crate kvm_ioctls; - /// # extern crate libc; - /// # extern crate vmm_sys_util; - /// # use kvm_ioctls::Kvm; - /// # use libc::EFD_NONBLOCK; - /// # use vmm_sys_util::eventfd::EventFd; - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap(); - /// let resamplefd = EventFd::new(EFD_NONBLOCK).unwrap(); - /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - /// { - /// vm.create_irq_chip().unwrap(); - /// vm.register_irqfd_with_resample(&evtfd, &resamplefd, 0) - /// .unwrap(); - /// } - /// ``` #[cfg(any( target_arch = "x86", target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64" ))] - pub fn register_irqfd_with_resample( - &self, - fd: &EventFd, - resamplefd: &EventFd, - gsi: u32, - ) -> Result<()> { + pub fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> Result<()> { let irqfd = kvm_irqfd { fd: fd.as_raw_fd() as u32, - resamplefd: resamplefd.as_raw_fd() as u32, gsi, - flags: KVM_IRQFD_FLAG_RESAMPLE, ..Default::default() }; - // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read - // the correct amount of memory from our pointer, and we verify the return result. + // Safe because we know that our file is a VM fd, we know the kernel will only read the + // correct amount of memory from our pointer, and we verify the return result. 
let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) }; if ret == 0 { Ok(()) @@ -963,17 +898,13 @@ impl VmFd { /// let kvm = Kvm::new().unwrap(); /// let vm = kvm.create_vm().unwrap(); /// let evtfd = EventFd::new(EFD_NONBLOCK).unwrap(); - /// let resamplefd = EventFd::new(EFD_NONBLOCK).unwrap(); - /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - /// { + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { /// vm.create_irq_chip().unwrap(); /// vm.register_irqfd(&evtfd, 0).unwrap(); /// vm.unregister_irqfd(&evtfd, 0).unwrap(); - /// vm.register_irqfd_with_resample(&evtfd, &resamplefd, 0) - /// .unwrap(); - /// vm.unregister_irqfd(&evtfd, 0).unwrap(); /// } /// ``` + /// #[cfg(any( target_arch = "x86", target_arch = "x86_64", @@ -987,8 +918,8 @@ impl VmFd { flags: KVM_IRQFD_FLAG_DEASSIGN, ..Default::default() }; - // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read - // the correct amount of memory from our pointer, and we verify the return result. + // Safe because we know that our file is a VM fd, we know the kernel will only read the + // correct amount of memory from our pointer, and we verify the return result. let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD(), &irqfd) }; if ret == 0 { Ok(()) @@ -1034,17 +965,16 @@ impl VmFd { /// let kvm = Kvm::new().unwrap(); /// let vm = kvm.create_vm().unwrap(); /// arch_setup(&vm); - /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - /// { + /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { /// vm.set_irq_line(4, true); /// // ... /// } - /// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] - /// { + /// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] { /// vm.set_irq_line(0x01_00_0020, true); /// // .... 
/// } /// ``` + /// #[cfg(any( target_arch = "x86", target_arch = "x86_64", @@ -1054,10 +984,10 @@ impl VmFd { pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> { let mut irq_level = kvm_irq_level::default(); irq_level.__bindgen_anon_1.irq = irq; - irq_level.level = u32::from(active); + irq_level.level = if active { 1 } else { 0 }; - // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read - // the correct amount of memory from our pointer, and we verify the return result. + // Safe because we know that our file is a VM fd, we know the kernel will only read the + // correct amount of memory from our pointer, and we verify the return result. let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE(), &irq_level) }; if ret == 0 { Ok(()) @@ -1090,16 +1020,17 @@ impl VmFd { /// // Create one vCPU with the ID=0. /// let vcpu = vm.create_vcpu(0); /// ``` - pub fn create_vcpu(&self, id: u64) -> Result { + /// + pub fn create_vcpu(&self, id: u8) -> Result { + // Safe because we know that vm is a VM fd and we verify the return result. #[allow(clippy::cast_lossless)] - // SAFETY: Safe because we know that vm is a VM fd and we verify the return result. let vcpu_fd = unsafe { ioctl_with_val(&self.vm, KVM_CREATE_VCPU(), id as c_ulong) }; if vcpu_fd < 0 { return Err(errno::Error::last()); } - // Wrap the vCPU now in case the following ? returns early. - // SAFETY: This is safe because we verified the value of the fd and we own the fd. + // Wrap the vCPU now in case the following ? returns early. This is safe because we verified + // the value of the fd and we own the fd. 
let vcpu = unsafe { File::from_raw_fd(vcpu_fd) }; let kvm_run_ptr = KvmRunWrapper::mmap_from_fd(&vcpu, self.run_size)?; @@ -1136,6 +1067,7 @@ impl VmFd { /// assert!(rawfd >= 0); /// let vcpu = unsafe { vm.create_vcpu_from_rawfd(rawfd).unwrap() }; /// ``` + /// pub unsafe fn create_vcpu_from_rawfd(&self, fd: RawFd) -> Result { let vcpu = File::from_raw_fd(fd); let kvm_run_ptr = KvmRunWrapper::mmap_from_fd(&vcpu, self.run_size)?; @@ -1158,8 +1090,10 @@ impl VmFd { /// # extern crate kvm_bindings; /// # use kvm_ioctls::Kvm; /// use kvm_bindings::{ - /// kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, - /// kvm_device_type_KVM_DEV_TYPE_VFIO, KVM_CREATE_DEVICE_TEST, + /// kvm_device_type_KVM_DEV_TYPE_VFIO, + /// kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, + /// kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, + /// KVM_CREATE_DEVICE_TEST, /// }; /// let kvm = Kvm::new().unwrap(); /// let vm = kvm.create_vm().unwrap(); @@ -1183,17 +1117,14 @@ impl VmFd { /// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] /// { /// device.type_ = kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2; - /// vm.create_device(&mut device) - /// .expect("Cannot create vGIC device") + /// vm.create_device(&mut device).expect("Cannot create vGIC device") /// } /// }); /// ``` + /// pub fn create_device(&self, device: &mut kvm_create_device) -> Result { - // SAFETY: Safe because we are calling this with the VM fd and we trust the kernel. let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_DEVICE(), device) }; if ret == 0 { - // SAFETY: We validated the return of the function creating the fd and we trust the - // kernel. 
Ok(new_device(unsafe { File::from_raw_fd(device.fd as i32) })) } else { Err(errno::Error::last()) @@ -1222,9 +1153,10 @@ impl VmFd { /// let mut kvi = kvm_vcpu_init::default(); /// vm.get_preferred_target(&mut kvi).unwrap(); /// ``` + /// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] pub fn get_preferred_target(&self, kvi: &mut kvm_vcpu_init) -> Result<()> { - // SAFETY: The ioctl is safe because we allocated the struct and we know the + // The ioctl is safe because we allocated the struct and we know the // kernel will write exactly the size of the struct. let ret = unsafe { ioctl_with_mut_ref(self, KVM_ARM_PREFERRED_TARGET(), kvi) }; if ret != 0 { @@ -1276,9 +1208,10 @@ impl VmFd { /// vm.enable_cap(&cap).unwrap(); /// } /// ``` + /// #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] pub fn enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> { - // SAFETY: The ioctl is safe because we allocated the struct and we know the + // The ioctl is safe because we allocated the struct and we know the // kernel will write exactly the size of the struct. let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), cap) }; if ret == 0 { @@ -1297,8 +1230,8 @@ impl VmFd { /// /// Returns 0 if the capability is not available and a positive integer otherwise. fn check_extension_int(&self, c: Cap) -> i32 { - // SAFETY: Safe because we know that our file is a VM fd and that the extension is one of - // the ones defined by kernel. + // Safe because we know that our file is a VM fd and that the extension is one of the ones + // defined by kernel. unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION(), c as c_ulong) } } @@ -1322,246 +1255,10 @@ impl VmFd { /// // Check if `KVM_CAP_MP_STATE` is supported. 
/// assert!(vm.check_extension(Cap::MpState)); /// ``` + /// pub fn check_extension(&self, c: Cap) -> bool { self.check_extension_int(c) > 0 } - - /// Issues platform-specific memory encryption commands to manage encrypted VMs if - /// the platform supports creating those encrypted VMs. - /// - /// Currently, this ioctl is used for issuing Secure Encrypted Virtualization - /// (SEV) commands on AMD Processors. - /// - /// See the documentation for `KVM_MEMORY_ENCRYPT_OP` in the - /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). - /// - /// For SEV-specific functionality, prefer safe wrapper: - /// - [`encrypt_op_sev`](Self::encrypt_op_sev) - /// - /// # Safety - /// - /// This function is unsafe because there is no guarantee `T` is valid in this context, how - /// much data kernel will read from memory and where it will write data on error. - /// - /// # Arguments - /// - /// * `op` - an opaque platform specific structure. - /// - /// # Example - #[cfg_attr(has_sev, doc = "```rust")] - #[cfg_attr(not(has_sev), doc = "```rust,no_run")] - /// # extern crate kvm_ioctls; - /// # extern crate kvm_bindings; - /// use kvm_bindings::bindings::kvm_sev_cmd; - /// # use kvm_ioctls::Kvm; - /// - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// - /// // Initialize the SEV platform context. - /// let mut init: kvm_sev_cmd = Default::default(); - /// unsafe { vm.encrypt_op(&mut init).unwrap() }; - /// ``` - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub unsafe fn encrypt_op(&self, op: *mut T) -> Result<()> { - let ret = ioctl_with_mut_ptr(self, KVM_MEMORY_ENCRYPT_OP(), op); - if ret == 0 { - Ok(()) - } else { - Err(errno::Error::last()) - } - } - - /// Issue common lifecycle events of SEV guests, such as launching, running, snapshotting, - /// migrating and decommissioning via `KVM_MEMORY_ENCRYPT_OP` ioctl. 
- /// - /// Kernel documentation states that this ioctl can be used for testing whether SEV is enabled - /// by sending `NULL`. To do that, pass [`std::ptr::null_mut`](std::ptr::null_mut) to [`encrypt_op`](Self::encrypt_op). - /// - /// See the documentation for Secure Encrypted Virtualization (SEV). - /// - /// # Arguments - /// - /// * `op` - SEV-specific structure. For details check the - /// [Secure Encrypted Virtualization (SEV) doc](https://www.kernel.org/doc/Documentation/virtual/kvm/amd-memory-encryption.rst). - /// - /// # Example - #[cfg_attr(has_sev, doc = "```rust")] - #[cfg_attr(not(has_sev), doc = "```rust,no_run")] - /// # extern crate kvm_ioctls; - /// # extern crate kvm_bindings; - /// # use std::{os::raw::c_void, ptr::null_mut}; - /// use kvm_bindings::bindings::kvm_sev_cmd; - /// # use kvm_ioctls::Kvm; - /// - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// - /// // Check whether SEV is enabled, optional. - /// assert!(unsafe { vm.encrypt_op(null_mut() as *mut c_void) }.is_ok()); - /// - /// // Initialize the SEV platform context. - /// let mut init: kvm_sev_cmd = Default::default(); - /// vm.encrypt_op_sev(&mut init).unwrap(); - /// ``` - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn encrypt_op_sev(&self, op: &mut kvm_sev_cmd) -> Result<()> { - // SAFETY: Safe because we know that kernel will only read the correct amount of memory - // from our pointer and we know where it will write it (op.error). - unsafe { self.encrypt_op(op) } - } - - /// Register a guest memory region which may contain encrypted data. - /// - /// It is used in the SEV-enabled guest. - /// - /// See the documentation for `KVM_MEMORY_ENCRYPT_REG_REGION` in the - /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). - /// - /// # Arguments - /// - /// * `memory_region` - Guest physical memory region. 
- /// - /// # Example - #[cfg_attr(has_sev, doc = "```rust")] - #[cfg_attr(not(has_sev), doc = "```rust,no_run")] - /// # extern crate kvm_bindings; - /// # extern crate kvm_ioctls; - /// # extern crate libc; - /// # use std::{fs::OpenOptions, ptr::null_mut}; - /// # use std::os::unix::io::AsRawFd; - /// use kvm_bindings::bindings::{kvm_enc_region, kvm_sev_cmd, kvm_sev_launch_start, sev_cmd_id_KVM_SEV_LAUNCH_START}; - /// # use kvm_ioctls::Kvm; - /// use libc; - /// - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// let sev = OpenOptions::new() - /// .read(true) - /// .write(true) - /// .open("/dev/sev") - /// .unwrap(); - /// - /// // Initialize the SEV platform context. - /// let mut init: kvm_sev_cmd = Default::default(); - /// assert!(vm.encrypt_op_sev(&mut init).is_ok()); - /// - /// // Create the memory encryption context. - /// let start_data: kvm_sev_launch_start = Default::default(); - /// let mut start = kvm_sev_cmd { - /// id: sev_cmd_id_KVM_SEV_LAUNCH_START, - /// data: &start_data as *const kvm_sev_launch_start as _, - /// sev_fd: sev.as_raw_fd() as _, - /// ..Default::default() - /// }; - /// assert!(vm.encrypt_op_sev(&mut start).is_ok()); - /// - /// let addr = unsafe { - /// libc::mmap( - /// null_mut(), - /// 4096, - /// libc::PROT_READ | libc::PROT_WRITE, - /// libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, - /// -1, - /// 0, - /// ) - /// }; - /// assert_ne!(addr, libc::MAP_FAILED); - /// - /// let memory_region = kvm_enc_region { - /// addr: addr as _, - /// size: 4096, - /// }; - /// vm.register_enc_memory_region(&memory_region).unwrap(); - /// ``` - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn register_enc_memory_region(&self, memory_region: &kvm_enc_region) -> Result<()> { - // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read - // the correct amount of memory from our pointer, and we verify the return result. 
- let ret = unsafe { ioctl_with_ref(self, KVM_MEMORY_ENCRYPT_REG_REGION(), memory_region) }; - if ret == 0 { - Ok(()) - } else { - Err(errno::Error::last()) - } - } - - /// Unregister a guest memory region registered with - /// [`register_enc_memory_region`](Self::register_enc_memory_region). - /// - /// It is used in the SEV-enabled guest. - /// - /// See the documentation for `KVM_MEMORY_ENCRYPT_UNREG_REGION` in the - /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt). - /// - /// # Arguments - /// - /// * `memory_region` - Guest physical memory region. - /// - /// # Example - #[cfg_attr(has_sev, doc = "```rust")] - #[cfg_attr(not(has_sev), doc = "```rust,no_run")] - /// # extern crate kvm_bindings; - /// # extern crate kvm_ioctls; - /// # extern crate libc; - /// # use std::{fs::OpenOptions, ptr::null_mut}; - /// # use std::os::unix::io::AsRawFd; - /// use kvm_bindings::bindings::{kvm_enc_region, kvm_sev_cmd, kvm_sev_launch_start, sev_cmd_id_KVM_SEV_LAUNCH_START}; - /// # use kvm_ioctls::Kvm; - /// use libc; - /// - /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); - /// let sev = OpenOptions::new() - /// .read(true) - /// .write(true) - /// .open("/dev/sev") - /// .unwrap(); - /// - /// // Initialize the SEV platform context. - /// let mut init: kvm_sev_cmd = Default::default(); - /// assert!(vm.encrypt_op_sev(&mut init).is_ok()); - /// - /// // Create the memory encryption context. 
- /// let start_data: kvm_sev_launch_start = Default::default(); - /// let mut start = kvm_sev_cmd { - /// id: sev_cmd_id_KVM_SEV_LAUNCH_START, - /// data: &start_data as *const kvm_sev_launch_start as _, - /// sev_fd: sev.as_raw_fd() as _, - /// ..Default::default() - /// }; - /// assert!(vm.encrypt_op_sev(&mut start).is_ok()); - /// - /// let addr = unsafe { - /// libc::mmap( - /// null_mut(), - /// 4096, - /// libc::PROT_READ | libc::PROT_WRITE, - /// libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, - /// -1, - /// 0, - /// ) - /// }; - /// assert_ne!(addr, libc::MAP_FAILED); - /// - /// let memory_region = kvm_enc_region { - /// addr: addr as _, - /// size: 4096, - /// }; - /// vm.register_enc_memory_region(&memory_region).unwrap(); - /// vm.unregister_enc_memory_region(&memory_region).unwrap(); - /// ``` - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - pub fn unregister_enc_memory_region(&self, memory_region: &kvm_enc_region) -> Result<()> { - // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read - // the correct amount of memory from our pointer, and we verify the return result. - let ret = unsafe { ioctl_with_ref(self, KVM_MEMORY_ENCRYPT_UNREG_REGION(), memory_region) }; - if ret == 0 { - Ok(()) - } else { - Err(errno::Error::last()) - } - } } /// Helper function to create a new `VmFd`. @@ -1585,6 +1282,7 @@ impl AsRawFd for VmFd { /// /// * `vm` - The vm file descriptor. /// * `flags` - Flags to be passed to `KVM_CREATE_DEVICE`. 
+/// #[cfg(test)] #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] pub(crate) fn create_gic_device(vm: &VmFd, flags: u32) -> DeviceFd { @@ -1593,14 +1291,15 @@ pub(crate) fn create_gic_device(vm: &VmFd, flags: u32) -> DeviceFd { fd: 0, flags, }; - match vm.create_device(&mut gic_device) { + let device_fd = match vm.create_device(&mut gic_device) { Ok(fd) => fd, Err(_) => { gic_device.type_ = kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2; vm.create_device(&mut gic_device) .expect("Cannot create KVM vGIC device") } - } + }; + device_fd } /// Set supported number of IRQs for vGIC. @@ -1609,6 +1308,7 @@ pub(crate) fn create_gic_device(vm: &VmFd, flags: u32) -> DeviceFd { /// /// * `vgic` - The vGIC file descriptor. /// * `nr_irqs` - Number of IRQs. +/// #[cfg(test)] #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] pub(crate) fn set_supported_nr_irqs(vgic: &DeviceFd, nr_irqs: u32) { @@ -1626,6 +1326,7 @@ pub(crate) fn set_supported_nr_irqs(vgic: &DeviceFd, nr_irqs: u32) { /// # Arguments /// /// * `vgic` - The vGIC file descriptor. +/// #[cfg(test)] #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] pub(crate) fn request_gic_init(vgic: &DeviceFd) { @@ -1640,12 +1341,8 @@ pub(crate) fn request_gic_init(vgic: &DeviceFd) { #[cfg(test)] mod tests { - #![allow(clippy::undocumented_unsafe_blocks)] use super::*; - use crate::Kvm; - - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - use std::{fs::OpenOptions, ptr::null_mut}; + use Kvm; use libc::EFD_NONBLOCK; @@ -1671,19 +1368,6 @@ mod tests { assert!(vm.set_tss_address(0xfffb_d000).is_ok()); } - #[test] - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - fn test_set_identity_map_address() { - let kvm = Kvm::new().unwrap(); - if kvm.check_extension(Cap::SetIdentityMapAddr) { - let vm = kvm.create_vm().unwrap(); - assert!(vm.set_identity_map_address(0xfffb_c000).is_ok()); - vm.create_vcpu(0).unwrap(); - // Setting the identity map after creating a vCPU must fail. 
- assert!(vm.set_identity_map_address(0xfffb_c000).is_err()); - } - } - #[test] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn test_irq_chip() { @@ -1694,20 +1378,16 @@ mod tests { let vm = kvm.create_vm().unwrap(); assert!(vm.create_irq_chip().is_ok()); - let mut irqchip = kvm_irqchip { - chip_id: KVM_IRQCHIP_PIC_MASTER, - ..Default::default() - }; + let mut irqchip = kvm_irqchip::default(); + irqchip.chip_id = KVM_IRQCHIP_PIC_MASTER; // Set the irq_base to a non-default value to check that set & get work. irqchip.chip.pic.irq_base = 10; assert!(vm.set_irqchip(&irqchip).is_ok()); // We initialize a dummy irq chip (`other_irqchip`) in which the // function `get_irqchip` returns its result. - let mut other_irqchip = kvm_irqchip { - chip_id: KVM_IRQCHIP_PIC_MASTER, - ..Default::default() - }; + let mut other_irqchip = kvm_irqchip::default(); + other_irqchip.chip_id = KVM_IRQCHIP_PIC_MASTER; assert!(vm.get_irqchip(&mut other_irqchip).is_ok()); // Safe because we know that the irqchip type is PIC. @@ -1763,10 +1443,8 @@ mod tests { let orig = vm.get_clock().unwrap(); // Reset time. - let fudged = kvm_clock_data { - clock: 10, - ..Default::default() - }; + let mut fudged = kvm_clock_data::default(); + fudged.clock = 10; vm.set_clock(&fudged).unwrap(); // Get new time. 
@@ -1848,8 +1526,6 @@ mod tests { let evtfd1 = EventFd::new(EFD_NONBLOCK).unwrap(); let evtfd2 = EventFd::new(EFD_NONBLOCK).unwrap(); let evtfd3 = EventFd::new(EFD_NONBLOCK).unwrap(); - let evtfd4 = EventFd::new(EFD_NONBLOCK).unwrap(); - let resamplefd = EventFd::new(EFD_NONBLOCK).unwrap(); assert!(vm_fd.create_irq_chip().is_ok()); @@ -1866,17 +1542,6 @@ mod tests { assert!(vm_fd.register_irqfd(&evtfd3, 5).is_err()); // KVM irqfd doesn't report failure on this case:( assert!(vm_fd.unregister_irqfd(&evtfd3, 5).is_ok()); - - if vm_fd.check_extension(Cap::IrqfdResample) { - assert!(vm_fd - .register_irqfd_with_resample(&evtfd4, &resamplefd, 6) - .is_ok()); - assert!(vm_fd.unregister_irqfd(&evtfd4, 6).is_ok()); - } else { - assert!(vm_fd - .register_irqfd_with_resample(&evtfd4, &resamplefd, 6) - .is_err()); - } } #[test] @@ -1887,16 +1552,10 @@ mod tests { let evtfd1 = EventFd::new(EFD_NONBLOCK).unwrap(); let evtfd2 = EventFd::new(EFD_NONBLOCK).unwrap(); let evtfd3 = EventFd::new(EFD_NONBLOCK).unwrap(); - let evtfd4 = EventFd::new(EFD_NONBLOCK).unwrap(); - let resamplefd = EventFd::new(EFD_NONBLOCK).unwrap(); // Create the vGIC device. let vgic_fd = create_gic_device(&vm_fd, 0); - // GICv3 on arm/aarch64 requires an online vCPU prior to setting device attributes, - // see: https://www.kernel.org/doc/html/latest/virt/kvm/devices/arm-vgic-v3.html - vm_fd.create_vcpu(0).unwrap(); - // Set supported number of IRQs. set_supported_nr_irqs(&vgic_fd, 128); // Request the initialization of the vGIC. 
@@ -1916,17 +1575,6 @@ mod tests { assert!(vm_fd.register_irqfd(&evtfd3, 5).is_err()); // KVM irqfd doesn't report failure on this case:( assert!(vm_fd.unregister_irqfd(&evtfd3, 5).is_ok()); - - if vm_fd.check_extension(Cap::IrqfdResample) { - assert!(vm_fd - .register_irqfd_with_resample(&evtfd4, &resamplefd, 6) - .is_ok()); - assert!(vm_fd.unregister_irqfd(&evtfd4, 6).is_ok()); - } else { - assert!(vm_fd - .register_irqfd_with_resample(&evtfd4, &resamplefd, 6) - .is_err()); - } } #[test] @@ -1981,7 +1629,7 @@ mod tests { let badf_errno = libc::EBADF; let faulty_vm_fd = VmFd { - vm: unsafe { File::from_raw_fd(-2) }, + vm: unsafe { File::from_raw_fd(-1) }, run_size: 0, }; @@ -2116,10 +1764,8 @@ mod tests { fn test_enable_split_irqchip_cap() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); - let mut cap = kvm_enable_cap { - cap: KVM_CAP_SPLIT_IRQCHIP, - ..Default::default() - }; + let mut cap: kvm_enable_cap = Default::default(); + cap.cap = KVM_CAP_SPLIT_IRQCHIP; // As per the KVM documentation, KVM_CAP_SPLIT_IRQCHIP only emulates // the local APIC in kernel, expecting that a userspace IOAPIC will // be implemented by the VMM. 
@@ -2149,105 +1795,10 @@ mod tests { assert!(vm.set_gsi_routing(&irq_routing).is_ok()); } - #[test] - fn test_create_vcpu_different_ids() { - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); - - // Fails when an arbitrarily large value - let err = vm.create_vcpu(65537_u64).err(); - assert_eq!(err.unwrap().errno(), libc::EINVAL); - - // Fails when input `id` = `max_vcpu_id` - let max_vcpu_id = kvm.get_max_vcpu_id(); - let vcpu = vm.create_vcpu((max_vcpu_id - 1) as u64); - assert!(vcpu.is_ok()); - let vcpu_err = vm.create_vcpu(max_vcpu_id as u64).err(); - assert_eq!(vcpu_err.unwrap().errno(), libc::EINVAL); - } - #[test] fn test_check_extension() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); assert!(vm.check_extension(Cap::MpState)); } - - #[test] - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - #[cfg_attr(not(has_sev), ignore)] - fn test_encrypt_op_sev() { - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); - - let mut init: kvm_sev_cmd = Default::default(); - assert!(vm.encrypt_op_sev(&mut init).is_ok()); - } - - #[test] - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - #[cfg_attr(not(has_sev), ignore)] - fn test_register_unregister_enc_memory_region() { - let sev = OpenOptions::new() - .read(true) - .write(true) - .open("/dev/sev") - .unwrap(); - - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); - - // Perform SEV launch sequence according to - // https://www.kernel.org/doc/Documentation/virtual/kvm/amd-memory-encryption.rst - - let mut init: kvm_sev_cmd = Default::default(); - assert!(vm.encrypt_op_sev(&mut init).is_ok()); - - let start_data: kvm_sev_launch_start = Default::default(); - let mut start = kvm_sev_cmd { - id: sev_cmd_id_KVM_SEV_LAUNCH_START, - data: &start_data as *const kvm_sev_launch_start as _, - sev_fd: sev.as_raw_fd() as _, - ..Default::default() - }; - assert!(vm.encrypt_op_sev(&mut start).is_ok()); - - let addr = unsafe { - libc::mmap( - 
null_mut(), - 4096, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, - -1, - 0, - ) - }; - assert_ne!(addr, libc::MAP_FAILED); - - assert_eq!( - vm.register_enc_memory_region(&Default::default()) - .unwrap_err() - .errno(), - libc::EINVAL - ); - assert_eq!( - vm.unregister_enc_memory_region(&Default::default()) - .unwrap_err() - .errno(), - libc::EINVAL - ); - - let memory_region = kvm_enc_region { - addr: addr as _, - size: 4096, - }; - assert_eq!( - vm.unregister_enc_memory_region(&memory_region) - .unwrap_err() - .errno(), - libc::EINVAL - ); - assert!(vm.register_enc_memory_region(&memory_region).is_ok()); - assert!(vm.unregister_enc_memory_region(&memory_region).is_ok()); - } } diff --git a/kvm-ioctls/src/kvm_ioctls.rs b/kvm-ioctls/src/kvm_ioctls.rs index f7252a7fb..4206e341d 100644 --- a/kvm-ioctls/src/kvm_ioctls.rs +++ b/kvm-ioctls/src/kvm_ioctls.rs @@ -36,9 +36,6 @@ ioctl_iow_nr!( /* Available with KVM_CAP_SET_TSS_ADDR */ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ioctl_io_nr!(KVM_SET_TSS_ADDR, KVMIO, 0x47); -/* Available with KVM_CAP_SET_IDENTITY_MAP_ADDR */ -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -ioctl_iow_nr!(KVM_SET_IDENTITY_MAP_ADDR, KVMIO, 0x48, u64); /* Available with KVM_CAP_IRQCHIP */ #[cfg(any( target_arch = "x86", @@ -96,18 +93,9 @@ ioctl_ior_nr!(KVM_GET_PIT2, KVMIO, 0x9f, kvm_pit_state2); /* Available with KVM_CAP_PIT_STATE2 */ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ioctl_iow_nr!(KVM_SET_PIT2, KVMIO, 0xa0, kvm_pit_state2); -/* KVM_MEMORY_ENCRYPT_OP. Takes opaque platform dependent type: i.e. TDX or SEV */ -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong); -/* Available on SEV-enabled guests. */ -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -ioctl_ior_nr!(KVM_MEMORY_ENCRYPT_REG_REGION, KVMIO, 0xbb, kvm_enc_region); -/* Available on SEV-enabled guests. 
*/ -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -ioctl_ior_nr!(KVM_MEMORY_ENCRYPT_UNREG_REGION, KVMIO, 0xbc, kvm_enc_region); // Ioctls for VCPU fds. - +ioctl_iow_nr!(KVM_INTERRUPT, KVMIO, 0x86, kvm_interrupt); ioctl_io_nr!(KVM_RUN, KVMIO, 0x80); #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] ioctl_ior_nr!(KVM_GET_REGS, KVMIO, 0x81, kvm_regs); @@ -127,9 +115,6 @@ ioctl_ior_nr!(KVM_GET_SREGS, KVMIO, 0x83, kvm_sregs); target_arch = "powerpc64" ))] ioctl_iow_nr!(KVM_SET_SREGS, KVMIO, 0x84, kvm_sregs); -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -ioctl_iowr_nr!(KVM_TRANSLATE, KVMIO, 0x85, kvm_translation); -ioctl_iow_nr!(KVM_INTERRUPT, KVMIO, 0x86, kvm_interrupt); /* Available with KVM_CAP_GET_MSR_FEATURES */ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ioctl_iowr_nr!(KVM_GET_MSR_INDEX_LIST, KVMIO, 0x02, kvm_msr_list); @@ -211,13 +196,6 @@ ioctl_iow_nr!(KVM_SET_XCRS, KVMIO, 0xa7, kvm_xcrs); #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ioctl_io_nr!(KVM_KVMCLOCK_CTRL, KVMIO, 0xad); -/* Available with KVM_CAP_TSC_CONTROL */ -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -ioctl_io_nr!(KVM_SET_TSC_KHZ, KVMIO, 0xa2); -/* Available with KVM_CAP_GET_TSC_KHZ */ -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -ioctl_io_nr!(KVM_GET_TSC_KHZ, KVMIO, 0xa3); - /* Available with KVM_CAP_ENABLE_CAP */ #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] ioctl_iow_nr!(KVM_ENABLE_CAP, KVMIO, 0xa3, kvm_enable_cap); @@ -257,7 +235,6 @@ ioctl_iow_nr!(KVM_HAS_DEVICE_ATTR, KVMIO, 0xe3, kvm_device_attr); #[cfg(test)] mod tests { - #![allow(clippy::undocumented_unsafe_blocks)] use std::fs::File; use std::os::unix::io::FromRawFd; diff --git a/kvm-ioctls/src/lib.rs b/kvm-ioctls/src/lib.rs index 2f3a0b0da..10c3cbae3 100644 --- a/kvm-ioctls/src/lib.rs +++ b/kvm-ioctls/src/lib.rs @@ -1,3 +1,213 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR MIT +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. +#![deny(missing_docs)] + +//! A safe wrapper around the kernel's KVM interface. +//! +//! This crate offers safe wrappers for: +//! - [system ioctls](struct.Kvm.html) using the `Kvm` structure +//! - [VM ioctls](struct.VmFd.html) using the `VmFd` structure +//! - [vCPU ioctls](struct.VcpuFd.html) using the `VcpuFd` structure +//! - [device ioctls](struct.DeviceFd.html) using the `DeviceFd` structure +//! +//! # Platform support +//! +//! - x86_64 +//! - arm64 (experimental) +//! +//! **NOTE:** The list of available ioctls is not extensive. +//! +//! # Example - Running a VM on x86_64 +//! +//! In this example we are creating a Virtual Machine (VM) with one vCPU. +//! On the vCPU we are running machine specific code. This example is based on +//! the [LWN article](https://lwn.net/Articles/658511/) on using the KVM API. +//! The aarch64 example was modified accordingly. +//! +//! To get code running on the vCPU we are going through the following steps: +//! +//! 1. Instantiate KVM. This is used for running +//! [system specific ioctls](struct.Kvm.html). +//! 2. Use the KVM object to create a VM. The VM is used for running +//! [VM specific ioctls](struct.VmFd.html). +//! 3. Initialize the guest memory for the created VM. In this dummy example we +//! are adding only one memory region and write the code in one memory page. +//! 4. Create a vCPU using the VM object. The vCPU is used for running +//! [vCPU specific ioctls](struct.VcpuFd.html). +//! 5. Setup architectural specific general purpose registers and special registers. For +//! details about how and why these registers are set, please check the +//! [LWN article](https://lwn.net/Articles/658511/) on which this example is +//! built. +//! 6. 
Run the vCPU code in a loop and check the +//! [exit reasons](enum.VcpuExit.html). +//! +//! +//! ```rust +//! extern crate kvm_ioctls; +//! extern crate kvm_bindings; +//! +//! use kvm_ioctls::{Kvm, VmFd, VcpuFd}; +//! use kvm_ioctls::VcpuExit; +//! +//! fn main(){ +//! use std::io::Write; +//! use std::slice; +//! use std::ptr::null_mut; +//! +//! use kvm_bindings::KVM_MEM_LOG_DIRTY_PAGES; +//! use kvm_bindings::kvm_userspace_memory_region; +//! +//! let mem_size = 0x4000; +//! let guest_addr = 0x1000; +//! let asm_code: &[u8]; +//! +//! // Setting up architectural dependent values. +//! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +//! { +//! asm_code = &[ +//! 0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */ +//! 0x00, 0xd8, /* add %bl, %al */ +//! 0x04, b'0', /* add $'0', %al */ +//! 0xee, /* out %al, %dx */ +//! 0xec, /* in %dx, %al */ +//! 0xc6, 0x06, 0x00, 0x80, 0x00, /* movl $0, (0x8000); This generates a MMIO Write.*/ +//! 0x8a, 0x16, 0x00, 0x80, /* movl (0x8000), %dl; This generates a MMIO Read.*/ +//! 0xf4, /* hlt */ +//! ]; +//! } +//! #[cfg(target_arch = "aarch64")] +//! { +//! asm_code = &[ +//! 0x01, 0x00, 0x00, 0x10, /* adr x1, */ +//! 0x22, 0x10, 0x00, 0xb9, /* str w2, [x1, #16]; write to this page */ +//! 0x02, 0x00, 0x00, 0xb9, /* str w2, [x0]; This generates a MMIO Write.*/ +//! 0x00, 0x00, 0x00, 0x14, /* b ; shouldn't get here, but if so loop forever */ +//! ]; +//! } +//! +//! // 1. Instantiate KVM. +//! let kvm = Kvm::new().unwrap(); +//! +//! // 2. Create a VM. +//! let vm = kvm.create_vm().unwrap(); +//! +//! // 3. Initialize Guest Memory. +//! let load_addr: *mut u8 = unsafe { +//! libc::mmap( +//! null_mut(), +//! mem_size, +//! libc::PROT_READ | libc::PROT_WRITE, +//! libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE, +//! -1, +//! 0, +//! ) as *mut u8 +//! }; +//! +//! let slot = 0; +//! // When initializing the guest memory slot specify the +//! // `KVM_MEM_LOG_DIRTY_PAGES` to enable the dirty log. +//! 
let mem_region = kvm_userspace_memory_region { +//! slot, +//! guest_phys_addr: guest_addr, +//! memory_size: mem_size as u64, +//! userspace_addr: load_addr as u64, +//! flags: KVM_MEM_LOG_DIRTY_PAGES, +//! }; +//! unsafe { vm.set_user_memory_region(mem_region).unwrap() }; +//! +//! // Write the code in the guest memory. This will generate a dirty page. +//! unsafe { +//! let mut slice = slice::from_raw_parts_mut(load_addr, mem_size); +//! slice.write(&asm_code).unwrap(); +//! } +//! +//! // 4. Create one vCPU. +//! let vcpu_fd = vm.create_vcpu(0).unwrap(); +//! +//! // 5. Initialize general purpose and special registers. +//! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +//! { +//! // x86_64 specific registry setup. +//! let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap(); +//! vcpu_sregs.cs.base = 0; +//! vcpu_sregs.cs.selector = 0; +//! vcpu_fd.set_sregs(&vcpu_sregs).unwrap(); +//! +//! let mut vcpu_regs = vcpu_fd.get_regs().unwrap(); +//! vcpu_regs.rip = guest_addr; +//! vcpu_regs.rax = 2; +//! vcpu_regs.rbx = 3; +//! vcpu_regs.rflags = 2; +//! vcpu_fd.set_regs(&vcpu_regs).unwrap(); +//! } +//! +//! #[cfg(target_arch = "aarch64")] +//! { +//! // aarch64 specific registry setup. +//! let mut kvi = kvm_bindings::kvm_vcpu_init::default(); +//! vm.get_preferred_target(&mut kvi).unwrap(); +//! vcpu_fd.vcpu_init(&kvi).unwrap(); +//! +//! let core_reg_base: u64 = 0x6030_0000_0010_0000; +//! let mmio_addr: u64 = guest_addr + mem_size as u64; +//! vcpu_fd.set_one_reg(core_reg_base + 2 * 32, guest_addr); // set PC +//! vcpu_fd.set_one_reg(core_reg_base + 2 * 0, mmio_addr); // set X0 +//! } +//! +//! // 6. Run code on the vCPU. +//! loop { +//! match vcpu_fd.run().expect("run failed") { +//! VcpuExit::IoIn(addr, data) => { +//! println!( +//! "Received an I/O in exit. Address: {:#x}. Data: {:#x}", +//! addr, +//! data[0], +//! ); +//! } +//! VcpuExit::IoOut(addr, data) => { +//! println!( +//! "Received an I/O out exit. Address: {:#x}. Data: {:#x}", +//! 
addr, +//! data[0], +//! ); +//! } +//! VcpuExit::MmioRead(addr, data) => { +//! println!( +//! "Received an MMIO Read Request for the address {:#x}.", +//! addr, +//! ); +//! } +//! VcpuExit::MmioWrite(addr, data) => { +//! println!( +//! "Received an MMIO Write Request to the address {:#x}.", +//! addr, +//! ); +//! // The code snippet dirties 1 page when it is loaded in memory +//! let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap(); +//! let dirty_pages = dirty_pages_bitmap +//! .into_iter() +//! .map(|page| page.count_ones()) +//! .fold(0, |dirty_page_count, i| dirty_page_count + i); +//! assert_eq!(dirty_pages, 1); +//! // Since on aarch64 there is not halt instruction, +//! // we break immediately after the last known instruction +//! // of the asm code example so that we avoid an infinite loop. +//! #[cfg(target_arch = "aarch64")] +//! break; +//! } +//! VcpuExit::Hlt => { +//! break; +//! } +//! r => panic!("Unexpected exit reason: {:?}", r), +//! } +//! } +//! } +//! ``` + extern crate kvm_bindings; extern crate libc; #[macro_use] @@ -12,7 +222,6 @@ pub use cap::Cap; pub use ioctls::device::DeviceFd; pub use ioctls::system::Kvm; pub use ioctls::vcpu::{VcpuExit, VcpuFd}; - pub use ioctls::vm::{IoEventAddress, NoDatamatch, VmFd}; // The following example is used to verify that our public // structures are exported properly. @@ -20,7 +229,7 @@ pub use ioctls::vm::{IoEventAddress, NoDatamatch, VmFd}; /// /// ``` /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -/// use kvm_ioctls::{Error, KvmRunWrapper}; +/// use kvm_ioctls::{KvmRunWrapper, Error}; /// ``` pub use ioctls::KvmRunWrapper; pub use vmm_sys_util::errno::Error; diff --git a/src/arch/aarch64/boot_loader/mod.rs b/src/arch/aarch64/boot_loader/mod.rs new file mode 100644 index 000000000..220509eb2 --- /dev/null +++ b/src/arch/aarch64/boot_loader/mod.rs @@ -0,0 +1,117 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. 
+// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::fs::File; +use std::path::PathBuf; +use std::sync::Arc; + +use kvm_ioctls::VmFd; + +use crate::arch::aarch64::device_tree::*; +use crate::arch::CPUBootConfig; +use crate::cpu::CPU; +use crate::memory::GuestMemory; + +use super::kvm::load_boot_source; + +const AARCH64_KERNEL_OFFSET: u64 = 0x8_0000; + +/// Boot loader config used for aarch64. +#[derive(Default, Debug)] +pub struct AArch64BootLoaderConfig { + /// Path of kernel image. + pub kernel: PathBuf, + /// Path of initrd image. + pub initrd: PathBuf, + /// Start address of guest memory. + pub mem_start: u64, +} + +/// The start address for `kernel image`, `initrd image` and `dtb` in guest memory. +pub struct AArch64BootLoader { + /// Start address for `kernel` execute binary in guest memory. + pub kernel_start: u64, + /// Start address for `initrd image` in guest memory. + pub initrd_start: u64, + /// Initrd file size, 0 means no initrd file. + pub initrd_size: u64, + /// Start address for `dtb` in guest memory. + pub dtb_start: u64, +} + +/// Load PE(vmlinux.bin) linux kernel and other boot source to Guest Memory. +/// +/// # Steps +/// +/// 1. Prepare for linux kernel boot env, return guest memory layout. +/// 2. According guest memory layout, load linux kernel to guest memory. +/// 3. According guest memory layout, load initrd image to guest memory. +/// +/// # Arguments +/// +/// * `config` - boot source config, contains kernel, initrd. +/// * `sys_mem` - guest memory. 
+/// +/// # Errors +/// +/// Load kernel, initrd to guest memory failed. Boot source is broken or +/// guest memory is abnormal. +pub fn load_kernel( + config: &AArch64BootLoaderConfig, + sys_mem: &Arc, +) -> AArch64BootLoader { + let kernel_start = config.mem_start + AARCH64_KERNEL_OFFSET; + let mut kernel_image = File::open(&config.kernel).expect("Failed to open kernel file"); + let kernel_size = kernel_image.metadata().unwrap().len(); + let kernel_end = kernel_start + kernel_size; + sys_mem + .write(&mut kernel_image, kernel_start, kernel_size) + .expect("Failed to load kernel image to memory"); + + let dtb_addr = sys_mem + .memory_end_address() + .checked_sub(u64::from(FDT_MAX_SIZE)) + .filter(|addr| *addr > kernel_end) + .expect("no memory to load DTB"); + + let mut initrd_image = File::open(&config.initrd).expect("Failed to open initrd file"); + let initrd_size = initrd_image.metadata().unwrap().len(); + + let initrd_start = dtb_addr + .checked_sub(initrd_size) + .filter(|addr| *addr > kernel_end) + .expect("No memory to load initrd"); + + sys_mem + .write(&mut initrd_image, initrd_start, initrd_size) + .expect("Failed to load initrd to memory"); + + AArch64BootLoader { + kernel_start, + initrd_start, + initrd_size, + dtb_start: dtb_addr, + } +} + +pub fn kvm_load_kernel(guest_memory: &Arc, vcpu : &mut CPU, vm_fd: &Arc) -> AArch64BootLoader{ + + let layout = load_boot_source(guest_memory); + let cpu_boot_cfg = CPUBootConfig { + fdt_addr: layout.dtb_start, + kernel_addr: layout.kernel_start, + }; + vcpu.realize(&vm_fd, cpu_boot_cfg); + + + layout +} diff --git a/src/arch/aarch64/cpu/mod.rs b/src/arch/aarch64/cpu/mod.rs new file mode 100644 index 000000000..9c366ed5d --- /dev/null +++ b/src/arch/aarch64/cpu/mod.rs @@ -0,0 +1,244 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::convert::Into; +use std::mem; +use std::sync::Arc; + +use kvm_bindings::{ + kvm_regs, user_fpsimd_state, user_pt_regs, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM_CORE, + KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, +}; +use kvm_ioctls::{VcpuFd, VmFd}; + +use crate::offset_of; + +// PSR (Processor State Register) bits. +// See: https://elixir.bootlin.com/linux/v5.6/source/arch/arm64/include/uapi/asm/ptrace.h#L34 +#[allow(non_upper_case_globals)] +const PSR_MODE_EL1h: u64 = 0x0000_0005; +const PSR_F_BIT: u64 = 0x0000_0040; +const PSR_I_BIT: u64 = 0x0000_0080; +const PSR_A_BIT: u64 = 0x0000_0100; +const PSR_D_BIT: u64 = 0x0000_0200; + +// MPIDR - Multiprocessor Affinity Register. +// See: https://elixir.bootlin.com/linux/v5.6/source/arch/arm64/include/asm/sysreg.h#L130 +pub const SYS_MPIDR_EL1: u64 = 0x6030_0000_0013_c005; + +// MPIDR is Multiprocessor Affinity Register +// [40:63] bit reserved on AArch64 Architecture, +const UNINIT_MPIDR: u64 = 0xFFFF_FF00_0000_0000; + +// AArch64 cpu core register +// See: https://elixir.bootlin.com/linux/v5.6/source/arch/arm64/include/uapi/asm/kvm.h#L50 + +// User structures for general purpose, floating point and debug registers. 
+// See: https://elixir.bootlin.com/linux/v5.6/source/arch/arm64/include/uapi/asm/ptrace.h#L75 +#[allow(non_camel_case_types)] +#[allow(dead_code)] +pub enum Arm64CoreRegs { + KVM_USER_PT_REGS, + KVM_SP_EL1, + KVM_ELR_EL1, + KVM_SPSR(usize), + KVM_USER_FPSIMD_STATE, + USER_PT_REG_REGS(usize), + USER_PT_REG_SP, + USER_PT_REG_PC, + USER_PT_REG_PSTATE, + USER_FPSIMD_STATE_VREGS(usize), + USER_FPSIMD_STATE_FPSR, + USER_FPSIMD_STATE_FPCR, + USER_FPSIMD_STATE_RES(usize), +} + +#[allow(clippy::zero_ptr)] +impl Into for Arm64CoreRegs { + fn into(self) -> u64 { + let register_size; + let regid = match self { + Arm64CoreRegs::KVM_USER_PT_REGS => { + register_size = KVM_REG_SIZE_U64; + offset_of!(kvm_regs, regs) + } + Arm64CoreRegs::KVM_SP_EL1 => { + register_size = KVM_REG_SIZE_U64; + offset_of!(kvm_regs, sp_el1) + } + Arm64CoreRegs::KVM_ELR_EL1 => { + register_size = KVM_REG_SIZE_U64; + offset_of!(kvm_regs, elr_el1) + } + Arm64CoreRegs::KVM_SPSR(idx) if idx < KVM_NR_SPSR as usize => { + register_size = KVM_REG_SIZE_U64; + offset_of!(kvm_regs, spsr) + idx * 8 + } + Arm64CoreRegs::KVM_USER_FPSIMD_STATE => { + register_size = KVM_REG_SIZE_U64; + offset_of!(kvm_regs, fp_regs) + } + Arm64CoreRegs::USER_PT_REG_REGS(idx) if idx < 31 => { + register_size = KVM_REG_SIZE_U64; + offset_of!(kvm_regs, regs, user_pt_regs, regs) + idx * 8 + } + Arm64CoreRegs::USER_PT_REG_SP => { + register_size = KVM_REG_SIZE_U64; + offset_of!(kvm_regs, regs, user_pt_regs, sp) + } + Arm64CoreRegs::USER_PT_REG_PC => { + register_size = KVM_REG_SIZE_U64; + offset_of!(kvm_regs, regs, user_pt_regs, pc) + } + Arm64CoreRegs::USER_PT_REG_PSTATE => { + register_size = KVM_REG_SIZE_U64; + offset_of!(kvm_regs, regs, user_pt_regs, pstate) + } + Arm64CoreRegs::USER_FPSIMD_STATE_VREGS(idx) if idx < 32 => { + register_size = KVM_REG_SIZE_U128; + offset_of!(kvm_regs, fp_regs, user_fpsimd_state, vregs) + idx * 16 + } + Arm64CoreRegs::USER_FPSIMD_STATE_FPSR => { + register_size = KVM_REG_SIZE_U32; + offset_of!(kvm_regs, 
fp_regs, user_fpsimd_state, fpsr) + } + Arm64CoreRegs::USER_FPSIMD_STATE_FPCR => { + register_size = KVM_REG_SIZE_U32; + offset_of!(kvm_regs, fp_regs, user_fpsimd_state, fpcr) + } + Arm64CoreRegs::USER_FPSIMD_STATE_RES(idx) if idx < 2 => { + register_size = 128; + offset_of!(kvm_regs, fp_regs, user_fpsimd_state, __reserved) + idx * 8 + } + _ => panic!("No such Register"), + }; + + KVM_REG_ARM64 as u64 + | register_size as u64 + | u64::from(KVM_REG_ARM_CORE) + | (regid / mem::size_of::()) as u64 + } +} + +/// AArch64 CPU booting configure information +/// +/// Before jumping into the kernel, primary CPU general-purpose +/// register `x0` need to setting to physical address of device +/// tree blob (dtb) in system RAM. +/// +/// See: https://elixir.bootlin.com/linux/v5.6/source/Documentation/arm64/booting.rst +#[derive(Default, Copy, Clone)] +pub struct AArch64CPUBootConfig { + pub fdt_addr: u64, + pub kernel_addr: u64, +} + +/// AArch64 CPU architect information +#[derive(Default, Copy, Clone)] +pub struct CPUState { + /// The vcpu id, `0` means primary CPU. + vcpu_id: u32, + /// MPIDR register value of this vcpu, + /// The MPIDR provides an additional processor identification mechanism + /// for scheduling purposes. + mpidr: u64, + /// The guest physical address of kernel start point. + boot_ip: u64, + /// The guest physical address of device tree blob (dtb). 
+ fdt_addr: u64, + + pub nr_vcpus: u32, +} + +impl CPUState { + pub fn new(vcpu_id: u32, nr_vcpus: u32) -> Self { + CPUState { + vcpu_id, + mpidr: UNINIT_MPIDR, + boot_ip: 0, + fdt_addr: 0, + nr_vcpus, + } + } + + pub fn set_boot_config( + &mut self, + vm_fd: &Arc, + vcpu_fd: &VcpuFd, + boot_config: &AArch64CPUBootConfig, + ) { + self.boot_ip = boot_config.kernel_addr; + self.fdt_addr = boot_config.fdt_addr; + + let mut kvi = kvm_bindings::kvm_vcpu_init::default(); + vm_fd + .get_preferred_target(&mut kvi) + .expect("Failed to get kvm vcpu preferred target"); + + // support PSCI 0.2 + // We already checked that the capability is supported. + kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2; + // Non-boot cpus are powered off initially. + if self.vcpu_id != 0 { + kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF; + } + + vcpu_fd.vcpu_init(&kvi).expect("Failed to init kvm vcpu"); + self.get_mpidr(vcpu_fd); + } + pub fn get_mpidr(&mut self, vcpu_fd: &VcpuFd) -> u64 { + if self.mpidr == UNINIT_MPIDR { + self.mpidr = match vcpu_fd.get_one_reg(SYS_MPIDR_EL1) { + Ok(mpidr) => mpidr as u64, + Err(e) => panic!("update vcpu mpidr failed {:?}", e), + }; + } + self.mpidr + } + + pub fn reset_vcpu(&self, vcpu: &VcpuFd) { + // Configure PSTATE(Processor State), mask all interrupts. + let data: u64 = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h; + vcpu.set_one_reg(Arm64CoreRegs::USER_PT_REG_PSTATE.into(), data) + .expect( + format!( + "Failed to set core reg pstate register for CPU {}/KVM", + self.vcpu_id + ) + .as_str(), + ); + + // Reset x1, x2, x3 register to zero. 
+ vcpu.set_one_reg(Arm64CoreRegs::USER_PT_REG_REGS(1).into(), 0) + .expect(format!("Failed to init x1 to zero for CPU {}/KVM", self.vcpu_id).as_str()); + + vcpu.set_one_reg(Arm64CoreRegs::USER_PT_REG_REGS(2).into(), 0) + .expect(format!("Failed to init x2 to zero for CPU {}/KVM", self.vcpu_id).as_str()); + + vcpu.set_one_reg(Arm64CoreRegs::USER_PT_REG_REGS(3).into(), 0) + .expect(format!("Failed to init x3 to zero for CPU {}/KVM", self.vcpu_id).as_str()); + + // Configure boot ip and device tree address, prepare for kernel setup + if self.vcpu_id == 0 { + vcpu.set_one_reg(Arm64CoreRegs::USER_PT_REG_REGS(0).into(), self.fdt_addr) + .expect( + format!( + "Failed to set device tree address for CPU {}/KVM", + self.vcpu_id + ) + .as_str(), + ); + vcpu.set_one_reg(Arm64CoreRegs::USER_PT_REG_PC.into(), self.boot_ip) + .expect(format!("Failed to set boot ip for CPU {}/KVM", self.vcpu_id).as_str()); + } + } +} diff --git a/src/arch/aarch64/device/fdt.rs b/src/arch/aarch64/device/fdt.rs new file mode 100644 index 000000000..d7e399dc6 --- /dev/null +++ b/src/arch/aarch64/device/fdt.rs @@ -0,0 +1,126 @@ +use crate::arch::aarch64::device_tree::*; +use crate::arch::{LayoutEntryType, MEM_LAYOUT}; +use crate::cpu::CPU; +use crate::memory::GuestMemory; +use std::sync::Arc; + +use super::gicv3::GICv3; +use crate::arch::{MMIO_SERIAL_ADDR, MMIO_SERIAL_ADDR_SIZE, MMIO_SERIAL_IRQ}; +pub fn generate_fdt( + sys_mem: &Arc, + gic: &GICv3, + initrd_range: (u64, u64), + cmdline: &str, + cpu: Arc>, + fdt_addr: u64, +) { + let mut fdt = vec![0; FDT_MAX_SIZE as usize]; + + create_device_tree(&mut fdt); + set_property_string(&mut fdt, "/", "compatible", "linux,dummy-virt"); + set_property_u32(&mut fdt, "/", "#address-cells", 0x2); + set_property_u32(&mut fdt, "/", "#size-cells", 0x2); + set_property_u32(&mut fdt, "/", "interrupt-parent", GIC_PHANDLE); + + generate_cpu_node(&mut fdt, cpu); + generate_memory_node(&mut fdt, sys_mem); + generate_devices_node(&mut fdt); + generate_chosen_node(&mut 
fdt, cmdline, initrd_range.0, initrd_range.1); + gic.generate_fdt_node(&mut fdt); + + let fdt_len = fdt.len() as u64; + sys_mem + .write(&mut fdt.as_slice(), fdt_addr, fdt_len) + .expect("Failed to load fdt to memory"); + + dump_dtb(&fdt, "/tmp/stratovirt.dtb"); +} + +fn generate_memory_node(fdt: &mut Vec, sys_mem: &Arc) { + let mem_base = MEM_LAYOUT[LayoutEntryType::Mem as usize].0; + let mem_size = sys_mem.memory_end_address() - MEM_LAYOUT[LayoutEntryType::Mem as usize].0; + let node = "/memory"; + add_sub_node(fdt, node); + set_property_string(fdt, node, "device_type", "memory"); + set_property_array_u64(fdt, node, "reg", &[mem_base, mem_size as u64]); +} + + +fn generate_cpu_node(fdt: &mut Vec, cpu: Arc>) { + let node = "/cpus"; + add_sub_node(fdt, node); + set_property_u32(fdt, node, "#address-cells", 0x02); + set_property_u32(fdt, node, "#size-cells", 0x0); + + let mpidr = cpu.lock().unwrap().state.get_mpidr(&cpu.fd); + let node = format!("/cpus/cpu@{:x}", mpidr); + add_sub_node(fdt, &node); + set_property_u32( + fdt, + &node, + "phandle", + u32::from(cpu.id) + CPU_PHANDLE_START, + ); + set_property_string(fdt, &node, "device_type", "cpu"); + set_property_string(fdt, &node, "compatible", "arm,arm-v8"); + set_property_u64(fdt, &node, "reg", mpidr & 0x007F_FFFF); +} + +fn generate_devices_node(fdt: &mut Vec) { + // timer + let mut cells: Vec = Vec::new(); + for &irq in [13, 14, 11, 10].iter() { + cells.push(GIC_FDT_IRQ_TYPE_PPI); + cells.push(irq); + cells.push(IRQ_TYPE_LEVEL_HIGH); + } + let node = "/timer"; + add_sub_node(fdt, node); + set_property_string(fdt, node, "compatible", "arm,armv8-timer"); + set_property(fdt, node, "always-on", None); + set_property_array_u32(fdt, node, "interrupts", &cells); + // clock + let node = "/apb-pclk"; + add_sub_node(fdt, node); + set_property_string(fdt, node, "compatible", "fixed-clock"); + set_property_string(fdt, node, "clock-output-names", "clk24mhz"); + set_property_u32(fdt, node, "#clock-cells", 0x0); + 
set_property_u32(fdt, node, "clock-frequency", 24_000_000); + set_property_u32(fdt, node, "phandle", CLK_PHANDLE); + // psci + let node = "/psci"; + add_sub_node(fdt, node); + set_property_string(fdt, node, "compatible", "arm,psci-0.2"); + set_property_string(fdt, node, "method", "hvc"); + // serial + let node = format!("/uart@{:x}", MMIO_SERIAL_ADDR); + add_sub_node(fdt, &node); + set_property_string(fdt, &node, "compatible", "ns16550a"); + set_property_string(fdt, &node, "clock-names", "apb_pclk"); + set_property_u32(fdt, &node, "clocks", CLK_PHANDLE); + set_property_array_u64( + fdt, + &node, + "reg", + &[MMIO_SERIAL_ADDR, MMIO_SERIAL_ADDR_SIZE], + ); + set_property_array_u32( + fdt, + &node, + "interrupts", + &[ + GIC_FDT_IRQ_TYPE_SPI, + MMIO_SERIAL_IRQ, + IRQ_TYPE_EDGE_RISING, + ], + ); +} + + +fn generate_chosen_node(fdt: &mut Vec, cmdline: &str, initrd_addr: u64, initrd_size: u64) { + let node = "/chosen"; + add_sub_node(fdt, node); + set_property_string(fdt, node, "bootargs", cmdline); + set_property_u64(fdt, node, "linux,initrd-start", initrd_addr); + set_property_u64(fdt, node, "linux,initrd-end", initrd_addr + initrd_size); +} diff --git a/src/arch/aarch64/device/gicv3.rs b/src/arch/aarch64/device/gicv3.rs new file mode 100644 index 000000000..ab66a2c28 --- /dev/null +++ b/src/arch/aarch64/device/gicv3.rs @@ -0,0 +1,269 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use std::sync::Arc; + +use kvm_ioctls::{DeviceFd, VmFd}; +use crate::arch::aarch64::device_tree::*; +use crate::arch::{LayoutEntryType, MEM_LAYOUT}; + +// See arch/arm64/include/uapi/asm/kvm.h file from the linux kernel. +const SZ_64K: u64 = 0x0001_0000; +const KVM_VGIC_V3_REDIST_SIZE: u64 = 2 * SZ_64K; +// First 32 are private to each CPU (SGIs and PPIs). +const GIC_IRQ_INTERNAL: u32 = 32; + +#[derive(Debug)] +pub enum Error { + InvalidConfig(String), + CreateKvmDevice(kvm_ioctls::Error), + CheckDeviceAttribute(kvm_ioctls::Error), + GetDeviceAttribute(kvm_ioctls::Error), + SetDeviceAttribute(kvm_ioctls::Error), +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Error::InvalidConfig(info) => write!(f, "Invalid GIC config: {}", info), + Error::CreateKvmDevice(ref e) => { + write!(f, "Failed to create kvm device: {:#?}.", e) + } + Error::CheckDeviceAttribute(ref e) => { + write!(f, "Failed to check device attributes for GIC: {:#?}.", e) + } + Error::GetDeviceAttribute(ref e) => { + write!(f, "Failed to get device attributes for GIC: {:#?}.", e) + } + Error::SetDeviceAttribute(ref e) => { + write!(f, "Failed to set device attributes for GIC: {:#?}.", e) + } + } + } +} + +pub type Result = std::result::Result; + +fn kvm_device_check(fd: &DeviceFd, group: u32, attr: u64) -> Result<()> { + let attr = kvm_bindings::kvm_device_attr { + group, + attr, + addr: 0, + flags: 0, + }; + + Ok(fd + .has_device_attr(&attr) + .map_err(Error::CheckDeviceAttribute)?) 
+} + +fn kvm_device_access(fd: &DeviceFd, group: u32, attr: u64, addr: u64, write: bool) -> Result<()> { + let attr = kvm_bindings::kvm_device_attr { + group, + attr, + addr, + flags: 0, + }; + + if write { + fd.set_device_attr(&attr) + .map_err(Error::SetDeviceAttribute)?; + } else { + let mut attr = attr; + fd.get_device_attr(&mut attr) + .map_err(Error::GetDeviceAttribute)?; + }; + + Ok(()) +} + +/// A wrapper around creating and managing a `GICv3`. +pub struct GICv3 { + /// The fd for the GICv3 device. + fd: DeviceFd, + /// GICv3 ITS device. + its_dev: GICv3Its, + /// Maximum irq number. + nr_irqs: u32, + /// Base address of GICv3 redistributor. + redist_base: u64, + /// Size of agicv3 redistributor. + redist_size: u64, + /// Base address in the guest physical address space of the GICv3 distributor + /// register mappings. + dist_base: u64, + /// GICv3 distributor region size. + dist_size: u64, +} + +impl GICv3 { + pub fn new(vm: &Arc, vcpu_count: u64, max_irq: u32) -> Result { + let capability = std::cmp::min( + MEM_LAYOUT[LayoutEntryType::GicRedist as usize].1 / KVM_VGIC_V3_REDIST_SIZE, + 254, + ); + if vcpu_count > capability || vcpu_count == 0 { + return Err(Error::InvalidConfig(format!( + "GIC only support maximum {} vcpus", + capability + )) + .into()); + } + if max_irq <= GIC_IRQ_INTERNAL { + return Err(Error::InvalidConfig("GIC irq numbers need above 32".to_string()).into()); + } + + let mut gic_device = kvm_bindings::kvm_create_device { + type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, + fd: 0, + flags: 0, + }; + + let gic_fd = vm + .create_device(&mut gic_device) + .map_err(Error::CreateKvmDevice)?; + + Ok(GICv3 { + fd: gic_fd, + nr_irqs: max_irq, + its_dev: GICv3Its::new(&vm)?, + redist_base: MEM_LAYOUT[LayoutEntryType::GicRedist as usize].0, + redist_size: vcpu_count * KVM_VGIC_V3_REDIST_SIZE, + dist_base: MEM_LAYOUT[LayoutEntryType::GicDist as usize].0, + dist_size: MEM_LAYOUT[LayoutEntryType::GicDist as usize].1, + }) + } + + pub fn 
realize(&self) -> Result<()> { + kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_REDIST), + &self.redist_base as *const u64 as u64, + true, + )?; + kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_DIST), + &self.dist_base as *const u64 as u64, + true, + )?; + kvm_device_check(&self.fd, kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0)?; + kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + 0, + &self.nr_irqs as *const u32 as u64, + true, + )?; + kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, + u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_CTRL_INIT), + 0, + true, + )?; + + self.its_dev.realize()?; + Ok(()) + } + + pub fn generate_fdt_node(&self, fdt: &mut Vec) { + let gic_reg = vec![ + self.dist_base, + self.dist_size, + self.redist_base, + self.redist_size, + ]; + let node = "/intc"; + add_sub_node(fdt, node); + set_property_string(fdt, node, "compatible", "arm,gic-v3"); + set_property(fdt, node, "interrupt-controller", None); + set_property_u32(fdt, node, "#interrupt-cells", 0x3); + set_property_u32(fdt, node, "phandle", GIC_PHANDLE); + set_property_u32(fdt, node, "#address-cells", 0x2); + set_property_u32(fdt, node, "#size-cells", 0x2); + set_property_u32(fdt, node, "#redistributor-regions", 0x1); + set_property_array_u64(fdt, node, "reg", &gic_reg); + + let gic_intr = [ + GIC_FDT_IRQ_TYPE_PPI, + 0x9, + IRQ_TYPE_LEVEL_HIGH, + ]; + set_property_array_u32(fdt, node, "interrupts", &gic_intr); + + set_property(fdt, node, "ranges", None); + let its_reg = [self.its_dev.msi_base, self.its_dev.msi_size]; + let node = "/intc/its"; + add_sub_node(fdt, node); + set_property_string(fdt, node, "compatible", "arm,gic-v3-its"); + set_property(fdt, node, "msi-controller", None); + set_property_u32(fdt, node, "phandle", GIC_ITS_PHANDLE); + set_property_array_u64(fdt, node, "reg", 
&its_reg); + } +} + +struct GICv3Its { + /// The fd for the GICv3Its device + fd: DeviceFd, + /// Base address in the guest physical address space of the GICv3 ITS + /// control register frame. + msi_base: u64, + /// GICv3 ITS needs to be 64K aligned and the region covers 128K. + msi_size: u64, +} + +impl GICv3Its { + fn new(vm: &Arc) -> Result { + let mut its_device = kvm_bindings::kvm_create_device { + type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_ITS, + fd: 0, + flags: 0, + }; + + let its_fd = vm + .create_device(&mut its_device) + .map_err(Error::CreateKvmDevice)?; + + Ok(GICv3Its { + fd: its_fd, + msi_base: MEM_LAYOUT[LayoutEntryType::GicIts as usize].0, + msi_size: MEM_LAYOUT[LayoutEntryType::GicIts as usize].1, + }) + } + + fn realize(&self) -> Result<()> { + kvm_device_check( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_ITS_ADDR_TYPE), + )?; + kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_ITS_ADDR_TYPE), + &self.msi_base as *const u64 as u64, + true, + )?; + kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, + u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_CTRL_INIT), + &self.msi_base as *const u64 as u64, + true, + )?; + + Ok(()) + } +} diff --git a/src/arch/aarch64/device/mod.rs b/src/arch/aarch64/device/mod.rs new file mode 100644 index 000000000..c5889006f --- /dev/null +++ b/src/arch/aarch64/device/mod.rs @@ -0,0 +1,32 @@ +pub mod gicv3; +pub mod serial; +mod fdt; +use gicv3::GICv3; +use kvm_ioctls::VmFd; + +use crate::{arch::aarch64::boot_loader::AArch64BootLoader, cpu::CPU, device::Serial, memory::GuestMemory}; +use std::sync::Arc; +use crate::arch::CPUState; +pub fn kvm_setup_fireware(guest_memory: &Arc, vcpus : &Vec>>, vm_fd: &Arc, layout : &AArch64BootLoader) { + + let vcpu_count = vcpus[0].lock().unwrap().state.nr_vcpus; + let gic = GICv3::new(&vm_fd, vcpu_count as u64, 192).expect("Failed to create GICv3 
device"); + gic.realize().expect("Failed to realize GICv3 device"); + + let serial = Serial::new(&vm_fd); + for vcpu in vcpus.iter() { + vcpu.lock().unwrap().set_serial_dev(serial.clone()); + } + + let cmdline = "console=ttyS0 panic=1 reboot=k root=/dev/ram rdinit=/bin/sh"; + let initrd_range = (layout.initrd_start, layout.initrd_size); + let fdt_addr = layout.dtb_start; + fdt::generate_fdt( + guest_memory, + &gic, + initrd_range, + cmdline, + vcpus[0].clone(), + fdt_addr, + ); +} diff --git a/src/arch/aarch64/device/serial.rs b/src/arch/aarch64/device/serial.rs new file mode 100644 index 000000000..c90c53278 --- /dev/null +++ b/src/arch/aarch64/device/serial.rs @@ -0,0 +1,4 @@ +use crate::arch::{LayoutEntryType, MEM_LAYOUT}; +pub const MMIO_SERIAL_IRQ: u32 = 32; +pub const MMIO_SERIAL_ADDR: u64 = MEM_LAYOUT[LayoutEntryType::Mmio as usize].0; +pub const MMIO_SERIAL_ADDR_SIZE: u64 = MEM_LAYOUT[LayoutEntryType::Mmio as usize].1; \ No newline at end of file diff --git a/src/arch/aarch64/device_tree/mod.rs b/src/arch/aarch64/device_tree/mod.rs new file mode 100644 index 000000000..4ea4401fd --- /dev/null +++ b/src/arch/aarch64/device_tree/mod.rs @@ -0,0 +1,192 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use libc::{c_char, c_int, c_void}; +use std::ffi::CString; + +pub const CLK_PHANDLE: u32 = 1; +pub const GIC_PHANDLE: u32 = 2; +pub const GIC_ITS_PHANDLE: u32 = 3; +pub const CPU_PHANDLE_START: u32 = 10; + +pub const GIC_FDT_IRQ_TYPE_SPI: u32 = 0; +pub const GIC_FDT_IRQ_TYPE_PPI: u32 = 1; +pub const IRQ_TYPE_EDGE_RISING: u32 = 1; +pub const IRQ_TYPE_LEVEL_HIGH: u32 = 4; + +pub const FDT_MAX_SIZE: u32 = 0x1_0000; + +extern "C" { + fn fdt_create(buf: *mut c_void, bufsize: c_int) -> c_int; + fn fdt_finish_reservemap(fdt: *mut c_void) -> c_int; + fn fdt_begin_node(fdt: *mut c_void, name: *const c_char) -> c_int; + fn fdt_end_node(fdt: *mut c_void) -> c_int; + fn fdt_finish(fdt: *const c_void) -> c_int; + fn fdt_open_into(fdt: *const c_void, buf: *mut c_void, size: c_int) -> c_int; + + fn fdt_path_offset(fdt: *const c_void, path: *const c_char) -> c_int; + fn fdt_add_subnode(fdt: *mut c_void, offset: c_int, name: *const c_char) -> c_int; + fn fdt_setprop( + fdt: *mut c_void, + offset: c_int, + name: *const c_char, + val: *const c_void, + len: c_int, + ) -> c_int; +} + +pub fn create_device_tree(fdt: &mut Vec) { + let mut ret = unsafe { fdt_create(fdt.as_mut_ptr() as *mut c_void, FDT_MAX_SIZE as c_int) }; + if ret < 0 { + panic!("Failed to fdt_create, return {}.", ret); + } + + ret = unsafe { fdt_finish_reservemap(fdt.as_mut_ptr() as *mut c_void) }; + if ret < 0 { + panic!("Failed to fdt_finish_reservemap, return {}.", ret); + } + + let c_str = CString::new("").unwrap(); + ret = unsafe { fdt_begin_node(fdt.as_mut_ptr() as *mut c_void, c_str.as_ptr()) }; + if ret < 0 { + panic!("Failed to fdt_begin_node, return {}.", ret); + } + + ret = unsafe { fdt_end_node(fdt.as_mut_ptr() as *mut c_void) }; + if ret < 0 { + panic!("Failed to fdt_end_node, return {}.", ret); + } + + ret = unsafe { fdt_finish(fdt.as_mut_ptr() as *mut c_void) }; + if ret < 0 { + panic!("Failed to fdt_finish, return {}.", ret); + } + + ret = unsafe { + fdt_open_into( + fdt.as_ptr() as *mut c_void, + 
fdt.as_mut_ptr() as *mut c_void, + FDT_MAX_SIZE as c_int, + ) + }; + if ret < 0 { + panic!("Failed to fdt_open_into, return {}.", ret); + } +} + +pub fn add_sub_node(fdt: &mut Vec, node_path: &str) { + let names: Vec<&str> = node_path.split('/').collect(); + if names.len() < 2 { + panic!("Failed to add sub node, node_path: {} invalid.", node_path); + } + + let node_name = names[names.len() - 1]; + let pare_name = names[0..names.len() - 1].join("/"); + + let c_str = if pare_name.is_empty() { + CString::new("/").unwrap() + } else { + CString::new(pare_name).unwrap() + }; + + let offset = unsafe { fdt_path_offset(fdt.as_ptr() as *const c_void, c_str.as_ptr()) }; + if offset < 0 { + panic!("Failed to fdt_path_offset, return {}.", offset); + } + + let c_str = CString::new(node_name).unwrap(); + let ret = unsafe { fdt_add_subnode(fdt.as_mut_ptr() as *mut c_void, offset, c_str.as_ptr()) }; + if ret < 0 { + panic!("Failed to fdt_add_subnode, return {}.", ret); + } +} + +pub fn set_property(fdt: &mut Vec, node_path: &str, prop: &str, val: Option<&[u8]>) { + let c_str = CString::new(node_path).unwrap(); + let offset = unsafe { fdt_path_offset(fdt.as_ptr() as *const c_void, c_str.as_ptr()) }; + if offset < 0 { + panic!("Failed to fdt_path_offset, return {}.", offset); + } + + let (ptr, len) = if let Some(val) = val { + (val.as_ptr() as *const c_void, val.len() as i32) + } else { + (std::ptr::null::(), 0) + }; + + let c_str = CString::new(prop).unwrap(); + let ret = unsafe { + fdt_setprop( + fdt.as_mut_ptr() as *mut c_void, + offset, + c_str.as_ptr(), + ptr, + len, + ) + }; + if ret < 0 { + panic!("Failed to fdt_setprop, return {}.", ret); + } +} + +pub fn set_property_string(fdt: &mut Vec, node_path: &str, prop: &str, val: &str) { + set_property( + fdt, + node_path, + prop, + Some(&([val.as_bytes(), &[0_u8]].concat())), + ) +} + +pub fn set_property_u32(fdt: &mut Vec, node_path: &str, prop: &str, val: u32) { + set_property(fdt, node_path, prop, Some(&val.to_be_bytes())) +} + 
+pub fn set_property_u64(fdt: &mut Vec, node_path: &str, prop: &str, val: u64) { + set_property(fdt, node_path, prop, Some(&val.to_be_bytes())) +} + +pub fn set_property_array_u32(fdt: &mut Vec, node_path: &str, prop: &str, array: &[u32]) { + let mut bytes: Vec = Vec::new(); + for &val in array { + bytes.append(&mut val.to_be_bytes().to_vec()); + } + set_property(fdt, node_path, prop, Some(&bytes)) +} + +pub fn set_property_array_u64(fdt: &mut Vec, node_path: &str, prop: &str, array: &[u64]) { + let mut bytes: Vec = Vec::new(); + for &val in array { + bytes.append(&mut val.to_be_bytes().to_vec()); + } + set_property(fdt, node_path, prop, Some(&bytes)) +} + +pub fn dump_dtb(fdt: &[u8], file_path: &str) { + use std::fs::File; + use std::io::Write; + + let mut f = File::create(file_path).unwrap(); + for i in fdt.iter() { + f.write_all(&[*i]).expect("Unable to write data"); + } +} + +/// Trait for devices to be added to the Flattened Device Tree. +pub trait CompileFDT { + /// function to generate fdt node + /// + /// # Arguments + /// + /// * `fdt` - the fdt slice to be expanded. 
+ fn generate_fdt_node(&self, fdt: &mut Vec); +} diff --git a/src/kvm/mod.rs b/src/arch/aarch64/kvm/mod.rs similarity index 57% rename from src/kvm/mod.rs rename to src/arch/aarch64/kvm/mod.rs index 43d727b7c..d336890ed 100644 --- a/src/kvm/mod.rs +++ b/src/arch/aarch64/kvm/mod.rs @@ -2,15 +2,19 @@ use std::path::PathBuf; use std::sync::Arc; use kvm_ioctls::VmFd; -use crate::memory::{GuestMemory, LayoutEntryType, MEM_LAYOUT}; -use crate::{bootloader::load_kernel, bootloader::Riscv64BootLoader as BootLoader, bootloader::Riscv64BootLoaderConfig as BootLoaderConfig}; +use crate::memory::{GuestMemory }; +use crate::arch::{LayoutEntryType, MEM_LAYOUT}; +use crate::arch::{load_kernel, BootLoader, BootLoaderConfig}; pub fn load_boot_source(guest_memory: &Arc) -> BootLoader { let initrd_path = PathBuf::from("/tmp/initrd.img"); let boot_cfg = BootLoaderConfig { kernel: PathBuf::from("/tmp/vmlinux.bin"), initrd: initrd_path, - mem_start: MEM_LAYOUT[LayoutEntryType::MemRAM as usize].0, + mem_start: MEM_LAYOUT[LayoutEntryType::Mem as usize].0, }; load_kernel(&boot_cfg, &guest_memory) } +pub fn arch_init_based_dev(vm_fd: &Arc) { + +} \ No newline at end of file diff --git a/src/arch/aarch64/memory/mod.rs b/src/arch/aarch64/memory/mod.rs new file mode 100644 index 000000000..2bb31c03d --- /dev/null +++ b/src/arch/aarch64/memory/mod.rs @@ -0,0 +1,27 @@ + +/// The type of memory layout entry on aarch64 +#[repr(usize)] +pub enum LayoutEntryType { + GicDist, + GicIts, + GicRedist, + Mmio, + Mem, +} + +/// Layout of aarch64 + +pub const MEM_LAYOUT: &[(u64, u64)] = &[ + (0x0800_0000, 0x0001_0000), // GicDist + (0x0808_0000, 0x0002_0000), // GicIts + (0x080A_0000, 0x00F6_0000), // GicRedist (max 123 redistributors) + (0x0A00_0000, 0x0000_0200), // Mmio + (0x4000_0000, 0x80_0000_0000), // Mem +]; + + +pub fn arch_add_ram_ranges(mem_size: u64, ranges: &mut Vec<(u64, u64)>) { + + ranges.push((MEM_LAYOUT[LayoutEntryType::Mem as usize].0, mem_size)); + +} \ No newline at end of file diff 
--git a/src/arch/aarch64/mod.rs b/src/arch/aarch64/mod.rs new file mode 100644 index 000000000..8a5dbcd79 --- /dev/null +++ b/src/arch/aarch64/mod.rs @@ -0,0 +1,6 @@ +pub mod kvm; +pub mod cpu; +pub mod memory; +pub mod device_tree; +pub mod device; +pub mod boot_loader; \ No newline at end of file diff --git a/src/arch/mod.rs b/src/arch/mod.rs new file mode 100644 index 000000000..9d6015dab --- /dev/null +++ b/src/arch/mod.rs @@ -0,0 +1,60 @@ +#[cfg(target_arch = "aarch64")] +mod aarch64; +#[cfg(target_arch = "aarch64")] +pub use aarch64::kvm::*; +#[cfg(target_arch = "aarch64")] +pub use aarch64::cpu::AArch64CPUBootConfig as CPUBootConfig; +#[cfg(target_arch = "aarch64")] +pub use aarch64::cpu::CPUState; +#[cfg(target_arch = "aarch64")] +pub use aarch64::boot_loader::{ + load_kernel, kvm_load_kernel,AArch64BootLoader as BootLoader, AArch64BootLoaderConfig as BootLoaderConfig, +}; +#[cfg(target_arch = "aarch64")] +pub use aarch64::device::kvm_setup_fireware; +#[cfg(target_arch = "aarch64")] +pub use aarch64::device::serial::*; +#[cfg(target_arch = "aarch64")] +pub use aarch64::memory::{LayoutEntryType, MEM_LAYOUT, arch_add_ram_ranges}; + + +#[cfg(target_arch = "x86_64")] +mod x86_64; +#[cfg(target_arch = "x86_64")] +pub use x86_64::kvm::*; +#[cfg(target_arch = "x86_64")] +pub use x86_64::cpu::X86CPUBootConfig as CPUBootConfig; +#[cfg(target_arch = "x86_64")] +pub use x86_64::cpu::CPUState; +#[cfg(target_arch = "x86_64")] +pub use x86_64::boot_loader::{ + load_kernel, kvm_load_kernel,X86BootLoader as BootLoader, X86BootLoaderConfig as BootLoaderConfig, +}; +#[cfg(target_arch = "x86_64")] +pub use x86_64::device::kvm_setup_fireware; +#[cfg(target_arch = "x86_64")] +pub use x86_64::device::serial::*; +#[cfg(target_arch = "x86_64")] +pub use x86_64::memory::{LayoutEntryType, MEM_LAYOUT, arch_add_ram_ranges}; + + +#[cfg(target_arch = "riscv64")] +mod riscv64; +#[cfg(target_arch = "riscv64")] +pub use riscv64::memory::{LayoutEntryType, MEM_LAYOUT, arch_add_ram_ranges}; 
+#[cfg(target_arch = "riscv64")] +pub use riscv64::kvm::*; +#[cfg(target_arch = "riscv64")] +pub use riscv64::cpu::Riscv64CPUBootConfig as CPUBootConfig; +#[cfg(target_arch = "riscv64")] +pub use riscv64::cpu::{CPUState, Riscv64CoreRegs}; +#[cfg(target_arch = "riscv64")] +pub use riscv64::boot_loader::{ + load_kernel, kvm_load_kernel,Riscv64BootLoader as BootLoader, Riscv64BootLoaderConfig as BootLoaderConfig, +}; +#[cfg(target_arch = "riscv64")] +pub use riscv64::device::kvm_setup_fireware; +#[cfg(target_arch = "riscv64")] +pub use riscv64::device::serial::*; +#[cfg(target_arch = "riscv64")] +pub use riscv64::device::plic::{judge_plic_addr as judge_interrupt_controller_addr}; diff --git a/src/bootloader/mod.rs b/src/arch/riscv64/boot_loader/mod.rs similarity index 96% rename from src/bootloader/mod.rs rename to src/arch/riscv64/boot_loader/mod.rs index f7c2e490b..8cffd73df 100644 --- a/src/bootloader/mod.rs +++ b/src/arch/riscv64/boot_loader/mod.rs @@ -1,4 +1,3 @@ - // Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. // // StratoVirt is licensed under Mulan PSL v2. 
@@ -17,9 +16,11 @@ use std::sync::Arc; use kvm_ioctls::VmFd; -use crate::cpu::{CPU, CPUBootConfig}; +use crate::arch::riscv64::device_tree::*; +use crate::arch::CPUBootConfig; +use crate::cpu::CPU; use crate::memory::GuestMemory; -use crate::device::FDT_MAX_SIZE; + use super::kvm::load_boot_source; const RISCV64_KERNEL_OFFSET: u64 = 0x20_0000; @@ -104,12 +105,13 @@ pub fn load_kernel( } pub fn kvm_load_kernel(guest_memory: &Arc, vcpu : &mut CPU, vm_fd: &Arc) -> Riscv64BootLoader{ + let layout = load_boot_source(guest_memory); let cpu_boot_cfg = CPUBootConfig { fdt_addr: layout.dtb_start, kernel_addr: layout.kernel_start, }; - vcpu.set_boot_config(&cpu_boot_cfg); + vcpu.realize(&vm_fd, cpu_boot_cfg); layout diff --git a/src/arch/riscv64/cpu/mod.rs b/src/arch/riscv64/cpu/mod.rs new file mode 100644 index 000000000..f738334a2 --- /dev/null +++ b/src/arch/riscv64/cpu/mod.rs @@ -0,0 +1,326 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::convert::Into; +use std::mem; +use std::sync::{Arc, Mutex}; +use crate::arch::riscv64::device::plic::PlicState; + +use kvm_bindings::{ + user_regs_struct, kvm_riscv_core,kvm_riscv_config,kvm_riscv_timer, + KVM_REG_RISCV, KVM_REG_RISCV_CORE, KVM_REG_SIZE_U64, KVM_REG_RISCV_CONFIG, KVM_REG_RISCV_TIMER, +}; +use kvm_ioctls::{VcpuFd, VmFd}; + +use crate::offset_of; + +// Processor Multiprocessor status. 
+#[allow(non_upper_case_globals)] +const KVM_MP_STATE_STOPPED: u32 = 5; +// riscv64 cpu core register +#[allow(non_camel_case_types)] +#[allow(dead_code)] +pub enum Riscv64CoreRegs{ + PC, + RA, + SP, + GP, + TP, + T0, + T1, + T2, + S0, + S1, + A0, + A1, + A2, + A3, + A4, + A5, + A6, + A7, + S2, + S3, + S4, + S5, + S6, + S7, + S8, + S9, + S10, + S11, + T3, + T4, + T5, + T6, + MODE, +} + +#[allow(clippy::zero_ptr)] +impl Into for Riscv64CoreRegs{ + fn into(self) -> u64 { + let reg_offset = match self { + Riscv64CoreRegs::PC => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, pc) + } + Riscv64CoreRegs::RA => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, ra) + } + Riscv64CoreRegs::SP => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, sp) + } + Riscv64CoreRegs::GP => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, gp) + } + Riscv64CoreRegs::TP => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, tp) + } + Riscv64CoreRegs::T0 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t0) + } + Riscv64CoreRegs::T1 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t1) + } + Riscv64CoreRegs::T2 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t2) + } + Riscv64CoreRegs::S0 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s0) + } + Riscv64CoreRegs::S1 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s1) + } + Riscv64CoreRegs::A0 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a0) + } + Riscv64CoreRegs::A1 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a1) + } + Riscv64CoreRegs::A2 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a2) + } + Riscv64CoreRegs::A3 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a3) + } + Riscv64CoreRegs::A4 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a4) + } + Riscv64CoreRegs::A5 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a5) + } + Riscv64CoreRegs::A6 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a6) + 
} + Riscv64CoreRegs::A7 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, a7) + } + Riscv64CoreRegs::S2 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s2) + } + Riscv64CoreRegs::S3 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s3) + } + Riscv64CoreRegs::S4 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s4) + } + Riscv64CoreRegs::S5 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s5) + } + Riscv64CoreRegs::S6 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s6) + } + Riscv64CoreRegs::S7 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s7) + } + Riscv64CoreRegs::S8 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s8) + } + Riscv64CoreRegs::S9 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s9) + } + Riscv64CoreRegs::S10 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s10) + } + Riscv64CoreRegs::S11 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, s11) + } + Riscv64CoreRegs::T3 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t3) + } + Riscv64CoreRegs::T4 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t4) + } + Riscv64CoreRegs::T5 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t5) + } + Riscv64CoreRegs::T6 => { + offset_of!(kvm_riscv_core, regs, user_regs_struct, t6) + } + Riscv64CoreRegs::MODE => { + offset_of!(kvm_riscv_core, mode) + } + }; + KVM_REG_RISCV as u64 + | KVM_REG_SIZE_U64 as u64 + | u64::from(KVM_REG_RISCV_CORE) + | (reg_offset / mem::size_of::()) as u64 + } +} + +#[allow(non_camel_case_types)] +#[allow(dead_code)] +pub enum Riscv64ConfigRegs{ + ISA, + ZICBOM_BLOCK_SIZE, + MVENDORID, + MARCHID, + MIMPID, + ZICBOZ_BLOCK_SIZE, + SATP_MODE, +} +#[allow(clippy::zero_ptr)] +impl Into for Riscv64ConfigRegs{ + fn into(self) -> u64 { + let reg_offset = match self { + Riscv64ConfigRegs::ISA => { + offset_of!(kvm_riscv_config, isa) + } + Riscv64ConfigRegs::ZICBOM_BLOCK_SIZE => { + offset_of!(kvm_riscv_config, zicbom_block_size) 
+ } + Riscv64ConfigRegs::MVENDORID => { + offset_of!(kvm_riscv_config, mvendorid) + } + Riscv64ConfigRegs::MARCHID => { + offset_of!(kvm_riscv_config, marchid) + } + Riscv64ConfigRegs::MIMPID => { + offset_of!(kvm_riscv_config, mimpid) + } + Riscv64ConfigRegs::ZICBOZ_BLOCK_SIZE => { + offset_of!(kvm_riscv_config, zicboz_block_size) + } + Riscv64ConfigRegs::SATP_MODE => { + offset_of!(kvm_riscv_config, satp_mode) + } + }; + KVM_REG_RISCV as u64 + | KVM_REG_SIZE_U64 as u64 + | u64::from(KVM_REG_RISCV_CONFIG) + | (reg_offset / mem::size_of::()) as u64 + } +} + + +#[allow(non_camel_case_types)] +#[allow(dead_code)] +pub enum Riscv64Timer{ + FREQUENCY, + TIME, + COMPARE, + STATE, +} +#[allow(clippy::zero_ptr)] +impl Into for Riscv64Timer{ + fn into(self) -> u64 { + let reg_offset = match self { + Riscv64Timer::FREQUENCY => { + offset_of!(kvm_riscv_timer, frequency) + } + Riscv64Timer::TIME => { + offset_of!(kvm_riscv_timer, time) + } + Riscv64Timer::COMPARE => { + offset_of!(kvm_riscv_timer, compare) + } + Riscv64Timer::STATE => { + offset_of!(kvm_riscv_timer, state) + } + }; + KVM_REG_RISCV as u64 + | KVM_REG_SIZE_U64 as u64 + | u64::from(KVM_REG_RISCV_TIMER) + | (reg_offset / mem::size_of::()) as u64 + } +} +/// Riscv64 CPU boot configuration information +/// +/// Before jumping into the kernel, the primary CPU's general-purpose +/// register `a1` needs to be set to the physical address of the device +/// tree blob (dtb) in system RAM, and `a0` needs to be set to the hartid. +/// +#[derive(Default, Copy, Clone)] +pub struct Riscv64CPUBootConfig { + pub fdt_addr: u64, + pub kernel_addr: u64, +} + +/// Riscv64 CPU architecture information +#[derive(Default, Clone)] +pub struct CPUState { + /// The vcpu id, `0` means primary CPU. + pub vcpu_id: u32, + /// The guest physical address of kernel start point. + boot_ip: u64, + /// The guest physical address of device tree blob (dtb). 
+ fdt_addr: u64, + + pub nr_vcpus: u32, + pub isa: u64, + pub frequency: u64, + pub satp_mode: u64, + pub interrupt_controller: Option>> +} + +impl CPUState { + pub fn new(vcpu_id: u32, nr_vcpus: u32) -> Self { + CPUState { + vcpu_id, + boot_ip: 0, + fdt_addr: 0, + nr_vcpus, + isa: 0, + frequency: 0, + satp_mode: 0, + interrupt_controller: None + } + } + + pub fn set_boot_config( + &mut self, + vm_fd: &Arc, + vcpu_fd: Arc, + boot_config: &Riscv64CPUBootConfig, + ) { + self.boot_ip = boot_config.kernel_addr; + self.fdt_addr = boot_config.fdt_addr; + self.isa = vcpu_fd.get_one_reg(Riscv64ConfigRegs::ISA.into()).unwrap(); + self.satp_mode = vcpu_fd.get_one_reg(Riscv64ConfigRegs::SATP_MODE.into()).unwrap(); + self.frequency = vcpu_fd.get_one_reg(Riscv64Timer::FREQUENCY.into()).unwrap(); + } + + pub fn set_plic(&mut self, plic: Arc>) { + self.interrupt_controller = Some(plic); + } + pub fn reset_vcpu(&self, vcpu: Arc) { + let mp_state = vcpu.get_mp_state().unwrap(); + if mp_state.mp_state == KVM_MP_STATE_STOPPED { + return; + } + vcpu.set_one_reg(Riscv64CoreRegs::PC.into(), self.boot_ip) + .expect("Failed to set PC register"); + vcpu.set_one_reg(Riscv64CoreRegs::A0.into(), self.vcpu_id as u64) + .expect("Failed to set hartid to a0"); + vcpu.set_one_reg(Riscv64CoreRegs::A1.into(), self.fdt_addr) + .expect("Failed to set fdt to a1"); + } +} diff --git a/src/device/fdt.rs b/src/arch/riscv64/device/fdt.rs similarity index 84% rename from src/device/fdt.rs rename to src/arch/riscv64/device/fdt.rs index 2d991d566..b05932a2f 100644 --- a/src/device/fdt.rs +++ b/src/arch/riscv64/device/fdt.rs @@ -1,17 +1,17 @@ -use crate::device_tree::*; -use crate::memory::{LayoutEntryType, MEM_LAYOUT}; +use crate::arch::riscv64::device_tree::*; +use crate::arch::{LayoutEntryType, MEM_LAYOUT}; use crate::cpu::CPU; use crate::memory::GuestMemory; use super::plic::PlicState; use std::sync::{Mutex, Arc}; -use super::{MMIO_SERIAL_ADDR, MMIO_SERIAL_ADDR_SIZE, MMIO_SERIAL_IRQ}; +use 
crate::arch::{MMIO_SERIAL_ADDR, MMIO_SERIAL_ADDR_SIZE, MMIO_SERIAL_IRQ}; pub fn generate_fdt( sys_mem: &Arc, initrd_range: (u64, u64), cmdline: &str, plic: Arc>, - cpu: Arc>, + cpu: &CPU, fdt_addr: u64, ) { let mut fdt = vec![0; FDT_MAX_SIZE as usize]; @@ -43,8 +43,8 @@ pub fn generate_fdt( } fn generate_memory_node(fdt: &mut Vec, sys_mem: &Arc) { - let mem_base = MEM_LAYOUT[LayoutEntryType::MemRAM as usize].0; - let mem_size = MEM_LAYOUT[LayoutEntryType::MemRAM as usize].1; + let mem_base = MEM_LAYOUT[LayoutEntryType::Mem as usize].0; + let mem_size = MEM_LAYOUT[LayoutEntryType::Mem as usize].1; let node = "/memory"; add_sub_node(fdt, node); set_property_string(fdt, node, "device_type", "memory"); @@ -52,20 +52,20 @@ fn generate_memory_node(fdt: &mut Vec, sys_mem: &Arc) { } -fn generate_cpu_node(fdt: &mut Vec, cpu: Arc>) { +fn generate_cpu_node(fdt: &mut Vec, cpu: &CPU) { let node = "/cpus"; add_sub_node(fdt, node); set_property_u32(fdt, node, "#address-cells", 0x01); set_property_u32(fdt, node, "#size-cells", 0x00); - let state_guard = cpu.lock().unwrap(); - set_property_u32(fdt, node, "timebase-frequency", state_guard.frequency as u32); + let state_guard = cpu; + set_property_u32(fdt, node, "timebase-frequency", state_guard.state.frequency as u32); - for num in 0..state_guard.nr_vcpus { + for num in 0..state_guard.state.nr_vcpus { let node = format!("/cpus/cpu@{:x}", num); add_sub_node(fdt, &node); set_property_string(fdt, &node, "device_type", "cpu"); set_property_string(fdt, &node, "compatible", "riscv"); - let mmu_type = match state_guard.satp_mode { + let mmu_type = match state_guard.state.satp_mode { 10 => "riscv,sv57", 9 => "riscv,sv48", 8 => "riscv,sv39", @@ -76,13 +76,13 @@ fn generate_cpu_node(fdt: &mut Vec, cpu: Arc>) { let mut cpu_isa = String::from("rv64"); for i in 0..valid_isa_order.len() { let index = valid_isa_order.as_bytes()[i] as u32 - 65; - if state_guard.isa & (1 << index) != 0 { + if state_guard.state.isa & (1 << index) != 0 { let 
char_to_add = ((index as u8) + b'a') as char; cpu_isa.push(char_to_add); } } set_property_string(fdt, &node, "riscv,isa", &cpu_isa); - set_property_u32(fdt, &node, "reg", num as u32); + set_property_u32(fdt, &node, "reg", num); set_property_string(fdt, &node, "status", "okay"); let node = format!("/cpus/cpu@{:x}/interrupt-controller", num); @@ -122,6 +122,8 @@ fn generate_chosen_node(fdt: &mut Vec, cmdline: &str, initrd_addr: u64, init add_sub_node(fdt, node); set_property_string(fdt, node, "bootargs", cmdline); set_property_string(fdt, node, "stdout-path", "serial0"); + set_property_u64(fdt, node, "linux,initrd-start", initrd_addr); + set_property_u64(fdt, node, "linux,initrd-end", initrd_addr + initrd_size); } fn generate_aliases_node(fdt: &mut Vec) { let node = "/aliases"; diff --git a/src/arch/riscv64/device/mod.rs b/src/arch/riscv64/device/mod.rs new file mode 100644 index 000000000..294f23676 --- /dev/null +++ b/src/arch/riscv64/device/mod.rs @@ -0,0 +1,33 @@ +pub mod serial; +pub mod plic; +mod fdt; +use plic::*; +use serial::SerialControl; +use kvm_ioctls::VmFd; + +use crate::{arch::riscv64::boot_loader::Riscv64BootLoader, cpu::CPU, device::Serial, memory::GuestMemory}; +use std::sync::{Arc, Mutex}; +pub fn kvm_setup_fireware(guest_memory: &Arc, vcpus : &mut Vec<&mut CPU>, vm_fd: &Arc, layout : &Riscv64BootLoader) { + + let vcpu_count = vcpus[0].state.nr_vcpus; + let plic = PlicState::new(&vcpus, vcpu_count); + + let serial = Serial::new(&vm_fd, SerialControl::new(plic.clone())); + for i in 0..vcpus.len(){ + vcpus[i].set_serial_dev(serial.clone()); + vcpus[i].state.set_plic(plic.clone()); + } + println!("set plic and serial for vcpu!"); + let cmdline = "console=ttyS0 panic=1 reboot=k root=/dev/ram rdinit=/bin/sh"; + let initrd_range = (layout.initrd_start, layout.initrd_size); + let fdt_addr = layout.dtb_start; + fdt::generate_fdt( + guest_memory, + initrd_range, + cmdline, + plic.clone(), + vcpus[0], + fdt_addr, + ); + println!("generate fdt node 
complete!"); +} diff --git a/src/device/plic.rs b/src/arch/riscv64/device/plic.rs similarity index 97% rename from src/device/plic.rs rename to src/arch/riscv64/device/plic.rs index 21bc732c3..1a82e0d92 100644 --- a/src/device/plic.rs +++ b/src/arch/riscv64/device/plic.rs @@ -1,5 +1,5 @@ -use crate::device_tree::*; -use crate::memory::{LayoutEntryType, MEM_LAYOUT}; +use crate::arch::riscv64::device_tree::*; +use crate::arch::{LayoutEntryType, MEM_LAYOUT}; use crate::cpu::CPU; use kvm_ioctls::{VcpuFd, VmFd}; use std::sync::{Arc, Mutex}; @@ -43,12 +43,12 @@ pub struct PlicState { } impl PlicState { - pub fn new(vcpus: &Vec>>, vcpu_nr: u32) -> Arc> { + pub fn new(vcpus: &Vec<&mut CPU>, vcpu_nr: u32) -> Arc> { let mut contexts: Vec = Vec::new(); for i in 0..vcpu_nr * 2 { let context = PlicContext { num: i as u32, - vcpu: vcpus[(i / 2) as usize].lock().unwrap().fd.clone(), + vcpu: vcpus[(i / 2) as usize].fd.clone(), irq_priority_threshold: 0 as u8, irq_enable: [0; (MAX_DEVICES / 32) as usize], irq_pending: [0; (MAX_DEVICES / 32) as usize], @@ -266,6 +266,7 @@ impl PlicState { new_val &= !0x1; } self.contexts[context_idx as usize].irq_enable[irq_word as usize] = new_val; + println!("context[{}] enable[{}] write: {}", context_idx, irq_word, new_val); xor_val = old_val ^ new_val; for i in 0..32 { irq = irq_word * 32 + i; diff --git a/src/arch/riscv64/device/serial.rs b/src/arch/riscv64/device/serial.rs new file mode 100644 index 000000000..01a7be5b7 --- /dev/null +++ b/src/arch/riscv64/device/serial.rs @@ -0,0 +1,28 @@ +use crate::arch::{LayoutEntryType, MEM_LAYOUT}; +use super::{PlicState}; +use std::sync::{Arc, Mutex}; +use kvm_ioctls::{VmFd}; +use vmm_sys_util::eventfd::EventFd; +pub const MMIO_SERIAL_IRQ: u32 = 1; +pub const MMIO_SERIAL_ADDR: u64 = MEM_LAYOUT[LayoutEntryType::Mmio as usize].0; +pub const MMIO_SERIAL_ADDR_SIZE: u64 = MEM_LAYOUT[LayoutEntryType::Mmio as usize].1; + +pub struct SerialControl { + plic: Arc>, +} + +impl SerialControl { + pub fn 
new(plic_state: Arc>) -> Self { + Self { + plic: plic_state.clone() + } + } + pub fn interrupt_trigger(&self) { + let mut plic_guard = self.plic.lock().unwrap(); + plic_guard.plic__irq_trig(MMIO_SERIAL_IRQ, true, true); + drop(plic_guard); + } +} +pub fn serial_register_irqfd(vm_fd: &VmFd, evt_fd: &EventFd) { + +} diff --git a/src/device_tree/mod.rs b/src/arch/riscv64/device_tree/mod.rs similarity index 100% rename from src/device_tree/mod.rs rename to src/arch/riscv64/device_tree/mod.rs diff --git a/src/arch/riscv64/kvm/mod.rs b/src/arch/riscv64/kvm/mod.rs new file mode 100644 index 000000000..b481384e3 --- /dev/null +++ b/src/arch/riscv64/kvm/mod.rs @@ -0,0 +1,20 @@ +use std::path::PathBuf; +use std::sync::Arc; +use kvm_ioctls::VmFd; + +use crate::memory::{GuestMemory }; +use crate::arch::{LayoutEntryType, MEM_LAYOUT}; +use crate::arch::{load_kernel, BootLoader, BootLoaderConfig}; +pub fn load_boot_source(guest_memory: &Arc) -> BootLoader { + let initrd_path = PathBuf::from("/tmp/initramfs.cpio.gz"); + let boot_cfg = BootLoaderConfig { + kernel: PathBuf::from("/tmp/vmlinux.bin"), + initrd: initrd_path, + mem_start: MEM_LAYOUT[LayoutEntryType::Mem as usize].0, + }; + load_kernel(&boot_cfg, &guest_memory) +} + +pub fn arch_init_based_dev(vm_fd: &Arc) { + +} diff --git a/src/arch/riscv64/memory/mod.rs b/src/arch/riscv64/memory/mod.rs new file mode 100644 index 000000000..ec187bc4a --- /dev/null +++ b/src/arch/riscv64/memory/mod.rs @@ -0,0 +1,23 @@ + +/// The type of memory layout entry on riscv64 +#[repr(usize)] +pub enum LayoutEntryType { + IrqChip, + Mmio, + Mem, +} + +/// Layout of riscv64 + +pub const MEM_LAYOUT: &[(u64, u64)] = &[ + (0x0800_0000, 0x0800_0000), // IRQ_CHIP: 128M + (0x1000_0000, 0x2000_0000), // Mmio: 512M + (0x8000_0000, 0x0800_0000), // Mem: max: (1 << 40)-MemStart, default: 128M +]; + + +pub fn arch_add_ram_ranges(mem_size: u64, ranges: &mut Vec<(u64, u64)>) { + + ranges.push((MEM_LAYOUT[LayoutEntryType::Mem as usize].0, mem_size)); + +} diff 
--git a/src/arch/riscv64/mod.rs b/src/arch/riscv64/mod.rs new file mode 100644 index 000000000..8a5dbcd79 --- /dev/null +++ b/src/arch/riscv64/mod.rs @@ -0,0 +1,6 @@ +pub mod kvm; +pub mod cpu; +pub mod memory; +pub mod device_tree; +pub mod device; +pub mod boot_loader; \ No newline at end of file diff --git a/src/arch/x86_64/boot_loader/bootparam.rs b/src/arch/x86_64/boot_loader/bootparam.rs new file mode 100644 index 000000000..c0f4ea042 --- /dev/null +++ b/src/arch/x86_64/boot_loader/bootparam.rs @@ -0,0 +1,213 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use super::loader::X86BootLoaderConfig; +use crate::helper::byte_code::ByteCode; +use crate::GuestMemory; + +pub const E820_RAM: u32 = 1; +pub const E820_RESERVED: u32 = 2; +pub const BOOT_FLAG: u16 = 0xAA55; +pub const HDRS: u32 = 0x5372_6448; +pub const UNDEFINED_ID: u8 = 0xFF; + +const INITRD_ADDR_MAX: u64 = 0x37ff_ffff; +const CMDLINE_START: u64 = 0x0002_0000; +const REAL_MODE_IVT_BEGIN: u64 = 0x0000_0000; +pub const EBDA_START: u64 = 0x0009_fc00; +const VGA_RAM_BEGIN: u64 = 0x000a_0000; +const MB_BIOS_BEGIN: u64 = 0x000f_0000; +const ZERO_PAGE_START: u64 = 0x0000_7000; + +// Structures below sourced from: +// https://www.kernel.org/doc/html/latest/x86/boot.html +// https://www.kernel.org/doc/html/latest/x86/zero-page.html +#[repr(C, packed)] +#[derive(Debug, Default, Copy, Clone)] +pub struct RealModeKernelHeader { + pub setup_sects: u8, + root_flags: u16, + syssize: u32, + ram_size: u16, + vid_mode: u16, + root_dev: u16, + boot_flag: u16, + jump: u16, + header: u32, + version: u16, + realmode_swtch: u32, + start_sys_seg: u16, + kernel_version: u16, + type_of_loader: u8, + loadflags: u8, + setup_move_size: u16, + code32_start: u32, + ramdisk_image: u32, + ramdisk_size: u32, + bootsect_kludge: u32, + heap_end_ptr: u16, + ext_loader_ver: u8, + ext_loader_type: u8, + cmdline_ptr: u32, + initrd_addr_max: u32, + kernel_alignment: u32, + relocatable_kernel: u8, + min_alignment: u8, + xloadflags: u16, + cmdline_size: u32, + hardware_subarch: u32, + hardware_subarch_data: u64, + payload_offset: u32, + payload_length: u32, + setup_data: u64, + pref_address: u64, + init_size: u32, + handover_offset: u32, + kernel_info_offset: u32, +} + +impl ByteCode for RealModeKernelHeader {} + +impl RealModeKernelHeader { + pub fn new(cmdline_ptr: u32, cmdline_size: u32, ramdisk_image: u32, ramdisk_size: u32) -> Self { + RealModeKernelHeader { + boot_flag: BOOT_FLAG, + header: HDRS, + type_of_loader: UNDEFINED_ID, + cmdline_ptr, + cmdline_size, + ramdisk_image, + ramdisk_size, + 
..Default::default() + } + } +} + +#[repr(C, packed)] +#[derive(Debug, Default, Copy, Clone)] +pub struct E820Entry { + addr: u64, + size: u64, + type_: u32, +} + +#[repr(C, packed)] +#[derive(Copy, Clone)] +pub struct BootParams { + screen_info: [u8; 0x40], + apm_bios_info: [u8; 0x14], + pad1: u32, + tboot_addr: [u8; 0x8], + ist_info: [u8; 0x10], + pad2: [u8; 0x10], + hd0_info: [u8; 0x10], + hd1_info: [u8; 0x10], + sys_desc_table: [u8; 0x10], + olpc_ofw_header: [u8; 0x10], + ext_ramdisk_image: u32, + ext_ramdisk_size: u32, + ext_cmd_line_ptr: u32, + pad3: [u8; 0x74], + edid_info: [u8; 0x80], + efi_info: [u8; 0x20], + alt_mem_k: u32, + scratch: u32, + e820_entries: u8, + eddbuf_entries: u8, + edd_mbr_sig_buf_entries: u8, + kbd_status: u8, + secure_boot: u8, + pad4: u16, + sentinel: u8, + pad5: u8, + kernel_header: RealModeKernelHeader, // offset: 0x1f1 + pad6: [u8; 0x24], + edd_mbr_sig_buffer: [u8; 0x40], + e820_table: [E820Entry; 0x80], + pad8: [u8; 0x30], + eddbuf: [u8; 0x1ec], +} + +impl ByteCode for BootParams {} + +impl Default for BootParams { + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } +} + +impl BootParams { + pub fn new(kernel_header: RealModeKernelHeader) -> Self { + BootParams { + kernel_header, + ..Default::default() + } + } + + pub fn add_e820_entry(&mut self, addr: u64, size: u64, type_: u32) { + self.e820_table[self.e820_entries as usize] = E820Entry { addr, size, type_ }; + self.e820_entries += 1; + } +} + +pub fn setup_boot_params(config: &X86BootLoaderConfig, sys_mem: &GuestMemory) -> (u64, u64) { + let (ramdisk_size, ramdisk_image, initrd_addr) = if config.initrd_size > 0 { + let mut initrd_addr_max = INITRD_ADDR_MAX as u32; + if initrd_addr_max as u64 > sys_mem.memory_end_address() as u64 { + initrd_addr_max = sys_mem.memory_end_address() as u32; + }; + + let img = (initrd_addr_max - config.initrd_size as u32) & !0xfff_u32; + (config.initrd_size as u32, img, img as u64) + } else { + (0_u32, 0_u32, 0_u64) + }; + + let mut 
boot_params = BootParams::new(RealModeKernelHeader::new( + CMDLINE_START as u32, + config.kernel_cmdline.len() as u32, + ramdisk_image, + ramdisk_size, + )); + + boot_params.add_e820_entry( + REAL_MODE_IVT_BEGIN, + EBDA_START - REAL_MODE_IVT_BEGIN, + E820_RAM, + ); + boot_params.add_e820_entry(EBDA_START, VGA_RAM_BEGIN - EBDA_START, E820_RESERVED); + boot_params.add_e820_entry(MB_BIOS_BEGIN, 0, E820_RESERVED); + + let high_memory_start = super::loader::VMLINUX_RAM_START; + let layout_32bit_gap_end = config.gap_range.0 + config.gap_range.1; + let mem_end = sys_mem.memory_end_address(); + if mem_end < layout_32bit_gap_end { + boot_params.add_e820_entry(high_memory_start, mem_end - high_memory_start, E820_RAM); + } else { + boot_params.add_e820_entry( + high_memory_start, + config.gap_range.0 - high_memory_start, + E820_RAM, + ); + boot_params.add_e820_entry( + layout_32bit_gap_end, + mem_end - layout_32bit_gap_end, + E820_RAM, + ); + } + + sys_mem + .write_object(&boot_params, ZERO_PAGE_START) + .expect("Failed to write bootparam to guest memory."); + + (ZERO_PAGE_START, initrd_addr) +} diff --git a/src/arch/x86_64/boot_loader/gdt.rs b/src/arch/x86_64/boot_loader/gdt.rs new file mode 100644 index 000000000..c57a44413 --- /dev/null +++ b/src/arch/x86_64/boot_loader/gdt.rs @@ -0,0 +1,145 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use std::mem::{size_of, size_of_val}; + +use kvm_bindings::kvm_segment; + +use crate::GuestMemory; + +const GDT_ENTRY_BOOT_CS: u8 = 2; +const GDT_ENTRY_BOOT_DS: u8 = 3; +const BOOT_GDT_MAX: usize = 4; +const BOOT_GDT_OFFSET: u64 = 0x500; +const BOOT_IDT_OFFSET: u64 = 0x520; + +#[derive(Debug, Default, Copy, Clone)] +pub struct BootGdtSegment { + pub code_segment: kvm_segment, + pub data_segment: kvm_segment, + pub gdt_base: u64, + pub gdt_limit: u16, + pub idt_base: u64, + pub idt_limit: u16, +} + +// /* +// * Constructor for a conventional segment GDT (or LDT) entry. +// * This is a macro so it can be used in initializers. +// */ +// #define GDT_ENTRY(flags, base, limit) \ +// ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \ +// (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \ +// (((limit) & _AC(0x000f0000,ULL)) << (48-16)) | \ +// (((base) & _AC(0x00ffffff,ULL)) << 16) | \ +// (((limit) & _AC(0x0000ffff,ULL)))) +// +pub struct GdtEntry(pub u64); + +impl GdtEntry { + pub fn new(flags: u64, base: u64, limit: u64) -> Self { + let base = (base & 0xff00_0000) << (56 - 24) | (base & 0x00ff_ffff) << 16; + let limit = (limit & 0x000f_0000) << (48 - 16) | (limit & 0x0000_ffff); + let flags = (flags & 0x0000_f0ff) << 40; + + GdtEntry(base | limit | flags) + } +} + +// Intel SDM 3A 3.4.5, segment descriptor has two +// words(8 byte): +// Word 1st: +// Bits(0 - 15): Segment Limit +// Bits(16 - 31): Base Address 0:15 +// +// Word 2nd: +// Bits(0 - 7): Base Address 23:16 +// Bits(8 - 11): Type, Segment type +// Bits(12): S, Descriptor type +// Bits(13 - 14): DPL, Descriptor privilege level +// Bits(15): P, Segment present +// Bits(16 - 19): Segment Limit +// Bits(20): AVL, Available for use by system software +// Bits(21): L, 64-bit code segment +// Bits(22): D/B, Default Operation Size +// Bits(23): G, Granularity +// Bits(24 - 31): Base Address 24, 31 +impl From for kvm_bindings::kvm_segment { + fn from(item: GdtEntry) -> Self { + let base = (item.0 >> 16 & 0x00ff_ffff) | 
(item.0 >> (56 - 24) & 0xff00_0000); + let limit = (item.0 >> (48 - 16) & 0x000f_0000) | (item.0 & 0x0000_ffff); + let flags = (item.0 >> 40) & 0x0000_f0ff; + + kvm_bindings::kvm_segment { + base, + limit: limit as u32, + type_: (flags & 0xf) as u8, + present: ((flags >> (15 - 8)) & 0x1) as u8, + dpl: ((flags >> (13 - 8)) & 0x3) as u8, + db: ((flags >> (22 - 8)) & 0x1) as u8, + s: ((flags >> (12 - 8)) & 0x1) as u8, + l: ((flags >> (21 - 8)) & 0x1) as u8, + g: ((flags >> (23 - 8)) & 0x1) as u8, + avl: ((flags >> (20 - 8)) & 0x1) as u8, + ..Default::default() + } + } +} + +impl From for u64 { + fn from(item: GdtEntry) -> Self { + item.0 + } +} + +fn write_gdt_table(table: &[u64], guest_mem: &GuestMemory) { + let mut boot_gdt_addr = BOOT_GDT_OFFSET as u64; + for (_, entry) in table.iter().enumerate() { + guest_mem + .write_object(entry, boot_gdt_addr) + .expect("Failed to write gdt table to guest memory."); + boot_gdt_addr += 8 + } +} + +fn write_idt_value(val: u64, guest_mem: &GuestMemory) { + let boot_idt_addr = BOOT_IDT_OFFSET; + guest_mem + .write_object(&val, boot_idt_addr) + .expect("Failed to write idt table to guest memory."); +} + +pub fn setup_gdt(guest_mem: &GuestMemory) -> BootGdtSegment { + let gdt_table: [u64; BOOT_GDT_MAX as usize] = [ + GdtEntry::new(0, 0, 0).into(), // NULL + GdtEntry::new(0, 0, 0).into(), // NULL + GdtEntry::new(0xa09b, 0, 0xfffff).into(), // CODE + GdtEntry::new(0xc093, 0, 0xfffff).into(), // DATA + ]; + + let mut code_seg: kvm_segment = GdtEntry(gdt_table[GDT_ENTRY_BOOT_CS as usize]).into(); + code_seg.selector = GDT_ENTRY_BOOT_CS as u16 * 8; + let mut data_seg: kvm_segment = GdtEntry(gdt_table[GDT_ENTRY_BOOT_DS as usize]).into(); + data_seg.selector = GDT_ENTRY_BOOT_DS as u16 * 8; + + write_gdt_table(&gdt_table[..], guest_mem); + write_idt_value(0, guest_mem); + + BootGdtSegment { + code_segment: code_seg, + data_segment: data_seg, + gdt_base: BOOT_GDT_OFFSET, + gdt_limit: size_of_val(&gdt_table) as u16 - 1, + idt_base: 
BOOT_IDT_OFFSET, + idt_limit: size_of::() as u16 - 1, + } +} diff --git a/src/arch/x86_64/boot_loader/loader.rs b/src/arch/x86_64/boot_loader/loader.rs new file mode 100644 index 000000000..c4a2027eb --- /dev/null +++ b/src/arch/x86_64/boot_loader/loader.rs @@ -0,0 +1,127 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::path::PathBuf; + +use super::{ + bootparam::{setup_boot_params, EBDA_START}, + gdt::{setup_gdt, BootGdtSegment}, + mptable::setup_isa_mptable, +}; +use crate::GuestMemory; + +const PDE_START: u64 = 0x0000_b000; +const PDPTE_START: u64 = 0x0000_a000; +const PML4_START: u64 = 0x0000_9000; +const CMDLINE_START: u64 = 0x0002_0000; +const VMLINUX_STARTUP: u64 = 0x0100_0000; +const BOOT_LOADER_SP: u64 = 0x0000_8ff0; +pub const VMLINUX_RAM_START: u64 = 0x0010_0000; + +/// Boot loader config used for x86_64. +pub struct X86BootLoaderConfig { + /// Path of the kernel image. + pub kernel: PathBuf, + /// Path of the initrd image. + pub initrd: PathBuf, + /// Initrd image size. + pub initrd_size: u32, + /// Kernel cmdline parameters. + pub kernel_cmdline: String, + /// VM's CPU count. + pub cpu_count: u8, + /// (gap start, gap size) + pub gap_range: (u64, u64), + /// IO APIC base address + pub ioapic_addr: u32, + /// Local APIC base address + pub lapic_addr: u32, +} + +/// The start address for some boot source in guest memory for `x86_64`. 
+pub struct X86BootLoader { + pub vmlinux_start: u64, + pub kernel_start: u64, + pub kernel_sp: u64, + pub initrd_start: u64, + pub boot_pml4_addr: u64, + pub zero_page_addr: u64, + pub segments: BootGdtSegment, +} + +fn setup_page_table(sys_mem: &GuestMemory) -> std::io::Result { + // Puts PML4 right after zero page but aligned to 4k. + let boot_pml4_addr = PML4_START; + let boot_pdpte_addr = PDPTE_START; + let boot_pde_addr = PDE_START; + + // Entry covering VA [0..512GB) + let pdpte = boot_pdpte_addr | 0x03; + sys_mem + .write_object(&pdpte, boot_pml4_addr) + .map_err(|_| std::io::ErrorKind::InvalidData)?; + + // Entry covering VA [0..1GB) + let pde = boot_pde_addr | 0x03; + sys_mem + .write_object(&pde, boot_pdpte_addr) + .map_err(|_| std::io::ErrorKind::InvalidData)?; + + // 512 2MB entries together covering VA [0..1GB). Note we are assuming + // CPU supports 2MB pages (/proc/cpuinfo has 'pse'). All modern CPUs do. + for i in 0..512_u64 { + let pde = (i << 21) + 0x83_u64; + sys_mem + .write_object(&pde, boot_pde_addr + i * 8) + .map_err(|_| std::io::ErrorKind::InvalidData)?; + } + + Ok(boot_pml4_addr) +} + +pub fn linux_bootloader(boot_config: &X86BootLoaderConfig, sys_mem: &GuestMemory) -> X86BootLoader { + let (kernel_start, vmlinux_start) = (VMLINUX_STARTUP, VMLINUX_STARTUP); + + let boot_pml4 = setup_page_table(sys_mem).expect("Failed to setup page table"); + setup_isa_mptable( + sys_mem, + EBDA_START, + boot_config.cpu_count, + boot_config.ioapic_addr, + boot_config.lapic_addr, + ) + .expect("Failed to setup isa mptable into guest memory."); + + let (zero_page, initrd_addr) = setup_boot_params(&boot_config, sys_mem); + let gdt_seg = setup_gdt(sys_mem); + + X86BootLoader { + kernel_start, + vmlinux_start, + kernel_sp: BOOT_LOADER_SP, + initrd_start: initrd_addr, + boot_pml4_addr: boot_pml4, + zero_page_addr: zero_page, + segments: gdt_seg, + } +} + +pub fn setup_kernel_cmdline(config: &X86BootLoaderConfig, sys_mem: &GuestMemory) { + let mut cmdline = 
config.kernel_cmdline.as_bytes(); + sys_mem + .write( + &mut cmdline, + CMDLINE_START, + config.kernel_cmdline.len() as u64, + ) + .expect("Failed to write cmdline to guest memory."); +} diff --git a/src/arch/x86_64/boot_loader/mod.rs b/src/arch/x86_64/boot_loader/mod.rs new file mode 100644 index 000000000..ed4cf36e9 --- /dev/null +++ b/src/arch/x86_64/boot_loader/mod.rs @@ -0,0 +1,77 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +mod bootparam; +mod gdt; +mod loader; +mod mptable; + +use std::fs::File; +use std::io::{Seek, SeekFrom}; +use std::sync::Arc; + +use kvm_ioctls::VmFd; +pub use loader::{X86BootLoader, X86BootLoaderConfig}; + +use crate::arch::CPUBootConfig; +use crate::cpu::CPU; +use crate::memory::GuestMemory; +use loader::linux_bootloader; + +use super::kvm::load_boot_source; + +fn load_image(image: &mut File, start_addr: u64, sys_mem: &GuestMemory) -> std::io::Result<()> { + let len = image.seek(SeekFrom::End(0))?; + image.seek(SeekFrom::Start(0))?; + + sys_mem + .write(image, start_addr, len) + .map_err(|_| std::io::ErrorKind::InvalidData)?; + + Ok(()) +} + +pub fn load_kernel(config: &X86BootLoaderConfig, sys_mem: &GuestMemory) -> X86BootLoader { + let mut kernel_image = File::open(&config.kernel).expect("Invalid guest kernel path"); + let boot_loader = linux_bootloader(config, sys_mem); + load_image(&mut kernel_image, boot_loader.vmlinux_start, &sys_mem) + .expect("Failed to write guest kernel to guest memory"); + + let mut initrd_image 
= File::open(&config.initrd).expect("Invalid initrd path"); + load_image(&mut initrd_image, boot_loader.initrd_start, &sys_mem) + .expect("Failed to write initrd to guest memory"); + + loader::setup_kernel_cmdline(&config, sys_mem); + + boot_loader +} + +pub fn kvm_load_kernel(guest_memory: &Arc, vcpu : &mut CPU, vm_fd: &Arc) -> X86BootLoader{ + let cmdline = "console=ttyS0 panic=1 reboot=k root=/dev/ram rdinit=/bin/sh"; + + let layout = load_boot_source(guest_memory, cmdline); + let cpu_boot_cfg = CPUBootConfig { + boot_ip: layout.kernel_start, + boot_sp: layout.kernel_sp, + zero_page: layout.zero_page_addr, + code_segment: layout.segments.code_segment, + data_segment: layout.segments.data_segment, + gdt_base: layout.segments.gdt_base, + gdt_size: layout.segments.gdt_limit, + idt_base: layout.segments.idt_base, + idt_size: layout.segments.idt_limit, + pml4_start: layout.boot_pml4_addr, + }; + vcpu.realize(&vm_fd, cpu_boot_cfg); + + layout +} \ No newline at end of file diff --git a/src/arch/x86_64/boot_loader/mptable.rs b/src/arch/x86_64/boot_loader/mptable.rs new file mode 100644 index 000000000..a1f486b5e --- /dev/null +++ b/src/arch/x86_64/boot_loader/mptable.rs @@ -0,0 +1,345 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use std::mem::size_of; + +use crate::helper::byte_code::ByteCode; +use crate::helper::checksum::obj_checksum; +use crate::GuestMemory; + +const SPEC_VERSION: u8 = 4; // version 1.4 +const APIC_VERSION: u8 = 0x14; + +// Variables and Structures below sourced from: +// Intel MultiProcessor Specification 1.4 +const CPU_FLAGS_ENABLE: u8 = 0x1; +const CPU_FLAGS_BSP: u8 = 0x2; +const APIC_FLAGS_ENABLE: u8 = 0x1; + +const BUS_ID: u8 = 0; +const MPTABLE_MAX_CPUS: u32 = 254; // mptable max support 255 cpus, reserve one for ioapic id +const MPTABLE_IOAPIC_NR: u8 = 16; + +pub const INTERRUPT_TYPE_INT: u8 = 0; +pub const INTERRUPT_TYPE_NMI: u8 = 1; +pub const INTERRUPT_TYPE_EXTINT: u8 = 3; +pub const DEST_ALL_LAPIC_MASK: u8 = 0xff; + +macro_rules! write_entry { + ( $d:expr, $t:ty, $m:expr, $o:expr, $s:expr ) => { + let entry = $d; + $m.write_object(&entry, $o).unwrap(); + $o += std::mem::size_of::<$t>() as u64; + $s = $s.wrapping_add(obj_checksum(&entry)); + }; +} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FloatingPointer { + signature: [u8; 4], + pointer: u32, + length: u8, + spec: u8, + checksum: u8, + feature1: u8, + feature2: u32, +} + +impl ByteCode for FloatingPointer {} + +impl FloatingPointer { + pub fn new(pointer: u32) -> Self { + let mut fp = FloatingPointer { + signature: [b'_', b'M', b'P', b'_'], + pointer, + length: 1, // spec: 01h + spec: SPEC_VERSION, + checksum: 0, + feature1: 0, + feature2: 0, + }; + + let sum = obj_checksum(&fp); + fp.checksum = (-(sum as i8)) as u8; + + fp + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct ConfigTableHeader { + signature: [u8; 4], + length: u16, + spec: u8, + checksum: u8, + oem_id: [u8; 8], + product_id: [u8; 12], + oem_table_pointer: u32, + oem_table_size: u16, + entry_count: u16, + lapic_addr: u32, + ext_table_length: u16, + ext_table_checksum: u8, + reserved: u8, +} + +impl ByteCode for ConfigTableHeader {} + +impl ConfigTableHeader { + pub fn new(length: u16, sum: u8, 
lapic_addr: u32) -> Self { + let mut ct = ConfigTableHeader { + signature: [b'P', b'C', b'M', b'P'], + length, + spec: SPEC_VERSION, + checksum: 0, + oem_id: [b'S', b'T', b'R', b'A', b'T', b'O', 0x0, 0x0], + product_id: [ + b'1', b'.', b'0', 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + ], + oem_table_pointer: 0, + oem_table_size: 0, + entry_count: 0, + lapic_addr, + ext_table_length: 0, + ext_table_checksum: 0, + reserved: 0, + }; + + let sum = sum.wrapping_add(obj_checksum(&ct)); + ct.checksum = (-(sum as i8)) as u8; + + ct + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct ProcessEntry { + type_: u8, + lapic_id: u8, + lapic_version: u8, + cpu_flags: u8, + cpu_signature: u32, + feature_flags: u32, + reserved: u32, + reserved1: u32, +} + +impl ByteCode for ProcessEntry {} + +impl ProcessEntry { + pub fn new(lapic_id: u8, enable: bool, bsp: bool) -> Self { + let mut cpu_flags = if enable { CPU_FLAGS_ENABLE } else { 0 }; + if bsp { + cpu_flags |= CPU_FLAGS_BSP; + } + + ProcessEntry { + type_: 0, + lapic_id, + lapic_version: APIC_VERSION, + cpu_flags, + cpu_signature: 0x600, // Intel CPU Family Number: 0x6 + feature_flags: 0x201, // APIC & FPU + reserved: 0, + reserved1: 0, + } + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct BusEntry { + type_: u8, + bus_id: u8, + bus_type: [u8; 6], +} + +impl ByteCode for BusEntry {} + +impl BusEntry { + pub fn new(bus_id: u8) -> Self { + BusEntry { + type_: 1, + bus_id, + bus_type: [b'I', b'S', b'A', 0x0, 0x0, 0x0], + } + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct IOApicEntry { + type_: u8, + ioapic_id: u8, + ioapic_version: u8, + ioapic_flags: u8, + ioapic_addr: u32, +} + +impl ByteCode for IOApicEntry {} + +impl IOApicEntry { + pub fn new(ioapic_id: u8, enable: bool, ioapic_addr: u32) -> Self { + let ioapic_flags = if enable { APIC_FLAGS_ENABLE } else { 0 }; + + IOApicEntry { + type_: 2, + ioapic_id, + ioapic_version: APIC_VERSION, + ioapic_flags, + 
ioapic_addr, + } + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct IOInterruptEntry { + type_: u8, + interrupt_type: u8, + interrupt_flags: u16, + source_bus_id: u8, + source_bus_irq: u8, + dest_ioapic_id: u8, + dest_ioapic_int: u8, +} + +impl ByteCode for IOInterruptEntry {} + +impl IOInterruptEntry { + pub fn new( + interrupt_type: u8, + source_bus_id: u8, + source_bus_irq: u8, + dest_ioapic_id: u8, + dest_ioapic_int: u8, + ) -> Self { + IOInterruptEntry { + type_: 3, + interrupt_type, + interrupt_flags: 0, // conforms to spec of bus + source_bus_id, + source_bus_irq, + dest_ioapic_id, + dest_ioapic_int, + } + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct LocalInterruptEntry { + type_: u8, + interrupt_type: u8, + interrupt_flags: u16, + source_bus_id: u8, + source_bus_irq: u8, + dest_lapic_id: u8, + dest_lapic_lint: u8, +} + +impl ByteCode for LocalInterruptEntry {} + +impl LocalInterruptEntry { + pub fn new( + interrupt_type: u8, + source_bus_id: u8, + source_bus_irq: u8, + dest_lapic_id: u8, + dest_lapic_lint: u8, + ) -> Self { + LocalInterruptEntry { + type_: 4, + interrupt_type, + interrupt_flags: 0, // conforms to spec of bus + source_bus_id, + source_bus_irq, + dest_lapic_id, + dest_lapic_lint, + } + } +} + +pub fn setup_isa_mptable( + sys_mem: &GuestMemory, + start_addr: u64, + num_cpus: u8, + ioapic_addr: u32, + lapic_addr: u32, +) -> std::io::Result<()> { + if u32::from(num_cpus) > MPTABLE_MAX_CPUS { + panic!("invalid num cpus!"); + } + + let ioapic_id = num_cpus + 1; + let header = start_addr + size_of::() as u64; + sys_mem + .write_object(&FloatingPointer::new(header as u32), start_addr) + .map_err(|_| std::io::ErrorKind::InvalidData)?; + + let mut offset = header + size_of::() as u64; + let mut sum = 0u8; + + for cpu_id in 0..num_cpus { + write_entry!( + ProcessEntry::new(cpu_id as u8, true, cpu_id == 0), + ProcessEntry, + sys_mem, + offset, + sum + ); + } + + write_entry!(BusEntry::new(BUS_ID), 
BusEntry, sys_mem, offset, sum); + write_entry!( + IOApicEntry::new(ioapic_id, true, ioapic_addr), + IOApicEntry, + sys_mem, + offset, + sum + ); + for i in 0..MPTABLE_IOAPIC_NR { + write_entry!( + IOInterruptEntry::new(INTERRUPT_TYPE_INT, BUS_ID, i, ioapic_id, i), + IOInterruptEntry, + sys_mem, + offset, + sum + ); + } + write_entry!( + LocalInterruptEntry::new(INTERRUPT_TYPE_EXTINT, BUS_ID, 0, ioapic_id, 0), + LocalInterruptEntry, + sys_mem, + offset, + sum + ); + write_entry!( + LocalInterruptEntry::new(INTERRUPT_TYPE_NMI, BUS_ID, 0, DEST_ALL_LAPIC_MASK, 1), + LocalInterruptEntry, + sys_mem, + offset, + sum + ); + + sys_mem + .write_object( + &ConfigTableHeader::new((offset - header) as u16, sum, lapic_addr), + header, + ) + .map_err(|_| std::io::ErrorKind::InvalidData)?; + + Ok(()) +} diff --git a/src/arch/x86_64/cpu/mod.rs b/src/arch/x86_64/cpu/mod.rs new file mode 100644 index 000000000..0b851afd4 --- /dev/null +++ b/src/arch/x86_64/cpu/mod.rs @@ -0,0 +1,355 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use kvm_bindings::{ + kvm_fpu, kvm_lapic_state, kvm_mp_state, kvm_msr_entry, kvm_regs, kvm_segment, kvm_sregs, Msrs, + KVM_MAX_CPUID_ENTRIES, KVM_MP_STATE_RUNNABLE, KVM_MP_STATE_UNINITIALIZED, +}; +use kvm_ioctls::{Kvm, VcpuFd, VmFd}; + +use crate::arch::x86_64::cpu::host_cpuid; + + + +const ECX_EPB_SHIFT: u32 = 3; +const X86_FEATURE_HYPERVISOR: u32 = 31; +const X86_FEATURE_TSC_DEADLINE_TIMER: u32 = 24; + +const MSR_IA32_MISC_ENABLE: u32 = 0x01a0; +const MSR_IA32_MISC_ENABLE_FAST_STRING: u64 = 0x1; + +const MSR_LIST: &[u32] = &[ + 0x0174, // MSR_IA32_SYSENTER_CS + 0x0175, // MSR_IA32_SYSENTER_ESP + 0x0176, // MSR_IA32_SYSENTER_EIP + 0xc000_0081, // MSR_STAR, legacy mode SYSCALL target + 0xc000_0082, // MSR_LSTAR, long mode SYSCALL target + 0xc000_0083, // MSR_CSTAR, compat mode SYSCALL target + 0xc000_0084, // MSR_SYSCALL_MASK, EFLAGS mask for syscall + 0xc000_0102, // MSR_KERNEL_GS_BASE, SwapGS GS shadow + 0x0010, // MSR_IA32_TSC, + 0x01a0, // MSR_IA32_MISC_ENABLE, +]; + +#[derive(Default)] +/// CPU booting configure information +pub struct X86CPUBootConfig { + /// Register %rip value + pub boot_ip: u64, + /// Register %rsp value + pub boot_sp: u64, + /// zero page address, as the second parameter of __startup_64 + /// arch/x86/kernel/head_64.S:86 + pub zero_page: u64, + pub code_segment: kvm_segment, + pub data_segment: kvm_segment, + pub gdt_base: u64, + pub gdt_size: u16, + pub idt_base: u64, + pub idt_size: u16, + pub pml4_start: u64, +} + +#[derive(Copy, Clone)] +pub struct CPUState { + nr_vcpus: u32, + apic_id: u32, + regs: kvm_regs, + sregs: kvm_sregs, + fpu: kvm_fpu, + mp_state: kvm_mp_state, + lapic: kvm_lapic_state, + msr_len: usize, + msr_list: [kvm_msr_entry; 256], +} + +impl CPUState { + /// Allocates a new `CPUX86State`. + /// + /// # Arguments + /// + /// * `vcpu_id` - ID of this `CPU`. + /// * `nr_vcpus` - Number of vcpus. 
+ pub fn new(vcpu_id: u32, nr_vcpus: u32) -> Self { + let mp_state = kvm_mp_state { + mp_state: if vcpu_id == 0 { + KVM_MP_STATE_RUNNABLE + } else { + KVM_MP_STATE_UNINITIALIZED + }, + }; + CPUState { + apic_id: vcpu_id, + nr_vcpus, + regs: kvm_regs::default(), + sregs: kvm_sregs::default(), + fpu: kvm_fpu::default(), + mp_state, + lapic: kvm_lapic_state::default(), + msr_len: 0, + msr_list: [kvm_msr_entry::default(); 256], + } + } + + /// Set register value in `CPUX86State` according to `boot_config`. + /// + /// # Arguments + /// + /// * `vcpu_fd` - Vcpu file descriptor in kvm. + /// * `boot_config` - Boot message from boot_loader. + pub fn set_boot_config( + &mut self, + _vmfd: &std::sync::Arc, + vcpu_fd: &VcpuFd, + boot_config: &X86CPUBootConfig, + ) { + self.setup_lapic(vcpu_fd); + self.setup_regs(&boot_config); + self.setup_sregs(vcpu_fd, &boot_config); + self.setup_fpu(); + self.setup_msrs(); + } + + /// Reset register value in `Kvm` with `CPUX86State`. + /// + /// # Arguments + /// + /// * `vcpu_fd` - Vcpu file descriptor in kvm. 
+ pub fn reset_vcpu(&self, vcpu_fd: &VcpuFd) { + self.setup_cpuid(vcpu_fd); + + vcpu_fd + .set_lapic(&self.lapic) + .expect("Failed to set lapic register"); + vcpu_fd + .set_mp_state(self.mp_state) + .expect("Failed to set mpstate register"); + vcpu_fd + .set_sregs(&self.sregs) + .expect("Failed to set special register register"); + vcpu_fd + .set_regs(&self.regs) + .expect("Failed to set common register register"); + vcpu_fd + .set_fpu(&self.fpu) + .expect("Failed to set fpu register"); + vcpu_fd + .set_msrs(&Msrs::from_entries(&self.msr_list[0..self.msr_len])) + .expect("Failed to set msrs register"); + } + + #[allow(clippy::cast_ptr_alignment)] + fn setup_lapic(&mut self, vcpu_fd: &VcpuFd) { + // Disable nmi and external interrupt before enter protected mode + // See: https://elixir.bootlin.com/linux/v4.19.123/source/arch/x86/include/asm/apicdef.h + const APIC_LVT0: usize = 0x350; + const APIC_LVT1: usize = 0x360; + const APIC_MODE_NMI: u32 = 0x4; + const APIC_MODE_EXTINT: u32 = 0x7; + + self.lapic = vcpu_fd.get_lapic().expect("Failed to get lapic register"); + + // The member regs in struct kvm_lapic_state is a u8 array with 1024 entries, + // so it's saft to cast u8 pointer to u32 at position APIC_LVT0 and APIC_LVT1. + unsafe { + let apic_lvt_lint0 = &mut self.lapic.regs[APIC_LVT0..] as *mut [i8] as *mut u32; + *apic_lvt_lint0 &= !0x700; + *apic_lvt_lint0 |= APIC_MODE_EXTINT << 8; + + let apic_lvt_lint1 = &mut self.lapic.regs[APIC_LVT1..] 
as *mut [i8] as *mut u32; + *apic_lvt_lint1 &= !0x700; + *apic_lvt_lint1 |= APIC_MODE_NMI << 8; + } + } + + fn setup_regs(&mut self, boot_config: &X86CPUBootConfig) { + self.regs = kvm_regs { + rflags: 0x0002, /* Means processor has been initialized */ + rip: boot_config.boot_ip, + rsp: boot_config.boot_sp, + rbp: boot_config.boot_sp, + rsi: boot_config.zero_page, + ..Default::default() + }; + } + + fn setup_sregs(&mut self, vcpu_fd: &VcpuFd, boot_config: &X86CPUBootConfig) { + self.sregs = vcpu_fd + .get_sregs() + .expect("Failed to get spectial register."); + + // X86_CR0_PE: Protection Enable + // EFER_LME: Long mode enable + // EFER_LMA: Long mode active + // arch/x86/include/uapi/asm/processor-flags.h + const X86_CR0_PE: u64 = 0x1; + const EFER_LME: u64 = 0x100; + const EFER_LMA: u64 = 0x400; + + // X86_CR0_PG: enable Paging + // X86_CR4_PAE: enable physical address extensions + // arch/x86/include/uapi/asm/processor-flags.h + const X86_CR0_PG: u64 = 0x8000_0000; + const X86_CR4_PAE: u64 = 0x20; + + // Init gdt table, gdt table has loaded to Guest Memory Space + self.sregs.cs = boot_config.code_segment; + self.sregs.ds = boot_config.data_segment; + self.sregs.es = boot_config.data_segment; + self.sregs.fs = boot_config.data_segment; + self.sregs.gs = boot_config.data_segment; + self.sregs.ss = boot_config.data_segment; + + // Init gdt table, gdt table has loaded to Guest Memory Space + self.sregs.gdt.base = boot_config.gdt_base; + self.sregs.gdt.limit = boot_config.gdt_size; + + // Init idt table, idt table has loaded to Guest Memory Space + self.sregs.idt.base = boot_config.idt_base; + self.sregs.idt.limit = boot_config.idt_size; + + // Open 64-bit protected mode, include + // Protection enable, Long mode enable, Long mode active + self.sregs.cr0 |= X86_CR0_PE; + self.sregs.efer |= EFER_LME | EFER_LMA; + + // Setup page table + self.sregs.cr3 = boot_config.pml4_start; + self.sregs.cr4 |= X86_CR4_PAE; + self.sregs.cr0 |= X86_CR0_PG; + } + + fn setup_fpu(&mut 
self) { + // Default value for fxregs_state.mxcsr + // arch/x86/include/asm/fpu/types.h + const MXCSR_DEFAULT: u32 = 0x1f80; + + self.fpu = kvm_fpu { + fcw: 0x37f, + mxcsr: MXCSR_DEFAULT, + ..Default::default() + }; + } + + fn setup_msrs(&mut self) { + // Enable fasting-string operation to improve string + // store operations. + for (index, msr) in MSR_LIST.iter().enumerate() { + let data = match *msr { + MSR_IA32_MISC_ENABLE => MSR_IA32_MISC_ENABLE_FAST_STRING, + _ => 0u64, + }; + + self.msr_list[index] = kvm_msr_entry { + index: *msr, + data, + ..Default::default() + }; + self.msr_len += 1; + } + } + + fn setup_cpuid(&self, vcpu_fd: &VcpuFd) { + let sys_fd = match Kvm::new() { + Ok(fd) => fd, + _ => panic!("setup_cpuid: Open /dev/kvm failed"), + }; + let mut cpuid = sys_fd + .get_supported_cpuid(KVM_MAX_CPUID_ENTRIES) + .expect("Failed to get supported cpuid"); + let entries = cpuid.as_mut_slice(); + + for entry in entries.iter_mut() { + match entry.function { + 1 => { + if entry.index == 0 { + entry.ecx |= 1u32 << X86_FEATURE_HYPERVISOR; + entry.ecx |= 1u32 << X86_FEATURE_TSC_DEADLINE_TIMER + } + } + 2 => { + host_cpuid( + 2, + 0, + &mut entry.eax, + &mut entry.ebx, + &mut entry.ecx, + &mut entry.edx, + ); + } + 4 => { + // cache info: needed for Pentium Pro compatibility + // Passthrough host cache info directly to guest + host_cpuid( + 4, + entry.index, + &mut entry.eax, + &mut entry.ebx, + &mut entry.ecx, + &mut entry.edx, + ); + entry.eax &= !0xfc00_0000; + if entry.eax & 0x0001_ffff != 0 && self.nr_vcpus > 1 { + entry.eax |= (self.nr_vcpus - 1) << 26; + } + } + 6 => { + entry.ecx &= !(1u32 << ECX_EPB_SHIFT); + } + 10 => { + if entry.eax != 0 { + let version_id = entry.eax & 0xff; + let num_counters = entry.eax & 0xff00; + if version_id != 2 || num_counters == 0 { + entry.eax = 0; + } + } + } + 0xb => { + // Extended Topology Enumeration Leaf + entry.edx = self.apic_id as u32; + entry.ecx = entry.index & 0xff; + match entry.index { + 0 => { + entry.eax = 
0u32; + entry.ebx = 1u32; + entry.ecx |= 1u32 << 8; + } + 1 => { + entry.eax = 32u32 - self.nr_vcpus.leading_zeros(); + entry.ebx = self.nr_vcpus; + entry.ecx |= 2u32 << 8; + } + _ => { + entry.ebx = 0xff; + } + } + entry.ebx &= 0xffff; + } + 0x8000_0002..=0x8000_0004 => { + // Passthrough host cpu model name directly to guest + host_cpuid( + entry.function, + entry.index, + &mut entry.eax, + &mut entry.ebx, + &mut entry.ecx, + &mut entry.edx, + ); + } + _ => (), + } + } + + vcpu_fd.set_cpuid2(&cpuid).expect("Failed to set cpuid2"); + } +} diff --git a/src/arch/x86_64/device/mod.rs b/src/arch/x86_64/device/mod.rs new file mode 100644 index 000000000..931655e41 --- /dev/null +++ b/src/arch/x86_64/device/mod.rs @@ -0,0 +1,15 @@ +pub mod serial; +use std::sync::Arc; + +use kvm_ioctls::VmFd; + +use crate::{arch::x86_64::boot_loader::X86BootLoader, cpu::CPU, device::Serial, memory::GuestMemory}; + + +pub fn kvm_setup_fireware(guest_memory: &Arc,vcpus : &Vec>>, vm_fd: &Arc, layout : &X86BootLoader) { + + let serial = Serial::new(&vm_fd); + for vcpu in vcpus.iter() { + vcpu.lock().unwrap().set_serial_dev(serial.clone()); + } +} diff --git a/src/arch/x86_64/device/serial.rs b/src/arch/x86_64/device/serial.rs new file mode 100644 index 000000000..eb0787d28 --- /dev/null +++ b/src/arch/x86_64/device/serial.rs @@ -0,0 +1,3 @@ +pub const MMIO_SERIAL_IRQ: u32 = 4; +pub const MMIO_SERIAL_ADDR: u64 = 0x3f8; +pub const MMIO_SERIAL_ADDR_SIZE: u64 = 8; \ No newline at end of file diff --git a/src/arch/x86_64/helper/cpuid.rs b/src/arch/x86_64/helper/cpuid.rs new file mode 100644 index 000000000..1084b271b --- /dev/null +++ b/src/arch/x86_64/helper/cpuid.rs @@ -0,0 +1,31 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use core::arch::x86_64::__cpuid_count; + +pub fn host_cpuid( + leaf: u32, + subleaf: u32, + eax: *mut u32, + ebx: *mut u32, + ecx: *mut u32, + edx: *mut u32, +) { + unsafe { + let cpuid = __cpuid_count(leaf, subleaf); + + *eax = cpuid.eax; + *ebx = cpuid.ebx; + *ecx = cpuid.ecx; + *edx = cpuid.edx; + } +} diff --git a/src/arch/x86_64/helper/mod.rs b/src/arch/x86_64/helper/mod.rs new file mode 100644 index 000000000..e13a58bf8 --- /dev/null +++ b/src/arch/x86_64/helper/mod.rs @@ -0,0 +1 @@ +pub mod cpuid; \ No newline at end of file diff --git a/src/arch/x86_64/kvm/mod.rs b/src/arch/x86_64/kvm/mod.rs new file mode 100644 index 000000000..6d733647a --- /dev/null +++ b/src/arch/x86_64/kvm/mod.rs @@ -0,0 +1,46 @@ +use std::path::PathBuf; +use std::sync::Arc; +use kvm_ioctls::{Kvm, VmFd}; +use kvm_bindings::{kvm_pit_config, KVM_PIT_SPEAKER_DUMMY}; +use crate::memory::{GuestMemory }; +use crate::arch::{load_kernel, BootLoader, BootLoaderConfig}; +use crate::arch::{LayoutEntryType, MEM_LAYOUT}; +pub fn load_boot_source(guest_memory: &Arc, cmdline: &str) -> BootLoader { + let initrd_path = PathBuf::from("/tmp/initrd.img"); + let initrd_size = match std::fs::metadata("/tmp/initrd.img") { + Ok(meta) => meta.len() as u32, + _ => panic!("initrd file init failed!"), + }; + let gap_start = MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0 + + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1; + let gap_end = MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0; + let boot_cfg = BootLoaderConfig { + kernel: PathBuf::from("/tmp/vmlinux.bin"), + initrd: initrd_path, + initrd_size, + kernel_cmdline: cmdline.to_string(), + cpu_count: 1_u8, + 
gap_range: (gap_start, gap_end - gap_start), + ioapic_addr: MEM_LAYOUT[LayoutEntryType::IoApic as usize].0 as u32, + lapic_addr: MEM_LAYOUT[LayoutEntryType::LocalApic as usize].0 as u32, + }; + load_kernel(&boot_cfg, &guest_memory) +} + +pub fn arch_init_based_dev(vm_fd: &Arc) { + vm_fd.create_irq_chip().expect("Failed to create irq chip."); + vm_fd + .set_tss_address(0xfffb_d000_usize) + .expect("Failed to set tss address."); + + let pit_config = kvm_pit_config { + flags: KVM_PIT_SPEAKER_DUMMY, + pad: Default::default(), + }; + + vm_fd + .create_pit2(pit_config) + .expect("Failed to create pit2."); +} + + diff --git a/src/arch/x86_64/memory/mod.rs b/src/arch/x86_64/memory/mod.rs new file mode 100644 index 000000000..138068a3d --- /dev/null +++ b/src/arch/x86_64/memory/mod.rs @@ -0,0 +1,30 @@ +/// The type of memory layout entry on x86_64 +#[repr(usize)] +pub enum LayoutEntryType { + MemBelow4g = 0_usize, + Mmio, + IoApic, + LocalApic, + MemAbove4g, +} + +/// Layout of x86_64 +pub const MEM_LAYOUT: &[(u64, u64)] = &[ + (0, 0xC000_0000), // MemBelow4g + (0xF010_0000, 0x200), // Mmio + (0xFEC0_0000, 0x10_0000), // IoApic + (0xFEE0_0000, 0x10_0000), // LocalApic + (0x1_0000_0000, 0x80_0000_0000), // MemAbove4g +]; + +pub fn arch_add_ram_ranges(mem_size: u64, ranges: &mut Vec<(u64, u64)>) { + + let gap_start = MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0 + + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1; + ranges.push((0, std::cmp::min(gap_start, mem_size))); + if mem_size > gap_start { + let gap_end = MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0; + ranges.push((gap_end, mem_size - gap_start)); + } + +} \ No newline at end of file diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs new file mode 100644 index 000000000..d1018d105 --- /dev/null +++ b/src/arch/x86_64/mod.rs @@ -0,0 +1,6 @@ +pub mod kvm; +pub mod boot_loader; +pub mod cpu; +pub mod device; +pub mod memory; +pub mod helper; \ No newline at end of file diff --git a/src/cpu/mod.rs 
b/src/cpu/mod.rs index 6ce02d247..d501e4924 100644 --- a/src/cpu/mod.rs +++ b/src/cpu/mod.rs @@ -1,336 +1,103 @@ -use kvm_bindings::{ - user_regs_struct, kvm_riscv_core,kvm_riscv_config,kvm_riscv_timer, - KVM_REG_RISCV, KVM_REG_RISCV_CORE, KVM_REG_SIZE_U64, KVM_REG_RISCV_CONFIG, KVM_REG_RISCV_TIMER, - KVM_MP_STATE_STOPPED, KVM_SYSTEM_EVENT_RESET, -}; -use kvm_ioctls::{VmFd, VcpuFd, VcpuExit}; +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use crate::arch::CPUBootConfig; +use std::sync::{Arc, Mutex}; +use std::thread; use std::mem::size_of; use crate::helper::byte_code::ByteCode; -use std::thread; -use std::sync::{Arc, Mutex}; -use crate::device::{PlicState, Serial, judge_serial_addr, judge_plic_addr as judge_interrupt_controller_addr}; -use crate::GuestMemory; - -#[macro_export] -macro_rules! __offset_of { - ($type_name: ty, $field: ident) => {{ - let tmp = std::mem::MaybeUninit::<$type_name>::uninit(); - let outer = tmp.as_ptr(); - let inner = unsafe { core::ptr::addr_of!((*outer).$field) } as *const u8; - unsafe { - inner.offset_from( outer as *const u8 ) as usize - } - }} -} - -#[macro_export] -macro_rules! 
offset_of { - ($type_name: ty, $field: ident) => { - $crate::__offset_of!($type_name, $field) - }; - ($type_name: ty, $field: ident, $($sub_type_name: ty, $sub_field: ident), +) => { - $crate::__offset_of!($type_name, $field) + offset_of!($($sub_type_name, $sub_field), +) - }; -} - -pub struct CPUBootConfig { - pub fdt_addr: u64, - pub kernel_addr: u64, -} +use kvm_ioctls::{VcpuExit, VcpuFd, VmFd}; +use kvm_bindings::KVM_SYSTEM_EVENT_RESET; -pub enum RISCVCoreRegs { - PC, - RA, - SP, - GP, - TP, - T0, - T1, - T2, - S0, - S1, - A0, - A1, - A2, - A3, - A4, - A5, - A6, - A7, - S2, - S3, - S4, - S5, - S6, - S7, - S8, - S9, - S10, - S11, - T3, - T4, - T5, - T6, - MODE, -} - -impl Into for RISCVCoreRegs { - fn into(self) -> u64 { - let reg_offset = match self { - RISCVCoreRegs::PC => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, pc) - } - RISCVCoreRegs::RA => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, ra) - } - RISCVCoreRegs::SP => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, sp) - } - RISCVCoreRegs::GP => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, gp) - } - RISCVCoreRegs::TP => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, tp) - } - RISCVCoreRegs::T0 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t0) - } - RISCVCoreRegs::T1 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t0) - } - RISCVCoreRegs::T2 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t2) - } - RISCVCoreRegs::S0 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s0) - } - RISCVCoreRegs::S1 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s1) - } - RISCVCoreRegs::A0 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a0) - } - RISCVCoreRegs::A1 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a1) - } - RISCVCoreRegs::A2 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a2) - } - RISCVCoreRegs::A3 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a3) - } - RISCVCoreRegs::A4 => { - 
offset_of!(kvm_riscv_core, regs, user_regs_struct, a4) - } - RISCVCoreRegs::A5 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a5) - } - RISCVCoreRegs::A6 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a6) - } - RISCVCoreRegs::A7 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, a7) - } - RISCVCoreRegs::S2 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s2) - } - RISCVCoreRegs::S3 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s3) - } - RISCVCoreRegs::S4 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s4) - } - RISCVCoreRegs::S5 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s5) - } - RISCVCoreRegs::S6 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s6) - } - RISCVCoreRegs::S7 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s7) - } - RISCVCoreRegs::S8 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s8) - } - RISCVCoreRegs::S9 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s9) - } - RISCVCoreRegs::S10 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s10) - } - RISCVCoreRegs::S11 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, s11) - } - RISCVCoreRegs::T3 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t3) - } - RISCVCoreRegs::T4 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t4) - } - RISCVCoreRegs::T5 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t5) - } - RISCVCoreRegs::T6 => { - offset_of!(kvm_riscv_core, regs, user_regs_struct, t6) - } - RISCVCoreRegs::MODE => { - offset_of!(kvm_riscv_core, mode) - } - }; - KVM_REG_RISCV as u64 - | KVM_REG_SIZE_U64 as u64 - | u64::from(KVM_REG_RISCV_CORE) - | (reg_offset / size_of::()) as u64 - } -} -#[allow(non_camel_case_types)] -#[allow(dead_code)] -pub enum Riscv64ConfigRegs{ - ISA, - ZICBOM_BLOCK_SIZE, - MVENDORID, - MARCHID, - MIMPID, - ZICBOZ_BLOCK_SIZE, - SATP_MODE, -} -#[allow(clippy::zero_ptr)] -impl Into for Riscv64ConfigRegs{ - fn into(self) -> u64 { - let 
reg_offset = match self { - Riscv64ConfigRegs::ISA => { - offset_of!(kvm_riscv_config, isa) - } - Riscv64ConfigRegs::ZICBOM_BLOCK_SIZE => { - offset_of!(kvm_riscv_config, zicbom_block_size) - } - Riscv64ConfigRegs::MVENDORID => { - offset_of!(kvm_riscv_config, mvendorid) - } - Riscv64ConfigRegs::MARCHID => { - offset_of!(kvm_riscv_config, marchid) - } - Riscv64ConfigRegs::MIMPID => { - offset_of!(kvm_riscv_config, mimpid) - } - Riscv64ConfigRegs::ZICBOZ_BLOCK_SIZE => { - offset_of!(kvm_riscv_config, zicboz_block_size) - } - Riscv64ConfigRegs::SATP_MODE => { - offset_of!(kvm_riscv_config, satp_mode) - } - }; - KVM_REG_RISCV as u64 - | KVM_REG_SIZE_U64 as u64 - | u64::from(KVM_REG_RISCV_CONFIG) - | (reg_offset / size_of::()) as u64 - } -} -#[allow(non_camel_case_types)] -#[allow(dead_code)] -pub enum Riscv64Timer{ - FREQUENCY, - TIME, - COMPARE, - STATE, -} -#[allow(clippy::zero_ptr)] -impl Into for Riscv64Timer{ - fn into(self) -> u64 { - let reg_offset = match self { - Riscv64Timer::FREQUENCY => { - offset_of!(kvm_riscv_timer, frequency) - } - Riscv64Timer::TIME => { - offset_of!(kvm_riscv_timer, time) - } - Riscv64Timer::COMPARE => { - offset_of!(kvm_riscv_timer, compare) - } - Riscv64Timer::STATE => { - offset_of!(kvm_riscv_timer, state) - } - }; - KVM_REG_RISCV as u64 - | KVM_REG_SIZE_U64 as u64 - | u64::from(KVM_REG_RISCV_TIMER) - | (reg_offset / size_of::()) as u64 - } -} +use crate::arch::{CPUState, Riscv64CoreRegs, judge_interrupt_controller_addr}; +use crate::device::{judge_serial_addr, Serial}; +use crate::memory::GuestMemory; pub struct CPU { + /// ID of this virtual CPU, `0` means this cpu is primary `CPU`. pub id: u8, + /// The file descriptor of this kvm_based vCPU. pub fd: Arc, - pub nr_vcpus: u8, - pub sys_mem: Arc, - boot_ip: u64, - fdt_addr: u64, - pub isa: u64, - pub frequency: u64, - pub satp_mode: u64, - pub plic: Option>>, - pub serial: Option>> + /// Registers state for kvm_based vCPU. + pub state: CPUState, + /// System memory space. 
+ sys_mem: Arc, + /// Serial device is used for debugging. + serial: Option>>, } impl CPU { - pub fn new(vm_fd: &Arc, sys_mem: Arc,vcpu_id: u8, nr_vcpus: u8) -> Self { - let vcpu_fd = vm_fd.create_vcpu(vcpu_id as u64).expect(&format!("Failed to create Vcpu{}",vcpu_id)); - let vcpu_fd = Arc::new(vcpu_fd); + /// Allocates a new `CPU` for `vm` + /// + /// # Arguments + /// + /// - `vcpu_id` - vcpu_id for `CPU`, started from `0`. + #[allow(unused_variables)] + pub fn new(vm_fd: &Arc, sys_mem: Arc, vcpu_id: u32, nr_vcpus: u32) -> Self { + let vcpu_fd = vm_fd + .create_vcpu(vcpu_id as u8) + .expect("Failed to create vCPU"); + Self { - id: vcpu_id, - fd: vcpu_fd, - nr_vcpus, + id: vcpu_id as u8, + fd: Arc::new(vcpu_fd), sys_mem, - boot_ip: 0, - fdt_addr: 0, - isa: 0, - frequency: 0, - satp_mode: 0, - plic: None, - serial: None + state: CPUState::new(vcpu_id, nr_vcpus), + serial: None, } } - pub fn set_boot_config( - &mut self, - boot_config: &CPUBootConfig - ){ - self.boot_ip = boot_config.kernel_addr; - self.fdt_addr = boot_config.fdt_addr; - self.isa = self.fd.get_one_reg(Riscv64ConfigRegs::ISA.into()).unwrap() as u64; - self.satp_mode = self.fd.get_one_reg(Riscv64ConfigRegs::SATP_MODE.into()).unwrap() as u64; - self.frequency = self.fd.get_one_reg(Riscv64Timer::FREQUENCY.into()).unwrap() as u64; + pub fn set_serial_dev(&mut self, serial: Arc>) { + self.serial = Some(serial); } - pub fn set_register(&self, reg: RISCVCoreRegs, value: u128) { - self.fd.set_one_reg(reg.into(), value); - } - pub fn set_plic(&mut self, plic: Arc>) { - self.plic = Some(plic.clone()); - } - pub fn set_serial(&mut self, serial: Arc>) { - self.serial = Some(serial.clone()); + /// Realize vcpu status. + /// Get register state from kvm. 
+ pub fn realize(&mut self, vm_fd: &Arc, bootconfig: CPUBootConfig) { + self.state.set_boot_config(vm_fd, self.fd.clone(), &bootconfig); } - pub fn get_register(&self, reg: RISCVCoreRegs) -> u64{ - self.fd.get_one_reg(reg.into()).unwrap() as u64 + /// Reset kvm_based vCPU registers state by registers state in `CPU`. + pub fn reset(&self) { + self.state.reset_vcpu(self.fd.clone()); } - pub fn reset_vcpu(&self) { - let mp_state = self.fd.get_mp_state().unwrap(); - if mp_state.mp_state == KVM_MP_STATE_STOPPED { - return; - } - self.fd.set_one_reg(RISCVCoreRegs::PC.into(), self.boot_ip as u128) - .expect("Failed to set PC register"); - self.fd.set_one_reg(RISCVCoreRegs::A0.into(), self.id as u128) - .expect("Failed to set hartid to a0"); - self.fd.set_one_reg(RISCVCoreRegs::A1.into(), self.fdt_addr as u128) - .expect("Failed to set fdt to a1"); + + /// Start run `CPU` in seperate vcpu thread. + /// + /// # Arguments + /// + /// - `arc_cpu`: `CPU` wrapper in `Arc` to send safely during thread. + pub fn start(arc_cpu: Arc) -> thread::JoinHandle<()> { + let cpu_id = arc_cpu.id; + thread::Builder::new() + .name(format!("CPU {}/KVM", cpu_id)) + .spawn(move || { + arc_cpu.reset(); + loop { + if !arc_cpu.kvm_vcpu_exec() { + break; + } + } + }) + .expect(&format!("Failed to create thread for CPU {}/KVM", cpu_id)) } - - pub fn kvm_vcpu_exec(&self) -> bool { + /// Run kvm vcpu emulation. + /// + /// # Return value + /// + /// Whether to continue to emulate or not. 
+ fn kvm_vcpu_exec(&self) -> bool { // println!("current PC: 0x{:x}", self.fd.get_one_reg(Riscv64CoreRegs::PC.into()).unwrap()); match self.fd.run().expect("Unhandled error in vcpu emulation!") { VcpuExit::IoIn(addr, data) => { @@ -357,7 +124,7 @@ impl CPU { if let Some(offset) = judge_serial_addr(addr as u64) { data[0] = self.serial.as_ref().unwrap().lock().unwrap().read(offset); } else if let Some(addr) = judge_interrupt_controller_addr(addr){ - let mut ic_guard = self.plic.as_ref().unwrap().lock().unwrap(); + let mut ic_guard = self.state.interrupt_controller.as_ref().unwrap().lock().unwrap(); let res: u32 = ic_guard.mmio_read(addr); drop(ic_guard); let res_bytes = res.as_bytes(); @@ -386,7 +153,7 @@ impl CPU { } } else if let Some(addr) = judge_interrupt_controller_addr(addr as u64){ let res: & u32 = u32::from_bytes(&data).unwrap(); - let mut ic_guard = self.plic.as_ref().unwrap().lock().unwrap(); + let mut ic_guard = self.state.interrupt_controller.as_ref().unwrap().lock().unwrap(); ic_guard.mmio_write(addr, *res); drop(ic_guard); } else { @@ -416,21 +183,4 @@ impl CPU { true } - - pub fn start(arc_cpu: Arc>) -> std::thread::JoinHandle<()> { - let cpu_id = arc_cpu.lock().unwrap().id; - thread::Builder::new() - .name(format!("CPU {}/KVM", cpu_id)) - .spawn(move || { - arc_cpu.lock().unwrap().reset_vcpu(); - loop { - if !arc_cpu.lock().unwrap().kvm_vcpu_exec(){ - break; - } - } - println!("Vcpu{} exit", cpu_id); - }).expect(&format!("Failed to create thread for CPU {}/KVM", cpu_id)) - } } - - diff --git a/src/device/mod.rs b/src/device/mod.rs index 5babf48d0..a43557a4e 100644 --- a/src/device/mod.rs +++ b/src/device/mod.rs @@ -1,44 +1,44 @@ -pub mod plic; -mod fdt; +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + mod serial; -use kvm_ioctls::VmFd; -use crate::{bootloader::Riscv64BootLoader, cpu::CPU, memory::GuestMemory}; -use std::sync::{Arc, Mutex}; -pub use plic::*; -pub use serial::*; +pub use serial::{ + judge_serial_addr, Serial, +}; -pub const FDT_MAX_SIZE: u32 = 0x1_0000; #[derive(Debug)] pub enum Error { Overflow(usize, usize), IoError(std::io::Error), - + Stop(), } impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { - Error::Overflow(len, max_len) => write!(f, "The received buffer {} overflow, max_len: {}", len, max_len ), - Error::IoError(ref e) => { write!(f, "IO errors occurs when read/write memory, error is {}", e) } + Error::Overflow(len, max_len) => write!( + f, + "The received buffer {} overflow, max_len: {}", + len, max_len + ), + Error::IoError(ref e) => { + write!(f, "IO errors occurs when read/write memory, error is {}", e) + }, + Error::Stop() => { + write!(f, "thread stop signal received") + } } - - } - -} -pub type Result = std::result::Result; - -pub fn kvm_setup_fireware(guest_memory: &Arc, vcpus : &Vec>>, vm_fd: Arc, layout : &Riscv64BootLoader) { - let nr_cpu = vcpus[0].lock().unwrap().nr_vcpus as u32; - let plic = PlicState::new(&vcpus, nr_cpu); - let serial = Serial::new(&vm_fd, plic.clone()); - for vcpu in vcpus.iter(){ - vcpu.lock().unwrap().set_serial(serial.clone()); - vcpu.lock().unwrap().set_plic(plic.clone()); } - let cmdline = "console=ttyS0 panic=1 reboot=k root=/dev/ram rdinit=/bin/sh"; - let initrd_range = (layout.initrd_start, layout.initrd_size); - let fdt_addr = layout.dtb_start; - fdt::generate_fdt(guest_memory, initrd_range, 
cmdline, plic.clone(), vcpus[0].clone(), fdt_addr); } +pub type Result = std::result::Result; diff --git a/src/device/serial.rs b/src/device/serial.rs index ba8e4f92d..9a0fdf71e 100644 --- a/src/device/serial.rs +++ b/src/device/serial.rs @@ -19,14 +19,10 @@ use std::thread; use kvm_ioctls::VmFd; use vmm_sys_util::{epoll::EventSet, eventfd::EventFd, terminal::Terminal}; -use super::{Error, Result, PlicState}; -use crate::memory::{MEM_LAYOUT, LayoutEntryType}; +use super::{Error, Result}; +use crate::arch::{MMIO_SERIAL_ADDR, MMIO_SERIAL_ADDR_SIZE, MMIO_SERIAL_IRQ, SerialControl, serial_register_irqfd}; use crate::helper::epoll::{EpollContext, EventNotifier}; -pub const MMIO_SERIAL_IRQ: u32 = 1; -pub const MMIO_SERIAL_ADDR: u64 = MEM_LAYOUT[LayoutEntryType::Mmio as usize].0; -pub const MMIO_SERIAL_ADDR_SIZE: u64 = MEM_LAYOUT[LayoutEntryType::Mmio as usize].1; - const UART_IER_RDI: u8 = 0x01; const UART_IER_THRI: u8 = 0x02; @@ -49,6 +45,8 @@ const UART_MSR_DSR: u8 = 0x20; const UART_MSR_DCD: u8 = 0x80; const RECEIVER_BUFF_SIZE: usize = 1024; +const GOT_ESCAPE_CHAR: u8 = 1 << 0; +const STOP_SIGNAL: u8 = 1 << 2; pub fn judge_serial_addr(addr: u64) -> Option { if (MMIO_SERIAL_ADDR..MMIO_SERIAL_ADDR + MMIO_SERIAL_ADDR_SIZE).contains(&addr) { @@ -85,12 +83,14 @@ pub struct Serial { /// Operation methods. output: Box, /// serial interrupt control - serial_ctrl: Arc>, + serial_ctrl: SerialControl, + /// state control + state: u8, } impl Serial { /// Create a new `Serial` instance with default parameters. 
- pub fn new(vm_fd: &VmFd, serial_ctrl: Arc>) -> Arc> { + pub fn new(vm_fd: &VmFd, serial_ctrl: SerialControl) -> Arc> { std::io::stdin() .lock() .set_raw_mode() @@ -100,6 +100,7 @@ impl Serial { // vm_fd // .register_irqfd(&evt_fd, MMIO_SERIAL_IRQ) // .expect("Failed to register irq fd for serial"); + serial_register_irqfd(&vm_fd, &evt_fd); let serial = Arc::new(Mutex::new(Serial { rbr: VecDeque::new(), ier: 0, @@ -113,15 +114,17 @@ impl Serial { thr_pending: 0, interrupt_evt: evt_fd, output: Box::new(std::io::stdout()), - serial_ctrl: serial_ctrl.clone() + serial_ctrl, + state: 0 })); let serial_clone = serial.clone(); + let serial_clone1 = serial.clone(); let mut epoll = EpollContext::new(); let handler: Box = Box::new(move |event, _| { if event == EventSet::IN && serial_clone.lock().unwrap().stdin_exce().is_err() { println!("Failed to excecute the stdin"); - } + } }); let notifier = EventNotifier::new( @@ -135,6 +138,9 @@ impl Serial { let _ = thread::Builder::new() .name("serial".to_string()) .spawn(move || loop { + if serial_clone1.lock().unwrap().state & STOP_SIGNAL != 0 { + break; + } if !epoll.run() { break; } @@ -159,7 +165,7 @@ impl Serial { self.iir = iir; if iir != UART_IIR_NO_INT { - self.serial_ctrl.lock().unwrap().plic__irq_trig(MMIO_SERIAL_IRQ, true, false);; + self.serial_ctrl.interrupt_trigger(); } Ok(()) @@ -260,7 +266,19 @@ impl Serial { } _ => {} } - + if (self.state & GOT_ESCAPE_CHAR) != 0 { + // self.state &= !GOT_ESCAPE_CHAR; + if ret == 120 { + self.state = STOP_SIGNAL; + println!("set STOP SIGNAL"); + } + if ret == 0x01 { + return ret; + } + } + if ret == 0x01 { + self.state |= GOT_ESCAPE_CHAR; + } ret } diff --git a/src/helper/byte_code.rs b/src/helper/byte_code.rs index 0bc342c7e..9b1dd6f32 100644 --- a/src/helper/byte_code.rs +++ b/src/helper/byte_code.rs @@ -1,35 +1,62 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + use std::mem::size_of; -use std::slice::{ from_raw_parts, from_raw_parts_mut }; +use std::slice::{from_raw_parts, from_raw_parts_mut}; +/// A trait bound defined for types which are safe to convert to a byte slice and +/// to create from a byte slice. pub trait ByteCode: Default + Copy + Send + Sync { + /// Return the contents of an object (impl trait `ByteCode`) as a slice of bytes. + /// the inverse of this function is "from_bytes" fn as_bytes(&self) -> &[u8] { unsafe { from_raw_parts(self as *const Self as *const u8, size_of::()) } } - fn as_mut_bytes(&mut self) ->&mut [u8] { + /// Return the contents of a mutable object (impl trait `ByteCode`) to a mutable slice of bytes. + /// the inverse of this function is "from_bytes_mut" + fn as_mut_bytes(&mut self) -> &mut [u8] { unsafe { from_raw_parts_mut(self as *mut Self as *mut u8, size_of::()) } } + /// Creates an object (impl trait `ByteCode`) from a slice of bytes + /// + /// # Arguments + /// + /// * `data` - the slice of bytes that will be constructed as an object. fn from_bytes(data: &[u8]) -> Option<&Self> { if data.len() != size_of::() { return None; } - let obj_array = unsafe { from_raw_parts::(data.as_ptr() as *const _, data.len()) }; Some(&obj_array[0]) } + /// Creates an mutable object (impl trait `ByteCode`) from a mutable slice of bytes + /// + /// # Arguments + /// + /// * `data` - the slice of bytes that will be constructed as an mutable object. 
fn from_mut_bytes(data: &mut [u8]) -> Option<&mut Self> { if data.len() != size_of::() { return None; } - - let obj_array = unsafe { from_raw_parts_mut::(data.as_mut_ptr() as *mut _, data.len()) }; + let obj_array = + unsafe { from_raw_parts_mut::(data.as_mut_ptr() as *mut _, data.len()) }; Some(&mut obj_array[0]) } } - +// Integer types of Rust satisfy the requirements of `trait ByteCode` impl ByteCode for usize {} impl ByteCode for u8 {} impl ByteCode for u16 {} @@ -40,4 +67,3 @@ impl ByteCode for i8 {} impl ByteCode for i16 {} impl ByteCode for i32 {} impl ByteCode for i64 {} - diff --git a/src/helper/mod.rs b/src/helper/mod.rs index 22c89b112..005f16659 100644 --- a/src/helper/mod.rs +++ b/src/helper/mod.rs @@ -1,4 +1,38 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + pub mod byte_code; pub mod checksum; pub mod epoll; pub mod num_ops; + +/// Macro: Calculate offset of specified field in a type. +#[macro_export] +macro_rules! __offset_of { + ($type_name:ty, $field:ident) => { + unsafe { &(*(std::ptr::null::<$type_name>())).$field as *const _ as usize } + }; +} + +/// Macro: Calculate offset of a field in a recursive type. +/// +/// # Arguments +/// +/// The Arguments is: a type name and its field name, +/// follows by a series of sub-type's name and its field's name. +#[macro_export] +macro_rules! 
offset_of { + ($type_name:ty, $field:ident) => { crate::__offset_of!($type_name, $field) }; + ($type_name:ty, $field:ident, $($sub_type_name:ty, $sub_field:ident), +) => { + crate::__offset_of!($type_name, $field) + crate::offset_of!($($sub_type_name, $sub_field), +) + }; +} diff --git a/src/main.rs b/src/main.rs index 889583e2d..540d8ba09 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,18 +1,35 @@ -mod helper; -mod memory; +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + mod cpu; -mod kvm; -mod bootloader; mod device; -mod device_tree; +#[allow(dead_code)] +#[macro_use] +mod helper; +#[allow(dead_code)] +mod memory; +mod arch; -use cpu::CPU; -use bootloader::kvm_load_kernel; -use device::kvm_setup_fireware; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use kvm_ioctls::Kvm; -use memory::GuestMemory; +use crate::arch::CPUBootConfig; +use crate::cpu::CPU; +use crate::device::Serial; +use crate::memory::GuestMemory; +use crate::arch::{kvm_load_kernel,load_boot_source,arch_init_based_dev,kvm_setup_fireware}; + +// Run a simple VM on x86_64 platfrom. +// Reference: https://lwn.net/Articles/658511/. fn main() { let mem_size = 512 * 1024 * 1024; @@ -23,20 +40,25 @@ fn main() { // 2. Initialize Guest Memory. let guest_memory = GuestMemory::new(&vm_fd, mem_size).expect("Failed to init guest memory"); - // 3. Init vCPU. + // 3. Init kvm_based devices. + arch_init_based_dev(&vm_fd); + + // 4. Init vCPU. 
let vcpu_count = 1_u32; let arc_memory = Arc::new(guest_memory); - let mut vcpu = CPU::new(&vm_fd, arc_memory.clone(), 0, vcpu_count as u8); + let mut vcpu = CPU::new(&vm_fd, arc_memory.clone(), 0, vcpu_count); + + // 5. load boot source and realize vCPU0. + - // 4. load boot source and realize vCPU0. let layout = kvm_load_kernel(&arc_memory,&mut vcpu, &vm_fd); - let vcpus = vec![Arc::new(Mutex::new(vcpu))]; - kvm_setup_fireware(&arc_memory, &vcpus, vm_fd.clone(), &layout); + + let mut vcpus = vec![&mut vcpu]; + kvm_setup_fireware(&arc_memory, &mut vcpus, &vm_fd, &layout); println!("fireware set up !"); // 9. Run vCPU0. - let cpu_task_0 = CPU::start(vcpus[0].clone()); - println!("task created !"); + let cpu_task_0 = CPU::start(Arc::new(vcpu)); println!("Start to run linux kernel!"); cpu_task_0.join().expect("Failed to wait cpu task 0"); } diff --git a/src/memory/guest_memory.rs b/src/memory/guest_memory.rs index 4f923f230..6196f4717 100644 --- a/src/memory/guest_memory.rs +++ b/src/memory/guest_memory.rs @@ -1,33 +1,45 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ use std::sync::Arc; + use kvm_bindings::kvm_userspace_memory_region; use kvm_ioctls::VmFd; use super::host_mmap::HostMemMapping; -use super::{Error, LayoutEntryType, MEM_LAYOUT, Result}; - +use super::{Error, Result}; use crate::helper::byte_code::ByteCode; - +use crate::arch::{LayoutEntryType, MEM_LAYOUT, arch_add_ram_ranges}; #[derive(Clone)] pub struct GuestMemory { host_mmaps: Vec>, } impl GuestMemory { + /// Construct function. pub fn new(vm_fd: &Arc, mem_size: u64) -> Result { let ranges = Self::arch_ram_ranges(mem_size); let mut host_mmaps = Vec::new(); for (index, range) in ranges.iter().enumerate() { - println!("mmap start 0x{:x}, size 0x{:x}", range.0, range.1); let host_mmap = Arc::new(HostMemMapping::new(range.0, range.1)?); host_mmaps.push(host_mmap.clone()); let kvm_region = kvm_userspace_memory_region { slot: index as u32, - guest_phys_addr: host_mmap.guest_addr(), + guest_phys_addr: host_mmap.guest_address(), memory_size: host_mmap.size(), - userspace_addr: host_mmap.host_addr(), - flags: 0 + userspace_addr: host_mmap.host_address(), + flags: 0, }; unsafe { vm_fd @@ -35,78 +47,133 @@ impl GuestMemory { .map_err(Error::KvmSetMR)?; } } - Ok(GuestMemory{ host_mmaps }) + + Ok(GuestMemory { host_mmaps }) } + /// Calculate the ranges of memory according to architecture. + /// + /// # Arguments + /// + /// * `mem_size` - memory size of VM. + /// + /// # Returns + /// + /// A array of ranges, it's element represents (start_addr, size). + /// On x86_64, there is a gap ranged below 4G, which will be skipped. 
pub fn arch_ram_ranges(mem_size: u64) -> Vec<(u64, u64)> { + // ranges is the vector of (start_addr, size) let mut ranges = Vec::<(u64, u64)>::new(); - let gap_start = MEM_LAYOUT[LayoutEntryType::MemRAM as usize].0; - let gap_size = MEM_LAYOUT[LayoutEntryType::MemRAM as usize].1; - ranges.push((gap_start, gap_size)); - + + + arch_add_ram_ranges(mem_size, &mut ranges); ranges } - pub fn find_host_mmap(&self, addr: u64, size: u64) -> Result> { + /// Find corresponding host mem mapping according to guest address. + fn find_host_mmap(&self, addr: u64, size: u64) -> Result> { for host_mmap in &self.host_mmaps { - if addr >= host_mmap.guest_addr() - && addr < host_mmap.guest_addr() + host_mmap.size() - { - if addr + size >= host_mmap.guest_addr() + host_mmap.size() { - return Err(Error::Overflow( - addr - host_mmap.guest_addr(), - size, - host_mmap.size() - )) - } - return Ok(host_mmap.clone()); + if addr >= host_mmap.guest_address() + && addr < host_mmap.guest_address() + host_mmap.size() + { + if addr + size > host_mmap.guest_address() + host_mmap.size() { + return Err(Error::Overflow( + addr - host_mmap.guest_address(), + size, + host_mmap.size(), + )); } + return Ok(host_mmap.clone()); + } } - return Err(Error::HostMmapNotFound(addr)); + Err(Error::HostMmapNotFound(addr)) } + /// Read memory segment to `dst`. + /// + /// # Arguments + /// + /// * `dst` - Destination the data would be written to. + /// * `addr` - Start address. + /// * `count` - Size of data. + /// + /// # Errors + /// + /// Return Error if the `addr` is not mapped. 
pub fn read(&self, dst: &mut dyn std::io::Write, addr: u64, count: u64) -> Result<()> { let host_mmap = self.find_host_mmap(addr, count)?; - let offset = addr - host_mmap.guest_addr(); - let host_addr = host_mmap.host_addr(); + let offset = addr - host_mmap.guest_address(); + let host_addr = host_mmap.host_address(); let slice = unsafe { std::slice::from_raw_parts((host_addr + offset) as *const u8, count as usize) }; dst.write_all(slice).map_err(Error::IoError)?; + Ok(()) } + /// Write data to specified guest address. + /// + /// # Arguments + /// + /// * `src` - Data buffer to write. + /// * `addr` - Start address. + /// * `count` - Size of data. + /// + /// # Errors + /// + /// Return Error if the `addr` is not mapped. pub fn write(&self, src: &mut dyn std::io::Read, addr: u64, count: u64) -> Result<()> { let host_mmap = self.find_host_mmap(addr, count)?; - let offset = addr - host_mmap.guest_addr(); - let host_addr = host_mmap.host_addr(); + let offset = addr - host_mmap.guest_address(); + let host_addr = host_mmap.host_address(); let slice = unsafe { std::slice::from_raw_parts_mut((host_addr + offset) as *mut u8, count as usize) }; src.read_exact(slice).map_err(Error::IoError)?; + Ok(()) } + /// Write an object to memory. + /// + /// # Arguments + /// + /// * `data` - The object that will be written to the memory. + /// * `addr` - The start guest address where the object will be written to. + /// + /// # Note + /// To use this method, it is necessary to implement `ByteCode` trait for your object. pub fn write_object(&self, data: &T, addr: u64) -> Result<()> { self.write(&mut data.as_bytes(), addr, std::mem::size_of::() as u64) } + /// Read some data from memory to form an object. + /// + /// # Arguments + /// + /// * `addr` - The start guest address where the data will be read from. + /// + /// # Note + /// To use this method, it is necessary to implement `ByteCode` trait for your object. 
pub fn read_object(&self, addr: u64) -> Result { let mut obj = T::default(); self.read( &mut obj.as_mut_bytes(), addr, - std::mem::size_of::() as u64 + std::mem::size_of::() as u64, )?; Ok(obj) } + + /// Get guest memory end address. pub fn memory_end_address(&self) -> u64 { let mut end_address = 0; for host_mmap in self.host_mmaps.iter() { - let addr = host_mmap.guest_addr() + host_mmap.size(); + let addr = host_mmap.guest_address() + host_mmap.size(); if addr > end_address { end_address = addr; } diff --git a/src/memory/host_mmap.rs b/src/memory/host_mmap.rs index 7281b5673..b0b3c1b29 100644 --- a/src/memory/host_mmap.rs +++ b/src/memory/host_mmap.rs @@ -1,24 +1,52 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + use super::{Error, Result}; +/// Record information of memory mapping. pub struct HostMemMapping { - // guest physical address of memmapping + /// Address of Host mem mapping. guest_addr: u64, - // memmapping size + /// Size of Host mem mapping. size: u64, - // host virtual address of memmapping + /// Host virtual address of mem mapping. host_addr: u64, } impl HostMemMapping { + /// Construct a new HostMemMapping. + /// + /// # Arguments + /// + /// * `guest_addr` - The start address im memory. + /// * `size` - Size of memory that will be mapped. + /// * `file_back` - Information of file and offset-in-file that backs memory. + /// * `dump_guest_core` - Include guest memory in core file or not. + /// * `is_share` - This mapping is sharable or not. 
+ /// + /// # Errors + /// + /// Return Error if fail to map memory. pub fn new(guest_addr: u64, size: u64) -> Result { + let flags = libc::MAP_ANONYMOUS | libc::MAP_PRIVATE; + let host_addr = unsafe { let hva = libc::mmap( std::ptr::null_mut() as *mut libc::c_void, size as libc::size_t, libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, + flags, -1, - 0 + 0, ); if hva == libc::MAP_FAILED { return Err(Error::Mmap(std::io::Error::last_os_error())); @@ -26,7 +54,6 @@ impl HostMemMapping { hva }; - Ok(HostMemMapping { guest_addr, size, @@ -34,25 +61,29 @@ impl HostMemMapping { }) } + /// Get size of mapped memory. pub fn size(&self) -> u64 { self.size } - pub fn guest_addr(&self) -> u64 { + /// Get start address of mapped memory. + pub fn guest_address(&self) -> u64 { self.guest_addr } - pub fn host_addr(&self) -> u64 { + /// Get start `HVA` (host virtual address) of mapped memory. + pub fn host_address(&self) -> u64 { self.host_addr } } impl Drop for HostMemMapping { + /// Release the memory mapping. fn drop(&mut self) { unsafe { libc::munmap( self.host_addr as *mut libc::c_void, - self.size as libc::size_t + self.size() as libc::size_t, ); } } diff --git a/src/memory/mod.rs b/src/memory/mod.rs index 571da36ec..035faafe6 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -1,13 +1,29 @@ -mod host_mmap; +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ mod guest_memory; +mod host_mmap; pub use guest_memory::GuestMemory; #[derive(Debug)] pub enum Error { + // Overflow occurs when read/write memory, (offset, count, size). Overflow(u64, u64, u64), + // Can not find corresponding HostMemMapping when read/write memory. HostMmapNotFound(u64), + // Failed to mmap. Mmap(std::io::Error), + // IO Error. IoError(std::io::Error), KvmSetMR(kvm_ioctls::Error), } @@ -20,38 +36,23 @@ impl std::fmt::Display for Error { "Failed to read/write memory, offset in host_mmap 0x{:x}, count {}, host_mmap size {}", offset, count, size ), - Error::HostMmapNotFound(addr) => write!( - f, - "Failed to find matched HostMemMaping, addr 0x{:x}", - addr - ), + Error::HostMmapNotFound(addr) => write!(f, "Failed to find matched HostMemMapping, addr 0x{:x}", addr), Error::Mmap(ref e) => write!( f, "Failed to mmap, error is {}", e ), - Error::IoError(ref e) => write!( + Error::IoError(ref e)=> write!( f, - "IO Error occurs when read/write memory, error is {}",e + "IO errors occurs when read/write memory, error is {}", + e ), Error::KvmSetMR(ref e) => write!( f, - "Failed to set memory region to KVM, error is {}", e + "Failed to set memory region to KVM, error is {}", + e ), } } } pub type Result = std::result::Result; - -#[repr(usize)] -pub enum LayoutEntryType { - IrqChip = 0_usize, - Mmio, - MemRAM, -} - -pub const MEM_LAYOUT: &[(u64, u64)] = &[ - (0x0800_0000, 0x0800_0000), - (0x1000_0000, 0x2000_0000), - (0x8000_0000, 0xC000_0000) -]; -- Gitee From 480ae7d8790ca9e2d4a03dfd14bab5cd8989f63e Mon Sep 17 00:00:00 2001 From: sts Date: Sun, 15 Sep 2024 04:21:26 +0000 Subject: [PATCH 8/8] =?UTF-8?q?=E5=AE=8C=E6=88=90=E6=9E=B6=E6=9E=84?= =?UTF-8?q?=E9=80=82=E9=85=8D=E7=9B=B8=E5=85=B3=E5=B7=A5=E4=BD=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 20 +++-------- src/arch/aarch64/cpu/mod.rs | 6 ++-- src/arch/aarch64/device/fdt.rs | 6 ++-- src/arch/aarch64/device/gicv3.rs | 4 +-- 
src/arch/aarch64/device/mod.rs | 25 ++++++++++---- src/arch/aarch64/device/serial.rs | 15 ++++++++- src/arch/mod.rs | 55 ++++++++++++++++--------------- src/arch/riscv64/device/fdt.rs | 2 +- src/arch/riscv64/device/mod.rs | 17 ++++++++-- src/arch/riscv64/device/plic.rs | 1 - src/arch/riscv64/memory/mod.rs | 10 +++--- src/arch/x86_64/cpu/cpuid.rs | 19 +++++++++++ src/arch/x86_64/cpu/mod.rs | 22 ++++++------- src/arch/x86_64/device/mod.rs | 35 +++++++++++++++----- src/arch/x86_64/device/serial.rs | 17 +++++++++- src/arch/x86_64/kvm/mod.rs | 14 ++++---- src/cpu/mod.rs | 50 +++++++++++++--------------- src/device/mod.rs | 6 ++-- src/device/serial.rs | 46 +++++++++----------------- src/main.rs | 2 +- 20 files changed, 215 insertions(+), 157 deletions(-) create mode 100644 src/arch/x86_64/cpu/cpuid.rs diff --git a/Cargo.lock b/Cargo.lock index 4e6bbb5f4..d5d25b7d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9,7 +9,7 @@ dependencies = [ "kvm-bindings", "kvm-ioctls", "libc", - "vmm-sys-util 0.7.0", + "vmm-sys-util", ] [[package]] @@ -20,18 +20,18 @@ checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" [[package]] name = "kvm-bindings" -version = "0.8.2" +version = "0.3.0" dependencies = [ - "vmm-sys-util 0.12.1", + "vmm-sys-util", ] [[package]] name = "kvm-ioctls" -version = "0.1.0" +version = "0.6.0" dependencies = [ "kvm-bindings", "libc", - "vmm-sys-util 0.12.1", + "vmm-sys-util", ] [[package]] @@ -49,13 +49,3 @@ dependencies = [ "bitflags", "libc", ] - -[[package]] -name = "vmm-sys-util" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1435039746e20da4f8d507a72ee1b916f7b4b05af7a91c093d2c6561934ede" -dependencies = [ - "bitflags", - "libc", -] diff --git a/src/arch/aarch64/cpu/mod.rs b/src/arch/aarch64/cpu/mod.rs index 9c366ed5d..9e5f2d880 100644 --- a/src/arch/aarch64/cpu/mod.rs +++ b/src/arch/aarch64/cpu/mod.rs @@ -173,7 +173,7 @@ impl CPUState { pub fn set_boot_config( &mut self, vm_fd: 
&Arc, - vcpu_fd: &VcpuFd, + vcpu_fd: Arc, boot_config: &AArch64CPUBootConfig, ) { self.boot_ip = boot_config.kernel_addr; @@ -195,7 +195,7 @@ impl CPUState { vcpu_fd.vcpu_init(&kvi).expect("Failed to init kvm vcpu"); self.get_mpidr(vcpu_fd); } - pub fn get_mpidr(&mut self, vcpu_fd: &VcpuFd) -> u64 { + pub fn get_mpidr(&mut self, vcpu_fd: Arc) -> u64 { if self.mpidr == UNINIT_MPIDR { self.mpidr = match vcpu_fd.get_one_reg(SYS_MPIDR_EL1) { Ok(mpidr) => mpidr as u64, @@ -205,7 +205,7 @@ impl CPUState { self.mpidr } - pub fn reset_vcpu(&self, vcpu: &VcpuFd) { + pub fn reset_vcpu(&self, vcpu: Arc) { // Configure PSTATE(Processor State), mask all interrupts. let data: u64 = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h; vcpu.set_one_reg(Arm64CoreRegs::USER_PT_REG_PSTATE.into(), data) diff --git a/src/arch/aarch64/device/fdt.rs b/src/arch/aarch64/device/fdt.rs index d7e399dc6..69afbffcd 100644 --- a/src/arch/aarch64/device/fdt.rs +++ b/src/arch/aarch64/device/fdt.rs @@ -11,7 +11,7 @@ pub fn generate_fdt( gic: &GICv3, initrd_range: (u64, u64), cmdline: &str, - cpu: Arc>, + cpu: &mut CPU, fdt_addr: u64, ) { let mut fdt = vec![0; FDT_MAX_SIZE as usize]; @@ -46,13 +46,13 @@ fn generate_memory_node(fdt: &mut Vec, sys_mem: &Arc) { } -fn generate_cpu_node(fdt: &mut Vec, cpu: Arc>) { +fn generate_cpu_node(fdt: &mut Vec, cpu: &mut CPU) { let node = "/cpus"; add_sub_node(fdt, node); set_property_u32(fdt, node, "#address-cells", 0x02); set_property_u32(fdt, node, "#size-cells", 0x0); - let mpidr = cpu.lock().unwrap().state.get_mpidr(&cpu.fd); + let mpidr = cpu.state.get_mpidr(cpu.fd.clone()); let node = format!("/cpus/cpu@{:x}", mpidr); add_sub_node(fdt, &node); set_property_u32( diff --git a/src/arch/aarch64/device/gicv3.rs b/src/arch/aarch64/device/gicv3.rs index ab66a2c28..d4ca063b4 100644 --- a/src/arch/aarch64/device/gicv3.rs +++ b/src/arch/aarch64/device/gicv3.rs @@ -127,11 +127,11 @@ impl GICv3 { fd: 0, flags: 0, }; - + println!("vm create gicv3 device!"); let 
gic_fd = vm .create_device(&mut gic_device) .map_err(Error::CreateKvmDevice)?; - + println!("vm create gicv3 coomplete"); Ok(GICv3 { fd: gic_fd, nr_irqs: max_irq, diff --git a/src/arch/aarch64/device/mod.rs b/src/arch/aarch64/device/mod.rs index c5889006f..229208322 100644 --- a/src/arch/aarch64/device/mod.rs +++ b/src/arch/aarch64/device/mod.rs @@ -1,21 +1,22 @@ pub mod gicv3; pub mod serial; mod fdt; +use crate::arch::CPUState; +use crate::device::{Error, Result}; use gicv3::GICv3; use kvm_ioctls::VmFd; use crate::{arch::aarch64::boot_loader::AArch64BootLoader, cpu::CPU, device::Serial, memory::GuestMemory}; use std::sync::Arc; -use crate::arch::CPUState; -pub fn kvm_setup_fireware(guest_memory: &Arc, vcpus : &Vec>>, vm_fd: &Arc, layout : &AArch64BootLoader) { +pub fn kvm_setup_fireware(guest_memory: &Arc, vcpus : &mut Vec<&mut CPU>, vm_fd: &Arc, layout : &AArch64BootLoader) { - let vcpu_count = vcpus[0].lock().unwrap().state.nr_vcpus; + let vcpu_count = vcpus[0].state.nr_vcpus; let gic = GICv3::new(&vm_fd, vcpu_count as u64, 192).expect("Failed to create GICv3 device"); gic.realize().expect("Failed to realize GICv3 device"); - let serial = Serial::new(&vm_fd); - for vcpu in vcpus.iter() { - vcpu.lock().unwrap().set_serial_dev(serial.clone()); + let serial = Serial::new(&vm_fd, None); + for i in 0..vcpus.len() { + vcpus[i].set_serial_dev(serial.clone()); } let cmdline = "console=ttyS0 panic=1 reboot=k root=/dev/ram rdinit=/bin/sh"; @@ -26,7 +27,17 @@ pub fn kvm_setup_fireware(guest_memory: &Arc, vcpus : &Vec Option { + None +} + +pub fn write_other_addr(_state: &CPUState, _addr: u64, _data: u32) -> Result<()> { + Err(Error::MMIoError()) +} +pub fn read_other_addr(_state: &CPUState, _addr: u64) -> Result { + Err(Error::MMIoError()) +} diff --git a/src/arch/aarch64/device/serial.rs b/src/arch/aarch64/device/serial.rs index c90c53278..63aa57c31 100644 --- a/src/arch/aarch64/device/serial.rs +++ b/src/arch/aarch64/device/serial.rs @@ -1,4 +1,17 @@ +use 
kvm_ioctls::VmFd; +use vmm_sys_util::eventfd::EventFd; use crate::arch::{LayoutEntryType, MEM_LAYOUT}; pub const MMIO_SERIAL_IRQ: u32 = 32; pub const MMIO_SERIAL_ADDR: u64 = MEM_LAYOUT[LayoutEntryType::Mmio as usize].0; -pub const MMIO_SERIAL_ADDR_SIZE: u64 = MEM_LAYOUT[LayoutEntryType::Mmio as usize].1; \ No newline at end of file +pub const MMIO_SERIAL_ADDR_SIZE: u64 = MEM_LAYOUT[LayoutEntryType::Mmio as usize].1; +pub fn serial_register_irqfd(vm_fd: &VmFd, evt_fd: &EventFd) { + vm_fd + .register_irqfd(&evt_fd, MMIO_SERIAL_IRQ) + .expect("Failed to register irq fd for serial"); +} + +pub struct SerialControl {} + +impl SerialControl { + pub fn interrupt_trigger(&self) {} +} diff --git a/src/arch/mod.rs b/src/arch/mod.rs index 9d6015dab..49098529c 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -1,60 +1,61 @@ #[cfg(target_arch = "aarch64")] mod aarch64; #[cfg(target_arch = "aarch64")] -pub use aarch64::kvm::*; +pub use aarch64::boot_loader::{ + kvm_load_kernel, load_kernel, AArch64BootLoader as BootLoader, + AArch64BootLoaderConfig as BootLoaderConfig, +}; #[cfg(target_arch = "aarch64")] pub use aarch64::cpu::AArch64CPUBootConfig as CPUBootConfig; #[cfg(target_arch = "aarch64")] pub use aarch64::cpu::CPUState; #[cfg(target_arch = "aarch64")] -pub use aarch64::boot_loader::{ - load_kernel, kvm_load_kernel,AArch64BootLoader as BootLoader, AArch64BootLoaderConfig as BootLoaderConfig, -}; -#[cfg(target_arch = "aarch64")] -pub use aarch64::device::kvm_setup_fireware; +pub use aarch64::device::{judge_other_addr, kvm_setup_fireware, read_other_addr, write_other_addr}; #[cfg(target_arch = "aarch64")] pub use aarch64::device::serial::*; #[cfg(target_arch = "aarch64")] -pub use aarch64::memory::{LayoutEntryType, MEM_LAYOUT, arch_add_ram_ranges}; - +pub use aarch64::kvm::*; +#[cfg(target_arch = "aarch64")] +pub use aarch64::memory::{arch_add_ram_ranges, LayoutEntryType, MEM_LAYOUT}; #[cfg(target_arch = "x86_64")] mod x86_64; #[cfg(target_arch = "x86_64")] -pub use 
x86_64::kvm::*; -#[cfg(target_arch = "x86_64")] -pub use x86_64::cpu::X86CPUBootConfig as CPUBootConfig; -#[cfg(target_arch = "x86_64")] -pub use x86_64::cpu::CPUState; -#[cfg(target_arch = "x86_64")] pub use x86_64::boot_loader::{ - load_kernel, kvm_load_kernel,X86BootLoader as BootLoader, X86BootLoaderConfig as BootLoaderConfig, + kvm_load_kernel, load_kernel, X86BootLoader as BootLoader, + X86BootLoaderConfig as BootLoaderConfig, }; #[cfg(target_arch = "x86_64")] -pub use x86_64::device::kvm_setup_fireware; +pub use x86_64::cpu::CPUState; +#[cfg(target_arch = "x86_64")] +pub use x86_64::cpu::X86CPUBootConfig as CPUBootConfig; #[cfg(target_arch = "x86_64")] pub use x86_64::device::serial::*; #[cfg(target_arch = "x86_64")] -pub use x86_64::memory::{LayoutEntryType, MEM_LAYOUT, arch_add_ram_ranges}; - +pub use x86_64::device::{judge_other_addr, kvm_setup_fireware, read_other_addr, write_other_addr}; +#[cfg(target_arch = "x86_64")] +pub use x86_64::kvm::*; +#[cfg(target_arch = "x86_64")] +pub use x86_64::memory::{arch_add_ram_ranges, LayoutEntryType, MEM_LAYOUT}; #[cfg(target_arch = "riscv64")] mod riscv64; #[cfg(target_arch = "riscv64")] -pub use riscv64::memory::{LayoutEntryType, MEM_LAYOUT, arch_add_ram_ranges}; -#[cfg(target_arch = "riscv64")] -pub use riscv64::kvm::*; +pub use riscv64::boot_loader::{ + kvm_load_kernel, load_kernel, Riscv64BootLoader as BootLoader, + Riscv64BootLoaderConfig as BootLoaderConfig, +}; #[cfg(target_arch = "riscv64")] pub use riscv64::cpu::Riscv64CPUBootConfig as CPUBootConfig; #[cfg(target_arch = "riscv64")] pub use riscv64::cpu::{CPUState, Riscv64CoreRegs}; #[cfg(target_arch = "riscv64")] -pub use riscv64::boot_loader::{ - load_kernel, kvm_load_kernel,Riscv64BootLoader as BootLoader, Riscv64BootLoaderConfig as BootLoaderConfig, -}; -#[cfg(target_arch = "riscv64")] -pub use riscv64::device::kvm_setup_fireware; +pub use riscv64::device::plic::judge_plic_addr as judge_other_addr; #[cfg(target_arch = "riscv64")] pub use 
riscv64::device::serial::*; #[cfg(target_arch = "riscv64")] -pub use riscv64::device::plic::{judge_plic_addr as judge_interrupt_controller_addr}; +pub use riscv64::device::{kvm_setup_fireware, read_other_addr, write_other_addr}; +#[cfg(target_arch = "riscv64")] +pub use riscv64::kvm::*; +#[cfg(target_arch = "riscv64")] +pub use riscv64::memory::{arch_add_ram_ranges, LayoutEntryType, MEM_LAYOUT}; diff --git a/src/arch/riscv64/device/fdt.rs b/src/arch/riscv64/device/fdt.rs index b05932a2f..a941d7e19 100644 --- a/src/arch/riscv64/device/fdt.rs +++ b/src/arch/riscv64/device/fdt.rs @@ -44,7 +44,7 @@ pub fn generate_fdt( fn generate_memory_node(fdt: &mut Vec, sys_mem: &Arc) { let mem_base = MEM_LAYOUT[LayoutEntryType::Mem as usize].0; - let mem_size = MEM_LAYOUT[LayoutEntryType::Mem as usize].1; + let mem_size = sys_mem.memory_end_address() - mem_base; let node = "/memory"; add_sub_node(fdt, node); set_property_string(fdt, node, "device_type", "memory"); diff --git a/src/arch/riscv64/device/mod.rs b/src/arch/riscv64/device/mod.rs index 294f23676..c7d2c0f64 100644 --- a/src/arch/riscv64/device/mod.rs +++ b/src/arch/riscv64/device/mod.rs @@ -3,6 +3,8 @@ pub mod plic; mod fdt; use plic::*; use serial::SerialControl; +use crate::arch::CPUState; +use crate::device::Result; use kvm_ioctls::VmFd; use crate::{arch::riscv64::boot_loader::Riscv64BootLoader, cpu::CPU, device::Serial, memory::GuestMemory}; @@ -12,7 +14,7 @@ pub fn kvm_setup_fireware(guest_memory: &Arc, vcpus : &mut Vec<&mut let vcpu_count = vcpus[0].state.nr_vcpus; let plic = PlicState::new(&vcpus, vcpu_count); - let serial = Serial::new(&vm_fd, SerialControl::new(plic.clone())); + let serial = Serial::new(&vm_fd, Some(SerialControl::new(plic.clone()))); for i in 0..vcpus.len(){ vcpus[i].set_serial_dev(serial.clone()); vcpus[i].state.set_plic(plic.clone()); @@ -30,4 +32,15 @@ pub fn kvm_setup_fireware(guest_memory: &Arc, vcpus : &mut Vec<&mut fdt_addr, ); println!("generate fdt node complete!"); -} +} + +pub fn 
read_other_addr(state: &mut CPUState, addr: u64) -> Result{ + let mut plic_guard = state.interrupt_controller.as_ref().unwrap().lock().unwrap(); + let res = plic_guard.mmio_read(addr); + Ok(res) +} +pub fn write_other_addr(state: &mut CPUState, addr: u64, data: u32) -> Result<()>{ + let mut plic_guard = state.interrupt_controller.as_ref().unwrap().lock().unwrap(); + plic_guard.mmio_write(addr, data); + Ok(()) +} diff --git a/src/arch/riscv64/device/plic.rs b/src/arch/riscv64/device/plic.rs index 1a82e0d92..6117d63d7 100644 --- a/src/arch/riscv64/device/plic.rs +++ b/src/arch/riscv64/device/plic.rs @@ -266,7 +266,6 @@ impl PlicState { new_val &= !0x1; } self.contexts[context_idx as usize].irq_enable[irq_word as usize] = new_val; - println!("context[{}] enable[{}] write: {}", context_idx, irq_word, new_val); xor_val = old_val ^ new_val; for i in 0..32 { irq = irq_word * 32 + i; diff --git a/src/arch/riscv64/memory/mod.rs b/src/arch/riscv64/memory/mod.rs index ec187bc4a..45786c8f3 100644 --- a/src/arch/riscv64/memory/mod.rs +++ b/src/arch/riscv64/memory/mod.rs @@ -2,7 +2,7 @@ /// The type of memory layout entry on aarch64 #[repr(usize)] pub enum LayoutEntryType { - IrqChip, + IrqChip = 0_usize, Mmio, Mem, } @@ -17,7 +17,9 @@ pub const MEM_LAYOUT: &[(u64, u64)] = &[ pub fn arch_add_ram_ranges(mem_size: u64, ranges: &mut Vec<(u64, u64)>) { - - ranges.push((MEM_LAYOUT[LayoutEntryType::Mem as usize].0, mem_size)); - + if mem_size < ( 1 << 40 ) - MEM_LAYOUT[LayoutEntryType::Mem as usize].0 { + ranges.push((MEM_LAYOUT[LayoutEntryType::Mem as usize].0, mem_size)); + }else { + ranges.push((MEM_LAYOUT[LayoutEntryType::Mem as usize].0, MEM_LAYOUT[LayoutEntryType::Mem as usize].1)); + } } diff --git a/src/arch/x86_64/cpu/cpuid.rs b/src/arch/x86_64/cpu/cpuid.rs new file mode 100644 index 000000000..f92f2d7d5 --- /dev/null +++ b/src/arch/x86_64/cpu/cpuid.rs @@ -0,0 +1,19 @@ +use core::arch::x86_64::__cpuid_count; + +pub fn host_cpuid( + leaf: u32, + subleaf: u32, + eax: *mut u32, 
+ ebx: *mut u32, + ecx: *mut u32, + edx: *mut u32, +) { + unsafe { + let cpuid = __cpuid_count(leaf, subleaf); + + *eax = cpuid.eax; + *ebx = cpuid.ebx; + *ecx = cpuid.ecx; + *edx = cpuid.edx; + } +} diff --git a/src/arch/x86_64/cpu/mod.rs b/src/arch/x86_64/cpu/mod.rs index 0b851afd4..27a6de148 100644 --- a/src/arch/x86_64/cpu/mod.rs +++ b/src/arch/x86_64/cpu/mod.rs @@ -15,10 +15,10 @@ use kvm_bindings::{ KVM_MAX_CPUID_ENTRIES, KVM_MP_STATE_RUNNABLE, KVM_MP_STATE_UNINITIALIZED, }; use kvm_ioctls::{Kvm, VcpuFd, VmFd}; +use std::sync::Arc; -use crate::arch::x86_64::cpu::host_cpuid; - - +mod cpuid; +use cpuid::host_cpuid; const ECX_EPB_SHIFT: u32 = 3; const X86_FEATURE_HYPERVISOR: u32 = 31; @@ -109,12 +109,12 @@ impl CPUState { pub fn set_boot_config( &mut self, _vmfd: &std::sync::Arc, - vcpu_fd: &VcpuFd, + vcpu_fd: Arc, boot_config: &X86CPUBootConfig, ) { - self.setup_lapic(vcpu_fd); + self.setup_lapic(vcpu_fd.clone()); self.setup_regs(&boot_config); - self.setup_sregs(vcpu_fd, &boot_config); + self.setup_sregs(vcpu_fd.clone(), &boot_config); self.setup_fpu(); self.setup_msrs(); } @@ -124,8 +124,8 @@ impl CPUState { /// # Arguments /// /// * `vcpu_fd` - Vcpu file descriptor in kvm. 
- pub fn reset_vcpu(&self, vcpu_fd: &VcpuFd) { - self.setup_cpuid(vcpu_fd); + pub fn reset_vcpu(&self, vcpu_fd: Arc) { + self.setup_cpuid(vcpu_fd.clone()); vcpu_fd .set_lapic(&self.lapic) @@ -148,7 +148,7 @@ impl CPUState { } #[allow(clippy::cast_ptr_alignment)] - fn setup_lapic(&mut self, vcpu_fd: &VcpuFd) { + fn setup_lapic(&mut self, vcpu_fd: Arc) { // Disable nmi and external interrupt before enter protected mode // See: https://elixir.bootlin.com/linux/v4.19.123/source/arch/x86/include/asm/apicdef.h const APIC_LVT0: usize = 0x350; @@ -182,7 +182,7 @@ impl CPUState { }; } - fn setup_sregs(&mut self, vcpu_fd: &VcpuFd, boot_config: &X86CPUBootConfig) { + fn setup_sregs(&mut self, vcpu_fd: Arc, boot_config: &X86CPUBootConfig) { self.sregs = vcpu_fd .get_sregs() .expect("Failed to get spectial register."); @@ -258,7 +258,7 @@ impl CPUState { } } - fn setup_cpuid(&self, vcpu_fd: &VcpuFd) { + fn setup_cpuid(&self, vcpu_fd: Arc) { let sys_fd = match Kvm::new() { Ok(fd) => fd, _ => panic!("setup_cpuid: Open /dev/kvm failed"), diff --git a/src/arch/x86_64/device/mod.rs b/src/arch/x86_64/device/mod.rs index 931655e41..c988058ea 100644 --- a/src/arch/x86_64/device/mod.rs +++ b/src/arch/x86_64/device/mod.rs @@ -1,15 +1,32 @@ pub mod serial; -use std::sync::Arc; - +use crate::arch::CPUState; +use crate::device::{Error, Result}; use kvm_ioctls::VmFd; +use std::sync::Arc; -use crate::{arch::x86_64::boot_loader::X86BootLoader, cpu::CPU, device::Serial, memory::GuestMemory}; +use crate::{ + arch::x86_64::boot_loader::X86BootLoader, cpu::CPU, device::Serial, memory::GuestMemory, +}; +pub fn kvm_setup_fireware( + guest_memory: &Arc, + vcpus: &mut Vec<&mut CPU>, + vm_fd: &Arc, + layout: &X86BootLoader, +) { + let serial = Serial::new(&vm_fd, None); + for i in 0..vcpus.len() { + vcpus[i].set_serial_dev(serial.clone()); + } +} -pub fn kvm_setup_fireware(guest_memory: &Arc,vcpus : &Vec>>, vm_fd: &Arc, layout : &X86BootLoader) { +pub fn judge_other_addr(_addr: u64) -> Option { + None 
+} - let serial = Serial::new(&vm_fd); - for vcpu in vcpus.iter() { - vcpu.lock().unwrap().set_serial_dev(serial.clone()); - } -} +pub fn write_other_addr(_state: &CPUState, _addr: u64, _data: u32) -> Result<()> { + Err(Error::MMIoError()) +} +pub fn read_other_addr(_state: &CPUState, _addr: u64) -> Result { + Err(Error::MMIoError()) +} diff --git a/src/arch/x86_64/device/serial.rs b/src/arch/x86_64/device/serial.rs index eb0787d28..3315ec88f 100644 --- a/src/arch/x86_64/device/serial.rs +++ b/src/arch/x86_64/device/serial.rs @@ -1,3 +1,18 @@ +use kvm_ioctls::VmFd; +use vmm_sys_util::eventfd::EventFd; + pub const MMIO_SERIAL_IRQ: u32 = 4; pub const MMIO_SERIAL_ADDR: u64 = 0x3f8; -pub const MMIO_SERIAL_ADDR_SIZE: u64 = 8; \ No newline at end of file +pub const MMIO_SERIAL_ADDR_SIZE: u64 = 8; + +pub fn serial_register_irqfd(vm_fd: &VmFd, evt_fd: &EventFd) { + vm_fd + .register_irqfd(&evt_fd, MMIO_SERIAL_IRQ) + .expect("Failed to register irq fd for serial"); +} + +pub struct SerialControl {} + +impl SerialControl { + pub fn interrupt_trigger(&self) {} +} diff --git a/src/arch/x86_64/kvm/mod.rs b/src/arch/x86_64/kvm/mod.rs index 6d733647a..d5540d848 100644 --- a/src/arch/x86_64/kvm/mod.rs +++ b/src/arch/x86_64/kvm/mod.rs @@ -1,10 +1,10 @@ -use std::path::PathBuf; -use std::sync::Arc; -use kvm_ioctls::{Kvm, VmFd}; -use kvm_bindings::{kvm_pit_config, KVM_PIT_SPEAKER_DUMMY}; -use crate::memory::{GuestMemory }; use crate::arch::{load_kernel, BootLoader, BootLoaderConfig}; use crate::arch::{LayoutEntryType, MEM_LAYOUT}; +use crate::memory::GuestMemory; +use kvm_bindings::{kvm_pit_config, KVM_PIT_SPEAKER_DUMMY}; +use kvm_ioctls::{Kvm, VmFd}; +use std::path::PathBuf; +use std::sync::Arc; pub fn load_boot_source(guest_memory: &Arc, cmdline: &str) -> BootLoader { let initrd_path = PathBuf::from("/tmp/initrd.img"); let initrd_size = match std::fs::metadata("/tmp/initrd.img") { @@ -37,10 +37,8 @@ pub fn arch_init_based_dev(vm_fd: &Arc) { flags: KVM_PIT_SPEAKER_DUMMY, pad: 
Default::default(), }; - + vm_fd .create_pit2(pit_config) .expect("Failed to create pit2."); } - - diff --git a/src/cpu/mod.rs b/src/cpu/mod.rs index d501e4924..96f612333 100644 --- a/src/cpu/mod.rs +++ b/src/cpu/mod.rs @@ -11,15 +11,15 @@ // See the Mulan PSL v2 for more details. use crate::arch::CPUBootConfig; +use crate::helper::byte_code::ByteCode; +use std::mem::size_of; use std::sync::{Arc, Mutex}; use std::thread; -use std::mem::size_of; -use crate::helper::byte_code::ByteCode; -use kvm_ioctls::{VcpuExit, VcpuFd, VmFd}; use kvm_bindings::KVM_SYSTEM_EVENT_RESET; +use kvm_ioctls::{VcpuExit, VcpuFd, VmFd}; -use crate::arch::{CPUState, Riscv64CoreRegs, judge_interrupt_controller_addr}; +use crate::arch::{judge_other_addr, read_other_addr, write_other_addr, CPUState}; use crate::device::{judge_serial_addr, Serial}; use crate::memory::GuestMemory; @@ -64,7 +64,8 @@ impl CPU { /// Realize vcpu status. /// Get register state from kvm. pub fn realize(&mut self, vm_fd: &Arc, bootconfig: CPUBootConfig) { - self.state.set_boot_config(vm_fd, self.fd.clone(), &bootconfig); + self.state + .set_boot_config(vm_fd, self.fd.clone(), &bootconfig); } /// Reset kvm_based vCPU registers state by registers state in `CPU`. @@ -76,15 +77,14 @@ impl CPU { /// /// # Arguments /// - /// - `arc_cpu`: `CPU` wrapper in `Arc` to send safely during thread. - pub fn start(arc_cpu: Arc) -> thread::JoinHandle<()> { - let cpu_id = arc_cpu.id; + pub fn start(mut vcpu: CPU) -> thread::JoinHandle<()> { + let cpu_id = vcpu.id; thread::Builder::new() .name(format!("CPU {}/KVM", cpu_id)) .spawn(move || { - arc_cpu.reset(); + vcpu.reset(); loop { - if !arc_cpu.kvm_vcpu_exec() { + if !vcpu.kvm_vcpu_exec() { break; } } @@ -97,7 +97,7 @@ impl CPU { /// # Return value /// /// Whether to continue to emulate or not. 
- fn kvm_vcpu_exec(&self) -> bool { + fn kvm_vcpu_exec(&mut self) -> bool { // println!("current PC: 0x{:x}", self.fd.get_one_reg(Riscv64CoreRegs::PC.into()).unwrap()); match self.fd.run().expect("Unhandled error in vcpu emulation!") { VcpuExit::IoIn(addr, data) => { @@ -123,10 +123,8 @@ impl CPU { VcpuExit::MmioRead(addr, mut data) => { if let Some(offset) = judge_serial_addr(addr as u64) { data[0] = self.serial.as_ref().unwrap().lock().unwrap().read(offset); - } else if let Some(addr) = judge_interrupt_controller_addr(addr){ - let mut ic_guard = self.state.interrupt_controller.as_ref().unwrap().lock().unwrap(); - let res: u32 = ic_guard.mmio_read(addr); - drop(ic_guard); + } else if let Some(addr) = judge_other_addr(addr) { + let res = read_other_addr(&mut self.state, addr).unwrap(); let res_bytes = res.as_bytes(); for i in 0..res_bytes.len() { data[i] = res_bytes[i]; @@ -151,11 +149,11 @@ impl CPU { { println!("Failed to write data for serial, offset: {}", offset); } - } else if let Some(addr) = judge_interrupt_controller_addr(addr as u64){ - let res: & u32 = u32::from_bytes(&data).unwrap(); - let mut ic_guard = self.state.interrupt_controller.as_ref().unwrap().lock().unwrap(); - ic_guard.mmio_write(addr, *res); - drop(ic_guard); + } else if let Some(addr) = judge_other_addr(addr as u64) { + let res: &u32 = u32::from_bytes(&data).unwrap(); + if write_other_addr(&mut self.state, addr, *res).is_err() { + println!("Failed to write other mmio, addr: {:#x}", addr); + } } else { let data_len = data.len() as u64; self.sys_mem @@ -163,12 +161,10 @@ impl CPU { .expect("Invalid mmio write."); } } - VcpuExit::SystemEvent(event_type, _ndata) => { - match event_type { - KVM_SYSTEM_EVENT_RESET => {}, - _ => return false - } - } + VcpuExit::SystemEvent(event_type, _ndata) => match event_type { + KVM_SYSTEM_EVENT_RESET => {} + _ => return false, + }, VcpuExit::Hlt => { println!("KVM_EXIT_HLT"); return false; @@ -176,7 +172,7 @@ impl CPU { VcpuExit::Shutdown => { println!("Guest 
shutdown"); - return false; + return true; } r => panic!("Unexpected exit reason: {:?}", r), } diff --git a/src/device/mod.rs b/src/device/mod.rs index a43557a4e..95bb74d71 100644 --- a/src/device/mod.rs +++ b/src/device/mod.rs @@ -20,7 +20,7 @@ pub use serial::{ pub enum Error { Overflow(usize, usize), IoError(std::io::Error), - Stop(), + MMIoError(), } impl std::fmt::Display for Error { @@ -34,8 +34,8 @@ impl std::fmt::Display for Error { Error::IoError(ref e) => { write!(f, "IO errors occurs when read/write memory, error is {}", e) }, - Error::Stop() => { - write!(f, "thread stop signal received") + Error::MMIoError() => { + write!(f, "MMIo Error occurs when read/write mmio memory") } } } diff --git a/src/device/serial.rs b/src/device/serial.rs index 9a0fdf71e..bec1942ef 100644 --- a/src/device/serial.rs +++ b/src/device/serial.rs @@ -20,10 +20,11 @@ use kvm_ioctls::VmFd; use vmm_sys_util::{epoll::EventSet, eventfd::EventFd, terminal::Terminal}; use super::{Error, Result}; -use crate::arch::{MMIO_SERIAL_ADDR, MMIO_SERIAL_ADDR_SIZE, MMIO_SERIAL_IRQ, SerialControl, serial_register_irqfd}; +use crate::arch::{ + serial_register_irqfd, SerialControl, MMIO_SERIAL_ADDR, MMIO_SERIAL_ADDR_SIZE, MMIO_SERIAL_IRQ, +}; use crate::helper::epoll::{EpollContext, EventNotifier}; - const UART_IER_RDI: u8 = 0x01; const UART_IER_THRI: u8 = 0x02; const UART_IIR_NO_INT: u8 = 0x01; @@ -45,8 +46,6 @@ const UART_MSR_DSR: u8 = 0x20; const UART_MSR_DCD: u8 = 0x80; const RECEIVER_BUFF_SIZE: usize = 1024; -const GOT_ESCAPE_CHAR: u8 = 1 << 0; -const STOP_SIGNAL: u8 = 1 << 2; pub fn judge_serial_addr(addr: u64) -> Option { if (MMIO_SERIAL_ADDR..MMIO_SERIAL_ADDR + MMIO_SERIAL_ADDR_SIZE).contains(&addr) { @@ -83,14 +82,14 @@ pub struct Serial { /// Operation methods. output: Box, /// serial interrupt control - serial_ctrl: SerialControl, - /// state control + serial_ctrl: Option, + /// state control state: u8, } impl Serial { /// Create a new `Serial` instance with default parameters. 
- pub fn new(vm_fd: &VmFd, serial_ctrl: SerialControl) -> Arc> { + pub fn new(vm_fd: &VmFd, serial_ctrl: Option) -> Arc> { std::io::stdin() .lock() .set_raw_mode() @@ -115,7 +114,7 @@ impl Serial { interrupt_evt: evt_fd, output: Box::new(std::io::stdout()), serial_ctrl, - state: 0 + state: 0, })); let serial_clone = serial.clone(); @@ -124,7 +123,7 @@ impl Serial { let handler: Box = Box::new(move |event, _| { if event == EventSet::IN && serial_clone.lock().unwrap().stdin_exce().is_err() { println!("Failed to excecute the stdin"); - } + } }); let notifier = EventNotifier::new( @@ -138,9 +137,6 @@ impl Serial { let _ = thread::Builder::new() .name("serial".to_string()) .spawn(move || loop { - if serial_clone1.lock().unwrap().state & STOP_SIGNAL != 0 { - break; - } if !epoll.run() { break; } @@ -153,7 +149,7 @@ impl Serial { /// this method would be called when the interrupt identification changes. fn update_iir(&mut self) -> Result<()> { let mut iir = UART_IIR_NO_INT; - + // Data Ready Interrupt enable && Data Ready Signal if self.ier & UART_IER_RDI != 0 && self.lsr & UART_LSR_DR != 0 { iir &= !UART_IIR_NO_INT; iir |= UART_IIR_RDI; @@ -165,7 +161,11 @@ impl Serial { self.iir = iir; if iir != UART_IIR_NO_INT { - self.serial_ctrl.interrupt_trigger(); + if self.serial_ctrl.is_some() { + self.serial_ctrl.as_ref().unwrap().interrupt_trigger(); + } else { + self.interrupt_evt.write(1).map_err(Error::IoError)?; + } } Ok(()) @@ -178,6 +178,7 @@ impl Serial { } self.rbr.extend(data); + // Data Ready Signal self.lsr |= UART_LSR_DR; self.update_iir()?; @@ -189,10 +190,6 @@ impl Serial { fn stdin_exce(&mut self) -> Result<()> { let mut out = [0_u8; 64]; if let Ok(count) = std::io::stdin().lock().read_raw(&mut out) { - for i in 0..count { - if out[i] == 0x01 && i + 1 < count && out[i + 1] == b'x'{ - } - } self.receive(&out[..count]) } else { Ok(()) @@ -266,19 +263,6 @@ impl Serial { } _ => {} } - if (self.state & GOT_ESCAPE_CHAR) != 0 { - // self.state &= !GOT_ESCAPE_CHAR; - if 
ret == 120 { - self.state = STOP_SIGNAL; - println!("set STOP SIGNAL"); - } - if ret == 0x01 { - return ret; - } - } - if ret == 0x01 { - self.state |= GOT_ESCAPE_CHAR; - } ret } diff --git a/src/main.rs b/src/main.rs index 540d8ba09..b7fe454f8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -58,7 +58,7 @@ fn main() { println!("fireware set up !"); // 9. Run vCPU0. - let cpu_task_0 = CPU::start(Arc::new(vcpu)); + let cpu_task_0 = CPU::start(vcpu); println!("Start to run linux kernel!"); cpu_task_0.join().expect("Failed to wait cpu task 0"); } -- Gitee