diff --git a/kata-containers.spec b/kata-containers.spec index 273cc5bab6c98797e29db16b628ca574199a0e81..cf08d9e26c0955034218c8c73e00d83a460c7290 100644 --- a/kata-containers.spec +++ b/kata-containers.spec @@ -2,7 +2,7 @@ %global debug_package %{nil} %define VERSION 3.2.0 -%define RELEASE 1 +%define RELEASE 2 Name: kata-containers Version: %{VERSION} @@ -86,6 +86,7 @@ install -p -m 750 %{_builddir}/kata-containers/src/runtime/kata-runtime %{buildr install -p -m 750 %{_builddir}/kata-containers/src/runtime/kata-monitor %{buildroot}/usr/bin/ install -p -m 750 %{_builddir}/kata-containers/src/runtime/containerd-shim-kata-v2 %{buildroot}/usr/bin/ install -p -m 640 -D %{_builddir}/kata-containers/src/runtime/config/configuration-qemu.toml %{buildroot}/usr/share/defaults/kata-containers/configuration.toml +install -p -m 640 -D %{_builddir}/kata-containers/src/runtime/config/configuration-stratovirt.toml %{buildroot}/usr/share/defaults/kata-containers/configuration-stratovirt.toml install -p -m 640 ./build/kata-containers-initrd.img %{buildroot}/var/lib/kata/ mkdir -p -m 750 %{buildroot}/usr/share/defaults/kata-containers/ strip %{buildroot}/usr/bin/kata* @@ -104,6 +105,12 @@ strip %{buildroot}/usr/bin/containerd-shim-kata-v2 %doc %changelog +* Tue May 2024 wenyuanlau - 3.2.0-2 +- Type:feature +- ID:NA +- SUG:NA +- DESC:add stratovirt microvm support for kata 3.2.0 + * Tue Feb 2024 Vanient - 3.2.0-1 - Type:feature - ID:NA diff --git a/patches/0001-virtcontainers-Add-StratoVirt-as-a-supported-hypervi.patch b/patches/0001-virtcontainers-Add-StratoVirt-as-a-supported-hypervi.patch new file mode 100644 index 0000000000000000000000000000000000000000..6034d8e5ebc67835108564288c8a0689a38d203b --- /dev/null +++ b/patches/0001-virtcontainers-Add-StratoVirt-as-a-supported-hypervi.patch @@ -0,0 +1,1390 @@ +From 926fc8c05d81a91fd4553253fc25d7e57084a872 Mon Sep 17 00:00:00 2001 +From: Liu Wenyuan +Date: Wed, 23 Aug 2023 16:39:48 +0800 +Subject: [PATCH 1/5] virtcontainers: Add 
StratoVirt as a supported hypervisor + +Initial support of the MicroVM machine type of StratoVirt +hypervisor for the kata go runtime. + +Signed-off-by: Liu Wenyuan +--- + .../cmd/kata-runtime/kata-check_amd64.go | 4 + + src/runtime/virtcontainers/hypervisor.go | 5 + + .../virtcontainers/hypervisor_linux.go | 2 + + src/runtime/virtcontainers/stratovirt.go | 1306 +++++++++++++++++ + 4 files changed, 1317 insertions(+) + create mode 100644 src/runtime/virtcontainers/stratovirt.go + +diff --git a/src/runtime/cmd/kata-runtime/kata-check_amd64.go b/src/runtime/cmd/kata-runtime/kata-check_amd64.go +index fcdb047..c40f5e9 100644 +--- a/src/runtime/cmd/kata-runtime/kata-check_amd64.go ++++ b/src/runtime/cmd/kata-runtime/kata-check_amd64.go +@@ -115,6 +115,8 @@ func setCPUtype(hypervisorType vc.HypervisorType) error { + } + + switch hypervisorType { ++ case vc.StratovirtHypervisor: ++ fallthrough + case vc.FirecrackerHypervisor: + fallthrough + case vc.ClhHypervisor: +@@ -315,6 +317,8 @@ func archHostCanCreateVMContainer(hypervisorType vc.HypervisorType) error { + fallthrough + case vc.ClhHypervisor: + fallthrough ++ case vc.StratovirtHypervisor: ++ fallthrough + case vc.FirecrackerHypervisor: + return kvmIsUsable() + case vc.AcrnHypervisor: +diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go +index 7d0575f..65009fa 100644 +--- a/src/runtime/virtcontainers/hypervisor.go ++++ b/src/runtime/virtcontainers/hypervisor.go +@@ -47,6 +47,9 @@ const ( + // ClhHypervisor is the ICH hypervisor. + ClhHypervisor HypervisorType = "clh" + ++ // StratovirtHypervisor is the StratoVirt hypervisor. ++ StratovirtHypervisor HypervisorType = "stratovirt" ++ + // DragonballHypervisor is the Dragonball hypervisor. 
+ DragonballHypervisor HypervisorType = "dragonball" + +@@ -255,6 +258,8 @@ func (hType *HypervisorType) String() string { + return string(AcrnHypervisor) + case ClhHypervisor: + return string(ClhHypervisor) ++ case StratovirtHypervisor: ++ return string(StratovirtHypervisor) + case MockHypervisor: + return string(MockHypervisor) + default: +diff --git a/src/runtime/virtcontainers/hypervisor_linux.go b/src/runtime/virtcontainers/hypervisor_linux.go +index f419e09..ed73d97 100644 +--- a/src/runtime/virtcontainers/hypervisor_linux.go ++++ b/src/runtime/virtcontainers/hypervisor_linux.go +@@ -36,6 +36,8 @@ func NewHypervisor(hType HypervisorType) (Hypervisor, error) { + return &Acrn{}, nil + case ClhHypervisor: + return &cloudHypervisor{}, nil ++ case StratovirtHypervisor: ++ return &stratovirt{}, nil + case DragonballHypervisor: + return &mockHypervisor{}, nil + case MockHypervisor: +diff --git a/src/runtime/virtcontainers/stratovirt.go b/src/runtime/virtcontainers/stratovirt.go +new file mode 100644 +index 0000000..eac975d +--- /dev/null ++++ b/src/runtime/virtcontainers/stratovirt.go +@@ -0,0 +1,1306 @@ ++//go:build linux ++ ++// Copyright (c) 2023 Huawei Technologies Co.,Ltd. 
++// ++// SPDX-License-Identifier: Apache-2.0 ++// ++ ++package virtcontainers ++ ++import ( ++ "bufio" ++ "context" ++ "fmt" ++ "io" ++ "os" ++ "os/exec" ++ "path/filepath" ++ "regexp" ++ "strconv" ++ "strings" ++ "sync/atomic" ++ "syscall" ++ "time" ++ ++ "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" ++ govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" ++ hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors" ++ "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" ++ "github.com/kata-containers/kata-containers/src/runtime/pkg/uuid" ++ "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" ++ "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" ++ ++ "github.com/pkg/errors" ++ "github.com/sirupsen/logrus" ++) ++ ++// stratovirtTracingTags defines tags for the trace span ++var stratovirtTracingTags = map[string]string{ ++ "source": "runtime", ++ "package": "virtcontainers", ++ "subsystem": "hypervisor", ++ "type": "stratovirt", ++} ++ ++// Constants and type definitions related to StratoVirt hypervisor ++const ( ++ stratovirtStopSandboxTimeoutSecs = 15 ++ defaultStratoVirt = "/usr/bin/stratovirt" ++ defaultStratoVirtMachineType = "microvm" ++ apiSocket = "qmp.socket" ++ debugSocket = "console.socket" ++ virtiofsSocket = "virtiofs_kata.socket" ++ nydusdSock = "nydusd_kata.socket" ++ maxMmioBlkCount = 4 ++ machineTypeMicrovm = "microvm" ++ mmioBus VirtioDriver = "mmio" ++) ++ ++var defaultKernelParames = []Param{ ++ {"reboot", "k"}, ++ {"panic", "1"}, ++ {"net.ifnames", "0"}, ++ {"random.trust_cpu", "on"}, ++} ++ ++var defaultMicroVMParames = []Param{ ++ {"pci", "off"}, ++ {"iommu", "off"}, ++ {"acpi", "off"}, ++} ++ ++var ( ++ blkDriver = map[VirtioDriver]string{ ++ mmioBus: "virtio-blk-device", ++ } ++ netDriver = map[VirtioDriver]string{ ++ mmioBus: "virtio-net-device", ++ } ++ virtiofsDriver = 
map[VirtioDriver]string{ ++ mmioBus: "vhost-user-fs-device", ++ } ++ vsockDriver = map[VirtioDriver]string{ ++ mmioBus: "vhost-vsock-device", ++ } ++ rngDriver = map[VirtioDriver]string{ ++ mmioBus: "virtio-rng-device", ++ } ++ consoleDriver = map[VirtioDriver]string{ ++ mmioBus: "virtio-serial-device", ++ } ++) ++ ++// VirtioDev is the StratoVirt device interface. ++type VirtioDev interface { ++ getParams(config *StratovirtConfig) []string ++} ++ ++type VirtioDriver string ++ ++type blkDevice struct { ++ id string ++ filePath string ++ driver VirtioDriver ++ deviceID string ++} ++ ++func (b blkDevice) getParams(config *StratovirtConfig) []string { ++ var params []string ++ var driveParams []Param ++ var devParams []Param ++ ++ driver := blkDriver[b.driver] ++ driveParams = append(driveParams, Param{"id", b.id}) ++ driveParams = append(driveParams, Param{"file", b.filePath}) ++ driveParams = append(driveParams, Param{"readonly", "on"}) ++ driveParams = append(driveParams, Param{"direct", "off"}) ++ ++ devParams = append(devParams, Param{"drive", b.id}) ++ devParams = append(devParams, Param{"id", b.deviceID}) ++ ++ params = append(params, "-drive", strings.Join(SerializeParams(driveParams, "="), ",")) ++ params = append(params, "-device", fmt.Sprintf("%s,%s", driver, strings.Join(SerializeParams(devParams, "="), ","))) ++ return params ++} ++ ++type netDevice struct { ++ devType string ++ id string ++ ifname string ++ driver VirtioDriver ++ netdev string ++ deviceID string ++ FDs []*os.File ++ mac string ++} ++ ++func (n netDevice) getParams(config *StratovirtConfig) []string { ++ var params []string ++ var netdevParams []Param ++ var devParams []Param ++ ++ driver := netDriver[n.driver] ++ netdevParams = append(netdevParams, Param{"id", n.id}) ++ if len(n.FDs) > 0 { ++ var fdParams []string ++ ++ FDs := config.appendFDs(n.FDs) ++ for _, fd := range FDs { ++ fdParams = append(fdParams, fmt.Sprintf("%d", fd)) ++ } ++ netdevParams = append(netdevParams, Param{"fds", 
strings.Join(fdParams, ":")}) ++ } else if n.ifname != "" { ++ netdevParams = append(netdevParams, Param{"ifname", n.ifname}) ++ } ++ ++ devParams = append(devParams, Param{"netdev", n.id}) ++ devParams = append(devParams, Param{"id", n.deviceID}) ++ if n.mac != "" { ++ devParams = append(devParams, Param{"mac", n.mac}) ++ } ++ ++ params = append(params, "-netdev", fmt.Sprintf("%s,%s", n.devType, strings.Join(SerializeParams(netdevParams, "="), ","))) ++ params = append(params, "-device", fmt.Sprintf("%s,%s", driver, strings.Join(SerializeParams(devParams, "="), ","))) ++ return params ++} ++ ++type virtioFs struct { ++ driver VirtioDriver ++ backend string ++ charID string ++ charDev string ++ tag string ++ deviceID string ++} ++ ++func (v virtioFs) getParams(config *StratovirtConfig) []string { ++ var params []string ++ var charParams []Param ++ var fsParams []Param ++ ++ driver := virtiofsDriver[v.driver] ++ charParams = append(charParams, Param{"id", v.charID}) ++ charParams = append(charParams, Param{"path", config.fsSockPath}) ++ ++ fsParams = append(fsParams, Param{"chardev", v.charDev}) ++ fsParams = append(fsParams, Param{"tag", v.tag}) ++ fsParams = append(fsParams, Param{"id", v.deviceID}) ++ ++ params = append(params, "-chardev", fmt.Sprintf("%s,%s,server,nowait", v.backend, strings.Join(SerializeParams(charParams, "="), ","))) ++ params = append(params, "-device", fmt.Sprintf("%s,%s", driver, strings.Join(SerializeParams(fsParams, "="), ","))) ++ return params ++} ++ ++type vhostVsock struct { ++ driver VirtioDriver ++ id string ++ guestID string ++ VHostFD *os.File ++} ++ ++func (v vhostVsock) getParams(config *StratovirtConfig) []string { ++ var params []string ++ var devParams []Param ++ ++ driver := vsockDriver[v.driver] ++ devParams = append(devParams, Param{"id", v.id}) ++ devParams = append(devParams, Param{"guest-cid", v.guestID}) ++ ++ if v.VHostFD != nil { ++ FDs := config.appendFDs([]*os.File{v.VHostFD}) ++ devParams = append(devParams, 
Param{"vhostfd", fmt.Sprintf("%d", FDs[0])}) ++ } ++ ++ params = append(params, "-device", fmt.Sprintf("%s,%s", driver, strings.Join(SerializeParams(devParams, "="), ","))) ++ return params ++} ++ ++type rngDevice struct { ++ id string ++ fileName string ++ driver VirtioDriver ++ deviceID string ++ rng string ++} ++ ++func (r rngDevice) getParams(config *StratovirtConfig) []string { ++ var params []string ++ var objParams []Param ++ var devParams []Param ++ ++ driver := rngDriver[r.driver] ++ objParams = append(objParams, Param{"id", r.id}) ++ objParams = append(objParams, Param{"filename", r.fileName}) ++ ++ devParams = append(devParams, Param{"rng", r.rng}) ++ devParams = append(devParams, Param{"id", r.deviceID}) ++ ++ params = append(params, "-object", fmt.Sprintf("rng-random,%s", strings.Join(SerializeParams(objParams, "="), ","))) ++ params = append(params, "-device", fmt.Sprintf("%s,%s", driver, strings.Join(SerializeParams(devParams, "="), ","))) ++ return params ++} ++ ++type consoleDevice struct { ++ driver VirtioDriver ++ id string ++ backend string ++ charID string ++ devType string ++ charDev string ++ deviceID string ++} ++ ++func (c consoleDevice) getParams(config *StratovirtConfig) []string { ++ var params []string ++ var devParams []Param ++ var charParams []Param ++ var conParams []Param ++ ++ driver := consoleDriver[c.driver] ++ if c.id != "" { ++ devParams = append(devParams, Param{"id", c.id}) ++ } ++ ++ conParams = append(conParams, Param{"chardev", c.charDev}) ++ conParams = append(conParams, Param{"id", c.deviceID}) ++ params = append(params, "-device", fmt.Sprintf("%s,%s", driver, strings.Join(SerializeParams(devParams, "="), ","))) ++ ++ charParams = append(charParams, Param{"id", c.charID}) ++ charParams = append(charParams, Param{"path", config.consolePath}) ++ params = append(params, "-chardev", fmt.Sprintf("%s,%s,server,nowait", c.backend, strings.Join(SerializeParams(charParams, "="), ","))) ++ params = append(params, "-device", 
fmt.Sprintf("%s,%s,nr=0", c.devType, strings.Join(SerializeParams(conParams, "="), ","))) ++ return params ++} ++ ++// StratovirtConfig keeps the custom settings and parameters to start virtual machine. ++type StratovirtConfig struct { ++ name string ++ uuid string ++ machineType string ++ vmPath string ++ smp uint32 ++ memory uint64 ++ kernelPath string ++ kernelAdditionalParams string ++ rootfsPath string ++ initrdPath string ++ devices []VirtioDev ++ qmpSocketPath govmmQemu.QMPSocket ++ consolePath string ++ fsSockPath string ++ fds []*os.File ++} ++ ++func (config *StratovirtConfig) appendFDs(fds []*os.File) []int { ++ var fdInts []int ++ ++ oldLen := len(config.fds) ++ ++ config.fds = append(config.fds, fds...) ++ ++ // The magic 3 offset comes from https://golang.org/src/os/exec/exec.go: ++ // ExtraFiles specifies additional open files to be inherited by the ++ // new process. It does not include standard input, standard output, or ++ // standard error. If non-nil, entry i becomes file descriptor 3+i. ++ // This means that arbitrary file descriptors fd0, fd1... fdN passed in ++ // the array will be presented to the guest as consecutive descriptors ++ // 3, 4... N+3. The golang library internally relies on dup2() to do ++ // the renumbering. ++ for i := range fds { ++ fdInts = append(fdInts, oldLen+3+i) ++ } ++ ++ return fdInts ++} ++ ++// State keeps StratoVirt device and pids state. 
++type State struct { ++ mmioBlkSlots [maxMmioBlkCount]bool ++ pid int ++ virtiofsPid int ++} ++ ++type stratovirt struct { ++ id string ++ path string ++ ctx context.Context ++ fds []*os.File ++ config HypervisorConfig ++ qmpMonitorCh qmpChannel ++ svConfig StratovirtConfig ++ state State ++ stopped atomic.Bool ++ virtiofsDaemon VirtiofsDaemon ++} ++ ++func (s *stratovirt) getKernelParams(machineType string, initrdPath string) (string, error) { ++ var kernelParams []Param ++ ++ if initrdPath == "" { ++ params, err := GetKernelRootParams(s.config.RootfsType, true, false) ++ if err != nil { ++ return "", err ++ } ++ kernelParams = params ++ } ++ ++ // Take the default parameters. ++ kernelParams = append(kernelParams, defaultKernelParames...) ++ if machineType == "microvm" { ++ kernelParams = append(kernelParams, defaultMicroVMParames...) ++ } ++ ++ if s.config.Debug { ++ kernelParams = append(kernelParams, []Param{ ++ {"debug", ""}, ++ {"console", "hvc0"}, ++ }...) ++ } else { ++ kernelParams = append(kernelParams, []Param{ ++ {"quiet", ""}, ++ {"8250.nr_uarts", "0"}, ++ {"agent.log_vport", fmt.Sprintf("%d", vSockLogsPort)}, ++ }...) ++ } ++ ++ kernelParams = append(s.config.KernelParams, kernelParams...) 
++ strParams := SerializeParams(kernelParams, "=") ++ ++ return strings.Join(strParams, " "), nil ++} ++ ++func (s *stratovirt) createQMPSocket(vmPath string) govmmQemu.QMPSocket { ++ socketPath := filepath.Join(vmPath, apiSocket) ++ ++ s.qmpMonitorCh = qmpChannel{ ++ ctx: s.ctx, ++ path: socketPath, ++ } ++ ++ return govmmQemu.QMPSocket{ ++ Type: "unix", ++ Name: s.qmpMonitorCh.path, ++ Server: true, ++ NoWait: true, ++ } ++} ++ ++// Logger returns a logrus logger appropriate for logging StratoVirt messages ++func (s *stratovirt) Logger() *logrus.Entry { ++ return virtLog.WithField("subsystem", "stratovirt") ++} ++ ++func (s *stratovirt) consoleSocketPath(id string) (string, error) { ++ return utils.BuildSocketPath(s.config.VMStorePath, id, debugSocket) ++} ++ ++func (s *stratovirt) virtiofsSocketPath(id string) (string, error) { ++ return utils.BuildSocketPath(s.config.VMStorePath, id, virtiofsSocket) ++} ++ ++func (s *stratovirt) nydusdSocketPath(id string) (string, error) { ++ return utils.BuildSocketPath(s.config.VMStorePath, id, nydusdSock) ++} ++ ++func (s *stratovirt) qmpSetup() error { ++ s.qmpMonitorCh.Lock() ++ defer s.qmpMonitorCh.Unlock() ++ ++ if s.qmpMonitorCh.qmp != nil { ++ return nil ++ } ++ ++ events := make(chan govmmQemu.QMPEvent) ++ go s.loopQMPEvent(events) ++ ++ cfg := govmmQemu.QMPConfig{ ++ Logger: newQMPLogger(), ++ EventCh: events, ++ } ++ ++ // Auto-closed by QMPStart(). 
++ disconnectCh := make(chan struct{}) ++ ++ qmp, _, err := govmmQemu.QMPStart(s.qmpMonitorCh.ctx, s.qmpMonitorCh.path, cfg, disconnectCh) ++ if err != nil { ++ s.Logger().WithError(err).Error("Failed to connect to StratoVirt instance") ++ return err ++ } ++ ++ err = qmp.ExecuteQMPCapabilities(s.qmpMonitorCh.ctx) ++ if err != nil { ++ qmp.Shutdown() ++ s.Logger().WithError(err).Error(qmpCapErrMsg) ++ return err ++ } ++ s.qmpMonitorCh.qmp = qmp ++ s.qmpMonitorCh.disconn = disconnectCh ++ ++ return nil ++} ++ ++func (s *stratovirt) loopQMPEvent(event chan govmmQemu.QMPEvent) { ++ for e := range event { ++ s.Logger().WithField("event", e).Debug("got QMP event") ++ } ++ s.Logger().Infof("QMP event channel closed") ++} ++ ++func (s *stratovirt) qmpShutdown() { ++ s.qmpMonitorCh.Lock() ++ defer s.qmpMonitorCh.Unlock() ++ ++ if s.qmpMonitorCh.qmp != nil { ++ s.qmpMonitorCh.qmp.Shutdown() ++ // wait on disconnected channel to be sure that the qmp ++ // has been closed cleanly. ++ <-s.qmpMonitorCh.disconn ++ s.qmpMonitorCh.qmp = nil ++ s.qmpMonitorCh.disconn = nil ++ } ++} ++ ++func (s *stratovirt) createDevices() []VirtioDev { ++ var devices []VirtioDev ++ ctx := s.ctx ++ ++ // Set random device. ++ devices = s.appendRng(ctx, devices) ++ ++ // Set serial console device for Debug. 
++ if s.config.Debug { ++ devices = s.appendConsole(ctx, devices) ++ } ++ ++ if s.svConfig.initrdPath == "" { ++ devices = s.appendBlock(ctx, devices) ++ if s.svConfig.machineType == machineTypeMicrovm { ++ s.state.mmioBlkSlots[0] = true ++ } ++ } ++ ++ return devices ++} ++ ++func (s *stratovirt) appendBlock(ctx context.Context, devices []VirtioDev) []VirtioDev { ++ devices = append(devices, blkDevice{ ++ id: "rootfs", ++ filePath: s.svConfig.rootfsPath, ++ deviceID: "virtio-blk0", ++ driver: mmioBus, ++ }) ++ ++ return devices ++} ++ ++func (s *stratovirt) appendRng(ctx context.Context, devices []VirtioDev) []VirtioDev { ++ devices = append(devices, rngDevice{ ++ id: "objrng0", ++ fileName: s.config.EntropySource, ++ rng: "objrng0", ++ deviceID: "virtio-rng0", ++ driver: mmioBus, ++ }) ++ ++ return devices ++} ++ ++func (s *stratovirt) appendConsole(ctx context.Context, devices []VirtioDev) []VirtioDev { ++ devices = append(devices, consoleDevice{ ++ id: "virtio-serial0", ++ backend: "socket", ++ charID: "charconsole0", ++ devType: "virtconsole", ++ charDev: "charconsole0", ++ deviceID: "virtio-console0", ++ driver: mmioBus, ++ }) ++ ++ return devices ++} ++ ++func (s *stratovirt) appendVhostVsock(ctx context.Context, devices []VirtioDev, vsock types.VSock) []VirtioDev { ++ devices = append(devices, vhostVsock{ ++ id: "vsock-id", ++ guestID: fmt.Sprintf("%d", vsock.ContextID), ++ VHostFD: vsock.VhostFd, ++ driver: mmioBus, ++ }) ++ ++ return devices ++} ++ ++func (s *stratovirt) appendNetwork(ctx context.Context, devices []VirtioDev, endpoint Endpoint) []VirtioDev { ++ name := endpoint.Name() ++ ++ devices = append(devices, netDevice{ ++ devType: "tap", ++ id: name, ++ ifname: endpoint.NetworkPair().TapInterface.TAPIface.Name, ++ netdev: name, ++ deviceID: name, ++ FDs: endpoint.NetworkPair().TapInterface.VMFds, ++ mac: endpoint.HardwareAddr(), ++ driver: mmioBus, ++ }) ++ ++ return devices ++} ++ ++func (s *stratovirt) appendVirtioFs(ctx context.Context, devices 
[]VirtioDev, volume types.Volume) []VirtioDev { ++ if s.config.SharedFS != config.VirtioFS && s.config.SharedFS != config.VirtioFSNydus { ++ return devices ++ } ++ name := "virtio_fs" ++ ++ devices = append(devices, virtioFs{ ++ backend: "socket", ++ // Virtio-fs must be bound to unique charDev, it uses the same name. ++ charID: name, ++ charDev: name, ++ tag: volume.MountTag, ++ deviceID: "virtio-fs0", ++ driver: mmioBus, ++ }) ++ ++ return devices ++} ++ ++func (s *stratovirt) setVMConfig(id string, hypervisorConfig *HypervisorConfig) error { ++ span, _ := katatrace.Trace(s.ctx, s.Logger(), "setStratoVirtUp", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) ++ defer span.End() ++ ++ if err := validateHypervisorConfig(hypervisorConfig); err != nil { ++ return err ++ } ++ ++ s.id = id ++ if err := s.setConfig(hypervisorConfig); err != nil { ++ return err ++ } ++ ++ machineType := strings.ToLower(s.config.HypervisorMachineType) ++ if machineType == "" { ++ machineType = defaultStratoVirtMachineType ++ } ++ ++ initrdPath, err := s.config.InitrdAssetPath() ++ if err != nil { ++ return err ++ } ++ ++ imagePath, err := s.config.ImageAssetPath() ++ if err != nil { ++ return err ++ } ++ ++ kernelPath, err := s.config.KernelAssetPath() ++ if err != nil { ++ return err ++ } ++ ++ kernelParams, err := s.getKernelParams(machineType, initrdPath) ++ if err != nil { ++ return err ++ } ++ ++ vmPath := filepath.Join(s.config.VMStorePath, s.id) ++ qmpSocket := s.createQMPSocket(vmPath) ++ ++ s.svConfig = StratovirtConfig{ ++ name: fmt.Sprintf("sandbox-%s", id), ++ uuid: uuid.Generate().String(), ++ machineType: machineType, ++ vmPath: vmPath, ++ smp: s.config.NumVCPUs, ++ memory: uint64(s.config.MemorySize), ++ kernelPath: kernelPath, ++ kernelAdditionalParams: kernelParams, ++ rootfsPath: imagePath, ++ initrdPath: initrdPath, ++ qmpSocketPath: qmpSocket, ++ consolePath: filepath.Join(vmPath, debugSocket), ++ fsSockPath: filepath.Join(vmPath, virtiofsSocket), ++ } ++ 
++ s.svConfig.devices = s.createDevices() ++ ++ return nil ++} ++ ++func (s *stratovirt) setupVirtiofsDaemon(ctx context.Context) (err error) { ++ if s.config.SharedFS == config.NoSharedFS { ++ return nil ++ } ++ ++ if s.virtiofsDaemon == nil { ++ return errors.New("No stratovirt virtiofsDaemon configuration") ++ } ++ ++ s.Logger().Info("Starting virtiofsDaemon") ++ ++ pid, err := s.virtiofsDaemon.Start(ctx, func() { ++ s.StopVM(ctx, false) ++ }) ++ if err != nil { ++ return err ++ } ++ s.state.virtiofsPid = pid ++ ++ return nil ++} ++ ++func (s *stratovirt) stopVirtiofsDaemon(ctx context.Context) (err error) { ++ if s.state.virtiofsPid == 0 { ++ s.Logger().Warn("The virtiofsd had stopped") ++ return nil ++ } ++ ++ err = s.virtiofsDaemon.Stop(ctx) ++ if err != nil { ++ return err ++ } ++ ++ s.state.virtiofsPid = 0 ++ ++ return nil ++} ++ ++// Get StratoVirt binary path. ++func (s *stratovirt) binPath() (string, error) { ++ path, err := s.config.HypervisorAssetPath() ++ if err != nil { ++ return "", err ++ } ++ ++ if path == "" { ++ path = defaultStratoVirt ++ } ++ ++ if _, err = os.Stat(path); os.IsNotExist(err) { ++ return "", fmt.Errorf("StratoVirt path (%s) does not exist", path) ++ } ++ return path, nil ++} ++ ++// Log StratoVirt errors and ensure the StratoVirt process is reaped after ++// termination ++func (s *stratovirt) logAndWait(stratovirtCmd *exec.Cmd, reader io.ReadCloser) { ++ s.state.pid = stratovirtCmd.Process.Pid ++ s.Logger().Infof("Start logging StratoVirt (Pid=%d)", s.state.pid) ++ scanner := bufio.NewScanner(reader) ++ infoRE := regexp.MustCompile("([^:]):INFO: ") ++ warnRE := regexp.MustCompile("([^:]):WARN: ") ++ for scanner.Scan() { ++ text := scanner.Text() ++ if infoRE.MatchString(text) { ++ text = infoRE.ReplaceAllString(text, "$1") ++ s.Logger().WithField("StratoVirt Pid", s.state.pid).Info(text) ++ } else if warnRE.MatchString(text) { ++ text = warnRE.ReplaceAllString(text, "$1") ++ s.Logger().WithField("StratoVirt Pid", 
s.state.pid).Warn(text) ++ } else { ++ s.Logger().WithField("StratoVirt Pid", s.state.pid).Error(text) ++ } ++ } ++ s.Logger().Infof("Stop logging StratoVirt (Pid=%d)", s.state.pid) ++ stratovirtCmd.Wait() ++} ++ ++// waitVM will wait for the Sandbox's VM to be up and running. ++func (s *stratovirt) waitVM(ctx context.Context, timeout int) error { ++ span, _ := katatrace.Trace(ctx, s.Logger(), "waitVM", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) ++ defer span.End() ++ ++ if timeout < 0 { ++ return fmt.Errorf("Invalid timeout %ds", timeout) ++ } ++ ++ cfg := govmmQemu.QMPConfig{Logger: newQMPLogger()} ++ ++ var qmp *govmmQemu.QMP ++ var disconnectCh chan struct{} ++ var ver *govmmQemu.QMPVersion ++ var err error ++ ++ // clear any possible old state before trying to connect again. ++ s.qmpShutdown() ++ timeStart := time.Now() ++ for { ++ disconnectCh = make(chan struct{}) ++ qmp, ver, err = govmmQemu.QMPStart(s.qmpMonitorCh.ctx, s.qmpMonitorCh.path, cfg, disconnectCh) ++ if err == nil { ++ break ++ } ++ ++ if int(time.Since(timeStart).Seconds()) > timeout { ++ return fmt.Errorf("Failed to connect StratoVirt instance (timeout %ds): %v", timeout, err) ++ } ++ ++ time.Sleep(time.Duration(50) * time.Millisecond) ++ } ++ s.qmpMonitorCh.qmp = qmp ++ s.qmpMonitorCh.disconn = disconnectCh ++ defer s.qmpShutdown() ++ ++ s.Logger().WithFields(logrus.Fields{ ++ "qmp-major-version": ver.Major, ++ "qmp-minor-version": ver.Minor, ++ "qmp-micro-version": ver.Micro, ++ "qmp-Capabilities": strings.Join(ver.Capabilities, ","), ++ }).Infof("QMP details") ++ ++ if err = s.qmpMonitorCh.qmp.ExecuteQMPCapabilities(s.qmpMonitorCh.ctx); err != nil { ++ s.Logger().WithError(err).Error(qmpCapErrMsg) ++ return err ++ } ++ ++ return nil ++} ++ ++func (s *stratovirt) createParams(params *[]string) { ++ *params = append(*params, "-name", s.svConfig.name) ++ *params = append(*params, "-uuid", s.svConfig.uuid) ++ *params = append(*params, "-smp", 
strconv.Itoa(int(s.svConfig.smp))) ++ *params = append(*params, "-m", strconv.Itoa(int(s.svConfig.memory))) ++ *params = append(*params, "-kernel", s.svConfig.kernelPath) ++ *params = append(*params, "-append", s.svConfig.kernelAdditionalParams) ++ *params = append(*params, "-qmp", fmt.Sprintf("%s:%s,server,nowait", s.svConfig.qmpSocketPath.Type, s.svConfig.qmpSocketPath.Name)) ++ *params = append(*params, "-D") ++ *params = append(*params, "-disable-seccomp") ++ ++ if s.config.SharedFS == config.VirtioFS || s.config.SharedFS == config.VirtioFSNydus { ++ *params = append(*params, "-machine", fmt.Sprintf("type=%s,dump-guest-core=off,mem-share=on", s.svConfig.machineType)) ++ } else { ++ *params = append(*params, "-machine", fmt.Sprintf("type=%s,dump-guest-core=off", s.svConfig.machineType)) ++ } ++ ++ if s.svConfig.initrdPath != "" { ++ *params = append(*params, "-initrd", s.svConfig.initrdPath) ++ } ++ ++ for _, d := range s.svConfig.devices { ++ *params = append(*params, d.getParams(&s.svConfig)...) ++ } ++} ++ ++// cleanupVM will remove generated files and directories related with VM. 
++func (s *stratovirt) cleanupVM(force bool) error { ++ link, err := filepath.EvalSymlinks(s.svConfig.vmPath) ++ if err != nil { ++ s.Logger().WithError(err).Warn("Failed to get evaluation of any symbolic links.") ++ } ++ ++ s.Logger().WithFields(logrus.Fields{ ++ "link": link, ++ "dir": s.svConfig.vmPath, ++ }).Infof("cleanup vm path") ++ ++ if err := os.RemoveAll(s.svConfig.vmPath); err != nil { ++ if !force { ++ return err ++ } ++ s.Logger().WithError(err).Warnf("Failed to clean up vm dir %s", s.svConfig.vmPath) ++ } ++ ++ if link != s.svConfig.vmPath && link != "" { ++ if errRemove := os.RemoveAll(link); errRemove != nil { ++ if !force { ++ return errRemove ++ } ++ s.Logger().WithError(errRemove).WithField("link", link).Warnf("Failed to remove vm path link %s", link) ++ } ++ } ++ ++ if s.config.VMid != "" { ++ dir := filepath.Join(s.config.VMStorePath, s.config.VMid) ++ if err := os.RemoveAll(dir); err != nil { ++ if !force { ++ return err ++ } ++ s.Logger().WithError(err).WithField("path", dir).Warn("failed to remove vm path") ++ } ++ } ++ ++ return nil ++} ++ ++func (s *stratovirt) setupMmioSlot(Name string, isPut bool) (int, error) { ++ Name = filepath.Base(strings.ToLower(Name)) ++ ++ if strings.HasPrefix(Name, "vd") { ++ charStr := strings.TrimPrefix(Name, "vd") ++ if charStr == "" || charStr[0] < 'a' || charStr[0] >= 'a'+maxMmioBlkCount { ++ return 0, fmt.Errorf("Could not parse idx from Name %q", Name) ++ } ++ ++ char := []rune(charStr) ++ idx := int(char[0] - 'a') ++ ++ if !isPut && s.state.mmioBlkSlots[idx] { ++ return 0, fmt.Errorf("failed to setup mmio slot, slot is being used %q", charStr) ++ } ++ s.state.mmioBlkSlots[idx] = !isPut ++ ++ return idx, nil ++ } ++ ++ return 0, fmt.Errorf("failed to setup mmio slot, Name is invalid %q", Name) ++} ++ ++func (s *stratovirt) getDevSlot(Name string) (int, error) { ++ slot, err := s.setupMmioSlot(Name, false) ++ if err != nil { ++ return 0, err ++ } ++ ++ return slot, nil ++} ++ ++func (s *stratovirt) delDevSlot(Name string) error { ++ if _, err := 
s.setupMmioSlot(Name, true); err != nil { ++ return err ++ } ++ ++ return nil ++} ++ ++func (s *stratovirt) hotplugBlk(ctx context.Context, drive *config.BlockDrive, op Operation) error { ++ err := s.qmpSetup() ++ if err != nil { ++ return err ++ } ++ ++ driver := "virtio-blk-mmio" ++ ++ defer func() { ++ if err != nil { ++ s.qmpMonitorCh.qmp.ExecuteBlockdevDel(s.qmpMonitorCh.ctx, drive.ID) ++ if errDel := s.delDevSlot(drive.VirtPath); errDel != nil { ++ s.Logger().WithError(errDel).Warn("Failed to delete device slot.") ++ } ++ } ++ }() ++ ++ switch op { ++ case AddDevice: ++ sblkDevice := govmmQemu.BlockDevice{ ++ ID: drive.ID, ++ File: drive.File, ++ ReadOnly: drive.ReadOnly, ++ AIO: govmmQemu.BlockDeviceAIO("native"), ++ } ++ if err := s.qmpMonitorCh.qmp.ExecuteBlockdevAdd(s.qmpMonitorCh.ctx, &sblkDevice); err != nil { ++ return err ++ } ++ ++ slot, err := s.getDevSlot(drive.VirtPath) ++ if err != nil { ++ return err ++ } ++ ++ devAddr := fmt.Sprintf("%d", slot) ++ if err := s.qmpMonitorCh.qmp.ExecutePCIDeviceAdd(s.qmpMonitorCh.ctx, drive.ID, drive.ID, driver, devAddr, "", "", 0, false, false); err != nil { ++ return err ++ } ++ case RemoveDevice: ++ if errDel := s.delDevSlot(drive.VirtPath); errDel != nil { ++ s.Logger().WithError(errDel).Warn("Failed to delete device slot.") ++ } ++ if err := s.qmpMonitorCh.qmp.ExecuteDeviceDel(s.qmpMonitorCh.ctx, drive.ID); err != nil { ++ return err ++ } ++ ++ default: ++ return fmt.Errorf("operation is not supported %d", op) ++ } ++ ++ return nil ++} ++ ++func (s *stratovirt) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) { ++ virtiofsdSocketPath, err := s.virtiofsSocketPath(s.id) ++ if err != nil { ++ return nil, err ++ } ++ ++ if s.config.SharedFS == config.VirtioFSNydus { ++ apiSockPath, err := s.nydusdSocketPath(s.id) ++ if err != nil { ++ return nil, err ++ } ++ nd := &nydusd{ ++ path: s.config.VirtioFSDaemon, ++ sockPath: virtiofsdSocketPath, ++ apiSockPath: apiSockPath, ++ sourcePath: sharedPath, ++ 
debug: s.config.Debug, ++ extraArgs: s.config.VirtioFSExtraArgs, ++ startFn: startInShimNS, ++ } ++ nd.setupShareDirFn = nd.setupPassthroughFS ++ return nd, nil ++ } ++ ++ // default use virtiofsd ++ return &virtiofsd{ ++ path: s.config.VirtioFSDaemon, ++ sourcePath: sharedPath, ++ socketPath: virtiofsdSocketPath, ++ extraArgs: s.config.VirtioFSExtraArgs, ++ cache: s.config.VirtioFSCache, ++ }, nil ++} ++ ++func (s *stratovirt) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error { ++ span, _ := katatrace.Trace(ctx, s.Logger(), "CreateVM", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) ++ defer span.End() ++ ++ s.ctx = ctx ++ err := s.setVMConfig(id, hypervisorConfig) ++ if err != nil { ++ return err ++ } ++ ++ if s.path, err = s.binPath(); err != nil { ++ return err ++ } ++ ++ s.virtiofsDaemon, err = s.createVirtiofsDaemon(hypervisorConfig.SharedPath) ++ if err != nil { ++ return err ++ } ++ ++ return nil ++} ++ ++func launchStratovirt(ctx context.Context, s *stratovirt) (*exec.Cmd, io.ReadCloser, error) { ++ var params []string ++ s.createParams(&params) ++ ++ cmd := exec.CommandContext(ctx, s.path, params...) 
++ ++ if len(s.fds) > 0 { ++ s.Logger().Infof("Adding extra file %v", s.fds) ++ cmd.ExtraFiles = s.fds ++ } ++ ++ if s.config.Debug { ++ cmd.Env = []string{"STRATOVIRT_LOG_LEVEL=info"} ++ } ++ ++ reader, err := cmd.StdoutPipe() ++ if err != nil { ++ s.Logger().Error("Unable to connect stdout to a pipe") ++ return nil, nil, err ++ } ++ s.Logger().Infof("launching %s with: %v", s.path, params) ++ ++ if err := cmd.Start(); err != nil { ++ s.Logger().Error("Error starting hypervisor, please check the params") ++ return nil, nil, err ++ } ++ ++ return cmd, reader, nil ++} ++ ++func (s *stratovirt) StartVM(ctx context.Context, timeout int) error { ++ span, _ := katatrace.Trace(ctx, s.Logger(), "StartVM", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) ++ defer span.End() ++ ++ err := utils.MkdirAllWithInheritedOwner(s.svConfig.vmPath, DirMode) ++ if err != nil { ++ return err ++ } ++ ++ defer func() { ++ if err != nil { ++ if s.state.virtiofsPid != 0 { ++ syscall.Kill(s.state.virtiofsPid, syscall.SIGILL) ++ } ++ } ++ for _, fd := range s.fds { ++ if err := fd.Close(); err != nil { ++ s.Logger().WithError(err).Error("After launching StratoVirt") ++ } ++ } ++ s.fds = []*os.File{} ++ }() ++ ++ if err = s.setupVirtiofsDaemon(ctx); err != nil { ++ return err ++ } ++ defer func() { ++ if err != nil { ++ if shutdownErr := s.stopVirtiofsDaemon(ctx); shutdownErr != nil { ++ s.Logger().WithError(shutdownErr).Warn("Error shutting down the VirtiofsDaemon") ++ } ++ } ++ }() ++ ++ stratovirtCmd, reader, err := launchStratovirt(ctx, s) ++ if err != nil { ++ s.Logger().WithError(err).Error("failed to launch StratoVirt") ++ return fmt.Errorf("failed to launch StratoVirt: %s", err) ++ } ++ ++ go s.logAndWait(stratovirtCmd, reader) ++ ++ if err = s.waitVM(s.ctx, timeout); err != nil { ++ return err ++ } ++ ++ return nil ++} ++ ++func (s *stratovirt) StopVM(ctx context.Context, waitOnly bool) (err error) { ++ span, _ := katatrace.Trace(ctx, s.Logger(), "StopVM", 
stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) ++ defer span.End() ++ ++ s.Logger().Info("Stopping Sandbox") ++ if s.stopped.Load() { ++ s.Logger().Info("Already stopped") ++ return nil ++ } ++ ++ defer func() { ++ s.cleanupVM(true) ++ if err == nil { ++ s.stopped.Store(true) ++ } ++ }() ++ ++ if err := s.qmpSetup(); err != nil { ++ return err ++ } ++ ++ pids := s.GetPids() ++ if len(pids) == 0 { ++ return errors.New("cannot determine StratoVirt PID") ++ } ++ pid := pids[0] ++ ++ if waitOnly { ++ err := utils.WaitLocalProcess(pid, stratovirtStopSandboxTimeoutSecs, syscall.Signal(0), s.Logger()) ++ if err != nil { ++ return err ++ } ++ } else { ++ err = syscall.Kill(pid, syscall.SIGKILL) ++ if err != nil { ++ s.Logger().WithError(err).Error("Failed to send SIGKILL to stratovirt") ++ return err ++ } ++ } ++ ++ if s.config.SharedFS == config.VirtioFS || s.config.SharedFS == config.VirtioFSNydus { ++ if err := s.stopVirtiofsDaemon(ctx); err != nil { ++ return err ++ } ++ } ++ ++ return nil ++} ++ ++func (s *stratovirt) PauseVM(ctx context.Context) error { ++ return nil ++} ++ ++func (s *stratovirt) SaveVM() error { ++ return nil ++} ++ ++func (s *stratovirt) ResumeVM(ctx context.Context) error { ++ return nil ++} ++ ++func (s *stratovirt) AddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) error { ++ span, _ := katatrace.Trace(ctx, s.Logger(), "AddDevice", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) ++ defer span.End() ++ ++ switch v := devInfo.(type) { ++ case types.Socket: ++ s.svConfig.devices = s.appendConsole(ctx, s.svConfig.devices) ++ case types.VSock: ++ s.fds = append(s.fds, v.VhostFd) ++ s.svConfig.devices = s.appendVhostVsock(ctx, s.svConfig.devices, v) ++ case Endpoint: ++ s.fds = append(s.fds, v.NetworkPair().TapInterface.VMFds...) 
++ s.svConfig.devices = s.appendNetwork(ctx, s.svConfig.devices, v) ++ case config.BlockDrive: ++ s.svConfig.devices = s.appendBlock(ctx, s.svConfig.devices) ++ case types.Volume: ++ s.svConfig.devices = s.appendVirtioFs(ctx, s.svConfig.devices, v) ++ default: ++ s.Logger().WithField("dev-type", v).Warn("Could not append device: unsupported device type") ++ } ++ ++ return nil ++} ++ ++func (s *stratovirt) HotplugAddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) { ++ span, _ := katatrace.Trace(ctx, s.Logger(), "HotplugAddDevice", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) ++ defer span.End() ++ ++ switch devType { ++ case BlockDev: ++ return nil, s.hotplugBlk(ctx, devInfo.(*config.BlockDrive), AddDevice) ++ default: ++ return nil, fmt.Errorf("Hotplug add device: unsupported device type '%v'", devType) ++ } ++} ++ ++func (s *stratovirt) HotplugRemoveDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) { ++ span, _ := katatrace.Trace(ctx, s.Logger(), "HotplugRemoveDevice", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) ++ defer span.End() ++ ++ switch devType { ++ case BlockDev: ++ return nil, s.hotplugBlk(ctx, devInfo.(*config.BlockDrive), RemoveDevice) ++ default: ++ return nil, fmt.Errorf("Hotplug remove device: unsupported device type '%v'", devType) ++ } ++} ++ ++func (s *stratovirt) ResizeMemory(ctx context.Context, reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error) { ++ return 0, MemoryDevice{}, nil ++} ++ ++func (s *stratovirt) ResizeVCPUs(ctx context.Context, reqVCPUs uint32) (currentVCPUs uint32, newVCPUs uint32, err error) { ++ return 0, 0, nil ++} ++ ++func (s *stratovirt) GetVMConsole(ctx context.Context, id string) (string, string, error) { ++ span, _ := katatrace.Trace(ctx, s.Logger(), "GetVMConsole", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) ++ defer span.End() ++ ++ consoleURL, err := 
s.consoleSocketPath(s.id) ++ if err != nil { ++ return consoleProtoUnix, "", err ++ } ++ ++ return consoleProtoUnix, consoleURL, nil ++} ++ ++func (s *stratovirt) Disconnect(ctx context.Context) { ++ span, _ := katatrace.Trace(ctx, s.Logger(), "Disconnect", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) ++ defer span.End() ++ ++ s.qmpShutdown() ++} ++ ++func (s *stratovirt) Capabilities(ctx context.Context) types.Capabilities { ++ span, _ := katatrace.Trace(ctx, s.Logger(), "Capabilities", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) ++ defer span.End() ++ var caps types.Capabilities ++ caps.SetBlockDeviceHotplugSupport() ++ if s.config.SharedFS != config.NoSharedFS { ++ caps.SetFsSharingSupport() ++ } ++ ++ return caps ++} ++ ++func (s *stratovirt) HypervisorConfig() HypervisorConfig { ++ return s.config ++} ++ ++func (s *stratovirt) GetTotalMemoryMB(ctx context.Context) uint32 { ++ return s.config.MemorySize ++} ++ ++func (s *stratovirt) GetThreadIDs(ctx context.Context) (VcpuThreadIDs, error) { ++ span, _ := katatrace.Trace(ctx, s.Logger(), "GetThreadIDs", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) ++ defer span.End() ++ ++ tid := VcpuThreadIDs{} ++ if err := s.qmpSetup(); err != nil { ++ return tid, err ++ } ++ ++ cpuInfos, err := s.qmpMonitorCh.qmp.ExecQueryCpus(s.qmpMonitorCh.ctx) ++ if err != nil { ++ s.Logger().WithError(err).Error("failed to query cpu infos") ++ return tid, err ++ } ++ ++ tid.vcpus = make(map[int]int, len(cpuInfos)) ++ for _, i := range cpuInfos { ++ if i.ThreadID > 0 { ++ tid.vcpus[i.CPU] = i.ThreadID ++ } ++ } ++ return tid, nil ++} ++ ++func (s *stratovirt) Cleanup(ctx context.Context) error { ++ span, _ := katatrace.Trace(ctx, s.Logger(), "Cleanup", stratovirtTracingTags, map[string]string{"sandbox_id": s.id}) ++ defer span.End() ++ ++ for _, fd := range s.fds { ++ if err := fd.Close(); err != nil { ++ s.Logger().WithError(err).Warn("failed closing fd") ++ } ++ } ++ s.fds = 
[]*os.File{} ++ ++ return nil ++} ++ ++func (s *stratovirt) setConfig(config *HypervisorConfig) error { ++ s.config = *config ++ ++ return nil ++} ++ ++func (s *stratovirt) GetPids() []int { ++ var pids []int ++ pids = append(pids, s.state.pid) ++ ++ return pids ++} ++ ++func (s *stratovirt) GetVirtioFsPid() *int { ++ return &s.state.virtiofsPid ++} ++ ++func (s *stratovirt) fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error { ++ return errors.New("StratoVirt is not supported by VM cache") ++} ++ ++func (s *stratovirt) toGrpc(ctx context.Context) ([]byte, error) { ++ return nil, errors.New("StratoVirt is not supported by VM cache") ++} ++ ++func (s *stratovirt) Check() error { ++ if s.stopped.Load() { ++ return fmt.Errorf("StratoVirt is not running") ++ } ++ ++ if err := s.qmpSetup(); err != nil { ++ return err ++ } ++ ++ return nil ++} ++ ++func (s *stratovirt) Save() (hs hv.HypervisorState) { ++ pids := s.GetPids() ++ hs.Pid = pids[0] ++ hs.VirtiofsDaemonPid = s.state.virtiofsPid ++ hs.Type = string(StratovirtHypervisor) ++ return ++} ++ ++func (s *stratovirt) Load(hs hv.HypervisorState) { ++ s.state.pid = hs.Pid ++ s.state.virtiofsPid = hs.VirtiofsDaemonPid ++} ++ ++func (s *stratovirt) GenerateSocket(id string) (interface{}, error) { ++ return generateVMSocket(id, s.config.VMStorePath) ++} ++ ++func (s *stratovirt) IsRateLimiterBuiltin() bool { ++ return false ++} +-- +2.34.1 + diff --git a/patches/0002-build-Makefile-for-StratoVirt-hypervisor.patch b/patches/0002-build-Makefile-for-StratoVirt-hypervisor.patch new file mode 100644 index 0000000000000000000000000000000000000000..7ca0207e8e7f97a99e85c5b14df15060760cf146 --- /dev/null +++ b/patches/0002-build-Makefile-for-StratoVirt-hypervisor.patch @@ -0,0 +1,201 @@ +From c2cab7406e53db2985bec48af7f8665799f1ead6 Mon Sep 17 00:00:00 2001 +From: Liu Wenyuan +Date: Wed, 23 Aug 2023 17:27:01 +0800 +Subject: [PATCH 2/5] build: Makefile for StratoVirt hypervisor + +Add support for 
building StratoVirt hypervisor, including x86_64 and +arm64. + +Signed-off-by: Liu Wenyuan +--- + src/runtime/Makefile | 51 ++++++++++++++++++++++++++++++- + src/runtime/arch/amd64-options.mk | 3 ++ + src/runtime/arch/arm64-options.mk | 3 ++ + 3 files changed, 56 insertions(+), 1 deletion(-) + +diff --git a/src/runtime/Makefile b/src/runtime/Makefile +index 33fa8f2..0e6e366 100644 +--- a/src/runtime/Makefile ++++ b/src/runtime/Makefile +@@ -79,6 +79,7 @@ QEMUBINDIR := $(PREFIXDEPS)/bin + CLHBINDIR := $(PREFIXDEPS)/bin + FCBINDIR := $(PREFIXDEPS)/bin + ACRNBINDIR := $(PREFIXDEPS)/bin ++STRATOVIRTBINDIR := $(PREFIXDEPS)/bin + SYSCONFDIR := /etc + LOCALSTATEDIR := /var + +@@ -102,6 +103,7 @@ GENERATED_VARS = \ + CONFIG_QEMU_SNP_IN \ + CONFIG_CLH_IN \ + CONFIG_FC_IN \ ++ CONFIG_STRATOVIRT_IN \ + $(USER_VARS) + SCRIPTS += $(COLLECT_SCRIPT) + SCRIPTS_DIR := $(BINDIR) +@@ -144,12 +146,13 @@ HYPERVISOR_ACRN = acrn + HYPERVISOR_FC = firecracker + HYPERVISOR_QEMU = qemu + HYPERVISOR_CLH = cloud-hypervisor ++HYPERVISOR_STRATOVIRT = stratovirt + + # Determines which hypervisor is specified in $(CONFIG_FILE). + DEFAULT_HYPERVISOR ?= $(HYPERVISOR_QEMU) + + # List of hypervisors this build system can generate configuration for. 
+-HYPERVISORS := $(HYPERVISOR_ACRN) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVISOR_CLH) ++HYPERVISORS := $(HYPERVISOR_ACRN) $(HYPERVISOR_FC) $(HYPERVISOR_QEMU) $(HYPERVISOR_CLH) $(HYPERVISOR_STRATOVIRT) + + QEMUPATH := $(QEMUBINDIR)/$(QEMUCMD) + QEMUVALIDHYPERVISORPATHS := [\"$(QEMUPATH)\"] +@@ -175,6 +178,9 @@ ACRNVALIDHYPERVISORPATHS := [\"$(ACRNPATH)\"] + ACRNCTLPATH := $(ACRNBINDIR)/$(ACRNCTLCMD) + ACRNVALIDCTLPATHS := [\"$(ACRNCTLPATH)\"] + ++STRATOVIRTPATH = $(STRATOVIRTBINDIR)/$(STRATOVIRTCMD) ++STRATOVIRTVALIDHYPERVISORPATHS := [\"$(STRATOVIRTPATH)\"] ++ + # Default number of vCPUs + DEFVCPUS := 1 + # Default maximum number of vCPUs +@@ -217,6 +223,7 @@ DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"] + DEFDISABLEBLOCK := false + DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs + DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs ++DEFSHAREDFS_STRATOVIRT_VIRTIOFS := virtio-fs + DEFSHAREDFS_QEMU_TDX_VIRTIOFS := virtio-9p + DEFSHAREDFS_QEMU_SEV_VIRTIOFS := virtio-9p + DEFSHAREDFS_QEMU_SNP_VIRTIOFS := virtio-9p +@@ -379,6 +386,36 @@ ifneq (,$(CLHCMD)) + KERNELPATH_CLH = $(KERNELDIR)/$(KERNEL_NAME_CLH) + endif + ++ifneq (,$(STRATOVIRTCMD)) ++ KNOWN_HYPERVISORS += $(HYPERVISOR_STRATOVIRT) ++ ++ CONFIG_FILE_STRATOVIRT = configuration-stratovirt.toml ++ CONFIG_STRATOVIRT = config/$(CONFIG_FILE_STRATOVIRT) ++ CONFIG_STRATOVIRT_IN = $(CONFIG_STRATOVIRT).in ++ ++ CONFIG_PATH_STRATOVIRT = $(abspath $(CONFDIR)/$(CONFIG_FILE_STRATOVIRT)) ++ CONFIG_PATHS += $(CONFIG_PATH_STRATOVIRT) ++ ++ SYSCONFIG_STRATOVIRT = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_STRATOVIRT)) ++ SYSCONFIG_PATHS += $(SYSCONFIG_STRATOVIRT) ++ ++ CONFIGS += $(CONFIG_STRATOVIRT) ++ ++ # stratovirt-specific options (all should be suffixed by "_STRATOVIRT") ++ DEFMACHINETYPE_STRATOVIRT := microvm ++ DEFBLOCKSTORAGEDRIVER_STRATOVIRT := virtio-mmio ++ DEFNETWORKMODEL_STRATOVIRT := tcfilter ++ DEFSTATICRESOURCEMGMT_STRATOVIRT = true ++ifeq ($(ARCH),amd64) ++ KERNELTYPE_STRATOVIRT = compressed ++endif ++ifeq 
($(ARCH),arm64) ++ KERNELTYPE_STRATOVIRT = uncompressed ++endif ++ KERNEL_NAME_STRATOVIRT = $(call MAKE_KERNEL_NAME,$(KERNELTYPE_STRATOVIRT)) ++ KERNELPATH_STRATOVIRT = $(KERNELDIR)/$(KERNEL_NAME_STRATOVIRT) ++endif ++ + ifneq (,$(FCCMD)) + KNOWN_HYPERVISORS += $(HYPERVISOR_FC) + +@@ -477,6 +514,7 @@ USER_VARS += BINDIR + USER_VARS += CONFIG_ACRN_IN + USER_VARS += CONFIG_CLH_IN + USER_VARS += CONFIG_FC_IN ++USER_VARS += CONFIG_STRATOVIRT_IN + USER_VARS += CONFIG_PATH + USER_VARS += CONFIG_QEMU_IN + USER_VARS += DESTDIR +@@ -495,6 +533,8 @@ USER_VARS += FCPATH + USER_VARS += FCVALIDHYPERVISORPATHS + USER_VARS += FCJAILERPATH + USER_VARS += FCVALIDJAILERPATHS ++USER_VARS += STRATOVIRTPATH ++USER_VARS += STRATOVIRTVALIDHYPERVISORPATHS + USER_VARS += SYSCONFIG + USER_VARS += IMAGENAME + USER_VARS += IMAGEPATH +@@ -516,6 +556,7 @@ USER_VARS += KERNELTDXPATH + USER_VARS += KERNELSNPPATH + USER_VARS += KERNELPATH_CLH + USER_VARS += KERNELPATH_FC ++USER_VARS += KERNELPATH_STRATOVIRT + USER_VARS += KERNELVIRTIOFSPATH + USER_VARS += FIRMWAREPATH + USER_VARS += FIRMWARESEVPATH +@@ -527,6 +568,7 @@ USER_VARS += MACHINEACCELERATORS + USER_VARS += CPUFEATURES + USER_VARS += TDXCPUFEATURES + USER_VARS += DEFMACHINETYPE_CLH ++USER_VARS += DEFMACHINETYPE_STRATOVIRT + USER_VARS += KERNELPARAMS + USER_VARS += KERNELTDXPARAMS + USER_VARS += LIBEXECDIR +@@ -568,6 +610,7 @@ USER_VARS += DEFNETWORKMODEL_ACRN + USER_VARS += DEFNETWORKMODEL_CLH + USER_VARS += DEFNETWORKMODEL_FC + USER_VARS += DEFNETWORKMODEL_QEMU ++USER_VARS += DEFNETWORKMODEL_STRATOVIRT + USER_VARS += DEFDISABLEGUESTEMPTYDIR + USER_VARS += DEFDISABLEGUESTSECCOMP + USER_VARS += DEFDISABLESELINUX +@@ -578,9 +621,11 @@ USER_VARS += DEFDISABLEBLOCK + USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN + USER_VARS += DEFBLOCKSTORAGEDRIVER_FC + USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU ++USER_VARS += DEFBLOCKSTORAGEDRIVER_STRATOVIRT + USER_VARS += DEFBLOCKDEVICEAIO_QEMU + USER_VARS += DEFSHAREDFS_CLH_VIRTIOFS + USER_VARS += 
DEFSHAREDFS_QEMU_VIRTIOFS ++USER_VARS += DEFSHAREDFS_STRATOVIRT_VIRTIOFS + USER_VARS += DEFSHAREDFS_QEMU_TDX_VIRTIOFS + USER_VARS += DEFSHAREDFS_QEMU_SEV_VIRTIOFS + USER_VARS += DEFSHAREDFS_QEMU_SNP_VIRTIOFS +@@ -605,6 +650,7 @@ USER_VARS += DEFSANDBOXCGROUPONLY + USER_VARS += DEFSTATICRESOURCEMGMT + USER_VARS += DEFSTATICRESOURCEMGMT_CLH + USER_VARS += DEFSTATICRESOURCEMGMT_FC ++USER_VARS += DEFSTATICRESOURCEMGMT_STRATOVIRT + USER_VARS += DEFSTATICRESOURCEMGMT_TEE + USER_VARS += DEFBINDMOUNTS + USER_VARS += DEFSERVICEOFFLOAD +@@ -936,6 +982,9 @@ ifneq (,$(findstring $(HYPERVISOR_FC),$(KNOWN_HYPERVISORS))) + endif + ifneq (,$(findstring $(HYPERVISOR_ACRN),$(KNOWN_HYPERVISORS))) + @printf "\t$(HYPERVISOR_ACRN) hypervisor path (ACRNPATH) : %s\n" $(abspath $(ACRNPATH)) ++endif ++ifneq (,$(findstring $(HYPERVISOR_STRATOVIRT),$(KNOWN_HYPERVISORS))) ++ @printf "\t$(HYPERVISOR_STRATOVIRT) hypervisor path (STRATOVIRTPATH) : %s\n" $(abspath $(STRATOVIRTPATH)) + endif + @printf "\tassets path (PKGDATADIR) : %s\n" $(abspath $(PKGDATADIR)) + @printf "\tshim path (PKGLIBEXECDIR) : %s\n" $(abspath $(PKGLIBEXECDIR)) +diff --git a/src/runtime/arch/amd64-options.mk b/src/runtime/arch/amd64-options.mk +index e606815..940e87a 100644 +--- a/src/runtime/arch/amd64-options.mk ++++ b/src/runtime/arch/amd64-options.mk +@@ -28,3 +28,6 @@ ACRNCTLCMD := acrnctl + CLHCMD := cloud-hypervisor + + DEFSTATICRESOURCEMGMT_CLH := false ++ ++# stratovirt binary name ++STRATOVIRTCMD := stratovirt +diff --git a/src/runtime/arch/arm64-options.mk b/src/runtime/arch/arm64-options.mk +index 7f74ae3..895c93f 100644 +--- a/src/runtime/arch/arm64-options.mk ++++ b/src/runtime/arch/arm64-options.mk +@@ -21,3 +21,6 @@ FCJAILERCMD := jailer + CLHCMD := cloud-hypervisor + + DEFSTATICRESOURCEMGMT_CLH := true ++ ++# stratovirt binary name ++STRATOVIRTCMD := stratovirt +-- +2.34.1 + diff --git a/patches/0003-configuration-add-configuration-for-StratoVirt-hyper.patch 
b/patches/0003-configuration-add-configuration-for-StratoVirt-hyper.patch new file mode 100644 index 0000000000000000000000000000000000000000..d9d062a4a1eba481a1287fbad3b1bcdffe9c1970 --- /dev/null +++ b/patches/0003-configuration-add-configuration-for-StratoVirt-hyper.patch @@ -0,0 +1,548 @@ +From 2d0431ff3051b9267919a543906c8fd5547facf9 Mon Sep 17 00:00:00 2001 +From: Liu Wenyuan +Date: Wed, 23 Aug 2023 17:49:56 +0800 +Subject: [PATCH 3/5] configuration: add configuration for StratoVirt + hypervisor. + +Add configuration-stratovirt.toml.in to generate the StratoVirt configuration, +and parser to deliver config to StratoVirt. + +Signed-off-by: Liu Wenyuan +--- + .../config/configuration-stratovirt.toml.in | 394 ++++++++++++++++++ + src/runtime/pkg/katautils/config.go | 104 +++++ + 2 files changed, 498 insertions(+) + create mode 100644 src/runtime/config/configuration-stratovirt.toml.in + +diff --git a/src/runtime/config/configuration-stratovirt.toml.in b/src/runtime/config/configuration-stratovirt.toml.in +new file mode 100644 +index 0000000..c98cf6b +--- /dev/null ++++ b/src/runtime/config/configuration-stratovirt.toml.in +@@ -0,0 +1,394 @@ ++# Copyright (c) 2023 Huawei Technologies Co.,Ltd. ++# ++# SPDX-License-Identifier: Apache-2.0 ++# ++ ++# XXX: WARNING: this file is auto-generated. ++# XXX: ++# XXX: Source file: "@CONFIG_STRATOVIRT_IN@" ++# XXX: Project: ++# XXX: Name: @PROJECT_NAME@ ++# XXX: Type: @PROJECT_TYPE@ ++ ++[hypervisor.stratovirt] ++path = "@STRATOVIRTPATH@" ++kernel = "@KERNELPATH_STRATOVIRT@" ++#image = "@IMAGEPATH@" ++initrd = "@INITRDPATH@" ++machine_type = "@DEFMACHINETYPE_STRATOVIRT@" ++ ++# rootfs filesystem type: ++# - ext4 (default) ++# - xfs ++# - erofs ++rootfs_type = @DEFROOTFSTYPE@ ++ ++# List of valid annotation names for the hypervisor ++# Each member of the list is a regular expression, which is the base name ++# of the annotation, e.g. 
"path" for io.katacontainers.config.hypervisor.path" ++enable_annotations = @DEFENABLEANNOTATIONS@ ++ ++# List of valid annotations values for the hypervisor ++# Each member of the list is a path pattern as described by glob(3). ++# The default if not set is empty (all annotations rejected.) ++# Your distribution recommends: @STRATOVIRTVALIDHYPERVISORPATHS@ ++valid_hypervisor_paths = @STRATOVIRTVALIDHYPERVISORPATHS@ ++ ++# Optional space-separated list of options to pass to the guest kernel. ++# For example, use `kernel_params = "vsyscall=emulate"` if you are having ++# trouble running pre-2.15 glibc. ++# ++# WARNING: - any parameter specified here will take priority over the default ++# parameter value of the same name used to start the virtual machine. ++# Do not set values here unless you understand the impact of doing so as you ++# may stop the virtual machine from booting. ++# To see the list of default parameters, enable hypervisor debug, create a ++# container and look for 'default-kernel-parameters' log entries. 
++kernel_params = "@KERNELPARAMS@" ++ ++# Default number of vCPUs per SB/VM: ++# unspecified or 0 --> will be set to @DEFVCPUS@ ++# < 0 --> will be set to the actual number of physical cores ++# > 0 <= number of physical cores --> will be set to the specified number ++# > number of physical cores --> will be set to the actual number of physical cores ++default_vcpus = 1 ++ ++# Default maximum number of vCPUs per SB/VM: ++# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number ++# of vCPUs supported by KVM if that number is exceeded ++# > 0 <= number of physical cores --> will be set to the specified number ++# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number ++# of vCPUs supported by KVM if that number is exceeded ++# WARNING: Depending on the architecture, the maximum number of vCPUs supported by KVM is used when ++# the actual number of physical cores is greater than it. ++# WARNING: Be aware that this value impacts the virtual machine's memory footprint and the CPU ++# hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs ++# can be added to a SB/VM, but the memory footprint will be big. Another example, with ++# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of ++# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, ++# unless you know what you are doing. ++# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8. ++default_maxvcpus = @DEFMAXVCPUS@ ++ ++# Bridges can be used to hot plug devices. ++# Limitations: ++# * Currently only pci bridges are supported ++# * Up to 30 devices per bridge can be hot plugged. ++# * Up to 5 PCI bridges can be cold plugged per VM.
++# This limitation could be a bug in the kernel ++# Default number of bridges per SB/VM: ++# unspecified or 0 --> will be set to @DEFBRIDGES@ ++# > 1 <= 5 --> will be set to the specified number ++# > 5 --> will be set to 5 ++default_bridges = @DEFBRIDGES@ ++ ++# Default memory size in MiB for SB/VM. ++# If unspecified then it will be set to @DEFMEMSZ@ MiB. ++default_memory = @DEFMEMSZ@ ++# ++# Default memory slots per SB/VM. ++# If unspecified then it will be set to @DEFMEMSLOTS@. ++# This determines the number of times that memory can be hot-added to the sandbox/VM. ++#memory_slots = @DEFMEMSLOTS@ ++ ++# Default maximum memory in MiB per SB / VM ++# unspecified or == 0 --> will be set to the actual amount of physical RAM ++# > 0 <= amount of physical RAM --> will be set to the specified number ++# > amount of physical RAM --> will be set to the actual amount of physical RAM ++default_maxmemory = @DEFMAXMEMSZ@ ++ ++# This size in MiB will be added to the maximum memory of the hypervisor. ++# It is the memory address space for the NVDIMM device. ++# If the block storage driver (block_device_driver) is set to "nvdimm", ++# memory_offset should be set to the size of the block device. ++# Default 0 ++#memory_offset = 0 ++ ++# Disable block device from being used for a container's rootfs. ++# In case of a storage driver like devicemapper where a container's ++# root file system is backed by a block device, the block device is passed ++# directly to the hypervisor for performance reasons. ++# This flag prevents the block device from being passed to the hypervisor, ++# virtio-fs is used instead to pass the rootfs. ++disable_block_device_use = @DEFDISABLEBLOCK@ ++ ++# Shared file system type: ++# - virtio-fs (default) ++# - virtio-fs-nydus ++# - none ++shared_fs = "@DEFSHAREDFS_STRATOVIRT_VIRTIOFS@" ++ ++# Path to vhost-user-fs daemon. ++virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@" ++ ++# List of valid annotations values for the virtiofs daemon ++# The default if not set is empty (all annotations rejected.)
++valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@ ++ ++# Default size of DAX cache in MiB ++virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@ ++ ++# Extra args for virtiofsd daemon ++# ++# Format example: ++# ["--arg1=xxx", "--arg2=yyy"] ++# Examples: ++# Set virtiofsd log level to debug : ["--log-level=debug"] ++# ++# see `virtiofsd -h` for possible options. ++virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@ ++ ++# Cache mode: ++# ++# - never ++# Metadata, data, and pathname lookup are not cached in guest. They are ++# always fetched from host and any changes are immediately pushed to host. ++# ++# - auto ++# Metadata and pathname lookup cache expires after a configured amount of ++# time (default is 1 second). Data is cached while the file is open (close ++# to open consistency). ++# ++# - always ++# Metadata, data, and pathname lookup are cached in guest and never expire. ++virtio_fs_cache = "@DEFVIRTIOFSCACHE@" ++ ++# Block storage driver to be used for the hypervisor in case the container ++# rootfs is backed by a block device. This is virtio-scsi, virtio-blk ++# or nvdimm. ++block_device_driver = "@DEFBLOCKSTORAGEDRIVER_STRATOVIRT@" ++ ++# Specifies cache-related options will be set to block devices or not. ++# Default false ++#block_device_cache_set = true ++ ++# Specifies cache-related options for block devices. ++# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled. ++# Default false ++#block_device_cache_direct = true ++ ++# Specifies cache-related options for block devices. ++# Denotes whether flush requests for the device are ignored. ++# Default false ++#block_device_cache_noflush = true ++ ++# Enable huge pages for VM RAM, default false ++# Enabling this will result in the VM memory ++# being allocated using huge pages. ++# This is useful when you want to use vhost-user network ++# stacks within the container. 
This will automatically ++# result in memory pre allocation ++#enable_hugepages = true ++ ++# Enable vIOMMU, default false ++# Enabling this will result in the VM having a vIOMMU device ++# This will also add the following options to the kernel's ++# command line: intel_iommu=on,iommu=pt ++#enable_iommu = true ++ ++# This option changes the default hypervisor and kernel parameters ++# to enable debug output where available. ++# ++# Default false ++#enable_debug = true ++ ++# Disable the customizations done in the runtime when it detects ++# that it is running on top a VMM. This will result in the runtime ++# behaving as it would when running on bare metal. ++# ++#disable_nesting_checks = true ++ ++# ++# Default entropy source. ++# The path to a host source of entropy (including a real hardware RNG) ++# /dev/urandom and /dev/random are two main options. ++# Be aware that /dev/random is a blocking source of entropy. If the host ++# runs out of entropy, the VMs boot time will increase leading to get startup ++# timeouts. ++# The source of entropy /dev/urandom is non-blocking and provides a ++# generally acceptable source of entropy. It should work well for pretty much ++# all practical purposes. ++entropy_source = "@DEFENTROPYSOURCE@" ++ ++# Path to OCI hook binaries in the *guest rootfs*. ++# This does not affect host-side hooks which must instead be added to ++# the OCI spec passed to the runtime. ++# ++# You can create a rootfs with hooks by customizing the osbuilder scripts: ++# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder ++# ++# Hooks must be stored in a subdirectory of guest_hook_path according to their ++# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}". ++# The agent will scan these directories for executable files and add them, in ++# lexicographical order, to the lifecycle of the guest container. ++# Hooks are executed in the runtime namespace of the guest. 
See the official documentation: ++# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks ++# Warnings will be logged if any error is encountered while scanning for hooks, ++# but it will not abort container execution. ++#guest_hook_path = "/usr/share/oci/hooks" ++ ++# disable applying SELinux on the VMM process (default false) ++disable_selinux = @DEFDISABLESELINUX@ ++ ++# disable applying SELinux on the container process ++# If set to false, the type `container_t` is applied to the container process by default. ++# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built ++# with `SELINUX=yes`. ++# (default: true) ++disable_guest_selinux = @DEFDISABLEGUESTSELINUX@ ++ ++[factory] ++# VM templating support. Once enabled, new VMs are created from template ++# using vm cloning. They will share the same initial kernel, initramfs and ++# agent memory by mapping it readonly. It helps speeding up new container ++# creation and saves a lot of memory if there are many kata containers running ++# on the same host. ++# ++# When disabled, new VMs are created from scratch. ++# ++# Note: Requires "initrd=" to be set ("image=" is not supported). ++# ++# Default false ++#enable_template = true ++ ++[agent.@PROJECT_TYPE@] ++# If enabled, make the agent display debug-level messages. ++# (default: disabled) ++#enable_debug = true ++ ++# Enable agent tracing. ++# ++# If enabled, the agent will generate OpenTelemetry trace spans. ++# ++# Notes: ++# ++# - If the runtime also has tracing enabled, the agent spans will be ++# associated with the appropriate runtime parent span. ++# - If enabled, the runtime will wait for the container to shutdown, ++# increasing the container shutdown time slightly. ++# ++# (default: disabled) ++#enable_tracing = true ++ ++# Comma separated list of kernel modules and their parameters. ++# These modules will be loaded in the guest kernel using modprobe(8). 
++# The following example can be used to load two kernel modules with parameters ++# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"] ++# The first word is considered as the module name and the rest as its parameters. ++# Container will not be started when: ++# * A kernel module is specified and the modprobe command is not installed in the guest ++# or it fails loading the module. ++# * The module is not available in the guest or it doesn't meet the guest kernel ++# requirements, like architecture and version. ++# ++kernel_modules = [] ++ ++# Enable debug console. ++ ++# If enabled, user can connect guest OS running inside hypervisor ++# through "kata-runtime exec <sandbox-id>" command ++ ++#debug_console_enabled = true ++ ++# Agent connection dialing timeout value in seconds ++# (default: 45) ++dial_timeout = 45 ++ ++[runtime] ++# If enabled, the runtime will log additional debug messages to the ++# system log ++# (default: disabled) ++#enable_debug = true ++# ++# Internetworking model ++# Determines how the VM should be connected to ++# the container network interface ++# Options: ++# ++# - macvtap ++# Used when the Container network interface can be bridged using ++# macvtap. ++# ++# - none ++# Used when customizing the network. Only creates a tap device. No veth pair. ++# ++# - tcfilter ++# Uses tc filter rules to redirect traffic from the network interface ++# provided by plugin to a tap interface connected to the VM. ++# ++internetworking_model = "@DEFNETWORKMODEL_STRATOVIRT@" ++ ++# disable guest seccomp ++# Determines whether container seccomp profiles are passed to the virtual ++# machine and applied by the kata agent.
If set to true, seccomp is not applied ++# within the guest ++# (default: true) ++disable_guest_seccomp = @DEFDISABLEGUESTSECCOMP@ ++ ++# vCPUs pinning settings ++# if enabled, each vCPU thread will be scheduled to a fixed CPU ++# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet) ++#enable_vcpus_pinning = false ++ ++# Apply a custom SELinux security policy to the container process inside the VM. ++# This is used when you want to apply a type other than the default `container_t`, ++# so general users should not uncomment and apply it. ++# (format: "user:role:type") ++# Note: You cannot specify MCS policy with the label because the sensitivity levels and ++# categories are determined automatically by high-level container runtimes such as containerd. ++#guest_selinux_label = "@DEFGUESTSELINUXLABEL@" ++ ++# If enabled, the runtime will create opentracing.io traces and spans. ++# (See https://www.jaegertracing.io/docs/getting-started). ++# (default: disabled) ++#enable_tracing = true ++ ++# Set the full url to the Jaeger HTTP Thrift collector. ++# The default if not set will be "http://localhost:14268/api/traces" ++#jaeger_endpoint = "" ++ ++# Sets the username to be used if basic auth is required for Jaeger. ++#jaeger_user = "" ++ ++# Sets the password to be used if basic auth is required for Jaeger. ++#jaeger_password = "" ++ ++# If enabled, the runtime will not create a network namespace for shim and hypervisor processes. ++# This option may have some potential impacts to your host. It should only be used when you know what you're doing. ++# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only ++# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge ++# (like OVS) directly. ++# (default: false) ++#disable_new_netns = true ++ ++# if enabled, the runtime will add all the kata processes inside one dedicated cgroup. 
++# The container cgroups in the host are not created, just one single cgroup per sandbox. ++# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox. ++# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation. ++# The sandbox cgroup is constrained if there is no container type annotation. ++# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType ++sandbox_cgroup_only = @DEFSANDBOXCGROUPONLY@ ++ ++# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In ++# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful ++# when a hardware architecture or hypervisor solution is utilized which does not support CPU and/or memory hotplug. ++# Compatibility for determining appropriate sandbox (VM) size: ++# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O ++# does not yet support sandbox sizing annotations. ++# - When running single containers using a tool like ctr, container sizing information will be available. ++static_sandbox_resource_mgmt = @DEFSTATICRESOURCEMGMT_STRATOVIRT@ ++ ++# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will ++# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest. ++disable_guest_empty_dir = @DEFDISABLEGUESTEMPTYDIR@ ++ ++# Enabled experimental feature list, format: ["a", "b"]. ++# Experimental features are features not stable enough for production, ++# they may break compatibility, and are prepared for a big version bump. 
++# Supported experimental features: ++# (default: []) ++experimental = @DEFAULTEXPFEATURES@ ++ ++# If enabled, user can run pprof tools with shim v2 process through kata-monitor. ++# (default: false) ++#enable_pprof = true +diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go +index feeef68..45bbfea 100644 +--- a/src/runtime/pkg/katautils/config.go ++++ b/src/runtime/pkg/katautils/config.go +@@ -52,6 +52,7 @@ const ( + qemuHypervisorTableType = "qemu" + acrnHypervisorTableType = "acrn" + dragonballHypervisorTableType = "dragonball" ++ stratovirtHypervisorTableType = "stratovirt" + + // the maximum amount of PCI bridges that can be cold plugged in a VM + maxPCIBridges uint32 = 5 +@@ -1142,6 +1143,106 @@ func newDragonballHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { + }, nil + } + ++func newStratovirtHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { ++ hypervisor, err := h.path() ++ if err != nil { ++ return vc.HypervisorConfig{}, err ++ } ++ ++ kernel, err := h.kernel() ++ if err != nil { ++ return vc.HypervisorConfig{}, err ++ } ++ ++ initrd, err := h.initrd() ++ if err != nil { ++ return vc.HypervisorConfig{}, err ++ } ++ ++ image, err := h.image() ++ if err != nil { ++ return vc.HypervisorConfig{}, err ++ } ++ ++ if image != "" && initrd != "" { ++ return vc.HypervisorConfig{}, ++ errors.New("having both an image and an initrd defined in the configuration file is not supported") ++ } ++ ++ if image == "" && initrd == "" { ++ return vc.HypervisorConfig{}, ++ errors.New("image or initrd must be defined in the configuration file") ++ } ++ ++ rootfsType, err := h.rootfsType() ++ if err != nil { ++ return vc.HypervisorConfig{}, err ++ } ++ ++ kernelParams := h.kernelParams() ++ machineType := h.machineType() ++ ++ blockDriver, err := h.blockDeviceDriver() ++ if err != nil { ++ return vc.HypervisorConfig{}, err ++ } ++ ++ if vSock, err := utils.SupportsVsocks(); !vSock { ++ return vc.HypervisorConfig{}, 
err ++ } ++ ++ sharedFS, err := h.sharedFS() ++ if err != nil { ++ return vc.HypervisorConfig{}, err ++ } ++ ++ if sharedFS != config.VirtioFS && sharedFS != config.VirtioFSNydus && sharedFS != config.NoSharedFS { ++ return vc.HypervisorConfig{}, ++ fmt.Errorf("Stratovirt Hypervisor does not support %s shared filesystem option", sharedFS) ++ } ++ ++ if (sharedFS == config.VirtioFS || sharedFS == config.VirtioFSNydus) && h.VirtioFSDaemon == "" { ++ return vc.HypervisorConfig{}, ++ fmt.Errorf("cannot enable %s without daemon path in configuration file", sharedFS) ++ } ++ ++ return vc.HypervisorConfig{ ++ HypervisorPath: hypervisor, ++ HypervisorPathList: h.HypervisorPathList, ++ KernelPath: kernel, ++ InitrdPath: initrd, ++ ImagePath: image, ++ RootfsType: rootfsType, ++ KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), ++ HypervisorMachineType: machineType, ++ NumVCPUs: h.defaultVCPUs(), ++ DefaultMaxVCPUs: h.defaultMaxVCPUs(), ++ MemorySize: h.defaultMemSz(), ++ MemSlots: h.defaultMemSlots(), ++ MemOffset: h.defaultMemOffset(), ++ DefaultMaxMemorySize: h.defaultMaxMemSz(), ++ EntropySource: h.GetEntropySource(), ++ DefaultBridges: h.defaultBridges(), ++ DisableBlockDeviceUse: h.DisableBlockDeviceUse, ++ SharedFS: sharedFS, ++ VirtioFSDaemon: h.VirtioFSDaemon, ++ VirtioFSDaemonList: h.VirtioFSDaemonList, ++ VirtioFSCacheSize: h.VirtioFSCacheSize, ++ VirtioFSCache: h.defaultVirtioFSCache(), ++ VirtioFSExtraArgs: h.VirtioFSExtraArgs, ++ HugePages: h.HugePages, ++ Debug: h.Debug, ++ DisableNestingChecks: h.DisableNestingChecks, ++ BlockDeviceDriver: blockDriver, ++ DisableVhostNet: true, ++ GuestHookPath: h.guestHookPath(), ++ EnableAnnotations: h.EnableAnnotations, ++ DisableSeccomp: h.DisableSeccomp, ++ DisableSeLinux: h.DisableSeLinux, ++ DisableGuestSeLinux: h.DisableGuestSeLinux, ++ }, nil ++} ++ + func newFactoryConfig(f factory) (oci.FactoryConfig, error) { + if f.TemplatePath == "" { + f.TemplatePath = defaultTemplatePath +@@ -1178,6 +1279,9 @@ 
func updateRuntimeConfigHypervisor(configPath string, tomlConf tomlConfig, confi + case dragonballHypervisorTableType: + config.HypervisorType = vc.DragonballHypervisor + hConfig, err = newDragonballHypervisorConfig(hypervisor) ++ case stratovirtHypervisorTableType: ++ config.HypervisorType = vc.StratovirtHypervisor ++ hConfig, err = newStratovirtHypervisorConfig(hypervisor) + default: + err = fmt.Errorf("%s: %+q", errInvalidHypervisorPrefix, k) + } +-- +2.34.1 + diff --git a/patches/0004-kata-deploy-Add-StratoVirt-support-to-deploy-process.patch b/patches/0004-kata-deploy-Add-StratoVirt-support-to-deploy-process.patch new file mode 100644 index 0000000000000000000000000000000000000000..4113359c3a3152ddaa0908b87bb308ad40279d72 --- /dev/null +++ b/patches/0004-kata-deploy-Add-StratoVirt-support-to-deploy-process.patch @@ -0,0 +1,246 @@ +From a89fca251514dee26b4e6e077f4a0484a2d45a5f Mon Sep 17 00:00:00 2001 +From: Liu Wenyuan +Date: Fri, 1 Sep 2023 20:44:30 +0800 +Subject: [PATCH 4/5] kata-deploy: Add StratoVirt support to deploy process + +Allow kata-deploy process to pull StratoVirt from release binaries, and +add them as a part of kata release. 
+ +Signed-off-by: Liu Wenyuan +--- + .../kata-cleanup/base/kata-cleanup.yaml | 2 +- + .../kata-deploy/base/kata-deploy.yaml | 2 +- + .../kata-deploy/local-build/Makefile | 4 ++ + .../local-build/kata-deploy-binaries.sh | 28 ++++++++++++++ + .../runtimeclasses/kata-runtimeClasses.yaml | 13 +++++++ + .../runtimeclasses/kata-stratovirt.yaml | 13 +++++++ + .../stratovirt/build-static-stratovirt.sh | 37 +++++++++++++++++++ + versions.yaml | 5 +++ + 8 files changed, 102 insertions(+), 2 deletions(-) + create mode 100644 tools/packaging/kata-deploy/runtimeclasses/kata-stratovirt.yaml + create mode 100755 tools/packaging/static-build/stratovirt/build-static-stratovirt.sh + +diff --git a/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml b/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml +index df1a2ff..4814b93 100644 +--- a/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml ++++ b/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml +@@ -30,7 +30,7 @@ spec: + - name: DEBUG + value: "false" + - name: SHIMS +- value: "clh dragonball fc qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx qemu" ++ value: "clh dragonball fc qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx qemu stratovirt" + - name: DEFAULT_SHIM + value: "qemu" + - name: CREATE_RUNTIMECLASSES +diff --git a/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml b/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml +index 383eec9..52e5987 100644 +--- a/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml ++++ b/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml +@@ -32,7 +32,7 @@ spec: + - name: DEBUG + value: "false" + - name: SHIMS +- value: "clh dragonball fc qemu qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx" ++ value: "clh dragonball fc qemu qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx stratovirt" + - name: DEFAULT_SHIM + value: "qemu" + - name: CREATE_RUNTIMECLASSES +diff --git a/tools/packaging/kata-deploy/local-build/Makefile 
b/tools/packaging/kata-deploy/local-build/Makefile +index d9e28a5..ef63f2f 100644 +--- a/tools/packaging/kata-deploy/local-build/Makefile ++++ b/tools/packaging/kata-deploy/local-build/Makefile +@@ -35,6 +35,7 @@ all: serial-targets \ + qemu-snp-experimental-tarball \ + qemu-tarball \ + qemu-tdx-experimental-tarball \ ++ stratovirt-tarball \ + shim-v2-tarball \ + tdvf-tarball \ + virtiofsd-tarball +@@ -112,6 +113,9 @@ qemu-tarball: + qemu-tdx-experimental-tarball: + ${MAKE} $@-build + ++stratovirt-tarball: ++ ${MAKE} $@-build ++ + rootfs-image-tarball: + ${MAKE} $@-build + +diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +index bb68d88..70e07b4 100755 +--- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh ++++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +@@ -30,6 +30,7 @@ readonly kernel_builder="${static_build_dir}/kernel/build.sh" + readonly ovmf_builder="${static_build_dir}/ovmf/build.sh" + readonly qemu_builder="${static_build_dir}/qemu/build-static-qemu.sh" + readonly qemu_experimental_builder="${static_build_dir}/qemu/build-static-qemu-experimental.sh" ++readonly stratovirt_builder="${static_build_dir}/stratovirt/build-static-stratovirt.sh" + readonly shimv2_builder="${static_build_dir}/shim-v2/build.sh" + readonly virtiofsd_builder="${static_build_dir}/virtiofsd/build.sh" + readonly nydus_builder="${static_build_dir}/nydus/build.sh" +@@ -103,6 +104,7 @@ options: + qemu + qemu-snp-experimental + qemu-tdx-experimental ++ stratovirt + rootfs-image + rootfs-image-tdx + rootfs-initrd +@@ -511,6 +513,28 @@ install_clh_glibc() { + install_clh_helper "gnu" "${features}" "-glibc" + } + ++# Install static stratovirt asset ++install_stratovirt() { ++ local stratovirt_version=$(get_from_kata_deps "assets.hypervisor.stratovirt.version") ++ ++ latest_artefact="${stratovirt_version}" ++ latest_builder_image="" ++ ++ 
install_cached_tarball_component \ ++ "stratovirt" \ ++ "${latest_artefact}" \ ++ "${latest_builder_image}" \ ++ "${final_tarball_name}" \ ++ "${final_tarball_path}" \ ++ && return 0 ++ ++ info "build static stratovirt" ++ "${stratovirt_builder}" ++ info "Install static stratovirt" ++ mkdir -p "${destdir}/opt/kata/bin/" ++ sudo install -D --owner root --group root --mode 0744 static-stratovirt/stratovirt "${destdir}/opt/kata/bin/stratovirt" ++} ++ + # Install static virtiofsd asset + install_virtiofsd() { + latest_artefact="$(get_from_kata_deps "externals.virtiofsd.version")-$(get_from_kata_deps "externals.virtiofsd.toolchain")" +@@ -734,6 +758,7 @@ handle_build() { + install_qemu + install_qemu_snp_experimental + install_qemu_tdx_experimental ++ install_stratovirt + install_runk + install_shimv2 + install_tdvf +@@ -781,6 +806,8 @@ handle_build() { + + qemu-tdx-experimental) install_qemu_tdx_experimental ;; + ++ stratovirt) install_stratovirt ;; ++ + rootfs-image) install_image ;; + + rootfs-image-tdx) install_image_tdx ;; +@@ -860,6 +887,7 @@ main() { + log-parser-rs + nydus + qemu ++ stratovirt + rootfs-image + rootfs-initrd + rootfs-initrd-mariner +diff --git a/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml b/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml +index e67cafe..ae3c8c8 100644 +--- a/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml ++++ b/tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml +@@ -102,3 +102,16 @@ overhead: + scheduling: + nodeSelector: + katacontainers.io/kata-runtime: "true" ++--- ++kind: RuntimeClass ++apiVersion: node.k8s.io/v1 ++metadata: ++ name: kata-stratovirt ++handler: kata-stratovirt ++overhead: ++ podFixed: ++ memory: "130Mi" ++ cpu: "250m" ++scheduling: ++ nodeSelector: ++ katacontainers.io/kata-runtime: "true" +diff --git a/tools/packaging/kata-deploy/runtimeclasses/kata-stratovirt.yaml 
b/tools/packaging/kata-deploy/runtimeclasses/kata-stratovirt.yaml +new file mode 100644 +index 0000000..ea27046 +--- /dev/null ++++ b/tools/packaging/kata-deploy/runtimeclasses/kata-stratovirt.yaml +@@ -0,0 +1,13 @@ ++--- ++kind: RuntimeClass ++apiVersion: node.k8s.io/v1 ++metadata: ++ name: kata-stratovirt ++handler: kata-stratovirt ++overhead: ++ podFixed: ++ memory: "130Mi" ++ cpu: "250m" ++scheduling: ++ nodeSelector: ++ katacontainers.io/kata-runtime: "true" +diff --git a/tools/packaging/static-build/stratovirt/build-static-stratovirt.sh b/tools/packaging/static-build/stratovirt/build-static-stratovirt.sh +new file mode 100755 +index 0000000..2fe28aa +--- /dev/null ++++ b/tools/packaging/static-build/stratovirt/build-static-stratovirt.sh +@@ -0,0 +1,37 @@ ++#!/usr/bin/env bash ++# ++# Copyright (c) 2023 Huawei Technologies Co.,Ltd. ++# ++# SPDX-License-Identifier: Apache-2.0 ++ ++set -o errexit ++set -o nounset ++set -o pipefail ++ ++ARCH=$(uname -m) ++ ++# Currently, StratoVirt only supports x86_64 and aarch64. 
++[ "${ARCH}" != "x86_64" ] && [ "${ARCH}" != "aarch64" ] && exit ++ ++script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ++source "${script_dir}/../../scripts/lib.sh" ++ ++info "Get stratovirt information from runtime versions.yaml" ++stratovirt_url="${stratovirt_url:-}" ++[ -n "$stratovirt_url" ] || stratovirt_url=$(get_from_kata_deps "assets.hypervisor.stratovirt.url") ++[ -n "$stratovirt_url" ] || die "failed to get stratovirt url" ++ ++stratovirt_version="${stratovirt_version:-}" ++[ -n "$stratovirt_version" ] || stratovirt_version=$(get_from_kata_deps "assets.hypervisor.stratovirt.version") ++[ -n "$stratovirt_version" ] || die "failed to get stratovirt version" ++ ++pull_stratovirt_released_binary() { ++ file_name="stratovirt-static-${stratovirt_version##*v}-${ARCH}" ++ download_url="${stratovirt_url}/releases/download/${stratovirt_version}/${file_name}.tar.gz" ++ ++ curl -L ${download_url} -o ${file_name}.tar.gz ++ mkdir -p static-stratovirt ++ tar zxvf ${file_name}.tar.gz -C static-stratovirt ++} ++ ++pull_stratovirt_released_binary +diff --git a/versions.yaml b/versions.yaml +index 57e3c94..3b5f37a 100644 +--- a/versions.yaml ++++ b/versions.yaml +@@ -115,6 +115,11 @@ assets: + url: "https://github.com/AMDESE/qemu" + tag: "3b6a2b6b7466f6dea53243900b7516c3f29027b7" + ++ stratovirt: ++ description: "StratoVirt is a lightweight open-source VMM" ++ url: "https://github.com/openeuler-mirror/stratovirt" ++ version: "v2.3.0" ++ + image: + description: | + Root filesystem disk image used to boot the guest virtual +-- +2.34.1 + diff --git a/patches/0005-mount-Reduce-the-mount-points-with-namespace-isolati.patch b/patches/0005-mount-Reduce-the-mount-points-with-namespace-isolati.patch new file mode 100644 index 0000000000000000000000000000000000000000..00f32c1e6ada6c1172a4bb110d3aec32de61f261 --- /dev/null +++ b/patches/0005-mount-Reduce-the-mount-points-with-namespace-isolati.patch @@ -0,0 +1,60 @@ +From 0fb8c1bd205397ee2efd070dc525a17e9a787857 Mon Sep 17 
00:00:00 2001 +From: Zhigang Wang +Date: Mon, 1 Jan 2024 21:51:25 +0800 +Subject: [PATCH 5/5] mount: Reduce the mount points with namespace isolation + +This patch can reduce load on systemd process, and +increase the k8s deployment density when using go runtime. + +Signed-off-by: Zhigang Wang +Signed-off-by: Liu Wenyuan +--- + src/runtime/pkg/containerd-shim-v2/service.go | 25 +++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/src/runtime/pkg/containerd-shim-v2/service.go b/src/runtime/pkg/containerd-shim-v2/service.go +index 26d4c21..7b06429 100644 +--- a/src/runtime/pkg/containerd-shim-v2/service.go ++++ b/src/runtime/pkg/containerd-shim-v2/service.go +@@ -191,6 +191,27 @@ func newCommand(ctx context.Context, id, containerdBinary, containerdAddress str + return cmd, nil + } + ++func setupMntNs() error { ++ err := unix.Unshare(unix.CLONE_NEWNS) ++ if err != nil { ++ return err ++ } ++ ++ err = unix.Mount("", "/", "", unix.MS_REC|unix.MS_SLAVE, "") ++ if err != nil { ++ err = fmt.Errorf("failed to mount with slave: %v", err) ++ return err ++ } ++ ++ err = unix.Mount("", "/", "", unix.MS_REC|unix.MS_SHARED, "") ++ if err != nil { ++ err = fmt.Errorf("failed to mount with shared: %v", err) ++ return err ++ } ++ ++ return nil ++} ++ + // StartShim is a binary call that starts a kata shimv2 service which will + // implement the ShimV2 APIs such as create/start/update etc containers. 
+ func (s *service) StartShim(ctx context.Context, opts cdshim.StartOpts) (_ string, retErr error) { +@@ -255,6 +276,10 @@ func (s *service) StartShim(ctx context.Context, opts cdshim.StartOpts) (_ strin + } + } + ++ if err := setupMntNs(); err != nil { ++ return "", err ++ } ++ + if err := cmd.Start(); err != nil { + return "", err + } +-- +2.34.1 + diff --git a/series.conf b/series.conf index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..1cf015cdf10b7365005ed1b9c44fe0119b848553 100644 --- a/series.conf +++ b/series.conf @@ -0,0 +1,5 @@ +0001-virtcontainers-Add-StratoVirt-as-a-supported-hypervi.patch +0002-build-Makefile-for-StratoVirt-hypervisor.patch +0003-configuration-add-configuration-for-StratoVirt-hyper.patch +0004-kata-deploy-Add-StratoVirt-support-to-deploy-process.patch +0005-mount-Reduce-the-mount-points-with-namespace-isolati.patch