1 Star 1 Fork 0

孙磊 / runtime

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
hypervisor.go 21.09 KB
一键复制 编辑 原始数据 按行查看 历史
Jia He 提交于 2020-04-21 16:27 . qemu: add cpu_features option
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799
// Copyright (c) 2016 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package virtcontainers
import (
"bufio"
"context"
"fmt"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"github.com/kata-containers/runtime/virtcontainers/device/config"
"github.com/kata-containers/runtime/virtcontainers/persist"
persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api"
"github.com/kata-containers/runtime/virtcontainers/types"
"github.com/kata-containers/runtime/virtcontainers/utils"
)
// HypervisorType describes an hypervisor type.
type HypervisorType string
type operation int
const (
addDevice operation = iota
removeDevice
)
const (
// FirecrackerHypervisor is the FC hypervisor.
FirecrackerHypervisor HypervisorType = "firecracker"
// QemuHypervisor is the QEMU hypervisor.
QemuHypervisor HypervisorType = "qemu"
// AcrnHypervisor is the ACRN hypervisor.
AcrnHypervisor HypervisorType = "acrn"
// ClhHypervisor is the ICH hypervisor.
ClhHypervisor HypervisorType = "clh"
// MockHypervisor is a mock hypervisor for testing purposes
MockHypervisor HypervisorType = "mock"
)
const (
procMemInfo = "/proc/meminfo"
procCPUInfo = "/proc/cpuinfo"
)
const (
defaultVCPUs = 1
// 2 GiB
defaultMemSzMiB = 2048
defaultBridges = 1
defaultBlockDriver = config.VirtioSCSI
defaultSocketName = "kata.sock"
defaultSocketDeviceID = "channel0"
defaultSocketChannelName = "agent.channel.0"
defaultSocketID = "charch0"
// port numbers below 1024 are called privileged ports. Only a process with
// CAP_NET_BIND_SERVICE capability may bind to these port numbers.
vSockPort = 1024
// Port where the agent will send the logs. Logs are sent through the vsock in cases
// where the hypervisor has no console.sock, i.e firecracker
vSockLogsPort = 1025
// MinHypervisorMemory is the minimum memory required for a VM.
MinHypervisorMemory = 256
)
// In some architectures the maximum number of vCPUs depends on the number of physical cores.
var defaultMaxQemuVCPUs = MaxQemuVCPUs()
// agnostic list of kernel root parameters for NVDIMM
var commonNvdimmKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck
{"root", "/dev/pmem0p1"},
{"rootflags", "dax,data=ordered,errors=remount-ro ro"},
{"rootfstype", "ext4"},
}
// agnostic list of kernel root parameters for NVDIMM
var commonNvdimmNoDAXKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck
{"root", "/dev/pmem0p1"},
{"rootflags", "data=ordered,errors=remount-ro ro"},
{"rootfstype", "ext4"},
}
// agnostic list of kernel root parameters for virtio-blk
var commonVirtioblkKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck
{"root", "/dev/vda1"},
{"rootflags", "data=ordered,errors=remount-ro ro"},
{"rootfstype", "ext4"},
}
// deviceType describes a virtualized device type.
type deviceType int
const (
// ImgDev is the image device type.
imgDev deviceType = iota
// FsDev is the filesystem device type.
fsDev
// NetDev is the network device type.
netDev
// BlockDev is the block device type.
blockDev
// SerialPortDev is the serial port device type.
serialPortDev
// vSockPCIDev is the vhost vsock PCI device type.
vSockPCIDev
// VFIODevice is VFIO device type
vfioDev
// vhostuserDev is a Vhost-user device type
vhostuserDev
// CPUDevice is CPU device type
cpuDev
// memoryDevice is memory device type
memoryDev
// hybridVirtioVsockDev is a hybrid virtio-vsock device supported
// only on certain hypervisors, like firecracker.
hybridVirtioVsockDev
)
type memoryDevice struct {
slot int
sizeMB int
addr uint64
probe bool
}
// Set sets an hypervisor type based on the input string.
func (hType *HypervisorType) Set(value string) error {
switch value {
case "qemu":
*hType = QemuHypervisor
return nil
case "firecracker":
*hType = FirecrackerHypervisor
return nil
case "acrn":
*hType = AcrnHypervisor
return nil
case "clh":
*hType = ClhHypervisor
return nil
case "mock":
*hType = MockHypervisor
return nil
default:
return fmt.Errorf("Unknown hypervisor type %s", value)
}
}
// String converts an hypervisor type to a string.
func (hType *HypervisorType) String() string {
switch *hType {
case QemuHypervisor:
return string(QemuHypervisor)
case FirecrackerHypervisor:
return string(FirecrackerHypervisor)
case AcrnHypervisor:
return string(AcrnHypervisor)
case ClhHypervisor:
return string(ClhHypervisor)
case MockHypervisor:
return string(MockHypervisor)
default:
return ""
}
}
// newHypervisor returns an hypervisor from and hypervisor type.
func newHypervisor(hType HypervisorType) (hypervisor, error) {
store, err := persist.GetDriver()
if err != nil {
return nil, err
}
switch hType {
case QemuHypervisor:
return &qemu{
store: store,
}, nil
case FirecrackerHypervisor:
return &firecracker{}, nil
case AcrnHypervisor:
return &Acrn{
store: store,
}, nil
case ClhHypervisor:
return &cloudHypervisor{
store: store,
}, nil
case MockHypervisor:
return &mockHypervisor{}, nil
default:
return nil, fmt.Errorf("Unknown hypervisor type %s", hType)
}
}
// Param is a key/value representation for hypervisor and kernel parameters.
type Param struct {
Key string
Value string
}
// HypervisorConfig is the hypervisor configuration.
type HypervisorConfig struct {
// NumVCPUs specifies default number of vCPUs for the VM.
NumVCPUs uint32
//DefaultMaxVCPUs specifies the maximum number of vCPUs for the VM.
DefaultMaxVCPUs uint32
// DefaultMem specifies default memory size in MiB for the VM.
MemorySize uint32
// DefaultBridges specifies default number of bridges for the VM.
// Bridges can be used to hot plug devices
DefaultBridges uint32
// Msize9p is used as the msize for 9p shares
Msize9p uint32
// MemSlots specifies default memory slots the VM.
MemSlots uint32
// MemOffset specifies memory space for nvdimm device
MemOffset uint32
// VirtioFSCacheSize is the DAX cache size in MiB
VirtioFSCacheSize uint32
// KernelParams are additional guest kernel parameters.
KernelParams []Param
// HypervisorParams are additional hypervisor parameters.
HypervisorParams []Param
// KernelPath is the guest kernel host path.
KernelPath string
// ImagePath is the guest image host path.
ImagePath string
// InitrdPath is the guest initrd image host path.
// ImagePath and InitrdPath cannot be set at the same time.
InitrdPath string
// FirmwarePath is the bios host path
FirmwarePath string
// MachineAccelerators are machine specific accelerators
MachineAccelerators string
// CPUFeatures are cpu specific features
CPUFeatures string
// HypervisorPath is the hypervisor executable host path.
HypervisorPath string
// HypervisorCtlPath is the hypervisor ctl executable host path.
HypervisorCtlPath string
// JailerPath is the jailer executable host path.
JailerPath string
// BlockDeviceDriver specifies the driver to be used for block device
// either VirtioSCSI or VirtioBlock with the default driver being defaultBlockDriver
BlockDeviceDriver string
// HypervisorMachineType specifies the type of machine being
// emulated.
HypervisorMachineType string
// MemoryPath is the memory file path of VM memory. Used when either BootToBeTemplate or
// BootFromTemplate is true.
MemoryPath string
// DevicesStatePath is the VM device state file path. Used when either BootToBeTemplate or
// BootFromTemplate is true.
DevicesStatePath string
// EntropySource is the path to a host source of
// entropy (/dev/random, /dev/urandom or real hardware RNG device)
EntropySource string
// Shared file system type:
// - virtio-9p (default)
// - virtio-fs
SharedFS string
// VirtioFSDaemon is the virtio-fs vhost-user daemon path
VirtioFSDaemon string
// VirtioFSCache cache mode for fs version cache or "none"
VirtioFSCache string
// VirtioFSExtraArgs passes options to virtiofsd daemon
VirtioFSExtraArgs []string
// File based memory backend root directory
FileBackedMemRootDir string
// customAssets is a map of assets.
// Each value in that map takes precedence over the configured assets.
// For example, if there is a value for the "kernel" key in this map,
// it will be used for the sandbox's kernel path instead of KernelPath.
customAssets map[types.AssetType]*types.Asset
// BlockDeviceCacheSet specifies cache-related options will be set to block devices or not.
BlockDeviceCacheSet bool
// BlockDeviceCacheDirect specifies cache-related options for block devices.
// Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
BlockDeviceCacheDirect bool
// BlockDeviceCacheNoflush specifies cache-related options for block devices.
// Denotes whether flush requests for the device are ignored.
BlockDeviceCacheNoflush bool
// DisableBlockDeviceUse disallows a block device from being used.
DisableBlockDeviceUse bool
// EnableIOThreads enables IO to be processed in a separate thread.
// Supported currently for virtio-scsi driver.
EnableIOThreads bool
// Debug changes the default hypervisor and kernel parameters to
// enable debug output where available.
Debug bool
// MemPrealloc specifies if the memory should be pre-allocated
MemPrealloc bool
// HugePages specifies if the memory should be pre-allocated from huge pages
HugePages bool
// VirtioMem is used to enable/disable virtio-mem
VirtioMem bool
// Realtime Used to enable/disable realtime
Realtime bool
// Mlock is used to control memory locking when Realtime is enabled
// Realtime=true and Mlock=false, allows for swapping out of VM memory
// enabling higher density
Mlock bool
// DisableNestingChecks is used to override customizations performed
// when running on top of another VMM.
DisableNestingChecks bool
// UseVSock use a vsock for agent communication
UseVSock bool
// DisableImageNvdimm is used to disable guest rootfs image nvdimm devices
DisableImageNvdimm bool
// HotplugVFIOOnRootBus is used to indicate if devices need to be hotplugged on the
// root bus instead of a bridge.
HotplugVFIOOnRootBus bool
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
// The PCIe Root Port device is used to hot-plug the PCIe device
PCIeRootPort uint32
// BootToBeTemplate used to indicate if the VM is created to be a template VM
BootToBeTemplate bool
// BootFromTemplate used to indicate if the VM should be created from a template VM
BootFromTemplate bool
// DisableVhostNet is used to indicate if host supports vhost_net
DisableVhostNet bool
// EnableVhostUserStore is used to indicate if host supports vhost-user-blk/scsi
EnableVhostUserStore bool
// VhostUserStorePath is the directory path where vhost-user devices
// related folders, sockets and device nodes should be.
VhostUserStorePath string
// GuestHookPath is the path within the VM that will be used for 'drop-in' hooks
GuestHookPath string
// VMid is the id of the VM that create the hypervisor if the VM is created by the factory.
// VMid is "" if the hypervisor is not created by the factory.
VMid string
// SELinux label for the VM
SELinuxProcessLabel string
}
// vcpu mapping from vcpu number to thread number
type vcpuThreadIDs struct {
vcpus map[int]int
}
func (conf *HypervisorConfig) checkTemplateConfig() error {
if conf.BootToBeTemplate && conf.BootFromTemplate {
return fmt.Errorf("Cannot set both 'to be' and 'from' vm tempate")
}
if conf.BootToBeTemplate || conf.BootFromTemplate {
if conf.MemoryPath == "" {
return fmt.Errorf("Missing MemoryPath for vm template")
}
if conf.BootFromTemplate && conf.DevicesStatePath == "" {
return fmt.Errorf("Missing DevicesStatePath to load from vm template")
}
}
return nil
}
func (conf *HypervisorConfig) valid() error {
if conf.KernelPath == "" {
return fmt.Errorf("Missing kernel path")
}
if conf.ImagePath == "" && conf.InitrdPath == "" {
return fmt.Errorf("Missing image and initrd path")
}
if err := conf.checkTemplateConfig(); err != nil {
return err
}
if conf.NumVCPUs == 0 {
conf.NumVCPUs = defaultVCPUs
}
if conf.MemorySize == 0 {
conf.MemorySize = defaultMemSzMiB
}
if conf.DefaultBridges == 0 {
conf.DefaultBridges = defaultBridges
}
if conf.BlockDeviceDriver == "" {
conf.BlockDeviceDriver = defaultBlockDriver
}
if conf.DefaultMaxVCPUs == 0 {
conf.DefaultMaxVCPUs = defaultMaxQemuVCPUs
}
if conf.Msize9p == 0 && conf.SharedFS != config.VirtioFS {
conf.Msize9p = defaultMsize9p
}
return nil
}
// AddKernelParam allows the addition of new kernel parameters to an existing
// hypervisor configuration.
func (conf *HypervisorConfig) AddKernelParam(p Param) error {
if p.Key == "" {
return fmt.Errorf("Empty kernel parameter")
}
conf.KernelParams = append(conf.KernelParams, p)
return nil
}
func (conf *HypervisorConfig) addCustomAsset(a *types.Asset) error {
if a == nil || a.Path() == "" {
// We did not get a custom asset, we will use the default one.
return nil
}
if !a.Valid() {
return fmt.Errorf("Invalid %s at %s", a.Type(), a.Path())
}
virtLog.Debugf("Using custom %v asset %s", a.Type(), a.Path())
if conf.customAssets == nil {
conf.customAssets = make(map[types.AssetType]*types.Asset)
}
conf.customAssets[a.Type()] = a
return nil
}
func (conf *HypervisorConfig) assetPath(t types.AssetType) (string, error) {
// Custom assets take precedence over the configured ones
a, ok := conf.customAssets[t]
if ok {
return a.Path(), nil
}
// We could not find a custom asset for the given type, let's
// fall back to the configured ones.
switch t {
case types.KernelAsset:
return conf.KernelPath, nil
case types.ImageAsset:
return conf.ImagePath, nil
case types.InitrdAsset:
return conf.InitrdPath, nil
case types.HypervisorAsset:
return conf.HypervisorPath, nil
case types.HypervisorCtlAsset:
return conf.HypervisorCtlPath, nil
case types.JailerAsset:
return conf.JailerPath, nil
case types.FirmwareAsset:
return conf.FirmwarePath, nil
default:
return "", fmt.Errorf("Unknown asset type %v", t)
}
}
func (conf *HypervisorConfig) isCustomAsset(t types.AssetType) bool {
_, ok := conf.customAssets[t]
return ok
}
// KernelAssetPath returns the guest kernel path
func (conf *HypervisorConfig) KernelAssetPath() (string, error) {
return conf.assetPath(types.KernelAsset)
}
// CustomKernelAsset returns true if the kernel asset is a custom one, false otherwise.
func (conf *HypervisorConfig) CustomKernelAsset() bool {
return conf.isCustomAsset(types.KernelAsset)
}
// ImageAssetPath returns the guest image path
func (conf *HypervisorConfig) ImageAssetPath() (string, error) {
return conf.assetPath(types.ImageAsset)
}
// CustomImageAsset returns true if the image asset is a custom one, false otherwise.
func (conf *HypervisorConfig) CustomImageAsset() bool {
return conf.isCustomAsset(types.ImageAsset)
}
// InitrdAssetPath returns the guest initrd path
func (conf *HypervisorConfig) InitrdAssetPath() (string, error) {
return conf.assetPath(types.InitrdAsset)
}
// CustomInitrdAsset returns true if the initrd asset is a custom one, false otherwise.
func (conf *HypervisorConfig) CustomInitrdAsset() bool {
return conf.isCustomAsset(types.InitrdAsset)
}
// HypervisorAssetPath returns the VM hypervisor path
func (conf *HypervisorConfig) HypervisorAssetPath() (string, error) {
return conf.assetPath(types.HypervisorAsset)
}
// HypervisorCtlAssetPath returns the VM hypervisor ctl path
func (conf *HypervisorConfig) HypervisorCtlAssetPath() (string, error) {
return conf.assetPath(types.HypervisorCtlAsset)
}
// JailerAssetPath returns the VM Jailer path
func (conf *HypervisorConfig) JailerAssetPath() (string, error) {
return conf.assetPath(types.JailerAsset)
}
// CustomHypervisorAsset returns true if the hypervisor asset is a custom one, false otherwise.
func (conf *HypervisorConfig) CustomHypervisorAsset() bool {
return conf.isCustomAsset(types.HypervisorAsset)
}
// FirmwareAssetPath returns the guest firmware path
func (conf *HypervisorConfig) FirmwareAssetPath() (string, error) {
return conf.assetPath(types.FirmwareAsset)
}
// CustomFirmwareAsset returns true if the firmware asset is a custom one, false otherwise.
func (conf *HypervisorConfig) CustomFirmwareAsset() bool {
return conf.isCustomAsset(types.FirmwareAsset)
}
func appendParam(params []Param, parameter string, value string) []Param {
return append(params, Param{parameter, value})
}
// SerializeParams converts []Param to []string
func SerializeParams(params []Param, delim string) []string {
var parameters []string
for _, p := range params {
if p.Key == "" && p.Value == "" {
continue
} else if p.Key == "" {
parameters = append(parameters, fmt.Sprint(p.Value))
} else if p.Value == "" {
parameters = append(parameters, fmt.Sprint(p.Key))
} else if delim == "" {
parameters = append(parameters, fmt.Sprint(p.Key))
parameters = append(parameters, fmt.Sprint(p.Value))
} else {
parameters = append(parameters, fmt.Sprintf("%s%s%s", p.Key, delim, p.Value))
}
}
return parameters
}
// DeserializeParams converts []string to []Param
func DeserializeParams(parameters []string) []Param {
var params []Param
for _, param := range parameters {
if param == "" {
continue
}
p := strings.SplitN(param, "=", 2)
if len(p) == 2 {
params = append(params, Param{Key: p[0], Value: p[1]})
} else {
params = append(params, Param{Key: p[0], Value: ""})
}
}
return params
}
func getHostMemorySizeKb(memInfoPath string) (uint64, error) {
f, err := os.Open(memInfoPath)
if err != nil {
return 0, err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
// Expected format: ["MemTotal:", "1234", "kB"]
parts := strings.Fields(scanner.Text())
// Sanity checks: Skip malformed entries.
if len(parts) < 3 || parts[0] != "MemTotal:" || parts[2] != "kB" {
continue
}
sizeKb, err := strconv.ParseUint(parts[1], 0, 64)
if err != nil {
continue
}
return sizeKb, nil
}
// Handle errors that may have occurred during the reading of the file.
if err := scanner.Err(); err != nil {
return 0, err
}
return 0, fmt.Errorf("unable get MemTotal from %s", memInfoPath)
}
// RunningOnVMM checks if the system is running inside a VM.
func RunningOnVMM(cpuInfoPath string) (bool, error) {
if runtime.GOARCH == "arm64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x" {
virtLog.Info("Unable to know if the system is running inside a VM")
return false, nil
}
flagsField := "flags"
f, err := os.Open(cpuInfoPath)
if err != nil {
return false, err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
// Expected format: ["flags", ":", ...] or ["flags:", ...]
fields := strings.Fields(scanner.Text())
if len(fields) < 2 {
continue
}
if !strings.HasPrefix(fields[0], flagsField) {
continue
}
for _, field := range fields[1:] {
if field == "hypervisor" {
return true, nil
}
}
// As long as we have been able to analyze the fields from
// "flags", there is no reason to check what comes next from
// /proc/cpuinfo, because we already know we are not running
// on a VMM.
return false, nil
}
if err := scanner.Err(); err != nil {
return false, err
}
return false, fmt.Errorf("Couldn't find %q from %q output", flagsField, cpuInfoPath)
}
func getHypervisorPid(h hypervisor) int {
pids := h.getPids()
if len(pids) == 0 {
return 0
}
return pids[0]
}
func generateVMSocket(id string, useVsock bool, vmStogarePath string) (interface{}, error) {
if useVsock {
vhostFd, contextID, err := utils.FindContextID()
if err != nil {
return nil, err
}
return types.VSock{
VhostFd: vhostFd,
ContextID: contextID,
Port: uint32(vSockPort),
}, nil
}
path, err := utils.BuildSocketPath(filepath.Join(vmStogarePath, id), defaultSocketName)
if err != nil {
return nil, err
}
return types.Socket{
DeviceID: defaultSocketDeviceID,
ID: defaultSocketID,
HostPath: path,
Name: defaultSocketChannelName,
}, nil
}
// hypervisor is the virtcontainers hypervisor interface.
// The default hypervisor implementation is Qemu.
type hypervisor interface {
createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig, stateful bool) error
startSandbox(timeout int) error
stopSandbox() error
pauseSandbox() error
saveSandbox() error
resumeSandbox() error
addDevice(devInfo interface{}, devType deviceType) error
hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error)
hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error)
resizeMemory(memMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, memoryDevice, error)
resizeVCPUs(vcpus uint32) (uint32, uint32, error)
getSandboxConsole(sandboxID string) (string, error)
disconnect()
capabilities() types.Capabilities
hypervisorConfig() HypervisorConfig
getThreadIDs() (vcpuThreadIDs, error)
cleanup() error
// getPids returns a slice of hypervisor related process ids.
// The hypervisor pid must be put at index 0.
getPids() []int
fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error
toGrpc() ([]byte, error)
check() error
save() persistapi.HypervisorState
load(persistapi.HypervisorState)
// generate the socket to communicate the host and guest
generateSocket(id string, useVsock bool) (interface{}, error)
}
1
https://gitee.com/leisunstar/runtime.git
git@gitee.com:leisunstar/runtime.git
leisunstar
runtime
runtime
5cef3e7b53f9

搜索帮助