diff --git a/.cargo/config b/.cargo/config index b66bb9cbd9147017bdb6547ae1c4cf21acd38c6c..31ec4746704a0c9065893b1df5c1b29ec5c8d385 100644 --- a/.cargo/config +++ b/.cargo/config @@ -12,8 +12,12 @@ [build] -[target.'cfg(any(target_arch="aarch64"))'] +[target.'cfg(all(target_arch = "aarch64", not(target_env = "ohos")))'] rustflags = [ "-C", "link-arg=-lgcc", ] +[target.'cfg(not(any(target_env = "musl", target_env = "ohos")))'] +rustflags = [ + "-C", "link-arg=-lpixman-1", +] diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af70023ff67f3394a9e827e5824bc5583460f3e9 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,136 @@ +# Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +# +# StratoVirt is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan +# PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +name: Build and release static stratovirt +on: + release: + types: [published] + +jobs: + build-stratovirt-x86_64: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Install musl-gcc + run: sudo apt install -y musl-tools + + - name: Install rust toolchain (x86_64-unknown-linux-musl) + uses: actions-rs/toolchain@v1 + with: + toolchain: "1.64.0" + target: x86_64-unknown-linux-musl + + - name: Static build (x86_64) + uses: actions-rs/cargo@v1 + with: + toolchain: "1.64.0" + command: build + args: --bin stratovirt --release --target=x86_64-unknown-linux-musl + + - name: Build archive + shell: bash + run: | + mkdir archive + cd archive + cp "../target/x86_64-unknown-linux-musl/release/stratovirt" ./ + tar -czf "stratovirt-static-x86_64.tar.gz" stratovirt + + - name: Upload archive + uses: actions/upload-artifact@v3 + with: + name: stratovirt-static-x86_64.tar.gz + path: archive/stratovirt-static-x86_64.tar.gz + + build-stratovirt-aarch64: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Install musl-gcc + run: sudo apt install -y musl-tools + + - name: Install rust toolchain (aarch64-unknown-linux-musl) + uses: actions-rs/toolchain@v1 + with: + toolchain: "1.64.0" + target: aarch64-unknown-linux-musl + override: true + + - name: Static build (aarch64) + uses: actions-rs/cargo@v1 + with: + toolchain: "1.64.0" + command: build + use-cross: true + args: --bin stratovirt --release --target=aarch64-unknown-linux-musl + + - name: Build archive + shell: bash + run: | + mkdir archive + cd archive + cp "../target/aarch64-unknown-linux-musl/release/stratovirt" ./ + tar -czf "stratovirt-static-aarch64.tar.gz" stratovirt + + - name: Upload archive + uses: actions/upload-artifact@v3 + with: + name: stratovirt-static-aarch64.tar.gz + path: archive/stratovirt-static-aarch64.tar.gz + + release-stratovirt: + name: release-stratovirt + needs: [build-stratovirt-x86_64, build-stratovirt-aarch64] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Download artifact x86_64 + uses: actions/download-artifact@v3 + with: + name: stratovirt-static-x86_64.tar.gz + path: ./ + + - name: Download artifact aarch64 + uses: 
actions/download-artifact@v3 + with: + name: stratovirt-static-aarch64.tar.gz + path: ./ + + - name: Split tag name + env: + TAG: ${{ github.ref }} + id: split + run: echo "::set-output name=fragment::${TAG##*v}" + + - name: Upload release asset x86_64 + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }} + with: + upload_url: ${{ github.event.release.upload_url }} + asset_path: ./stratovirt-static-x86_64.tar.gz + asset_name: stratovirt-static-${{ steps.split.outputs.fragment }}-x86_64.tar.gz + asset_content_type: application/x-tgz + + - name: Upload release asset aarch64 + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }} + with: + upload_url: ${{ github.event.release.upload_url }} + asset_path: ./stratovirt-static-aarch64.tar.gz + asset_name: stratovirt-static-${{ steps.split.outputs.fragment }}-aarch64.tar.gz + asset_content_type: application/x-tgz diff --git a/.gitignore b/.gitignore index eb5a316cbd195d26e3f768c7dd8e1b47299e17f8..68d13feb1e91cc25019f3bc3b5b4b575745b053d 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,9 @@ +# Don't add Cargo.lock file to gitignore. +# Ref: https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html + target +**/*.rs.bk + +# Ignore configuration directory generated by IDEA. +.idea +.vscode diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000000000000000000000000000000000000..5d983aa2dff15f3ad9512de29a04a91b186f9e4d --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,10 @@ +use_field_init_shorthand = true +edition = "2021" + +# These are unstable configurations of rustfmt, which can only be checked with +# `cargo +nightly fmt --all`, currently will not be integrated into CI. + +#wrap_comments = true +#comment_width = 100 +#normalize_comments = true +#format_code_in_doc_comments = true diff --git a/Cargo.lock b/Cargo.lock index 215f1d557c9ce09aeeb43ec81d0427f9d109d2d3..36c8f841879cdb9a30fb1911130c24957782e5ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,128 +1,254 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
+version = 3 + [[package]] -name = "StratoVirt" -version = "2.1.0" +name = "acpi" +version = "2.4.0" dependencies = [ - "error-chain", - "hypervisor", - "kvm-ioctls", - "libc", + "address_space", + "anyhow", "log", - "machine", "machine_manager", - "migration", + "thiserror", "util", - "vfio", - "virtio", - "vmm-sys-util", ] [[package]] -name = "acpi" -version = "2.1.0" +name = "address_space" +version = "2.4.0" dependencies = [ - "address_space", - "byteorder", - "error-chain", + "anyhow", + "arc-swap", + "libc", "log", "machine_manager", + "migration", + "migration_derive", + "nix 0.26.2", + "once_cell", + "thiserror", + "trace", "util", + "vmm-sys-util", ] [[package]] -name = "addr2line" -version = "0.15.1" +name = "aho-corasick" +version = "0.7.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03345e98af8f3d786b6d9f656ccfa6ac316d954e92bc4841f0bba20789d5fb5a" +checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" dependencies = [ - "gimli", + "memchr", ] [[package]] -name = "address_space" -version = "2.1.0" +name = "alsa" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8512c9117059663fb5606788fbca3619e2a91dac0e3fe516242eab1fa6be5e44" dependencies = [ - "arc-swap", - "error-chain", - "hypervisor", - "kvm-bindings", - "kvm-ioctls", + "alsa-sys", + "bitflags 1.3.2", "libc", - "log", - "machine_manager", - "migration", - "migration_derive", - "serial_test", - "util", - "vmm-sys-util", + "nix 0.24.3", +] + +[[package]] +name = "alsa-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db8fee663d06c4e303404ef5f40488a53e062f89ba8bfed81f42325aafad1527" +dependencies = [ + "libc", + "pkg-config", ] [[package]] -name = "adler" -version = "1.0.2" +name = "anyhow" +version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" [[package]] name = "arc-swap" -version = "0.4.8" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dabe5a181f83789739c194cbe5a897dde195078fac08568d09221fd6137a7ba8" +checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" [[package]] -name = "autocfg" -version = "1.0.1" +name = "atk" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +checksum = "6ba16453d10c712284061a05f6510f75abeb92b56ba88dfeb48c74775020cc22" +dependencies = [ + "atk-sys", + "bitflags 1.3.2", + "glib", + "libc", +] [[package]] -name = "backtrace" -version = "0.3.59" +name = "atk-sys" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4717cfcbfaa661a0fd48f8453951837ae7e8f81e481fbb136e3202d72805a744" +checksum = "e3bf0a7ca572fbd5762fd8f8cd65a581e06767bc1234913fe1f43e370cff6e90" dependencies = [ - "addr2line", - "cc", - "cfg-if", + "glib-sys", + "gobject-sys", "libc", - "miniz_oxide", - "object", - "rustc-demangle", + "system-deps", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" + +[[package]] +name = "bindgen" +version = "0.65.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" +dependencies = [ + "bitflags 1.3.2", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.18", + "which", ] [[package]] name = "bitflags" -version = "1.2.1" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "bitintr" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +checksum = "7ba5a5c4df8ac8673f22698f443ef1ce3853d7f22d5a15ebf66b9a7553b173dd" + +[[package]] +name = "block" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" + +[[package]] +name = "block_backend" +version = "2.4.0" +dependencies = [ + "anyhow", + "byteorder", + "libc", + "log", + "machine_manager", + "once_cell", + "thiserror", + "trace", + "util", + "vmm-sys-util", +] [[package]] name = "boot_loader" -version = "2.1.0" +version = "2.4.0" dependencies = [ "address_space", + "anyhow", "devices", - "error-chain", "kvm-bindings", - "kvm-ioctls", - "libc", "log", + "thiserror", "util", - "vmm-sys-util", ] +[[package]] +name = "bumpalo" +version = "3.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" + [[package]] name = "byteorder" version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +[[package]] +name = "cairo-rs" +version = "0.17.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab3603c4028a5e368d09b51c8b624b9a46edcd7c3778284077a6125af73c9f0a" +dependencies = [ + "bitflags 1.3.2", + "cairo-sys-rs", + "glib", + "libc", + "once_cell", + "thiserror", +] + +[[package]] +name = "cairo-sys-rs" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f55382a01d30e5e53f185eee269124f5e21ab526595b872751278dfbb463594e" +dependencies = [ + "glib-sys", + "libc", + "system-deps", +] + [[package]] name = "cc" -version = "1.0.67" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-expr" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd" +checksum = "e70d3ad08698a0568b0562f22710fe6bfc1f4a61a367c77d0398c562eadd453a" +dependencies = [ + "smallvec", + 
"target-lexicon", +] [[package]] name = "cfg-if" @@ -130,492 +256,1649 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chardev_backend" +version = "2.4.0" +dependencies = [ + "anyhow", + "libc", + "log", + "machine_manager", + "nix 0.26.2", + "util", + "vmm-sys-util", +] + +[[package]] +name = "clang-sys" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "clap" +version = "4.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f13b9c79b5d1dd500d20ef541215a6423c75829ef43117e1b4d17fd8af0b5d76" +dependencies = [ + "bitflags 1.3.2", + "clap_derive", + "clap_lex", + "once_cell", +] + +[[package]] +name = "clap_derive" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "clap_lex" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "033f6b7a4acb1f358c742aaca805c939ee73b4c6209ae4318ec7aca81c42e646" +dependencies = [ + "os_str_bytes", +] + +[[package]] +name = "code_generator" +version = "2.4.0" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "serde", + "syn 2.0.18", + "toml", +] + +[[package]] +name = "const_format" +version = "0.2.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c990efc7a285731f9a4378d81aff2f0e85a2c8781a05ef0f8baa8dac54d0ff48" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e026b6ce194a874cb9cf32cd5772d1ef9767cc8fcb5765948d74f37a9d8b2bf6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "cpu" -version = "2.1.0" +version = "2.4.0" dependencies = [ - "error-chain", - "hypervisor", + "anyhow", "kvm-bindings", - "kvm-ioctls", "libc", "log", "machine_manager", "migration", "migration_derive", - "serial_test", + "nix 0.26.2", + "thiserror", + "trace", "util", "vmm-sys-util", ] [[package]] name = "devices" -version = "2.1.0" +version = "2.4.0" dependencies = [ "acpi", "address_space", + "alsa", + "anyhow", + "block_backend", "byteorder", - "error-chain", - "hypervisor", - "kvm-bindings", - "kvm-ioctls", + "chardev_backend", + "clap", + "cpu", + "drm-fourcc", + "hisysevent", "libc", + "libpulse-binding", + "libpulse-simple-binding", + "libusb1-sys", "log", "machine_manager", "migration", "migration_derive", + "once_cell", + "rand", + "rusb", "serde", - "serial_test", - "sysbus", + "serde_json", + "strum", + "strum_macros", + "thiserror", + "trace", + "ui", "util", + "v4l2-sys-mit", "vmm-sys-util", ] [[package]] -name = "error-chain" -version = "0.12.4" +name = "drm-fourcc" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" +checksum = "0aafbcdb8afc29c1a7ee5fbe53b5d62f4565b35a042a662ca9fecd0b54dae6f4" + +[[package]] +name = "duct" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fc6a0a59ed0888e0041cf708e66357b7ae1a82f1c67247e1f93b5e0818f7d8d" dependencies = [ - "backtrace", - "version_check", + "libc", + "once_cell", + "os_pipe", + "shared_child", ] [[package]] -name = "gimli" -version = "0.24.0" +name = "either" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4075386626662786ddb0ec9081e7c7eeb1ba31951f447ca780ef9f5d568189" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" [[package]] -name = "heck" -version = "0.3.3" +name = "field-offset" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +checksum = "a3cf3a800ff6e860c863ca6d4b16fd999db8b752819c1606884047b73e468535" dependencies = [ - "unicode-segmentation", + "memoffset 0.8.0", + "rustc_version", ] [[package]] -name = "hypervisor" -version = "2.1.0" +name = "futures-channel" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ - "arc-swap", - "error-chain", - "kvm-bindings", - "kvm-ioctls", - "log", - "migration", - "migration_derive", + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" + +[[package]] +name = "futures-executor" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" + +[[package]] +name = "futures-macro" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.18", +] + +[[package]] +name = "futures-task" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" + +[[package]] +name = "futures-util" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +dependencies = [ + "futures-core", + "futures-macro", + "futures-task", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "gdk" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be1df5ea52cccd7e3a0897338b5564968274b52f5fd12601e0afa44f454c74d3" +dependencies = [ + "bitflags 1.3.2", + "cairo-rs", + "gdk-pixbuf", + "gdk-sys", + "gio", + "glib", + "libc", + "pango", +] + +[[package]] +name = "gdk-pixbuf" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b023fbe0c6b407bd3d9805d107d9800da3829dc5a676653210f1d5f16d7f59bf" 
+dependencies = [ + "bitflags 1.3.2", + "gdk-pixbuf-sys", + "gio", + "glib", + "libc", "once_cell", - "util", - "vmm-sys-util", ] [[package]] -name = "instant" -version = "0.1.9" +name = "gdk-pixbuf-sys" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" +checksum = "7b41bd2b44ed49d99277d3925652a163038bd5ed943ec9809338ffb2f4391e3b" dependencies = [ - "cfg-if", + "gio-sys", + "glib-sys", + "gobject-sys", + "libc", + "system-deps", +] + +[[package]] +name = "gdk-sys" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2152de9d38bc67a17b3fe49dc0823af5bf874df59ea088c5f28f31cf103de703" +dependencies = [ + "cairo-sys-rs", + "gdk-pixbuf-sys", + "gio-sys", + "glib-sys", + "gobject-sys", + "libc", + "pango-sys", + "pkg-config", + "system-deps", +] + +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gettext-rs" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e49ea8a8fad198aaa1f9655a2524b64b70eb06b2f3ff37da407566c93054f364" +dependencies = [ + "gettext-sys", + "locale_config", +] + +[[package]] +name = "gettext-sys" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c63ce2e00f56a206778276704bbe38564c8695249fdc8f354b4ef71c57c3839d" +dependencies = [ + "cc", + "temp-dir", +] + +[[package]] +name = "gio" +version = "0.17.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d14522e56c6bcb6f7a3aebc25cbcfb06776af4c0c25232b601b4383252d7cb92" +dependencies = [ + "bitflags 1.3.2", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "gio-sys", + "glib", + "libc", + "once_cell", + "pin-project-lite", + "smallvec", + "thiserror", +] + +[[package]] +name = "gio-sys" +version = "0.17.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b1d43b0d7968b48455244ecafe41192871257f5740aa6b095eb19db78e362a5" +dependencies = [ + "glib-sys", + "gobject-sys", + "libc", + "system-deps", + "winapi", +] + +[[package]] +name = "glib" +version = "0.17.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7f1de7cbde31ea4f0a919453a2dcece5d54d5b70e08f8ad254dc4840f5f09b6" +dependencies = [ + "bitflags 1.3.2", + "futures-channel", + "futures-core", + "futures-executor", + "futures-task", + "futures-util", + "gio-sys", + "glib-macros", + "glib-sys", + "gobject-sys", + "libc", + "memchr", + "once_cell", + "smallvec", + "thiserror", +] + +[[package]] +name = "glib-macros" +version = "0.17.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a7206c5c03851ef126ea1444990e81fdd6765fb799d5bc694e4897ca01bb97f" +dependencies = [ + "anyhow", + "heck", + "proc-macro-crate", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "glib-sys" +version = "0.17.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f00ad0a1bf548e61adfff15d83430941d9e1bb620e334f779edd1c745680a5" +dependencies = [ + "libc", + "system-deps", +] + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "gobject-sys" +version = "0.17.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15e75b0000a64632b2d8ca3cf856af9308e3a970844f6e9659bd197f026793d0" +dependencies = [ + "glib-sys", + "libc", + "system-deps", +] + +[[package]] +name = "gtk" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c4222ab92b08d4d0bab90ddb6185b4e575ceeea8b8cdf00b938d7b6661d966" +dependencies = [ + "atk", + "bitflags 1.3.2", + "cairo-rs", + "field-offset", + "futures-channel", + "gdk", + "gdk-pixbuf", + "gio", + "glib", + "gtk-sys", + "gtk3-macros", + "libc", + "once_cell", + "pango", + "pkg-config", +] + +[[package]] +name = "gtk-sys" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d8eb6a4b93e5a7e6980f7348d08c1cd93d31fae07cf97f20678c5ec41de3d7e" +dependencies = [ + "atk-sys", + "cairo-sys-rs", + "gdk-pixbuf-sys", + "gdk-sys", + "gio-sys", + "glib-sys", + "gobject-sys", + "libc", + "pango-sys", + "system-deps", +] + +[[package]] +name = "gtk3-macros" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3efb84d682c9a39c10bd9f24f5a4b9c15cc8c7edc45c19cb2ca2c4fc38b2d95e" +dependencies = [ + "anyhow", + "proc-macro-crate", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hisysevent" +version = "2.4.0" +dependencies = [ + "anyhow", + "code_generator", + "lazy_static", + "libloading", + "log", +] + +[[package]] +name = "hypervisor" +version = "2.4.0" +dependencies = [ + "address_space", + "anyhow", + "cpu", + "devices", + "kvm-bindings", + "kvm-ioctls", + "libc", + "log", + "machine_manager", + "migration", + "migration_derive", + "thiserror", + "trace", + "util", + "vmm-sys-util", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "io-uring" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b7b36074613a723279637061b40db993208908a94f10ccb14436ce735bc0f57" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + +[[package]] +name = "itoa" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" + +[[package]] +name = "js-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "kvm-bindings" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"081fbd8164229a990fbf24a1f35d287740db110c2b5d42addf460165f1b0e032" +dependencies = [ + "vmm-sys-util", +] + +[[package]] +name = "kvm-ioctls" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9002dff009755414f22b962ec6ae6980b07d6d8b06e5297b1062019d72bd6a8c" +dependencies = [ + "bitflags 2.5.0", + "kvm-bindings", + "libc", + "vmm-sys-util", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.146" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b" + +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = "libpulse-binding" +version = "2.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1745b20bfc194ac12ef828f144f0ec2d4a7fe993281fa3567a0bd4969aee6890" +dependencies = [ + "bitflags 1.3.2", + "libc", + "libpulse-sys", + "num-derive", + "num-traits", + "winapi", +] + +[[package]] +name = "libpulse-simple-binding" +version = "2.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ced94199e6e44133431374e4043f34e1f0697ebfb7b7d6c244a65bfaedf0e31" +dependencies = [ + "libpulse-binding", + "libpulse-simple-sys", + "libpulse-sys", +] + +[[package]] +name = "libpulse-simple-sys" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84e423d9c619c908ce9b4916080e65ab586ca55b8c4939379f15e6e72fb43842" +dependencies = [ + "libpulse-sys", + "pkg-config", +] + +[[package]] +name = "libpulse-sys" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2191e6880818d1df4cf72eac8e91dce7a5a52ba0da4b2a5cdafabc22b937eadb" +dependencies = [ + "libc", + "num-derive", + "num-traits", + "pkg-config", + "winapi", +] + +[[package]] +name = "libusb1-sys" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17f6bace2f39082e9787c851afce469e7b2fe0f1cc64bbc68ca96653b63d8f17" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "locale_config" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d2c35b16f4483f6c26f0e4e9550717a2f6575bcd6f12a53ff0c490a94a6934" +dependencies = [ + "lazy_static", + "objc", + "objc-foundation", + "regex", + "winapi", +] + +[[package]] +name = "log" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "518ef76f2f87365916b142844c16d8fefd85039bc5699050210a7778ee1cd1de" + +[[package]] +name = "machine" +version = "2.4.0" +dependencies = [ + "acpi", + "address_space", + "anyhow", + "block_backend", + "boot_loader", + "clap", + "cpu", + "devices", + "hypervisor", + "libc", + "log", + "machine_manager", + "migration", + "migration_derive", + "serde_json", + "thiserror", + "trace", + "ui", + "util", + "vfio", + "virtio", + "vmm-sys-util", +] + 
+[[package]] +name = "machine_manager" +version = "2.4.0" +dependencies = [ + "anyhow", + "clap", + "hex", + "libc", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "strum", + "strum_macros", + "thiserror", + "trace", + "util", + "vmm-sys-util", +] + +[[package]] +name = "malloc_buf" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" +dependencies = [ + "libc", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + +[[package]] +name = "memoffset" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" +dependencies = [ + "autocfg", +] + +[[package]] +name = "migration" +version = "2.4.0" +dependencies = [ + "anyhow", + "kvm-ioctls", + "log", + "machine_manager", + "migration_derive", + "once_cell", + "serde", + "serde_json", + "thiserror", + "util", +] + +[[package]] +name = "migration_derive" +version = "2.4.0" +dependencies = [ + "migration", + "proc-macro2", + "quote", + "syn 2.0.18", + "util", +] + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "mod_test" +version = "2.4.0" +dependencies = [ + "acpi", + "anyhow", + "byteorder", + "devices", + "hex", + "libc", + "machine", + "machine_manager", + "rand", + "serde", + "serde_json", + "util", + "virtio", + "vmm-sys-util", +] + +[[package]] +name = "nix" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", +] + +[[package]] +name = "nix" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", + "memoffset 0.7.1", + "pin-utils", + "static_assertions", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num-derive" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "objc" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" +dependencies = [ + "malloc_buf", +] + 
+[[package]] +name = "objc-foundation" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1add1b659e36c9607c7aab864a76c7a4c2760cd0cd2e120f3fb8b952c7e22bf9" +dependencies = [ + "block", + "objc", + "objc_id", +] + +[[package]] +name = "objc_id" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c92d4ddb4bd7b50d730c215ff871754d0da6b2178849f8a2a2ab69712d0c073b" +dependencies = [ + "objc", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "os_pipe" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb233f06c2307e1f5ce2ecad9f8121cffbbee2c95428f44ea85222e460d0d213" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "os_str_bytes" +version = "6.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" + +[[package]] +name = "ozone" +version = "2.4.0" +dependencies = [ + "anyhow", + "libc", + "nix 0.26.2", + "thiserror", + "util", +] + +[[package]] +name = "pango" +version = "0.17.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52c280b82a881e4208afb3359a8e7fde27a1b272280981f1f34610bed5770d37" +dependencies = [ + "bitflags 1.3.2", + "gio", + "glib", + "libc", + "once_cell", + "pango-sys", +] + +[[package]] +name = "pango-sys" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4293d0f0b5525eb5c24734d30b0ed02cd02aa734f216883f376b54de49625de8" +dependencies = [ + "glib-sys", + "gobject-sys", + "libc", + "system-deps", +] + +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "pin-project-lite" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "prettyplease" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b69d39aab54d069e7f2fe8cb970493e7834601ca2d8c65fd7bbd183578080d1" +dependencies = [ + "proc-macro2", + "syn 2.0.18", +] + +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "regex" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" + +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin", + "untrusted", + "web-sys", + "winapi", +] + +[[package]] +name = "rusb" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44a8c36914f9b1a3be712c1dfa48c9b397131f9a75707e570a391735f785c5d1" +dependencies = [ + "libc", + "libusb1-sys", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "rustls" +version = "0.21.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79ea77c539259495ce8ca47f53e66ae0330a8819f67e23ac96ca02f50e7b7d36" +dependencies = [ + "log", + "ring", + "rustls-webpki", + "sct", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2" +dependencies = [ + "base64", +] + +[[package]] +name = "rustls-webpki" +version = "0.101.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513722fd73ad80a71f72b61009ea1b584bcfa1483ca93949c8f290298837fa59" +dependencies = [ + "ring", + "untrusted", ] [[package]] -name = "itoa" -version = "0.4.7" +name = "rustversion" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] -name = "kvm-bindings" -version = "0.3.1" +name = "ryu" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e9742810f4cb95388955853c032cd50415d18834357f67b7b299fc28217cd3d" -dependencies = [ - "vmm-sys-util", -] +checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" [[package]] -name = "kvm-ioctls" -version = "0.6.1" +name = "sasl2-sys" +version = "0.1.20+2.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b8002ac5e13e1f61da5b8c440fe4de5bae029b4538cd899ef2dd93e1245bf71" +checksum = "9e645bd98535fc8fd251c43ba7c7c1f9be1e0369c99b6a5ea719052a773e655c" dependencies = [ - "kvm-bindings", + "cc", + "duct", "libc", - "vmm-sys-util", + "pkg-config", ] [[package]] -name = "lazy_static" -version = "1.4.0" +name = "sct" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +dependencies = [ + "ring", + "untrusted", +] [[package]] -name = "libc" -version = "0.2.94" +name = "semver" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18794a8ad5b29321f790b55d93dfba91e125cb1a9edbd4f8e3150acc771c1a5e" +checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" [[package]] -name = "lock_api" -version = "0.4.2" +name = "serde" +version = "1.0.163" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd96ffd135b2fd7b973ac026d28085defbe8983df057ced3eb4f2130b0831312" +checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2" dependencies = [ - "scopeguard", + "serde_derive", ] [[package]] -name = "log" -version = "0.4.14" +name = "serde_derive" +version = "1.0.163" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e" dependencies = [ - "cfg-if", + "proc-macro2", + "quote", + "syn 2.0.18", ] [[package]] -name = "machine" -version = "2.1.0" +name = "serde_json" +version = "1.0.96" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" dependencies = [ - "acpi", - "address_space", - "boot_loader", - "cpu", - "devices", - "error-chain", - "hypervisor", - "kvm-bindings", - "kvm-ioctls", - "libc", - "log", - "machine_manager", - "migration", - "pci", + "itoa", + "ryu", "serde", - "serde_json", - "sysbus", - "util", - "vfio", - "vfio-bindings", - "virtio", - "vmm-sys-util", ] [[package]] -name = "machine_manager" -version = "2.1.0" +name = "serde_spanned" +version = "0.6.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "93107647184f6027e3b7dcb2e11034cf95ffa1e3a682c67951963ac69c1c007d" dependencies = [ - "error-chain", - "libc", - "log", - "once_cell", "serde", - "serde_json", - "strum", - "strum_macros", - "util", - "vmm-sys-util", ] [[package]] -name = "migration" -version = "2.1.0" +name = "shared_child" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6be9f7d5565b1483af3e72975e2dee33879b3b86bd48c0929fccf6585d79e65a" dependencies = [ - "error-chain", - "kvm-ioctls", - "log", - "migration_derive", - "once_cell", - "serde", - "serde_json", - "util", + "libc", + "winapi", ] [[package]] -name = "migration_derive" -version = "2.1.0" -dependencies = [ - "migration", - "proc-macro2", - "quote", - "syn", - "util", -] +name = "shlex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" [[package]] -name = "miniz_oxide" -version = "0.4.4" +name = "slab" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" +checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" dependencies = [ - "adler", "autocfg", ] [[package]] -name = "object" -version = "0.24.0" +name = "smallvec" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a5b3dd1c072ee7963717671d1ca129f1048fda25edea6b752bfc71ac8854170" +checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" [[package]] -name = "once_cell" -version = "1.9.0" +name = "spin" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] -name = "ozone" -version = "2.1.0" +name = "sscanf" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c713ebd15ce561dd4a13ed62bc2a0368e16806fc30dcaf66ecf1256b2a3fdde6" dependencies = [ - "error-chain", - "libc", - "util", + "const_format", + "lazy_static", + "regex", + "sscanf_macro", ] [[package]] -name = "parking_lot" -version = "0.11.1" +name = "sscanf_macro" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb" +checksum = "84955aa74a157e5834d58a07be11af7f0ab923f0194a0bb2ea6b3db8b5d1611d" dependencies = [ - "instant", - "lock_api", - "parking_lot_core", + "convert_case", + "proc-macro2", + "quote", + "regex-syntax", + "strsim", + "syn 2.0.18", + "unicode-width", ] [[package]] -name = "parking_lot_core" -version = "0.8.3" +name = "static_assertions" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "stratovirt" +version = "2.4.0" dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall", - "smallvec", - "winapi", + "anyhow", + "hisysevent", + "log", + "machine", + "machine_manager", + "thiserror", + "trace", + "util", ] [[package]] -name = "pci" -version = "2.1.0" +name = "stratovirt-img" +version = "2.4.0" dependencies = [ - "acpi", - "address_space", - 
"byteorder", - "error-chain", - "hypervisor", - "kvm-bindings", - "kvm-ioctls", + "anyhow", + "block_backend", "libc", "log", "machine_manager", - "migration", - "migration_derive", - "once_cell", - "sysbus", "util", - "vmm-sys-util", ] [[package]] -name = "proc-macro2" -version = "1.0.26" +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "strum" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" + +[[package]] +name = "strum_macros" +version = "0.24.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a152013215dca273577e18d2bf00fa862b89b24169fb78c4c95aeb07992c9cec" +checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" dependencies = [ - "unicode-xid", + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 1.0.109", ] [[package]] -name = "quote" -version = "1.0.9" +name = "syn" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", + "quote", + "unicode-ident", ] [[package]] -name = "redox_syscall" -version = "0.2.5" +name = "syn" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94341e4e44e24f6b591b59e47a8a027df12e008d73fd5672dbea9cc22f4507d9" +checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" dependencies = [ - "bitflags", + "proc-macro2", + "quote", + "unicode-ident", ] [[package]] -name = "rustc-demangle" -version = "0.1.19" +name = "system-deps" +version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "410f7acf3cb3a44527c5d9546bad4bf4e6c460915d5f9f2fc524498bfe8f70ce" +checksum = "e5fa6fb9ee296c0dc2df41a656ca7948546d061958115ddb0bcaae43ad0d17d2" +dependencies = [ + "cfg-expr", + "heck", + "pkg-config", + "toml", + "version-compare", +] [[package]] -name = "ryu" -version = "1.0.5" +name = "target-lexicon" +version = "0.12.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +checksum = "fd1ba337640d60c3e96bc6f0638a939b9c9a7f2c316a1598c279828b3d1dc8c5" [[package]] -name = "scopeguard" -version = "1.1.0" +name = "temp-dir" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +checksum = "af547b166dd1ea4b472165569fc456cfb6818116f854690b0ff205e636523dab" [[package]] -name = "serde" -version = "1.0.125" +name = "thiserror" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" dependencies = [ - "serde_derive", + "thiserror-impl", ] [[package]] -name = "serde_derive" -version = "1.0.125" +name = "thiserror-impl" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b093b7a2bb58203b5da3056c05b4ec1fed827dcfdb37347a8841695263b3d06d" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" 
dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.18", ] [[package]] -name = "serde_json" -version = "1.0.64" +name = "toml" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" +checksum = "d6135d499e69981f9ff0ef2167955a5333c35e36f6937d382974566b3d5b94ec" dependencies = [ - "itoa", - "ryu", "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", ] [[package]] -name = "serial_test" -version = "0.5.1" +name = "toml_datetime" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0bccbcf40c8938196944a3da0e133e031a33f4d6b72db3bda3cc556e361905d" +checksum = "5a76a9312f5ba4c2dec6b9161fdf25d87ad8a09256ccea5a556fef03c706a10f" dependencies = [ - "lazy_static", - "parking_lot", - "serial_test_derive", + "serde", ] [[package]] -name = "serial_test_derive" -version = "0.5.1" +name = "toml_edit" +version = "0.19.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2acd6defeddb41eb60bb468f8825d0cfd0c2a76bc03bfd235b6a1dc4f6a1ad5" +checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739" dependencies = [ - "proc-macro2", - "quote", - "syn", + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", ] [[package]] -name = "smallvec" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" - -[[package]] -name = "strum" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7318c509b5ba57f18533982607f24070a55d353e90d4cae30c467cdb2ad5ac5c" - -[[package]] -name = "strum_macros" -version = "0.20.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee8bc6b87a5112aeeab1f4a9f7ab634fe6cbefc4850006df31267f4cfb9e3149" +name = "trace" +version = "2.4.0" dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn", + "anyhow", + "lazy_static", + "libloading", + "log", + "regex", + "trace_generator", + "vmm-sys-util", ] [[package]] -name = "syn" -version = "1.0.72" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1e8cdbefb79a9a5a65e0db8b47b723ee907b7c7f8496c76a1770b5c310bab82" +name = "trace_generator" +version = "2.4.0" dependencies = [ "proc-macro2", "quote", - "unicode-xid", + "serde", + "syn 2.0.18", + "toml", ] [[package]] -name = "sysbus" -version = "2.1.0" +name = "ui" +version = "2.4.0" dependencies = [ - "acpi", "address_space", - "error-chain", - "hypervisor", - "kvm-ioctls", + "anyhow", + "bitintr", + "cairo-rs", + "gettext-rs", + "gtk", + "libc", + "log", + "machine_manager", + "once_cell", + "rustls", + "rustls-pemfile", + "sasl2-sys", + "serde_json", + "sscanf", + "thiserror", + "trace", + "util", "vmm-sys-util", ] +[[package]] +name = "unicode-ident" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" + [[package]] name = "unicode-segmentation" -version = "1.8.0" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + +[[package]] +name = "unicode-width" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" +checksum = 
"c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" [[package]] name = "unicode-xid" -version = "0.2.2" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + +[[package]] +name = "untrusted" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" [[package]] name = "util" -version = "2.1.0" +version = "2.4.0" dependencies = [ + "anyhow", "arc-swap", "byteorder", - "error-chain", + "io-uring", "kvm-bindings", - "kvm-ioctls", "libc", + "libloading", "log", + "nix 0.26.2", "once_cell", + "serde", + "thiserror", + "trace", + "v4l2-sys-mit", "vmm-sys-util", ] +[[package]] +name = "v4l2-sys-mit" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6779878362b9bacadc7893eac76abe69612e8837ef746573c4a5239daf11990b" +dependencies = [ + "bindgen", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version-compare" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "579a42fc0b8e0c63b76519a339be31bed574929511fa53c1a3acae26eb258f29" + [[package]] name = "version_check" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "vfio" -version = "2.1.0" +version = "2.4.0" dependencies = [ "address_space", + "anyhow", "byteorder", - "error-chain", + "clap", + "devices", "hypervisor", "kvm-bindings", "kvm-ioctls", "libc", "log", + "machine_manager", "once_cell", - "pci", + "thiserror", "util", "vfio-bindings", "vmm-sys-util", @@ -623,41 +1906,125 @@ dependencies = [ [[package]] name = "vfio-bindings" -version = "0.2.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a21f546f2bda37f5a8cfb138c87f95b8e34d2d78d6a7a92ba3785f4e08604a7" +checksum = "43449b404c488f70507dca193debd4bea361fe8089869b947adc19720e464bce" [[package]] name = "virtio" -version = "2.1.0" +version = "2.4.0" dependencies = [ "acpi", "address_space", + "anyhow", + "block_backend", "byteorder", + "chardev_backend", + "clap", "devices", - "error-chain", - "hypervisor", - "kvm-ioctls", "libc", "log", "machine_manager", "migration", "migration_derive", - "pci", + "once_cell", "serde_json", - "sysbus", + "thiserror", + "trace", + "ui", "util", "vmm-sys-util", ] [[package]] name = "vmm-sys-util" -version = "0.7.0" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1435039746e20da4f8d507a72ee1b916f7b4b05af7a91c093d2c6561934ede" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.18", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.18", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" + +[[package]] +name = "web-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "which" +version = "4.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1cdd1d72e262bbfb014de65ada24c1ac50e10a2e3b1e8ec052df188c2ee5dfa" +checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" dependencies = [ - "bitflags", + "either", "libc", + "once_cell", ] [[package]] @@ -681,3 +2048,12 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "winnow" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699" +dependencies = [ + "memchr", +] diff --git a/Cargo.toml b/Cargo.toml index 46371cbff2ee0443c975121940dfbf65c9ea98f7..4c78415f7a98ca013c4d9aa334841e3bb31e8225 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,55 +1,57 @@ [package] -name = "StratoVirt" -version = "2.1.0" +name = "stratovirt" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" description = "a lightweight hypervisor with low memory overhead and fast booting speed" license = "Mulan PSL v2" [dependencies] -error-chain = "0.12.4" -kvm-ioctls = "0.6.0" -libc = ">=0.2.71" -log = "0.4.8" -vmm-sys-util = ">=0.7.0" -hypervisor = { path = "hypervisor" } +thiserror = "1.0" +anyhow = "1.0" +log = "0.4" machine = { path = "machine" } machine_manager = { path = "machine_manager" } -migration = { path = "migration" } util = { path = "util" } -virtio = { path = "virtio" } -vfio = { path = "vfio" } +trace = { path = "trace" } +hisysevent = { path = "hisysevent" } [workspace] members = [ - "address_space", - "boot_loader", - "cpu", - "devices", - "hypervisor", - "machine", - "machine_manager", - "migration", - "migration_derive", - "pci", - "sysbus", - "util", - "acpi", - "virtio", "ozone", - "vfio", + "image", + "tests/mod_test", ] -[[bin]] -name = 
"stratovirt" -path = "src/main.rs" - -[[bin]] -name = "ozone" -path = "ozone/src/main.rs" - [features] default = [] +boot_time = ["machine/boot_time"] +scream_alsa = ["machine/scream_alsa"] +scream_pulseaudio = ["machine/scream_pulseaudio"] +scream_ohaudio = ["machine/scream_ohaudio"] +pvpanic = ["machine/pvpanic"] +demo_device = ["machine/demo_device"] +usb_host = ["machine/usb_host"] +usb_camera_v4l2 = ["machine/usb_camera_v4l2"] +usb_camera_oh = ["machine/usb_camera_oh"] +gtk = ["machine/gtk"] +vnc = ["machine/vnc"] +vnc_auth = ["machine/vnc_auth"] +ohui_srv = ["machine/ohui_srv"] +ramfb = ["machine/ramfb"] +virtio_gpu = ["machine/virtio_gpu"] +trace_to_logger = ["trace/trace_to_logger", "machine/trace_to_logger"] +trace_to_ftrace = ["trace/trace_to_ftrace", "machine/trace_to_ftrace"] +trace_to_hitrace = ["trace/trace_to_hitrace", "machine/trace_to_hitrace"] +hisysevent = ["hisysevent/hisysevent"] +vfio = ["machine/vfio_device"] +usb_uas = ["machine/usb_uas"] +virtio_rng = ["machine/virtio_rng"] +virtio_scsi = ["machine/virtio_scsi"] +vhost_vsock = ["machine/vhost_vsock"] +vhostuser_block = ["machine/vhostuser_block"] +vhostuser_net = ["machine/vhostuser_net"] +vhost_net = ["machine/vhost_net"] [package.metadata.rpm.cargo] buildflags = ["--release"] @@ -63,3 +65,4 @@ panic = "abort" [profile.release] panic = "abort" lto = true +debug = true diff --git a/Makefile b/Makefile index c6016784cc951139a60a6c84bddb8dd9f1dd0d37..e39a3acbe8a86d9d8b51e021534b977b6ad43341 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,10 @@ .PHONY: build -build: - cargo build +build: yum-deps + cargo build --workspace --bins --release + +.PHONY: dbg-build +dbg-build: yum-deps + cargo build --workspace --bins .PHONY: install install: @@ -9,3 +13,14 @@ install: .PHONY: clean clean: cargo clean + +yum-deps: + @yum install pixman-devel + @yum install libcap-ng-devel + @yum install cyrus-sasl-devel + @yum install pulseaudio + @yum install clang + @yum install gtk3-devel + @yum install libusbx + @yum install alsa-lib-devel + @yum install make diff --git a/README.ch.md b/README.ch.md index 93e5013c16f49458ca4e4719e5a0fe6782699bf1..68f14aa99f7646d6cd7c4bc208bcbd9caf0a998e 100644 --- a/README.ch.md +++ b/README.ch.md @@ -1,7 +1,8 @@ # StratoVirt: -StratoVirt是计算产业中面向云数据中心的企业级虚拟化平台,实现了一套架构统一支持虚拟机、容器、Serverless三种场景。StratoVirt在轻量低噪、软硬协同、Rust语言级安全等方面具备关键技术竞争优势。 +StratoVirt是计算产业中面向云数据中心的企业级虚拟化平台,实现了一套架构统一支持虚拟机、容器、Serverless三种场景。 +StratoVirt在轻量低噪、软硬协同、Rust语言级安全等方面具备关键技术竞争优势。 -StratoVirt预留了接口和设计来支持更多特性,未来甚至向标准虚拟化演进。 +StratoVirt预留了接口和设计来支持更多特性,同时支持标准虚拟化和轻量级虚拟化,也预留了对新型异构设备扩展支持能力。 ## 如何开始 @@ -15,7 +16,7 @@ https://www.rust-lang.org/tools/install ```sh $ git clone https://gitee.com/openeuler/stratovirt.git $ cd stratovirt -$ cargo build --release +$ make build ``` 可以在`target/release/stratovirt`路径下找到生成的二进制文件 @@ -26,7 +27,7 @@ $ cargo build --release 可以通过以下链接获取我们准备好的linux内核镜像和rootfs镜像: -https://repo.openeuler.org/openEuler-21.03/stratovirt_img/ +https://repo.openeuler.org/openEuler-22.03-LTS/stratovirt_img/ 启动标准机型的虚拟机需要指定遵循UEFI的edk2固件文件。 @@ -56,7 +57,7 @@ $ ./target/release/stratovirt \ -serial stdio ``` -关于制作rootfs镜像、编译内核镜像以及编译StratoVirt的详细指导,请参考[StratoVirt Quickstart](./docs/quickstart.md)。 +关于制作rootfs镜像、编译内核镜像以及编译StratoVirt的详细指导,请参考[StratoVirt Quickstart](./docs/quickstart.ch.md)。 StratoVirt所支持更多特性,详细指导请参考[Configuration Guidebook](docs/config_guidebook.md)。 diff --git a/README.md b/README.md index 9778b6196568adb02ed9cb41535d20b9fac4d734..ab26df53f1ed7f930fb887d2dd33a675e1876834 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,9 @@ 
three scenarios: virtual machines, containers, and serverless computing. StratoVirt has competitive advantages in light weight and low noise, software and hardware coordination, and Rust language-level security. -StratoVirt reserves interface and design for importing more features, even -evaluates to standard virtualization. +StratoVirt reserves interfaces and design to support more features, now can support +standard and lightweight virtualization together, +as well as the ability to extend support for new heterogeneous devices. ## How to start @@ -24,7 +25,7 @@ To build StratoVirt, clone the project and build it first: ```sh $ git clone https://gitee.com/openeuler/stratovirt.git $ cd stratovirt -$ cargo build --release +$ make build ``` Now you can find StratoVirt binary in `target/release/stratovirt`. @@ -35,7 +36,7 @@ To run StratoVirt quickly, requires You can get kernel and rootfs image from the following link: -https://repo.openeuler.org/openEuler-21.03/stratovirt_img/ +https://repo.openeuler.org/openEuler-22.03-LTS/stratovirt_img/ For standard VM, firmware file of EDK2 which follows UEFI is required. diff --git a/_typos.toml b/_typos.toml new file mode 100644 index 0000000000000000000000000000000000000000..9bbf0e694252c678de7726faf1fc0a25392c923f --- /dev/null +++ b/_typos.toml @@ -0,0 +1,34 @@ +[files] +extend-exclude = ["docs/kernel_config/*", "_typos.toml"] + +[default.extend-identifiers] +APIC_MODE_EXTINT = "APIC_MODE_EXTINT" +baInterfaceNr = "baInterfaceNr" +BARs = "BARs" +DESCRIPTION_HEADERs = "DESCRIPTION_HEADERs" +E6GgSyMd0oQtUGFyNf5pRHlYqlx3s7PMPVUtRJP0bBnNd5eDwWAotInu33h6UI0zfKgckAxeVdEROKAExx5xWK = "E6GgSyMd0oQtUGFyNf5pRHlYqlx3s7PMPVUtRJP0bBnNd5eDwWAotInu33h6UI0zfKgckAxeVdEROKAExx5xWK" +fldXlNNdCeqMvoIfEFogBxlL = "fldXlNNdCeqMvoIfEFogBxlL" +INTERRUPT_TYPE_EXTINT = "INTERRUPT_TYPE_EXTINT" +ist_info = "ist_info" +KVM_CPUID_FLAG_SIGNIFCANT_INDEX = "KVM_CPUID_FLAG_SIGNIFCANT_INDEX" +MODE_PAGE_ALLS = "MODE_PAGE_ALLS" +n_subtiles = "n_subtiles" +O_WRONLY = "O_WRONLY" +RTC_MIS = "RTC_MIS" +SECCOMP_FILETER_FLAG_TSYNC = "SECCOMP_FILETER_FLAG_TSYNC" +test_ths = "test_ths" +UART_LSR_THRE = "UART_LSR_THRE" +closID = "closID" +CLOS = "CLOS" + +[default.extend-words] +ba = "ba" +deactive = "deactive" +Deactive = "Deactive" +fpr = "fpr" +fullfill = "fullfill" +hda = "hda" +inout = "inout" +IST = "IST" +NCE = "NCE" +parm = "parm" diff --git a/acpi/Cargo.toml b/acpi/Cargo.toml index d7586ff37f4c897a74c42e9002f14bf21d8e8c82..8c778ef92e3cb201de435346556e9c7969ce6a49 100644 --- a/acpi/Cargo.toml +++ b/acpi/Cargo.toml @@ -1,16 +1,15 @@ [package] name = "acpi" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" license = "Mulan PSL v2" description = "acpi module" [dependencies] -error-chain = "0.12.4" -log = "0.4.8" -byteorder = "1.3.4" - +log = "0.4" +thiserror = "1.0" +anyhow = "1.0" address_space = { path = "../address_space" } util = {path = "../util"} machine_manager = {path = "../machine_manager"} diff --git a/acpi/src/acpi_device.rs b/acpi/src/acpi_device.rs index 3c68a6ac01e00a61154b29b98ebaedca736f0220..d315fc40bd5a383b4270fa51e2ec3f1795cb6c6c 100644 --- a/acpi/src/acpi_device.rs +++ b/acpi/src/acpi_device.rs @@ -12,13 +12,17 @@ use std::time::Instant; +use log::error; + use address_space::GuestAddress; -use byteorder::{ByteOrder, LittleEndian}; +use util::{ + num_ops::{read_data_u16, write_data_u16}, + time::NANOSECONDS_PER_SECOND, +}; // Frequency of PM Timer in HZ. 
const PM_TIMER_FREQUENCY: u128 = 3_579_545; -const NANOSECONDS_PER_SECOND: u128 = 1_000_000_000; -pub const ACPI_BITMASK_SLEEP_ENABLE: u16 = 0x2000; +const ACPI_BITMASK_SLEEP_ENABLE: u16 = 0x2000; /// ACPI Power Management Timer #[allow(clippy::upper_case_acronyms)] @@ -48,7 +52,7 @@ impl AcpiPMTimer { } let now = Instant::now(); let time_nanos = now.duration_since(self.start).as_nanos(); - let counter: u128 = (time_nanos * PM_TIMER_FREQUENCY) / NANOSECONDS_PER_SECOND; + let counter: u128 = (time_nanos * PM_TIMER_FREQUENCY) / u128::from(NANOSECONDS_PER_SECOND); data.copy_from_slice(&((counter & 0xFFFF_FFFF) as u32).to_le_bytes()); true @@ -73,64 +77,29 @@ impl AcpiPmEvent { pub fn read(&mut self, data: &mut [u8], _base: GuestAddress, offset: u64) -> bool { match offset { - 0 => match data.len() { - 1 => data[0] = self.status as u8, - 2 => LittleEndian::write_u16(data, self.status), - n => { - error!( - "Invalid data length {} for reading PM status register, offset is {}", - n, offset - ); - return false; - } - }, - 2 => match data.len() { - 1 => data[0] = self.enable as u8, - 2 => LittleEndian::write_u16(data, self.enable), - n => { - error!( - "Invalid data length {} for reading PM enable register, offset is {}", - n, offset - ); - return false; - } - }, + 0 => write_data_u16(data, self.status), + 2 => write_data_u16(data, self.enable), _ => { error!("Invalid offset"); - return false; + false } } - true } pub fn write(&mut self, data: &[u8], _base: GuestAddress, offset: u64) -> bool { match offset { 0 => { - let value: u16 = match data.len() { - 1 => data[0] as u16, - 2 => LittleEndian::read_u16(data), - n => { - error!( - "Invalid data length {} for writing PM status register, offset is {}", - n, offset - ); - return false; - } - }; + let mut value = 0; + if !read_data_u16(data, &mut value) { + return false; + } self.status &= !value; } 2 => { - let value: u16 = match data.len() { - 1 => data[0] as u16, - 2 => LittleEndian::read_u16(data), - n => { - error!( - "Invalid data length {} for writing PM enable register, offset is {}", - n, offset - ); - return false; - } - }; + let mut value = 0; + if !read_data_u16(data, &mut value) { + return false; + } self.enable = value; } _ => { @@ -153,27 +122,15 @@ impl AcpiPmCtrl { } pub fn read(&mut self, data: &mut [u8], _base: GuestAddress, _offset: u64) -> bool { - match data.len() { - 1 => data[0] = self.control as u8, - 2 => LittleEndian::write_u16(data, self.control), - n => { - error!("Invalid data length {} for reading PM control register", n); - return false; - } - } - true + write_data_u16(data, self.control) } // Return true when guest want poweroff. pub fn write(&mut self, data: &[u8], _base: GuestAddress, _offset: u64) -> bool { - let value: u16 = match data.len() { - 1 => data[0] as u16, - 2 => LittleEndian::read_u16(data), - n => { - error!("Invalid data length {} for writing PM control register", n); - return false; - } - }; + let mut value = 0; + if !read_data_u16(data, &mut value) { + return false; + } self.control = value & !ACPI_BITMASK_SLEEP_ENABLE; value & ACPI_BITMASK_SLEEP_ENABLE != 0 } diff --git a/acpi/src/acpi_table.rs b/acpi/src/acpi_table.rs index f7de8e30289f7b3e382430ffba1bf6121b1156aa..1577fd5b550a190e964072d8f33d407d67718881 100644 --- a/acpi/src/acpi_table.rs +++ b/acpi/src/acpi_table.rs @@ -10,12 +10,31 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
-use util::byte_code::ByteCode; - use super::aml_compiler::AmlBuilder; +use util::byte_code::ByteCode; /// Offset of checksum field in ACPI table. pub const TABLE_CHECKSUM_OFFSET: u32 = 9; +pub const INTERRUPT_PPIS_COUNT: u32 = 16; +pub const INTERRUPT_SGIS_COUNT: u32 = 16; +/// GTDT irq number for timer. +pub const ACPI_GTDT_ARCH_TIMER_VIRT_IRQ: u32 = 11; +pub const ACPI_GTDT_ARCH_TIMER_S_EL1_IRQ: u32 = 13; +pub const ACPI_GTDT_ARCH_TIMER_NS_EL1_IRQ: u32 = 14; +pub const ACPI_GTDT_ARCH_TIMER_NS_EL2_IRQ: u32 = 10; +pub const ACPI_GTDT_INTERRUPT_MODE_LEVEL: u32 = 0; +pub const ACPI_GTDT_CAP_ALWAYS_ON: u32 = 4; +/// IORT node types, reference: ARM Document number: ARM DEN 0049B, October 2015. +pub const ACPI_IORT_NODE_ITS_GROUP: u8 = 0x00; +pub const ACPI_IORT_NODE_PCI_ROOT_COMPLEX: u8 = 0x02; +/// Root Complex Node in IORT +pub const ROOT_COMPLEX_ENTRY_SIZE: u16 = 36; +pub const ID_MAPPING_ENTRY_SIZE: u16 = 20; +/// Interrupt controller structure types for MADT. +pub const ACPI_MADT_GENERIC_CPU_INTERFACE: u8 = 11; +pub const ACPI_MADT_GENERIC_DISTRIBUTOR: u8 = 12; +pub const ACPI_MADT_GENERIC_REDISTRIBUTOR: u8 = 14; +pub const ACPI_MADT_GENERIC_TRANSLATOR: u8 = 15; #[repr(C, packed)] #[derive(Default, Copy, Clone)] @@ -52,23 +71,23 @@ impl AmlBuilder for AcpiGenericAddress { #[derive(Default, Copy, Clone)] pub struct AcpiTableHeader { /// Signature of this table. - pub signature: [u8; 4], + signature: [u8; 4], /// The total length of this table, including this header. - pub length: u32, + length: u32, /// The revision of this table. - pub revision: u8, + revision: u8, /// The checksum of this table, including this header. - pub checksum: u8, + checksum: u8, /// OEM ID. - pub oem_id: [u8; 6], + oem_id: [u8; 6], /// OEM table ID. - pub oem_table_id: [u8; 8], + oem_table_id: [u8; 8], /// OEM revision of this table. - pub oem_revision: u32, + oem_revision: u32, /// Vendor ID for the ASL Compiler, default zero. - pub asl_compiler_id: [u8; 4], + asl_compiler_id: [u8; 4], /// Revision number of the ASL Compiler, default zero. - pub asl_compiler_revision: u32, + asl_compiler_revision: u32, } impl ByteCode for AcpiTableHeader {} @@ -140,7 +159,7 @@ impl AcpiTable { /// `new_value` - The new value that will be set in the field. 
pub fn set_field(&mut self, byte_index: usize, new_value: T) { let value_len = std::mem::size_of::(); - if byte_index >= self.entries.len() || byte_index + value_len >= self.entries.len() { + if byte_index >= self.entries.len() || byte_index + value_len > self.entries.len() { panic!("Set field in table failed: overflow occurs."); } self.entries[byte_index..(byte_index + value_len)].copy_from_slice(new_value.as_bytes()); @@ -161,6 +180,145 @@ impl AmlBuilder for AcpiTable { } } +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +pub struct ProcessorHierarchyNode { + r#type: u8, + length: u8, + reserved: u16, + flags: u32, + parent: u32, + acpi_processor_id: u32, + num_private_resources: u32, +} + +impl ByteCode for ProcessorHierarchyNode {} + +impl AmlBuilder for ProcessorHierarchyNode { + fn aml_bytes(&self) -> Vec { + self.as_bytes().to_vec() + } +} + +impl ProcessorHierarchyNode { + pub fn new( + flags: u32, + parent: u32, + acpi_processor_id: u32, + num_private_resources: u32, + ) -> Self { + Self { + r#type: 0, + length: 20 + num_private_resources as u8 * 4, + reserved: 0, + flags, + parent, + acpi_processor_id, + num_private_resources, + } + } +} + +pub fn processor_append_priv_res(pptt: &mut AcpiTable, priv_resources: Vec) { + let start = pptt.table_len(); + pptt.set_table_len(start + priv_resources.len() * 4); + for (i, priv_res) in priv_resources.iter().enumerate() { + pptt.set_field(start + i * 4, *priv_res); + } +} + +/// The Type of the hardcoded cache info +pub enum CacheType { + L1D, + L1I, + L2, + L3, +} + +struct CacheNode { + size: u32, + sets: u32, + associativity: u8, + attributes: u8, + line_size: u16, +} + +const CACHE_NODES: [CacheNode; CacheType::L3 as usize + 1] = [ + // L1 data cache + CacheNode { + size: 65536, + sets: 256, + associativity: 4, + attributes: 2, + line_size: 64, + }, + // L1 instruction cache + CacheNode { + size: 65536, + sets: 256, + associativity: 4, + attributes: 4, + line_size: 64, + }, + // L2 unified cache + CacheNode { + size: 524288, + sets: 1024, + associativity: 8, + attributes: 10, + line_size: 64, + }, + // L3 unified cache + CacheNode { + size: 33554432, + sets: 2048, + associativity: 15, + attributes: 10, + line_size: 128, + }, +]; + +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +pub struct CacheHierarchyNode { + r#type: u8, + length: u8, + reserved: u16, + flags: u32, + next_level: u32, + size: u32, + number_sets: u32, + associativity: u8, + attributes: u8, + line_size: u16, +} + +impl ByteCode for CacheHierarchyNode {} + +impl AmlBuilder for CacheHierarchyNode { + fn aml_bytes(&self) -> Vec { + self.as_bytes().to_vec() + } +} + +impl CacheHierarchyNode { + pub fn new(next_level: u32, cache_type: CacheType) -> Self { + let cache_node = &CACHE_NODES[cache_type as usize]; + Self { + r#type: 1, + length: 24, + reserved: 0, + flags: 127, + next_level, + size: cache_node.size, + number_sets: cache_node.sets, + associativity: cache_node.associativity, + attributes: cache_node.attributes, + line_size: cache_node.line_size, + } + } +} + /// ACPI RSDP structure. #[repr(C, packed)] #[derive(Default, Copy, Clone)] @@ -209,6 +367,94 @@ impl AmlBuilder for AcpiRsdp { } } +/// ACPI SRAT processor affinity structure. +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +pub struct AcpiSratProcessorAffinity { + /// Type ID. + pub type_id: u8, + /// The length of this structure. + pub length: u8, + /// Bit `\[`7:0`\]` of the proximity domain to which the processor belongs. + pub proximity_lo: u8, + /// The processor local APIC ID. 
+ pub local_apic_id: u8, + /// The processor affinity flags. + pub flags: u32, + /// The processor local SAPIC EID. + pub local_sapic_eid: u8, + /// Bit `\[`31:8`\]` of the proximity domain to which the processor belongs. + pub proximity_hi: [u8; 3], + /// The clock domain to which the processor belongs. + pub clock_domain: u32, +} + +impl ByteCode for AcpiSratProcessorAffinity {} + +impl AmlBuilder for AcpiSratProcessorAffinity { + fn aml_bytes(&self) -> Vec { + Vec::from(self.as_bytes()) + } +} + +/// ACPI SRAT GICC affinity structure. +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +pub struct AcpiSratGiccAffinity { + /// Type ID. + pub type_id: u8, + /// The length of this structure. + pub length: u8, + /// Represents the proximity domain to which the "range of memory" belongs. + pub proximity_domain: u32, + /// The ACPI processor UID of the associated GICC + pub process_uid: u32, + /// The GICC affinity flags. + pub flags: u32, + /// The clock domain to which the processor belongs. + pub clock_domain: u32, +} + +impl ByteCode for AcpiSratGiccAffinity {} + +impl AmlBuilder for AcpiSratGiccAffinity { + fn aml_bytes(&self) -> Vec { + Vec::from(self.as_bytes()) + } +} + +/// ACPI SRAT memory affinity structure. +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +pub struct AcpiSratMemoryAffinity { + /// Type ID. + pub type_id: u8, + /// The length of this structure. + pub length: u8, + /// Represents the proximity domain to which the "range of memory" belongs. + pub proximity_domain: u32, + /// Reserved field. + pub reserved1: u16, + /// The base address of the memory range. + pub base_addr: u64, + /// The length of the memory range. + pub range_length: u64, + /// Reserved field. + pub reserved2: u32, + /// Indicates whether memory is enabled and can be hot plugged. + pub flags: u32, + /// Reserved field. + pub reserved3: u64, +} + +impl ByteCode for AcpiSratMemoryAffinity {} + +impl AmlBuilder for AcpiSratMemoryAffinity { + fn aml_bytes(&self) -> Vec { + Vec::from(self.as_bytes()) + } +} + /// This module describes ACPI MADT's sub-tables on x86_64 platform. #[cfg(target_arch = "x86_64")] pub mod madt_subtable { diff --git a/acpi/src/aml_compiler.rs b/acpi/src/aml_compiler.rs index 3fa4fa00fcdf68ce57f625c4d35a9746a6b17869..f97216ae2eae04e4963fd37d419ae0650fd2ea52 100644 --- a/acpi/src/aml_compiler.rs +++ b/acpi/src/aml_compiler.rs @@ -262,8 +262,8 @@ impl AmlToUuid { } // Char located at 8, 13, 18, 23 should be `-` - let indexs = &[8, 13, 18, 23]; - for i in indexs { + let indexes = &[8, 13, 18, 23]; + for i in indexes { if uuid.chars().nth(*i).unwrap() != '-' { return false; } @@ -377,14 +377,12 @@ impl AmlBuilder for AmlBuffer { /// Package contains an array of other objects. pub struct AmlPackage { - elem_count: u8, buf: Vec, } impl AmlPackage { pub fn new(elem_count: u8) -> AmlPackage { AmlPackage { - elem_count, buf: vec![elem_count], } } @@ -407,14 +405,12 @@ impl AmlScopeBuilder for AmlPackage { /// Variable-sized Package. pub struct AmlVarPackage { - elem_count: u8, buf: Vec, } impl AmlVarPackage { pub fn new(elem_count: u8) -> AmlVarPackage { AmlVarPackage { - elem_count, buf: vec![elem_count], } } @@ -533,14 +529,6 @@ pub enum AmlFieldUpdateRule { /// Field represents several bits in Operation Field. pub struct AmlField { - /// The name of corresponding OperationRegion. - name: String, - /// The access type of this Field. - access_type: AmlFieldAccessType, - /// Global lock is to be used or not when accessing this field. 
- lock_rule: AmlFieldLockRule, - /// Unmodified bits of a field are treated as Ones/Zeros/Preserve. - update_rule: AmlFieldUpdateRule, /// Field Unit list. buf: Vec, } @@ -557,13 +545,7 @@ impl AmlField { bytes.extend(build_name_string(name)); bytes.push(flag); - AmlField { - name: name.to_string(), - access_type: acc_ty, - lock_rule: lock_r, - update_rule: update_r, - buf: bytes, - } + AmlField { buf: bytes } } } @@ -613,8 +595,6 @@ impl AmlBuilder for AmlFieldUnit { /// Open a named Scope, can refer any scope within the namespace. pub struct AmlScope { - /// The name of scope. - name: String, /// Contains objects created inside the scope, which are encodes to bytes. buf: Vec, } @@ -622,7 +602,6 @@ pub struct AmlScope { impl AmlScope { pub fn new(name: &str) -> AmlScope { AmlScope { - name: name.to_string(), buf: build_name_string(name), } } @@ -650,14 +629,12 @@ impl AmlScopeBuilder for AmlScope { /// Device object that represents a processor, a device, etc. pub struct AmlDevice { - name: String, buf: Vec, } impl AmlDevice { pub fn new(name: &str) -> AmlDevice { AmlDevice { - name: name.to_string(), buf: build_name_string(name), } } @@ -680,12 +657,6 @@ impl AmlScopeBuilder for AmlDevice { /// Method definition. pub struct AmlMethod { - /// The name of this method. - name: String, - /// Count of Arguments. default value is zero. - args_count: u8, - /// Whether this method is Serialized or not. - serialized: bool, /// The body of this method, which has been converted to byte stream. buf: Vec, } @@ -709,12 +680,7 @@ impl AmlMethod { let mut bytes = build_name_string(name); bytes.push(flag); - AmlMethod { - name: name.to_string(), - args_count, - serialized, - buf: bytes, - } + AmlMethod { buf: bytes } } } @@ -1173,8 +1139,8 @@ impl AmlBuilder for AmlMutex { pub struct AmlAcquire { /// The mutex object is converted to byte stream. mutex: Vec, - /// If the mutex is owned by others, current thread suspends and waits for `timeout` **milliseconds** - /// `timeout` being set as 0xFFFF indicates that there is no timeout and + /// If the mutex is owned by others, current thread suspends and waits for `timeout` + /// **milliseconds**, `timeout` being set as 0xFFFF indicates that there is no timeout and /// the acquire mutex operation will keeping waiting. time_out: u16, } @@ -1204,7 +1170,7 @@ pub struct AmlRelease { } impl AmlRelease { - fn new(mtx: T) -> AmlRelease { + pub fn new(mtx: T) -> AmlRelease { AmlRelease { mutex: mtx.aml_bytes(), } @@ -1219,49 +1185,6 @@ impl AmlBuilder for AmlRelease { } } -/// Create arbitrary-length field of Buffer. -pub struct AmlCreateField { - /// The name of this field. - name: String, - /// The source Buffer, which has been converted to bytes. - src: Vec, - /// the start index in the Buffer, which has been converted to bytes. - /// `bit_index` has to be an Integer. - bit_index: Vec, - /// the length of this bit range, which has been converted to bytes. - /// `bit_count` has to be an Integer and must not be zero. - /// Note that the bit range (bit_index, bit_index + bit_count) must not exceed the bound of Buffer. 
- bit_count: Vec, -} - -impl AmlCreateField { - pub fn new( - src: S, - bit_index: T, - bit_count: C, - name: &str, - ) -> AmlCreateField { - AmlCreateField { - name: name.to_string(), - src: src.aml_bytes(), - bit_index: bit_index.aml_bytes(), - bit_count: bit_count.aml_bytes(), - } - } -} - -impl AmlBuilder for AmlCreateField { - fn aml_bytes(&self) -> Vec { - let mut bytes = vec![0x5B, 0x13]; - bytes.extend(self.src.clone()); - bytes.extend(self.bit_index.clone()); - bytes.extend(self.bit_count.clone()); - bytes.extend(build_name_string(self.name.as_ref())); - - bytes - } -} - /// Macro helps to define CreateWordField/CreateDWordField/CreateQWordField. macro_rules! create_word_field_define { ($name: ident, $op: expr) => { @@ -1693,7 +1616,7 @@ impl AmlIrqNoFlags { impl AmlBuilder for AmlIrqNoFlags { fn aml_bytes(&self) -> Vec { - let irq_mask = 1 << (self.irq as u16); + let irq_mask = 1 << u16::from(self.irq); vec![0x22, (irq_mask & 0xFF) as u8, (irq_mask >> 8) as u8] } } @@ -1896,7 +1819,7 @@ mod test { let elem4 = AmlFieldUnit::new(Some("FLD3"), 4); let elem5 = AmlFieldUnit::new(Some("FLD4"), 12); - for e in vec![elem1, elem2, elem3, elem4, elem5] { + for e in [elem1, elem2, elem3, elem4, elem5] { field.append_child(e); } @@ -2068,7 +1991,7 @@ mod test { if_scope1.append_child(AmlReturn::new()); method1.append_child(if_scope1); - let store1 = AmlStore::new(AmlArg(0), AmlLocal(0).clone()); + let store1 = AmlStore::new(AmlArg(0), AmlLocal(0)); method1.append_child(store1); let mut while_scope = AmlWhile::new(AmlLLess::new(AmlLocal(0), AmlArg(1))); @@ -2108,7 +2031,7 @@ mod test { method2.append_child(store2); let mut pkg1 = AmlPackage::new(3); - vec![0x01, 0x03F8, 0x03FF].iter().for_each(|&x| { + [0x01, 0x03F8, 0x03FF].iter().for_each(|&x| { pkg1.append_child(AmlInteger(x as u64)); }); let named_pkg1 = AmlNameDecl::new("PKG1", pkg1); @@ -2266,12 +2189,10 @@ mod test { let size = AmlCreateDWordField::new(AmlArg(0), AmlInteger(4), "SIZE"); let minv = AmlCreateWordField::new(AmlArg(0), AmlInteger(8), "MINV"); let maxv = AmlCreateQWordField::new(AmlArg(0), AmlInteger(10), "MAXV"); - let temp = AmlCreateField::new(AmlArg(0), AmlInteger(64), AmlInteger(8), "TEMP"); method.append_child(revs); method.append_child(size); method.append_child(minv); method.append_child(maxv); - method.append_child(temp); let store = AmlOr::new( AmlName("MINV".to_string()), @@ -2289,12 +2210,11 @@ mod test { method.append_child(AmlReturn::with_value(AmlLocal(0))); let method_bytes = vec![ - 0x14, 0x4A, 0x04, 0x4D, 0x54, 0x44, 0x31, 0x01, 0x8A, 0x68, 0x00, 0x52, 0x45, 0x56, - 0x53, 0x8A, 0x68, 0x0A, 0x04, 0x53, 0x49, 0x5A, 0x45, 0x8B, 0x68, 0x0A, 0x08, 0x4D, - 0x49, 0x4E, 0x56, 0x8F, 0x68, 0x0A, 0x0A, 0x4D, 0x41, 0x58, 0x56, 0x5B, 0x13, 0x68, - 0x0A, 0x40, 0x0A, 0x08, 0x54, 0x45, 0x4D, 0x50, 0x7D, 0x4D, 0x49, 0x4E, 0x56, 0x4D, - 0x41, 0x58, 0x56, 0x54, 0x45, 0x4D, 0x50, 0x73, 0x52, 0x45, 0x56, 0x53, 0x53, 0x49, - 0x5A, 0x45, 0x60, 0xA4, 0x60, + 0x14, 0x3E, 0x4D, 0x54, 0x44, 0x31, 0x01, 0x8A, 0x68, 0x00, 0x52, 0x45, 0x56, 0x53, + 0x8A, 0x68, 0x0A, 0x04, 0x53, 0x49, 0x5A, 0x45, 0x8B, 0x68, 0x0A, 0x08, 0x4D, 0x49, + 0x4E, 0x56, 0x8F, 0x68, 0x0A, 0x0A, 0x4D, 0x41, 0x58, 0x56, 0x7D, 0x4D, 0x49, 0x4E, + 0x56, 0x4D, 0x41, 0x58, 0x56, 0x54, 0x45, 0x4D, 0x50, 0x73, 0x52, 0x45, 0x56, 0x53, + 0x53, 0x49, 0x5A, 0x45, 0x60, 0xA4, 0x60, ]; assert_eq!(method.aml_bytes(), method_bytes); } diff --git a/acpi/src/error.rs b/acpi/src/error.rs new file mode 100644 index 
0000000000000000000000000000000000000000..116234d6d6606308c05b62f6f11aa156cf46976a --- /dev/null +++ b/acpi/src/error.rs @@ -0,0 +1,27 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum AcpiError { + #[error("Failed to add AllocateEntry in TableLoader, file_blob {0} already exists.")] + FileEntryExist(String), + #[error("Failed to find matched file_blob in TableLoader, file name: {0}.")] + NoMatchedFile(String), + #[error("Invalid alignment {0}. Alignment is in bytes, and must be a power of 2.")] + Alignment(u32), + #[error("Address overflows, offset {0}, size {1}, max size {2}.")] + AddrOverflow(u32, u32, usize), + #[error("Failed to add pointer command: pointer length {0}, which is expected to be 1/2/4/8.")] + AddPointerLength(u8), +} diff --git a/acpi/src/lib.rs b/acpi/src/lib.rs index b691ed9fdc041681abff76c03938e391eaa62f38..d5a81153d390491e198fe5936561b5468ca59e0d 100644 --- a/acpi/src/lib.rs +++ b/acpi/src/lib.rs @@ -10,23 +10,18 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -#[macro_use] -extern crate error_chain; -#[macro_use] -extern crate log; - -mod acpi_device; -#[allow(dead_code)] pub mod acpi_table; -#[allow(dead_code)] pub(crate) mod aml_compiler; -#[allow(dead_code)] +pub mod error; + +mod acpi_device; mod table_loader; pub use acpi_device::{AcpiPMTimer, AcpiPmCtrl, AcpiPmEvent}; pub use acpi_table::madt_subtable::*; pub use acpi_table::*; pub use aml_compiler::*; +pub use error::AcpiError; pub use table_loader::TableLoader; // The name of corresponding file-entry in FwCfg device that represents acpi table data. @@ -35,25 +30,3 @@ pub const ACPI_TABLE_FILE: &str = "etc/acpi/tables"; pub const ACPI_TABLE_LOADER_FILE: &str = "etc/table-loader"; // The name of corresponding file-entry in FwCfg device that represents acpi rsdp struct. pub const ACPI_RSDP_FILE: &str = "etc/acpi/rsdp"; - -pub mod errors { - error_chain! { - errors { - FileEntryExist(name: String) { - display("Failed to add AllocateEntry in TableLoader, file_blob {} already exists.", name) - } - NoMatchedFile(name: String) { - display("Failed to find matched file_blob in TableLoader, file name: {}.", name) - } - Alignment(align: u32) { - display("Invalid alignment {}. 
Alignment is in bytes, and must be a power of 2.", align) - } - AddrOverflow(offset: u32, size: u32, blob_size: usize) { - display("Address overflows, offset {}, size {}, max size {}.", offset, size, blob_size) - } - AddPointerLength(size: u8) { - display("Failed to add pointer command: pointer length {}, which is expected to be 1/2/4/8.", size) - } - } - } -} diff --git a/acpi/src/table_loader.rs b/acpi/src/table_loader.rs index 6bb2bba5537757dc9ef2b4c1dd55de0b4ed27c21..e968e4d07a865a192171b4d5d641264f5d01d270 100644 --- a/acpi/src/table_loader.rs +++ b/acpi/src/table_loader.rs @@ -12,10 +12,11 @@ use std::sync::{Arc, Mutex}; -use util::byte_code::ByteCode; +use anyhow::{anyhow, bail, Context, Result}; -use crate::errors::{ErrorKind, Result, ResultExt}; +use crate::AcpiError; use crate::AmlBuilder; +use util::byte_code::ByteCode; const TABLE_LOADER_FILE_NAME_SZ: usize = 56; const TABLE_LOADER_ENTRY_SZ: usize = 124; @@ -77,10 +78,10 @@ impl ByteCode for EntryContent {} /// Stores the command and corresponding content of entry. /// - For `AllocateEntry`, Guest will alloc guest memory resource. -/// - For `AddPointerEntry`, Guest will update pointer at specified offset of dst file -/// by adding base address of source file. -/// - For `AddPointerEntry`, Guest will calculate u8-type checksum of a range in file -/// and store it at specified offset of the same file. +/// - For `AddPointerEntry`, Guest will update pointer at specified offset of dst file by adding +/// base address of source file. +/// - For `AddPointerEntry`, Guest will calculate u8-type checksum of a range in file and store it +/// at specified offset of the same file. #[derive(Copy, Clone, Default)] struct TableLoaderEntry { /// The Type of command. @@ -208,12 +209,9 @@ impl TableLoader { /// /// * `file_name` - The name of file to find. fn find_matched_file(&self, file_name: &str) -> Option<&TableLoaderFileEntry> { - for file_entry in &self.files { - if file_entry.file_name == file_name { - return Some(file_entry); - } - } - None + self.files + .iter() + .find(|&file_entry| file_entry.file_name == file_name) } /// Add loader entry of type `Allocate`. @@ -223,8 +221,8 @@ impl TableLoader { /// * `file` - File name. /// * `file_blob` - File blob data. /// * `align` - Required alignment of this blob. - /// * `is_fseg` - Represents range where Guest will allocate for this entry. - /// If true, Guest will allocate in FSEG zone. + /// * `is_fseg` - Represents range where Guest will allocate for this entry. If true, Guest will + /// allocate in FSEG zone. 
pub fn add_alloc_entry( &mut self, file: &str, @@ -234,10 +232,10 @@ impl TableLoader { ) -> Result<()> { let file = file.to_string(); if align & (align - 1) != 0 { - return Err(ErrorKind::Alignment(align).into()); + return Err(anyhow!(AcpiError::Alignment(align))); } if self.find_matched_file(&file).is_some() { - return Err(ErrorKind::FileEntryExist(file).into()); + return Err(anyhow!(AcpiError::FileEntryExist(file))); } self.files.push(TableLoaderFileEntry { @@ -270,15 +268,23 @@ impl TableLoader { let file = file.to_string(); let file_entry = self .find_matched_file(&file) - .chain_err(|| ErrorKind::NoMatchedFile(file.clone()))?; + .with_context(|| AcpiError::NoMatchedFile(file.clone()))?; let file_entry_len = file_entry.file_blob.lock().unwrap().len(); if cksum_offset as usize + 1 > file_entry_len { - return Err(ErrorKind::AddrOverflow(cksum_offset, 1, file_entry_len).into()); + return Err(anyhow!(AcpiError::AddrOverflow( + cksum_offset, + 1, + file_entry_len + ))); } if start as usize >= file_entry_len || (start + length) as usize > file_entry_len { - return Err(ErrorKind::AddrOverflow(start, length, file_entry_len).into()); + return Err(anyhow!(AcpiError::AddrOverflow( + start, + length, + file_entry_len + ))); } if cksum_offset < start { bail!("The offset of checksum should larger offset of start of range in file blob"); @@ -321,28 +327,36 @@ impl TableLoader { let src_file = src_file.to_string(); let dst_file_entry = self .find_matched_file(&dst_file) - .chain_err(|| ErrorKind::NoMatchedFile(dst_file.clone()))?; + .with_context(|| AcpiError::NoMatchedFile(dst_file.clone()))?; let src_file_entry = self .find_matched_file(&src_file) - .chain_err(|| ErrorKind::NoMatchedFile(src_file.clone()))?; + .with_context(|| AcpiError::NoMatchedFile(src_file.clone()))?; let dst_file_len = dst_file_entry.file_blob.lock().unwrap().len(); let src_file_len = src_file_entry.file_blob.lock().unwrap().len(); if src_offset as usize >= src_file_len || (src_offset + u32::from(size)) as usize > src_file_len { - return Err(ErrorKind::AddrOverflow(src_offset, u32::from(size), src_file_len).into()); + return Err(anyhow!(AcpiError::AddrOverflow( + src_offset, + u32::from(size), + src_file_len + ))); } if offset as usize >= dst_file_len || (offset + u32::from(size)) as usize > dst_file_len { - return Err(ErrorKind::AddrOverflow(offset, u32::from(size), dst_file_len).into()); + return Err(anyhow!(AcpiError::AddrOverflow( + offset, + u32::from(size), + dst_file_len + ))); } if size != 1 && size != 2 && size != 4 && size != 8 { - return Err(ErrorKind::AddPointerLength(size).into()); + return Err(anyhow!(AcpiError::AddPointerLength(size))); } dst_file_entry.file_blob.lock().unwrap() [offset as usize..(offset as usize + size as usize)] - .copy_from_slice(&(src_offset as u64).as_bytes()[0..size as usize]); + .copy_from_slice(&u64::from(src_offset).as_bytes()[0..size as usize]); self.cmds.push(TableLoaderEntry::new_add_pointer_entry( dst_file, src_file, offset, size, @@ -367,20 +381,16 @@ mod test { .is_ok()); let file_bytes = file_name.as_bytes(); + // SATETY: The "alloc" field of union consists of u8 members, so the access is safe. 
+ let alloc = unsafe { &table_loader.cmds.first().unwrap().entry.alloc }; assert_eq!( - unsafe { - table_loader.cmds.get(0).unwrap().entry.alloc.file[0..file_bytes.len()].to_vec() - }, + alloc.file[0..file_bytes.len()].to_vec(), file_bytes.to_vec() ); - assert_eq!( - unsafe { table_loader.cmds.get(0).unwrap().entry.alloc.align }, - 4_u32 - ); - assert_eq!( - unsafe { table_loader.cmds.get(0).unwrap().entry.alloc.zone }, - 0x1 - ); + // Copy to local var to avoid unaligned_references error. + let align = alloc.align; + assert_eq!(align, 4_u32); + assert_eq!(alloc.zone, 0x1); assert!(table_loader .add_alloc_entry("etc/table-loader", file_blob, 4_u32, false) @@ -440,7 +450,7 @@ mod test { .add_cksum_entry(&file, 0_u32, 0_u32, file_len + 1) .is_err()); assert!(table_loader - .add_cksum_entry(&file, (file_len - 1) as u32, 80, 20) + .add_cksum_entry(&file, file_len - 1, 80, 20) .is_ok()); assert!(table_loader .add_cksum_entry(&file, file_len - 1, 0, 50) diff --git a/address_space/Cargo.toml b/address_space/Cargo.toml index e1f4e0befc1b88926f49983e06f2393e8e684c16..c77820d3f8cd796b2ff0b588a86321e571dddd8c 100644 --- a/address_space/Cargo.toml +++ b/address_space/Cargo.toml @@ -1,24 +1,22 @@ [package] name = "address_space" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" license = "Mulan PSL v2" description = "provide memory management for VM" [dependencies] -error-chain = "0.12.4" -libc = ">=0.2.71" -log = "0.4.8" -kvm-bindings = ">=0.3.0" -kvm-ioctls = "0.6.0" -vmm-sys-util = ">=0.7.0" -arc-swap = "0.4.8" -hypervisor = { path = "../hypervisor" } +libc = "0.2" +log = "0.4" +nix = { version = "0.26.2", default-features = false, features = ["fs", "feature"] } +vmm-sys-util = "0.12.1" +arc-swap = "1.6.0" +thiserror = "1.0" +anyhow = "1.0" +once_cell = "1.18.0" machine_manager = { path = "../machine_manager" } migration = { path = "../migration" } -migration_derive = { path = "../migration_derive" } +migration_derive = { path = "../migration/migration_derive" } util = { path = "../util" } - -[dev-dependencies] -serial_test = "0.5.1" +trace = { path = "../trace" } diff --git a/address_space/src/address.rs b/address_space/src/address.rs index e7760c3e15b57fb0ad90991d4b715292107e94cb..5d1d02b670711b2effedf950002a328f1d7d1a97 100644 --- a/address_space/src/address.rs +++ b/address_space/src/address.rs @@ -15,6 +15,15 @@ use std::ops::{BitAnd, BitOr}; use util::num_ops::{round_down, round_up}; +#[derive(PartialEq, Eq)] +pub enum AddressAttr { + Ram, + MMIO, + RamDevice, + RomDevice, + RomDeviceForce, +} + /// Represent the address in given address space. #[derive(Copy, Clone, Default, Debug, Eq, PartialEq, Ord, PartialOrd)] pub struct GuestAddress(pub u64); @@ -166,17 +175,21 @@ impl AddressRange { /// /// * `other` - Other AddressRange. pub fn find_intersection(&self, other: AddressRange) -> Option { - let end = self.base.checked_add(self.size)?; - let other_end = other.base.checked_add(other.size)?; + let begin = u128::from(self.base.raw_value()); + let end = u128::from(self.size) + begin; + let other_begin = u128::from(other.base.raw_value()); + let other_end = u128::from(other.size) + other_begin; - if end <= other.base || other_end <= self.base { + if end <= other_begin || other_end <= begin { return None; } - let start = std::cmp::max(self.base, other.base); + // SAFETY: The range of a region will not exceed 64 bits. 
+ let size_inter = (std::cmp::min(end, other_end) - u128::from(start.0)) as u64; + Some(AddressRange { base: start, - size: std::cmp::min(end, other_end).offset_from(start), + size: size_inter, }) } diff --git a/address_space/src/address_space.rs b/address_space/src/address_space.rs index 81774da55ee1d77199c5e14cdff8da3707c2374b..95cad2ac921a774de08029dd1ff1fae781a30a38 100644 --- a/address_space/src/address_space.rs +++ b/address_space/src/address_space.rs @@ -10,21 +10,25 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use std::io::Write; +use std::fmt; +use std::fmt::Debug; use std::sync::{Arc, Mutex}; +use anyhow::{anyhow, bail, Context, Result}; use arc_swap::ArcSwap; -use util::byte_code::ByteCode; +use log::error; +use once_cell::sync::OnceCell; -use crate::errors::{ErrorKind, Result, ResultExt}; use crate::{ - AddressRange, FlatRange, GuestAddress, Listener, ListenerReqType, Region, RegionIoEventFd, - RegionType, + AddressAttr, AddressRange, AddressSpaceError, FlatRange, GuestAddress, Listener, + ListenerReqType, Region, RegionIoEventFd, RegionType, }; +use util::aio::Iovec; +use util::byte_code::ByteCode; -/// Contain an array of `FlatRange`. -#[derive(Default, Clone)] -pub(crate) struct FlatView(pub Vec); +/// Contains an array of `FlatRange`. +#[derive(Default, Clone, Debug)] +pub(crate) struct FlatView(pub(crate) Vec); impl FlatView { fn find_flatrange(&self, addr: GuestAddress) -> Option<&FlatRange> { @@ -34,6 +38,124 @@ impl FlatView { _ => None, } } + + fn read( + &self, + dst: &mut dyn std::io::Write, + addr: GuestAddress, + count: u64, + attr: AddressAttr, + ) -> Result<()> { + let mut len = count; + let mut l = count; + let mut start = addr; + let region_type = match attr { + AddressAttr::Ram => RegionType::Ram, + AddressAttr::MMIO => RegionType::IO, + AddressAttr::RamDevice => RegionType::RamDevice, + AddressAttr::RomDevice => RegionType::RomDevice, + AddressAttr::RomDeviceForce => RegionType::RomDevice, + }; + + loop { + if let Some(fr) = self.find_flatrange(start) { + let fr_offset = start.offset_from(fr.addr_range.base); + let region_offset = fr.offset_in_region + fr_offset; + let region_base = fr.addr_range.base.unchecked_sub(fr.offset_in_region); + let fr_remain = fr.addr_range.size - fr_offset; + + if !util::test_helper::is_test_enabled() && fr.owner.region_type() != region_type { + // Read op RomDevice in I/O access mode as MMIO + if region_type == RegionType::IO + && fr.owner.region_type() == RegionType::RomDevice + { + if fr.owner.get_rom_device_romd().unwrap() { + bail!("mismatch region type") + } + } else { + bail!("mismatch region type") + } + } + + if fr.owner.region_type() == RegionType::Ram + || fr.owner.region_type() == RegionType::RamDevice + { + l = std::cmp::min(l, fr_remain); + } + fr.owner.read(dst, region_base, region_offset, l)?; + } else { + return Err(anyhow!(AddressSpaceError::RegionNotFound( + start.raw_value() + ))); + } + + len -= l; + if len == 0 { + return Ok(()); + } + start = start.unchecked_add(l); + l = len; + } + } + + fn write( + &self, + src: &mut dyn std::io::Read, + addr: GuestAddress, + count: u64, + attr: AddressAttr, + ) -> Result<()> { + let mut l = count; + let mut len = count; + let mut start = addr; + + let region_type = match attr { + AddressAttr::Ram => RegionType::Ram, + AddressAttr::MMIO => RegionType::IO, + AddressAttr::RamDevice => RegionType::RamDevice, + AddressAttr::RomDeviceForce => RegionType::RomDevice, + _ => { + bail!("Error write attr") + } + 
}; + loop { + if let Some(fr) = self.find_flatrange(start) { + let fr_offset = start.offset_from(fr.addr_range.base); + let region_offset = fr.offset_in_region + fr_offset; + let region_base = fr.addr_range.base.unchecked_sub(fr.offset_in_region); + let fr_remain = fr.addr_range.size - fr_offset; + + // Read/Write ops to RomDevice is MMIO. + if !util::test_helper::is_test_enabled() + && fr.owner.region_type() != region_type + && !(region_type == RegionType::IO + && fr.owner.region_type() == RegionType::RomDevice) + { + bail!("mismatch region type") + } + + if fr.owner.region_type() == RegionType::Ram + || fr.owner.region_type() == RegionType::RamDevice + { + l = std::cmp::min(l, fr_remain); + } + fr.owner.write(src, region_base, region_offset, l)?; + } else { + return Err(anyhow!(AddressSpaceError::RegionNotFound( + start.raw_value() + ))); + } + + len -= l; + if len == 0 { + break; + } + start = start.unchecked_add(l); + l = len; + } + + Ok(()) + } } #[derive(Clone, Copy)] @@ -44,31 +166,36 @@ pub struct RegionCache { pub end: u64, } -impl Default for RegionCache { - fn default() -> Self { - RegionCache { - reg_type: RegionType::Ram, - host_base: 0, - start: 0, - end: 0, - } - } -} - type ListenerObj = Arc>; /// Address Space of memory. #[derive(Clone)] pub struct AddressSpace { + /// the name of AddressSpace. + name: String, /// Root Region of this AddressSpace. root: Region, - /// Flat_view is the output of rendering all regions in parent address-space, - /// every time the topology changed (add/delete region), flat_view would be updated. - flat_view: ArcSwap, + /// `flat_view` is the output of rendering all regions in parent `address-space`, + /// every time the topology changed (add/delete region), `flat_view` would be updated. + flat_view: Arc>, /// The triggered call-backs when flat_view changed. listeners: Arc>>, /// The current layout of ioeventfds, which is compared with new ones in topology-update stage. ioeventfds: Arc>>, + /// The backend memory region tree, used for migrate. + machine_ram: Option>, + /// Whether the hypervisor enables the ioeventfd. + hyp_ioevtfd_enabled: OnceCell, +} + +impl fmt::Debug for AddressSpace { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("AddressSpace") + .field("root", &self.root) + .field("flat_view", &self.flat_view) + .field("ioeventfds", &self.ioeventfds) + .finish() + } } impl AddressSpace { @@ -77,29 +204,65 @@ impl AddressSpace { /// # Arguments /// /// * `root` - Root region of address space. - pub fn new(root: Region) -> Result> { + /// * `name` - the name of AddressSpace. + pub fn new( + root: Region, + name: &str, + machine_ram: Option>, + ) -> Result> { let space = Arc::new(AddressSpace { + name: String::from(name), root: root.clone(), - flat_view: ArcSwap::new(Arc::new(FlatView::default())), + flat_view: Arc::new(ArcSwap::new(Arc::new(FlatView::default()))), listeners: Arc::new(Mutex::new(Vec::new())), ioeventfds: Arc::new(Mutex::new(Vec::new())), + machine_ram, + hyp_ioevtfd_enabled: OnceCell::new(), }); root.set_belonged_address_space(&space); if !space.root.subregions().is_empty() { space .update_topology() - .chain_err(|| "Failed to update topology for address_space")?; + .with_context(|| "Failed to update topology for address_space")?; } Ok(space) } + pub fn get_machine_ram(&self) -> Option<&Arc> { + if let Some(region) = &self.machine_ram { + return Some(region); + } + None + } + /// Get the reference of root region of AddressSpace. 
pub fn root(&self) -> &Region { &self.root } + pub fn memspace_show(&self) { + let view = self.flat_view.load(); + + println!("----- address-space flat: {} -----", self.name); + for fr in view.0.iter() { + println!( + " 0x{:X} - 0x{:X}, (pri {}, {:?}) Region {} @ offset 0x{:X}", + fr.addr_range.base.raw_value(), + fr.addr_range.base.raw_value() + fr.addr_range.size, + fr.owner.priority(), + fr.owner.region_type(), + fr.owner.name, + fr.offset_in_region + ); + } + + println!("------ regions show: {} --------------", self.root().name); + self.root().mtree(0_u32); + println!("--------------------------------------"); + } + /// Register the listener to the `AddressSpace`. /// /// # Arguments @@ -116,7 +279,7 @@ impl AddressSpace { } locked_listener.enable(); - let mut idx = 0; + let mut idx = 0_usize; let mut mls = self.listeners.lock().unwrap(); for ml in mls.iter() { if ml.lock().unwrap().priority() >= locked_listener.priority() { @@ -222,7 +385,7 @@ impl AddressSpace { } old_idx += 1; continue; - } else if old_r.addr_range == new_r.addr_range && old_r == new_r { + } else if old_r == new_r { old_idx += 1; new_idx += 1; continue; @@ -230,8 +393,8 @@ impl AddressSpace { } else { if !is_add { self.call_listeners(Some(old_r), None, ListenerReqType::DeleteRegion) - .chain_err(|| { - ErrorKind::UpdateTopology( + .with_context(|| { + AddressSpaceError::UpdateTopology( old_r.addr_range.base.raw_value(), old_r.addr_range.size, old_r.owner.region_type(), @@ -246,8 +409,8 @@ impl AddressSpace { // current old_range is None, or current new_range is before old_range if is_add && new_range.is_some() { self.call_listeners(new_range, None, ListenerReqType::AddRegion) - .chain_err(|| { - ErrorKind::UpdateTopology( + .with_context(|| { + AddressSpaceError::UpdateTopology( new_range.unwrap().addr_range.base.raw_value(), new_range.unwrap().addr_range.size, new_range.unwrap().owner.region_type(), @@ -267,37 +430,41 @@ impl AddressSpace { /// * `new_evtfds` - New `RegionIoEventFd` array. fn update_ioeventfds_pass(&self, new_evtfds: &[RegionIoEventFd]) -> Result<()> { let old_evtfds = self.ioeventfds.lock().unwrap(); - let mut old_idx = 0; - let mut new_idx = 0; + let mut old_idx = 0_usize; + let mut new_idx = 0_usize; while old_idx < old_evtfds.len() || new_idx < new_evtfds.len() { let old_fd = old_evtfds.get(old_idx); let new_fd = new_evtfds.get(new_idx); - if old_fd.is_some() && (new_fd.is_none() || old_fd.unwrap().before(new_fd.unwrap())) { + + if old_fd == new_fd { + old_idx += 1; + new_idx += 1; + continue; + } + // Delete old_fd, but do not delete it if it's after new_fd, as it may match later. + if old_fd.is_some() && (new_fd.is_none() || !old_fd.unwrap().after(new_fd.unwrap())) { self.call_listeners(None, old_fd, ListenerReqType::DeleteIoeventfd) - .chain_err(|| { - ErrorKind::UpdateTopology( + .with_context(|| { + AddressSpaceError::UpdateTopology( old_fd.unwrap().addr_range.base.raw_value(), old_fd.unwrap().addr_range.size, RegionType::IO, ) })?; old_idx += 1; - } else if new_fd.is_some() - && (old_fd.is_none() || new_fd.unwrap().before(old_fd.unwrap())) - { + } + // Add new_fd, but do not add it if it's after old_fd, as it may match later. 
+ if new_fd.is_some() && (old_fd.is_none() || !new_fd.unwrap().after(old_fd.unwrap())) { self.call_listeners(None, new_fd, ListenerReqType::AddIoeventfd) - .chain_err(|| { - ErrorKind::UpdateTopology( + .with_context(|| { + AddressSpaceError::UpdateTopology( new_fd.unwrap().addr_range.base.raw_value(), new_fd.unwrap().addr_range.size, RegionType::IO, ) })?; new_idx += 1; - } else { - old_idx += 1; - new_idx += 1; } } @@ -313,7 +480,7 @@ impl AddressSpace { for fr in self.flat_view.load().0.iter() { let region_base = fr.addr_range.base.unchecked_sub(fr.offset_in_region).0; for evtfd in fr.owner.ioeventfds().iter() { - let mut evtfd_clone = evtfd.try_clone()?; + let mut evtfd_clone = evtfd.clone(); evtfd_clone.addr_range.base = evtfd_clone.addr_range.base.unchecked_add(region_base); if fr @@ -327,28 +494,128 @@ impl AddressSpace { } self.update_ioeventfds_pass(&ioeventfds) - .chain_err(|| "Failed to update ioeventfds")?; + .with_context(|| "Failed to update ioeventfds")?; *self.ioeventfds.lock().unwrap() = ioeventfds; Ok(()) } - /// Return the host address according to the given `GuestAddress`. + /// Return the host address according to the given `GuestAddress`. It is dangerous to + /// read and write directly to hva. We strongly recommend that you use the read and + /// write interface provided by AddressSpace unless you know exactly what you need and + /// are sure it is safe. /// /// # Arguments /// /// * `addr` - Guest address. - pub fn get_host_address(&self, addr: GuestAddress) -> Option { + /// + /// # Safety + /// + /// Using this function, the caller needs to make it clear that hva is always in the ram + /// range of the virtual machine. And if you want to operate [hva,hva+size], the range + /// from hva to hva+size needs to be in the ram range. + pub unsafe fn get_host_address(&self, addr: GuestAddress, attr: AddressAttr) -> Option { let view = self.flat_view.load(); - view.find_flatrange(addr).and_then(|range| { let offset = addr.offset_from(range.addr_range.base); range .owner - .get_host_address() + .get_host_address(attr) .map(|host| host + range.offset_in_region + offset) }) } + /// Return the available size and hva to the given `GuestAddress` from flat_view. + /// + /// # Arguments + /// + /// * `addr` - Guest address. + /// Return Error if the `addr` is not mapped. + /// or return the HVA address and available mem length + pub fn addr_cache_init(&self, addr: GuestAddress, attr: AddressAttr) -> Option<(u64, u64)> { + let view = self.flat_view.load(); + + if let Some(flat_range) = view.find_flatrange(addr) { + let fr_offset = addr.offset_from(flat_range.addr_range.base); + let region_offset = flat_range.offset_in_region + fr_offset; + + let region_remain = flat_range.owner.size() - region_offset; + let fr_remain = flat_range.addr_range.size - fr_offset; + + // SAFETY: addr and size is in ram region. + return unsafe { + flat_range.owner.get_host_address(attr).map(|host| { + ( + host + region_offset, + std::cmp::min(fr_remain, region_remain), + ) + }) + }; + } + + None + } + + /// Convert GPA buffer iovec to HVA buffer iovec. + /// + /// # Arguments + /// + /// * `addr` - Guest address. 
+ /// * `count` - Memory needed length + pub fn get_address_map( + &self, + cache: &Option, + addr: GuestAddress, + count: u64, + res: &mut Vec, + ) -> Result<()> { + let mut len = count; + let mut start = addr; + + loop { + let io_vec = self + .get_host_address_from_cache(start, cache) + .map(|(hva, fr_len)| Iovec { + iov_base: hva, + iov_len: std::cmp::min(len, fr_len), + }) + .with_context(|| format!("Map iov base {:x?}, iov len {:?} failed", addr, count))?; + start = start.unchecked_add(io_vec.iov_len); + len -= io_vec.iov_len; + res.push(io_vec); + + if len == 0 { + break; + } + } + + Ok(()) + } + + /// Return the host address according to the given `GuestAddress` from cache. + /// + /// # Arguments + /// + /// * `addr` - Guest address. + /// * `cache` - The related region cache. + pub fn get_host_address_from_cache( + &self, + addr: GuestAddress, + cache: &Option, + ) -> Option<(u64, u64)> { + if cache.is_none() { + return self.addr_cache_init(addr, AddressAttr::Ram); + } + let region_cache = cache.unwrap(); + if addr.0 >= region_cache.start && addr.0 < region_cache.end { + Some(( + region_cache.host_base + addr.0 - region_cache.start, + region_cache.end - addr.0, + )) + } else { + self.addr_cache_init(addr, AddressAttr::Ram) + } + } + /// Check if the GuestAddress is in one of Ram region. /// /// # Arguments @@ -363,13 +630,14 @@ impl AddressSpace { }) } - pub fn get_region_cache(&self, addr: GuestAddress) -> Option { + pub fn get_region_cache(&self, addr: GuestAddress, attr: AddressAttr) -> Option { let view = &self.flat_view.load(); if let Some(range) = view.find_flatrange(addr) { let reg_type = range.owner.region_type(); let start = range.addr_range.base.0; let end = range.addr_range.end_addr().0; - let host_base = self.get_host_address(GuestAddress(start)).unwrap_or(0); + // SAFETY: the size is in region range, and the type will be checked in get_host_address. + let host_base = unsafe { self.get_host_address(GuestAddress(start), attr) }?; let cache = RegionCache { reg_type, host_base, @@ -381,7 +649,7 @@ impl AddressSpace { None } - /// Return the end address of memory according to all Ram regions in AddressSpace. + /// Return the end address of memory according to all Ram regions in AddressSpace. pub fn memory_end_address(&self) -> GuestAddress { self.flat_view .load() @@ -403,26 +671,18 @@ impl AddressSpace { /// # Errors /// /// Return Error if the `addr` is not mapped. - pub fn read(&self, dst: &mut dyn std::io::Write, addr: GuestAddress, count: u64) -> Result<()> { - let view = &self.flat_view.load(); + pub fn read( + &self, + dst: &mut dyn std::io::Write, + addr: GuestAddress, + count: u64, + attr: AddressAttr, + ) -> Result<()> { + trace::address_space_read(&addr, count); + let view = self.flat_view.load(); - let (fr, offset) = view - .find_flatrange(addr) - .map(|fr| (fr, addr.offset_from(fr.addr_range.base))) - .chain_err(|| ErrorKind::RegionNotFound(addr.raw_value()))?; - - let region_base = fr.addr_range.base.unchecked_sub(fr.offset_in_region); - let offset_in_region = fr.offset_in_region + offset; - fr.owner - .read(dst, region_base, offset_in_region, count) - .chain_err(|| { - format!( - "Failed to read region, region base 0x{:X}, offset in region 0x{:X}, size 0x{:X}", - region_base.raw_value(), - offset_in_region, - count - ) - }) + view.read(dst, addr, count, attr)?; + Ok(()) } /// Write data to specified guest address. @@ -436,24 +696,55 @@ impl AddressSpace { /// # Errors /// /// Return Error if the `addr` is not mapped. 
- pub fn write(&self, src: &mut dyn std::io::Read, addr: GuestAddress, count: u64) -> Result<()> { + pub fn write( + &self, + src: &mut dyn std::io::Read, + addr: GuestAddress, + count: u64, + attr: AddressAttr, + ) -> Result<()> { + trace::address_space_write(&addr, count); let view = self.flat_view.load(); - let (fr, offset) = view - .find_flatrange(addr) - .map(|fr| (fr, addr.offset_from(fr.addr_range.base))) - .chain_err(|| ErrorKind::RegionNotFound(addr.raw_value()))?; - - let region_base = fr.addr_range.base.unchecked_sub(fr.offset_in_region); - let offset_in_region = fr.offset_in_region + offset; - fr.owner - .write(src, region_base, offset_in_region, count) - .chain_err(|| - format!( - "Failed to write region, region base 0x{:X}, offset in region 0x{:X}, size 0x{:X}", - region_base.raw_value(), - offset_in_region, - count - )) + + let mut buf = Vec::new(); + src.read_to_end(&mut buf).unwrap(); + + if !*self.hyp_ioevtfd_enabled.get_or_init(|| false) { + let ioeventfds = self.ioeventfds.lock().unwrap(); + for evtfd in ioeventfds.as_slice() { + if evtfd.addr_range.base != addr { + continue; + } + if count == evtfd.addr_range.size || evtfd.addr_range.size == 0 { + if !evtfd.data_match { + if let Err(e) = evtfd.fd.write(1) { + error!("Failed to write ioeventfd {:?}: {}", evtfd, e); + } + return Ok(()); + } + + let mut buf_temp = buf.clone(); + + if buf_temp.len() <= 8 { + buf_temp.resize(8, 0); + let data = u64::from_bytes(buf_temp.as_slice()).unwrap(); + if *data == evtfd.data { + if let Err(e) = evtfd.fd.write(1) { + error!("Failed to write ioeventfd {:?}: {}", evtfd, e); + } + return Ok(()); + } else { + continue; + } + } + view.write(&mut buf_temp.as_slice(), addr, count, attr)?; + return Ok(()); + } + } + } + + view.write(&mut buf.as_slice(), addr, count, attr)?; + Ok(()) } /// Write an object to memory. @@ -465,26 +756,19 @@ impl AddressSpace { /// /// # Note /// To use this method, it is necessary to implement `ByteCode` trait for your object. - pub fn write_object(&self, data: &T, addr: GuestAddress) -> Result<()> { - self.write(&mut data.as_bytes(), addr, std::mem::size_of::() as u64) - .chain_err(|| "Failed to write object") - } - - /// Write an object to memory via host address. - /// - /// # Arguments - /// - /// * `data` - The object that will be written to the memory. - /// * `host_addr` - The start host address where the object will be written to. - /// - /// # Note - /// To use this method, it is necessary to implement `ByteCode` trait for your object. - pub fn write_object_direct(&self, data: &T, host_addr: u64) -> Result<()> { - let mut dst = unsafe { - std::slice::from_raw_parts_mut(host_addr as *mut u8, std::mem::size_of::() as usize) - }; - dst.write_all(data.as_bytes()) - .chain_err(|| "Failed to write object via host address") + pub fn write_object( + &self, + data: &T, + addr: GuestAddress, + attr: AddressAttr, + ) -> Result<()> { + self.write( + &mut data.as_bytes(), + addr, + std::mem::size_of::() as u64, + attr, + ) + .with_context(|| "Failed to write object") } /// Read some data from memory to form an object. @@ -495,57 +779,45 @@ impl AddressSpace { /// /// # Note /// To use this method, it is necessary to implement `ByteCode` trait for your object. 
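// Editor's note: a standalone sketch, not part of the patch. The datamatch
// branch above pads the guest's MMIO payload to 8 bytes before comparing it
// with `evtfd.data`; the helper below mirrors that step, assuming a
// little-endian host so that `from_le_bytes` matches the raw byte
// reinterpretation done by `u64::from_bytes`.
fn payload_matches(payload: &[u8], expected: u64) -> bool {
    // Guest MMIO writes are at most 8 bytes; shorter writes are zero-extended.
    let mut buf = payload.to_vec();
    buf.resize(8, 0);
    u64::from_le_bytes(buf.try_into().unwrap()) == expected
}

fn main() {
    // A 4-byte write of 0x40 triggers an eventfd registered with data == 0x40.
    assert!(payload_matches(&0x40u32.to_le_bytes(), 0x40));
    assert!(!payload_matches(&0x41u32.to_le_bytes(), 0x40));
}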
- pub fn read_object(&self, addr: GuestAddress) -> Result { + pub fn read_object(&self, addr: GuestAddress, attr: AddressAttr) -> Result { let mut obj = T::default(); self.read( &mut obj.as_mut_bytes(), addr, std::mem::size_of::() as u64, + attr, ) - .chain_err(|| "Failed to read object")?; - Ok(obj) - } - - /// Read some data from memory to form an object via host address. - /// - /// # Arguments - /// - /// * `hoat_addr` - The start host address where the data will be read from. - /// - /// # Note - /// To use this method, it is necessary to implement `ByteCode` trait for your object. - pub fn read_object_direct(&self, host_addr: u64) -> Result { - let mut obj = T::default(); - let mut dst = obj.as_mut_bytes(); - let src = unsafe { - std::slice::from_raw_parts_mut(host_addr as *mut u8, std::mem::size_of::() as usize) - }; - dst.write_all(src) - .chain_err(|| "Failed to read object via host address")?; - + .with_context(|| "Failed to read object")?; Ok(obj) } /// Update the topology of memory. pub fn update_topology(&self) -> Result<()> { + trace::trace_scope_start!(address_update_topology); let old_fv = self.flat_view.load(); let addr_range = AddressRange::new(GuestAddress(0), self.root.size()); let new_fv = self .root .generate_flatview(GuestAddress(0), addr_range) - .chain_err(|| "Failed to generate new topology")?; + .with_context(|| "Failed to generate new topology")?; self.update_topology_pass(&old_fv, &new_fv, false) - .chain_err(|| "Failed to update topology (first pass)")?; + .with_context(|| "Failed to update topology (first pass)")?; self.update_topology_pass(&old_fv, &new_fv, true) - .chain_err(|| "Failed to update topology (second pass)")?; + .with_context(|| "Failed to update topology (second pass)")?; self.flat_view.store(Arc::new(new_fv)); self.update_ioeventfds() - .chain_err(|| "Failed to generate and update ioeventfds")?; + .with_context(|| "Failed to generate and update ioeventfds")?; Ok(()) } + + pub fn set_ioevtfd_enabled(&self, ioevtfd_enabled: bool) { + self.hyp_ioevtfd_enabled + .set(ioevtfd_enabled) + .unwrap_or_else(|_| error!("Failed to set hyp_ioevtfd_enabled")); + } } #[cfg(test)] @@ -553,11 +825,12 @@ mod test { use vmm_sys_util::eventfd::EventFd; use super::*; - use crate::{HostMemMapping, RegionOps}; + use crate::{AddressAttr, HostMemMapping, RegionOps}; #[derive(Default, Clone)] struct TestListener { reqs: Arc>>, + enabled: bool, } impl Listener for TestListener { @@ -565,6 +838,18 @@ mod test { 2 } + fn enabled(&self) -> bool { + self.enabled + } + + fn enable(&mut self) { + self.enabled = true; + } + + fn disable(&mut self) { + self.enabled = false; + } + fn handle_request( &self, range: Option<&FlatRange>, @@ -593,43 +878,103 @@ mod test { #[test] fn test_listeners() { // define an array of listeners in order to check the priority order - struct ListenerPrior0; + #[derive(Default)] + struct ListenerPrior0 { + enabled: bool, + } impl Listener for ListenerPrior0 { fn priority(&self) -> i32 { 0 } + + fn enabled(&self) -> bool { + self.enabled + } + + fn enable(&mut self) { + self.enabled = true; + } + + fn disable(&mut self) { + self.enabled = false; + } + } + #[derive(Default)] + struct ListenerPrior3 { + enabled: bool, } - struct ListenerPrior3; impl Listener for ListenerPrior3 { fn priority(&self) -> i32 { 3 } + + fn enabled(&self) -> bool { + self.enabled + } + + fn enable(&mut self) { + self.enabled = true; + } + + fn disable(&mut self) { + self.enabled = false; + } + } + #[derive(Default)] + struct ListenerPrior4 { + enabled: bool, } - struct 
ListenerPrior4; impl Listener for ListenerPrior4 { fn priority(&self) -> i32 { 4 } + + fn enabled(&self) -> bool { + self.enabled + } + + fn enable(&mut self) { + self.enabled = true; + } + + fn disable(&mut self) { + self.enabled = false; + } + } + #[derive(Default)] + struct ListenerNeg { + enabled: bool, } - struct ListenerNeg; impl Listener for ListenerNeg { fn priority(&self) -> i32 { -1 } + + fn enabled(&self) -> bool { + self.enabled + } + + fn enable(&mut self) { + self.enabled = true; + } + + fn disable(&mut self) { + self.enabled = false; + } } - let root = Region::init_container_region(8000); - let space = AddressSpace::new(root).unwrap(); - let listener1 = Arc::new(Mutex::new(ListenerPrior0)); - let listener2 = Arc::new(Mutex::new(ListenerPrior0)); - let listener3 = Arc::new(Mutex::new(ListenerPrior3)); - let listener4 = Arc::new(Mutex::new(ListenerPrior4)); - let listener5 = Arc::new(Mutex::new(ListenerNeg)); + let root = Region::init_container_region(8000, "root"); + let space = AddressSpace::new(root, "space", None).unwrap(); + let listener1 = Arc::new(Mutex::new(ListenerPrior0::default())); + let listener2 = Arc::new(Mutex::new(ListenerPrior0::default())); + let listener3 = Arc::new(Mutex::new(ListenerPrior3::default())); + let listener4 = Arc::new(Mutex::new(ListenerPrior4::default())); + let listener5 = Arc::new(Mutex::new(ListenerNeg::default())); space.register_listener(listener1).unwrap(); - space.register_listener(listener3).unwrap(); + space.register_listener(listener3.clone()).unwrap(); space.register_listener(listener5).unwrap(); space.register_listener(listener2).unwrap(); - space.register_listener(listener4).unwrap(); + space.register_listener(listener4.clone()).unwrap(); let mut pre_prior = std::i32::MIN; for listener in space.listeners.lock().unwrap().iter() { @@ -637,12 +982,56 @@ mod test { assert!(pre_prior <= curr); pre_prior = curr; } + + space.unregister_listener(listener4).unwrap(); + assert_eq!(space.listeners.lock().unwrap().len(), 4); + space.unregister_listener(listener3).unwrap(); + // It only contains listener1, listener5, listener2. 
+ assert_eq!(space.listeners.lock().unwrap().len(), 3); + } + + #[test] + fn test_unregister_listener() { + #[derive(Default)] + struct ListenerPrior0 { + enabled: bool, + } + impl Listener for ListenerPrior0 { + fn priority(&self) -> i32 { + 0 + } + + fn enabled(&self) -> bool { + self.enabled + } + + fn enable(&mut self) { + self.enabled = true; + } + + fn disable(&mut self) { + self.enabled = false; + } + } + + let root = Region::init_container_region(8000, "root"); + let space = AddressSpace::new(root, "space", None).unwrap(); + let listener1 = Arc::new(Mutex::new(ListenerPrior0::default())); + let listener2 = Arc::new(Mutex::new(ListenerPrior0::default())); + space.register_listener(listener1).unwrap(); + space.register_listener(listener2.clone()).unwrap(); + + space.unregister_listener(listener2).unwrap(); + assert_eq!(space.listeners.lock().unwrap().len(), 1); + for listener in space.listeners.lock().unwrap().iter() { + assert!(listener.lock().unwrap().enabled()); + } } #[test] fn test_update_topology() { - let root = Region::init_container_region(8000); - let space = AddressSpace::new(root.clone()).unwrap(); + let root = Region::init_container_region(8000, "root"); + let space = AddressSpace::new(root.clone(), "space", None).unwrap(); let listener = Arc::new(Mutex::new(TestListener::default())); space.register_listener(listener.clone()).unwrap(); @@ -659,8 +1048,8 @@ mod test { // // the flat_view is as follows, region-b is container which will not appear in the flat-view // [CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC] - let region_b = Region::init_container_region(4000); - let region_c = Region::init_io_region(6000, default_ops.clone()); + let region_b = Region::init_container_region(4000, "region_b"); + let region_c = Region::init_io_region(6000, default_ops.clone(), "region_c"); region_b.set_priority(2); region_c.set_priority(1); root.add_subregion(region_b.clone(), 2000).unwrap(); @@ -675,7 +1064,7 @@ mod test { .reqs .lock() .unwrap() - .get(0) + .first() .unwrap() .1, AddressRange::new(region_c.offset(), region_c.size()) @@ -690,7 +1079,7 @@ mod test { // D: [DDDDDD] // the flat_view is as follows, // [CCCCCCCCCCCC][DDDDDD][CCCCCCCCCCCCCCCCCCC] - let region_d = Region::init_io_region(1000, default_ops); + let region_d = Region::init_io_region(1000, default_ops, "region_d"); region_b.add_subregion(region_d.clone(), 0).unwrap(); let locked_listener = listener.lock().unwrap(); @@ -698,7 +1087,7 @@ mod test { assert_eq!(locked_listener.reqs.lock().unwrap().len(), 4); // delete flat-range 0~6000 first, belonging to region_c assert_eq!( - locked_listener.reqs.lock().unwrap().get(0).unwrap().1, + locked_listener.reqs.lock().unwrap().first().unwrap().1, AddressRange::new(region_c.offset(), region_c.size()) ); // add range 0~2000, belonging to region_c @@ -722,7 +1111,7 @@ mod test { #[test] fn test_update_ioeventfd() { let ioeventfds = vec![RegionIoEventFd { - fd: EventFd::new(libc::EFD_NONBLOCK).unwrap(), + fd: Arc::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()), addr_range: AddressRange::from((0, std::mem::size_of::() as u64)), data_match: true, data: 64_u64, @@ -739,15 +1128,15 @@ mod test { // c: [CCCCCCCCCCCCC] // the flat_view is as follows, // [BBBBBBBBBBBBB][CCCCC] - let root = Region::init_container_region(8000); - let space = AddressSpace::new(root.clone()).unwrap(); + let root = Region::init_container_region(8000, "region"); + let space = AddressSpace::new(root.clone(), "space", None).unwrap(); let listener = Arc::new(Mutex::new(TestListener::default())); 
space.register_listener(listener.clone()).unwrap(); - let region_b = Region::init_io_region(2000, default_ops.clone()); + let region_b = Region::init_io_region(2000, default_ops.clone(), "region_b"); region_b.set_priority(1); region_b.set_ioeventfds(&ioeventfds); - let region_c = Region::init_io_region(2000, default_ops); + let region_c = Region::init_io_region(2000, default_ops, "region_c"); region_c.set_ioeventfds(&ioeventfds); root.add_subregion(region_c, 2000).unwrap(); @@ -784,7 +1173,7 @@ mod test { #[test] fn test_subregion_ioeventfd() { let ioeventfds = vec![RegionIoEventFd { - fd: EventFd::new(libc::EFD_NONBLOCK).unwrap(), + fd: Arc::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()), addr_range: AddressRange::from((0, 4)), data_match: true, data: 0_64, @@ -801,13 +1190,13 @@ mod test { // c: [CCCCCC] // the flat_view is as follows, // [CCCCCC] - let root = Region::init_container_region(8000); - let space = AddressSpace::new(root.clone()).unwrap(); + let root = Region::init_container_region(8000, "root"); + let space = AddressSpace::new(root.clone(), "space", None).unwrap(); let listener = Arc::new(Mutex::new(TestListener::default())); space.register_listener(listener.clone()).unwrap(); - let region_b = Region::init_container_region(5000); - let region_c = Region::init_io_region(1000, default_ops); + let region_b = Region::init_container_region(5000, "root"); + let region_c = Region::init_io_region(1000, default_ops, "region_c"); region_c.set_ioeventfds(&ioeventfds); region_b.add_subregion(region_c, 1000).unwrap(); @@ -823,8 +1212,8 @@ mod test { #[test] fn test_get_ram_info() { - let root = Region::init_container_region(8000); - let space = AddressSpace::new(root.clone()).unwrap(); + let root = Region::init_container_region(8000, "root"); + let space = AddressSpace::new(root.clone(), "space", None).unwrap(); let default_ops = RegionOps { read: Arc::new(|_: &mut [u8], _: GuestAddress, _: u64| -> bool { true }), write: Arc::new(|_: &[u8], _: GuestAddress, _: u64| -> bool { true }), @@ -836,8 +1225,8 @@ mod test { let ram2 = Arc::new( HostMemMapping::new(GuestAddress(2000), None, 1000, None, false, false, false).unwrap(), ); - let region_a = Region::init_ram_region(ram1.clone()); - let region_b = Region::init_ram_region(ram2.clone()); + let region_a = Region::init_ram_region(ram1.clone(), "region_a"); + let region_b = Region::init_ram_region(ram2.clone(), "region_b"); root.add_subregion(region_a, ram1.start_address().raw_value()) .unwrap(); root.add_subregion(region_b, ram2.start_address().raw_value()) @@ -848,16 +1237,16 @@ mod test { ram2.start_address().unchecked_add(ram2.size()) ); assert!(space.address_in_memory(GuestAddress(0), 0)); - assert_eq!(space.address_in_memory(GuestAddress(1000), 0), false); - assert_eq!(space.address_in_memory(GuestAddress(1500), 0), false); + assert!(!space.address_in_memory(GuestAddress(1000), 0)); + assert!(!space.address_in_memory(GuestAddress(1500), 0)); assert!(space.address_in_memory(GuestAddress(2900), 0)); assert_eq!( - space.get_host_address(GuestAddress(500)), + unsafe { space.get_host_address(GuestAddress(500), AddressAttr::Ram) }, Some(ram1.host_address() + 500) ); assert_eq!( - space.get_host_address(GuestAddress(2500)), + unsafe { space.get_host_address(GuestAddress(2500), AddressAttr::Ram) }, Some(ram2.host_address() + 500) ); @@ -869,7 +1258,7 @@ mod test { // c: [CCCCCCCCC] // the flat_view is as follows, // [AAAAAA][CCCCCCCCC][BB] - let region_c = Region::init_io_region(1500, default_ops); + let region_c = Region::init_io_region(1500, 
default_ops, "region_c"); region_c.set_priority(1); root.add_subregion(region_c, 1000).unwrap(); @@ -878,37 +1267,47 @@ mod test { ram2.start_address().unchecked_add(ram2.size()) ); assert!(space.address_in_memory(GuestAddress(0), 0)); - assert_eq!(space.address_in_memory(GuestAddress(1000), 0), false); - assert_eq!(space.address_in_memory(GuestAddress(1500), 0), false); - assert_eq!(space.address_in_memory(GuestAddress(2400), 0), false); + assert!(!space.address_in_memory(GuestAddress(1000), 0)); + assert!(!space.address_in_memory(GuestAddress(1500), 0)); + assert!(!space.address_in_memory(GuestAddress(2400), 0)); assert!(space.address_in_memory(GuestAddress(2900), 0)); assert_eq!( - space.get_host_address(GuestAddress(500)), + unsafe { space.get_host_address(GuestAddress(500), AddressAttr::Ram) }, Some(ram1.host_address() + 500) ); - assert!(space.get_host_address(GuestAddress(2400)).is_none()); + assert!(unsafe { + space + .get_host_address(GuestAddress(2400), AddressAttr::Ram) + .is_none() + }); assert_eq!( - space.get_host_address(GuestAddress(2500)), + unsafe { space.get_host_address(GuestAddress(2500), AddressAttr::Ram) }, Some(ram2.host_address() + 500) ); } #[test] fn test_write_and_read_object() { - let root = Region::init_container_region(8000); - let space = AddressSpace::new(root.clone()).unwrap(); + let root = Region::init_container_region(8000, "root"); + let space = AddressSpace::new(root.clone(), "space", None).unwrap(); let ram1 = Arc::new( HostMemMapping::new(GuestAddress(0), None, 1000, None, false, false, false).unwrap(), ); - let region_a = Region::init_ram_region(ram1.clone()); + let region_a = Region::init_ram_region(ram1.clone(), "region_a"); root.add_subregion(region_a, ram1.start_address().raw_value()) .unwrap(); let data: u64 = 10000; - assert!(space.write_object(&data, GuestAddress(992)).is_ok()); - let data1: u64 = space.read_object(GuestAddress(992)).unwrap(); + assert!(space + .write_object(&data, GuestAddress(992), AddressAttr::Ram) + .is_ok()); + let data1: u64 = space + .read_object(GuestAddress(992), AddressAttr::Ram) + .unwrap(); assert_eq!(data1, 10000); - assert!(space.write_object(&data, GuestAddress(993)).is_err()); + assert!(space + .write_object(&data, GuestAddress(993), AddressAttr::Ram) + .is_err()); } } diff --git a/address_space/src/error.rs b/address_space/src/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..98e4682c2bbcfa779390bc9822c4dbb13f2cd8a0 --- /dev/null +++ b/address_space/src/error.rs @@ -0,0 +1,47 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum AddressSpaceError { + #[error("Util")] + Util { + #[from] + source: util::error::UtilError, + }, + #[error("Io")] + Io { + #[from] + source: std::io::Error, + }, + #[error("Failed to call listener, request type is {0:#?}")] + ListenerRequest(crate::listener::ListenerReqType), + #[error("Failed to update topology, base 0x{0:X}, size 0x{1:X}, region type is {2:#?}")] + UpdateTopology(u64, u64, crate::RegionType), + #[error("Failed to clone EventFd")] + IoEventFd, + #[error("Failed to align-up address, addr 0x{0:X}, align 0x{1:X}")] + AddrAlignUp(u64, u64), + #[error("Failed to find matched region, addr 0x{0:X}")] + RegionNotFound(u64), + #[error("Address overflows, addr is 0x{0:X}")] + Overflow(u64), + #[error("Failed to mmap")] + Mmap, + #[error("Failed to access IO-type region, region base 0x{0:X}, offset 0x{1:X}, size 0x{2:X}")] + IoAccess(u64, u64, u64), + #[error("Wrong region type, {0:#?}")] + RegionType(crate::RegionType), + #[error("Invalid offset: offset 0x{0:X}, data length 0x{1:X}, region size 0x{2:X}")] + InvalidOffset(u64, u64, u64), +} diff --git a/address_space/src/host_mmap.rs b/address_space/src/host_mmap.rs index 1a6b0a83959992bd7b39e660b312d317fe5640b3..d31d4fd34f4a37b940e0d53bc07a7b398938998d 100644 --- a/address_space/src/host_mmap.rs +++ b/address_space/src/host_mmap.rs @@ -11,21 +11,30 @@ // See the Mulan PSL v2 for more details. use std::cmp::min; -use std::fs::File; -use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; +use std::ffi::CString; +use std::fs::{remove_file, File}; +use std::os::unix::io::FromRawFd; use std::sync::Arc; use std::thread; -use machine_manager::config::MachineMemConfig; +use anyhow::{bail, Context, Result}; +use log::{error, info}; +use nix::sys::memfd::{memfd_create, MemFdCreateFlag}; +use nix::sys::statfs::fstatfs; +use nix::unistd::{mkstemp, sysconf, unlink, SysconfVar}; -use crate::errors::{Result, ResultExt}; -use crate::{AddressRange, GuestAddress}; -use util::unix::{do_mmap, host_page_size}; +use crate::{AddressRange, GuestAddress, Region}; +use machine_manager::config::{HostMemPolicy, MachineMemConfig, MemZoneConfig}; +use util::unix::{do_mmap, host_page_size, mbind}; -const MAX_PREALLOC_THREAD: u8 = 16; +const MAX_PREALLOC_THREAD: i64 = 16; +/// Verify existing pages in the mapping. +const MPOL_MF_STRICT: u32 = 1; +/// Move pages owned by this process to conform to mapping. +const MPOL_MF_MOVE: u32 = 2; /// FileBackend represents backend-file of `HostMemMapping`. -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct FileBackend { /// File we used to map memory. pub file: Arc, @@ -35,15 +44,21 @@ pub struct FileBackend { pub page_size: u64, } +fn file_unlink(file_path: &str) { + if let Err(e) = remove_file(file_path) { + error!("Failed to unlink file \"{}\", error: {:?}", file_path, e); + } +} + impl FileBackend { /// Construct a new FileBackend with an opened file. /// /// # Arguments /// /// * `fd` - Opened backend file. - pub fn new_common(fd: File) -> Self { + pub fn new_common(fd: Arc) -> Self { Self { - file: Arc::new(fd), + file: fd, offset: 0, page_size: 0, } @@ -65,73 +80,68 @@ impl FileBackend { /// * fail to set file length. pub fn new_mem(file_path: &str, file_len: u64) -> Result { let path = std::path::Path::new(&file_path); + let mut need_unlink = false; let file = if path.is_dir() { + // The last six characters of template file must be "XXXXXX" for `mkstemp` + // function to create unique temporary file. 
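// Editor's note: an illustrative, standalone sketch, not part of the patch.
// With the error_chain module replaced by the thiserror enum above, callers
// attach context via anyhow's `with_context` and can still recover the typed
// variant by downcasting. The stand-in enum below exists only for this
// example; it requires the `anyhow` and `thiserror` crates.
use anyhow::{Context, Result};
use thiserror::Error;

#[derive(Error, Debug)]
enum DemoError {
    #[error("Failed to find matched region, addr 0x{0:X}")]
    RegionNotFound(u64),
}

fn find_region(addr: u64) -> Result<()> {
    Err(DemoError::RegionNotFound(addr)).with_context(|| "Failed to read object")
}

fn main() {
    let err = find_region(0x1000).unwrap_err();
    // The typed variant is still reachable underneath the added context.
    assert!(matches!(
        err.downcast_ref::<DemoError>(),
        Some(DemoError::RegionNotFound(0x1000))
    ));
}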
let fs_path = format!("{}{}", file_path, "/stratovirt_backmem_XXXXXX"); - let fs_cstr = std::ffi::CString::new(fs_path.clone()).unwrap().into_raw(); - let raw_fd = unsafe { libc::mkstemp(fs_cstr) }; - if raw_fd < 0 { - return Err(std::io::Error::last_os_error()) - .chain_err(|| format!("Failed to create file in directory: {} ", file_path)); - } + let (raw_fd, fs_tmp_path) = match mkstemp(fs_path.as_str()) { + Ok((fd, p)) => (fd, p), + Err(_) => { + return Err(std::io::Error::last_os_error()).with_context(|| { + format!("Failed to create file in directory: {} ", file_path) + }); + } + }; - if unsafe { libc::unlink(fs_cstr) } != 0 { + if unlink(fs_tmp_path.as_path()).is_err() { error!( - "Failed to unlink file \"{}\", error: {}", - fs_path, + "Failed to unlink file \"{:?}\", error: {:?}", + fs_tmp_path.as_path(), std::io::Error::last_os_error() - ); + ) } + + // SAFETY: only one FileBackend instance has the ownership of the file descriptor unsafe { File::from_raw_fd(raw_fd) } } else { - let existed = !path.exists(); + need_unlink = !path.exists(); // Open the file, if not exist, create it. - let file_ret = std::fs::OpenOptions::new() + std::fs::OpenOptions::new() .read(true) .write(true) .create(true) .open(path) - .chain_err(|| format!("Failed to open file: {}", file_path))?; - - if existed - && unsafe { libc::unlink(std::ffi::CString::new(file_path).unwrap().into_raw()) } - != 0 - { - error!( - "Failed to unlink file \"{}\", error: {}", - file_path, - std::io::Error::last_os_error() - ); - } - - file_ret + .with_context(|| format!("Failed to open file: {}", file_path))? }; - // Safe because struct `statfs` only contains plain-data-type field, - // and set to all-zero will not cause any undefined behavior. - let mut fstat: libc::statfs = unsafe { std::mem::zeroed() }; - unsafe { libc::fstatfs(file.as_raw_fd(), &mut fstat) }; + let fstat = fstatfs(&file).with_context(|| "Failed to fstatfs file")?; info!( "Using memory backing file, the page size is {}", - fstat.f_bsize + fstat.optimal_transfer_size() ); let old_file_len = file.metadata().unwrap().len(); if old_file_len == 0 { - file.set_len(file_len) - .chain_err(|| format!("Failed to set length of file: {}", file_path))?; + if file.set_len(file_len).is_err() { + if need_unlink { + file_unlink(file_path); + } + bail!("Failed to set length of file: {}", file_path); + } } else if old_file_len < file_len { bail!( - "Backing file {} does not has sufficient resource for allocating RAM (size is 0x{:X})", - file_path, - file_len - ); + "Backing file {} does not has sufficient resource for allocating RAM (size is 0x{:X})", + file_path, + file_len + ); } Ok(FileBackend { file: Arc::new(file), offset: 0_u64, - page_size: fstat.f_bsize as u64, + page_size: fstat.optimal_transfer_size() as _, }) } } @@ -142,8 +152,24 @@ impl FileBackend { /// /// * `nr_vcpus` - Number of vcpus. fn max_nr_threads(nr_vcpus: u8) -> u8 { - let nr_host_cpu = unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) as u8 }; - min(min(nr_host_cpu, MAX_PREALLOC_THREAD), nr_vcpus) + let conf = sysconf(SysconfVar::_NPROCESSORS_ONLN); + + // If fails to call `sysconf` function, just use a single thread to touch pages. + if conf.is_err() || conf.unwrap().is_none() { + log::warn!("Failed to get sysconf of _NPROCESSORS_ONLN"); + return 1; + } + let nr_host_cpu = conf.unwrap().unwrap(); + if nr_host_cpu <= 0 { + log::warn!( + "The sysconf of _NPROCESSORS_ONLN: {} is ignored", + nr_host_cpu + ); + return 1; + } + + // MAX_PREALLOC_THREAD's value(16) is less than 255. 
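// Editor's note: an isolated sketch, not part of the patch, of the nix calls
// used by `FileBackend::new_mem` above. mkstemp() creates a unique file from
// the trailing "XXXXXX" template and unlink() drops its directory entry right
// away, so the backing file vanishes once the descriptor is closed. It assumes
// the same `nix` crate the patch depends on.
use std::fs::File;
use std::os::unix::io::FromRawFd;

use nix::unistd::{mkstemp, unlink};

fn anon_backing_file(dir: &str) -> nix::Result<File> {
    let template = format!("{}/stratovirt_backmem_XXXXXX", dir);
    let (raw_fd, tmp_path) = mkstemp(template.as_str())?;
    // Unlinking keeps the data reachable through `raw_fd`, but not by name.
    unlink(tmp_path.as_path())?;
    // SAFETY: mkstemp returned a freshly opened descriptor that we now own.
    Ok(unsafe { File::from_raw_fd(raw_fd) })
}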
+ min(min(nr_host_cpu, MAX_PREALLOC_THREAD) as u8, nr_vcpus) } /// Touch pages to pre-alloc memory for VM. @@ -156,12 +182,12 @@ fn max_nr_threads(nr_vcpus: u8) -> u8 { fn touch_pages(start: u64, page_size: u64, nr_pages: u64) { let mut addr = start; for _i in 0..nr_pages { - // Safe, because the data read from raw pointer is written to the same address. + // SAFETY: The data read from raw pointer is written to the same address. unsafe { let read_addr = addr as *mut u8; let data: u8 = *read_addr; - // This function is used to prevent complier optimization. - // If `*read = data` is used, the complier optimizes it as no-op, + // This function is used to prevent compiler optimization. + // If `*read = data` is used, the compiler optimizes it as no-op, // which means that the pages will not be touched. std::ptr::write_volatile(read_addr, data); } @@ -173,15 +199,16 @@ fn touch_pages(start: u64, page_size: u64, nr_pages: u64) { /// /// # Arguments /// -/// * `host_addr` - The start host address of memory of the virtual machine. +/// * `host_addr` - The start host address to pre allocate. /// * `size` - Size of memory. /// * `nr_vcpus` - Number of vcpus. fn mem_prealloc(host_addr: u64, size: u64, nr_vcpus: u8) { + trace::trace_scope_start!(pre_alloc, args = (size)); let page_size = host_page_size(); let threads = max_nr_threads(nr_vcpus); let nr_pages = (size + page_size - 1) / page_size; - let pages_per_thread = nr_pages / (threads as u64); - let left = nr_pages % (threads as u64); + let pages_per_thread = nr_pages / u64::from(threads); + let left = nr_pages % u64::from(threads); let mut addr = host_addr; let mut threads_join = Vec::new(); for i in 0..threads { @@ -204,39 +231,34 @@ fn mem_prealloc(host_addr: u64, size: u64, nr_vcpus: u8) { } } -/// Create HostMemMappings according to address ranges. +/// If the memory is not configured numa, use this /// /// # Arguments /// -/// * `ranges` - The guest address range that will be mapped. -/// * `mem_config` - Machine memory config. -pub fn create_host_mmaps( - ranges: &[(u64, u64)], - mem_config: &MachineMemConfig, - nr_vcpus: u8, -) -> Result>> { +/// * `mem_config` - The config of default memory. +/// * `thread_num` - The num of mem preallocv threads, typically the number of vCPUs. +pub fn create_default_mem(mem_config: &MachineMemConfig, thread_num: u8) -> Result { let mut f_back: Option = None; if let Some(path) = &mem_config.mem_path { - let file_len = ranges.iter().fold(0, |acc, x| acc + x.1); f_back = Some( - FileBackend::new_mem(path, file_len) - .chain_err(|| "Failed to create file that backs memory")?, + FileBackend::new_mem(path, mem_config.mem_size) + .with_context(|| "Failed to create file that backs memory")?, ); } else if mem_config.mem_share { - let file_len = ranges.iter().fold(0, |acc, x| acc + x.1); - let anon_mem_name = String::from("stratovirt_anon_mem"); - - let anon_fd = - unsafe { libc::syscall(libc::SYS_memfd_create, anon_mem_name.as_ptr(), 0) } as RawFd; + let anon_fd = memfd_create( + &CString::new("stratovirt_anon_mem")?, + MemFdCreateFlag::empty(), + )?; if anon_fd < 0 { - return Err(std::io::Error::last_os_error()).chain_err(|| "Failed to create memfd"); + return Err(std::io::Error::last_os_error()).with_context(|| "Failed to create memfd"); } + // SAFETY: The parameters is constant. 
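// Editor's note: a standalone sketch, not part of the patch, of the arithmetic
// `mem_prealloc` above uses to split `nr_pages` across the touch threads. The
// hunk does not show where the remainder goes; here it is assumed to be folded
// into the last thread's share.
fn pages_for_thread(nr_pages: u64, threads: u64, i: u64) -> u64 {
    let per_thread = nr_pages / threads;
    let left = nr_pages % threads;
    if i == threads - 1 {
        per_thread + left
    } else {
        per_thread
    }
}

fn main() {
    // 11 pages over 4 threads -> 2 + 2 + 2 + 5.
    let split: Vec<u64> = (0..4).map(|i| pages_for_thread(11, 4, i)).collect();
    assert_eq!(split, vec![2, 2, 2, 5]);
    assert_eq!(split.iter().sum::<u64>(), 11);
}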
let anon_file = unsafe { File::from_raw_fd(anon_fd) }; anon_file - .set_len(file_len) - .chain_err(|| "Failed to set the length of anonymous file that backs memory")?; + .set_len(mem_config.mem_size) + .with_context(|| "Failed to set the length of anonymous file that backs memory")?; f_back = Some(FileBackend { file: Arc::new(anon_file), @@ -244,41 +266,127 @@ pub fn create_host_mmaps( page_size: host_page_size(), }); } - - let backend = (&f_back).as_ref(); - let mut host_addr = do_mmap( - &backend.map(|fb| fb.file.as_ref()), + let block = Arc::new(HostMemMapping::new( + GuestAddress(0), + None, mem_config.mem_size, - backend.map_or(0, |fb| fb.offset), - false, - mem_config.mem_share, + f_back, mem_config.dump_guest_core, - )?; + mem_config.mem_share, + false, + )?); + if mem_config.mem_prealloc { - mem_prealloc(host_addr, mem_config.mem_size, nr_vcpus); + mem_prealloc(block.host_address(), mem_config.mem_size, thread_num); } - let mut mappings = Vec::new(); - for range in ranges.iter() { - mappings.push(Arc::new(HostMemMapping::new( - GuestAddress(range.0), - Some(host_addr), - range.1, - f_back.clone(), - mem_config.dump_guest_core, - mem_config.mem_share, - false, - )?)); - host_addr += range.1; + let region = Region::init_ram_region(block, "DefaultRam"); + + Ok(region) +} + +/// If the memory is configured numa, use this +/// +/// # Arguments +/// +/// * `mem_config` - The config of default memory. +/// * `thread_num` - The num of mem preallocv threads, typically the number of vCPUs. +pub fn create_backend_mem(mem_config: &MemZoneConfig, thread_num: u8) -> Result { + let mut f_back: Option = None; - if let Some(mut fb) = f_back.as_mut() { - fb.offset += range.1 + if mem_config.memfd() { + let anon_fd = memfd_create( + &CString::new("stratovirt_anon_mem")?, + MemFdCreateFlag::empty(), + )?; + if anon_fd < 0 { + return Err(std::io::Error::last_os_error()).with_context(|| "Failed to create memfd"); } + + // SAFETY: The parameters is constant. + let anon_file = unsafe { File::from_raw_fd(anon_fd) }; + anon_file + .set_len(mem_config.size) + .with_context(|| "Failed to set the length of anonymous file that backs memory")?; + + f_back = Some(FileBackend { + file: Arc::new(anon_file), + offset: 0, + page_size: host_page_size(), + }); + } else if let Some(path) = &mem_config.mem_path { + f_back = Some( + FileBackend::new_mem(path, mem_config.size) + .with_context(|| "Failed to create file that backs memory")?, + ); + } + let block = Arc::new(HostMemMapping::new( + GuestAddress(0), + None, + mem_config.size, + f_back, + mem_config.dump_guest_core, + mem_config.share, + false, + )?); + if mem_config.prealloc { + mem_prealloc(block.host_address(), mem_config.size, thread_num); + } + set_host_memory_policy(&block, mem_config)?; + + let region = Region::init_ram_region(block, mem_config.id.as_str()); + Ok(region) +} + +/// Set host memory backend numa policy. +/// +/// # Arguments +/// +/// * `mem_mappings` - The host virtual address of mapped memory information. +/// * `zone` - Memory zone config info. +fn set_host_memory_policy(mem_mappings: &Arc, zone: &MemZoneConfig) -> Result<()> { + if zone.host_numa_nodes.is_none() { + return Ok(()); + } + let host_addr_start = mem_mappings.host_address(); + let nodes = zone.host_numa_nodes.as_ref().unwrap(); + let mut max_node = nodes[nodes.len() - 1] as usize; + + let mut nmask: Vec = Vec::new(); + // Upper limit of max_node is MAX_NODES. 
+ nmask.resize(max_node / 64 + 1, 0); + for node in nodes.iter() { + nmask[(*node / 64) as usize] |= 1_u64 << (*node % 64); + } + // We need to pass node_id + 1 as mbind() max_node argument. + // It is kind of linux bug or feature which will cut off the last node. + max_node += 1; + + let policy = HostMemPolicy::from(zone.policy.clone()); + if policy == HostMemPolicy::Default { + max_node = 0; + nmask = vec![0_u64; max_node]; + } + + // SAFETY: + // 1. addr is managed by memory mapping, it can be guaranteed legal. + // 2. node_mask was created in this function. + // 3. Upper limit of max_node is MAX_NODES. + unsafe { + mbind( + host_addr_start, + zone.size, + policy as u32, + nmask, + max_node as u64, + MPOL_MF_STRICT | MPOL_MF_MOVE, + )?; } - Ok(mappings) + Ok(()) } /// Record information of memory mapping. +#[derive(Debug)] pub struct HostMemMapping { /// Record the range of one memory segment. address_range: AddressRange, @@ -286,12 +394,15 @@ pub struct HostMemMapping { host_addr: *mut u8, /// Represents file and offset-in-file that backs this mapping. file_back: Option, + /// share mem flag + is_share: bool, } -// Send and Sync is not auto-implemented for raw pointer type -// implementing them is safe because field of HostMemMapping won't change once initialized, -// only access(r/w) is permitted +// SAFETY: Send and Sync is not auto-implemented for raw pointer type, +// implementing them is safe because field of HostMemMapping won't change +// once initialized, only access(r/w) is permitted unsafe impl Send for HostMemMapping {} +// SAFETY: Same reason as above. unsafe impl Sync for HostMemMapping {} impl HostMemMapping { @@ -318,7 +429,7 @@ impl HostMemMapping { let host_addr = if let Some(addr) = host_addr { addr } else { - let fb = (&file_back).as_ref(); + let fb = file_back.as_ref(); do_mmap( &fb.map(|f| f.file.as_ref()), size, @@ -336,6 +447,7 @@ impl HostMemMapping { }, host_addr: host_addr as *mut u8, file_back, + is_share, }) } @@ -360,11 +472,16 @@ impl HostMemMapping { pub fn file_backend(&self) -> Option { self.file_back.clone() } + + pub fn mem_shared(&self) -> bool { + self.is_share + } } impl Drop for HostMemMapping { /// Release the memory mapping. fn drop(&mut self) { + // SAFETY: self.host_addr and self.size has already been verified during initialization. 
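// Editor's note: a standalone sketch, not part of the patch, isolating how
// `set_host_memory_policy` above builds the node bitmap handed to mbind().
// Each host NUMA node sets one bit in an array of u64 words, and the highest
// node id plus one is passed as the mask width (the "+ 1" quirk noted above).
fn build_node_mask(nodes: &[u32]) -> (Vec<u64>, u64) {
    let max_node = u64::from(*nodes.last().unwrap());
    let mut nmask = vec![0u64; (max_node / 64 + 1) as usize];
    for node in nodes {
        nmask[(*node / 64) as usize] |= 1u64 << (*node % 64);
    }
    (nmask, max_node + 1)
}

fn main() {
    // Nodes 0, 1 and 65 span two 64-bit words of the bitmap.
    let (mask, width) = build_node_mask(&[0, 1, 65]);
    assert_eq!(mask, vec![0b11, 0b10]);
    assert_eq!(width, 66);
}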
unsafe { libc::munmap( self.host_addr as *mut libc::c_void, @@ -376,10 +493,12 @@ impl Drop for HostMemMapping { #[cfg(test)] mod test { - use super::*; use std::io::{Read, Seek, SeekFrom, Write}; + use vmm_sys_util::tempfile::TempFile; + use super::*; + fn identify(ram: HostMemMapping, st: u64, end: u64) { assert_eq!(ram.start_address(), GuestAddress(st)); assert_eq!(ram.size(), end - st); @@ -450,6 +569,7 @@ mod test { f_back.as_ref().unwrap().file.metadata().unwrap().len(), 100u64 ); + std::fs::remove_file(file_path).unwrap(); } #[test] @@ -476,43 +596,29 @@ mod test { } #[test] - fn test_create_host_mmaps() { - let addr_ranges = [(0x0, 0x10_0000), (0x100000, 0x10_0000)]; - let mem_path = std::env::current_dir() - .unwrap() - .as_path() - .to_str() - .unwrap() - .to_string(); - let mem_config = MachineMemConfig { - mem_size: 0x20_0000, - mem_path: Some(mem_path), - dump_guest_core: false, - mem_share: false, - mem_prealloc: false, - }; - - let host_mmaps = create_host_mmaps(&addr_ranges, &mem_config, 1).unwrap(); - assert_eq!(host_mmaps.len(), 2); - - // check the start address and size of HostMemMapping - for (index, mmap) in host_mmaps.iter().enumerate() { - assert_eq!(mmap.start_address().raw_value(), addr_ranges[index].0); - assert_eq!(mmap.size(), addr_ranges[index].1); - assert!(mmap.file_backend().is_some()); - } - - // check the file backends' total size, should equal to mem_size in config. - let total_file_size = host_mmaps[0] - .file_backend() - .unwrap() - .file - .metadata() - .unwrap() - .len(); - let total_mem_size = addr_ranges.iter().fold(0_u64, |acc, x| acc + x.1); - let total_mmaps_size = host_mmaps.iter().fold(0_u64, |acc, x| acc + x.size()); - assert_eq!(total_mem_size, total_file_size); - assert_eq!(total_mem_size, total_mmaps_size); + fn test_memory_prealloc() { + // Mmap and prealloc with anonymous memory. + let host_addr = do_mmap(&None, 0x20_0000, 0, false, false, false).unwrap(); + // Check the thread number equals to minimum value. + assert_eq!(max_nr_threads(1), 1); + // The max threads limit is 16, or the number of host CPUs, it will never be 20. + assert_ne!(max_nr_threads(20), 20); + mem_prealloc(host_addr, 0x20_0000, 20); + + // Mmap and prealloc with file backend. + let file_path = String::from("back_mem_test"); + let file_size = 0x10_0000; + let f_back = FileBackend::new_mem(&file_path, file_size).unwrap(); + let host_addr = do_mmap( + &Some(f_back.file.as_ref()), + 0x10_0000, + f_back.offset, + false, + true, + false, + ) + .unwrap(); + mem_prealloc(host_addr, 0x10_0000, 2); + std::fs::remove_file(file_path).unwrap(); } } diff --git a/address_space/src/lib.rs b/address_space/src/lib.rs index 5b337e2158fe50673a4e70a46af69feb928cf6b0..ae8760a4ce2ae9957a5d9edc4124df283b8bd026 100644 --- a/address_space/src/lib.rs +++ b/address_space/src/lib.rs @@ -18,7 +18,9 @@ //! ```rust //! use std::sync::{Arc, Mutex}; //! extern crate address_space; -//! use address_space::{AddressSpace, Region, GuestAddress, HostMemMapping, RegionOps, FileBackend}; +//! use address_space::{ +//! AddressAttr, AddressSpace, FileBackend, GuestAddress, HostMemMapping, Region, RegionOps, +//! }; //! //! struct DummyDevice; //! impl DummyDevice { @@ -34,19 +36,18 @@ //! //! fn main() { //! // 1. create address_space -//! let space = AddressSpace::new(Region::init_container_region(u64::max_value())).unwrap(); +//! let space = AddressSpace::new( +//! Region::init_container_region(u64::max_value(), "space"), +//! "space", +//! None, +//! ) +//! .unwrap(); //! //! // 2. 
create an Ram-type Region, and set it's priority -//! let mem_mapping = Arc::new(HostMemMapping::new( -//! GuestAddress(0), -//! None, -//! 0x1000, -//! None, -//! false, -//! false, -//! false, -//! ).unwrap()); -//! let ram_region = Region::init_ram_region(mem_mapping.clone()); +//! let mem_mapping = Arc::new( +//! HostMemMapping::new(GuestAddress(0), None, 0x1000, None, false, false, false).unwrap(), +//! ); +//! let ram_region = Region::init_ram_region(mem_mapping.clone(), "ram"); //! ram_region.set_priority(10); //! //! // 3. create a IO-type Region @@ -66,23 +67,20 @@ //! write: Arc::new(write_ops), //! }; //! -//! let io_region = Region::init_io_region(0x1000, dev_ops); +//! let io_region = Region::init_io_region(0x1000, dev_ops, "io_region"); //! //! // 4. add sub_region to address_space's root region -//! space.root().add_subregion(ram_region, mem_mapping.start_address().raw_value()); +//! space +//! .root() +//! .add_subregion(ram_region, mem_mapping.start_address().raw_value()); //! space.root().add_subregion(io_region, 0x2000); //! //! // 5. access address_space -//! space.write_object(&0x11u64, GuestAddress(0)); +//! space.write_object(&0x11u64, GuestAddress(0), AddressAttr::Ram); //! } //! ``` -#[macro_use] -extern crate error_chain; -#[macro_use] -extern crate log; -#[macro_use] -extern crate migration_derive; +pub mod error; mod address; mod address_space; @@ -92,85 +90,35 @@ mod region; mod state; pub use crate::address_space::{AddressSpace, RegionCache}; -pub use address::{AddressRange, GuestAddress}; -pub use host_mmap::{create_host_mmaps, FileBackend, HostMemMapping}; -#[cfg(target_arch = "x86_64")] -pub use listener::KvmIoListener; -pub use listener::KvmMemoryListener; -pub use listener::{Listener, ListenerReqType}; +pub use address::{AddressAttr, AddressRange, GuestAddress}; +pub use error::AddressSpaceError; +pub use host_mmap::{create_backend_mem, create_default_mem, FileBackend, HostMemMapping}; +pub use listener::{Listener, ListenerReqType, MemSlot}; pub use region::{FlatRange, Region, RegionIoEventFd, RegionType}; -pub mod errors { - error_chain! 
{ - links { - Util(util::errors::Error, util::errors::ErrorKind); - } - foreign_links { - Io(std::io::Error); - } - errors { - ListenerRequest(req_type: crate::listener::ListenerReqType) { - display("Failed to call listener, request type is {:#?}", req_type) - } - UpdateTopology(base: u64, size: u64, reg_ty: crate::RegionType) { - display("Failed to update topology, base 0x{:X}, size 0x{:X}, region type is {:#?}", base, size, reg_ty) - } - IoEventFd { - display("Failed to clone EventFd") - } - AddrAlignUp(addr: u64, align: u64) { - display("Failed to align-up address, overflows: addr 0x{:X}, align 0x{:X}", addr, align) - } - RegionNotFound(addr: u64) { - display("Failed to find matched region, addr 0x{:X}", addr) - } - Overflow(addr: u64) { - display("Address overflows, addr is 0x{:X}", addr) - } - Mmap { - display("Failed to mmap") - } - IoAccess(base: u64, offset: u64, count: u64) { - display("Failed to access IO-type region, region base 0x{:X}, offset 0x{:X}, size 0x{:X}", base, offset, count) - } - RegionType(t: crate::RegionType) { - display("Wrong region type, {:#?}", t) - } - NoAvailKvmSlot(cnt: usize) { - display("No available kvm_mem_slot, total count is {}", cnt) - } - NoMatchedKvmSlot(addr: u64, sz: u64) { - display("Failed to find matched kvm_mem_slot, addr 0x{:X}, size 0x{:X}", addr, sz) - } - KvmSlotOverlap(add: (u64, u64), exist: (u64, u64)) { - display("Added KVM mem range (0x{:X}, 0x{:X}) overlaps with exist one (0x{:X}, 0x{:X})", add.0, add.1, exist.0, exist.1) - } - InvalidOffset(offset: u64, count: u64, region_size: u64) { - display("Invalid offset: offset 0x{:X}, data length 0x{:X}, region size 0x{:X}", offset, count, region_size) - } - } - } -} +/// Read data from Region to argument `data`, +/// return `true` if read successfully, or return `false`. +/// +/// # Arguments +/// +/// * `data` - A u8-type array. +/// * `base` - Base address. +/// * `offset` - Offset from base address. +type ReadFn = std::sync::Arc bool + Send + Sync>; + +/// Write `data` to memory, +/// return `true` if write successfully, or return `false`. +/// +/// # Arguments +/// +/// * `data` - A u8-type array. +/// * `base` - Base address. +/// * `offset` - Offset from base address. +type WriteFn = std::sync::Arc bool + Send + Sync>; /// Provide Some operations of `Region`, mainly used by Vm's devices. #[derive(Clone)] pub struct RegionOps { - /// Read data from Region to argument `data`, - /// return `true` if read successfully, or return `false`. - /// - /// # Arguments - /// - /// * `data` - A u8-type array. - /// * `base` - Base address. - /// * `offset` - Offset from base address. - pub read: std::sync::Arc bool + Send + Sync>, - /// Write `data` to memory, - /// return `true` if write successfully, or return `false`. - /// - /// # Arguments - /// - /// * `data` - A u8-type array. - /// * `base` - Base address. - /// * `offset` - Offset from base address. - pub write: std::sync::Arc bool + Send + Sync>, + pub read: ReadFn, + pub write: WriteFn, } diff --git a/address_space/src/listener.rs b/address_space/src/listener.rs index aa07b215a5f46a293d80adb6313a3efb35726c34..586192ceb2e1b5d7a909bebae497d488db9334c5 100644 --- a/address_space/src/listener.rs +++ b/address_space/src/listener.rs @@ -10,19 +10,9 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
-use std::sync::atomic::{AtomicU32, Ordering}; -use std::sync::{Arc, Mutex}; +use anyhow::Result; -use error_chain::ChainedError; -use hypervisor::kvm::KVM_FDS; -use kvm_bindings::kvm_userspace_memory_region; -use kvm_ioctls::{IoEventAddress, NoDatamatch}; - -use crate::errors::{ErrorKind, Result, ResultExt}; -use crate::{AddressRange, FlatRange, RegionIoEventFd, RegionType}; -use util::{num_ops::round_down, unix::host_page_size}; - -const MEM_READ_ONLY: u32 = 1 << 1; +use crate::{FlatRange, RegionIoEventFd}; /// Request type of listener. #[derive(Debug, Copy, Clone)] @@ -42,15 +32,13 @@ pub trait Listener: Send + Sync { fn priority(&self) -> i32; /// Is this listener enabled to call. - fn enabled(&self) -> bool { - true - } + fn enabled(&self) -> bool; /// Enable listener for address space. - fn enable(&mut self) {} + fn enable(&mut self); /// Disable listener for address space. - fn disable(&mut self) {} + fn disable(&mut self); /// Function that handle request according to request-type. /// @@ -70,764 +58,15 @@ pub trait Listener: Send + Sync { } /// Records information that manage the slot resource and current usage. -#[allow(dead_code)] #[derive(Default, Copy, Clone)] -struct MemSlot { +pub struct MemSlot { /// Index of a memory slot. - index: u32, + pub index: u32, /// Guest address. - guest_addr: u64, + pub guest_addr: u64, /// Size of memory. /// size = 0 represents no-region use this slot. - size: u64, + pub size: u64, /// Host address. - host_addr: u64, - /// Flag. - flag: u32, -} - -/// Kvm memory listener. -#[derive(Clone)] -pub struct KvmMemoryListener { - /// Id of AddressSpace. - as_id: Arc, - /// Record all MemSlots. - slots: Arc>>, -} - -impl KvmMemoryListener { - /// Create a new KvmMemoryListener for a VM. - /// - /// # Arguments - /// - /// * `nr_slots` - Number of slots. - pub fn new(nr_slots: u32) -> KvmMemoryListener { - KvmMemoryListener { - as_id: Arc::new(AtomicU32::new(0)), - slots: Arc::new(Mutex::new(vec![MemSlot::default(); nr_slots as usize])), - } - } - - /// Find a free slot and fills it with given arguments. - /// - /// # Arguments - /// - /// * `guest_addr` - Guest address. - /// * `size` - Size of slot. - /// * `host_addr` - Host address. - /// - /// # Errors - /// - /// Return Error if - /// * No available Kvm slot. - /// * Given memory slot overlap with existed one. - fn get_free_slot(&self, guest_addr: u64, size: u64, host_addr: u64) -> Result { - let mut slots = self.slots.lock().unwrap(); - - // check if the given address range overlaps with exist ones - let range = AddressRange::from((guest_addr, size)); - slots.iter().try_for_each::<_, Result<()>>(|s| { - if AddressRange::from((s.guest_addr, s.size)) - .find_intersection(range) - .is_some() - { - return Err( - ErrorKind::KvmSlotOverlap((guest_addr, size), (s.guest_addr, s.size)).into(), - ); - } - Ok(()) - })?; - - for (index, slot) in slots.iter_mut().enumerate() { - if slot.size == 0 { - slot.index = index as u32; - slot.guest_addr = guest_addr; - slot.size = size; - slot.host_addr = host_addr; - return Ok(slot.index); - } - } - - Err(ErrorKind::NoAvailKvmSlot(slots.len()).into()) - } - - /// Delete a slot after finding it according to the given arguments. - /// Return the deleted one if succeed. - /// - /// # Arguments - /// - /// * `addr` - Guest address of slot. - /// * `size` - Size of slots. - /// - /// # Errors - /// - /// Return Error if no Kem slot matched. 
- fn delete_slot(&self, addr: u64, size: u64) -> Result { - let mut slots = self.slots.lock().unwrap(); - for slot in slots.iter_mut() { - if slot.guest_addr == addr && slot.size == size { - // set slot size to zero, so it can be reused later - slot.size = 0; - return Ok(*slot); - } - } - Err(ErrorKind::NoMatchedKvmSlot(addr, size).into()) - } - - /// Align a piece of memory segment according to `alignment`, - /// return AddressRange after aligned. - /// - /// # Arguments - /// - /// * `range` - One piece of memory segment. - /// * `alignment` - Alignment base. - /// - /// # Errors - /// - /// Return Error if Memslot size is zero after aligned. - fn align_mem_slot(range: AddressRange, alignment: u64) -> Result { - let aligned_addr = range - .base - .align_up(alignment) - .chain_err(|| ErrorKind::AddrAlignUp(range.base.raw_value(), alignment))?; - - let aligned_size = range - .size - .checked_sub(aligned_addr.offset_from(range.base)) - .and_then(|sz| round_down(sz, alignment)) - .filter(|&sz| sz > 0_u64) - .chain_err(|| - format!("Mem slot size is zero after aligned, addr 0x{:X}, size 0x{:X}, alignment 0x{:X}", - range.base.raw_value(), range.size, alignment) - )?; - - Ok(AddressRange::new(aligned_addr, aligned_size)) - } - - /// Callback function for adding Region, which only care about Ram-type Region yet. - /// - /// # Arguments - /// - /// * `flat_range` - Corresponding FlatRange of new-added region. - /// - /// # Errors - /// - /// Return Error if fail to delete kvm_mem_slot. - fn add_region(&self, flat_range: &FlatRange) -> Result<()> { - if flat_range.owner.region_type() == RegionType::RomDevice - && !flat_range.owner.get_rom_device_romd().unwrap() - { - if let Err(ref e) = self.delete_region(flat_range) { - warn!( - "Rom-device Region changes to IO mode, Failed to delete region: {}", - e.display_chain() - ); - } - return Ok(()); - } - - if flat_range.owner.region_type() != RegionType::Ram - && flat_range.owner.region_type() != RegionType::RomDevice - && flat_range.owner.region_type() != RegionType::RamDevice - { - return Ok(()); - } - - let (aligned_addr, aligned_size) = - Self::align_mem_slot(flat_range.addr_range, host_page_size()) - .map(|r| (r.base, r.size)) - .chain_err(|| "Failed to align mem slot")?; - let align_adjust = aligned_addr.raw_value() - flat_range.addr_range.base.raw_value(); - - // `unwrap()` won't fail because Ram-type Region definitely has hva - let aligned_hva = flat_range.owner.get_host_address().unwrap() - + flat_range.offset_in_region - + align_adjust; - - let slot_idx = self - .get_free_slot(aligned_addr.raw_value(), aligned_size, aligned_hva) - .chain_err(|| "Failed to get available KVM mem slot")?; - - let mut flags = 0_u32; - if flat_range.owner.get_rom_device_romd().unwrap_or(false) { - flags |= MEM_READ_ONLY; - } - let kvm_region = kvm_userspace_memory_region { - slot: slot_idx | (self.as_id.load(Ordering::SeqCst) << 16), - guest_phys_addr: aligned_addr.raw_value(), - memory_size: aligned_size, - userspace_addr: aligned_hva, - flags, - }; - unsafe { - KVM_FDS - .load() - .vm_fd - .as_ref() - .unwrap() - .set_user_memory_region(kvm_region) - .or_else(|e| { - self.delete_slot(aligned_addr.raw_value(), aligned_size) - .chain_err(|| "Failed to delete Kvm mem slot")?; - Err(e).chain_err(|| { - format!( - "KVM register memory region failed: addr 0x{:X}, size 0x{:X}", - aligned_addr.raw_value(), - aligned_size - ) - }) - })?; - } - Ok(()) - } - - /// Callback function for deleting Region, which only care about Ram-type Region yet. 
- /// - /// # Arguments - /// - /// * `flat_range` - Corresponding FlatRange of new-deleted region. - fn delete_region(&self, flat_range: &FlatRange) -> Result<()> { - if flat_range.owner.region_type() != RegionType::Ram - && flat_range.owner.region_type() != RegionType::RomDevice - && flat_range.owner.region_type() != RegionType::RamDevice - { - return Ok(()); - } - - let (aligned_addr, aligned_size) = - Self::align_mem_slot(flat_range.addr_range, host_page_size()) - .map(|r| (r.base, r.size)) - .chain_err(|| "Failed to align mem slot")?; - - let mem_slot = match self.delete_slot(aligned_addr.raw_value(), aligned_size) { - Ok(m) => m, - Err(_) => { - debug!("no match mem slot registered to KVM, just return"); - return Ok(()); - } - }; - - let kvm_region = kvm_userspace_memory_region { - slot: mem_slot.index | (self.as_id.load(Ordering::SeqCst) << 16), - guest_phys_addr: mem_slot.guest_addr, - memory_size: 0_u64, - userspace_addr: mem_slot.host_addr, - flags: 0, - }; - unsafe { - KVM_FDS - .load() - .vm_fd - .as_ref() - .unwrap() - .set_user_memory_region(kvm_region) - .chain_err(|| { - format!( - "KVM unregister memory region failed: addr 0x{:X}", - aligned_addr.raw_value(), - ) - })?; - } - - Ok(()) - } - - /// Register a IoEvent to `/dev/kvm`. - /// - /// # Arguments - /// - /// * `ioevtfd` - IoEvent would be added. - /// - /// # Errors - /// - /// Return Error if the length of ioeventfd data is unexpected or syscall failed. - fn add_ioeventfd(&self, ioevtfd: &RegionIoEventFd) -> Result<()> { - let kvm_fds = KVM_FDS.load(); - let vm_fd = kvm_fds.vm_fd.as_ref().unwrap(); - let io_addr = IoEventAddress::Mmio(ioevtfd.addr_range.base.raw_value()); - let ioctl_ret = if ioevtfd.data_match { - let length = ioevtfd.addr_range.size; - match length { - 2 => vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u16), - 4 => vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u32), - 8 => vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u64), - _ => bail!("Unexpected ioeventfd data length {}", length), - } - } else { - vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, NoDatamatch) - }; - - ioctl_ret.chain_err(|| { - format!( - "KVM register ioeventfd failed, mmio addr 0x{:X}, size 0x{:X}, data_match {}", - ioevtfd.addr_range.base.raw_value(), - ioevtfd.addr_range.size, - if ioevtfd.data_match { - ioevtfd.data - } else { - u64::MAX - } - ) - })?; - - Ok(()) - } - - /// Deletes `ioevtfd` from `/dev/kvm` - /// - /// # Arguments - /// - /// * `ioevtfd` - IoEvent would be deleted. 
- fn delete_ioeventfd(&self, ioevtfd: &RegionIoEventFd) -> Result<()> { - let kvm_fds = KVM_FDS.load(); - let vm_fd = kvm_fds.vm_fd.as_ref().unwrap(); - let io_addr = IoEventAddress::Mmio(ioevtfd.addr_range.base.raw_value()); - let ioctl_ret = if ioevtfd.data_match { - let length = ioevtfd.addr_range.size; - match length { - 2 => vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u16), - 4 => vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u32), - 8 => vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u64), - _ => bail!("Unexpected ioeventfd data length {}", length), - } - } else { - vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, NoDatamatch) - }; - - ioctl_ret.chain_err(|| { - format!( - "KVM unregister ioeventfd failed: mmio addr 0x{:X}, size 0x{:X}, data_match {}", - ioevtfd.addr_range.base.raw_value(), - ioevtfd.addr_range.size, - if ioevtfd.data_match { - ioevtfd.data - } else { - u64::MAX - } - ) - })?; - - Ok(()) - } -} - -impl Listener for KvmMemoryListener { - /// Get default priority. - fn priority(&self) -> i32 { - 10_i32 - } - - /// Deal with the request. - /// - /// # Arguments - /// - /// * `flat_range` - FlatRange would be used to find the region. - /// * `evtfd` - IoEvent of Region. - /// * `req_type` - Request type. - /// - /// # Errors - /// - /// Returns Error if - /// * Both `flat_range` and `evtfd' are not provided. - fn handle_request( - &self, - flat_range: Option<&FlatRange>, - evtfd: Option<&RegionIoEventFd>, - req_type: ListenerReqType, - ) -> Result<()> { - let req_ret = match req_type { - ListenerReqType::AddRegion => { - self.add_region(flat_range.chain_err(|| "No FlatRange for AddRegion request")?) - } - ListenerReqType::DeleteRegion => self - .delete_region(flat_range.chain_err(|| "No FlatRange for DeleteRegion request")?), - ListenerReqType::AddIoeventfd => { - self.add_ioeventfd(evtfd.chain_err(|| "No IoEventFd for AddIoeventfd request")?) - } - ListenerReqType::DeleteIoeventfd => self - .delete_ioeventfd(evtfd.chain_err(|| "No IoEventFd for DeleteIoeventfd request")?), - }; - - req_ret.chain_err(|| ErrorKind::ListenerRequest(req_type)) - } -} - -#[cfg(target_arch = "x86_64")] -pub struct KvmIoListener; - -#[cfg(target_arch = "x86_64")] -impl Default for KvmIoListener { - fn default() -> Self { - Self - } -} - -#[cfg(target_arch = "x86_64")] -impl KvmIoListener { - /// Register a IoEvent to `/dev/kvm`. - /// - /// # Arguments - /// - /// * `ioevtfd` - IoEvent of Region. - /// - /// # Errors - /// - /// Return Error if the length of ioeventfd data is unexpected or syscall failed. 
- fn add_ioeventfd(&self, ioevtfd: &RegionIoEventFd) -> Result<()> { - let kvm_fds = KVM_FDS.load(); - let vm_fd = kvm_fds.vm_fd.as_ref().unwrap(); - let io_addr = IoEventAddress::Pio(ioevtfd.addr_range.base.raw_value()); - let ioctl_ret = if ioevtfd.data_match { - let length = ioevtfd.addr_range.size; - match length { - 2 => vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u16), - 4 => vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u32), - 8 => vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u64), - _ => bail!("unexpected ioeventfd data length {}", length), - } - } else { - vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, NoDatamatch) - }; - - ioctl_ret.chain_err(|| { - format!( - "KVM register ioeventfd failed: io addr 0x{:X}, size 0x{:X}, data_match {}", - ioevtfd.addr_range.base.raw_value(), - ioevtfd.addr_range.size, - if ioevtfd.data_match { - ioevtfd.data - } else { - u64::MAX - } - ) - })?; - - Ok(()) - } - - /// Delete an IoEvent from `/dev/kvm`. - /// - /// # Arguments - /// - /// * `ioevtfd` - IoEvent of Region. - fn delete_ioeventfd(&self, ioevtfd: &RegionIoEventFd) -> Result<()> { - let kvm_fds = KVM_FDS.load(); - let vm_fd = kvm_fds.vm_fd.as_ref().unwrap(); - let io_addr = IoEventAddress::Pio(ioevtfd.addr_range.base.raw_value()); - let ioctl_ret = if ioevtfd.data_match { - let length = ioevtfd.addr_range.size; - match length { - 2 => vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u16), - 4 => vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u32), - 8 => vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u64), - _ => bail!("Unexpected ioeventfd data length {}", length), - } - } else { - vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, NoDatamatch) - }; - - ioctl_ret.chain_err(|| { - format!( - "KVM unregister ioeventfd failed: io addr 0x{:X}, size 0x{:X}, data_match {}", - ioevtfd.addr_range.base.raw_value(), - ioevtfd.addr_range.size, - if ioevtfd.data_match { - ioevtfd.data - } else { - u64::MAX - } - ) - })?; - - Ok(()) - } -} - -/// Kvm io listener. -#[cfg(target_arch = "x86_64")] -impl Listener for KvmIoListener { - /// Get the default priority. - fn priority(&self) -> i32 { - 10_i32 - } - - /// Deal with the request. - /// - /// # Arguments - /// - /// * `_range` - Corresponding FlatRange of new-added/deleted region. - /// * `evtfd` - IoEvent of Region. - /// * `req_type` - Request type. - fn handle_request( - &self, - _range: Option<&FlatRange>, - evtfd: Option<&RegionIoEventFd>, - req_type: ListenerReqType, - ) -> Result<()> { - let handle_ret = match req_type { - ListenerReqType::AddIoeventfd => { - self.add_ioeventfd(evtfd.chain_err(|| "No IoEventFd for AddIoeventfd request")?) 
- } - ListenerReqType::DeleteIoeventfd => self - .delete_ioeventfd(evtfd.chain_err(|| "No IoEventFd for DeleteIoeventfd request")?), - _ => return Ok(()), - }; - - handle_ret.chain_err(|| ErrorKind::ListenerRequest(req_type)) - } -} - -#[cfg(test)] -mod test { - use hypervisor::kvm::KVMFds; - use libc::EFD_NONBLOCK; - use serial_test::serial; - use vmm_sys_util::eventfd::EventFd; - - use super::*; - use crate::{GuestAddress, HostMemMapping, Region, RegionIoEventFd}; - - fn generate_region_ioeventfd>(addr: u64, datamatch: T) -> RegionIoEventFd { - let data = datamatch.into(); - RegionIoEventFd { - fd: EventFd::new(EFD_NONBLOCK).unwrap(), - addr_range: AddressRange::from((addr, std::mem::size_of::() as u64)), - data_match: data != 0, - data, - } - } - - fn create_ram_range(addr: u64, size: u64, offset_in_region: u64) -> FlatRange { - let mem_mapping = Arc::new( - HostMemMapping::new(GuestAddress(addr), None, size, None, false, false, false).unwrap(), - ); - FlatRange { - addr_range: AddressRange::new( - mem_mapping.start_address().unchecked_add(offset_in_region), - mem_mapping.size() - offset_in_region, - ), - owner: Region::init_ram_region(mem_mapping.clone()), - offset_in_region, - rom_dev_romd: None, - } - } - - #[test] - #[serial] - fn test_alloc_slot() { - let kvm_fds = KVMFds::new(); - if kvm_fds.vm_fd.is_none() { - return; - } - KVM_FDS.store(Arc::new(kvm_fds)); - - let kml = KvmMemoryListener::new(4); - let host_addr = 0u64; - - assert_eq!(kml.get_free_slot(0, 100, host_addr).unwrap(), 0); - assert_eq!(kml.get_free_slot(200, 100, host_addr).unwrap(), 1); - assert_eq!(kml.get_free_slot(300, 100, host_addr).unwrap(), 2); - assert_eq!(kml.get_free_slot(500, 100, host_addr).unwrap(), 3); - assert!(kml.get_free_slot(200, 100, host_addr).is_err()); - // no available KVM mem slot - assert!(kml.get_free_slot(600, 100, host_addr).is_err()); - - kml.delete_slot(200, 100).unwrap(); - assert!(kml.delete_slot(150, 100).is_err()); - assert!(kml.delete_slot(700, 100).is_err()); - assert_eq!(kml.get_free_slot(200, 100, host_addr).unwrap(), 1); - } - - #[test] - #[serial] - fn test_add_del_ram_region() { - let kvm_fds = KVMFds::new(); - if kvm_fds.vm_fd.is_none() { - return; - } - KVM_FDS.store(Arc::new(kvm_fds)); - - let kml = KvmMemoryListener::new(34); - let ram_size = host_page_size(); - let ram_fr1 = create_ram_range(0, ram_size, 0); - - kml.handle_request(Some(&ram_fr1), None, ListenerReqType::AddRegion) - .unwrap(); - // flat-range already added, adding again should make an error - assert!(kml - .handle_request(Some(&ram_fr1), None, ListenerReqType::AddRegion) - .is_err()); - assert!(kml - .handle_request(Some(&ram_fr1), None, ListenerReqType::DeleteRegion) - .is_ok()); - // flat-range already deleted, deleting again should make an error - assert!(kml - .handle_request(Some(&ram_fr1), None, ListenerReqType::DeleteRegion) - .is_ok()); - } - - #[test] - #[serial] - fn test_add_region_align() { - let kvm_fds = KVMFds::new(); - if kvm_fds.vm_fd.is_none() { - return; - } - KVM_FDS.store(Arc::new(kvm_fds)); - - let kml = KvmMemoryListener::new(34); - // flat-range not aligned - let page_size = host_page_size(); - let ram_fr2 = create_ram_range(page_size, 2 * page_size, 1000); - assert!(kml - .handle_request(Some(&ram_fr2), None, ListenerReqType::AddRegion) - .is_ok()); - - // flat-range size is zero after aligned, this step should make an error - let ram_fr3 = create_ram_range(page_size, page_size, 1000); - assert!(kml - .handle_request(Some(&ram_fr3), None, ListenerReqType::AddRegion) - .is_err()); 
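        // (editorial worked example, not part of the original sources) Assuming a 4 KiB host
        // page, ram_fr2 spans [0x1000 + 1000, 0x3000) = [5096, 12288); the listener's page
        // alignment (start rounded up, end rounded down, as the two asserts imply) leaves the
        // non-empty slot [8192, 12288), so AddRegion succeeds. ram_fr3 spans [5096, 8192),
        // which aligns to the empty range [8192, 8192) and is therefore rejected.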
- } - - #[test] - #[serial] - fn test_add_del_ioeventfd() { - let kvm_fds = KVMFds::new(); - if kvm_fds.vm_fd.is_none() { - return; - } - KVM_FDS.store(Arc::new(kvm_fds)); - - let kml = KvmMemoryListener::new(34); - let evtfd = generate_region_ioeventfd(4, NoDatamatch); - assert!(kml - .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) - .is_ok()); - // The evtfd already added, adding again should make an error - assert!(kml - .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) - .is_err()); - assert!(kml - .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) - .is_ok()); - // The evtfd already deleted, deleting again should cause an error - assert!(kml - .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) - .is_err()); - - // Register an ioeventfd with data-match - let evtfd = generate_region_ioeventfd(64, 4_u64); - assert!(kml - .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) - .is_ok()); - - // Register an ioeventfd which has same address with previously registered ones will cause an error - let same_addred_evtfd = generate_region_ioeventfd(64, 4_u64); - assert!(kml - .handle_request( - None, - Some(&same_addred_evtfd), - ListenerReqType::AddIoeventfd - ) - .is_err()); - - assert!(kml - .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) - .is_ok()); - } - - #[test] - #[serial] - fn test_ioeventfd_with_data_match() { - let kvm_fds = KVMFds::new(); - if kvm_fds.vm_fd.is_none() { - return; - } - KVM_FDS.store(Arc::new(kvm_fds)); - - let kml = KvmMemoryListener::new(34); - let evtfd_addr = 0x1000_u64; - let mut evtfd = generate_region_ioeventfd(evtfd_addr, 64_u32); - evtfd.addr_range.size = 3_u64; - // Matched data's length must be 2, 4 or 8. - assert!(kml - .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) - .is_err()); - - let evtfd = generate_region_ioeventfd(evtfd_addr, 64_u32); - assert!(kml - .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) - .is_ok()); - - // Delete ioeventfd with wrong address will cause an error. - let mut evtfd_to_del = evtfd.try_clone().unwrap(); - evtfd_to_del.addr_range.base.0 = evtfd_to_del.addr_range.base.0 - 2; - assert!(kml - .handle_request(None, Some(&evtfd_to_del), ListenerReqType::DeleteIoeventfd) - .is_err()); - - // Delete ioeventfd with inconsistent data-match will cause error. - let mut evtfd_to_del = evtfd.try_clone().unwrap(); - evtfd_to_del.data_match = false; - assert!(kml - .handle_request(None, Some(&evtfd_to_del), ListenerReqType::DeleteIoeventfd) - .is_err()); - - // Delete ioeventfd with inconsistent matched data will cause an error. - let mut evtfd_to_del = evtfd.try_clone().unwrap(); - evtfd_to_del.data = 128_u64; - assert!(kml - .handle_request(None, Some(&evtfd_to_del), ListenerReqType::DeleteIoeventfd) - .is_err()); - - // Delete it successfully. - assert!(kml - .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) - .is_ok()); - - // Delete a not-exist ioeventfd will cause an error. 
- assert!(kml - .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) - .is_err()); - } - - #[test] - #[serial] - #[cfg(target_arch = "x86_64")] - fn test_kvm_io_listener() { - let kvm_fds = KVMFds::new(); - if kvm_fds.vm_fd.is_none() { - return; - } - KVM_FDS.store(Arc::new(kvm_fds)); - - let iol = KvmIoListener::default(); - let evtfd = generate_region_ioeventfd(4, NoDatamatch); - assert!(iol - .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) - .is_ok()); - // evtfd already added, adding again should make an error - assert!(iol - .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) - .is_err()); - assert!(iol - .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) - .is_ok()); - // evtfd already deleted, deleting again should make an error - assert!(iol - .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) - .is_err()); - - // Matched data's length must be 2, 4 or 8. - let mut evtfd_match = generate_region_ioeventfd(4, 64_u32); - evtfd_match.addr_range.size = 3; - assert!(iol - .handle_request(None, Some(&evtfd_match), ListenerReqType::AddIoeventfd) - .is_err()); - evtfd_match.addr_range.size = 4; - assert!(iol - .handle_request(None, Some(&evtfd_match), ListenerReqType::AddIoeventfd) - .is_ok()); - } + pub host_addr: u64, } diff --git a/address_space/src/region.rs b/address_space/src/region.rs index 7c9e8da91f92a23509c151b160ed999c472547fc..48c70302a7454bd0a0971b850ee4c061cb0ea65d 100644 --- a/address_space/src/region.rs +++ b/address_space/src/region.rs @@ -10,12 +10,21 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. +use std::fmt; +use std::fmt::Debug; +use std::os::unix::io::AsRawFd; use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU64, Ordering}; use std::sync::{Arc, Mutex, RwLock, Weak}; +use anyhow::{anyhow, bail, Context, Result}; +use log::{debug, warn}; + use crate::address_space::FlatView; -use crate::errors::{ErrorKind, Result, ResultExt}; -use crate::{AddressRange, AddressSpace, FileBackend, GuestAddress, HostMemMapping, RegionOps}; +use crate::{ + AddressAttr, AddressRange, AddressSpace, AddressSpaceError, FileBackend, GuestAddress, + HostMemMapping, RegionOps, +}; +use migration::{migration::Migratable, MigrationManager}; /// Types of Region. #[allow(clippy::upper_case_acronyms)] @@ -31,11 +40,15 @@ pub enum RegionType { RomDevice, /// RamDevice type. RamDevice, + /// Alias type + Alias, } -/// Represents a memory region, used by mem-mapped IO or Ram. +/// Represents a memory region, used by mem-mapped IO, Ram or Rom. #[derive(Clone)] pub struct Region { + /// The name of Region + pub name: String, /// Type of Region, won't be changed once initialized. region_type: RegionType, /// The priority of Region, only valid in parent Container-type Region. @@ -44,7 +57,8 @@ pub struct Region { size: Arc, /// Offset in parent Container-type region. It won't be changed once initialized. offset: Arc>, - /// If not Ram-type Region, `mem_mapping` is None. It won't be changed once initialized. + /// If not Ram, RomDevice, RamDevice Region type, `mem_mapping` is None. It won't be changed + /// once initialized. mem_mapping: Option>, /// `ops` provides read/write function. ops: Option, @@ -52,18 +66,43 @@ pub struct Region { io_evtfds: Arc>>, /// Weak pointer pointing to the father address-spaces. space: Arc>>, - /// Sub-regions array, keep sorted + /// Sub-regions array, keep sorted. 
subregions: Arc>>, - /// This field is useful for RomDevice-type Region. If true, in read-only mode, otherwise in IO mode. + /// This field is useful for RomDevice-type Region. If true, in read-only mode, otherwise in IO + /// mode. rom_dev_romd: Arc, + /// Max access size supported by the device. + max_access_size: Option, + /// Point to entity memory region + alias: Option>, + /// Offset in parent Alias-type region. + alias_offset: u64, +} + +impl fmt::Debug for Region { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Region") + .field("region_type", &self.region_type) + .field("priority", &self.priority) + .field("size", &self.size) + .field("offset", &self.offset) + .field("mem_mapping", &self.mem_mapping) + .field("io_evtfds", &self.io_evtfds) + .field("space", &self.space) + .field("subregions", &self.subregions) + .field("rom_dev_romd", &self.rom_dev_romd) + .field("max_access_size", &self.max_access_size) + .finish() + } } /// Used to trigger events. /// If `data_match` is enabled, the `EventFd` is triggered iff `data` is written /// to the specified address. +#[derive(Clone)] pub struct RegionIoEventFd { /// EventFd to be triggered when guest writes to the address. - pub fd: vmm_sys_util::eventfd::EventFd, + pub fd: Arc, /// Addr_range contains two params as follows: /// base: in addr_range is the address of EventFd. /// size: can be 2, 4, 8 bytes. @@ -74,43 +113,38 @@ pub struct RegionIoEventFd { pub data: u64, } +impl fmt::Debug for RegionIoEventFd { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RegionIoEventFd") + .field("addr_range", &self.addr_range) + .field("data_match", &self.data_match) + .field("data", &self.data) + .finish() + } +} + +impl PartialEq for RegionIoEventFd { + fn eq(&self, other: &Self) -> bool { + self.addr_range == other.addr_range + && self.data_match == other.data_match + && self.data == other.data + && self.fd.as_raw_fd() == other.fd.as_raw_fd() + } +} + impl RegionIoEventFd { - /// Calculate if this `RegionIoEventFd` is located before the given one. + /// Calculate if this `RegionIoEventFd` is located after the given one. /// /// # Arguments /// /// * `other` - Other `RegionIoEventFd`. - pub(crate) fn before(&self, other: &RegionIoEventFd) -> bool { - if self.addr_range.base != other.addr_range.base { - return self.addr_range.base < other.addr_range.base; - } - if self.addr_range.size != other.addr_range.size { - return self.addr_range.size < other.addr_range.size; - } - if self.data_match != other.data_match { - return self.data_match && (!other.data_match); - } - if self.data != other.data { - return self.data < other.data; - } - false - } - - /// Return the cloned IoEvent, - /// return error if failed to clone EventFd. - pub(crate) fn try_clone(&self) -> Result { - let fd = self.fd.try_clone().or(Err(ErrorKind::IoEventFd))?; - Ok(RegionIoEventFd { - fd, - addr_range: self.addr_range, - data_match: self.data_match, - data: self.data, - }) + pub fn after(&self, other: &RegionIoEventFd) -> bool { + self.addr_range.base.0 >= (other.addr_range.base.0 + other.addr_range.size) } } /// FlatRange is a piece of continuous memory address。 -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct FlatRange { /// The address range. 
pub addr_range: AddressRange, @@ -126,7 +160,7 @@ impl Eq for FlatRange {} impl PartialEq for FlatRange { fn eq(&self, other: &Self) -> bool { - self.addr_range.base == other.addr_range.base + self.addr_range == other.addr_range && self.owner.region_type == other.owner.region_type && self.rom_dev_romd.unwrap_or(false) == other.rom_dev_romd.unwrap_or(false) && self.owner == other.owner @@ -146,6 +180,55 @@ impl PartialEq for Region { impl Eq for Region {} +/// Used for read/write for multi times. +struct MultiOpsArgs { + /// The base address of the read/write ops. + base: GuestAddress, + /// The offset of the read/write ops. + offset: u64, + /// the number of the read/write ops in bytes. + count: u64, + /// The access size for one read/write in bytes. + access_size: u64, +} + +/// Read/Write for multi times. +macro_rules! rw_multi_ops { + ( $ops: ident, $slice: expr, $args: ident ) => { + // The data size is larger than the max access size, we split to read/write for multiple + // times. + let base = $args.base; + let offset = $args.offset; + let cnt = $args.count; + let access_size = $args.access_size; + let mut pos = 0_u64; + for _ in 0..(cnt / access_size) { + if !$ops( + &mut $slice[pos as usize..(pos + access_size) as usize], + base, + offset + pos, + ) { + return Err(anyhow!(AddressSpaceError::IoAccess( + base.raw_value(), + offset + pos, + access_size, + ))); + } + pos += access_size; + } + // Unaligned memory access. + if cnt % access_size > 0 + && !$ops(&mut $slice[pos as usize..cnt as usize], base, offset + pos) + { + return Err(anyhow!(AddressSpaceError::IoAccess( + base.raw_value(), + offset + pos, + cnt - pos + ))); + } + }; +} + impl Region { /// The core function of initialization. /// @@ -156,12 +239,14 @@ impl Region { /// * `mem_mapping` - Mapped memory. /// * `ops` - Region operations. fn init_region_internal( + name: &str, size: u64, region_type: RegionType, mem_mapping: Option>, ops: Option, ) -> Region { Region { + name: String::from(name), region_type, priority: Arc::new(AtomicI32::new(0)), offset: Arc::new(Mutex::new(GuestAddress(0))), @@ -172,6 +257,9 @@ impl Region { space: Arc::new(RwLock::new(Weak::new())), subregions: Arc::new(RwLock::new(Vec::new())), rom_dev_romd: Arc::new(AtomicBool::new(false)), + max_access_size: None, + alias: None, + alias_offset: 0_u64, } } @@ -180,8 +268,14 @@ impl Region { /// # Arguments /// /// * `mem_mapping` - Mapped memory of this Ram region. - pub fn init_ram_region(mem_mapping: Arc) -> Region { - Region::init_region_internal(mem_mapping.size(), RegionType::Ram, Some(mem_mapping), None) + pub fn init_ram_region(mem_mapping: Arc, name: &str) -> Region { + Region::init_region_internal( + name, + mem_mapping.size(), + RegionType::Ram, + Some(mem_mapping), + None, + ) } /// Initialize IO-type region. @@ -190,8 +284,17 @@ impl Region { /// /// * `size` - Size of IO region. /// * `ops` - Operation of Region. - pub fn init_io_region(size: u64, ops: RegionOps) -> Region { - Region::init_region_internal(size, RegionType::IO, None, Some(ops)) + pub fn init_io_region(size: u64, ops: RegionOps, name: &str) -> Region { + Region::init_region_internal(name, size, RegionType::IO, None, Some(ops)) + } + + /// Set the access size limit of the IO region. + /// + /// # Arguments + /// + /// * `access_size` - Max access size supported in bytes. + pub fn set_access_size(&mut self, access_size: u64) { + self.max_access_size = Some(access_size); } /// Initialize Container-type region. 
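// (editorial sketch, not part of the original patch) Effect of set_access_size() and the
// rw_multi_ops! macro above for an IO region whose device only handles 4-byte accesses;
// `dev_ops` and the region name are placeholders:
//
//     let mut io_region = Region::init_io_region(16, dev_ops, "narrow_dev");
//     io_region.set_access_size(4);
//     // A 10-byte access at offset 0 is now split into three device callbacks:
//     //   ops(&buf[0..4],  base, 0)
//     //   ops(&buf[4..8],  base, 4)
//     //   ops(&buf[8..10], base, 8)   // trailing remainder smaller than access_size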
@@ -199,8 +302,8 @@ impl Region { /// # Arguments /// /// * `size` - Size of container region. - pub fn init_container_region(size: u64) -> Region { - Region::init_region_internal(size, RegionType::Container, None, None) + pub fn init_container_region(size: u64, name: &str) -> Region { + Region::init_region_internal(name, size, RegionType::Container, None, None) } /// Initialize RomDevice-type region. @@ -209,8 +312,13 @@ impl Region { /// /// * `mem_mapping` - Mapped memory of this region. /// * `ops` - Operation functions of this region. - pub fn init_rom_device_region(mem_mapping: Arc, ops: RegionOps) -> Region { + pub fn init_rom_device_region( + mem_mapping: Arc, + ops: RegionOps, + name: &str, + ) -> Region { let mut region = Region::init_region_internal( + name, mem_mapping.size(), RegionType::RomDevice, Some(mem_mapping), @@ -226,8 +334,9 @@ impl Region { /// # Arguments /// /// * `mem_mapping` - Mapped memory of this region. - pub fn init_ram_device_region(mem_mapping: Arc) -> Region { + pub fn init_ram_device_region(mem_mapping: Arc, name: &str) -> Region { Region::init_region_internal( + name, mem_mapping.size(), RegionType::RamDevice, Some(mem_mapping), @@ -235,6 +344,26 @@ impl Region { ) } + /// Initialize alias-type region. + /// + /// # Arguments + /// + /// * `alias` - alias to region. + /// * `alias_offset` - offset of alias + /// * `size` - region size + /// * `name` - alias name + pub fn init_alias_region( + alias: Arc, + alias_offset: u64, + size: u64, + name: &str, + ) -> Region { + let mut region = Region::init_region_internal(name, size, RegionType::Alias, None, None); + region.alias = Some(alias); + region.alias_offset = alias_offset; + region + } + /// Get the type of this region. pub fn region_type(&self) -> RegionType { self.region_type @@ -259,6 +388,16 @@ impl Region { self.size.load(Ordering::SeqCst) } + /// Get offset of this region. + pub fn alias_offset(&self) -> u64 { + self.alias_offset + } + + /// Get name of this alias region. + pub fn alias_name(&self) -> Option { + self.alias.as_ref().map(|mr| mr.name.clone()) + } + /// Get the offset of this region. /// The offset is within its parent region or belonged address space. pub fn offset(&self) -> GuestAddress { @@ -276,9 +415,13 @@ impl Region { } /// Returns the minimum address managed by the region. - /// If this region is not `Ram` type, this function will return `None`. + /// If this region is not `Ram`, `RamDevice`, `RomDevice` type, + /// this function will return `None`. pub fn start_addr(&self) -> Option { - if self.region_type != RegionType::Ram { + if self.region_type != RegionType::Ram + && self.region_type != RegionType::RamDevice + && self.region_type != RegionType::RomDevice + { return None; } @@ -292,7 +435,7 @@ impl Region { /// * `read_only` - Set region to read-only mode or not. pub fn set_rom_device_romd(&self, read_only: bool) -> Result<()> { if self.region_type != RegionType::RomDevice { - return Err(ErrorKind::RegionType(self.region_type).into()); + return Err(anyhow!(AddressSpaceError::RegionType(self.region_type))); } let old_mode = self.rom_dev_romd.as_ref().load(Ordering::SeqCst); @@ -312,8 +455,8 @@ impl Region { Ok(()) } - /// Get read-only mode of RomDevice-type region. Return true if in read-only mode, otherwise return false. - /// Return None if it is not a RomDevice-type region. + /// Get read-only mode of RomDevice-type region. Return true if in read-only mode, otherwise + /// return false. Return None if it is not a RomDevice-type region. 
pub fn get_rom_device_romd(&self) -> Option { if self.region_type != RegionType::RomDevice { None @@ -324,19 +467,43 @@ impl Region { /// Get the host address if this region is backed by host-memory, /// Return `None` if it is not a Ram-type region. - pub fn get_host_address(&self) -> Option { - if self.region_type == RegionType::IO || self.region_type == RegionType::Container { + /// + /// # Safety + /// + /// Need to make it clear that hva is always in the ram range of the virtual machine. + /// And if you want to operate [hva,hva+size], the range from hva to hva+size needs + /// to be in the ram range. + pub unsafe fn get_host_address(&self, attr: AddressAttr) -> Option { + let region_type = match attr { + AddressAttr::Ram => RegionType::Ram, + AddressAttr::MMIO => return None, + AddressAttr::RamDevice => RegionType::RamDevice, + AddressAttr::RomDevice | AddressAttr::RomDeviceForce => RegionType::RomDevice, + }; + + if self.region_type != region_type { return None; } self.mem_mapping.as_ref().map(|r| r.host_address()) } + pub fn get_host_share(&self) -> Option { + if self.region_type != RegionType::Ram + && self.region_type != RegionType::RamDevice + && self.region_type != RegionType::RomDevice + { + return None; + } + self.mem_mapping.as_ref().map(|r| r.mem_shared()) + } + /// Get the file information if this region is backed by host-memory. /// Return `None` if it is not a Ram-type region. pub fn get_file_backend(&self) -> Option { self.mem_mapping.as_ref().and_then(|r| r.file_backend()) } + /// Get the region file backend page size. pub fn get_region_page_size(&self) -> Option { self.mem_mapping .as_ref() @@ -384,7 +551,7 @@ impl Region { .filter(|end| *end <= self.size()) .is_none() { - return Err(ErrorKind::Overflow(addr).into()); + return Err(anyhow!(AddressSpaceError::Overflow(addr))); } Ok(()) } @@ -413,20 +580,26 @@ impl Region { ) -> Result<()> { match self.region_type { RegionType::Ram | RegionType::RamDevice => { - self.check_valid_offset(offset, count) - .chain_err(|| ErrorKind::InvalidOffset(offset, count, self.size()))?; + self.check_valid_offset(offset, count).with_context(|| { + AddressSpaceError::InvalidOffset(offset, count, self.size()) + })?; let host_addr = self.mem_mapping.as_ref().unwrap().host_address(); + // SAFETY: host_addr is managed by mem_mapping, it can be guaranteed to be legal, + // the legality of offset and count has been verified. let slice = unsafe { std::slice::from_raw_parts((host_addr + offset) as *const u8, count as usize) }; dst.write_all(slice) - .chain_err(|| "Failed to write content of Ram to mutable buffer")?; + .with_context(|| "Failed to write content of Ram to mutable buffer")?; } RegionType::RomDevice => { - self.check_valid_offset(offset, count) - .chain_err(|| ErrorKind::InvalidOffset(offset, count, self.size()))?; + self.check_valid_offset(offset, count).with_context(|| { + AddressSpaceError::InvalidOffset(offset, count, self.size()) + })?; if self.rom_dev_romd.as_ref().load(Ordering::SeqCst) { let host_addr = self.mem_mapping.as_ref().unwrap().host_address(); + // SAFETY: host_addr is managed by mem_mapping, it can be guaranteed to be legal, + // the legality of offset and count has been verified. 
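                    // (editorial note) With ROMD enabled the ROM contents are copied straight out
                    // of the backing host memory mapping here; with ROMD switched off the read is
                    // instead served by the device's registered `read` ops, as in IO mode.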
let read_ret = unsafe { std::slice::from_raw_parts( (host_addr + offset) as *const u8, @@ -439,25 +612,39 @@ impl Region { let read_ops = self.ops.as_ref().unwrap().read.as_ref(); if !read_ops(&mut read_ret, base, offset) { - return Err(ErrorKind::IoAccess(base.raw_value(), offset, count).into()); + return Err(anyhow!(AddressSpaceError::IoAccess( + base.raw_value(), + offset, + count + ))); } dst.write_all(&read_ret)?; } } RegionType::IO => { - if count >= std::usize::MAX as u64 { - return Err(ErrorKind::Overflow(count).into()); - } let mut slice = vec![0_u8; count as usize]; let read_ops = self.ops.as_ref().unwrap().read.as_ref(); - if !read_ops(&mut slice, base, offset) { - return Err(ErrorKind::IoAccess(base.raw_value(), offset, count).into()); + if matches!(self.max_access_size, Some(access_size) if count > access_size) { + let args = MultiOpsArgs { + base, + offset, + count, + access_size: self.max_access_size.unwrap(), + }; + rw_multi_ops!(read_ops, slice, args); + } else if !read_ops(&mut slice, base, offset) { + return Err(anyhow!(AddressSpaceError::IoAccess( + base.raw_value(), + offset, + count + ))); } - dst.write_all(&slice) - .chain_err(|| "Failed to write slice provided by device to mutable buffer")?; + dst.write_all(&slice).with_context(|| { + "Failed to write slice provided by device to mutable buffer" + })?; } _ => { - return Err(ErrorKind::RegionType(self.region_type()).into()); + return Err(anyhow!(AddressSpaceError::RegionType(self.region_type()))); } } Ok(()) @@ -485,7 +672,7 @@ impl Region { offset: u64, count: u64, ) -> Result<()> { - self.check_valid_offset(offset, count).chain_err(|| { + self.check_valid_offset(offset, count).with_context(|| { format!( "Invalid offset: offset 0x{:X}, data length 0x{:X}, region size 0x{:X}", offset, @@ -497,70 +684,76 @@ impl Region { match self.region_type { RegionType::Ram | RegionType::RamDevice => { let host_addr = self.mem_mapping.as_ref().unwrap().host_address(); + // Mark vmm dirty page manually if live migration is active. + MigrationManager::mark_dirty_log(host_addr + offset, count); + + // SAFETY: host_addr is managed by mem_mapping, it can be guaranteed to be legal, + // the legality of offset and count has been verified. 
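                // (editorial note) The mutable slice built below aliases the host mapping, so
                // `src.read_exact` copies the incoming bytes directly into guest RAM; the
                // mark_dirty_log() call above keeps the migration dirty bitmap in sync.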
let slice = unsafe { std::slice::from_raw_parts_mut((host_addr + offset) as *mut u8, count as usize) }; src.read_exact(slice) - .chain_err(|| "Failed to write buffer to Ram")?; + .with_context(|| "Failed to write buffer to Ram")?; } RegionType::RomDevice | RegionType::IO => { if count >= std::usize::MAX as u64 { - return Err(ErrorKind::Overflow(count).into()); + return Err(anyhow!(AddressSpaceError::Overflow(count))); } let mut slice = vec![0_u8; count as usize]; - src.read_exact(&mut slice).chain_err(|| { + src.read_exact(&mut slice).with_context(|| { "Failed to write buffer to slice, which will be provided for device" })?; let write_ops = self.ops.as_ref().unwrap().write.as_ref(); - if !write_ops(&slice, base, offset) { - return Err(ErrorKind::IoAccess(base.raw_value(), offset, count).into()); + if matches!(self.max_access_size, Some(access_size) if count > access_size) { + let args = MultiOpsArgs { + base, + offset, + count, + access_size: self.max_access_size.unwrap(), + }; + rw_multi_ops!(write_ops, slice, args); + } else if !write_ops(&slice, base, offset) { + return Err(anyhow!(AddressSpaceError::IoAccess( + base.raw_value(), + offset, + count + ))); } } _ => { - return Err(ErrorKind::RegionType(self.region_type()).into()); + return Err(anyhow!(AddressSpaceError::RegionType(self.region_type()))); } } Ok(()) } + /// Set the ioeventfds within this Region, /// Return the IoEvent of a `Region`. pub fn set_ioeventfds(&self, new_fds: &[RegionIoEventFd]) { - *self.io_evtfds.lock().unwrap() = new_fds.iter().map(|e| e.try_clone().unwrap()).collect(); + *self.io_evtfds.lock().unwrap() = new_fds.to_vec(); } - /// Set the ioeventfds within this Region, - /// these fds will be register to `KVM` and used for guest notifier. + /// Get the ioeventfds within this Region, + /// these fds will be register to `Hypervisor` supporting + /// ioeventfd and used for guest notifier. pub fn ioeventfds(&self) -> Vec { - self.io_evtfds - .lock() - .unwrap() - .iter() - .map(|e| e.try_clone().unwrap()) - .collect() + self.io_evtfds.lock().unwrap().to_vec() } - /// Add sub-region to this region. + /// Add sub-region to this region, but not produce flat view /// /// # Arguments /// /// * `child` - Subregion of this region. - /// * `offset` - Offset of subregion. - /// - /// # Errors - /// - /// Return Error if - /// * This region is not a Container. - /// * The argument `offset` plus child region's size overflows or exceed this region's size. - /// * The child-region already exists in sub-regions array. - /// * Failed to generate flat view (topology changed after adding sub-region). - pub fn add_subregion(&self, child: Region, offset: u64) -> Result<()> { + /// * `offset` - Offset of subregion + pub fn add_subregion_not_update(&self, child: Region, offset: u64) -> Result<()> { // check parent Region's property, and check if child Region's offset is valid or not if self.region_type() != RegionType::Container { - return Err(ErrorKind::RegionType(self.region_type()).into()); + return Err(anyhow!(AddressSpaceError::RegionType(self.region_type()))); } self.check_valid_offset(offset, child.size()) - .chain_err(|| { + .with_context(|| { format!( "Invalid offset: offset 0x{:X}, child length 0x{:X}, region size 0x{:X}", offset, @@ -587,10 +780,30 @@ impl Region { sub_regions.insert(index, child); drop(sub_regions); + Ok(()) + } + + /// Add sub-region to this region and production flat view + /// + /// # Arguments + /// + /// * `child` - Subregion of this region. + /// * `offset` - Offset of subregion. 
+ /// + /// # Errors + /// + /// Return Error if + /// * This region is not a Container. + /// * The argument `offset` plus child region's size overflows or exceed this region's size. + /// * The child-region already exists in sub-regions array. + /// * Failed to generate flat view (topology changed after adding sub-region). + pub fn add_subregion(&self, child: Region, offset: u64) -> Result<()> { + self.add_subregion_not_update(child, offset)?; + if let Some(space) = self.space.read().unwrap().upgrade() { space .update_topology() - .chain_err(|| "Failed to update topology for address_space")?; + .with_context(|| "Failed to update topology for address_space")?; } else { debug!("add subregion to container region, which has no belonged address-space"); } @@ -610,6 +823,11 @@ impl Region { /// * The child-region does not exist in sub-regions array. /// * Failed to generate flat view (topology changed after removing sub-region). pub fn delete_subregion(&self, child: &Region) -> Result<()> { + // check parent Region's property, and check if child Region's offset is valid or not + if self.region_type() != RegionType::Container { + return Err(anyhow!(AddressSpaceError::RegionType(self.region_type()))); + } + let mut sub_regions = self.subregions.write().unwrap(); let mut removed = false; for (index, sub_r) in sub_regions.iter().enumerate() { @@ -623,16 +841,18 @@ impl Region { if !removed { warn!("Failed to delete subregion from parent region: not found"); - return Err(ErrorKind::RegionNotFound(child.offset().raw_value()).into()); + return Err(anyhow!(AddressSpaceError::RegionNotFound( + child.offset().raw_value() + ))); } // get father address-space and update topology if let Some(space) = self.space.read().unwrap().upgrade() { space .update_topology() - .chain_err(|| "Failed to update topology for address_space")?; + .with_context(|| "Failed to update topology for address_space")?; } else { - debug!("add subregion to container region, which has no belonged address-space"); + debug!("delete subregion from container region, which has no belonged address-space"); } child.del_belonged_address_space(); @@ -656,24 +876,18 @@ impl Region { addr_range: AddressRange, flat_view: &mut FlatView, ) -> Result<()> { + let region_base = base.unchecked_add(self.offset().raw_value()); + let region_range = AddressRange::new(region_base, self.size()); + let intersect = match region_range.find_intersection(addr_range) { + Some(r) => r, + None => return Ok(()), + }; match self.region_type { RegionType::Container => { - let region_base = base.unchecked_add(self.offset().raw_value()); - let region_range = AddressRange::new(region_base, self.size()); - let intersect = match region_range.find_intersection(addr_range) { - Some(r) => r, - None => bail!( - "Generate flat view failed: region_addr 0x{:X} exceeds parent region range (0x{:X}, 0x{:X})", - region_base.raw_value(), - addr_range.base.raw_value(), - addr_range.size - ), - }; - for sub_r in self.subregions.read().unwrap().iter() { sub_r .render_region_pass(region_base, intersect, flat_view) - .chain_err(|| { + .with_context(|| { format!( "Failed to render subregion, base 0x{:X}, addr_range (0x{:X}, 0x{:X})", base.raw_value(), @@ -683,9 +897,24 @@ impl Region { })?; } } + RegionType::Alias => { + if let Some(alias_region) = &self.alias { + let alias_base = region_base + .unchecked_sub(self.alias_offset) + .unchecked_sub(alias_region.offset().raw_value()); + alias_region.render_region_pass(alias_base, intersect, flat_view).with_context(|| { + format!( + "Failed to 
render subregion, alias_base 0x{:X}, intersect (0x{:X}, 0x{:X})", + alias_base.raw_value(), + intersect.base.raw_value(), + intersect.size + ) + })?; + } + } RegionType::Ram | RegionType::IO | RegionType::RomDevice | RegionType::RamDevice => { self.render_terminate_region(base, addr_range, flat_view) - .chain_err(|| + .with_context(|| format!( "Failed to render terminate region, base 0x{:X}, addr_range (0x{:X}, 0x{:X})", base.raw_value(), addr_range.base.raw_value(), @@ -796,7 +1025,7 @@ impl Region { match self.region_type { RegionType::Container => { self.render_region_pass(base, addr_range, &mut flat_view) - .chain_err(|| { + .with_context(|| { format!( "Failed to render terminate region, base 0x{:X}, addr_range (0x{:X}, 0x{:X})", base.raw_value(), @@ -805,19 +1034,69 @@ impl Region { ) })?; } + _ => bail!("Generate flat view failed: only the container supported"), + } + Ok(flat_view) + } + + fn get_region_type_name(&self) -> String { + match self.region_type() { + RegionType::Ram => String::from("ram"), + RegionType::IO => String::from("i/o"), + RegionType::RomDevice => String::from("romd"), + RegionType::RamDevice => String::from("ramd"), + _ => String::from("err type"), + } + } + + pub fn mtree(&self, level: u32) { + let mut tab = String::new(); + let mut num = 0_u32; + + loop { + if num == level { + break; + } + tab.push_str(" "); + num += 1; + } + match self.region_type() { + RegionType::Container => { + println!( + "{}0x{:X} - 0x{:X}, (Prio {}, Container) : {}", + tab, + self.offset().raw_value(), + self.offset().raw_value() + self.size(), + self.priority(), + self.name + ); + for sub_r in self.subregions().iter() { + sub_r.mtree(level + 1); + } + } RegionType::Ram | RegionType::IO | RegionType::RomDevice | RegionType::RamDevice => { - self.render_terminate_region(base, addr_range, &mut flat_view) - .chain_err(|| { - format!( - "Failed to render terminate region, base 0x{:X}, addr_range (0x{:X}, 0x{:X})", - base.raw_value(), - addr_range.base.raw_value(), - addr_range.size - ) - })?; + println!( + "{}0x{:X} - 0x{:X}, (Prio {}, {}) : {}", + tab, + self.offset().raw_value(), + self.offset().raw_value() + self.size(), + self.priority(), + self.get_region_type_name(), + self.name + ); + } + RegionType::Alias => { + println!( + "{}0x{:X} - 0x{:X}, (Prio {}, alias) : {} @alias name: {}", + tab, + self.alias_offset(), + self.alias_offset() + self.size(), + self.priority(), + self.name, + self.alias_name().unwrap(), + ); } } - Ok(flat_view) } } @@ -865,7 +1144,7 @@ mod test { let mem_mapping = Arc::new( HostMemMapping::new(GuestAddress(0), None, 1024, None, false, false, false).unwrap(), ); - let ram_region = Region::init_ram_region(mem_mapping.clone()); + let ram_region = Region::init_ram_region(mem_mapping.clone(), "mem_mapping"); let data: [u8; 10] = [10; 10]; let mut res_data: [u8; 10] = [0; 10]; let count = data.len() as u64; @@ -876,7 +1155,7 @@ mod test { ram_region.set_offset(GuestAddress(0x11u64)); assert_eq!(ram_region.offset(), GuestAddress(0x11u64)); - //test read/write + // test read/write assert!(ram_region .write(&mut data.as_ref(), GuestAddress(0), 0, count) .is_ok()); @@ -886,7 +1165,7 @@ mod test { assert_eq!(&data, &mut res_data); assert_eq!( - ram_region.get_host_address().unwrap(), + unsafe { ram_region.get_host_address(AddressAttr::Ram).unwrap() }, mem_mapping.host_address() ); @@ -901,7 +1180,7 @@ mod test { let host_mmap = Arc::new( HostMemMapping::new(GuestAddress(0), None, 1024, None, false, false, false).unwrap(), ); - let ram_region = 
Region::init_ram_region(host_mmap); + let ram_region = Region::init_ram_region(host_mmap, "mem_mapping"); let file = TempFile::new().unwrap(); let mut file_read = std::fs::File::open(file.as_path()).unwrap(); @@ -922,7 +1201,7 @@ mod test { assert_eq!(&slice, &mut res_slice); // write the file content to 0~24 (24 not included) - // then ckeck the ram's content + // then check the ram's content file_read.seek(SeekFrom::Start(0)).unwrap(); assert!(ram_region.write(&mut file_read, rgn_start, 0, 24).is_ok()); ram_region @@ -939,7 +1218,7 @@ mod test { let mut device_locked = test_dev_clone.lock().unwrap(); device_locked.read(data, addr, offset) }; - let test_dev_clone = test_dev.clone(); + let test_dev_clone = test_dev; let write_ops = move |data: &[u8], addr: GuestAddress, offset: u64| -> bool { let mut device_locked = test_dev_clone.lock().unwrap(); device_locked.write(data, addr, offset) @@ -950,7 +1229,7 @@ mod test { write: Arc::new(write_ops), }; - let io_region = Region::init_io_region(16, test_dev_ops.clone()); + let io_region = Region::init_io_region(16, test_dev_ops, "io_region"); let data = [0x01u8; 8]; let mut data_res = [0x0u8; 8]; let count = data.len() as u64; @@ -966,42 +1245,61 @@ mod test { .is_ok()); assert_eq!(data.to_vec(), data_res.to_vec()); - assert!(io_region.get_host_address().is_none()); + assert!(unsafe { io_region.get_host_address(AddressAttr::Ram).is_none() }); } #[test] fn test_region_ioeventfd() { let mut fd1 = RegionIoEventFd { - fd: EventFd::new(EFD_NONBLOCK).unwrap(), + fd: Arc::new(EventFd::new(EFD_NONBLOCK).unwrap()), addr_range: AddressRange::from((1000, 4u64)), data_match: false, data: 0, }; + // compare unchanged + let mut fd2 = fd1.clone(); + assert!(fd2 == fd1); + + // compare fd + fd2.fd = Arc::new(EventFd::new(EFD_NONBLOCK).unwrap()); + assert!(fd2 != fd1); + // compare length - let mut fd2 = fd1.try_clone().unwrap(); + fd2.fd = fd1.fd.clone(); + assert!(fd2 == fd1); fd2.addr_range.size = 8; - assert!(fd1.before(&fd2)); + assert!(fd1 != fd2); // compare address - fd2.addr_range.base.0 = 1024; fd2.addr_range.size = 4; - assert!(fd1.before(&fd2)); + assert!(fd2 == fd1); + fd2.addr_range.base.0 = 1024; + assert!(fd1 != fd2); // compare datamatch fd2.addr_range = fd1.addr_range; + assert!(fd2 == fd1); fd2.data_match = true; - assert_eq!(fd1.before(&fd2), false); + assert!(fd1 != fd2); // if datamatch, compare data fd1.data_match = true; + assert!(fd2 == fd1); fd2.data = 10u64; - assert!(fd1.before(&fd2)); + assert!(fd1 != fd2); + + // test after + fd2.data = 0; + assert!(fd2 == fd1); + assert!(!fd2.after(&fd1)); + fd2.addr_range.base.0 = 1004; + assert!(fd2.after(&fd1)); } // test add/del sub-region to container-region, and check priority #[test] fn test_add_del_subregion() { - let container = Region::init_container_region(1 << 10); + let container = Region::init_container_region(1 << 10, "root"); assert_eq!(container.region_type(), RegionType::Container); assert_eq!(container.priority(), 0); @@ -1010,8 +1308,8 @@ mod test { write: Arc::new(|_: &[u8], _: GuestAddress, _: u64| -> bool { true }), }; - let io_region = Region::init_io_region(1 << 4, default_ops.clone()); - let io_region2 = Region::init_io_region(1 << 4, default_ops.clone()); + let io_region = Region::init_io_region(1 << 4, default_ops.clone(), "io1"); + let io_region2 = Region::init_io_region(1 << 4, default_ops, "io2"); io_region2.set_priority(10); // add duplicate io-region or ram-region will fail @@ -1035,7 +1333,7 @@ mod test { .subregions .read() .unwrap() - .get(0) + .first() .unwrap() 
.priority(), 10 @@ -1067,18 +1365,18 @@ mod test { // the flat_view is as follows // [CCCCCCCCCCCC][DDDDD][CCCCC][EEEEE][CCCCC] { - let region_a = Region::init_container_region(8000); - let region_b = Region::init_container_region(4000); - let region_c = Region::init_io_region(6000, default_ops.clone()); - let region_d = Region::init_io_region(1000, default_ops.clone()); - let region_e = Region::init_io_region(1000, default_ops.clone()); + let region_a = Region::init_container_region(8000, "region_a"); + let region_b = Region::init_container_region(4000, "region_b"); + let region_c = Region::init_io_region(6000, default_ops.clone(), "region_c"); + let region_d = Region::init_io_region(1000, default_ops.clone(), "region_d"); + let region_e = Region::init_io_region(1000, default_ops.clone(), "region_e"); region_b.set_priority(2); region_c.set_priority(1); region_a.add_subregion(region_b.clone(), 2000).unwrap(); - region_a.add_subregion(region_c.clone(), 0).unwrap(); - region_b.add_subregion(region_d.clone(), 0).unwrap(); - region_b.add_subregion(region_e.clone(), 2000).unwrap(); + region_a.add_subregion(region_c, 0).unwrap(); + region_b.add_subregion(region_d, 0).unwrap(); + region_b.add_subregion(region_e, 2000).unwrap(); let addr_range = AddressRange::from((0u64, region_a.size())); let view = region_a @@ -1113,18 +1411,18 @@ mod test { // the flat_view is as follows // [CCCCCC] [DDDDDDDDDDDD][EEEEEEEEEEEEE] { - let region_a = Region::init_container_region(8000); - let region_b = Region::init_container_region(5000); - let region_c = Region::init_io_region(1000, default_ops.clone()); - let region_d = Region::init_io_region(3000, default_ops.clone()); - let region_e = Region::init_io_region(2000, default_ops.clone()); + let region_a = Region::init_container_region(8000, "region_a"); + let region_b = Region::init_container_region(5000, "region_b"); + let region_c = Region::init_io_region(1000, default_ops.clone(), "regionc"); + let region_d = Region::init_io_region(3000, default_ops.clone(), "region_d"); + let region_e = Region::init_io_region(2000, default_ops, "region_e"); region_a.add_subregion(region_b.clone(), 2000).unwrap(); - region_a.add_subregion(region_c.clone(), 0).unwrap(); + region_a.add_subregion(region_c, 0).unwrap(); region_d.set_priority(2); region_e.set_priority(3); - region_b.add_subregion(region_d.clone(), 0).unwrap(); - region_b.add_subregion(region_e.clone(), 2000).unwrap(); + region_b.add_subregion(region_d, 0).unwrap(); + region_b.add_subregion(region_e, 2000).unwrap(); let addr_range = AddressRange::from((0u64, region_a.size())); let view = region_a diff --git a/address_space/src/state.rs b/address_space/src/state.rs index eb34e91cea434f38e434e2e06f1ccf55c0e8c348..2f28232589e8c46b22df5383008e75a6db56e17c 100644 --- a/address_space/src/state.rs +++ b/address_space/src/state.rs @@ -11,13 +11,17 @@ // See the Mulan PSL v2 for more details. 
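// (editorial sketch, not part of the original patch) Layout of the memory snapshot that the
// reworked save_memory()/restore_memory() below produce and consume:
//
//     [ AddressSpaceState ][ zero padding up to memory_offset() ][ RAM of machine_ram
//       subregion 0 ][ RAM of machine_ram subregion 1 ] ...
//
// On restore, each RAM chunk is mapped from the snapshot file at its recorded offset and
// re-added under the machine_ram container, after which the alias regions recorded in
// ram_region_state are recreated on the root region.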
use std::fs::File; -use std::io::Write; +use std::io::{Read, Write}; use std::mem::size_of; use std::sync::Arc; -use crate::{AddressSpace, FileBackend, GuestAddress, HostMemMapping, Region}; -use migration::errors::{ErrorKind, Result, ResultExt}; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, StateTransfer}; +use anyhow::{bail, Context, Result}; + +use crate::{AddressAttr, AddressSpace, FileBackend, GuestAddress, HostMemMapping, Region}; +use migration::{ + error::MigrationError, DeviceStateDesc, FieldDesc, MemBlock, MigrationHook, StateTransfer, +}; +use migration_derive::{ByteCode, Desc}; use util::byte_code::ByteCode; use util::unix::host_page_size; @@ -27,13 +31,15 @@ const MIGRATION_HEADER_LENGTH: usize = 4096; #[derive(Copy, Clone, Desc, ByteCode)] #[desc_version(compat_version = "0.1.0")] pub struct AddressSpaceState { + nr_alias_region: u64, + ram_alias_state: [RamRegionState; 16], nr_ram_region: u64, ram_region_state: [RamRegionState; 16], } #[derive(Copy, Clone, ByteCode)] pub struct RamRegionState { - base_address: u64, + alias_offset: u64, size: u64, // The offset of this memory region in file backend file. offset: u64, @@ -55,14 +61,28 @@ impl StateTransfer for AddressSpace { let mut state = AddressSpaceState::default(); let mut offset = memory_offset() as u64; + if self.get_machine_ram().is_none() { + bail!("This address space does not support migration."); + } + + let machine_ram = self.get_machine_ram().unwrap(); + for region in machine_ram.subregions().iter() { + state.ram_alias_state[state.nr_alias_region as usize] = RamRegionState { + alias_offset: 0, + size: region.size(), + offset, + }; + offset += region.size(); + state.nr_alias_region += 1; + } + for region in self.root().subregions().iter() { - if let Some(start_addr) = region.start_addr() { + if region.alias_name().is_some() { state.ram_region_state[state.nr_ram_region as usize] = RamRegionState { - base_address: start_addr.0, + alias_offset: region.alias_offset(), size: region.size(), - offset, + offset: region.offset().0, }; - offset += region.size(); state.nr_ram_region += 1; } } @@ -76,59 +96,88 @@ impl StateTransfer for AddressSpace { } impl MigrationHook for AddressSpace { - fn pre_save(&self, _id: &str, writer: &mut dyn Write) -> Result<()> { + fn save_memory(&self, fd: &mut dyn Write) -> Result<()> { let ram_state = self.get_state_vec()?; - writer.write_all(&ram_state)?; + fd.write_all(&ram_state)?; let padding_buffer = [0].repeat(memory_offset() - MIGRATION_HEADER_LENGTH - size_of::()); - writer.write_all(&padding_buffer)?; - - for region in self.root().subregions().iter() { - if let Some(base_addr) = region.start_addr() { - region - .read(writer, base_addr, 0, region.size()) - .map_err(|e| ErrorKind::SaveVmMemoryErr(e.to_string()))?; + fd.write_all(&padding_buffer)?; + if let Some(machine_ram) = self.get_machine_ram() { + for region in machine_ram.subregions().iter() { + if let Some(base_addr) = region.start_addr() { + region + .read(fd, base_addr, 0, region.size()) + .map_err(|e| MigrationError::SaveVmMemoryErr(e.to_string()))?; + } } } - Ok(()) } - fn pre_load(&self, state: &[u8], memory: Option<&File>) -> Result<()> { + fn restore_memory(&self, memory: Option<&File>, state: &[u8]) -> Result<()> { let address_space_state: &AddressSpaceState = AddressSpaceState::from_bytes(&state[0..size_of::()]) - .ok_or(ErrorKind::FromBytesError("MEMORY"))?; + .with_context(|| MigrationError::FromBytesError("MEMORY"))?; let memfile_arc = Arc::new(memory.unwrap().try_clone().unwrap()); - - for ram_state in 
address_space_state.ram_region_state - [0..address_space_state.nr_ram_region as usize] - .iter() - { - let file_backend = FileBackend { - file: memfile_arc.clone(), - offset: ram_state.offset, - page_size: host_page_size(), - }; - let host_mmap = Arc::new( - HostMemMapping::new( - GuestAddress(ram_state.base_address), - None, + if let Some(machine_ram) = self.get_machine_ram() { + let mut offset = 0_u64; + for ram_state in address_space_state.ram_alias_state + [0..address_space_state.nr_alias_region as usize] + .iter() + { + let file_backend = FileBackend { + file: memfile_arc.clone(), + offset: ram_state.offset, + page_size: host_page_size(), + }; + let host_mmap = Arc::new( + HostMemMapping::new( + GuestAddress(0), + None, + ram_state.size, + Some(file_backend), + false, + false, + false, + ) + .map_err(|e| MigrationError::RestoreVmMemoryErr(e.to_string()))?, + ); + machine_ram + .add_subregion_not_update( + Region::init_ram_region(host_mmap.clone(), "HostMem"), + offset, + ) + .map_err(|e| MigrationError::RestoreVmMemoryErr(e.to_string()))?; + offset += ram_state.size; + } + for ram_state in address_space_state.ram_region_state + [0..address_space_state.nr_ram_region as usize] + .iter() + { + let ram = Region::init_alias_region( + machine_ram.clone(), + ram_state.alias_offset, ram_state.size, - Some(file_backend), - false, - false, - false, - ) - .chain_err(|| ErrorKind::RestoreVmMemoryErr)?, - ); - self.root() - .add_subregion( - Region::init_ram_region(host_mmap.clone()), - host_mmap.start_address().raw_value(), - ) - .chain_err(|| ErrorKind::RestoreVmMemoryErr)?; + "ram", + ); + self.root().add_subregion(ram, ram_state.offset)?; + } } Ok(()) } + + fn send_memory(&self, fd: &mut dyn Write, range: MemBlock) -> Result<()> { + self.read(fd, GuestAddress(range.gpa), range.len, AddressAttr::Ram) + .map_err(|e| MigrationError::SendVmMemoryErr(e.to_string()))?; + + Ok(()) + } + + fn recv_memory(&self, fd: &mut dyn Read, range: MemBlock) -> Result<()> { + self.write(fd, GuestAddress(range.gpa), range.len, AddressAttr::Ram) + .map_err(|e| MigrationError::RecvVmMemoryErr(e.to_string()))?; + + Ok(()) + } } diff --git a/block_backend/Cargo.toml b/block_backend/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..d052bd0d55d48d3c2fb4e0eef0e09b98cc251585 --- /dev/null +++ b/block_backend/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "block_backend" +version = "2.4.0" +authors = ["Huawei StratoVirt Team"] +edition = "2021" +license = "Mulan PSL v2" + +[dependencies] +thiserror = "1.0" +vmm-sys-util = "0.12.1" +anyhow = "1.0" +log = "0.4" +byteorder = "1.4.3" +once_cell = "1.18.0" +libc = "0.2" +machine_manager = { path = "../machine_manager" } +util = { path = "../util" } +trace = {path = "../trace"} diff --git a/block_backend/src/file.rs b/block_backend/src/file.rs new file mode 100644 index 0000000000000000000000000000000000000000..3acef03816abdc7d334d02efed5c0374ee27cfeb --- /dev/null +++ b/block_backend/src/file.rs @@ -0,0 +1,310 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+// See the Mulan PSL v2 for more details. + +use std::{ + cell::RefCell, + fs::File, + io::{Seek, SeekFrom}, + os::{ + linux::fs::MetadataExt, + unix::prelude::{AsRawFd, RawFd}, + }, + rc::Rc, + sync::{ + atomic::{AtomicBool, AtomicI64, AtomicU32, AtomicU64, Ordering}, + Arc, Mutex, + }, +}; + +use anyhow::{Context, Result}; +use log::error; +use vmm_sys_util::epoll::EventSet; + +use crate::{qcow2::DEFAULT_SECTOR_SIZE, BlockIoErrorCallback, BlockProperty}; +use machine_manager::event_loop::{register_event_helper, unregister_event_helper}; +use util::{ + aio::{Aio, AioCb, AioEngine, Iovec, OpCode}, + loop_context::{ + read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, + }, +}; + +pub struct CombineRequest { + pub iov: Vec, + pub offset: u64, + pub nbytes: u64, +} + +impl CombineRequest { + pub fn new(iov: Vec, offset: u64, nbytes: u64) -> Self { + Self { + iov, + offset, + nbytes, + } + } +} + +pub struct FileDriver { + pub file: Arc, + aio: Rc>>, + pub incomplete: Arc, + delete_evts: Vec, + pub block_prop: BlockProperty, +} + +impl FileDriver { + pub fn new(file: Arc, aio: Aio, block_prop: BlockProperty) -> Self { + Self { + file, + incomplete: aio.incomplete_cnt.clone(), + aio: Rc::new(RefCell::new(aio)), + delete_evts: Vec::new(), + block_prop, + } + } + + fn package_aiocb( + &self, + opcode: OpCode, + iovec: Vec, + offset: usize, + nbytes: u64, + iocompletecb: T, + ) -> AioCb { + AioCb { + direct: self.block_prop.direct, + req_align: self.block_prop.req_align, + buf_align: self.block_prop.buf_align, + file_fd: self.file.as_raw_fd(), + opcode, + iovec, + offset, + nbytes, + user_data: 0, + iocompletecb, + discard: self.block_prop.discard, + write_zeroes: self.block_prop.write_zeroes, + combine_req: None, + } + } + + fn process_request( + &mut self, + opcode: OpCode, + req_list: Vec, + completecb: T, + ) -> Result<()> { + if req_list.is_empty() { + return self.complete_request(opcode, 0, completecb); + } + let single_req = req_list.len() == 1; + let cnt = Arc::new(AtomicU32::new(req_list.len() as u32)); + let res = Arc::new(AtomicI64::new(0)); + for req in req_list { + let mut aiocb = self.package_aiocb( + opcode, + req.iov, + req.offset as usize, + req.nbytes, + completecb.clone(), + ); + if !single_req { + aiocb.combine_req = Some((cnt.clone(), res.clone())); + } + self.aio.borrow_mut().submit_request(aiocb)?; + } + Ok(()) + } + + pub fn read_vectored(&mut self, req_list: Vec, completecb: T) -> Result<()> { + self.process_request(OpCode::Preadv, req_list, completecb) + } + + pub fn complete_request(&mut self, opcode: OpCode, res: i64, completecb: T) -> Result<()> { + let iovec: Vec = Vec::new(); + let aiocb = self.package_aiocb(opcode, iovec.to_vec(), 0, 0, completecb); + (self.aio.borrow_mut().complete_func)(&aiocb, res) + } + + pub fn write_vectored(&mut self, req_list: Vec, completecb: T) -> Result<()> { + self.process_request(OpCode::Pwritev, req_list, completecb) + } + + pub fn write_zeroes( + &mut self, + req_list: Vec, + completecb: T, + unmap: bool, + ) -> Result<()> { + let opcode = if unmap { + OpCode::WriteZeroesUnmap + } else { + OpCode::WriteZeroes + }; + self.process_request(opcode, req_list, completecb) + } + + pub fn discard(&mut self, req_list: Vec, completecb: T) -> Result<()> { + self.process_request(OpCode::Discard, req_list, completecb) + } + + pub fn datasync(&mut self, completecb: T) -> Result<()> { + let aiocb = self.package_aiocb(OpCode::Fdsync, Vec::new(), 0, 0, completecb); + self.aio.borrow_mut().submit_request(aiocb) + } + + 
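    // (editorial sketch, not part of the original patch) Typical use of the vectored request
    // path above; `iov_list`, `file_offset`, `total_bytes` and `completecb` are placeholders
    // supplied by the caller:
    //
    //     let req = CombineRequest::new(iov_list, file_offset, total_bytes);
    //     driver.read_vectored(vec![req], completecb)?;
    //
    // When a request list holds more than one entry, process_request() attaches a shared
    // (AtomicU32, AtomicI64) pair through `combine_req` so the split AioCbs are accounted
    // for as one logical I/O.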
pub fn flush_request(&mut self) -> Result<()> { + self.aio.borrow_mut().flush_request() + } + + pub fn drain_request(&self) { + while self.incomplete.load(Ordering::Acquire) != 0 { + continue; + } + } + + pub fn register_io_event( + &mut self, + broken: Arc, + error_cb: BlockIoErrorCallback, + ) -> Result<()> { + let handler = FileIoHandler::new(self.aio.clone(), broken, error_cb); + let notifiers = EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))); + register_event_helper( + notifiers, + self.block_prop.iothread.as_ref(), + &mut self.delete_evts, + ) + } + + pub fn unregister_io_event(&mut self) -> Result<()> { + unregister_event_helper(self.block_prop.iothread.as_ref(), &mut self.delete_evts) + } + + pub fn actual_size(&mut self) -> Result { + let meta_data = self.file.metadata()?; + Ok(meta_data.st_blocks() * DEFAULT_SECTOR_SIZE) + } + + pub fn disk_size(&mut self) -> Result { + let disk_size = self + .file + .as_ref() + .seek(SeekFrom::End(0)) + .with_context(|| "Failed to seek the end for file")?; + Ok(disk_size) + } + + pub fn extend_to_len(&mut self, len: u64) -> Result<()> { + let file_end = self.file.as_ref().seek(SeekFrom::End(0))?; + if len > file_end { + self.file.set_len(len)?; + } + Ok(()) + } +} + +struct FileIoHandler { + aio: Rc>>, + broken: Arc, + error_cb: BlockIoErrorCallback, +} + +impl FileIoHandler { + pub fn new( + aio: Rc>>, + broken: Arc, + error_cb: BlockIoErrorCallback, + ) -> Self { + Self { + aio, + broken, + error_cb, + } + } + + fn aio_complete_handler(&mut self) -> Result { + let error_cb = self.error_cb.clone(); + self.aio.borrow_mut().handle_complete().map_err(|e| { + error_cb(); + e + }) + } +} + +impl EventNotifierHelper for FileIoHandler { + fn internal_notifiers(handler: Arc>) -> Vec { + let handler_raw = handler.lock().unwrap(); + let mut notifiers = Vec::new(); + + // Register event notifier for aio. + let h_clone = handler.clone(); + let h: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let mut h_lock = h_clone.lock().unwrap(); + if h_lock.broken.load(Ordering::SeqCst) { + return None; + } + if let Err(ref e) = h_lock.aio_complete_handler() { + error!("Failed to handle aio {:?}", e); + } + None + }); + let h_clone = handler.clone(); + let handler_iopoll: Box = Box::new(move |_, _fd: RawFd| { + let mut h_lock = h_clone.lock().unwrap(); + if h_lock.broken.load(Ordering::SeqCst) { + return None; + } + if h_lock.aio.borrow_mut().get_engine() == AioEngine::Off { + return None; + } + match h_lock.aio_complete_handler() { + Ok(done) => { + if done { + Some(Vec::new()) + } else { + None + } + } + Err(e) => { + error!("Failed to handle aio {:?}", e); + None + } + } + }); + notifiers.push(build_event_notifier( + handler_raw.aio.borrow_mut().fd.as_raw_fd(), + vec![h], + Some(handler_iopoll), + )); + + notifiers + } +} + +fn build_event_notifier( + fd: RawFd, + handlers: Vec>, + handler_poll: Option>, +) -> EventNotifier { + let mut notifier = EventNotifier::new( + NotifierOperation::AddShared, + fd, + None, + EventSet::IN, + handlers, + ); + notifier.handler_poll = handler_poll; + notifier +} diff --git a/block_backend/src/lib.rs b/block_backend/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..525c1c662d3e0d061aa8cf2b3e6f3582babf290c --- /dev/null +++ b/block_backend/src/lib.rs @@ -0,0 +1,462 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod file; +pub mod qcow2; +pub mod raw; + +use std::{ + fmt, + fs::File, + sync::{ + atomic::{AtomicBool, AtomicU64, Ordering}, + Arc, Mutex, + }, + thread::yield_now, +}; + +use anyhow::{bail, Context, Result}; +use log::{error, info}; + +use machine_manager::{ + config::DiskFormat, + temp_cleaner::{ExitNotifier, TempCleaner}, +}; +use qcow2::{qcow2_flush_metadata, Qcow2Driver, QCOW2_LIST}; +use raw::RawDriver; +use util::aio::{Aio, Iovec, WriteZeroesState}; + +/// Callback function which is called when aio handle failed. +pub type BlockIoErrorCallback = Arc; + +pub const SECTOR_BITS: u64 = 9; +pub const SECTOR_SIZE: u64 = 1 << SECTOR_BITS; +pub const CLUSTER_SIZE_MIN: u64 = 1 << 9; +pub const CLUSTER_SIZE_MAX: u64 = 1 << 21; +pub const NO_FIX: u64 = 0; +pub const FIX_LEAKS: u64 = 1; +pub const FIX_ERRORS: u64 = 2; + +const DEFAULT_QCOW2_VERSION: u32 = 3; +const DEFAULT_CLUSTER_BITS: u64 = 16; +const DEFAULT_CLUSTER_SIZE: u64 = 1 << DEFAULT_CLUSTER_BITS; +const DEFAULT_REFCOUNT_BITS: u64 = 16; +const MAX_REFCOUNT_BITS: u64 = 64; + +#[macro_export] +macro_rules! output_msg { + ($lvl:expr, $($arg:tt)*) => { + if !$lvl { + println!($($arg)*) + } + } +} + +pub struct RawCreateOptions { + pub path: String, + pub img_size: u64, +} + +pub struct Qcow2CreateOptions { + pub path: String, + pub img_size: u64, + pub version: u32, + pub cluster_size: u64, + pub refcount_bits: u64, +} + +#[derive(Default)] +pub struct CreateOptions { + pub path: String, + pub img_size: u64, + pub cluster_size: Option, + pub refcount_bits: Option, + pub conf: BlockProperty, +} + +impl CreateOptions { + fn check(&self) -> Result<()> { + if self.img_size == 0 || self.img_size % SECTOR_SIZE != 0 { + bail!( + "Image size {} is invalid, it can't be zero and it must be multiple of {}", + self.img_size, + SECTOR_SIZE + ); + } + + Ok(()) + } + + pub(crate) fn raw(&self) -> Result { + self.check()?; + if self.cluster_size.is_some() { + bail!("Format raw does not support parameter 'cluster_size'"); + } + + if self.refcount_bits.is_some() { + bail!("Format raw does not support parameter 'refcount_bits'"); + } + + let options_raw = RawCreateOptions { + path: self.path.clone(), + img_size: self.img_size, + }; + Ok(options_raw) + } + + pub(crate) fn qcow2(&self) -> Result { + self.check()?; + let mut cluster_size = DEFAULT_CLUSTER_SIZE; + if let Some(size) = self.cluster_size { + if !size.is_power_of_two() || !(CLUSTER_SIZE_MIN..=CLUSTER_SIZE_MAX).contains(&size) { + bail!( + "Cluster size is {}, it should be power of 2 and within the range of [{}:{}]", + size, + CLUSTER_SIZE_MIN, + CLUSTER_SIZE_MAX, + ); + } + cluster_size = size; + } + + let mut refcount_bits = DEFAULT_REFCOUNT_BITS; + if let Some(rc_bits) = self.refcount_bits { + if rc_bits > MAX_REFCOUNT_BITS || !rc_bits.is_power_of_two() { + bail!( + "Refcount bis {} is invalid, it should be power of 2 and not exceed {} bits", + rc_bits, + MAX_REFCOUNT_BITS + ); + } + refcount_bits = rc_bits; + } + + let options_qcow2 = Qcow2CreateOptions { + path: self.path.clone(), + img_size: self.img_size, + version: DEFAULT_QCOW2_VERSION, + 
cluster_size, + refcount_bits, + }; + + Ok(options_qcow2) + } +} + +// Transform size into string with storage units. +fn size_to_string(size: f64) -> Result { + let units = ["", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"]; + + // Switch to higher power if the integer part is >= 1000, + // For example: 1000 * 2^30 bytes + // It's better to output 0.978 TiB, rather than 1000 GiB. + let n = (size / 1000.0 * 1024.0).log2() as u64; + let idx = n / 10; + if idx >= units.len() as u64 { + bail!("Input value {} is too large", size); + } + let div = 1_u64 << (idx * 10); + + // Keep three significant digits and do not output any extra zeros, + // For example: 512 * 2^20 bytes + // It's better to output 512 MiB, rather than 512.000 MiB. + let num_str = format!("{:.3}", size / div as f64); + let num_str = num_str.trim_end_matches('0').trim_end_matches('.'); + + let res = format!("{} {}", num_str, units[idx as usize]); + Ok(res) +} + +#[derive(Default)] +pub struct ImageInfo { + pub path: String, + pub format: String, + pub actual_size: u64, + pub virtual_size: u64, + pub cluster_size: Option, + pub snap_lists: Option, +} + +impl fmt::Display for ImageInfo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!( + f, + "image: {}\n\ + file format: {}\n\ + virtual size: {} ({} bytes)\n\ + disk size: {}", + self.path, + self.format, + size_to_string(self.virtual_size as f64).unwrap_or_else(|e| format!("{:?}", e)), + self.virtual_size, + size_to_string(self.actual_size as f64).unwrap_or_else(|e| format!("{:?}", e)) + )?; + + if let Some(cluster_size) = self.cluster_size { + writeln!(f, "cluster_size: {}", cluster_size)?; + } + + if let Some(snap_lists) = &self.snap_lists { + write!(f, "Snapshot list:\n{}", snap_lists)?; + } + Ok(()) + } +} + +#[derive(Default, Clone, Copy)] +pub struct DiskFragments { + pub allocated_clusters: u64, + pub total_clusters: u64, + pub fragments: u64, + pub compressed_clusters: u64, +} + +#[derive(Default, Clone, Copy)] +pub struct CheckResult { + /// Number of leaked clusters. + pub leaks: i32, + /// Number of leaked clusters that have been fixed. + pub leaks_fixed: i32, + /// Number of corruptions clusters. + pub corruptions: i32, + /// Number of corruptions clusters that have been fixed. + pub corruptions_fixed: i32, + /// File length of virtual disk. + pub image_end_offset: u64, + /// Whether the refcount block table needs to be rebuilt. + pub need_rebuild: bool, + /// Total number of errors during the check. + pub err_num: u64, + /// Statistics information for clusters of virtual disk. 
+ pub disk_frag: DiskFragments, +} + +impl CheckResult { + pub fn merge_result(&mut self, res: &CheckResult) { + self.corruptions += res.corruptions; + self.leaks += res.leaks; + self.err_num += res.err_num; + self.corruptions_fixed += res.corruptions_fixed; + self.leaks_fixed += res.leaks_fixed; + self.image_end_offset = res.image_end_offset; + self.disk_frag = res.disk_frag; + } + + pub fn collect_check_message(&self) -> String { + let mut message = String::from(""); + if self.leaks_fixed != 0 || self.corruptions_fixed != 0 { + message.push_str(&format!( + "The following inconsistencies were found and repaired:\n\n\ + \t{} leaked clusters\n\ + \t{} corruptions\n\n\ + Double checking the fixed image now..\n", + self.leaks_fixed, self.corruptions_fixed + )); + } + + if self.corruptions == 0 && self.leaks == 0 && self.err_num == 0 { + message.push_str("No errors were found on the image.\n"); + } else { + if self.corruptions != 0 { + message.push_str(&format!( + "{} errors were found on the image.\n\ + Data may be corrupted, or further writes to the image may corrupt it.\n", + self.corruptions + )); + } + + if self.leaks != 0 { + message.push_str(&format!( + "{} leaked clusters were found on the image.\n\ + This means waste of disk space, but no harm to data.\n", + self.leaks + )); + } + + if self.err_num != 0 { + message.push_str(&format!( + "{} internal errors have occurred during the check.\n", + self.err_num + )); + } + } + + if self.disk_frag.total_clusters != 0 && self.disk_frag.allocated_clusters != 0 { + message.push_str(&format!( + "{}/{} = {:.2}% allocated, {:.2}% fragmented, {:.2}% compressed clusters\n", + self.disk_frag.allocated_clusters, + self.disk_frag.total_clusters, + self.disk_frag.allocated_clusters as f64 / self.disk_frag.total_clusters as f64 + * 100.0, + self.disk_frag.fragments as f64 / self.disk_frag.allocated_clusters as f64 * 100.0, + self.disk_frag.compressed_clusters as f64 + / self.disk_frag.allocated_clusters as f64 + * 100.0 + )); + } + + if self.image_end_offset != 0 { + message.push_str(&format!("Image end offset: {}\n", self.image_end_offset)); + } + + message + } +} + +pub enum BlockStatus { + Init, + NormalIO, + Snapshot, +} + +#[derive(Debug, Clone)] +pub struct BlockProperty { + pub id: String, + pub format: DiskFormat, + pub iothread: Option, + pub direct: bool, + pub req_align: u32, + pub buf_align: u32, + pub discard: bool, + pub write_zeroes: WriteZeroesState, + pub l2_cache_size: Option, + pub refcount_cache_size: Option, +} + +impl Default for BlockProperty { + fn default() -> Self { + Self { + id: "".to_string(), + format: DiskFormat::Raw, + iothread: None, + direct: false, + req_align: 1_u32, + buf_align: 1_u32, + discard: false, + write_zeroes: WriteZeroesState::Off, + l2_cache_size: None, + refcount_cache_size: None, + } + } +} + +pub trait BlockDriverOps: Send { + fn create_image(&mut self, options: &CreateOptions) -> Result; + + fn query_image(&mut self, image_info: &mut ImageInfo) -> Result<()>; + + fn check_image(&mut self, res: &mut CheckResult, quite: bool, fix: u64) -> Result<()>; + + fn disk_size(&mut self) -> Result; + + fn read_vectored(&mut self, iovec: Vec, offset: usize, completecb: T) -> Result<()>; + + fn write_vectored(&mut self, iovec: Vec, offset: usize, completecb: T) -> Result<()>; + + fn datasync(&mut self, completecb: T) -> Result<()>; + + fn discard(&mut self, offset: usize, nbytes: u64, completecb: T) -> Result<()>; + + fn write_zeroes( + &mut self, + offset: usize, + nbytes: u64, + completecb: T, + unmap: bool, + ) -> 
Result<()>; + + fn flush_request(&mut self) -> Result<()>; + + fn resize(&mut self, new_size: u64) -> Result<()>; + + fn drain_request(&self); + + fn get_inflight(&self) -> Arc; + + fn register_io_event( + &mut self, + device_broken: Arc, + error_cb: BlockIoErrorCallback, + ) -> Result<()>; + + fn unregister_io_event(&mut self) -> Result<()>; + + fn get_status(&mut self) -> Arc>; +} + +pub fn create_block_backend( + file: Arc, + aio: Aio, + prop: BlockProperty, +) -> Result>>> { + match prop.format { + DiskFormat::Raw => { + let mut raw_file = RawDriver::new(file, aio, prop.clone()); + let file_size = raw_file.disk_size()?; + if file_size & (u64::from(prop.req_align) - 1) != 0 { + bail!("The size of raw file is not aligned to {}.", prop.req_align); + } + Ok(Arc::new(Mutex::new(raw_file))) + } + DiskFormat::Qcow2 => { + let mut qcow2 = Qcow2Driver::new(file, aio, prop.clone()) + .with_context(|| "Failed to create qcow2 driver")?; + qcow2 + .load_metadata(prop.clone()) + .with_context(|| "Failed to load metadata")?; + + let file_size = qcow2.disk_size()?; + if file_size & (u64::from(prop.req_align) - 1) != 0 { + bail!( + "The size of qcow2 file is not aligned to {}.", + prop.req_align + ); + } + let new_qcow2 = Arc::new(Mutex::new(qcow2)); + QCOW2_LIST + .lock() + .unwrap() + .insert(prop.id.clone(), new_qcow2.clone()); + let cloned_qcow2 = Arc::downgrade(&new_qcow2); + // NOTE: we can drain request when request in io thread. + let drain = prop.iothread.is_some(); + let cloned_drive_id = prop.id.clone(); + + let exit_notifier = Arc::new(move || { + if let Some(qcow2) = cloned_qcow2.upgrade() { + info!("clean up qcow2 {:?} resources.", cloned_drive_id); + if drain { + info!("Drain the inflight IO for drive \"{}\"", cloned_drive_id); + let incomplete = qcow2.lock().unwrap().get_inflight(); + while incomplete.load(Ordering::SeqCst) != 0 { + yield_now(); + } + } + if let Err(e) = qcow2.lock().unwrap().flush() { + error!("Failed to flush qcow2 {:?}", e); + } + } + }) as Arc; + TempCleaner::add_exit_notifier(prop.id.clone(), exit_notifier); + + // Add timer for flushing qcow2 metadata. + qcow2_flush_metadata(Arc::downgrade(&new_qcow2), prop.id); + + Ok(new_qcow2) + } + } +} + +pub fn remove_block_backend(id: &str) { + QCOW2_LIST.lock().unwrap().remove(id); + TempCleaner::remove_exit_notifier(id); +} diff --git a/block_backend/src/qcow2/cache.rs b/block_backend/src/qcow2/cache.rs new file mode 100644 index 0000000000000000000000000000000000000000..e54739f81ebaa6f8ccf76a0843aa87e13efa59db --- /dev/null +++ b/block_backend/src/qcow2/cache.rs @@ -0,0 +1,336 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
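Editor's note: before the cache module that follows, here is a small standalone sketch of the parameter checks performed by `CreateOptions::check()` and `CreateOptions::qcow2()` in lib.rs above: the image size must be a non-zero multiple of `SECTOR_SIZE`, the cluster size a power of two within `[CLUSTER_SIZE_MIN, CLUSTER_SIZE_MAX]`, and the refcount width a power of two of at most `MAX_REFCOUNT_BITS`. The constants are copied from the crate; `validate_qcow2_opts` itself is illustrative only and not part of the patch.

const SECTOR_SIZE: u64 = 1 << 9;
const CLUSTER_SIZE_MIN: u64 = 1 << 9;
const CLUSTER_SIZE_MAX: u64 = 1 << 21;
const MAX_REFCOUNT_BITS: u64 = 64;

fn validate_qcow2_opts(img_size: u64, cluster_size: u64, refcount_bits: u64) -> Result<(), String> {
    if img_size == 0 || img_size % SECTOR_SIZE != 0 {
        return Err(format!(
            "image size {} must be a non-zero multiple of {}",
            img_size, SECTOR_SIZE
        ));
    }
    if !cluster_size.is_power_of_two()
        || !(CLUSTER_SIZE_MIN..=CLUSTER_SIZE_MAX).contains(&cluster_size)
    {
        return Err(format!(
            "cluster size {} must be a power of 2 in [{}, {}]",
            cluster_size, CLUSTER_SIZE_MIN, CLUSTER_SIZE_MAX
        ));
    }
    if !refcount_bits.is_power_of_two() || refcount_bits > MAX_REFCOUNT_BITS {
        return Err(format!(
            "refcount bits {} must be a power of 2 and at most {}",
            refcount_bits, MAX_REFCOUNT_BITS
        ));
    }
    Ok(())
}

fn main() {
    // 10 GiB image with the default 64 KiB clusters and 16-bit refcounts.
    assert!(validate_qcow2_opts(10 << 30, 1 << 16, 16).is_ok());
    // 192 KiB is not a power of two, so it is rejected.
    assert!(validate_qcow2_opts(10 << 30, 3 << 16, 16).is_err());
}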
+ +use std::{cell::RefCell, collections::HashMap, rc::Rc}; + +use anyhow::{bail, Result}; +use byteorder::{BigEndian, ByteOrder}; +use log::{error, warn}; + +use crate::qcow2::SyncAioInfo; + +const CACHE_DEFAULT_SIZE: usize = 1; +pub const ENTRY_SIZE_U16: usize = 2; +pub const ENTRY_SIZE_U64: usize = 8; + +#[derive(Clone)] +pub struct DirtyInfo { + /// If the entry is marked dirty, it needs to be rewritten back to the disk. + pub is_dirty: bool, + /// The start of the dirty area. + pub start: u64, + /// The end of the dirty area. + pub end: u64, +} + +impl Default for DirtyInfo { + fn default() -> Self { + Self { + is_dirty: false, + start: u64::MAX, + end: 0, + } + } +} + +impl DirtyInfo { + pub fn clear(&mut self) { + self.is_dirty = false; + self.start = u64::MAX; + self.end = 0; + } +} + +#[derive(Clone, Default)] +pub struct CacheTable { + /// If the table is marked dirty, it needs to be rewritten back to the disk. + pub dirty_info: DirtyInfo, + /// Lru hit count. + pub lru_count: u64, + /// Host offset of cached table. + pub addr: u64, + /// The size of an entry in bytes. + entry_size: usize, + /// Buffer of table data. + table_data: Vec, +} + +impl CacheTable { + pub fn new(addr: u64, table_data: Vec, entry_size: usize) -> Result { + if entry_size == 0 { + bail!("Invalid entry size"); + } + Ok(Self { + dirty_info: Default::default(), + lru_count: 0, + addr, + entry_size, + table_data, + }) + } + + fn be_read(&self, idx: usize) -> Result { + let start = idx * self.entry_size; + let end = start + self.entry_size; + if end > self.table_data.len() { + bail!("Invalid idx {}", idx); + } + let v = match self.entry_size { + ENTRY_SIZE_U16 => u64::from(BigEndian::read_u16(&self.table_data[start..end])), + ENTRY_SIZE_U64 => BigEndian::read_u64(&self.table_data[start..end]), + _ => bail!("Unsupported entry size {}", self.entry_size), + }; + Ok(v) + } + + pub fn get_entry_num(&self) -> usize { + self.table_data.len() / self.entry_size + } + + #[inline(always)] + pub fn get_entry_map(&self, idx: usize) -> Result { + self.be_read(idx) + } + + #[inline(always)] + pub fn set_entry_map(&mut self, idx: usize, value: u64) -> Result<()> { + let start = idx * self.entry_size; + let end = start + self.entry_size; + if end > self.table_data.len() { + bail!("Invalid idx {}", idx); + } + match self.entry_size { + ENTRY_SIZE_U16 => BigEndian::write_u16(&mut self.table_data[start..end], value as u16), + ENTRY_SIZE_U64 => BigEndian::write_u64(&mut self.table_data[start..end], value), + _ => bail!("Unsupported entry size {}", self.entry_size), + } + + let dirty_info = &mut self.dirty_info; + dirty_info.start = std::cmp::min(dirty_info.start, start as u64); + dirty_info.end = std::cmp::max(dirty_info.end, end as u64); + dirty_info.is_dirty = true; + Ok(()) + } + + pub fn get_value(&self) -> &[u8] { + &self.table_data + } +} + +#[derive(Clone, Default)] +pub struct Qcow2Cache { + /// Max size of the cache map. + pub max_size: usize, + /// LRU count which record the latest count and increased when cache is accessed. + pub lru_count: u64, + pub cache_map: HashMap>>, + /// Used to store the modified CacheTable. 
+ dirty_tables: Vec>>, +} + +impl Qcow2Cache { + pub fn new(mut max_size: usize) -> Self { + if max_size == 0 { + max_size = CACHE_DEFAULT_SIZE; + warn!( + "The cache max size is 0, use the default value {}", + CACHE_DEFAULT_SIZE + ); + } + Self { + max_size, + lru_count: 0, + cache_map: HashMap::with_capacity(max_size), + dirty_tables: Vec::with_capacity(max_size), + } + } + + fn check_refcount(&mut self) { + if self.lru_count < u64::MAX { + return; + } + warn!("refcount reaches the max limit and is reset to 0"); + self.lru_count = 0; + for (_, entry) in self.cache_map.iter() { + entry.borrow_mut().lru_count = 0; + } + } + + pub fn contains_keys(&self, key: u64) -> bool { + self.cache_map.contains_key(&key) + } + + pub fn get(&mut self, key: u64) -> Option<&Rc>> { + self.check_refcount(); + let entry = self.cache_map.get(&key)?; + // LRU replace algorithm. + entry.borrow_mut().lru_count = self.lru_count; + self.lru_count += 1; + Some(entry) + } + + pub fn clear_cache(&mut self) { + self.cache_map.clear(); + self.dirty_tables.clear(); + } + + pub fn lru_replace( + &mut self, + key: u64, + entry: Rc>, + ) -> Option>> { + let mut replaced_entry: Option>> = None; + let mut lru_count = u64::MAX; + let mut target_idx: u64 = 0; + self.check_refcount(); + entry.borrow_mut().lru_count = self.lru_count; + self.lru_count += 1; + + if self.cache_map.len() < self.max_size { + self.cache_map.insert(key, entry); + return replaced_entry; + } + + for (key, entry) in self.cache_map.iter() { + let borrowed_entry = entry.borrow(); + if borrowed_entry.lru_count < lru_count { + lru_count = borrowed_entry.lru_count; + replaced_entry = Some(entry.clone()); + target_idx = *key; + } + } + self.cache_map.remove(&target_idx); + self.cache_map.insert(key, entry); + replaced_entry + } + + pub fn clean_up_dirty_cache(&mut self) { + self.cache_map + .retain(|_, value| !value.borrow().dirty_info.is_dirty); + } + + pub fn flush(&mut self, sync_aio: Rc>) -> Result<()> { + let mut ret = Ok(()); + for entry in self.dirty_tables.iter() { + let mut borrowed_entry = entry.borrow_mut(); + if !borrowed_entry.dirty_info.is_dirty { + continue; + } + sync_aio + .borrow_mut() + .write_dirty_info( + borrowed_entry.addr, + borrowed_entry.get_value(), + borrowed_entry.dirty_info.start, + borrowed_entry.dirty_info.end, + ) + .unwrap_or_else(|e| { + error!("Failed to flush cache, {:?}", e.to_string()); + ret = Err(e); + }); + borrowed_entry.dirty_info.clear(); + } + self.dirty_tables.clear(); + + ret + } + + pub fn add_dirty_table(&mut self, table: Rc>) { + self.dirty_tables.push(table); + } +} + +#[cfg(test)] +mod test { + use std::{cell::RefCell, rc::Rc}; + + use super::{CacheTable, Qcow2Cache, ENTRY_SIZE_U64}; + + #[test] + fn test_cache_entry() { + let buf: Vec = vec![0x00, 0x01, 0x02, 0x03, 0x04]; + let mut vec = Vec::new(); + for i in 0..buf.len() { + vec.append(&mut buf[i].to_be_bytes().to_vec()); + } + let mut entry = CacheTable::new(0x00_u64, vec, ENTRY_SIZE_U64).unwrap(); + assert_eq!(entry.get_entry_map(0).unwrap(), 0x00); + assert_eq!(entry.get_entry_map(3).unwrap(), 0x03); + assert_eq!(entry.get_entry_map(4).unwrap(), 0x04); + + entry.set_entry_map(0x02, 0x09).unwrap(); + assert_eq!(entry.get_entry_map(2).unwrap(), 0x09); + } + + #[test] + fn test_qcow2_cache() { + let buf: Vec = vec![0x00, 0x01, 0x02, 0x03, 0x04]; + let mut vec = Vec::new(); + for i in 0..buf.len() { + vec.append(&mut buf[i].to_be_bytes().to_vec()); + } + let entry_0 = Rc::new(RefCell::new( + CacheTable::new(0x00_u64, vec.clone(), ENTRY_SIZE_U64).unwrap(), + 
)); + entry_0.borrow_mut().lru_count = 0; + let entry_1 = Rc::new(RefCell::new( + CacheTable::new(0x00_u64, vec.clone(), ENTRY_SIZE_U64).unwrap(), + )); + entry_1.borrow_mut().lru_count = 1; + let entry_2 = Rc::new(RefCell::new( + CacheTable::new(0x00_u64, vec.clone(), ENTRY_SIZE_U64).unwrap(), + )); + entry_2.borrow_mut().lru_count = 2; + let entry_3 = Rc::new(RefCell::new( + CacheTable::new(0x00_u64, vec.clone(), ENTRY_SIZE_U64).unwrap(), + )); + entry_3.borrow_mut().lru_count = 3; + + let mut qcow2_cache: Qcow2Cache = Qcow2Cache::new(3); + assert!(qcow2_cache.lru_replace(0x00, entry_0).is_none()); + assert!(qcow2_cache.lru_replace(0x01, entry_1).is_none()); + assert!(qcow2_cache.lru_replace(0x02, entry_2).is_none()); + assert!(qcow2_cache.lru_replace(0x03, entry_3).is_some()); + } + + #[test] + fn test_get_cache() { + let cnt = 200_u64; + let mut vec = Vec::new(); + for i in 0..cnt { + vec.append(&mut i.to_be_bytes().to_vec()); + } + let addr = 12345678; + let entry = Rc::new(RefCell::new( + CacheTable::new(addr, vec, ENTRY_SIZE_U64).unwrap(), + )); + + let mut qcow2_cache = Qcow2Cache::new(2); + qcow2_cache.lru_replace(addr, entry); + qcow2_cache.lru_count = u64::MAX - cnt / 2; + // Not in cache. + assert!(qcow2_cache.get(0).is_none()); + assert!(qcow2_cache.get(addr + 10).is_none()); + + for i in 0..cnt { + let value = qcow2_cache.get(addr).unwrap(); + let v = value.borrow_mut().get_entry_map(i as usize).unwrap(); + assert_eq!(v, i); + } + assert_eq!(qcow2_cache.lru_count, cnt / 2); + + // Entry index invalid. + let value = qcow2_cache.get(addr).unwrap(); + let v = value.borrow_mut().get_entry_map(cnt as usize + 1); + assert!(v.err().unwrap().to_string().contains("Invalid idx")); + } +} diff --git a/block_backend/src/qcow2/check.rs b/block_backend/src/qcow2/check.rs new file mode 100644 index 0000000000000000000000000000000000000000..4313111a7a58483c455c56df9f8b60ab736ecd51 --- /dev/null +++ b/block_backend/src/qcow2/check.rs @@ -0,0 +1,1148 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
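Editor's note: the cache code above flushes only what actually changed: `CacheTable::set_entry_map()` widens the table's `dirty_info` byte window, and `Qcow2Cache::flush()` later hands exactly that window to `write_dirty_info()`. The test-style sketch below would sit next to the existing tests in qcow2/cache.rs (assuming the same `use super::{CacheTable, ENTRY_SIZE_U64};` import) and is not part of this patch; it shows how the window grows as entries are modified.

#[test]
fn dirty_range_sketch() {
    // 8 u64 entries = 64 bytes of table data, nominally cached for host offset 0x10000.
    let mut table =
        CacheTable::new(0x10000, vec![0u8; 8 * ENTRY_SIZE_U64], ENTRY_SIZE_U64).unwrap();

    // Touching entries 2 and 5 grows the dirty window to bytes [16, 48).
    table.set_entry_map(2, 0xdead_beef).unwrap();
    table.set_entry_map(5, 0xcafe).unwrap();

    assert!(table.dirty_info.is_dirty);
    assert_eq!(table.dirty_info.start, 16); // 2 * ENTRY_SIZE_U64
    assert_eq!(table.dirty_info.end, 48); // (5 + 1) * ENTRY_SIZE_U64
}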
+ +use std::{cell::RefCell, cmp::Ordering, mem::size_of, rc::Rc}; + +use anyhow::{bail, Context, Result}; +use byteorder::{BigEndian, ByteOrder}; + +use crate::qcow2::{ + bytes_to_clusters, + cache::{CacheTable, ENTRY_SIZE_U16, ENTRY_SIZE_U64}, + header::QcowHeader, + is_aligned, + refcount::Qcow2DiscardType, + snapshot::{QcowSnapshotHeader, QCOW2_MAX_SNAPSHOTS}, + table::Qcow2ClusterType, + Qcow2Driver, SyncAioInfo, ENTRY_BITS, ENTRY_SIZE, L1_RESERVED_MASK, L1_TABLE_OFFSET_MASK, + L2_STD_RESERVED_MASK, L2_TABLE_OFFSET_MASK, METADATA_OVERLAP_CHECK_ACTIVEL2, + METADATA_OVERLAP_CHECK_INACTIVEL2, QCOW2_MAX_L1_SIZE, QCOW2_OFFSET_COPIED, QCOW2_OFLAG_ZERO, + REFCOUNT_TABLE_OFFSET_MASK, REFCOUNT_TABLE_RESERVED_MASK, +}; +use crate::{output_msg, CheckResult, FIX_ERRORS, FIX_LEAKS}; +use util::{ + aio::{raw_write_zeroes, OpCode}, + byte_code::ByteCode, + num_ops::div_round_up, + offset_of, +}; + +pub struct Qcow2Check { + pub res: CheckResult, + pub refblock: RefcountBlock, + pub fix: u64, + pub quite: bool, +} + +impl Qcow2Check { + pub fn new(fix: u64, quite: bool, entry_bytes: usize, table_size: usize) -> Self { + Self { + res: CheckResult::default(), + refblock: RefcountBlock::new(entry_bytes, table_size), + fix, + quite, + } + } +} + +pub struct RefcountBlock { + data: Vec, + table_size: usize, + entry_bytes: usize, + max_refcount: u64, + nb_clusters: u64, +} + +impl RefcountBlock { + fn new(entry_bytes: usize, table_size: usize) -> Self { + Self { + data: vec![0_u8; table_size * entry_bytes], + table_size, + entry_bytes, + max_refcount: ((1 << (entry_bytes * 8)) - 1) as u64, + nb_clusters: table_size as u64, + } + } + + fn reset(&mut self) { + self.data.fill(0); + } + + fn extend_table(&mut self, new_size: usize) { + if new_size <= self.table_size { + return; + }; + let new_table_bytes = new_size * self.entry_bytes; + // SAFETY: Upper limit of new_table_bytes is decided by disk file size. 
+ self.data.resize(new_table_bytes, 0); + self.table_size = new_size; + self.nb_clusters = new_size as u64; + } + + fn get_data(&mut self, start: usize, size: usize) -> Vec { + let mut start_bytes = start * self.entry_bytes; + let mut end_bytes = (start + size) * self.entry_bytes; + start_bytes = std::cmp::min(start_bytes, self.data.len()); + end_bytes = std::cmp::min(end_bytes, self.data.len()); + self.data[start_bytes..end_bytes].to_vec() + } + + #[inline(always)] + fn get_refcount(&mut self, idx: usize) -> Result { + if idx >= self.table_size { + return Ok(0); + } + + let start_bytes = idx * self.entry_bytes; + let end_bytes = start_bytes + self.entry_bytes; + let value = match self.entry_bytes { + ENTRY_SIZE_U16 => u64::from(BigEndian::read_u16(&self.data[start_bytes..end_bytes])), + ENTRY_SIZE_U64 => BigEndian::read_u64(&self.data[start_bytes..end_bytes]), + _ => bail!("Entry size is unsupported"), + }; + Ok(value) + } + + #[inline(always)] + fn set_refcount(&mut self, idx: usize, value: u64) -> Result<()> { + if idx >= self.table_size { + bail!("Idx {:?} exceed table size {}", idx, self.table_size) + } + + let start_bytes = idx * self.entry_bytes; + let end_bytes = start_bytes + self.entry_bytes; + match self.entry_bytes { + ENTRY_SIZE_U16 => { + BigEndian::write_u16(&mut self.data[start_bytes..end_bytes], value as u16) + } + ENTRY_SIZE_U64 => BigEndian::write_u64(&mut self.data[start_bytes..end_bytes], value), + _ => bail!("Entry size is unsupported"), + } + Ok(()) + } + + /// Alloc blocks based on reference recorded in the refcount block, and the reference + /// of these clusters should be updated later by calling set_refcount function. + /// + /// # Arguments + /// + /// * `total_counts` - Total number of consecutive clusters that need to be allocated. + /// * `cluster_bits` - Bits of cluster. + /// * `first_free_cluster` - Alloc consecutive free data from first_free_cluster. + /// * `sync_aio` - The newly allocated data block needs to reset to 0 on disk. + fn alloc_clusters( + &mut self, + total_counts: u64, + cluster_bits: u64, + first_free_cluster: &mut usize, + sync_aio: Rc>, + ) -> Result { + let cluster_size = 1 << cluster_bits; + let mut first_update: bool = true; + let mut cluster_idx = *first_free_cluster; + let mut continue_clusters: usize = 0; + while continue_clusters < total_counts as usize { + if self.get_refcount(cluster_idx)? == 0 { + continue_clusters += 1; + if first_update { + *first_free_cluster = cluster_idx; + first_update = false; + } + } else { + continue_clusters = 0; + } + + cluster_idx += 1; + } + + if cluster_idx > self.table_size { + self.extend_table(cluster_idx); + } + + let start_idx = cluster_idx - total_counts as usize; + let zero_buf = vec![0_u8; cluster_size]; + for i in 0..total_counts { + let cluster_offset = (start_idx as u64 + i) << cluster_bits; + self.set_refcount(start_idx + i as usize, 1)?; + // Write zero to disk + let ret = raw_write_zeroes( + sync_aio.borrow_mut().fd, + cluster_offset as usize, + cluster_size as u64, + ); + if ret < 0 { + sync_aio + .borrow_mut() + .write_buffer(cluster_offset, &zero_buf)?; + } + } + + Ok((start_idx as u64) << cluster_bits) + } +} + +impl Qcow2Driver { + /// Read the snapshot table from the disk and verify it. 
+ pub(crate) fn check_read_snapshot_table( + &mut self, + res: &mut CheckResult, + quite: bool, + fix: u64, + ) -> Result<()> { + let mut extra_data_dropped: i32 = 0; + let mut nb_clusters_reduced: i32 = 0; + let mut nb_snapshots = self.header.nb_snapshots; + + // Validate the number of snapshots. + if nb_snapshots as usize > QCOW2_MAX_SNAPSHOTS { + if fix & FIX_ERRORS == 0 { + res.err_num += 1; + bail!("You can force-remove all {} overhanging snapshots with \"stratovirt-img check -r all\"",nb_snapshots as usize - QCOW2_MAX_SNAPSHOTS); + } + + output_msg!( + quite, + "Discarding {} overhanging snapshots", + nb_snapshots as usize - QCOW2_MAX_SNAPSHOTS + ); + nb_clusters_reduced += (nb_snapshots as usize - QCOW2_MAX_SNAPSHOTS) as i32; + nb_snapshots = QCOW2_MAX_SNAPSHOTS as u32; + } + + let snapshot_table_length = size_of::() as u64; + let snapshot_table_offset = self.header.snapshots_offset; + // Validate snapshot table. + if (u64::MAX - u64::from(nb_snapshots) * snapshot_table_length) < snapshot_table_offset + || !is_aligned(self.header.cluster_size(), snapshot_table_offset) + { + res.err_num += 1; + self.header.snapshots_offset = 0; + self.header.nb_snapshots = 0; + bail!("Snapshot table can't exceeds the limit and it's offset must be aligned to cluster size {}", self.header.cluster_size()); + } + + match self.snapshot.load_snapshot_table( + snapshot_table_offset, + nb_snapshots, + fix & FIX_ERRORS != 0, + ) { + Ok((cluster_reduced, data_dropped)) => { + nb_clusters_reduced += cluster_reduced; + extra_data_dropped += data_dropped; + } + Err(e) => { + res.err_num += 1; + self.snapshot.snapshot_table_offset = 0; + self.snapshot.nb_snapshots = 0; + bail!("ERROR failed to read the snapshot table: {}", e); + } + } + res.corruptions += nb_clusters_reduced + extra_data_dropped; + + // Update snapshot in header + // This operations will leaks clusters(extra clusters of snapshot table will be dropped), which will + // be fixed in function of check_refcounts later. + if nb_clusters_reduced > 0 { + let new_nb_snapshots = self.snapshot.nb_snapshots; + let buf = new_nb_snapshots.as_bytes().to_vec(); + let offset = offset_of!(QcowHeader, nb_snapshots); + if let Err(e) = self + .sync_aio + .borrow_mut() + .write_buffer(offset as u64, &buf) + .with_context(|| { + "Failed to update the snapshot count in the image header".to_string() + }) + { + res.err_num += 1; + bail!( + "Failed to update the snapshot count in the image header: {}", + e + ); + } + + self.header.nb_snapshots = new_nb_snapshots; + res.corruptions_fixed += nb_clusters_reduced; + res.corruptions -= nb_clusters_reduced; + } + + Ok(()) + } + + pub fn check_fix_snapshot_table( + &mut self, + res: &mut CheckResult, + quite: bool, + fix: u64, + ) -> Result<()> { + if res.corruptions != 0 && fix & FIX_ERRORS != 0 { + if let Err(e) = self.write_snapshots_to_disk() { + res.err_num += 1; + output_msg!(quite, "ERROR failed to update snapshot table {:?}", e); + } + + res.corruptions_fixed += res.corruptions; + res.corruptions = 0; + } + + Ok(()) + } + + /// Rebuild a new refcount table according to metadata, including active l1 table, active l2 table, + /// snapshot table, refcount table and refcount block. 
+ pub(crate) fn check_refcounts(&mut self, check: &mut Qcow2Check) -> Result<()> { + let cluster_bits = u64::from(self.header.cluster_bits); + let cluster_size = 1 << cluster_bits; + let virtual_size = self.header.size; + check.res.disk_frag.total_clusters = div_round_up(virtual_size, cluster_size).unwrap(); + + let file_len = self.driver.disk_size()?; + let nb_clusters = div_round_up(file_len, cluster_size).unwrap(); + if nb_clusters > i32::MAX as u64 { + check.res.err_num += 1; + bail!("Number of clusters exceed {:?}", i32::MAX); + } + + // Rebuild refcount block data in memory + self.calculate_refcount(check)?; + + // Compare the refcount block in Memory with real refcount block. + let pre_compare_res = check.res; + self.compare_refcounts(check)?; + + if check.res.need_rebuild && check.fix & FIX_ERRORS != 0 { + let old_res = check.res; + let mut fresh_leak: i32 = 0; + + output_msg!(check.quite, "Rebuilding refcount structure"); + self.rebuild_refcount_structure(check)?; + + check.res.corruptions = 0; + check.res.leaks = 0; + check.res.need_rebuild = false; + check.refblock.reset(); + + // This operation will leaks the old refcount table, so fix it. + self.calculate_refcount(check)?; + if check.fix & FIX_LEAKS != 0 { + let saved_res = check.res; + check.res = CheckResult::default(); + self.compare_refcounts(check)?; + if check.res.need_rebuild { + output_msg!( + check.quite, + "ERROR rebuilt refcount structure is still broken" + ); + } + fresh_leak = check.res.leaks; + check.res = saved_res; + } + + if check.res.corruptions < old_res.corruptions { + check.res.corruptions_fixed += old_res.corruptions - check.res.corruptions; + } + if check.res.leaks < old_res.leaks { + check.res.leaks_fixed += old_res.leaks - check.res.leaks; + } + + check.res.leaks += fresh_leak; + } else if check.fix != 0 { + if check.res.need_rebuild { + bail!("ERROR need to rebuild refcount structures"); + } + + if check.res.leaks | check.res.corruptions != 0 { + check.res = pre_compare_res; + check.fix = 0; + self.compare_refcounts(check)?; + } + } + + // Check OFLAG_COPIED in l1 table and l2 table. + self.check_oflag_copied(check) + } + + /// Calculate the reference of all cluster data according to l1 and l2 table. + fn calculate_refcount(&mut self, check: &mut Qcow2Check) -> Result<()> { + let file_len = self.driver.disk_size()?; + + // Increase the refcount of qcow2 header. + self.increase_refcounts( + 0, + self.header.cluster_size(), + file_len, + u64::from(self.header.cluster_bits), + check, + )?; + + // Increase the refcount of active l1 table. + let active_l1_offset = self.header.l1_table_offset; + let active_l1_size = self.header.l1_size; + self.check_refcounts_l1(active_l1_offset, u64::from(active_l1_size), true, check)?; + + // Increase the refcount of snapshot table. 
+ for idx in 0..self.header.nb_snapshots { + let snap = self.snapshot.snapshots[idx as usize].clone(); + let snap_l1_offset = snap.l1_table_offset; + let snap_l1_size = snap.l1_size; + if !is_aligned(self.header.cluster_size(), snap_l1_offset) { + output_msg!( + check.quite, + "ERROR snapshot {:?}({:?}) l1_offset={:#X} L1 table is not cluster aligned; snapshot table entry corrupted", + snap.id, snap.name, snap_l1_offset + ); + check.res.corruptions += 1; + continue; + } + + if u64::from(snap_l1_size) > QCOW2_MAX_L1_SIZE / ENTRY_SIZE { + output_msg!( + check.quite, + "ERROR snapshot {:?}({:?}) l1_size={:?} l1 table is too large; snapshot table entry courropted", + snap.id, snap.name, snap_l1_size + ); + check.res.corruptions += 1; + continue; + } + + self.check_refcounts_l1(snap_l1_offset, u64::from(snap_l1_size), false, check)?; + } + + let snap_table_offset = self.header.snapshots_offset; + let snap_table_size = self.snapshot.snapshot_size; + if snap_table_offset != 0 && snap_table_size != 0 { + self.increase_refcounts( + snap_table_offset, + snap_table_size, + file_len, + u64::from(self.header.cluster_bits), + check, + )?; + } + + let reftable_offset = self.header.refcount_table_offset; + let reftable_bytes = + u64::from(self.header.refcount_table_clusters) * self.header.cluster_size(); + self.increase_refcounts( + reftable_offset, + reftable_bytes, + file_len, + u64::from(self.header.cluster_bits), + check, + )?; + + self.check_refcount_block(check) + } + + /// Traverse all l1 tables and data blocks indexed by l1 table and calculate the real + /// reference of these clusters, and performs some checks as well. + fn check_refcounts_l1( + &mut self, + l1_offset: u64, + l1_size: u64, + is_active: bool, + check: &mut Qcow2Check, + ) -> Result<()> { + if l1_offset == 0 || l1_size == 0 { + return Ok(()); + } + + let l1_size_bytes = l1_size * ENTRY_SIZE; + let file_len = self.driver.disk_size()?; + // Increase the refcount of cluster which l1 table is located. + self.increase_refcounts( + l1_offset, + l1_size_bytes, + file_len, + u64::from(self.header.cluster_bits), + check, + )?; + let l1_table = self + .sync_aio + .borrow_mut() + .read_ctrl_cluster(l1_offset, l1_size)?; + + // Entry in l1 table + for idx in 0..l1_size { + let l1_entry = match l1_table.get(idx as usize) { + Some(v) => v, + None => continue, + }; + + if l1_entry == &0 { + continue; + } + + // The error in reserved field of l1 entry not need to be fixed, as it not effect the basic functions + if l1_entry & L1_RESERVED_MASK != 0 { + output_msg!( + check.quite, + "ERROR found L1 entry with reserved bits set {:#X}", + l1_entry + ); + check.res.corruptions += 1; + } + + let l2_offset = l1_entry & L1_TABLE_OFFSET_MASK; + self.increase_refcounts( + l2_offset, + self.header.cluster_size(), + file_len, + u64::from(self.header.cluster_bits), + check, + )?; + + if !is_aligned(self.header.cluster_size(), l2_offset) { + output_msg!( + check.quite, + "ERROR l2_offset={:#X}: Table is not \n\ + cluster aligned; l1 entry corrupted", + l2_offset + ); + check.res.corruptions += 1; + } + + self.check_refcount_l2(l2_offset, is_active, file_len, check)?; + } + + // The l2 entry on disk may be modified. 
+ if check.fix & FIX_ERRORS != 0 { + self.table.l2_table_cache.clear_cache(); + } + + Ok(()) + } + + fn check_refcount_l2( + &mut self, + l2_offset: u64, + is_active: bool, + file_len: u64, + check: &mut Qcow2Check, + ) -> Result<()> { + let cluster_bits = u64::from(self.header.cluster_bits); + let cluster_size = 1 << cluster_bits; + let l2_size = cluster_size >> ENTRY_BITS; + + // Read l2 table from disk + let mut l2_table: Vec; + match self + .sync_aio + .borrow_mut() + .read_ctrl_cluster(l2_offset, l2_size) + { + Ok(buf) => l2_table = buf, + Err(e) => { + check.res.err_num += 1; + bail!("ERROR: I/O error in reading l2 table {}", e); + } + }; + + let err_flag = if check.fix & FIX_ERRORS != 0 { + "Repairing".to_string() + } else { + "ERROR".to_string() + }; + let mut next_continue_offset: u64 = 0; + for l2_idx in 0..l2_size { + let l2_entry = match l2_table.get(l2_idx as usize) { + Some(value) => *value, + None => continue, + }; + let cluster_type = Qcow2ClusterType::get_cluster_type(l2_entry); + // The error in reserved field of l2 entry not need to be fixed, as it not effect the basic functions + if cluster_type != Qcow2ClusterType::Compressed && l2_entry & L2_STD_RESERVED_MASK != 0 + { + output_msg!( + check.quite, + "ERROR found l2 entry with reserved bits set: {:#X}", + l2_entry + ); + check.res.corruptions += 1; + } + + match cluster_type { + Qcow2ClusterType::Compressed => { + output_msg!(check.quite, "Compressed is not supported"); + } + Qcow2ClusterType::ZeroPlain | Qcow2ClusterType::Unallocated => continue, + Qcow2ClusterType::ZeroAlloc | Qcow2ClusterType::Normal => { + let cluster_offset = l2_entry & L1_TABLE_OFFSET_MASK; + + if !is_aligned(cluster_size, cluster_offset) { + check.res.corruptions += 1; + let contains_data = l2_entry & QCOW2_OFLAG_ZERO != 0; + if !contains_data { + output_msg!( + check.quite, + "{} offset={:#X}: Preallocated cluster is not properly aligned; L2 entry corrupted.", + err_flag, cluster_offset + ); + if check.fix & FIX_ERRORS != 0 { + self.repair_l2_entry( + &mut l2_table, + l2_offset, + l2_idx, + is_active, + check, + )?; + } + continue; + } else { + output_msg!( + check.quite, + "ERROR offset={:#X}: Data cluster is not properly aligned; L2 entry corrupted.", + cluster_offset + ); + } + } + + // Disk Fragmentation + check.res.disk_frag.allocated_clusters += 1; + if next_continue_offset != 0 && next_continue_offset != cluster_offset { + check.res.disk_frag.fragments += 1; + } + next_continue_offset = cluster_offset + cluster_size; + + // Mark cluster in refcount table. + self.increase_refcounts( + cluster_offset, + cluster_size, + file_len, + cluster_bits, + check, + )?; + } + } + } + Ok(()) + } + + /// Fix l2 entry with oflag zero. 
+ fn repair_l2_entry( + &mut self, + l2_table: &mut [u64], + l2_offset: u64, + l2_idx: u64, + is_active: bool, + check: &mut Qcow2Check, + ) -> Result<()> { + let ignore = if is_active { + METADATA_OVERLAP_CHECK_ACTIVEL2 + } else { + METADATA_OVERLAP_CHECK_INACTIVEL2 + }; + let l2e_offset = l2_offset + l2_idx * ENTRY_SIZE; + l2_table[l2_idx as usize] = QCOW2_OFLAG_ZERO; + + let ret = self.check_overlap(ignore, l2e_offset, ENTRY_SIZE); + if ret != 0 { + bail!("ERROR: Overlap check failed"); + } + + // Write sync to disk + let buf = QCOW2_OFLAG_ZERO.to_be_bytes().to_vec(); + if let Err(e) = self.sync_aio.borrow_mut().write_buffer(l2e_offset, &buf) { + bail!("ERROR: Failed to overwrite L2 table entry: {:?}", e); + }; + + check.res.corruptions -= 1; + check.res.corruptions_fixed += 1; + Ok(()) + } + + fn check_refcount_block(&mut self, check: &mut Qcow2Check) -> Result<()> { + let cluster_bits = u64::from(self.header.cluster_bits); + let cluster_size = 1 << cluster_bits; + let file_len = self.driver.disk_size()?; + let nb_clusters = bytes_to_clusters(file_len, cluster_size)?; + let err_flag = if check.fix & FIX_ERRORS != 0 { + "Repairing".to_string() + } else { + "ERROR".to_string() + }; + + let reftable = self.refcount.refcount_table.clone(); + for (idx, reftable_entry) in reftable.iter().enumerate() { + let refblock_offset = reftable_entry & REFCOUNT_TABLE_OFFSET_MASK; + let cluster_idx = refblock_offset >> cluster_bits; + if reftable_entry & REFCOUNT_TABLE_RESERVED_MASK != 0 { + output_msg!( + check.quite, + "ERROR refcount table entry {:?} has reserved bits set", + idx + ); + check.res.corruptions += 1; + check.res.need_rebuild = true; + continue; + } + + if !is_aligned(cluster_size, refblock_offset) { + output_msg!( + check.quite, + "ERROR refcount block {:?} is not cluster aligned; refcount table entry corrupted", + idx + ); + check.res.corruptions += 1; + check.res.need_rebuild = true; + continue; + } + + if cluster_idx >= nb_clusters { + output_msg!( + check.quite, + "{} refcount block {} is outside image", + err_flag, + cluster_idx + ); + check.res.corruptions += 1; + + if check.fix & FIX_ERRORS != 0 { + // Need to try resize the image size. + check.res.need_rebuild = true; + } + continue; + } + + if refblock_offset != 0 { + self.increase_refcounts( + refblock_offset, + cluster_size, + file_len, + cluster_bits, + check, + )?; + let rc_value = check.refblock.get_refcount(cluster_idx as usize)?; + // The refcount for data clusters of refcount block must be 1 + if rc_value != 1 { + output_msg!( + check.quite, + "ERROR refcount block {:?}, refcount={:?}", + idx, + rc_value + ); + check.res.need_rebuild = true; + } + } + } + + Ok(()) + } + + /// Compare the real references of clusters with the references recorded on the disk, + /// and choose whether to repair it on the disk. 
+ pub(crate) fn compare_refcounts(&mut self, check: &mut Qcow2Check) -> Result<()> { + self.table.load_l1_table()?; + self.table.l2_table_cache.clear_cache(); + self.load_refcount_table()?; + self.refcount.refcount_blk_cache.clear_cache(); + + let mut rc_value_1: u16; + let mut rc_value_2: u16; + let cluster_bits = self.header.cluster_bits; + let file_len = self.driver.disk_size()?; + let nb_clusters = div_round_up(file_len, self.header.cluster_size()).unwrap(); + for cluster_idx in 0..nb_clusters { + match self.refcount.get_refcount(cluster_idx << cluster_bits) { + Err(e) => { + output_msg!(check.quite, "Cant't get refcount for cluster {:?}", e); + check.res.err_num += 1; + continue; + } + Ok(value) => rc_value_1 = value, + }; + rc_value_2 = check.refblock.get_refcount(cluster_idx as usize)? as u16; + + if rc_value_1 != 0 || rc_value_2 != 0 { + check.res.image_end_offset = (cluster_idx + 1) << cluster_bits; + } + + if rc_value_1 != rc_value_2 { + let mut need_fixed: bool = false; + if rc_value_1 == 0 { + // The refcount block may not have assigned cluster, + // so need to rebuild refcount structure. + check.res.need_rebuild = true; + } else if (rc_value_1 > rc_value_2 && check.fix & FIX_LEAKS != 0) + || (rc_value_1 < rc_value_2 && check.fix & FIX_ERRORS != 0) + { + need_fixed = true; + } + let err_flag = if need_fixed { + "Repairing" + } else if rc_value_1 > rc_value_2 { + "Leaked" + } else { + "ERROR" + }; + output_msg!( + check.quite, + "{} cluster {:?} refcount={:?} reference={:?}", + err_flag, + cluster_idx, + rc_value_1, + rc_value_2 + ); + + if need_fixed { + let added = i32::from(rc_value_2) - i32::from(rc_value_1); + let cluster_offset = cluster_idx << cluster_bits; + self.refcount.update_refcount( + cluster_offset, + 1, + added, + false, + &Qcow2DiscardType::Always, + )?; + if added < 0 { + check.res.leaks_fixed += 1; + } else { + check.res.corruptions_fixed += 1; + } + continue; + } + + match rc_value_1.cmp(&rc_value_2) { + Ordering::Less => check.res.corruptions += 1, + Ordering::Greater => check.res.leaks += 1, + Ordering::Equal => {} + }; + } + } + + if !self.refcount.discard_list.is_empty() { + self.refcount.sync_process_discards(OpCode::Discard); + } + self.refcount.flush()?; + + Ok(()) + } + + // For the entry in active table, the reference equals to 1 means don't need to copy on write, + // So the oflag of copied must set to nonzero. 
+ fn check_oflag_copied(&mut self, check: &mut Qcow2Check) -> Result<()> { + let l1_size = self.table.l1_table.len(); + let l2_size = div_round_up(self.header.cluster_size(), ENTRY_SIZE).unwrap(); + let mut l1_dirty = false; + let mut l1_corruptions: i32 = 0; + let mut l1_corruptions_fixed: i32 = 0; + + let repair = if check.fix & FIX_ERRORS != 0 { + true + } else if check.fix & FIX_LEAKS != 0 { + check.res.err_num == 0 && check.res.corruptions == 0 && check.res.leaks == 0 + } else { + false + }; + + for l1_idx in 0..l1_size { + let l1_entry = self.table.l1_table[l1_idx]; + let l2_offset = l1_entry & L1_TABLE_OFFSET_MASK; + if l2_offset == 0 { + continue; + } + let rc_value = self.refcount.get_refcount(l2_offset)?; + if (rc_value == 1) ^ (l1_entry & QCOW2_OFFSET_COPIED != 0) { + l1_corruptions += 1; + output_msg!( + check.quite, + "{} OFLAG_COPIED L2 cluster: l1_index={} l1_entry={:#X} refcount={}", + if repair { "Repairing" } else { "ERROR" }, + l1_idx, + l1_entry, + rc_value + ); + if repair { + let new_l1_entry = if rc_value == 1 { + l1_entry | QCOW2_OFFSET_COPIED + } else { + l1_entry & !QCOW2_OFFSET_COPIED + }; + self.table.l1_table[l1_idx] = new_l1_entry; + l1_dirty = true; + l1_corruptions -= 1; + l1_corruptions_fixed += 1; + } + } + + let mut num_repaired: i32 = 0; + let l2_buf = self.load_cluster(l2_offset)?; + let l2_table = Rc::new(RefCell::new(CacheTable::new( + l2_offset, + l2_buf, + ENTRY_SIZE_U64, + )?)); + + for l2_idx in 0..l2_size { + let cluster_entry = l2_table.borrow_mut().get_entry_map(l2_idx as usize)?; + let cluster_offset = cluster_entry & L2_TABLE_OFFSET_MASK; + let cluster_type = Qcow2ClusterType::get_cluster_type(cluster_entry); + + if cluster_type == Qcow2ClusterType::Normal + || cluster_type == Qcow2ClusterType::ZeroAlloc + { + let rc_value = match self.refcount.get_refcount(cluster_offset) { + Ok(value) => value, + Err(_) => continue, + }; + + if (rc_value == 1) ^ (cluster_entry & QCOW2_OFFSET_COPIED != 0) { + check.res.corruptions += 1; + output_msg!( + check.quite, + "{} OFLAG_COPIED data cluster: l2_entry={:#X} refcount={:?}", + if repair { "Repairing" } else { "ERROR" }, + cluster_entry, + rc_value + ); + if repair { + let new_cluster_entry = if rc_value == 1 { + cluster_entry | QCOW2_OFFSET_COPIED + } else { + cluster_entry & !QCOW2_OFFSET_COPIED + }; + num_repaired += 1; + l2_table + .borrow_mut() + .set_entry_map(l2_idx as usize, new_cluster_entry)?; + } + } + } + } + + let mut borrowed_l2 = l2_table.borrow_mut(); + if num_repaired != 0 && borrowed_l2.dirty_info.is_dirty { + if self.check_overlap( + METADATA_OVERLAP_CHECK_ACTIVEL2, + l2_offset, + self.header.cluster_size(), + ) != 0 + { + bail!("ERROR: Could not write L2 table; metadata overlap check failed"); + } + + self.sync_aio.borrow_mut().write_dirty_info( + borrowed_l2.addr, + borrowed_l2.get_value(), + borrowed_l2.dirty_info.start, + borrowed_l2.dirty_info.end, + )?; + borrowed_l2.dirty_info.clear(); + } + drop(borrowed_l2); + check.res.corruptions -= num_repaired; + check.res.corruptions_fixed += num_repaired; + } + + if l1_dirty && repair { + if let Err(e) = self.table.save_l1_table() { + check.res.err_num += 1; + return Err(e); + } + } + check.res.corruptions += l1_corruptions; + check.res.corruptions_fixed += l1_corruptions_fixed; + + // The entry on disk may not be consistent with the cache. 
+ self.table.l2_table_cache.clear_cache(); + Ok(()) + } + + /// The error in refcount block can't be fixed, so has to rebuild the structure of refcount table + fn rebuild_refcount_structure(&mut self, check: &mut Qcow2Check) -> Result<()> { + let mut cluster_idx: u64 = 0; + let mut first_free_cluster: u64 = 0; + let mut reftable_offset: u64 = 0; + let mut new_reftable: Vec = Vec::new(); + let mut reftable_clusters: u64 = 0; + let cluster_bits = u64::from(self.header.cluster_bits); + let refblock_bits: u64 = cluster_bits + 3 - u64::from(self.header.refcount_order); + let refblock_size: u64 = 1 << refblock_bits; + + // self.refblock.nb_clusters means the maximum number of clusters that can be represented by + // the refcount table in memory. During this loop, this value may increase as the Refcount table expands. + // This operation will leaks old refcount table and old refcount block table, and it will be fixed later. + while cluster_idx < check.refblock.nb_clusters { + if check.refblock.get_refcount(cluster_idx as usize)? == 0 { + cluster_idx += 1; + continue; + } + let refblock_idx = (cluster_idx >> refblock_bits) as usize; + let refblock_start = (refblock_idx << refblock_bits) as u64; + // Refblock data with index smaller than refblock_start has been written to disk. + first_free_cluster = std::cmp::max(refblock_start, first_free_cluster); + + // Alloc a new cluster from first_free_cluster. + let refblock_offset = check.refblock.alloc_clusters( + 1, + cluster_bits, + &mut (first_free_cluster as usize), + self.sync_aio.clone(), + )?; + + // Extend the refcount table + if new_reftable.len() <= refblock_idx { + // SAFETY: Upper limit of refblock_idx is decided by disk file size. + new_reftable.resize(refblock_idx + 1, 0); + // Need to reallocate clusters for new refcount table. + reftable_offset = 0; + } + new_reftable[refblock_idx] = refblock_offset; + + // Alloc clusters for new refcount table. + if refblock_idx + 1 >= (check.refblock.nb_clusters >> refblock_bits) as usize + && reftable_offset == 0 + { + let reftable_size = new_reftable.len() as u64; + reftable_clusters = + bytes_to_clusters(reftable_size * ENTRY_SIZE, self.header.cluster_size())?; + reftable_offset = check.refblock.alloc_clusters( + reftable_clusters, + cluster_bits, + &mut (first_free_cluster as usize), + self.sync_aio.clone(), + )?; + } + + // New allocated refblock offset is overlap with other matedata. + if self.check_overlap(0, refblock_offset, self.header.cluster_size()) != 0 { + bail!("ERROR writing refblock"); + } + + // Refcount block data written back to disk + let start = refblock_idx * refblock_size as usize; + let size = refblock_size; + let refblock_buf = check.refblock.get_data(start, size as usize); + self.sync_aio + .borrow_mut() + .write_buffer(refblock_offset, &refblock_buf)?; + + // All data of this refcount block has been written to disk, so go to the next refcount block. 
+ cluster_idx = refblock_start + refblock_size; + } + + if reftable_offset == 0 { + bail!("ERROR allocating reftable"); + } + + // Write new refcount table to disk + let reftable_size = new_reftable.len(); + if self.check_overlap(0, reftable_offset, reftable_size as u64 * ENTRY_SIZE) != 0 { + bail!("ERROR writing reftable"); + } + self.sync_aio + .borrow_mut() + .write_ctrl_cluster(reftable_offset, &new_reftable)?; + + // Update header message to disk + // Inclust reftable offset and reftable cluster + let mut new_header = self.header.clone(); + new_header.refcount_table_offset = reftable_offset; + new_header.refcount_table_clusters = reftable_clusters as u32; + let header_buf = new_header.to_vec(); + self.sync_aio.borrow_mut().write_buffer(0, &header_buf)?; + self.header.refcount_table_offset = new_header.refcount_table_offset; + self.header.refcount_table_clusters = new_header.refcount_table_clusters; + + // Update the info of refcount table + self.refcount.refcount_table_offset = new_header.refcount_table_offset; + self.refcount.refcount_table_clusters = new_header.refcount_table_clusters; + self.refcount.refcount_table_size = new_reftable.len() as u64; + self.refcount.refcount_table = new_reftable; + self.refcount.refcount_blk_cache.clear_cache(); + + Ok(()) + } + + /// Increase the refcounts for a range of clusters. + fn increase_refcounts( + &mut self, + offset: u64, + size: u64, + file_len: u64, + cluster_bits: u64, + check: &mut Qcow2Check, + ) -> Result<()> { + if size == 0 { + return Ok(()); + } + + let cluster_size = 1 << cluster_bits; + if offset + size > file_len && offset + size - file_len >= cluster_size { + check.res.corruptions += 1; + bail!( + "ERROR: counting reference for region exceeding the end of the file by one cluster or more: offset {:#X} size {:#X}", + offset, size + ); + } + + let mut offset_beg = offset & !(cluster_size - 1); + let offset_end = (offset + size - 1) & !(cluster_size - 1); + while offset_beg <= offset_end { + let cluster_idx = offset_beg >> cluster_bits; + let rc_value = check.refblock.get_refcount(cluster_idx as usize)?; + if rc_value == check.refblock.max_refcount { + output_msg!( + check.quite, + "ERROR: overflow cluster offset={:#X}", + offset_beg + ); + check.res.corruptions += 1; + offset_beg += cluster_size; + continue; + } + check + .refblock + .set_refcount(cluster_idx as usize, rc_value + 1)?; + offset_beg += cluster_size; + } + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::RefcountBlock; + + #[test] + fn test_refcount_block_basic() { + let mut refblock = RefcountBlock::new(2, 10); + assert!(refblock.set_refcount(10, 1).is_err()); + assert_eq!(refblock.max_refcount, 65535); + assert_eq!(refblock.table_size, 10); + assert!(refblock.set_refcount(0, 1).is_ok()); + assert!(refblock.set_refcount(1, 7).is_ok()); + assert!(refblock.set_refcount(9, 9).is_ok()); + + // Get inner dat + let mut vec_1 = 1_u16.to_be_bytes().to_vec(); + let mut vec_2 = 7_u16.to_be_bytes().to_vec(); + vec_1.append(&mut vec_2); + let buf = refblock.get_data(0, 2); + assert_eq!(buf, vec_1); + + // Get refcount + let count = refblock.get_refcount(0).unwrap(); + assert_eq!(count, 1); + let count = refblock.get_refcount(9).unwrap(); + assert_eq!(count, 9); + + refblock.extend_table(10); + refblock.extend_table(11); + let count = refblock.get_refcount(10).unwrap(); + assert_eq!(count, 0); + + refblock.reset(); + let count = refblock.get_refcount(9).unwrap(); + assert_eq!(count, 0); + } +} diff --git a/block_backend/src/qcow2/header.rs 
b/block_backend/src/qcow2/header.rs new file mode 100644 index 0000000000000000000000000000000000000000..fa3681d9cc3f014e7b23285f61df5f20a91d68db --- /dev/null +++ b/block_backend/src/qcow2/header.rs @@ -0,0 +1,428 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use anyhow::{bail, Context, Result}; +use byteorder::{BigEndian, ByteOrder}; + +use super::ENTRY_SIZE; +use util::num_ops::div_round_up; + +pub const QCOW_MAGIC: u32 = 0x514649fb; +const QCOW_VERSION_2_MIN_LEN: usize = 72; +const QCOW_VERSION_3_MIN_LEN: usize = 104; +const MIN_CLUSTER_BIT: u32 = 9; +const MAX_CLUSTER_BIT: u32 = 21; +const MAX_REFTABLE_SIZE: u64 = 8 * (1 << 20); +const MAX_L1TABLE_SIZE: u64 = 32 * (1 << 20); + +#[repr(C)] +#[derive(Clone, Debug, Default)] +pub struct QcowHeader { + pub magic: u32, + pub version: u32, + pub backing_file_offset: u64, + pub backing_file_size: u32, + pub cluster_bits: u32, + pub size: u64, + pub crypt_method: u32, + pub l1_size: u32, + pub l1_table_offset: u64, + pub refcount_table_offset: u64, + pub refcount_table_clusters: u32, + pub nb_snapshots: u32, + pub snapshots_offset: u64, + // version >= v3 + pub incompatible_features: u64, + pub compatible_features: u64, + pub autoclear_features: u64, + pub refcount_order: u32, + pub header_length: u32, +} + +impl QcowHeader { + pub fn from_vec(buf: &[u8]) -> Result { + if buf.len() < QCOW_VERSION_2_MIN_LEN { + bail!( + "Invalid header len {}, the min len {}", + buf.len(), + QCOW_VERSION_2_MIN_LEN + ); + } + let mut header = QcowHeader { + magic: BigEndian::read_u32(&buf[0..4]), + version: BigEndian::read_u32(&buf[4..8]), + backing_file_offset: BigEndian::read_u64(&buf[8..16]), + backing_file_size: BigEndian::read_u32(&buf[16..20]), + cluster_bits: BigEndian::read_u32(&buf[20..24]), + size: BigEndian::read_u64(&buf[24..32]), + crypt_method: BigEndian::read_u32(&buf[32..36]), + l1_size: BigEndian::read_u32(&buf[36..40]), + l1_table_offset: BigEndian::read_u64(&buf[40..48]), + refcount_table_offset: BigEndian::read_u64(&buf[48..56]), + refcount_table_clusters: BigEndian::read_u32(&buf[56..60]), + nb_snapshots: BigEndian::read_u32(&buf[60..64]), + snapshots_offset: BigEndian::read_u64(&buf[64..72]), + ..Default::default() + }; + if header.magic != QCOW_MAGIC { + bail!("Invalid format {}", header.magic); + } + if header.version == 2 { + header.refcount_order = 4; + header.header_length = QCOW_VERSION_2_MIN_LEN as u32; + } else if header.version == 3 { + if buf.len() < QCOW_VERSION_3_MIN_LEN { + bail!("Invalid header len for version 3 {}", buf.len()); + } + header.incompatible_features = BigEndian::read_u64(&buf[72..80]); + header.compatible_features = BigEndian::read_u64(&buf[80..88]); + header.autoclear_features = BigEndian::read_u64(&buf[88..96]); + header.refcount_order = BigEndian::read_u32(&buf[96..100]); + header.header_length = BigEndian::read_u32(&buf[100..104]); + } else { + bail!("Invalid version {}", header.version); + } + Ok(header) + } + + pub fn to_vec(&self) -> Vec { + let sz = if self.version == 2 { + 
QCOW_VERSION_2_MIN_LEN + } else { + QcowHeader::len() + }; + let mut buf = vec![0; sz]; + BigEndian::write_u32(&mut buf[0..4], self.magic); + BigEndian::write_u32(&mut buf[4..8], self.version); + BigEndian::write_u64(&mut buf[8..16], self.backing_file_offset); + BigEndian::write_u32(&mut buf[16..20], self.backing_file_size); + BigEndian::write_u32(&mut buf[20..24], self.cluster_bits); + BigEndian::write_u64(&mut buf[24..32], self.size); + BigEndian::write_u32(&mut buf[32..36], self.crypt_method); + BigEndian::write_u32(&mut buf[36..40], self.l1_size); + BigEndian::write_u64(&mut buf[40..48], self.l1_table_offset); + BigEndian::write_u64(&mut buf[48..56], self.refcount_table_offset); + BigEndian::write_u32(&mut buf[56..60], self.refcount_table_clusters); + BigEndian::write_u32(&mut buf[60..64], self.nb_snapshots); + BigEndian::write_u64(&mut buf[64..72], self.snapshots_offset); + if self.version >= 3 { + BigEndian::write_u64(&mut buf[72..80], self.incompatible_features); + BigEndian::write_u64(&mut buf[80..88], self.compatible_features); + BigEndian::write_u64(&mut buf[88..96], self.autoclear_features); + BigEndian::write_u32(&mut buf[96..100], self.refcount_order); + BigEndian::write_u32(&mut buf[100..104], self.header_length); + } + buf + } + + #[inline] + pub fn len() -> usize { + std::mem::size_of::() + } + + #[inline] + pub fn cluster_size(&self) -> u64 { + 0x1 << self.cluster_bits + } + + pub fn check(&self) -> Result<()> { + if !(MIN_CLUSTER_BIT..=MAX_CLUSTER_BIT).contains(&self.cluster_bits) { + bail!("Invalid cluster bits {}", self.cluster_bits); + } + if u64::from(self.header_length) > self.cluster_size() { + bail!( + "Header length {} over cluster size {}", + self.header_length, + self.cluster_size() + ); + } + // NOTE: not support backing file now. + if self.backing_file_offset != 0 { + bail!( + "Don't support backing file offset, {}", + self.backing_file_offset + ); + } + // NOTE: only support refcount_order == 4. 
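+        // (Per the qcow2 spec a refcount entry is (1 << refcount_order) bits wide in its
+        // refcount block, so order 4 is the common 16-bit refcount width; any other width
+        // is rejected below.)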
+ if self.refcount_order != 4 { + bail!( + "Invalid refcount order {}, only support 4 now", + self.refcount_order + ); + } + self.check_refcount_table()?; + self.check_l1_table()?; + Ok(()) + } + + fn check_refcount_table(&self) -> Result<()> { + if self.refcount_table_clusters == 0 { + bail!("Refcount table clusters is zero"); + } + if u64::from(self.refcount_table_clusters) > MAX_REFTABLE_SIZE / self.cluster_size() { + bail!( + "Refcount table size over limit {}", + self.refcount_table_clusters + ); + } + if !self.cluster_aligned(self.refcount_table_offset) { + bail!( + "Refcount table offset not aligned {}", + self.refcount_table_offset + ); + } + self.refcount_table_offset + .checked_add(u64::from(self.refcount_table_clusters) * self.cluster_size()) + .with_context(|| { + format!( + "Invalid offset {} or refcount table clusters {}", + self.refcount_table_offset, self.refcount_table_clusters + ) + })?; + Ok(()) + } + + fn check_l1_table(&self) -> Result<()> { + if u64::from(self.l1_size) > MAX_L1TABLE_SIZE / ENTRY_SIZE { + bail!("L1 table size over limit {}", self.l1_size); + } + if !self.cluster_aligned(self.l1_table_offset) { + bail!("L1 table offset not aligned {}", self.l1_table_offset); + } + let size_per_l1_entry = self.cluster_size() * self.cluster_size() / ENTRY_SIZE; + let l1_need_sz = + div_round_up(self.size, size_per_l1_entry).with_context(|| "Failed to get l1 size")?; + if u64::from(self.l1_size) < l1_need_sz { + bail!( + "L1 table is too small, l1 size {} expect {}", + self.l1_size, + l1_need_sz + ); + } + self.l1_table_offset + .checked_add(u64::from(self.l1_size) * ENTRY_SIZE) + .with_context(|| { + format!( + "Invalid offset {} or entry size {}", + self.l1_table_offset, self.l1_size + ) + })?; + Ok(()) + } + + #[inline] + fn cluster_aligned(&self, offset: u64) -> bool { + offset & (self.cluster_size() - 1) == 0 + } +} + +#[cfg(test)] +mod test { + use crate::qcow2::header::*; + + const DEFAULT_CLUSTER_SIZE: u64 = 64 * 1024; + + fn valid_header_v3() -> Vec { + // 10G + vec![ + 0x51, 0x46, 0x49, 0xfb, // magic + 0x00, 0x00, 0x00, 0x03, // version + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset + 0x00, 0x00, 0x00, 0x00, // backing file size + 0x00, 0x00, 0x00, 0x10, // cluster bits + 0x00, 0x00, 0x00, 0x02, 0x80, 0x00, 0x00, 0x00, // size + 0x00, 0x00, 0x00, 0x00, // crypt method + 0x00, 0x00, 0x00, 0x14, // l1 size + 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, // l1 table offset + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset + 0x00, 0x00, 0x00, 0x01, // refcount table clusters + 0x00, 0x00, 0x00, 0x00, // nb snapshots + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // snapshots offset + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible features + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible features + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear features + 0x00, 0x00, 0x00, 0x04, // refcount order + 0x00, 0x00, 0x00, 0x68, // header length + ] + } + + fn extended_header_v3() -> Vec { + // 10G + let mut buf = valid_header_v3(); + buf.append(&mut vec![0_u8; 8]); + BigEndian::write_u32(&mut buf[100..104], 112); + buf + } + + fn valid_header_v2() -> Vec { + // 5G + vec![ + 0x51, 0x46, 0x49, 0xfb, // magic + 0x00, 0x00, 0x00, 0x02, // version + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset + 0x00, 0x00, 0x00, 0x00, // backing file size + 0x00, 0x00, 0x00, 0x10, // cluster bits + 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, // size + 0x00, 0x00, 
0x00, 0x00, // crypt method + 0x00, 0x00, 0x00, 0x0a, // l1 size + 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, // l1 table offset + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset + 0x00, 0x00, 0x00, 0x01, // refcount table clusters + 0x00, 0x00, 0x00, 0x00, // nb snapshots + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // snapshots offset + ] + } + + #[test] + fn test_header_align() { + // 8 bytes alignments + let sz = std::mem::size_of::(); + assert_eq!(sz % 8, 0); + } + + #[test] + fn test_valid_header() { + let buf = valid_header_v2(); + let header = QcowHeader::from_vec(&buf).unwrap(); + assert_eq!(header.magic, QCOW_MAGIC); + assert_eq!(header.version, 2); + assert_eq!(header.cluster_size(), DEFAULT_CLUSTER_SIZE); + assert_eq!(header.header_length, QCOW_VERSION_2_MIN_LEN as u32); + assert_eq!(buf, header.to_vec()); + + let buf = valid_header_v3(); + let header = QcowHeader::from_vec(&buf).unwrap(); + assert_eq!(header.magic, QCOW_MAGIC); + assert_eq!(header.version, 3); + assert_eq!(header.cluster_size(), DEFAULT_CLUSTER_SIZE); + assert_eq!(header.header_length, QCOW_VERSION_3_MIN_LEN as u32); + assert_eq!(buf, header.to_vec()); + + let buf = extended_header_v3(); + let header = QcowHeader::from_vec(&buf).unwrap(); + assert_eq!(header.magic, QCOW_MAGIC); + assert_eq!(header.version, 3); + assert_eq!(header.cluster_size(), DEFAULT_CLUSTER_SIZE); + assert_eq!(header.header_length, 112); + // NOTE: only care the length we supported. + assert_eq!(buf[0..QcowHeader::len()], header.to_vec()); + } + + fn invalid_header_list() -> Vec<(Vec, String)> { + let mut list = Vec::new(); + // Invalid buffer length. + list.push((vec![0_u8; 16], "Invalid header len".to_string())); + // Invalid buffer length for v3. + let buf = valid_header_v3(); + list.push(( + buf[0..90].to_vec(), + "Invalid header len for version 3".to_string(), + )); + // Invalid magic. + let mut buf = valid_header_v2(); + BigEndian::write_u32(&mut buf[0..4], 1234); + list.push((buf, "Invalid format".to_string())); + // Invalid version. + let mut buf = valid_header_v3(); + BigEndian::write_u32(&mut buf[4..8], 1); + list.push((buf, "Invalid version".to_string())); + // Large header length. + let mut buf = valid_header_v3(); + BigEndian::write_u32(&mut buf[100..104], 0x10000000_u32); + list.push(( + buf, + format!("Header length {} over cluster size", 0x10000000_u32), + )); + // Small cluster bit. + let mut buf = valid_header_v3(); + BigEndian::write_u32(&mut buf[20..24], 0); + list.push((buf, "Invalid cluster bit".to_string())); + // Large cluster bit. + let mut buf = valid_header_v3(); + BigEndian::write_u32(&mut buf[20..24], 65); + list.push((buf, "Invalid cluster bit".to_string())); + // Invalid backing file offset. + let mut buf = valid_header_v3(); + BigEndian::write_u32(&mut buf[8..16], 0x2000); + list.push((buf, "Don't support backing file offset".to_string())); + // Invalid refcount order. + let mut buf = valid_header_v3(); + BigEndian::write_u32(&mut buf[96..100], 5); + list.push((buf, "Invalid refcount order".to_string())); + // Refcount table offset is not aligned. + let mut buf = valid_header_v3(); + BigEndian::write_u64(&mut buf[48..56], 0x1234); + list.push((buf, "Refcount table offset not aligned".to_string())); + // Refcount table offset is large. 
+ let mut buf = valid_header_v3(); + BigEndian::write_u32(&mut buf[36..40], 4 * 1024 * 1024); + BigEndian::write_u64(&mut buf[48..56], 0xffff_ffff_ffff_0000_u64); + BigEndian::write_u32(&mut buf[56..60], 128); + list.push(( + buf, + format!( + "Invalid offset {} or refcount table clusters {}", + 0xffff_ffff_ffff_0000_u64, 128 + ), + )); + // Invalid refcount table cluster. + let mut buf = valid_header_v3(); + BigEndian::write_u32(&mut buf[56..60], 256); + list.push((buf, "Refcount table size over limit".to_string())); + // Refcount table cluster is 0. + let mut buf = valid_header_v3(); + BigEndian::write_u32(&mut buf[56..60], 0); + list.push((buf, "Refcount table clusters is zero".to_string())); + // L1 table offset is not aligned. + let mut buf = valid_header_v3(); + BigEndian::write_u64(&mut buf[40..48], 0x123456); + list.push((buf, "L1 table offset not aligned".to_string())); + // L1 table offset is large. + let mut buf = valid_header_v3(); + BigEndian::write_u32(&mut buf[36..40], 4 * 1024 * 1024); + BigEndian::write_u64(&mut buf[40..48], 0xffff_ffff_ffff_0000_u64); + list.push(( + buf, + format!( + "Invalid offset {} or entry size {}", + 0xffff_ffff_ffff_0000_u64, + 4 * 1024 * 1024 + ), + )); + // Invalid l1 table size. + let mut buf = valid_header_v3(); + BigEndian::write_u32(&mut buf[36..40], 0xffff_0000_u32); + list.push((buf, "L1 table size over limit".to_string())); + // File size is large than l1 table size. + let mut buf = valid_header_v3(); + BigEndian::write_u64(&mut buf[24..32], 0xffff_ffff_ffff_0000_u64); + BigEndian::write_u32(&mut buf[36..40], 10); + list.push((buf, "L1 table is too small".to_string())); + list + } + + #[test] + fn test_invalid_header() { + let list = invalid_header_list(); + for (buf, err) in list { + match QcowHeader::from_vec(&buf) { + Ok(header) => { + let e = header.check().err().unwrap(); + assert!(e.to_string().contains(&err)); + } + Err(e) => { + assert!(e.to_string().contains(&err)); + } + } + } + } +} diff --git a/block_backend/src/qcow2/mod.rs b/block_backend/src/qcow2/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..a5a1fb19ebcfd923032049b2c22ea823b905cf09 --- /dev/null +++ b/block_backend/src/qcow2/mod.rs @@ -0,0 +1,2964 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
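+
+// Rough layout of this module (editor sketch): `header` parses and validates the qcow2
+// header, `table` manages the L1/L2 mapping tables, `refcount` tracks cluster reference
+// counts, `snapshot` implements internal snapshots, `cache` provides the table cache used
+// by both, and `check` hosts the consistency-check helpers.
+//
+// Typical bring-up (illustrative only; setup and error handling elided, names as defined
+// below in this file):
+//   let mut driver = Qcow2Driver::new(file, aio, prop.clone())?;
+//   driver.load_metadata(prop)?;  // header -> L1 table -> refcount table -> snapshot table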
+ +pub mod cache; +pub mod check; +pub mod header; +pub mod refcount; +pub mod snapshot; +pub mod table; + +use std::{ + cell::RefCell, + collections::HashMap, + fs::File, + io::{Seek, SeekFrom, Write}, + mem::size_of, + os::unix::io::{AsRawFd, RawFd}, + rc::Rc, + sync::{ + atomic::{AtomicBool, AtomicU64}, + Arc, Mutex, Weak, + }, + time::Duration, +}; + +use anyhow::{bail, Context, Result}; +use byteorder::{BigEndian, ByteOrder}; +use log::{debug, error, info}; +use once_cell::sync::Lazy; + +use self::{ + cache::ENTRY_SIZE_U64, check::Qcow2Check, header::QCOW_MAGIC, refcount::Qcow2DiscardType, +}; +use crate::{ + file::{CombineRequest, FileDriver}, + qcow2::{ + cache::CacheTable, + header::QcowHeader, + refcount::RefCount, + snapshot::{InternalSnapshot, QcowSnapshot, QcowSnapshotExtraData, QCOW2_MAX_SNAPSHOTS}, + table::{Qcow2ClusterType, Qcow2Table}, + }, + BlockDriverOps, BlockIoErrorCallback, BlockProperty, BlockStatus, CheckResult, CreateOptions, + ImageInfo, SECTOR_SIZE, +}; +use machine_manager::event_loop::EventLoop; +use machine_manager::qmp::qmp_schema::SnapshotInfo; +use util::{ + aio::{ + get_iov_size, iovec_write_zero, iovecs_split, raw_write_zeroes, Aio, AioCb, AioEngine, + Iovec, OpCode, + }, + num_ops::{div_round_up, ranges_overlap, round_down, round_up}, + offset_of, + time::{get_format_time, gettime}, +}; + +// The L1/L2/Refcount table entry size. +pub const ENTRY_SIZE: u64 = 1 << ENTRY_BITS; +pub const ENTRY_BITS: u64 = 3; +pub const L1_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00; +pub const L1_RESERVED_MASK: u64 = 0x7f00_0000_0000_01ff; +pub const L2_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00; +pub const L2_STD_RESERVED_MASK: u64 = 0x3f00_0000_0000_01fe; +pub const REFCOUNT_TABLE_OFFSET_MASK: u64 = 0xffff_ffff_ffff_fe00; +pub const REFCOUNT_TABLE_RESERVED_MASK: u64 = 0x0000_0000_0000_01ff; +pub const QCOW2_OFLAG_ZERO: u64 = 1 << 0; +const QCOW2_OFFSET_COMPRESSED: u64 = 1 << 62; +pub const QCOW2_OFFSET_COPIED: u64 = 1 << 63; +const MAX_L1_SIZE: u64 = 32 * (1 << 20); +pub(crate) const DEFAULT_SECTOR_SIZE: u64 = 512; +pub(crate) const QCOW2_MAX_L1_SIZE: u64 = 1 << 25; + +// The default flush interval is 30s. +const DEFAULT_METADATA_FLUSH_INTERVAL: u64 = 30; + +const METADATA_OVERLAP_CHECK_MAINHEADER: u64 = 1 << 0; +const METADATA_OVERLAP_CHECK_ACTIVEL1: u64 = 1 << 1; +pub(crate) const METADATA_OVERLAP_CHECK_ACTIVEL2: u64 = 1 << 2; +const METADATA_OVERLAP_CHECK_REFCOUNTTABLE: u64 = 1 << 3; +const METADATA_OVERLAP_CHECK_REFCOUNTBLOCK: u64 = 1 << 4; +const METADATA_OVERLAP_CHECK_SNAPSHOTTABLE: u64 = 1 << 5; +const METADATA_OVERLAP_CHECK_INACTIVEL1: u64 = 1 << 6; +pub(crate) const METADATA_OVERLAP_CHECK_INACTIVEL2: u64 = 1 << 7; +#[allow(unused)] +const METADATA_OVERLAP_CHECK_BITMAPDIRECTORY: u64 = 1 << 8; + +const DEFAULT_QCOW2_METADATA_OVERLAP_CHECK: u64 = METADATA_OVERLAP_CHECK_MAINHEADER + | METADATA_OVERLAP_CHECK_ACTIVEL1 + | METADATA_OVERLAP_CHECK_ACTIVEL2 + | METADATA_OVERLAP_CHECK_REFCOUNTTABLE + | METADATA_OVERLAP_CHECK_REFCOUNTBLOCK + | METADATA_OVERLAP_CHECK_SNAPSHOTTABLE + | METADATA_OVERLAP_CHECK_INACTIVEL1; + +type Qcow2ListType = Lazy>>>>>; +/// Record the correspondence between disk drive ID and the qcow2 struct. +pub static QCOW2_LIST: Qcow2ListType = Lazy::new(|| Arc::new(Mutex::new(HashMap::new()))); + +/// Host continuous range. +pub enum HostRange { + /// Not init data size. + DataNotInit(u64), + /// Start address and size. + DataAddress(u64, u64), +} + +pub struct SyncAioInfo { + /// Aio for sync read/write metadata. 
+ aio: Aio<()>, + pub(crate) fd: RawFd, + pub prop: BlockProperty, +} + +impl SyncAioInfo { + pub fn complete_func(aio: &AioCb<()>, ret: i64) -> Result<()> { + if ret < 0 { + bail!( + "Failed to complete {:?} offset {} nbytes {}", + aio.opcode, + aio.offset, + aio.nbytes + ); + } + Ok(()) + } + + pub fn new(fd: RawFd, prop: BlockProperty) -> Result { + Ok(Self { + aio: Aio::new(Arc::new(SyncAioInfo::complete_func), AioEngine::Off, None)?, + fd, + prop, + }) + } + + fn package_sync_aiocb( + &self, + opcode: OpCode, + iovec: Vec, + offset: usize, + nbytes: u64, + ) -> AioCb<()> { + AioCb { + direct: self.prop.direct, + req_align: self.prop.req_align, + buf_align: self.prop.buf_align, + file_fd: self.fd, + opcode, + iovec, + offset, + nbytes, + user_data: 0, + iocompletecb: (), + discard: self.prop.discard, + write_zeroes: self.prop.write_zeroes, + combine_req: None, + } + } + + fn read_buffer(&mut self, offset: u64, buf: &mut [u8]) -> Result<()> { + let ptr = buf.as_mut_ptr() as u64; + let cnt = buf.len() as u64; + let aiocb = self.package_sync_aiocb( + OpCode::Preadv, + vec![Iovec::new(ptr, cnt)], + offset as usize, + cnt, + ); + self.aio.submit_request(aiocb) + } + + pub(crate) fn write_buffer(&mut self, offset: u64, buf: &[u8]) -> Result<()> { + let ptr = buf.as_ptr() as u64; + let cnt = buf.len() as u64; + let aiocb = self.package_sync_aiocb( + OpCode::Pwritev, + vec![Iovec::new(ptr, cnt)], + offset as usize, + cnt, + ); + self.aio.submit_request(aiocb) + } + + pub(crate) fn write_ctrl_cluster(&mut self, addr: u64, buf: &[u64]) -> Result<()> { + let output: Vec = buf.iter().flat_map(|val| val.to_be_bytes()).collect(); + self.write_buffer(addr, &output) + } + + pub(crate) fn read_ctrl_cluster(&mut self, addr: u64, sz: u64) -> Result> { + let mut buf = vec![0; sz as usize]; + let vec_len = size_of::() * sz as usize; + let mut vec = vec![0_u8; vec_len]; + self.read_buffer(addr, vec.as_mut_slice())?; + for i in 0..buf.len() { + buf[i] = BigEndian::read_u64(&vec[(size_of::() * i)..]); + } + Ok(buf) + } + + pub(crate) fn write_dirty_info( + &mut self, + addr: u64, + buf: &[u8], + start: u64, + end: u64, + ) -> Result<()> { + let start = round_down(start, DEFAULT_SECTOR_SIZE) + .with_context(|| format!("Round down failed, value is {}", start))?; + let end = round_up(end, DEFAULT_SECTOR_SIZE) + .with_context(|| format!("Round up failed, value is {}", end))?; + self.write_buffer(addr + start, &buf[start as usize..end as usize]) + } +} + +pub struct Qcow2Driver { + pub driver: FileDriver, + pub sync_aio: Rc>, + pub header: QcowHeader, + pub table: Qcow2Table, + pub refcount: RefCount, + pub snapshot: InternalSnapshot, + pub status: Arc>, +} + +impl Drop for Qcow2Driver { + fn drop(&mut self) { + self.flush() + .unwrap_or_else(|e| error!("Flush failed: {:?}", e)); + } +} + +/// Add timer for flushing qcow2 metadata. 
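+/// The timer re-arms itself: each expiry flushes the dirty table and refcount caches and
+/// schedules the next run after DEFAULT_METADATA_FLUSH_INTERVAL seconds, until the weak
+/// reference to the driver can no longer be upgraded (i.e. the drive has been removed).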
+pub fn qcow2_flush_metadata( + qcow2_driver: Weak>>, + drive_id: String, +) { + let qcow2_d = qcow2_driver.upgrade(); + if qcow2_d.is_none() { + info!("Qcow2 for drive \"{}\" flush metadata timer exit", drive_id); + return; + } + + let driver = qcow2_d.unwrap(); + let mut locked_driver = driver.lock().unwrap(); + locked_driver.flush().unwrap_or_else(|e| { + error!( + "Flush qcow2 metadata failed for drive {}, {:?}", + drive_id, e + ) + }); + + let flush_func = Box::new(move || { + qcow2_flush_metadata(qcow2_driver.clone(), drive_id.clone()); + }); + let iothread = locked_driver.sync_aio.borrow().prop.iothread.clone(); + EventLoop::get_ctx(iothread.as_ref()).unwrap().timer_add( + flush_func, + Duration::from_secs(DEFAULT_METADATA_FLUSH_INTERVAL), + ); +} + +impl Qcow2Driver { + pub fn new(file: Arc, aio: Aio, conf: BlockProperty) -> Result { + let fd = file.as_raw_fd(); + let sync_aio = Rc::new(RefCell::new(SyncAioInfo::new(fd, conf.clone())?)); + Ok(Self { + driver: FileDriver::new(file, aio, conf), + sync_aio: sync_aio.clone(), + header: QcowHeader::default(), + table: Qcow2Table::new(sync_aio.clone()), + refcount: RefCount::new(sync_aio.clone()), + snapshot: InternalSnapshot::new(sync_aio), + status: Arc::new(Mutex::new(BlockStatus::Init)), + }) + } + + pub fn load_metadata(&mut self, conf: BlockProperty) -> Result<()> { + self.load_header() + .with_context(|| "Failed to load header")?; + self.header.check().with_context(|| "Invalid header")?; + self.table + .init_table_info(&self.header, &conf) + .with_context(|| "Failed to create qcow2 table")?; + self.table + .load_l1_table() + .with_context(|| "Failed to load l1 table")?; + self.refcount.init_refcount_info(&self.header, &conf); + self.load_refcount_table() + .with_context(|| "Failed to load refcount table")?; + self.snapshot.set_cluster_size(self.header.cluster_size()); + self.snapshot + .load_snapshot_table( + self.header.snapshots_offset, + self.header.nb_snapshots, + false, + ) + .with_context(|| "Failed to load snapshot table")?; + Ok(()) + } + + pub fn flush(&mut self) -> Result<()> { + trace::qcow2_flush(&self.driver.block_prop.id); + self.table.flush()?; + self.refcount.flush() + } + + pub fn drop_dirty_caches(&mut self) { + self.table.drop_dirty_caches(); + self.refcount.drop_dirty_caches(); + self.table.load_l1_table().unwrap_or_else(|e| { + error!( + "Failed to reload l1 table for dropping unused changes, {:?}", + e + ) + }); + } + + pub fn load_header(&mut self) -> Result<()> { + let mut buf = vec![0; QcowHeader::len()]; + self.sync_aio.borrow_mut().read_buffer(0, &mut buf)?; + self.header = QcowHeader::from_vec(&buf)?; + if self.header.backing_file_size != 0 { + bail!("Backing file is not supported now"); + } + Ok(()) + } + + pub fn load_refcount_table(&mut self) -> Result<()> { + let sz = u64::from(self.header.refcount_table_clusters) + * (self.header.cluster_size() / ENTRY_SIZE); + self.refcount.refcount_table = self + .sync_aio + .borrow_mut() + .read_ctrl_cluster(self.header.refcount_table_offset, sz)?; + for block_offset in &self.refcount.refcount_table { + if *block_offset == 0 { + continue; + } + let rfb_offset = block_offset & REFCOUNT_TABLE_OFFSET_MASK; + self.refcount.refcount_table_map.insert(rfb_offset, 1); + } + Ok(()) + } + + // NOTE: L2 table must be allocated. 
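+    // How a guest offset is resolved (illustrative, following the qcow2 spec; for the
+    // default 64 KiB clusters an L2 table holds cluster_size / ENTRY_SIZE = 8192 entries):
+    //   l1_index = guest_offset >> (cluster_bits + log2(cluster_size / ENTRY_SIZE))
+    //   l2_index = (guest_offset >> cluster_bits) & (cluster_size / ENTRY_SIZE - 1)
+    // The L1 entry then yields the L2 table address and the L2 entry the data cluster.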
+ fn get_l2_entry(&mut self, guest_offset: u64) -> Result { + let l2_index = self.table.get_l2_table_index(guest_offset); + if let Some(entry) = self.table.get_l2_table_cache_entry(guest_offset) { + entry.borrow_mut().get_entry_map(l2_index as usize) + } else { + let l2_address = self.table.get_l1_table_entry(guest_offset) & L1_TABLE_OFFSET_MASK; + if l2_address == 0 { + bail!("L2 table is unallocated when get l2 cache"); + } + let l2_cluster = self.load_cluster(l2_address)?; + let l2_table = Rc::new(RefCell::new(CacheTable::new( + l2_address, + l2_cluster, + ENTRY_SIZE_U64, + )?)); + let res = l2_table.borrow_mut().get_entry_map(l2_index as usize)?; + self.table.cache_l2_table(l2_table)?; + Ok(res) + } + } + + fn get_continuous_address( + &mut self, + guest_offset: u64, + expect_len: u64, + ) -> Result<(Qcow2ClusterType, u64, u64)> { + let begin = round_down(guest_offset, self.header.cluster_size()) + .with_context(|| format!("invalid offset {}", guest_offset))?; + let end = round_up(guest_offset + expect_len, self.header.cluster_size()) + .with_context(|| format!("invalid offset {} len {}", guest_offset, expect_len))?; + let clusters = (end - begin) / self.header.cluster_size(); + if clusters == 0 { + bail!( + "Failed to get continuous address offset {} len {}", + guest_offset, + expect_len + ); + } + let mut host_start: u64 = 0; + let mut first_cluster_type = Qcow2ClusterType::Unallocated; + let mut cnt: u64 = 0; + while cnt < clusters { + let offset = cnt * self.header.cluster_size(); + let l2_entry = self.get_l2_entry(begin + offset)?; + let cluster_type = Qcow2ClusterType::get_cluster_type(l2_entry); + let cluster_addr = l2_entry & L2_TABLE_OFFSET_MASK; + if cnt == 0 { + host_start = cluster_addr; + first_cluster_type = cluster_type; + } else if cluster_addr != host_start + offset || cluster_type != first_cluster_type { + break; + } + cnt += 1; + } + let sz = cnt * self.header.cluster_size() - self.offset_into_cluster(guest_offset); + let actual_len = std::cmp::min(expect_len, sz); + Ok(( + first_cluster_type, + host_start + self.offset_into_cluster(guest_offset), + actual_len, + )) + } + + pub fn host_offset_for_read(&mut self, guest_offset: u64, req_len: u64) -> Result { + // Request not support cross l2 table. + let l2_max_len = self + .table + .get_l2_table_max_remain_size(guest_offset, self.offset_into_cluster(guest_offset)); + let size = std::cmp::min(req_len, l2_max_len); + let l2_address = self.table.get_l1_table_entry(guest_offset) & L1_TABLE_OFFSET_MASK; + if l2_address == 0 { + return Ok(HostRange::DataNotInit(size)); + } + let (cluster_type, host_start, bytes) = self.get_continuous_address(guest_offset, size)?; + if cluster_type.is_read_zero() { + Ok(HostRange::DataNotInit(bytes)) + } else { + Ok(HostRange::DataAddress(host_start, bytes)) + } + } + + fn host_offset_for_write(&mut self, guest_offset: u64, nbytes: u64) -> Result { + let mut need_check = false; + let l2_index = self.table.get_l2_table_index(guest_offset); + let l2_table = self.get_table_cluster(guest_offset)?; + let mut l2_entry = l2_table.borrow_mut().get_entry_map(l2_index as usize)?; + let old_l2_entry = l2_entry; + l2_entry &= !QCOW2_OFLAG_ZERO; + let mut cluster_addr = l2_entry & L2_TABLE_OFFSET_MASK; + if cluster_addr == 0 { + let new_addr = self.alloc_cluster(1, false)?; + l2_entry = new_addr | QCOW2_OFFSET_COPIED; + cluster_addr = new_addr & L2_TABLE_OFFSET_MASK; + } else if l2_entry & QCOW2_OFFSET_COPIED == 0 { + // Copy on write for data cluster. 
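+            // (A fresh cluster is allocated; when the write does not cover the whole
+            // cluster the old contents are copied into it first, then the old cluster's
+            // refcount is dropped and the L2 entry is repointed with QCOW2_OFFSET_COPIED set.)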
+ let new_data_addr = self.alloc_cluster(1, true)?; + if nbytes < self.header.cluster_size() { + let data = self.load_cluster(cluster_addr)?; + self.sync_aio + .borrow_mut() + .write_buffer(new_data_addr, &data)?; + } + self.refcount + .update_refcount(cluster_addr, 1, -1, false, &Qcow2DiscardType::Other)?; + l2_entry = new_data_addr | QCOW2_OFFSET_COPIED; + cluster_addr = new_data_addr & L2_TABLE_OFFSET_MASK; + } else { + need_check = true; + } + + if need_check && self.check_overlap(0, cluster_addr, nbytes) != 0 { + bail!( + "Failed to check overlap when getting host offset, addr: 0x{:x}, size: {}", + cluster_addr, + nbytes + ); + } + if l2_entry != old_l2_entry { + self.table + .update_l2_table(l2_table, l2_index as usize, l2_entry)?; + } + + Ok(cluster_addr + self.offset_into_cluster(guest_offset)) + } + + /// Extend the l1 table. + pub fn grow_l1_table(&mut self, new_l1_size: u64) -> Result<()> { + let old_l1_size = u64::from(self.header.l1_size); + if new_l1_size <= old_l1_size { + return Ok(()); + } + if new_l1_size > MAX_L1_SIZE / ENTRY_SIZE { + bail!("L1 size {} is too large", new_l1_size); + } + + info!( + "Resize the l1 table size from {} to {}", + old_l1_size, new_l1_size + ); + + // Copy data from old l1 table. + let cluster_size = self.header.cluster_size(); + let old_l1_table_size = old_l1_size * ENTRY_SIZE; + let old_l1_table_clusters = div_round_up(old_l1_table_size, cluster_size).unwrap(); + let old_l1_table_offset = self.header.l1_table_offset; + let new_l1_table_size = new_l1_size * ENTRY_SIZE; + let new_l1_table_clusters = div_round_up(new_l1_table_size, cluster_size).unwrap(); + let new_l1_table_offset = self.alloc_cluster(new_l1_table_clusters, true)?; + let mut new_l1_table = vec![0_u64; new_l1_size as usize]; + new_l1_table[..(old_l1_size as usize)] + .copy_from_slice(&self.table.l1_table[..(old_l1_size as usize)]); + + let ret = self.check_overlap(0, new_l1_table_offset, new_l1_table_size); + if ret != 0 { + bail!("Write on metadata(overlap with {:?})", ret); + } + self.refcount.flush()?; + self.sync_aio + .borrow_mut() + .write_ctrl_cluster(new_l1_table_offset, &new_l1_table)?; + + // Update the message information, includes: + // entry size of l1 table and active l1 table offset. + // 4 bytes for l1 size and 8 bytes for l1 table offset. + let mut buf = vec![0; 12]; + BigEndian::write_u32(&mut buf[0..4], new_l1_size as u32); + BigEndian::write_u64(&mut buf[4..12], new_l1_table_offset); + self.sync_aio + .borrow_mut() + .write_buffer(offset_of!(QcowHeader, l1_size) as u64, &buf)?; + self.header.l1_size = new_l1_size as u32; + self.header.l1_table_offset = new_l1_table_offset; + self.table.l1_size = new_l1_size as u32; + self.table.l1_table_offset = new_l1_table_offset; + self.table.l1_table = new_l1_table; + + self.free_cluster( + old_l1_table_offset, + old_l1_table_clusters, + true, + &Qcow2DiscardType::Other, + ) + } + + /// Obtaining the target entry for guest offset. + /// If the corresponding entry didn't cache, it will be read from the disk synchronously. + /// Input: guest offset. + /// Output: target entry. + pub fn get_table_cluster(&mut self, guest_offset: u64) -> Result>> { + let l1_index = self.table.get_l1_table_index(guest_offset); + if l1_index >= u64::from(self.header.l1_size) { + bail!("Need to grow l1 table size."); + } + + let l1_entry = self.table.get_l1_table_entry(guest_offset); + let mut l2_address = l1_entry & L1_TABLE_OFFSET_MASK; + // Align to cluster size. 
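+        // (cluster_size is a power of two, so `offset & (cluster_size - 1) == 0` is the
+        // cheap alignment test used throughout this file.)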
+ if (l2_address & (self.header.cluster_size() - 1)) != 0 { + bail!( + "L2 table offset {} unaligned(L1 index: {})", + l2_address, + l1_index + ); + } + + if l1_entry & QCOW2_OFFSET_COPIED == 0 { + // Alloc a new l2_table. + let old_l2_offset = l1_entry & L1_TABLE_OFFSET_MASK; + let new_l2_offset = self.alloc_cluster(1, false)?; + let l2_cluster: Vec = + if let Some(entry) = self.table.get_l2_table_cache_entry(guest_offset) { + entry.borrow().get_value().to_vec() + } else if old_l2_offset != 0 { + self.load_cluster(l2_address)? + } else { + vec![0_u8; self.header.cluster_size() as usize] + }; + self.sync_aio + .borrow_mut() + .write_buffer(new_l2_offset, &l2_cluster)?; + let l2_cache_entry = Rc::new(RefCell::new(CacheTable::new( + new_l2_offset, + l2_cluster, + ENTRY_SIZE_U64, + )?)); + self.table.cache_l2_table(l2_cache_entry)?; + + // Update l1_table. + self.table + .update_l1_table(l1_index as usize, new_l2_offset | QCOW2_OFFSET_COPIED); + self.table.save_l1_table()?; + + // Decrease the refcount of the old table. + if old_l2_offset != 0 { + self.refcount.update_refcount( + old_l2_offset, + 1, + -1, + true, + &Qcow2DiscardType::Other, + )?; + } + // Get the offset of the newly-allocated l2 table. + l2_address = new_l2_offset; + } + + // Cache hit. + if let Some(entry) = self.table.l2_table_cache.get(l2_address) { + return Ok(entry.clone()); + } + // Cache miss. + let l2_cluster = self.load_cluster(l2_address)?; + let l2_table_entry = Rc::new(RefCell::new(CacheTable::new( + l2_address, + l2_cluster, + ENTRY_SIZE_U64, + )?)); + self.table.cache_l2_table(l2_table_entry.clone())?; + Ok(l2_table_entry) + } + + /// Write back to disk synchronously, with a range no greater than cluster size. + fn sync_write_bytes(&mut self, guest_offset: u64, buf: &[u8]) -> Result<()> { + if buf.len() > self.header.cluster_size() as usize + || guest_offset as usize + buf.len() > self.virtual_disk_size() as usize + { + bail!("Buffer size: is out of range",); + } + // Return if the address is not allocated. + let host_offset = self.host_offset_for_write(guest_offset, buf.len() as u64)?; + self.sync_aio.borrow_mut().write_buffer(host_offset, buf)?; + Ok(()) + } + + /// Write zero data to cluster data as many as possible, and return the total number of + /// cluster. + /// Note: the guest offset should align to cluster size. + fn zero_in_l2_slice(&mut self, guest_offset: u64, nb_cluster: u64) -> Result { + // Zero flag is only support by version 3. + // If this flag is not supported, then transfer write_zero to discard. 
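+        // (QCOW2_OFLAG_ZERO lives in bit 0 of an L2 entry and is only defined for
+        // version >= 3 images; on v2 the cluster has to be deallocated instead so that
+        // reads come back as zeroes.)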
+ if self.header.version < 3 { + return self.discard_in_l2_slice(guest_offset, nb_cluster, &Qcow2DiscardType::Request); + } + + let l2_index = self.table.get_l2_table_index(guest_offset); + let l2_slice_size = self.header.cluster_size() >> ENTRY_BITS; + let nb_cluster = std::cmp::min(nb_cluster, l2_slice_size - l2_index); + let table_entry = self.get_table_cluster(guest_offset)?; + for i in 0..nb_cluster { + let new_l2_index = l2_index + i; + let old_l2_entry = table_entry + .borrow_mut() + .get_entry_map(new_l2_index as usize)?; + let entry_type = Qcow2ClusterType::get_cluster_type(old_l2_entry); + let mut new_l2_entry = old_l2_entry; + let unmap: bool = entry_type.is_allocated(); + if unmap { + new_l2_entry = 0; + } + new_l2_entry |= QCOW2_OFLAG_ZERO; + + if new_l2_entry == old_l2_entry { + continue; + } + + self.table + .update_l2_table(table_entry.clone(), new_l2_index as usize, new_l2_entry)?; + if unmap { + self.qcow2_free_cluster(old_l2_entry, &Qcow2DiscardType::Request)?; + } + } + Ok(nb_cluster) + } + + /// Discard the data as many as possibale, and return the total number of cluster. + /// Note: the guest_offset should align to cluster size. + fn discard_in_l2_slice( + &mut self, + guest_offset: u64, + nb_cluster: u64, + discard_type: &Qcow2DiscardType, + ) -> Result { + let l2_index = self.table.get_l2_table_index(guest_offset); + let l2_slice_size = self.header.cluster_size() >> ENTRY_BITS; + let nb_cluster = std::cmp::min(nb_cluster, l2_slice_size - l2_index); + let table_entry = self.get_table_cluster(guest_offset)?; + for i in 0..nb_cluster { + let new_l2_index = l2_index + i; + let old_l2_entry = table_entry + .borrow_mut() + .get_entry_map(new_l2_index as usize)?; + let entry_type = Qcow2ClusterType::get_cluster_type(old_l2_entry); + let mut new_l2_entry = old_l2_entry; + + if entry_type.is_allocated() { + new_l2_entry = if self.header.version >= 3 { + QCOW2_OFLAG_ZERO + } else { + 0 + }; + } + if new_l2_entry == old_l2_entry { + continue; + } + + // Update l2 entry. + self.table + .update_l2_table(table_entry.clone(), new_l2_index as usize, new_l2_entry)?; + + // Decrease the refcount. + self.qcow2_free_cluster(old_l2_entry, discard_type)?; + } + Ok(nb_cluster) + } + + /// Update refount of cluster, if the value is equal to 0, + /// then clear the cluster. + pub fn qcow2_free_cluster( + &mut self, + l2_entry: u64, + discard_type: &Qcow2DiscardType, + ) -> Result<()> { + let cluster_type = Qcow2ClusterType::get_cluster_type(l2_entry); + match cluster_type { + Qcow2ClusterType::ZeroAlloc | Qcow2ClusterType::Normal => { + let offset = l2_entry & L2_TABLE_OFFSET_MASK; + let nbytes = self.header.cluster_size(); + // Align to cluster size. 
+ if !is_aligned(nbytes, offset) { + bail!( + "Host offset {} is unaligned to cluster size {}", + offset, + nbytes + ); + } + self.free_cluster(offset, 1, false, discard_type)?; + } + Qcow2ClusterType::Compressed => { + bail!("Compressed is not supported"); + } + _ => {} + } + Ok(()) + } + + fn offset_into_cluster(&self, guest_offset: u64) -> u64 { + guest_offset & (self.header.cluster_size() - 1) + } + + pub(crate) fn load_cluster(&mut self, addr: u64) -> Result> { + if !is_aligned(self.header.cluster_size(), addr) { + bail!("Cluster address not aligned {}", addr); + } + let mut buf = vec![0_u8; self.header.cluster_size() as usize]; + self.sync_aio.borrow_mut().read_buffer(addr, &mut buf)?; + Ok(buf) + } + + fn virtual_disk_size(&self) -> u64 { + self.header.size + } + + fn cluster_aligned_bytes(&self, addr: u64, cnt: u64) -> u64 { + let offset = self.offset_into_cluster(addr); + std::cmp::min(cnt, self.header.cluster_size() - offset) + } + + pub fn alloc_cluster(&mut self, clusters: u64, write_zero: bool) -> Result { + if !self.refcount.discard_list.is_empty() { + self.refcount.sync_process_discards(OpCode::Discard); + } + + let size = clusters * self.header.cluster_size(); + let addr = self + .refcount + .alloc_clusters_with_ref(&mut self.header, size)?; + let ret = self.check_overlap(0, addr, size); + if ret != 0 { + bail!( + "Failed to check overlap when allocing clusterk, ret is {}, addr: 0x{:x}, size: {}", + ret, + addr, + size + ); + } + if write_zero && addr < self.driver.disk_size()? { + let ret = raw_write_zeroes(self.sync_aio.borrow_mut().fd, addr as usize, size); + if ret < 0 { + let zero_buf = vec![0_u8; self.header.cluster_size() as usize]; + for i in 0..clusters { + let offset = addr + i * self.header.cluster_size(); + self.sync_aio.borrow_mut().write_buffer(offset, &zero_buf)?; + } + } + } + self.driver.extend_to_len(addr + size)?; + Ok(addr) + } + + fn check_request(&self, offset: usize, nbytes: u64) -> Result<()> { + if offset as u64 > self.virtual_disk_size() { + bail!("Invalid offset {}", offset); + } + let end = (offset as u64) + .checked_add(nbytes) + .with_context(|| format!("Invalid offset {} or size {}", offset, nbytes))?; + if end > self.virtual_disk_size() { + bail!("Request over limit {}", end); + } + Ok(()) + } + + fn free_cluster( + &mut self, + addr: u64, + clusters: u64, + flush: bool, + discard_type: &Qcow2DiscardType, + ) -> Result<()> { + self.refcount + .update_refcount(addr, clusters, -1, flush, discard_type) + } + + fn get_snapshot_by_name(&mut self, name: &String) -> i32 { + self.snapshot.find_snapshot(name) + } + + fn qcow2_apply_snapshot(&mut self, name: String) -> Result<()> { + // Get target snapshot by name. + let snap_id = self.get_snapshot_by_name(&name); + if snap_id < 0 { + bail!("Failed to load snapshots {}", name); + } + let snap = self.snapshot.snapshots[snap_id as usize].clone(); + + // Validate snapshot table + if u64::from(snap.l1_size) > MAX_L1_SIZE / ENTRY_SIZE { + bail!("Snapshot L1 table too large"); + } + + if i64::MAX as u64 - u64::from(snap.l1_size) * ENTRY_SIZE < snap.l1_table_offset + || !is_aligned(self.header.cluster_size(), snap.l1_table_offset) + { + bail!("Snapshot L1 table offset invalid"); + } + + // Apply the l1 table of snapshot to active l1 table. + let mut snap_l1_table = self + .sync_aio + .borrow_mut() + .read_ctrl_cluster(snap.l1_table_offset, u64::from(snap.l1_size))?; + // SAFETY: Upper limit of l1_size is decided by disk virtual size. 
+ snap_l1_table.resize(snap.l1_size as usize, 0); + + let cluster_size = self.header.cluster_size(); + let snap_l1_table_bytes = u64::from(snap.l1_size) * ENTRY_SIZE; + let snap_l1_table_clusters = bytes_to_clusters(snap_l1_table_bytes, cluster_size).unwrap(); + let new_l1_table_offset = self.alloc_cluster(snap_l1_table_clusters, true)?; + + // Increase the refcount of all clusters searched by L1 table. + self.qcow2_update_snapshot_refcount(snap.l1_table_offset, snap.l1_size as usize, 1)?; + self.sync_aio + .borrow_mut() + .write_ctrl_cluster(new_l1_table_offset, &snap_l1_table)?; + + // Sync active l1 table offset of header to disk. + let mut new_header = self.header.clone(); + new_header.l1_table_offset = new_l1_table_offset; + new_header.l1_size = snap.l1_size; + new_header.size = snap.disk_size; + self.sync_aio + .borrow_mut() + .write_buffer(0, &new_header.to_vec())?; + + let old_l1_table_offset = self.header.l1_table_offset; + let old_l1_size = self.header.l1_size; + self.header = new_header; + self.table.l1_table_offset = new_l1_table_offset; + self.table.l1_size = snap.l1_size; + self.table.l1_table = snap_l1_table; + self.table.l1_table_map.clear(); + for l1_entry in self.table.l1_table.iter() { + let addr = l1_entry & L1_TABLE_OFFSET_MASK; + self.table.l1_table_map.insert(addr, 1); + } + + self.qcow2_update_snapshot_refcount(old_l1_table_offset, old_l1_size as usize, -1)?; + + // Free the snaphshot L1 table. + let old_l1_table_clusters = + bytes_to_clusters(u64::from(old_l1_size) * ENTRY_SIZE, cluster_size).unwrap(); + self.refcount.update_refcount( + old_l1_table_offset, + old_l1_table_clusters, + -1, + false, + &Qcow2DiscardType::Snapshot, + )?; + + // Update flag of QCOW2_OFFSET_COPIED in current active l1 table, as it has been changed. + self.qcow2_update_snapshot_refcount( + self.header.l1_table_offset, + self.header.l1_size as usize, + 0, + )?; + + self.flush()?; + self.table.save_l1_table()?; + + // Discard unused clusters. + self.refcount.sync_process_discards(OpCode::Discard); + + Ok(()) + } + + fn qcow2_delete_snapshot(&mut self, name: String) -> Result { + let snapshot_idx = self.get_snapshot_by_name(&name); + if snapshot_idx < 0 { + bail!("Snapshot with name {} does not exist", name); + } + + // Record the old snapshot table size which will be used to free these old snapshot table clusters. + let cluster_size = self.header.cluster_size(); + let old_snapshot_table_clusters = + bytes_to_clusters(self.snapshot.snapshot_size, cluster_size).unwrap(); + + // Delete snapshot information in memory. + let snap = self.snapshot.del_snapshot(snapshot_idx as usize); + + // Alloc new cluster to save snapshots(except the deleted one) to disk. + let mut new_snapshots_offset = 0_u64; + let new_snapshot_table_clusters = + bytes_to_clusters(self.snapshot.snapshot_size, cluster_size).unwrap(); + if self.snapshot.snapshots_number() > 0 { + new_snapshots_offset = self.alloc_cluster(new_snapshot_table_clusters, true)?; + self.snapshot + .save_snapshot_table(new_snapshots_offset, Some(&snap), false)?; + } + self.snapshot.snapshot_table_offset = new_snapshots_offset; + + // Decrease the refcounts of clusters referenced by the snapshot. + self.qcow2_update_snapshot_refcount(snap.l1_table_offset, snap.l1_size as usize, -1)?; + + // Free the snaphshot L1 table. 
+ let l1_table_clusters = + bytes_to_clusters(u64::from(snap.l1_size) * ENTRY_SIZE, cluster_size).unwrap(); + self.refcount.update_refcount( + snap.l1_table_offset, + l1_table_clusters, + -1, + false, + &Qcow2DiscardType::Snapshot, + )?; + + // Update the flag of the L1/L2 table entries. + self.qcow2_update_snapshot_refcount( + self.header.l1_table_offset, + self.header.l1_size as usize, + 0, + )?; + + // Free the cluster of the old snapshot table. + self.refcount.update_refcount( + self.header.snapshots_offset, + old_snapshot_table_clusters, + -1, + false, + &Qcow2DiscardType::Snapshot, + )?; + + // Flush the cache of the refcount block and l2 table. + self.flush()?; + + self.table.save_l1_table()?; + + // Update the snapshot information in qcow2 header. + self.update_snapshot_info_in_header(new_snapshots_offset, -1)?; + + // Discard unused clusters. + self.refcount.sync_process_discards(OpCode::Discard); + + Ok(SnapshotInfo { + id: snap.id.to_string(), + name: snap.name.clone(), + vm_state_size: u64::from(snap.vm_state_size), + date_sec: snap.date_sec, + date_nsec: snap.date_nsec, + vm_clock_nsec: snap.vm_clock_nsec, + icount: snap.icount, + }) + } + + fn qcow2_create_snapshot(&mut self, name: String, vm_clock_nsec: u64) -> Result<()> { + if self.get_snapshot_by_name(&name) >= 0 { + bail!("Snapshot {} exists!", name); + } + if self.snapshot.snapshots_number() >= QCOW2_MAX_SNAPSHOTS { + bail!( + "The number of snapshots exceed the maximum limit {}", + QCOW2_MAX_SNAPSHOTS + ); + } + + // Alloc cluster and copy L1 table for snapshot. + let cluster_size = self.header.cluster_size(); + let l1_table_len = u64::from(self.header.l1_size) * ENTRY_SIZE; + let l1_table_clusters = bytes_to_clusters(l1_table_len, cluster_size).unwrap(); + let new_l1_table_offset = self.alloc_cluster(l1_table_clusters, true)?; + self.sync_aio + .borrow_mut() + .write_ctrl_cluster(new_l1_table_offset, &self.table.l1_table)?; + + // Increase the refcount of all clusters searched by L1 table. + self.qcow2_update_snapshot_refcount( + self.header.l1_table_offset, + self.header.l1_size as usize, + 1, + )?; + + // Alloc new snapshot table. + let (date_sec, date_nsec) = gettime()?; + // Note: The `Snapshots` chapter in Qcow2 spec states: + // Snapshot table entry: + // Byte 16 - 19: Time at which the snapshot was taken in seconds since the + // Epoch + // Byte 20 - 23: Subsecond part of the time at which the snapshot was taken + // in nanoseconds + // + // 32 bits of seconds can represent a range of approximately 136 years since 1970. + // It's enough for current use. If an incorrect host time is used to inject error, + // there may be an issue of inaccurate creation time in the snapshot description. + // Considering compatibility, this issue of inaccurate time is acceptable. 
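+        // (A u32 seconds-since-Epoch field wraps around roughly in the year 2106.)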
+ let snap = QcowSnapshot { + l1_table_offset: new_l1_table_offset, + l1_size: self.header.l1_size, + id: self.snapshot.find_new_snapshot_id(), + name, + disk_size: self.virtual_disk_size(), + vm_state_size: 0, + date_sec: date_sec as u32, + date_nsec: date_nsec as u32, + vm_clock_nsec, + icount: u64::MAX, + extra_data_size: size_of::() as u32, + }; + let old_snapshot_table_len = self.snapshot.snapshot_size; + let snapshot_table_clusters = + bytes_to_clusters(old_snapshot_table_len + snap.get_size(), cluster_size).unwrap(); + let new_snapshots_offset = self.alloc_cluster(snapshot_table_clusters, true)?; + info!( + "Snapshot table offset: old(0x{:x}) -> new(0x{:x})", + self.header.snapshots_offset, new_snapshots_offset, + ); + + // Append the new snapshot to the snapshot table and write new snapshot table to file. + self.snapshot + .save_snapshot_table(new_snapshots_offset, Some(&snap), true)?; + + // Free the old snapshot table cluster if snapshot exists. + if self.header.snapshots_offset != 0 { + let clusters = bytes_to_clusters(old_snapshot_table_len, cluster_size).unwrap(); + self.refcount.update_refcount( + self.header.snapshots_offset, + clusters, + -1, + false, + &Qcow2DiscardType::Snapshot, + )?; + } + + // Flush the cache of the refcount block and l1/l2 table. + self.flush()?; + + self.table.save_l1_table()?; + + // Update snapshot offset and num in qcow2 header. + self.update_snapshot_info_in_header(new_snapshots_offset, 1)?; + + // Add and update snapshot information in memory. + self.snapshot.add_snapshot(snap); + self.snapshot.snapshot_table_offset = new_snapshots_offset; + + // Discard unused clusters. + self.refcount.sync_process_discards(OpCode::Discard); + + Ok(()) + } + + fn update_snapshot_info_in_header(&mut self, snapshot_offset: u64, add: i32) -> Result<()> { + let mut new_header = self.header.clone(); + new_header.snapshots_offset = snapshot_offset; + new_header.nb_snapshots = (new_header.nb_snapshots as i32 + add) as u32; + self.sync_aio + .borrow_mut() + .write_buffer(0, &new_header.to_vec())?; + self.header.snapshots_offset = new_header.snapshots_offset; + self.header.nb_snapshots = new_header.nb_snapshots; + + Ok(()) + } + + /// Write the modified snapshots back to disk + /// 1. Alloc a new space for new snapshot table. + /// 2. Write the snapshots to the new space. + /// 3. Free the old snapshot table. + /// 4. Modify qcow2 header. + pub(crate) fn write_snapshots_to_disk(&mut self) -> Result<()> { + let cluster_size = self.header.cluster_size(); + let snapshot_size = self.snapshot.snapshot_size; + let snapshot_clusters = bytes_to_clusters(snapshot_size, cluster_size).unwrap(); + let new_nb_snapshots = self.snapshot.snapshots.len() as u32; + let mut new_snapshot_offset = 0_u64; + if snapshot_clusters != 0 { + new_snapshot_offset = self.alloc_cluster(snapshot_clusters, true)?; + info!( + "Snapshot table offset: old(0x{:x}) -> new(0x{:x})", + self.header.snapshots_offset, new_snapshot_offset, + ); + + // Write new snapshots to disk. + let mut snap_buf = Vec::new(); + for snap in &self.snapshot.snapshots { + snap_buf.append(&mut snap.gen_snapshot_table_entry()); + } + self.sync_aio + .borrow_mut() + .write_buffer(new_snapshot_offset, &snap_buf)?; + } + + let old_snapshot_offset = self.header.snapshots_offset; + // Free the old snapshot table cluster if snapshot exists. 
+ if self.header.snapshots_offset != 0 { + self.refcount.update_refcount( + old_snapshot_offset, + snapshot_clusters, + -1, + false, + &Qcow2DiscardType::Snapshot, + )?; + } + + // Flush the cache of the refcount block and l1/l2 table. + self.flush()?; + + // Update snapshot offset and num in qcow2 header. + let mut new_header = self.header.clone(); + new_header.snapshots_offset = new_snapshot_offset; + new_header.nb_snapshots = new_nb_snapshots; + self.sync_aio + .borrow_mut() + .write_buffer(0, &new_header.to_vec())?; + + self.snapshot.snapshot_table_offset = new_snapshot_offset; + self.header.snapshots_offset = new_header.snapshots_offset; + self.header.nb_snapshots = new_header.nb_snapshots; + Ok(()) + } + + /// Update the refcounts of all clusters searched by l1_table_offset. + fn qcow2_update_snapshot_refcount( + &mut self, + l1_table_offset: u64, + l1_table_size: usize, + added: i32, + ) -> Result<()> { + debug!( + "Update snapshot refcount: l1 table offset {:x}, active header l1 table addr {:x}, add {}", + l1_table_offset, + self.header.l1_table_offset, + added + ); + + let mut l1_table = if l1_table_offset != self.header.l1_table_offset { + // Read snapshot l1 table from qcow2 file. + self.sync_aio + .borrow_mut() + .read_ctrl_cluster(l1_table_offset, l1_table_size as u64)? + } else { + self.table.l1_table.clone() + }; + + let mut old_l2_table_offset: u64; + for (i, l1_entry) in l1_table.iter_mut().enumerate().take(l1_table_size) { + let mut l2_table_offset = *l1_entry; + if l2_table_offset == 0 { + // No l2 table. + continue; + } + old_l2_table_offset = l2_table_offset; + l2_table_offset &= L1_TABLE_OFFSET_MASK; + + if self.refcount.offset_into_cluster(l2_table_offset) != 0 { + bail!( + "L2 table offset {:x} unaligned (L1 index {})!", + l2_table_offset, + i + ); + } + + if !self.table.l2_table_cache.contains_keys(l2_table_offset) { + let l2_cluster = self.load_cluster(l2_table_offset)?; + let l2_table_entry = Rc::new(RefCell::new(CacheTable::new( + l2_table_offset, + l2_cluster, + ENTRY_SIZE_U64, + )?)); + self.table.cache_l2_table(l2_table_entry)?; + } + + let cached_l2_table = self.table.l2_table_cache.get(l2_table_offset).unwrap(); + let entry_num = cached_l2_table.borrow().get_entry_num(); + let cloned_table = cached_l2_table.clone(); + for idx in 0..entry_num { + let l2_entry = cloned_table.borrow().get_entry_map(idx)?; + let mut new_l2_entry = l2_entry & !QCOW2_OFFSET_COPIED; + let data_cluster_offset = new_l2_entry & L2_TABLE_OFFSET_MASK; + if data_cluster_offset == 0 { + // Unallocated data cluster. + continue; + } + if self.refcount.offset_into_cluster(data_cluster_offset) != 0 { + bail!( + "Cluster offset 0x{:x} unaligned, (L2 table offset 0x{:x}, L2 index {})!", + data_cluster_offset, + l2_table_offset, + idx + ); + } + + if added != 0 { + // Update Data Cluster refcount. + self.refcount.update_refcount( + data_cluster_offset, + 1, + added, + false, + &Qcow2DiscardType::Snapshot, + )?; + } + + let refcount = self.refcount.get_refcount(data_cluster_offset)?; + if refcount == 1 { + new_l2_entry |= QCOW2_OFFSET_COPIED; + } + if l2_entry != new_l2_entry { + self.table + .update_l2_table(cloned_table.clone(), idx, new_l2_entry)?; + } + } + + if added != 0 { + // Update L2 table cluster refcount. 
+ self.refcount.update_refcount( + l2_table_offset, + 1, + added, + false, + &Qcow2DiscardType::Snapshot, + )?; + } + + let refcount = self.refcount.get_refcount(l2_table_offset)?; + if refcount == 1 { + l2_table_offset |= QCOW2_OFFSET_COPIED; + } + if l2_table_offset != old_l2_table_offset { + *l1_entry = l2_table_offset; + if l1_table_offset == self.header.l1_table_offset { + self.table.update_l1_table(i, l2_table_offset); + } + } + } + + Ok(()) + } + + fn qcow2_list_snapshots(&self) -> String { + let mut snap_strs = format!( + "{:<10}{:<17}{:>8}{:>20}{:>13}{:>11}\r\n", + "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK", "ICOUNT" + ); + for snap in &self.snapshot.snapshots { + let id_str = snap.id.to_string(); + let name_str = snap.name.clone(); + // Note: vm state size is not needed in disk snapshot, so it's "0 B". + let vm_size_str = snap.vm_state_size.to_string(); + let icount_str = match snap.icount { + u64::MAX => "".to_string(), + _ => snap.icount.to_string(), + }; + + let date = get_format_time(i64::from(snap.date_sec)); + let date_str = format!( + "{:04}-{:02}-{:02} {:02}:{:02}:{:02}", + date[0], date[1], date[2], date[3], date[4], date[5] + ); + + let vm_clock_secs = snap.vm_clock_nsec / 1_000_000_000; + let vm_clock_str = format!( + "{:02}:{:02}:{:02}.{:02}", + vm_clock_secs / 3600, + (vm_clock_secs / 3600) % 60, + vm_clock_secs % 60, + (vm_clock_secs / 1_000_000) % 1000 + ); + + let snap_str = format!( + "{:<9} {:<16} {:>6} B{:>20}{:>13}{:>11}\r\n", + id_str, name_str, vm_size_str, date_str, vm_clock_str, icount_str + ); + snap_strs += &snap_str; + } + snap_strs + } + + // Check if there exist intersection between given address range and qcow2 metadata. + pub(crate) fn check_overlap(&self, ignore: u64, offset: u64, size: u64) -> i64 { + let check = DEFAULT_QCOW2_METADATA_OVERLAP_CHECK & !ignore; + if check == 0 { + return 0; + } + + let cluster_size = self.header.cluster_size(); + if check & METADATA_OVERLAP_CHECK_MAINHEADER != 0 && offset < cluster_size { + return METADATA_OVERLAP_CHECK_MAINHEADER as i64; + } + + let size = round_up( + self.refcount.offset_into_cluster(offset) + size, + cluster_size, + ) + .unwrap() as usize; + let offset = self.refcount.start_of_cluster(offset) as usize; + if usize::MAX - offset < size { + // Ensure there exist no overflow. + return -1; + } + + // SAFETY: all tables have been assigned, indicating that their addresses are reasonable. 
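+        // (The usize overflow check above, together with this invariant, is why the
+        // `ranges_overlap(...).unwrap()` calls in the tests below are not expected to fail.)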
+ if check & METADATA_OVERLAP_CHECK_ACTIVEL1 != 0 + && self.header.l1_size != 0 + && ranges_overlap( + offset, + size, + self.header.l1_table_offset as usize, + self.header.l1_size as usize * ENTRY_SIZE as usize, + ) + .unwrap() + { + return METADATA_OVERLAP_CHECK_ACTIVEL1 as i64; + } + + if check & METADATA_OVERLAP_CHECK_ACTIVEL2 != 0 { + let num = size as u64 / cluster_size; + for i in 0..num { + let addr = offset as u64 + i * cluster_size; + if self.table.l1_table_map.contains_key(&addr) { + return METADATA_OVERLAP_CHECK_ACTIVEL2 as i64; + } + } + } + + if check & METADATA_OVERLAP_CHECK_REFCOUNTTABLE != 0 + && ranges_overlap( + offset, + size, + self.header.refcount_table_offset as usize, + self.header.refcount_table_clusters as usize * cluster_size as usize, + ) + .unwrap() + { + return METADATA_OVERLAP_CHECK_REFCOUNTTABLE as i64; + } + + if check & METADATA_OVERLAP_CHECK_REFCOUNTBLOCK != 0 { + let num = size as u64 / cluster_size; + for i in 0..num { + let addr = offset as u64 + i * cluster_size; + if self.refcount.refcount_table_map.contains_key(&addr) { + return METADATA_OVERLAP_CHECK_REFCOUNTBLOCK as i64; + } + } + } + + if check & METADATA_OVERLAP_CHECK_SNAPSHOTTABLE != 0 + && ranges_overlap( + offset, + size, + self.snapshot.snapshot_table_offset as usize, + self.snapshot.snapshot_size as usize, + ) + .unwrap() + { + return METADATA_OVERLAP_CHECK_SNAPSHOTTABLE as i64; + } + + if check & METADATA_OVERLAP_CHECK_INACTIVEL1 != 0 { + for snap in &self.snapshot.snapshots { + if ranges_overlap( + offset, + size, + snap.l1_table_offset as usize, + snap.l1_size as usize * ENTRY_SIZE as usize, + ) + .unwrap() + { + return METADATA_OVERLAP_CHECK_INACTIVEL1 as i64; + } + } + } + + 0 + } +} + +pub fn bytes_to_clusters(size: u64, cluster_sz: u64) -> Result { + div_round_up(size, cluster_sz) + .with_context(|| format!("Failed to div round up, size is {}", size)) +} + +pub trait InternalSnapshotOps: Send + Sync { + fn create_snapshot(&mut self, name: String, vm_clock_nsec: u64) -> Result<()>; + fn delete_snapshot(&mut self, name: String) -> Result; + fn apply_snapshot(&mut self, name: String) -> Result<()>; + fn list_snapshots(&self) -> String; + fn get_status(&self) -> Arc>; + fn rename_snapshot( + &mut self, + old_snapshot_name: String, + new_snapshot_name: String, + ) -> Result<()>; +} + +impl InternalSnapshotOps for Qcow2Driver { + fn create_snapshot(&mut self, name: String, vm_clock_nsec: u64) -> Result<()> { + // Flush the dirty metadata first, so it can drop dirty caches for reverting + // when creating snapshot failed. + self.flush()?; + self.qcow2_create_snapshot(name, vm_clock_nsec) + .map_err(|e| { + self.drop_dirty_caches(); + e + }) + } + + fn delete_snapshot(&mut self, name: String) -> Result { + // Flush the dirty metadata first, so it can drop dirty caches for reverting + // when deleting snapshot failed. 
+ self.flush()?; + self.qcow2_delete_snapshot(name).map_err(|e| { + self.drop_dirty_caches(); + e + }) + } + + fn apply_snapshot(&mut self, name: String) -> Result<()> { + self.flush()?; + self.qcow2_apply_snapshot(name).map_err(|e| { + self.drop_dirty_caches(); + e + }) + } + + fn list_snapshots(&self) -> String { + self.qcow2_list_snapshots() + } + + fn get_status(&self) -> Arc> { + self.status.clone() + } + + fn rename_snapshot( + &mut self, + old_snapshot_name: String, + new_snapshot_name: String, + ) -> Result<()> { + let snap_id = self.get_snapshot_by_name(&old_snapshot_name); + if snap_id < 0 { + bail!("Snapshot name {} doesn't exit!", old_snapshot_name); + } + + // update snapshot info in memory. + // Stratovirt-img will exit if next actions fail. And these modified snapshot information + // in memory will not affect. + self.snapshot.snapshots[snap_id as usize].name = new_snapshot_name; + + // write new snapshot info to new snapshot table. + let cluster_size = self.header.cluster_size(); + let snapshot_table_len = self.snapshot.snapshot_size; + let snapshot_table_clusters = bytes_to_clusters(snapshot_table_len, cluster_size).unwrap(); + let new_snapshots_offset = self.alloc_cluster(snapshot_table_clusters, true)?; + self.snapshot + .save_snapshot_table(new_snapshots_offset, None, true)?; + + // Delete old snapshot: Free the cluster of the old snapshot table. + self.refcount.update_refcount( + self.header.snapshots_offset, + snapshot_table_clusters, + -1, + false, + &Qcow2DiscardType::Snapshot, + )?; + self.flush()?; + + // Update the snapshot information in qcow2 header. + self.update_snapshot_info_in_header(new_snapshots_offset, 0)?; + + // Discard unused clusters. + self.refcount.sync_process_discards(OpCode::Discard); + + Ok(()) + } +} + +// SAFETY: Send and Sync is not auto-implemented for raw pointer type in Aio. +// We use Arc>> to allow used in multi-threading. +unsafe impl Send for Qcow2Driver {} +// SAFETY: The reason is same as above. +unsafe impl Sync for Qcow2Driver {} + +impl Qcow2Driver { + fn qcow2_cluster_discard(&mut self, offset: u64, nbytes: u64, args: T) -> Result<()> { + let cluster_bits = self.header.cluster_bits; + let cluster_size = self.header.cluster_size(); + let mut nb_cluster = nbytes >> cluster_bits; + let mut host_offset = offset; + + while nb_cluster > 0 { + match self.discard_in_l2_slice(host_offset, nb_cluster, &Qcow2DiscardType::Request) { + Ok(cleared) => { + nb_cluster -= cleared; + host_offset += cleared * cluster_size; + } + Err(e) => { + error!("Discard in l2 slice: {:?}", e); + break; + } + } + } + + self.table.flush().unwrap_or_else(|e| { + error!( + "Flush l2 table cache failed while discarding clusters, {:?}", + e + ) + }); + self.refcount.flush().unwrap_or_else(|e| { + error!( + "Flush refcount block failed when discarding clusters, {:?}", + e + ) + }); + + self.process_discards(args, OpCode::Discard, false) + } + + /// Align to cluster size and write zeroes. + fn qcow2_cluster_write_zeroes(&mut self, offset: u64, nbytes: u64) -> Result<()> { + // Offset and offset + nbytes should align to cluster size. 
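+        // (Requests that are not cluster-aligned are treated as a no-op here and simply
+        // return Ok(()) rather than an error.)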
+ if !is_aligned(self.header.cluster_size(), offset | nbytes) { + return Ok(()); + } + + let mut nb_cluster = bytes_to_clusters(nbytes, self.header.cluster_size())?; + let mut guest_offset = offset; + while nb_cluster > 0 { + match self.zero_in_l2_slice(guest_offset, nb_cluster) { + Ok(cleared) => { + nb_cluster -= cleared; + guest_offset += cleared * self.header.cluster_size(); + } + Err(e) => { + error!("Write zero: {:?}", e); + break; + } + } + } + + self.table + .flush() + .unwrap_or_else(|e| error!("Flush l2 table cache failed when writing zeroes, {:?}", e)); + self.refcount + .flush() + .unwrap_or_else(|e| error!("Flush refcount block failed when writing zeroes, {:?}", e)); + Ok(()) + } + + fn process_discards(&mut self, completecb: T, opcode: OpCode, unmap: bool) -> Result<()> { + trace::qcow2_process_discards(&self.driver.block_prop.id, &opcode, unmap); + let mut req_list = Vec::new(); + for task in self.refcount.discard_list.iter() { + req_list.push(CombineRequest { + iov: Vec::new(), + offset: task.offset, + nbytes: task.nbytes, + }) + } + + match opcode { + OpCode::Discard => { + self.driver + .discard(req_list, completecb) + .unwrap_or_else(|e| error!("Discard failed: {}", e)); + } + OpCode::WriteZeroes => { + self.driver + .write_zeroes(req_list, completecb, unmap) + .unwrap_or_else(|e| error!("Write zero failed: {}", e)); + } + _ => { + bail!("Unsuppoerted opcode: {:?}", opcode); + } + } + self.refcount.discard_list.clear(); + Ok(()) + } +} + +impl BlockDriverOps for Qcow2Driver { + fn create_image(&mut self, options: &CreateOptions) -> Result { + let qcow2_options = options.qcow2()?; + let cluster_size = qcow2_options.cluster_size; + let refcount_bytes = qcow2_options.refcount_bits / 8; + + let rc_table_offset: u64 = cluster_size; + let rc_block_offset: u64 = cluster_size * 2; + let mut rc_table: Vec = Vec::new(); + let mut rc_block: Vec = Vec::new(); + let zero_buf: Vec = vec![0_u8; cluster_size as usize]; + rc_table.append(&mut rc_block_offset.to_be_bytes().to_vec()); + + // Init image with 3 clusters(Header + refcount table + refcount block) + let count = match refcount_bytes { + 1 => 1_u8.to_be_bytes().to_vec(), + 2 => 1_u16.to_be_bytes().to_vec(), + 4 => 1_u32.to_be_bytes().to_vec(), + 8 => 1_u64.to_be_bytes().to_vec(), + _ => { + bail!("Refcount bytes {:?} is invalid", refcount_bytes); + } + }; + for _ in 0..3 { + rc_block.append(&mut count.clone()); + } + + let header = QcowHeader { + magic: QCOW_MAGIC, + version: qcow2_options.version, + backing_file_offset: 0, + backing_file_size: 0, + cluster_bits: qcow2_options.cluster_size.trailing_zeros(), + size: 0, + crypt_method: 0, + l1_size: 0, + l1_table_offset: 0, + refcount_table_offset: rc_table_offset, + refcount_table_clusters: 1, + nb_snapshots: 0, + snapshots_offset: 0, + incompatible_features: 0, + compatible_features: 0, + autoclear_features: 0, + refcount_order: qcow2_options.refcount_bits.trailing_zeros(), + header_length: std::mem::size_of::() as u32, + }; + + let conf = options.conf.clone(); + self.table + .init_table_info(&header, &conf) + .with_context(|| "Failed to create qcow2 table")?; + self.refcount.init_refcount_info(&header, &conf); + self.snapshot.set_cluster_size(header.cluster_size()); + self.header = header; + + // Header. + self.driver.file.set_len(3 * cluster_size)?; + + // Write zero. + for i in 0..3 { + let offset = i * cluster_size; + self.driver.file.as_ref().seek(SeekFrom::Start(offset))?; + self.driver.file.as_ref().write_all(&zero_buf.to_vec())? 
+ } + self.driver.file.as_ref().rewind()?; + self.driver.file.as_ref().write_all(&self.header.to_vec())?; + + // Refcount table. + self.driver + .file + .as_ref() + .seek(SeekFrom::Start(cluster_size))?; + self.driver.file.as_ref().write_all(&rc_table)?; + + // Refcount block table. + self.driver + .file + .as_ref() + .seek(SeekFrom::Start(cluster_size * 2))?; + self.driver.file.as_ref().write_all(&rc_block)?; + + // Create qcow2 driver. + self.load_refcount_table()?; + + // Expand image to the new size. + self.resize(qcow2_options.img_size)?; + + let image_info = format!( + "fmt=qcow2 cluster_size={} extended_l2=off compression_type=zlib size={} lazy_refcounts=off refcount_bits={}", + qcow2_options.cluster_size, + qcow2_options.img_size, + qcow2_options.refcount_bits + ); + Ok(image_info) + } + + fn query_image(&mut self, info: &mut ImageInfo) -> Result<()> { + info.format = "qcow2".to_string(); + info.virtual_size = self.disk_size()?; + info.actual_size = self.driver.actual_size()?; + info.cluster_size = Some(self.header.cluster_size()); + + if !self.snapshot.snapshots.is_empty() { + info.snap_lists = Some(self.qcow2_list_snapshots()); + } + Ok(()) + } + + fn check_image(&mut self, res: &mut CheckResult, quite: bool, fix: u64) -> Result<()> { + let cluster_size = self.header.cluster_size(); + let refcount_order = self.header.refcount_order; + let entry_bytes = ((1 << refcount_order) / 8) as usize; + let file_len = self.driver.disk_size().unwrap(); + let nb_clusters = div_round_up(file_len, cluster_size).unwrap(); + let mut qcow2_check = Qcow2Check::new(fix, quite, entry_bytes, nb_clusters as usize); + + self.check_read_snapshot_table(res, quite, fix)?; + + let mut ret = self.check_refcounts(&mut qcow2_check); + if ret.is_err() { + res.merge_result(&qcow2_check.res); + return ret; + } + + ret = self.check_fix_snapshot_table(res, quite, fix); + res.merge_result(&qcow2_check.res); + ret + } + + fn read_vectored(&mut self, iovec: Vec, offset: usize, completecb: T) -> Result<()> { + let nbytes = get_iov_size(&iovec); + self.check_request(offset, nbytes) + .with_context(|| " Invalid read request")?; + trace::block_read_vectored(&self.driver.block_prop.id, offset, nbytes); + + let mut left = iovec; + let mut req_list: Vec = Vec::new(); + let mut copied: u64 = 0; + while copied < nbytes { + let pos = offset as u64 + copied; + match self.host_offset_for_read(pos, nbytes - copied) { + Ok(HostRange::DataAddress(host_offset, cnt)) => { + let (begin, end) = iovecs_split(left, cnt); + left = end; + req_list.push(CombineRequest { + iov: begin, + offset: host_offset, + nbytes: cnt, + }); + copied += cnt; + } + Ok(HostRange::DataNotInit(cnt)) => { + let (begin, end) = iovecs_split(left, cnt); + left = end; + // SAFETY: iovecs is generated by address_space. 
+ unsafe { iovec_write_zero(&begin) }; + copied += cnt; + } + Err(e) => { + error!("Failed to read vectored: {:?}", e); + return self.driver.complete_request(OpCode::Preadv, -1, completecb); + } + } + } + + self.driver.read_vectored(req_list, completecb) + } + + fn write_vectored(&mut self, iovec: Vec, offset: usize, completecb: T) -> Result<()> { + let nbytes = get_iov_size(&iovec); + self.check_request(offset, nbytes) + .with_context(|| " Invalid write request")?; + trace::block_write_vectored(&self.driver.block_prop.id, offset, nbytes); + + let mut req_list: Vec = Vec::new(); + let mut copied: u64 = 0; + while copied < nbytes { + let pos = offset as u64 + copied; + let count = self.cluster_aligned_bytes(pos, nbytes - copied); + let host_offset = match self.host_offset_for_write(pos, count) { + Ok(host_offset) => host_offset, + Err(e) => { + error!("Failed to write vectored: {:?}", e); + return self + .driver + .complete_request(OpCode::Pwritev, -1, completecb); + } + }; + if let Some(end) = req_list.last_mut() { + if end.offset + end.nbytes == host_offset { + end.nbytes += count; + copied += count; + continue; + } + } + req_list.push(CombineRequest { + iov: Vec::new(), + offset: host_offset, + nbytes: count, + }); + copied += count; + } + + if req_list.is_empty() { + bail!("Request list is empty!"); + } + + let mut left = iovec; + for req in req_list.iter_mut() { + let (begin, end) = iovecs_split(left, req.nbytes); + req.iov = begin; + left = end; + } + + self.driver.write_vectored(req_list, completecb) + } + + fn datasync(&mut self, completecb: T) -> Result<()> { + trace::block_datasync(&self.driver.block_prop.id); + self.flush() + .unwrap_or_else(|e| error!("Flush failed when syncing data, {:?}", e)); + self.driver.datasync(completecb) + } + + fn disk_size(&mut self) -> Result { + Ok(self.virtual_disk_size()) + } + + fn discard(&mut self, offset: usize, nbytes: u64, completecb: T) -> Result<()> { + trace::block_discard(&self.driver.block_prop.id, offset, nbytes); + // Align to cluster_size. 
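+ // Worked example (illustrative, assuming a 64 KiB cluster): a request covering
+ // [0x1_8000, 0x5_8000) is shrunk to the fully covered clusters [0x2_0000, 0x5_0000):
+ // head_align = 0x8000 is skipped at the front and tail_align = 0x8000 at the end,
+ // because only whole clusters can be discarded.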
+ let file_size = self.header.size; + let align_size = self.header.cluster_size(); + let mut offset_start = std::cmp::min(offset as u64, file_size); + let offset_end = std::cmp::min(offset as u64 + nbytes, file_size); + let mut bytes = offset_end + .checked_sub(offset_start) + .with_context(|| format!("Discard :{} out of range: {}", offset_end, file_size))?; + let head_align = (align_size - offset_start % align_size) % align_size; + let tail_align = offset_end % align_size; + if head_align + tail_align >= bytes { + bytes = 0; + } else { + bytes -= head_align; + bytes -= tail_align; + } + offset_start += head_align; + + self.qcow2_cluster_discard(offset_start, bytes, completecb) + } + + fn write_zeroes( + &mut self, + offset: usize, + nbytes: u64, + completecb: T, + unmap: bool, + ) -> Result<()> { + trace::block_write_zeroes(&self.driver.block_prop.id, offset, nbytes, unmap); + let file_size = self.header.size; + let align_size = self.header.cluster_size(); + let mut offset_start = std::cmp::min(offset as u64, file_size); + let offset_end = std::cmp::min(offset_start + nbytes, file_size); + let mut total_bytes = offset_end.checked_sub(offset_start).with_context(|| { + format!( + "Write zeroes: ofset: {} nbytes: {} out of range", + offset, nbytes + ) + })?; + let mut head = offset_start % align_size; + let tail = offset_end % align_size; + + while total_bytes > 0 { + let mut num = total_bytes; + if head != 0 { + num = std::cmp::min(num, align_size - head); + head = (head + num) % align_size; + } else if tail != 0 && num > align_size { + num -= tail; + } + + // Writing buffer with zero to disk for the addr that + // is not aligned with cluster size. + // The write order is: head -> offset align to cluster size -> tail. + if !is_aligned(self.header.cluster_size(), offset_start | num) { + let buf: Vec = vec![0; num as usize]; + if let Err(e) = self.sync_write_bytes(offset_start, &buf) { + error!("Write zero failed: {:?}", e); + break; + } + } else if let Err(e) = self.qcow2_cluster_write_zeroes(offset_start, num) { + error!("Write zero failed: {:?}", e); + break; + } + + total_bytes -= num; + offset_start += num; + } + self.process_discards(completecb, OpCode::WriteZeroes, unmap) + } + + fn flush_request(&mut self) -> Result<()> { + trace::block_flush_request(&self.driver.block_prop.id); + self.driver.flush_request() + } + + fn resize(&mut self, new_size: u64) -> Result<()> { + if !is_aligned(SECTOR_SIZE, new_size) { + bail!( + "The new size {} is not aligned to {}", + new_size, + SECTOR_SIZE + ); + } + + if !self.snapshot.snapshots.is_empty() && self.header.version < 3 { + bail!("Can't resize a version 2 image with snapshots"); + } + + let old_size = self.virtual_disk_size(); + if new_size == old_size { + return Ok(()); + } + + let cluster_size = self.header.cluster_size(); + // Number of l1 table entries. + let l1_entry_size = cluster_size * (cluster_size / ENTRY_SIZE); + let new_l1_size = div_round_up(new_size, l1_entry_size).unwrap(); + + self.grow_l1_table(new_l1_size)?; + + // Write the size information back to the disk. + // The field of size in header needs 8 bytes. 
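+ // The qcow2 header stores every integer field big-endian, hence the BigEndian encoding of
+ // the 8-byte size below. Sizing aside (illustrative, assuming a 64 KiB cluster): one L1
+ // entry points to an L2 table of cluster_size / ENTRY_SIZE = 8192 entries, each mapping one
+ // cluster, so it covers 512 MiB of guest data and resizing to 4 GiB needs
+ // div_round_up(4 GiB, 512 MiB) = 8 L1 entries.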
+ let mut buf = vec![0; 8]; + BigEndian::write_u64(&mut buf, new_size); + self.sync_aio + .borrow_mut() + .write_buffer(offset_of!(QcowHeader, size) as u64, &buf)?; + self.header.size = new_size; + + self.flush() + } + + fn drain_request(&self) { + trace::block_drain_request(&self.driver.block_prop.id); + self.driver.drain_request(); + } + + fn get_inflight(&self) -> Arc { + self.driver.incomplete.clone() + } + + fn register_io_event( + &mut self, + broken: Arc, + error_cb: BlockIoErrorCallback, + ) -> Result<()> { + self.driver.register_io_event(broken, error_cb) + } + + fn unregister_io_event(&mut self) -> Result<()> { + self.driver.unregister_io_event() + } + + fn get_status(&mut self) -> Arc> { + self.status.clone() + } +} + +pub fn is_aligned(cluster_sz: u64, offset: u64) -> bool { + offset & (cluster_sz - 1) == 0 +} + +#[cfg(test)] +mod test { + use std::{ + fs::remove_file, + io::{Seek, SeekFrom, Write}, + os::{ + linux::fs::MetadataExt, + unix::{fs::OpenOptionsExt, prelude::FileExt}, + }, + }; + + use super::*; + use machine_manager::config::DiskFormat; + use util::{ + aio::{iov_to_buf_direct, Iovec, WriteZeroesState}, + file::get_file_alignment, + }; + + const CLUSTER_SIZE: u64 = 64 * 1024; + + pub struct TestImage { + pub img_bits: u64, + pub cluster_bits: u64, + pub path: String, + pub file: File, + } + + impl TestImage { + fn new(path: &str, img_bits: u64, cluster_bits: u64) -> TestImage { + let cluster_sz: u64 = 1 << cluster_bits; + let img_size: u64 = 1 << img_bits; + let l1_entry_size: u64 = 1 << (cluster_bits * 2 - 3); + let l1_size = (img_size + l1_entry_size - 1) / l1_entry_size; + let header = QcowHeader { + magic: crate::qcow2::header::QCOW_MAGIC, + version: 3, + backing_file_offset: 0, + backing_file_size: 0, + cluster_bits: cluster_bits as u32, + size: 1 << img_bits, + crypt_method: 0, + l1_size: l1_size as u32, + l1_table_offset: 3 * cluster_sz, + refcount_table_offset: cluster_sz, + refcount_table_clusters: 1, + nb_snapshots: 0, + snapshots_offset: 0, + incompatible_features: 0, + compatible_features: 0, + autoclear_features: 0, + refcount_order: 4, + header_length: std::mem::size_of::() as u32, + }; + + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .custom_flags(libc::O_CREAT | libc::O_TRUNC) + .open(path) + .unwrap(); + file.set_len(cluster_sz * 3 + u64::from(header.l1_size) * ENTRY_SIZE) + .unwrap(); + let zero_buf = + vec![0_u8; (cluster_sz * 3 + u64::from(header.l1_size) * ENTRY_SIZE) as usize]; + file.write_all(&zero_buf).unwrap(); + file.seek(SeekFrom::Start(0)).unwrap(); + file.write_all(&header.to_vec()).unwrap(); + + // Cluster 1 is the refcount table. + assert_eq!(header.refcount_table_offset, cluster_sz); + let mut refcount_table = [0_u8; ENTRY_SIZE as usize]; + BigEndian::write_u64(&mut refcount_table, cluster_sz * 2); + file.seek(SeekFrom::Start(cluster_sz)).unwrap(); + file.write_all(&refcount_table).unwrap(); + + // Clusters which has been allocated. 
+ assert_eq!(header.refcount_order, 4); + let clusters = + 3 + ((header.l1_size * ENTRY_SIZE as u32 + cluster_sz as u32 - 1) >> cluster_bits); + let mut refcount_block = Vec::new(); + for _ in 0..clusters { + refcount_block.push(0x00); + refcount_block.push(0x01); + } + file.seek(SeekFrom::Start(cluster_sz * 2)).unwrap(); + file.write_all(&refcount_block).unwrap(); + + TestImage { + img_bits, + cluster_bits, + path: path.to_string(), + file, + } + } + + fn create_qcow2_driver(&self, conf: BlockProperty) -> Qcow2Driver<()> { + let file = Arc::new( + std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(&self.path) + .unwrap(), + ); + let aio = Aio::new( + Arc::new(SyncAioInfo::complete_func), + util::aio::AioEngine::Off, + None, + ) + .unwrap(); + let mut qcow2_driver = Qcow2Driver::new(file, aio, conf.clone()).unwrap(); + qcow2_driver.load_metadata(conf).unwrap(); + qcow2_driver + } + + /// Write full the disk with value disorderly. + fn write_full_disk(&self, qcow2_driver: &mut Qcow2Driver<()>, value: u8) -> Result<()> { + let buf = vec![value; 1 << self.cluster_bits]; + // Simulate discontinuity of host offset. + let mod_range = 2; + for mod_value in 0..mod_range { + for i in 0..1 << (self.img_bits - self.cluster_bits) { + if i % mod_range == mod_value { + let offset: u64 = i * (1 << self.cluster_bits); + qcow2_write(qcow2_driver, &buf, offset as usize)?; + } + } + } + Ok(()) + } + + fn get_disk_size(&mut self) -> Result { + let meta_data = self.file.metadata()?; + let blk_size = meta_data.st_blocks() * DEFAULT_SECTOR_SIZE; + Ok(blk_size) + } + } + + impl Drop for TestImage { + fn drop(&mut self) { + remove_file(&self.path).unwrap() + } + } + + fn vec_is_zero(vec: &[u8]) -> bool { + for elem in vec { + if elem != &0 { + return false; + } + } + true + } + + struct TestData { + data: u8, + sz: usize, + } + + impl TestData { + fn new(data: u8, sz: usize) -> Self { + Self { data, sz } + } + } + + struct TestRwCase { + riovec: Vec, + wiovec: Vec, + data: Vec, + offset: usize, + sz: u64, + } + + pub fn create_qcow2(path: &str) -> (TestImage, Qcow2Driver<()>) { + let mut image = TestImage::new(path, 30, 16); + let file = Arc::new( + std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(path) + .unwrap(), + ); + let aio = Aio::new( + Arc::new(SyncAioInfo::complete_func), + util::aio::AioEngine::Off, + None, + ) + .unwrap(); + let (req_align, buf_align) = get_file_alignment(&image.file, true); + let conf = BlockProperty { + id: path.to_string(), + format: DiskFormat::Qcow2, + iothread: None, + direct: true, + req_align, + buf_align, + discard: false, + write_zeroes: WriteZeroesState::Off, + l2_cache_size: None, + refcount_cache_size: None, + }; + image.file = file.try_clone().unwrap(); + let mut qcow2_driver = Qcow2Driver::new(file, aio, conf.clone()).unwrap(); + qcow2_driver.load_metadata(conf).unwrap(); + (image, qcow2_driver) + } + + fn qcow2_read(qcow2: &mut Qcow2Driver<()>, buf: &mut [u8], offset: usize) -> Result<()> { + qcow2.read_vectored( + vec![Iovec { + iov_base: buf.as_ptr() as u64, + iov_len: buf.len() as u64, + }], + offset, + (), + ) + } + + fn qcow2_write(qcow2: &mut Qcow2Driver<()>, buf: &[u8], offset: usize) -> Result<()> { + qcow2.write_vectored( + vec![Iovec { + iov_base: buf.as_ptr() as u64, + iov_len: buf.len() as u64, + }], + offset, + (), + ) + } + + #[test] + fn test_read_zero() { + let path = "/tmp/block_backend_test_read_zero.qcow2"; + let (mut image, mut qcow2) = create_qcow2(path); + let org_len = image.file.seek(SeekFrom::End(0)).unwrap(); + + 
let mut buf = vec![1_u8; 128]; + qcow2_read(&mut qcow2, &mut buf, 40).unwrap(); + assert_eq!(buf, vec![0; 128]); + let mut buf = vec![2_u8; 512]; + qcow2_read(&mut qcow2, &mut buf, 65536).unwrap(); + assert_eq!(buf, vec![0; 512]); + for i in 0..100 { + let sz = 100_000; + let mut buf = vec![3_u8; sz]; + qcow2_read(&mut qcow2, &mut buf, 655350 + i * sz).unwrap(); + assert_eq!(buf, vec![0; 100_000]); + } + + let len = image.file.seek(SeekFrom::End(0)).unwrap(); + assert_eq!(org_len, len); + } + + #[test] + fn test_write_single_cluster() { + let path = "/tmp/block_backend_test_write_single_cluster.qcow2"; + let (_, mut qcow2) = create_qcow2(path); + + let wbuf = vec![7_u8; CLUSTER_SIZE as usize]; + qcow2_write(&mut qcow2, &wbuf, 0).unwrap(); + let mut rbuf = vec![0_u8; CLUSTER_SIZE as usize]; + qcow2_read(&mut qcow2, &mut rbuf, 0).unwrap(); + assert_eq!(rbuf, wbuf); + let cnt = qcow2.refcount.get_refcount(0).unwrap(); + assert_eq!(cnt, 1); + + let wbuf = vec![5_u8; 1000]; + qcow2_write(&mut qcow2, &wbuf, 2000).unwrap(); + let mut rbuf = vec![0_u8; 1000]; + qcow2_read(&mut qcow2, &mut rbuf, 2000).unwrap(); + assert_eq!(rbuf, wbuf); + let cnt = qcow2.refcount.get_refcount(2000).unwrap(); + assert_eq!(cnt, 1); + } + + fn test_write_multi_cluster_helper( + qcow2: &mut Qcow2Driver<()>, + off: usize, + sz: usize, + cnt: u8, + ) { + let mut offset = off; + for i in 0..cnt { + let buf = vec![i + 1; sz]; + qcow2_write(qcow2, &buf, offset).unwrap(); + offset += buf.len(); + } + let mut offset = off; + for i in 0..cnt { + let mut buf = vec![i + 1; sz]; + qcow2_read(qcow2, &mut buf, offset).unwrap(); + for (_, item) in buf.iter().enumerate() { + assert_eq!(item, &(i + 1)); + } + offset += buf.len(); + } + } + + #[test] + fn test_write_multi_cluster() { + let path = "/tmp/block_backend_test_write_multi_cluster.qcow2"; + let (_, mut qcow2) = create_qcow2(path); + + test_write_multi_cluster_helper(&mut qcow2, 832574, 100_000, 200); + test_write_multi_cluster_helper(&mut qcow2, 0, 16, 250); + test_write_multi_cluster_helper(&mut qcow2, 7689, 512, 99); + test_write_multi_cluster_helper(&mut qcow2, 56285351, 4096, 123); + } + + #[test] + fn test_invalid_read_write() { + let path = "/tmp/block_backend_test_invalid_read_write.qcow2"; + let (_, mut qcow2) = create_qcow2(path); + + let mut buf = vec![0_u8; 100]; + let disk_size = qcow2.disk_size().unwrap(); + let res = qcow2_write(&mut qcow2, &buf, disk_size as usize + 1); + assert!(res.is_err()); + + let res = qcow2_read(&mut qcow2, &mut buf, disk_size as usize + 100); + assert!(res.is_err()); + } + + fn generate_iovecs( + buf_list: &mut Vec>, + list: &Vec, + ) -> (Vec, Vec) { + let mut riovec = Vec::new(); + let mut wiovec = Vec::new(); + for item in list { + let buf = vec![0_u8; item.sz]; + riovec.push(Iovec::new(buf.as_ptr() as u64, buf.len() as u64)); + buf_list.push(buf); + let buf = vec![item.data; item.sz]; + wiovec.push(Iovec::new(buf.as_ptr() as u64, buf.len() as u64)); + buf_list.push(buf); + } + (riovec, wiovec) + } + + fn generate_rw_case_list() -> (Vec, Vec>) { + let mut list = Vec::new(); + let mut buf_list = Vec::new(); + let test_data = vec![ + TestData::new(1, 100_000), + TestData::new(2, 100_000), + TestData::new(3, 100_000), + ]; + let (riovec, wiovec) = generate_iovecs(&mut buf_list, &test_data); + list.push(TestRwCase { + riovec, + wiovec, + data: test_data, + offset: 12590, + sz: 100_000 * 3, + }); + + let test_data = vec![ + TestData::new(1, 1_000), + TestData::new(2, 100_000), + TestData::new(3, 10_000), + TestData::new(4, 20_000), + 
TestData::new(5, 80_000), + ]; + let (riovec, wiovec) = generate_iovecs(&mut buf_list, &test_data); + list.push(TestRwCase { + riovec, + wiovec, + data: test_data, + offset: 8935201, + sz: 211_000, + }); + + (list, buf_list) + } + + #[test] + fn test_read_write_vectored() { + let path = "/tmp/block_backend_test_read_write_vectored.qcow2"; + let (_, mut qcow2) = create_qcow2(path); + let (case_list, _buf_list) = generate_rw_case_list(); + for case in &case_list { + qcow2 + .write_vectored(case.wiovec.clone(), case.offset, ()) + .unwrap(); + qcow2 + .read_vectored(case.riovec.clone(), case.offset, ()) + .unwrap(); + + let mut wbuf = vec![0; case.sz as usize]; + let mut rbuf = vec![0; case.sz as usize]; + // SAFETY: wiovec is valid. + let wsz = unsafe { iov_to_buf_direct(&case.wiovec, 0, &mut wbuf).unwrap() }; + // SAFETY: riovec is valid. + let rsz = unsafe { iov_to_buf_direct(&case.riovec, 0, &mut rbuf).unwrap() }; + assert_eq!(wsz, case.sz as usize); + assert_eq!(rsz, case.sz as usize); + assert_eq!(wbuf, rbuf); + } + } + + fn generate_rw_random_list() -> (Vec, Vec>) { + let mut list = Vec::new(); + let mut buf_list = Vec::new(); + let test_data = vec![TestData::new(1, CLUSTER_SIZE as usize)]; + let (riovec, wiovec) = generate_iovecs(&mut buf_list, &test_data); + list.push(TestRwCase { + riovec, + wiovec, + data: test_data, + offset: 0, + sz: CLUSTER_SIZE, + }); + let test_data = vec![TestData::new(2, CLUSTER_SIZE as usize)]; + let (riovec, wiovec) = generate_iovecs(&mut buf_list, &test_data); + list.push(TestRwCase { + riovec, + wiovec, + data: test_data, + offset: 2 * CLUSTER_SIZE as usize, + sz: CLUSTER_SIZE, + }); + let test_data = vec![TestData::new(3, CLUSTER_SIZE as usize)]; + let (riovec, wiovec) = generate_iovecs(&mut buf_list, &test_data); + list.push(TestRwCase { + riovec, + wiovec, + data: test_data, + offset: 4 * CLUSTER_SIZE as usize, + sz: CLUSTER_SIZE, + }); + let test_data = vec![TestData::new(4, CLUSTER_SIZE as usize)]; + let (riovec, wiovec) = generate_iovecs(&mut buf_list, &test_data); + list.push(TestRwCase { + riovec, + wiovec, + data: test_data, + offset: CLUSTER_SIZE as usize, + sz: CLUSTER_SIZE, + }); + let test_data = vec![TestData::new(5, CLUSTER_SIZE as usize)]; + let (riovec, wiovec) = generate_iovecs(&mut buf_list, &test_data); + list.push(TestRwCase { + riovec, + wiovec, + data: test_data, + offset: 3 * CLUSTER_SIZE as usize, + sz: CLUSTER_SIZE, + }); + + (list, buf_list) + } + + #[test] + fn test_read_write_random() { + let path = "/tmp/block_backend_test_read_write_random.qcow2"; + let (_, mut qcow2) = create_qcow2(path); + let (mut case_list, _buf_list) = generate_rw_random_list(); + for case in &case_list { + qcow2 + .write_vectored(case.wiovec.clone(), case.offset, ()) + .unwrap(); + qcow2 + .read_vectored(case.riovec.clone(), case.offset, ()) + .unwrap(); + + let mut wbuf = vec![0; case.sz as usize]; + let mut rbuf = vec![0; case.sz as usize]; + // SAFETY: wiovec is valid. + let wsz = unsafe { iov_to_buf_direct(&case.wiovec, 0, &mut wbuf).unwrap() }; + // SAFETY: riovec is valid. + let rsz = unsafe { iov_to_buf_direct(&case.riovec, 0, &mut rbuf).unwrap() }; + assert_eq!(wsz, case.sz as usize); + assert_eq!(rsz, case.sz as usize); + assert_eq!(wbuf, rbuf); + } + + // read all write data once. 
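+ // The cases above wrote clusters 0..5 out of order (offsets 0, 2, 4, 1, 3 in cluster
+ // units), so one contiguous 5-cluster read from offset 0 should reproduce each case's
+ // payload at its own offset, which the sorted-by-offset check below verifies.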
+ let buf = vec![0_u8; 5 * CLUSTER_SIZE as usize]; + let riovecs = vec![Iovec::new(buf.as_ptr() as u64, 5 * CLUSTER_SIZE)]; + qcow2.read_vectored(riovecs, 0, ()).unwrap(); + + case_list.sort_by(|a, b| a.offset.cmp(&b.offset)); + let mut idx = 0; + for case in case_list.iter() { + for item in case.data.iter() { + assert_eq!(buf[idx..(idx + item.sz)].to_vec(), vec![item.data; item.sz]); + idx += item.sz; + } + } + } + + /// Test the basic functions of alloc cluster. + /// TestStep: + /// 1. Init qcow2 file driver with property of discard and write zero. + /// 2. Write full of disk and then send discard command to recycle space. + /// 3. Call the function for alloc_cluster with args of write zero + /// and read data from the corresponding address of the file. + /// Expect: + /// Newly allocated data is full of zero. + #[test] + fn test_alloc_cluster_with_zero() { + // Create a new image, with size = 16M, cluster_size = 64K. + let image_bits = 24; + let cluster_bits = 16; + let paths = [ + "/tmp/alloc_cluster_with_zero.qcow2", + "./alloc_cluster_with_zero.qcow2", + ]; + for path in paths { + let alloc_clusters: Vec = vec![1, 2, 4, 8, 16, 32]; + for n_clusters in alloc_clusters { + let image = TestImage::new(path, image_bits, cluster_bits); + let (req_align, buf_align) = get_file_alignment(&image.file, true); + let conf = BlockProperty { + id: path.to_string(), + format: DiskFormat::Qcow2, + iothread: None, + direct: true, + req_align, + buf_align, + discard: true, + write_zeroes: WriteZeroesState::On, + l2_cache_size: None, + refcount_cache_size: None, + }; + let mut qcow2_driver = image.create_qcow2_driver(conf.clone()); + + assert!(image.write_full_disk(&mut qcow2_driver, 1).is_ok()); + assert!(qcow2_driver.discard(0, 1 << image_bits, ()).is_ok()); + + let times: u64 = (1 << (image_bits - cluster_bits)) / n_clusters; + for _time in 0..times { + let addr = qcow2_driver.alloc_cluster(n_clusters, true).unwrap(); + for i in 0..n_clusters { + let mut buf = vec![1_u8; qcow2_driver.header.cluster_size() as usize]; + let offset = addr + i * qcow2_driver.header.cluster_size(); + assert!(image.file.read_at(&mut buf, offset).is_ok()); + assert!(vec_is_zero(&buf)); + } + } + } + } + } + + /// Test the basic functions of discard. + /// TestStep: + /// 1. Init qcow2 file driver with property of discard. + /// 2. Create a new qcow2 image, and then write full disk. + /// 3. Send discard command. + /// Expect: + /// The size of disk space has been reduced. + #[test] + fn test_discard_basic() { + let path = "./discard_basic.qcow2"; + // Create a new image, with size = 16M, cluster_size = 64K. 
+ let image_bits = 24; + let cluster_bits = 16; + let cluster_size = 1 << cluster_bits; + let mut conf = BlockProperty { + id: path.to_string(), + format: DiskFormat::Qcow2, + iothread: None, + direct: true, + req_align: 512, + buf_align: 512, + discard: true, + write_zeroes: WriteZeroesState::Off, + l2_cache_size: None, + refcount_cache_size: None, + }; + + // (offset_begin, offset_end) + let test_data: Vec<(u64, u64)> = vec![ + (0, cluster_size * 5), + (cluster_size * 5, cluster_size * 10), + (cluster_size * 5 + 32768, cluster_size * 10), + (cluster_size * 5, cluster_size * 10 + 32768), + (cluster_size * 5, cluster_size * 5 + 32768), + (cluster_size * 5 + 32768, cluster_size * 5 + 32768), + (cluster_size * 5 + 32768, cluster_size * 5 + 49152), + (cluster_size * 5 + 32768, cluster_size * 6), + (cluster_size * 5 + 32768, cluster_size * 10 + 32768), + (0, 1 << image_bits), + ]; + + // Qcow2 driver will align the offset of requests according to the cluster size, + // and then use the aligned interval for recying disk. + for (offset_begin, offset_end) in test_data { + let mut image = TestImage::new(path, image_bits, cluster_bits); + let (req_align, buf_align) = get_file_alignment(&image.file, true); + conf.req_align = req_align; + conf.buf_align = buf_align; + let mut qcow2_driver = image.create_qcow2_driver(conf.clone()); + + assert!(image.write_full_disk(&mut qcow2_driver, 1).is_ok()); + let offset_begin_algn = round_up(offset_begin, cluster_size).unwrap(); + let offset_end_align = round_down(offset_end, cluster_size).unwrap(); + let expect_discard_space = if offset_end_align <= offset_begin_algn { + 0 + } else { + offset_end_align - offset_begin_algn + }; + let full_image_size = image.get_disk_size().unwrap(); + assert!(qcow2_driver + .discard(offset_begin as usize, offset_end - offset_begin, ()) + .is_ok()); + assert!(qcow2_driver.flush().is_ok()); + + let discard_image_size = image.get_disk_size().unwrap(); + assert!(full_image_size < discard_image_size + expect_discard_space + cluster_size / 2); + assert!(full_image_size > discard_image_size + expect_discard_space - cluster_size / 2); + // TODO: Check the metadata for qcow2 image. + } + } + + /// Test the discard during the delete snapshot. + /// TestStep: + /// 1. Init qcow2 file driver with property of discard. + /// 2. Create a new qcow2 image, and then write full disk. + /// 3. Create a new snapshot, and then rewrite the disk, which will result in copy on write. + /// 4. Delete snapshot, which will result in discard. + /// Expect: + /// The size of disk space has been reduced. + #[test] + fn test_snapshot_with_discard() { + let path = "./snapshot_with_discard.qcow2"; + // Create a new image, with size = 1G, cluster_size = 64K. + let image_bits = 24; + let cluster_bits = 16; + let cluster_size = 1 << cluster_bits; + let mut image = TestImage::new(path, image_bits, cluster_bits); + let (req_align, buf_align) = get_file_alignment(&image.file, true); + let conf = BlockProperty { + id: path.to_string(), + format: DiskFormat::Qcow2, + iothread: None, + direct: true, + req_align, + buf_align, + discard: true, + write_zeroes: WriteZeroesState::Off, + l2_cache_size: None, + refcount_cache_size: None, + }; + + let mut qcow2_driver = image.create_qcow2_driver(conf); + assert!(image.write_full_disk(&mut qcow2_driver, 1).is_ok()); + + let disk_size_1 = image.get_disk_size().unwrap(); + // Create a snapshot and write full disk again, which will result in copy on write. + // Delete the snapshot, which will result in discard, and recycle disk size. 
+ assert!(qcow2_driver + .create_snapshot("test_snapshot_1".to_string(), 1000) + .is_ok()); + assert!(image.write_full_disk(&mut qcow2_driver, 2).is_ok()); + assert!(qcow2_driver.flush().is_ok()); + let disk_size_2 = image.get_disk_size().unwrap(); + // Data cluster + 1 snapshot table + l1 table(cow) + l2 table(cow) + // But, the cluster of snapshots may not be fully allocated + assert!(disk_size_1 < disk_size_2); + + assert!(qcow2_driver + .create_snapshot("test_snapshot_2".to_string(), 1000) + .is_ok()); + assert!(image.write_full_disk(&mut qcow2_driver, 2).is_ok()); + assert!(qcow2_driver.flush().is_ok()); + let disk_size_3 = image.get_disk_size().unwrap(); + // Data cluster + l1 table(cow) + l2 table(cow) + assert!(disk_size_2 < disk_size_3); + + // Snapshot delete will result in discard, which will recycle the disk space. + assert!(qcow2_driver + .delete_snapshot("test_snapshot_2".to_string()) + .is_ok()); + let disk_size_4 = image.get_disk_size().unwrap(); + // The actual size of the file should not exceed 1 cluster. + assert!(disk_size_4 > disk_size_2 - cluster_size / 2); + assert!(disk_size_4 < disk_size_2 + cluster_size / 2); + + assert!(qcow2_driver + .delete_snapshot("test_snapshot_1".to_string()) + .is_ok()); + let disk_size_5 = image.get_disk_size().unwrap(); + assert!(disk_size_5 > disk_size_1 - cluster_size / 2); + assert!(disk_size_5 < disk_size_1 + cluster_size / 2); + } + + /// Test the basic functions of write zero. + /// TestStep: + /// 1. Init qcow2 file driver with property of write zero. + /// 2. Create a new qcow2 image, and then write full disk with value of 1. + /// 3. Send write zero command with (offset, nbytes). + /// Expect: + /// 1. The data read from disk of the specified interval is zero. + #[test] + fn test_write_zero_basic() { + // Create a new image, with size = 16M, cluster_size = 64K. 
+ let path = "./discard_write_zero.qcow2"; + let image_bits = 24; + let cluster_bits = 16; + + // let test_data = vec![65536, 65536 + 32768, 65536 * 2, 65536 + 32768]; + let cluster_size: u64 = 1 << cluster_bits; + // (offset_begin, offset_end) + let test_data: Vec<(u64, u64)> = vec![ + (0, cluster_size * 5), + (cluster_size * 5, cluster_size * 10), + (cluster_size * 5 + 32768, cluster_size * 10), + (cluster_size * 5, cluster_size * 10 + 32768), + (cluster_size * 5, cluster_size * 5 + 32768), + (cluster_size * 5 + 32768, cluster_size * 5 + 32768), + (cluster_size * 5 + 32768, cluster_size * 5 + 49152), + (cluster_size * 5 + 32768, cluster_size * 6), + (cluster_size * 5 + 32768, cluster_size * 10 + 32768), + (0, 1 << image_bits), + ]; + for (offset_start, offset_end) in test_data { + for discard in [true, false] { + let image = TestImage::new(path, image_bits, cluster_bits); + let conf = BlockProperty { + id: path.to_string(), + format: DiskFormat::Qcow2, + iothread: None, + direct: true, + req_align: 512, + buf_align: 512, + discard, + write_zeroes: WriteZeroesState::On, + l2_cache_size: None, + refcount_cache_size: None, + }; + + let mut qcow2_driver = image.create_qcow2_driver(conf); + assert!(image.write_full_disk(&mut qcow2_driver, 1).is_ok()); + + assert!(qcow2_driver + .write_zeroes( + offset_start as usize, + offset_end - offset_start, + (), + discard + ) + .is_ok()); + + let mut read_buf = vec![1_u8; (offset_end - offset_start) as usize]; + assert!( + qcow2_read(&mut qcow2_driver, &mut read_buf, offset_start as usize).is_ok() + ); + assert!(vec_is_zero(&read_buf)); + } + } + } + + #[test] + fn test_snapshot_basic() { + // TODO: + // 1) add check step when stratovirt-img works. + // 2) add snapshot apply step to check function. + let path = "/tmp/snashot_test.qcow2"; + let cluster_bits = 16; + let cluster_size = 1 << cluster_bits; + let (_, mut qcow2) = create_qcow2(path); + + let guest_offsets = [ + cluster_size * 0, + cluster_size * 10, + cluster_size * 100, + cluster_size * 1000, + cluster_size * 10000, + ]; + + let wbuf = vec![1_u8; CLUSTER_SIZE as usize]; + // Write data and create snapshot 'snap1'. + for offset in guest_offsets { + qcow2_write(&mut qcow2, &wbuf, offset).unwrap(); + } + qcow2.qcow2_create_snapshot("snap1".to_string(), 0).unwrap(); + + let wbuf = vec![2_u8; CLUSTER_SIZE as usize]; + // Write data and create snapshot 'snap2'. + for offset in guest_offsets { + qcow2_write(&mut qcow2, &wbuf, offset).unwrap(); + } + qcow2.qcow2_create_snapshot("snap2".to_string(), 0).unwrap(); + + // Read 1 byte for checking. Add more checks after implementing snapshot restore. + let mut rbuf = vec![0_u8; 1]; + for offset in guest_offsets { + qcow2_read(&mut qcow2, &mut rbuf, offset).unwrap(); + assert_eq!(rbuf, [2]); + } + + // Delete snapshot 'snap2'. + qcow2.qcow2_delete_snapshot("snap2".to_string()).unwrap(); + + // Delete snapshot 'snap1'. 
+ qcow2.qcow2_delete_snapshot("snap1".to_string()).unwrap(); + } + + fn get_host_offset(qcow2_driver: &mut Qcow2Driver<()>, guest_offset: u64) -> u64 { + let l2_index = qcow2_driver.table.get_l2_table_index(guest_offset); + if qcow2_driver + .table + .get_l2_table_cache_entry(guest_offset) + .is_none() + { + let l2_address = + qcow2_driver.table.get_l1_table_entry(guest_offset) & L1_TABLE_OFFSET_MASK; + let l2_cluster = qcow2_driver.load_cluster(l2_address).unwrap(); + let l2_table = Rc::new(RefCell::new( + CacheTable::new(l2_address, l2_cluster, ENTRY_SIZE_U64).unwrap(), + )); + qcow2_driver.table.cache_l2_table(l2_table).unwrap(); + } + + // All used l2 table will be cached for it's little data size in these tests. + let l2_table = qcow2_driver + .table + .get_l2_table_cache_entry(guest_offset) + .unwrap(); + let l2_entry = l2_table + .borrow_mut() + .get_entry_map(l2_index as usize) + .unwrap(); + + l2_entry & L2_TABLE_OFFSET_MASK + } + + // Change snapshot table offset to unaligned address which will lead to error in refcount update + // process. + #[test] + fn simulate_revert_snapshot_creation() { + let path = "/tmp/revert_create.qcow2"; + let (_image, mut qcow2_driver) = create_qcow2(path); + + // Write some random data. + let (case_list, _buf_list) = generate_rw_random_list(); + for case in &case_list { + qcow2_driver + .write_vectored(case.wiovec.clone(), case.offset, ()) + .unwrap(); + } + + // Change snapshot table offset to a fake address which is not align to cluster size and + // it will fail in update_refcount. + qcow2_driver.header.snapshots_offset = 0x1111; + let result = qcow2_driver.create_snapshot("snapshot1".to_string(), 0); + assert!(result.is_err()); + + // Check + // 1) No snapshot. + assert_eq!(qcow2_driver.header.nb_snapshots, 0); + // 2) Refcount is right. + for case in &case_list { + let host_offset = get_host_offset(&mut qcow2_driver, case.offset as u64); + assert_eq!(qcow2_driver.refcount.get_refcount(host_offset).unwrap(), 1); + } + // 3) L1 table refcount is right. + assert_eq!( + qcow2_driver + .refcount + .get_refcount(qcow2_driver.header.l1_table_offset) + .unwrap(), + 1 + ); + // 4) L2 table refcount is right. + let mut l1_table = qcow2_driver.table.l1_table.clone(); + for l1_entry in l1_table.iter_mut() { + if *l1_entry == 0 { + // No l2 table. + continue; + } + assert_eq!( + qcow2_driver + .refcount + .get_refcount(*l1_entry & L1_TABLE_OFFSET_MASK) + .unwrap(), + 1 + ); + } + } + + // Change snapshot table offset to unaligned address which will lead to error in refcount update + // process. + #[test] + fn simulate_revert_snapshot_deletion() { + let path = "/tmp/revert_delete.qcow2"; + let (_image, mut qcow2_driver) = create_qcow2(path); + + // Write some random data. + let (case_list, _buf_list) = generate_rw_random_list(); + for case in &case_list { + qcow2_driver + .write_vectored(case.wiovec.clone(), case.offset, ()) + .unwrap(); + } + + // Create two new snapshots. + qcow2_driver + .qcow2_create_snapshot("snaptest1".to_string(), 0) + .unwrap(); + qcow2_driver + .qcow2_create_snapshot("snaptest2".to_string(), 0) + .unwrap(); + + // Check. + // 1) 2 snapshots: snaptest1, snaptest2. + assert_eq!(qcow2_driver.header.nb_snapshots, 2); + assert_eq!(qcow2_driver.snapshot.snapshots[0].name, "snaptest1"); + assert_eq!(qcow2_driver.snapshot.snapshots[1].name, "snaptest2"); + // 2) Data cluster refcount is right. 
+ for case in &case_list { + let host_offset = get_host_offset(&mut qcow2_driver, case.offset as u64); + assert_eq!(qcow2_driver.refcount.get_refcount(host_offset).unwrap(), 3); + } + // 3) L1 table refcount is right. + assert_eq!( + qcow2_driver + .refcount + .get_refcount(qcow2_driver.header.l1_table_offset) + .unwrap(), + 1 + ); + // 4) L2 table refcount is right. + let mut l1_table = qcow2_driver.table.l1_table.clone(); + for l1_entry in l1_table.iter_mut() { + if *l1_entry == 0 { + // No l2 table. + continue; + } + assert_eq!( + qcow2_driver + .refcount + .get_refcount(*l1_entry & L1_TABLE_OFFSET_MASK) + .unwrap(), + 3 + ); + } + + // Change snapshot table offset to a fake address which is not align to cluster size and + // it will fail in update_refcount. + qcow2_driver.header.snapshots_offset = 0x1111; + let result = qcow2_driver.delete_snapshot("snapshot1".to_string()); + assert!(result.is_err()); + + // Check again. + // 1) 2 snapshots: snaptest1, snaptest2. + assert_eq!(qcow2_driver.header.nb_snapshots, 2); + assert_eq!(qcow2_driver.snapshot.snapshots[0].name, "snaptest1"); + assert_eq!(qcow2_driver.snapshot.snapshots[1].name, "snaptest2"); + // 2) Data cluster refcount is right. + for case in &case_list { + let host_offset = get_host_offset(&mut qcow2_driver, case.offset as u64); + assert_eq!(qcow2_driver.refcount.get_refcount(host_offset).unwrap(), 3); + } + // 3) L1 table refcount is right. + assert_eq!( + qcow2_driver + .refcount + .get_refcount(qcow2_driver.header.l1_table_offset) + .unwrap(), + 1 + ); + // 4) L2 table refcount is right. + let mut l1_table = qcow2_driver.table.l1_table.clone(); + for l1_entry in l1_table.iter_mut() { + if *l1_entry == 0 { + // No l2 table. + continue; + } + assert_eq!( + qcow2_driver + .refcount + .get_refcount(*l1_entry & L1_TABLE_OFFSET_MASK) + .unwrap(), + 3 + ); + } + } +} diff --git a/block_backend/src/qcow2/refcount.rs b/block_backend/src/qcow2/refcount.rs new file mode 100644 index 0000000000000000000000000000000000000000..6e403da5d972bdf4a84348a35aa9ce3fe955ec2d --- /dev/null +++ b/block_backend/src/qcow2/refcount.rs @@ -0,0 +1,1036 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{cell::RefCell, collections::HashMap, rc::Rc}; + +use anyhow::{bail, Context, Result}; +use log::{error, info}; + +use crate::{ + qcow2::{ + bytes_to_clusters, + cache::{CacheTable, Qcow2Cache, ENTRY_SIZE_U16}, + header::QcowHeader, + is_aligned, SyncAioInfo, ENTRY_SIZE, REFCOUNT_TABLE_OFFSET_MASK, + }, + BlockProperty, +}; +use util::{ + aio::OpCode, + num_ops::{div_round_up, round_up}, +}; + +// The max refcount table size default is 4 clusters; +const MAX_REFTABLE_NUM: u64 = 4; + +// Default refcount table map length, which can describe 512GiB data for 64Kib cluster. 
+const REFCOUNT_TABLE_MAP_LEN: usize = 256; + +#[derive(Eq, PartialEq, Clone)] +pub enum Qcow2DiscardType { + Never, + Always, + Request, + Snapshot, + Other, +} + +#[derive(Clone, Default)] +pub struct DiscardTask { + pub offset: u64, + pub nbytes: u64, +} + +impl DiscardTask { + pub fn is_overlap(&self, task: &DiscardTask) -> bool { + !(self.offset > task.offset + task.nbytes || task.offset > self.offset + self.nbytes) + } + + pub fn merge_task(&mut self, task: &DiscardTask) { + let offset = std::cmp::min(self.offset, task.offset); + let end_offset = std::cmp::max(self.offset + self.nbytes, task.offset + task.nbytes); + let nbytes = end_offset - offset; + self.offset = offset; + self.nbytes = nbytes; + } +} + +#[derive(Clone)] +pub struct RefCount { + pub refcount_table: Vec, + pub refcount_table_map: HashMap, + sync_aio: Rc>, + pub(crate) refcount_blk_cache: Qcow2Cache, + pub discard_list: Vec, + /// Pass the discard operation if refcount of cluster decrease to 0. + pub discard_passthrough: Vec, + free_cluster_index: u64, + pub(crate) refcount_table_offset: u64, + pub(crate) refcount_table_clusters: u32, + /// Number of refcount table entries. + pub(crate) refcount_table_size: u64, + pub(crate) refcount_blk_bits: u32, + /// Number of refcount block entries. + pub(crate) refcount_blk_size: u32, + refcount_max: u64, + /// Cluster size in bytes. + cluster_size: u64, + cluster_bits: u32, +} + +impl RefCount { + pub fn new(sync_aio: Rc>) -> Self { + RefCount { + refcount_table: Vec::new(), + refcount_table_map: HashMap::with_capacity(REFCOUNT_TABLE_MAP_LEN), + sync_aio, + refcount_blk_cache: Qcow2Cache::default(), + discard_list: Vec::new(), + discard_passthrough: Vec::new(), + free_cluster_index: 0, + refcount_table_offset: 0, + refcount_table_clusters: 0, + refcount_table_size: 0, + refcount_blk_bits: 0, + refcount_blk_size: 0, + refcount_max: 0, + cluster_size: 0, + cluster_bits: 0, + } + } + + pub fn init_refcount_info(&mut self, header: &QcowHeader, conf: &BlockProperty) { + // Update discard_pass_through depend on config. 
+ self.discard_passthrough.push(Qcow2DiscardType::Always); + if conf.discard { + self.discard_passthrough.push(Qcow2DiscardType::Request); + self.discard_passthrough.push(Qcow2DiscardType::Snapshot); + } + + self.refcount_table_offset = header.refcount_table_offset; + self.refcount_table_clusters = header.refcount_table_clusters; + self.refcount_table_size = + u64::from(header.refcount_table_clusters) * header.cluster_size() / ENTRY_SIZE; + self.refcount_blk_bits = header.cluster_bits + 3 - header.refcount_order; + self.refcount_blk_size = 1 << self.refcount_blk_bits; + self.cluster_bits = header.cluster_bits; + self.cluster_size = header.cluster_size(); + let refcount_bits = 1 << header.refcount_order; + self.refcount_max = 1 << (refcount_bits - 1); + self.refcount_max += self.refcount_max - 1; + let sz = if let Some(rc_size) = conf.refcount_cache_size { + rc_size / header.cluster_size() + } else { + MAX_REFTABLE_NUM + }; + info!("Driver {} refcount cache size {}", conf.id, sz); + self.refcount_blk_cache = Qcow2Cache::new(sz as usize); + } + + pub fn start_of_cluster(&self, offset: u64) -> u64 { + offset & !(self.cluster_size - 1) + } + + fn cluster_in_rc_block(&self, cluster_index: u64) -> u64 { + cluster_index & u64::from(self.refcount_blk_size - 1) + } + + /// Allocate a continuous space that is not referenced by existing refcount table + fn alloc_clusters_with_noref(&mut self, size: u64) -> Result { + if !self.discard_list.is_empty() { + self.sync_process_discards(OpCode::Discard); + } + + let nb_clusters = bytes_to_clusters(size, self.cluster_size).unwrap(); + let mut free_clusters: u64 = 0; + while free_clusters < nb_clusters { + let offset = self.free_cluster_index << self.cluster_bits; + self.free_cluster_index += 1; + if self.get_refcount(offset)? != 0 { + free_clusters = 0; + } else { + free_clusters += 1; + } + } + + let cluster_index = self.free_cluster_index - nb_clusters; + Ok(cluster_index << self.cluster_bits) + } + + /// Allocate a contiguous space that already has a reference count in the refcount table + pub fn alloc_clusters_with_ref(&mut self, header: &mut QcowHeader, size: u64) -> Result { + if size == 0 { + bail!("Don't allow to alloc cluster size of 0!"); + } + let offset = self.alloc_clusters_with_noref(size)?; + let offset_end = round_up(offset + size, self.cluster_size).unwrap(); + let rt_end = offset_end >> (self.cluster_bits + self.refcount_blk_bits); + let nb_clusters = bytes_to_clusters(offset_end - offset, self.cluster_size)?; + + if rt_end >= self.refcount_table_size { + let clusters = self.free_cluster_index; + let start_idx = self.free_cluster_index; + let (table, blocks) = refcount_metadata_size( + clusters, + self.cluster_size, + u64::from(header.refcount_order), + true, + )?; + self.extend_refcount_table(header, start_idx, table, blocks)?; + } + self.update_alloc_refcount(offset, nb_clusters, 1, false, &Qcow2DiscardType::Never)?; + Ok(offset) + } + + /// Extend refcount table. + /// + /// # Arguments + /// + /// * `header` - header message of this qower driver. + /// * `start_idx` - alloc space for the new refcount table starting from the start index. + /// * `new_table_clusters` - number of clusters for new refcount table. + /// * `new_block_clusters` - number of clusters for refcount block, the size of refcount blocks + /// should be guaranteed to record all newly added clusters. 
+ fn extend_refcount_table( + &mut self, + header: &mut QcowHeader, + start_idx: u64, + new_table_clusters: u64, + new_block_clusters: u64, + ) -> Result<()> { + info!("Qcow2 needs to extend the refcount table"); + // Alloc space for new refcount table. + let new_table_size = new_table_clusters * (self.cluster_size / ENTRY_SIZE); + if new_block_clusters > new_table_size { + bail!( + "Refcount block clusters {:?} exceed table size: {:?}", + new_block_clusters, + new_table_size + ); + } + let mut new_table = self.refcount_table.clone(); + // SAFETY: Upper limit of new_table_size is disk file size. + new_table.resize(new_table_size as usize, 0); + let start_offset = start_idx * self.cluster_size; + let mut table_offset = start_offset; + let mut added_rb = Vec::new(); + for i in 0..new_block_clusters { + if new_table[i as usize] == 0 { + new_table[i as usize] = table_offset; + added_rb.push(table_offset & REFCOUNT_TABLE_OFFSET_MASK); + table_offset += self.cluster_size; + } + } + let end_offset = table_offset + new_table_clusters * self.cluster_size; + let metadata_clusters = div_round_up(end_offset - start_offset, self.cluster_size).unwrap(); + + // Write new extended refcount table to disk. + self.sync_aio + .borrow_mut() + .write_ctrl_cluster(table_offset, &new_table)?; + + // Update and save qcow2 header to disk. + let mut new_header = header.clone(); + new_header.refcount_table_offset = table_offset; + new_header.refcount_table_clusters = new_table_clusters as u32; + self.sync_aio + .borrow_mut() + .write_buffer(0, &new_header.to_vec())?; + + // Update qcow2 header in memory. + header.refcount_table_offset = new_header.refcount_table_offset; + header.refcount_table_clusters = new_header.refcount_table_clusters; + + // Update refcount information. + let old_table_offset = self.refcount_table_offset; + let old_table_clusters = self.refcount_table_clusters; + self.refcount_table = new_table; + for rb_offset in added_rb.iter() { + self.refcount_table_map.insert(*rb_offset, 1); + } + self.refcount_table_offset = header.refcount_table_offset; + self.refcount_table_clusters = header.refcount_table_clusters; + self.refcount_table_size = new_table_size; + self.free_cluster_index = end_offset / self.cluster_size; + + // Update refcount for metadata. + self.update_refcount( + start_offset, + metadata_clusters, + 1, + true, + &Qcow2DiscardType::Never, + )?; + + // Free the old cluster of refcount table. 
+ self.update_refcount( + old_table_offset, + u64::from(old_table_clusters), + -1, + true, + &Qcow2DiscardType::Other, + )?; + info!( + "Qcow2 extends refcount table success, offset 0x{:x} -> 0x{:x}", + old_table_offset, self.refcount_table_offset + ); + + Ok(()) + } + + fn update_alloc_refcount( + &mut self, + offset: u64, + clusters: u64, + added: i32, + flush: bool, + discard_type: &Qcow2DiscardType, + ) -> Result<()> { + let start_clusters = div_round_up(offset, self.cluster_size).unwrap(); + for i in start_clusters..start_clusters + clusters { + let rt_idx = i >> self.refcount_blk_bits; + if rt_idx >= self.refcount_table_size { + bail!("Invalid refcount table index {}", rt_idx); + } + + let rb_addr = self.refcount_table[rt_idx as usize]; + if rb_addr == 0 { + self.alloc_refcount_block(rt_idx).map_err(|e| { + self.refcount_table[rt_idx as usize] = 0; + e + })?; + } + } + + self.update_refcount(offset, clusters, added, flush, discard_type) + } + + pub fn update_refcount( + &mut self, + offset: u64, + clusters: u64, + added: i32, + flush: bool, + discard_type: &Qcow2DiscardType, + ) -> Result<()> { + if self.offset_into_cluster(offset) != 0 { + bail!("Failed to update refcount, offset is not aligned to cluster"); + } + let first_cluster = bytes_to_clusters(offset, self.cluster_size).unwrap(); + let mut rc_vec: Vec<(u64, u64, usize)> = Vec::with_capacity(clusters as usize); + let mut i: u64 = 0; + while i < clusters { + let rt_idx = (first_cluster + i) >> self.refcount_blk_bits; + if rt_idx >= self.refcount_table_size { + bail!("Invalid refcount table index {}", rt_idx); + } + let rb_addr = self.refcount_table[rt_idx as usize]; + if rb_addr == 0 || self.offset_into_cluster(rb_addr) != 0 { + bail!( + "Invalid refcount block address 0x{:x}, index is {}", + rb_addr, + rt_idx + ); + } + let rb_idx = self.cluster_in_rc_block(i + first_cluster) as usize; + let num = std::cmp::min( + self.refcount_blk_size as usize - rb_idx, + (clusters - i) as usize, + ); + rc_vec.push((rt_idx, rb_idx as u64, num)); + i += num as u64; + } + + let idx = self.set_refcount_blocks(&rc_vec, added, discard_type); + if idx != rc_vec.len() { + // Revert the updating operation for refount block. + let rev_idx = self.set_refcount_blocks(&rc_vec[..idx], -added, discard_type); + let status = if rev_idx == idx { "success" } else { "failed" }; + bail!("Failed to set refcounts, recover {}", status); + } + if flush { + self.flush()?; + } + Ok(()) + } + + fn set_refcount_blocks( + &mut self, + rc_vec: &[(u64, u64, usize)], + added: i32, + discard_type: &Qcow2DiscardType, + ) -> usize { + for (i, (rt_idx, rb_idx, num)) in rc_vec.iter().enumerate() { + let ret = self.set_refcount(*rt_idx, *rb_idx, *num, added, discard_type); + if let Err(err) = ret { + error!( + "Set refcount failed, rt_idx {}, rb_idx {}, clusters {}, added {}, {}", + rt_idx, + rb_idx, + num, + added, + err.to_string() + ); + return i; + } + } + + rc_vec.len() + } + + pub fn flush(&mut self) -> Result<()> { + self.refcount_blk_cache.flush(self.sync_aio.clone()) + } + + fn get_refcount_block_cache(&mut self, rt_idx: u64) -> Result>> { + let entry = self.refcount_blk_cache.get(rt_idx); + let cache_entry = if let Some(entry) = entry { + entry.clone() + } else { + self.load_refcount_block(rt_idx).with_context(|| { + format!("Failed to get refcount block cache, index is {}", rt_idx) + })?; + self.refcount_blk_cache + .get(rt_idx) + .with_context(|| format!("Not found refcount block cache, index is {}", rt_idx))? 
+ .clone() + }; + Ok(cache_entry) + } + + fn set_refcount( + &mut self, + rt_idx: u64, + rb_idx: u64, + clusters: usize, + added: i32, + discard_type: &Qcow2DiscardType, + ) -> Result<()> { + let is_add = added > 0; + let added_value = added.unsigned_abs() as u16; + let cache_entry = self + .get_refcount_block_cache(rt_idx) + .with_context(|| "Get refcount block cache failed")?; + let mut rb_vec: Vec = Vec::with_capacity(clusters); + let mut borrowed_entry = cache_entry.borrow_mut(); + let is_dirty = borrowed_entry.dirty_info.is_dirty; + for i in 0..clusters { + let mut rc_value = borrowed_entry.get_entry_map(rb_idx as usize + i)? as u16; + rc_value = if is_add { + rc_value + .checked_add(added_value) + .filter(|&v| v <= self.refcount_max as u16) + .with_context(|| { + format!( + "Refcount {} add {} cause overflows, index is {}", + rc_value, added_value, i + ) + })? + } else { + rc_value.checked_sub(added_value).with_context(|| { + format!( + "Refcount {} sub {} cause overflows, index is {}", + rc_value, added_value, i + ) + })? + }; + let cluster_idx = rt_idx * u64::from(self.refcount_blk_size) + rb_idx + i as u64; + if rc_value == 0 { + if self.discard_passthrough.contains(discard_type) { + // update refcount discard. + let offset = cluster_idx * self.cluster_size; + let nbytes = self.cluster_size; + self.update_discard_list(offset, nbytes)?; + } + + if cluster_idx < self.free_cluster_index { + self.free_cluster_index = cluster_idx; + } + } + rb_vec.push(rc_value); + } + + for (idx, rc_value) in rb_vec.iter().enumerate() { + borrowed_entry.set_entry_map(rb_idx as usize + idx, u64::from(*rc_value))?; + } + if !is_dirty { + self.refcount_blk_cache.add_dirty_table(cache_entry.clone()); + } + + Ok(()) + } + + pub fn get_refcount(&mut self, offset: u64) -> Result { + let cluster = offset >> self.cluster_bits; + let rt_idx = cluster >> self.refcount_blk_bits; + if rt_idx >= self.refcount_table_size { + return Ok(0); + } + + let rb_addr = self.refcount_table[rt_idx as usize] & REFCOUNT_TABLE_OFFSET_MASK; + if rb_addr == 0 { + return Ok(0); + } + + if self.offset_into_cluster(rb_addr) != 0 { + bail!( + "Invalid refcount block address 0x{:x}, index is {}", + rb_addr, + rt_idx + ); + } + + let cache_entry = self + .get_refcount_block_cache(rt_idx) + .with_context(|| "Get refcount block cache failed")?; + let rb_idx = self.cluster_in_rc_block(cluster) as usize; + let rc_value = cache_entry.borrow_mut().get_entry_map(rb_idx).unwrap(); + + Ok(rc_value as u16) + } + + /// Add discard task to the list. + fn update_discard_list(&mut self, offset: u64, nbytes: u64) -> Result<()> { + let mut discard_task = DiscardTask { offset, nbytes }; + let len = self.discard_list.len(); + let mut discard_list: Vec = Vec::with_capacity(len + 1); + for task in self.discard_list.iter() { + if discard_task.is_overlap(task) { + discard_task.merge_task(task); + } else { + discard_list.push(task.clone()); + } + } + discard_list.push(discard_task); + self.discard_list = discard_list; + Ok(()) + } + + /// Process discards task by sync aio. 
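+ ///
+ /// Each pending range in `discard_list` is packaged into a synchronous AIO request and
+ /// submitted with the given opcode; the list is cleared once every request has been handed
+ /// to the AIO context.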
+ pub fn sync_process_discards(&mut self, opcode: OpCode) { + for task in self.discard_list.iter() { + let offset = task.offset; + let nbytes = task.nbytes; + let mut borrowed_sync_aio = self.sync_aio.borrow_mut(); + let discard_aio = + borrowed_sync_aio.package_sync_aiocb(opcode, Vec::new(), offset as usize, nbytes); + borrowed_sync_aio + .aio + .submit_request(discard_aio) + .unwrap_or_else(|e| error!("Discard failed: {:?}", e)); + } + self.discard_list.clear(); + } + + pub fn offset_into_cluster(&self, offset: u64) -> u64 { + offset & (self.cluster_size - 1) + } + + /// Alloc a new cluster for refcount block. If this new allocated cluster exceed + /// the refcount table, then return fail. + fn alloc_refcount_block(&mut self, rt_idx: u64) -> Result<()> { + if rt_idx >= self.refcount_table_size { + bail!("The size of refcount table is not enough"); + } + + let alloc_offset = self.alloc_clusters_with_noref(self.cluster_size)?; + let alloc_cluster_idx = alloc_offset >> self.cluster_bits; + let alloc_rt_idx = alloc_cluster_idx >> self.refcount_blk_bits; + // Avoid to resize the refcount table. + if alloc_rt_idx >= self.refcount_table_size { + bail!("The size of refcount table is not enough"); + } + + // Update refcount table. + self.refcount_table[rt_idx as usize] = alloc_offset; + let rb_offset = alloc_offset & REFCOUNT_TABLE_OFFSET_MASK; + self.refcount_table_map.insert(rb_offset, 1); + let rc_block = vec![0_u8; self.cluster_size as usize]; + let cache_entry = Rc::new(RefCell::new(CacheTable::new( + alloc_offset, + rc_block, + ENTRY_SIZE_U16, + )?)); + if let Some(replaced_entry) = self + .refcount_blk_cache + .lru_replace(rt_idx, cache_entry.clone()) + { + self.save_refcount_block(&replaced_entry)?; + } + + let mut borrowed_entry = cache_entry.borrow_mut(); + if alloc_rt_idx == rt_idx { + // Update and save refcount block. + let alloc_rcb_idx = self.cluster_in_rc_block(alloc_cluster_idx); + borrowed_entry.set_entry_map(self.cluster_in_rc_block(alloc_rcb_idx) as usize, 1)?; + borrowed_entry.dirty_info.clear(); + } + + // Sync to disk. 
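+        // The freshly allocated refcount block is written out as a whole cluster
+        // (offset 0 .. cluster_size) before the refcount table entry is persisted below.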
+ self.sync_aio.borrow_mut().write_dirty_info( + borrowed_entry.addr, + borrowed_entry.get_value(), + 0, + self.cluster_size, + )?; + drop(borrowed_entry); + + if alloc_rt_idx != rt_idx { + self.update_alloc_refcount(alloc_offset, 1, 1, true, &Qcow2DiscardType::Never)?; + } + + let start = rt_idx * ENTRY_SIZE; + self.save_refcount_table(start, start + ENTRY_SIZE) + } + + fn load_refcount_block(&mut self, rt_idx: u64) -> Result<()> { + let rb_addr = self.refcount_table[rt_idx as usize]; + if !is_aligned(self.cluster_size, rb_addr) { + bail!("Refcount block address not aligned {}", rb_addr); + } + let mut rc_block = vec![0_u8; self.cluster_size as usize]; + self.sync_aio + .borrow_mut() + .read_buffer(rb_addr, &mut rc_block)?; + let cache_entry = Rc::new(RefCell::new(CacheTable::new( + rb_addr, + rc_block, + ENTRY_SIZE_U16, + )?)); + if let Some(replaced_entry) = self.refcount_blk_cache.lru_replace(rt_idx, cache_entry) { + self.save_refcount_block(&replaced_entry)?; + } + Ok(()) + } + + fn save_refcount_table(&mut self, start: u64, end: u64) -> Result<()> { + let vec: Vec = self + .refcount_table + .iter() + .flat_map(|val| val.to_be_bytes()) + .collect(); + self.sync_aio + .borrow_mut() + .write_dirty_info(self.refcount_table_offset, &vec, start, end) + } + + fn save_refcount_block(&mut self, entry: &Rc>) -> Result<()> { + let mut borrowed_entry = entry.borrow_mut(); + if !borrowed_entry.dirty_info.is_dirty { + return Ok(()); + } + if !is_aligned(self.cluster_size, borrowed_entry.addr) { + bail!( + "Refcount block address is not aligned {}", + borrowed_entry.addr + ); + } + self.sync_aio.borrow_mut().write_dirty_info( + borrowed_entry.addr, + borrowed_entry.get_value(), + borrowed_entry.dirty_info.start, + borrowed_entry.dirty_info.end, + )?; + borrowed_entry.dirty_info.clear(); + + Ok(()) + } + + pub fn drop_dirty_caches(&mut self) { + self.refcount_blk_cache.clean_up_dirty_cache(); + } +} + +/// Recalculate the metadata size of refcount to expand the Refcount table, +/// so that it can refcount enough clusters. +/// +/// # Arguments +/// +/// * `nb_clusters` - number of clusters to refcount. +/// * `cluster_size` - size of cluster in bytes. +/// * `refcount_order` - refcount bits power of 2 exponent. +/// * `is_reserve` - if this parameter set true, the refcount table size +/// will have 50% more entries than necessary, which can avoid growing again soon. 
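+///
+/// Worked example (illustrative numbers, assuming 8-byte refcount table entries):
+/// with cluster_size = 64 KiB, refcount_order = 4 and is_reserve = false, one
+/// refcount block covers 32768 clusters and one table cluster holds 8192 entries,
+/// so refcounting 1_000_000 data clusters converges to 31 refcount block clusters
+/// and 1 refcount table cluster.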
+/// Returns: (clusters of new refcount table, clusters of refcount block) +pub fn refcount_metadata_size( + nb_clusters: u64, + cluster_size: u64, + refcount_order: u64, + mut is_reserve: bool, +) -> Result<(u64, u64)> { + let reftable_entries = cluster_size / ENTRY_SIZE; + let refblock_entries = cluster_size * 8 / (1 << refcount_order); + let mut table: u64 = 0; + let mut blocks: u64 = 0; + let mut clusters = nb_clusters; + let mut last_clusters; + let mut total_clusters: u64 = 0; + + loop { + last_clusters = total_clusters; + blocks = div_round_up(clusters + table + blocks, refblock_entries).unwrap(); + table = div_round_up(blocks, reftable_entries).unwrap(); + total_clusters = table + blocks + clusters; + + if total_clusters == last_clusters { + if is_reserve { + clusters += div_round_up(table, 2).unwrap(); + total_clusters = 0; + is_reserve = false; + } else { + break; + } + } + } + + table = total_clusters - nb_clusters - blocks; + Ok((table, blocks)) +} + +#[cfg(test)] +mod test { + use std::{ + fs::{remove_file, File}, + io::{Seek, SeekFrom, Write}, + os::unix::fs::{FileExt, OpenOptionsExt}, + sync::Arc, + }; + + use byteorder::{BigEndian, ByteOrder}; + + use crate::qcow2::*; + use crate::qcow2::{ + header::*, + refcount::{refcount_metadata_size, Qcow2DiscardType}, + }; + use machine_manager::config::DiskFormat; + use util::aio::{Aio, WriteZeroesState}; + + fn image_create(path: &str, img_bits: u32, cluster_bits: u32) -> File { + let cluster_sz = 1 << cluster_bits; + let l1_size: u64 = 1 << (img_bits - (cluster_bits * 2 - ENTRY_SIZE as u32)); + let entry_per_l1_cluster: u64 = 1 << (cluster_bits - ENTRY_BITS as u32); + let l1_clusters = div_round_up(l1_size, entry_per_l1_cluster).unwrap(); + // Header + l1 table clusters + let nb_clusters = 1 + l1_clusters; + let (rct, rcb) = refcount_metadata_size(nb_clusters, cluster_sz, 4, false).unwrap(); + let mut rc_table: Vec = Vec::new(); + let mut rc_block: Vec = Vec::new(); + let l1_table = vec![0_u8; (l1_clusters * cluster_sz) as usize]; + let rct_offset = cluster_sz; + let rcb_offset = (1 + rct) * cluster_sz; + let l1_table_offset = (1 + rct + rcb) * cluster_sz; + let total_clusters = nb_clusters + rct + rcb; + let rc_per_block = cluster_sz / 2; + let rct_size = div_round_up(total_clusters, rc_per_block).unwrap(); + for i in 0..rct_size { + let addr = rcb_offset + i * cluster_sz; + rc_table.append(&mut addr.to_be_bytes().to_vec()); + } + for _i in 0..total_clusters { + rc_block.push(0x00); + rc_block.push(0x01); + } + // SAFETY: Upper limit of following value is decided by disk file size. + rc_table.resize((rct * cluster_sz) as usize, 0); + rc_block.resize((rcb * cluster_sz) as usize, 0); + let header = QcowHeader { + magic: QCOW_MAGIC, + version: 3, + backing_file_offset: 0, + backing_file_size: 0, + cluster_bits, + size: 1 << img_bits, + crypt_method: 0, + l1_size: l1_size as u32, + l1_table_offset, + refcount_table_offset: rct_offset, + refcount_table_clusters: rct as u32, + nb_snapshots: 0, + snapshots_offset: 0, + incompatible_features: 0, + compatible_features: 0, + autoclear_features: 0, + refcount_order: 4, + header_length: std::mem::size_of::() as u32, + }; + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .custom_flags(libc::O_CREAT | libc::O_TRUNC) + .open(path) + .unwrap(); + file.set_len(total_clusters * cluster_sz).unwrap(); + file.write_all(&header.to_vec()).unwrap(); + + // Refcount table. 
+ file.seek(SeekFrom::Start(rct_offset)).unwrap(); + file.write_all(&rc_table).unwrap(); + + // Recount block. + file.seek(SeekFrom::Start(rcb_offset)).unwrap(); + file.write_all(&rc_block).unwrap(); + + // L1 table. + file.seek(SeekFrom::Start(l1_table_offset)).unwrap(); + file.write_all(&l1_table).unwrap(); + + file + } + + fn create_qcow2_driver( + path: &str, + img_bits: u32, + cluster_bits: u32, + ) -> (Qcow2Driver<()>, Arc) { + let file = Arc::new(image_create(path, img_bits, cluster_bits)); + let aio = Aio::new( + Arc::new(SyncAioInfo::complete_func), + util::aio::AioEngine::Off, + None, + ) + .unwrap(); + let conf = BlockProperty { + id: path.to_string(), + format: DiskFormat::Qcow2, + iothread: None, + direct: true, + req_align: 512, + buf_align: 512, + discard: false, + write_zeroes: WriteZeroesState::Off, + l2_cache_size: None, + refcount_cache_size: None, + }; + let mut qcow2_driver = Qcow2Driver::new(file.clone(), aio, conf.clone()).unwrap(); + qcow2_driver.load_metadata(conf).unwrap(); + (qcow2_driver, file) + } + + #[test] + fn test_alloc_cluster() { + let path = "/tmp/refcount_case1.qcow2"; + let image_bits = 30; + let cluster_bits = 16; + let (mut qcow2, cloned_file) = create_qcow2_driver(path, image_bits, cluster_bits); + let header = qcow2.header.clone(); + + // Alloc one free clusters + let cluster_sz = 1 << cluster_bits; + let free_cluster_index = + 3 + ((header.l1_size * ENTRY_SIZE as u32 + cluster_sz as u32 - 1) >> cluster_bits); + let addr = qcow2.alloc_cluster(1, true).unwrap(); + assert_eq!(addr, cluster_sz * u64::from(free_cluster_index)); + qcow2.flush().unwrap(); + // Check if the refcount of the cluster is updated to the disk. + let mut rc_value = [0_u8; 2]; + cloned_file + .as_ref() + .read_at( + &mut rc_value, + cluster_sz * 2 + 2 * u64::from(free_cluster_index), + ) + .unwrap(); + assert_eq!(1, BigEndian::read_u16(&rc_value)); + remove_file(path).unwrap(); + } + + /// Test function of allocating cluster. + /// TestStep: + /// 1. Init qcow2 file driver. + /// 2. Call alloc function. + /// Expect: + /// 1. The refcount on the disk has been updated correctly. + /// 2. All allocated spaces will not overlap. + #[test] + fn test_alloc_cluster_with_refcount_check() { + let path = "/tmp/test_alloc_cluster_with_refcount_check.qcow2"; + let image_bits = 30; + let cluster_bits = 9; + let (mut qcow2, cloned_file) = create_qcow2_driver(path, image_bits, cluster_bits); + + let test_data = vec![1024, 152, 2048, 1, 20000, 65536, 512, 768, 7111, 2000000]; + let cluster_size: u64 = 1 << cluster_bits; + let mut res_data: Vec<(u64, u64)> = vec![]; + // Call function of alloc_cluster. + for clusters in test_data { + let addr = qcow2.alloc_cluster(clusters, true).unwrap(); + res_data.push((addr, clusters * cluster_size)); + } + // The refcount of all cluster update to disk. + let image_size: u64 = 1 << image_bits; + let table_offset = qcow2.header.refcount_table_offset; + let block_size: u64 = 1 << qcow2.refcount.refcount_blk_bits; + let table_size = div_round_up(image_size, block_size * cluster_size).unwrap(); + let mut refcount_table = vec![0_u8; table_size as usize * ENTRY_SIZE as usize]; + assert!(cloned_file + .as_ref() + .read_at(&mut refcount_table, table_offset) + .is_ok()); + for i in 0..table_size { + let start_idx = i as usize * 8; + let addr = BigEndian::read_u64(&refcount_table[start_idx..start_idx + 8]); + assert_ne!(addr, 0); + } + + // All allocated cluster should not overlap with each other. 
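+        // Pairwise check over every allocated range [addr, addr + len): ranges_overlap()
+        // reporting true for any pair would mean the allocator handed out the same
+        // clusters twice.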
+ let len = res_data.len(); + for i in 0..len { + let addr1 = res_data[i].0 as usize; + let size1 = res_data[i].1 as usize; + for j in (i + 1)..len { + let addr2 = res_data[j].0 as usize; + let size2 = res_data[j].1 as usize; + assert!(!ranges_overlap(addr1, size1, addr2, size2).unwrap()); + } + } + + remove_file(path).unwrap(); + } + + #[test] + fn test_extend_refcount_table() { + let path = "/tmp/refcount_case2.qcow2"; + // Image size is 128MB. + let image_bits = 27; + // Cluster size is 1KB. + let cluster_bits = 10; + let (mut qcow2, cloned_file) = create_qcow2_driver(path, image_bits, cluster_bits); + let header = &qcow2.header; + let rct_offset = header.refcount_table_offset; + let rct_clusters = header.refcount_table_clusters; + + // Extend refcount table which can not mark all clusters. + let cluster_sz = 1 << cluster_bits; + // 3 bit means refcount table entry size(8 Byte) + // 1 bit means refcount block entry size(2 Byte). + let mut clusters = 1 << (cluster_bits - 3 + cluster_bits - 1); + clusters /= 64; + // Alloc 2 cluster once for all clusters which will cause extending refcount table. + for _ in 0..clusters + 1 { + qcow2.alloc_cluster(128, true).unwrap(); + } + qcow2.flush().unwrap(); + let new_rct_offset = qcow2.header.refcount_table_offset; + let new_rct_clusters = qcow2.header.refcount_table_clusters; + assert_ne!(new_rct_offset, rct_offset); + assert!(qcow2.header.refcount_table_clusters > rct_clusters); + + // Check if the new refcount table contains the old refcount table. + let old_rct_size = cluster_sz as usize * rct_clusters as usize; + let new_rct_size = cluster_sz as usize * new_rct_clusters as usize; + let mut old_rc_table = vec![0_u8; old_rct_size]; + cloned_file + .as_ref() + .read_at(&mut old_rc_table, rct_offset) + .unwrap(); + let mut new_rc_table = vec![0_u8; new_rct_size]; + cloned_file + .as_ref() + .read_at(&mut new_rc_table, new_rct_offset) + .unwrap(); + for i in 0..old_rct_size { + assert_eq!(old_rc_table[i], new_rc_table[i]); + } + + // Read the first refcount table entry in the extended cluster of the refcount table. + let mut rct_entry = vec![0_u8; ENTRY_SIZE as usize]; + cloned_file + .read_at(&mut rct_entry, new_rct_offset + old_rct_size as u64) + .unwrap(); + let rcb_offset = BigEndian::read_u64(&rct_entry); + + // Check the refcount block in the extended cluster of the refcount table. + // It will include the cluster of refcount table and itself. + let mut rc_table = vec![0_u8; cluster_sz as usize]; + cloned_file.read_at(&mut rc_table, rcb_offset).unwrap(); + for i in 0..new_rct_clusters as usize + 1 { + let offset = 2 * i; + assert_eq!(1, BigEndian::read_u16(&rc_table[offset..offset + 2])); + } + + remove_file(path).unwrap(); + } + + #[test] + fn test_update_refcount() { + let path = "/tmp/refcount_case3.qcow2"; + let image_bits = 30; + let cluster_bits = 16; + let (qcow2, _) = create_qcow2_driver(path, image_bits, cluster_bits); + let mut refcount = qcow2.refcount.clone(); + + // Add refcount for the first cluster. + let ret = refcount.update_refcount(0, 1, 1, true, &Qcow2DiscardType::Never); + assert!(ret.is_ok()); + + // Test invalid cluster offset. + let ret = refcount.update_refcount(1 << 63, 1, 1, true, &Qcow2DiscardType::Never); + if let Err(err) = ret { + // 16 bit is cluster bits, 15 is refcount block bits. + let err_msg = format!("Invalid refcount table index {}", 1_u64 << (63 - 15 - 16)); + assert_eq!(err.to_string(), err_msg); + } else { + assert!(false); + } + + // Test refcount block not in cache. 
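+        // Offset 1 << (cluster_bits * 2) = 1 << 32 maps to cluster 1 << 16; with 15-bit
+        // refcount blocks this is refcount table index 2, whose block is still
+        // unallocated (address 0), so update_refcount() must fail.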
+ let ret = refcount.update_refcount( + 1 << (cluster_bits * 2), + 1, + 1, + true, + &Qcow2DiscardType::Never, + ); + if let Err(err) = ret { + let err_msg = "Invalid refcount block address 0x0, index is 2".to_string(); + assert_eq!(err.to_string(), err_msg); + } else { + assert!(false); + } + + remove_file(path).unwrap(); + } + + #[test] + fn test_set_refcount() { + let path = "/tmp/refcount_case4.qcow2"; + let image_bits = 30; + let cluster_bits = 16; + let (qcow2, _) = create_qcow2_driver(path, image_bits, cluster_bits); + let mut refcount = qcow2.refcount.clone(); + + // Add refcount for the first cluster. + let ret = refcount.set_refcount(0, 0, 1, 1, &Qcow2DiscardType::Never); + assert!(ret.is_ok()); + + // Test refcount overflow. + let ret = refcount.set_refcount(0, 0, 1, 65535, &Qcow2DiscardType::Never); + if let Err(err) = ret { + let err_msg = "Refcount 2 add 65535 cause overflows, index is 0".to_string(); + assert_eq!(err.to_string(), err_msg); + } else { + assert!(false); + } + + // Test refcount underflow. + let ret = refcount.set_refcount(0, 0, 1, -65535, &Qcow2DiscardType::Never); + if let Err(err) = ret { + let err_msg = "Refcount 2 sub 65535 cause overflows, index is 0".to_string(); + assert_eq!(err.to_string(), err_msg); + } else { + assert!(false); + } + + remove_file(path).unwrap(); + } +} diff --git a/block_backend/src/qcow2/snapshot.rs b/block_backend/src/qcow2/snapshot.rs new file mode 100644 index 0000000000000000000000000000000000000000..f0b638f2a345c727a9cfdeba1afd3d5b731dd246 --- /dev/null +++ b/block_backend/src/qcow2/snapshot.rs @@ -0,0 +1,368 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{cell::RefCell, rc::Rc}; +use std::{mem::size_of, str::from_utf8}; + +use anyhow::{bail, Context, Result}; +use byteorder::{BigEndian, ByteOrder}; + +use super::{is_aligned, SyncAioInfo}; +use util::num_ops::round_up; + +/// Maximum number of snapshots. +pub const QCOW2_MAX_SNAPSHOTS: usize = 65536; + +// Length of Qcow2 internal snapshot which doesn't have icount in extra data. +// Qcow2 snapshots created by qemu-img(version <= 5.0) may have this format. +const SNAPSHOT_EXTRA_DATA_LEN_16: usize = 16; +// Length of Qcow2 internal snapshot which has icount in extra data. +const SNAPSHOT_EXTRA_DATA_LEN_24: usize = 24; + +#[derive(Clone)] +pub struct InternalSnapshot { + pub snapshots: Vec, + sync_aio: Rc>, + cluster_size: u64, + // Total snapshot table size in bytes. + pub snapshot_size: u64, + pub snapshot_table_offset: u64, + // Number of snapshot table entry. 
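+    // Kept consistent with `snapshots` and `snapshot_size` by add_snapshot()/del_snapshot().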
+ pub(crate) nb_snapshots: u32, +} + +impl InternalSnapshot { + pub fn new(sync_aio: Rc>) -> Self { + Self { + snapshots: Vec::new(), + sync_aio, + cluster_size: 0, + snapshot_size: 0, + snapshot_table_offset: 0, + nb_snapshots: 0, + } + } + + pub fn snapshots_number(&self) -> usize { + self.nb_snapshots as usize + } + + pub fn find_snapshot(&self, name: &String) -> i32 { + for (idx, snap) in self.snapshots.iter().enumerate() { + if snap.name.eq(name) { + return idx as i32; + } + } + -1 + } + + pub fn set_cluster_size(&mut self, cluster_size: u64) { + self.cluster_size = cluster_size; + } + + pub fn add_snapshot(&mut self, snap: QcowSnapshot) { + let size = snap.get_size(); + self.snapshots.push(snap); + self.snapshot_size += size; + self.nb_snapshots += 1; + } + + pub fn del_snapshot(&mut self, index: usize) -> QcowSnapshot { + let snap = self.snapshots.remove(index); + self.nb_snapshots -= 1; + self.snapshot_size -= snap.get_size(); + + snap + } + + pub fn find_new_snapshot_id(&self) -> u64 { + let mut id_max: u64 = 0; + for snap in &self.snapshots { + if id_max < snap.id { + id_max = snap.id; + } + } + + id_max + 1 + } + + pub fn save_snapshot_table( + &self, + addr: u64, + extra_snap: Option<&QcowSnapshot>, + attach: bool, + ) -> Result<()> { + let mut buf = Vec::new(); + for snap in &self.snapshots { + if !attach && extra_snap.is_some() && snap.id == extra_snap.unwrap().id { + continue; + } + buf.append(&mut snap.gen_snapshot_table_entry()); + } + if attach { + if let Some(extra) = extra_snap { + buf.append(&mut extra.gen_snapshot_table_entry()); + } + } + self.sync_aio.borrow_mut().write_buffer(addr, &buf) + } + + pub(crate) fn load_snapshot_table( + &mut self, + addr: u64, + nb_snapshots: u32, + repair: bool, + ) -> Result<(i32, i32)> { + let mut extra_data_dropped: i32 = 0; + let mut clusters_reduced: i32 = 0; + + if nb_snapshots == 0 { + self.nb_snapshots = 0; + self.snapshots.clear(); + return Ok((clusters_reduced, extra_data_dropped)); + } + + if addr == 0 || !is_aligned(self.cluster_size, addr) { + bail!( + "The offset of snapshot table {} can't be 0 and mut aligned to cluster size", + addr + ); + } + + for i in 0..nb_snapshots { + let offset = addr + self.snapshot_size; + + let mut pos: usize = 0; + let header_size = size_of::(); + let mut header_buf = vec![0_u8; header_size]; + self.sync_aio + .borrow_mut() + .read_buffer(offset, &mut header_buf) + .with_context(|| format!("read snapshot header error(addr {:x}).", offset))?; + let header = QcowSnapshotHeader::from_vec(&header_buf)?; + pos += header_size; + + let extra_size = header.extra_date_size as usize; + if ![SNAPSHOT_EXTRA_DATA_LEN_16, SNAPSHOT_EXTRA_DATA_LEN_24].contains(&extra_size) { + if !repair { + bail!("Too much extra metadata in snapshot table entry {}", i); + } + let err_msg = format!( + "Discarding too much extra metadata in snapshot table entry {}, {} > {}", + i, extra_size, SNAPSHOT_EXTRA_DATA_LEN_24 + ); + println!("{:?}", err_msg); + extra_data_dropped += 1; + } + let mut extra_buf = vec![0_u8; extra_size]; + self.sync_aio + .borrow_mut() + .read_buffer(offset + pos as u64, &mut extra_buf) + .with_context(|| { + format!( + "read snapshot extra data error(addr {:x}).", + offset + pos as u64 + ) + })?; + let extra = QcowSnapshotExtraData::from_vec(&extra_buf)?; + pos += extra_size; + + if header.id_str_size == 0 { + bail!("Invalid snapshot id size: 0"); + } + let mut id_buf = vec![0_u8; header.id_str_size as usize]; + self.sync_aio + .borrow_mut() + .read_buffer(offset + pos as u64, &mut id_buf) + 
.with_context(|| { + format!("read snapshot ID error(addr {:x}).", offset + pos as u64) + })?; + let id = from_utf8(&id_buf)?.parse::()?; + pos += header.id_str_size as usize; + + let mut name_buf = vec![0_u8; header.name_size as usize]; + self.sync_aio + .borrow_mut() + .read_buffer(offset + pos as u64, &mut name_buf) + .with_context(|| { + format!("read snapshot name error(addr {:x}).", offset + pos as u64) + })?; + let name = from_utf8(&name_buf)?; + + let snap = QcowSnapshot { + l1_table_offset: header.l1_table_offset, + l1_size: header.l1_size, + id, + name: name.to_string(), + disk_size: extra.disk_size, + vm_state_size: header.vm_state_size, + date_sec: header.date_sec, + date_nsec: header.date_nsec, + vm_clock_nsec: header.vm_clock_nsec, + icount: extra.icount, + extra_data_size: header.extra_date_size, + }; + + self.add_snapshot(snap); + if self.snapshot_size > QCOW2_MAX_SNAPSHOTS as u64 * 1024 + || offset - addr > i32::MAX as u64 + { + if !repair { + bail!("Snapshot table is too big"); + } + let err_msg = format!( + "Discarding {} overhanging snapshots(snapshot) table is too big", + nb_snapshots - i + ); + println!("{:?}", err_msg); + clusters_reduced += (nb_snapshots - i) as i32; + self.del_snapshot(i as usize); + self.nb_snapshots = i; + break; + } + } + + Ok((clusters_reduced, extra_data_dropped)) + } +} + +#[derive(Clone)] +pub struct QcowSnapshot { + pub l1_table_offset: u64, + pub l1_size: u32, + pub id: u64, + pub name: String, + pub disk_size: u64, + // VM state info size, used for vm snapshot. + // Set to 0 for disk internal snapshot. + pub vm_state_size: u32, + pub date_sec: u32, + pub date_nsec: u32, + pub vm_clock_nsec: u64, + // Icount value which corresponds to the record/replay instruction count when the snapshots was + // token. Sed to -1 which means icount was disabled. + pub icount: u64, + pub extra_data_size: u32, +} + +impl QcowSnapshot { + pub fn get_size(&self) -> u64 { + let tmp_size = size_of::() + + self.extra_data_size as usize + + self.id.to_string().len() + + self.name.len(); + + round_up(tmp_size as u64, 8).unwrap() + } + + pub(crate) fn gen_snapshot_table_entry(&self) -> Vec { + let id_str = self.id.to_string(); + let entry_size = size_of::() + self.extra_data_size as usize; + let mut buf = vec![0_u8; entry_size]; + + // Snapshot Header. + BigEndian::write_u64(&mut buf[0..8], self.l1_table_offset); + BigEndian::write_u32(&mut buf[8..12], self.l1_size); + BigEndian::write_u16(&mut buf[12..14], id_str.len() as u16); + BigEndian::write_u16(&mut buf[14..16], self.name.len() as u16); + BigEndian::write_u32(&mut buf[16..20], self.date_sec); + BigEndian::write_u32(&mut buf[20..24], self.date_nsec); + BigEndian::write_u64(&mut buf[24..32], self.vm_clock_nsec); + BigEndian::write_u32(&mut buf[32..36], self.vm_state_size); + BigEndian::write_u32(&mut buf[36..40], self.extra_data_size); + + // Snapshot Extra data. + // vm_state_size_large is used for vm snapshot. + // It's equal to vm_state_size which is also 0 in disk snapshot. + BigEndian::write_u64(&mut buf[40..48], u64::from(self.vm_state_size)); + BigEndian::write_u64(&mut buf[48..56], self.disk_size); + if self.extra_data_size == SNAPSHOT_EXTRA_DATA_LEN_24 as u32 { + BigEndian::write_u64(&mut buf[56..64], self.icount); + } + + // Snapshot ID. + let mut id_vec = id_str.as_bytes().to_vec(); + buf.append(&mut id_vec); + + // Snapshot Name. + let mut name_vec = self.name.as_bytes().to_vec(); + buf.append(&mut name_vec); + + // 8 bytes Alignment. 
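+        // Pad the serialized entry (header + extra data + id + name) up to the next
+        // 8-byte boundary so its on-disk length matches get_size() above.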
+ let tmp_size = buf.len(); + let size = round_up(tmp_size as u64, 8).unwrap(); + // SAFETY: The size is a round up of old size. + buf.resize(size as usize, 0); + + buf + } +} + +pub struct QcowSnapshotHeader { + l1_table_offset: u64, + l1_size: u32, + id_str_size: u16, + name_size: u16, + date_sec: u32, + date_nsec: u32, + vm_clock_nsec: u64, + vm_state_size: u32, + extra_date_size: u32, +} + +impl QcowSnapshotHeader { + fn from_vec(buf: &[u8]) -> Result { + if buf.len() < size_of::() { + bail!("Invalid qcow2 snapshot header length {}.", buf.len()); + } + + Ok(QcowSnapshotHeader { + l1_table_offset: BigEndian::read_u64(&buf[0..8]), + l1_size: BigEndian::read_u32(&buf[8..12]), + id_str_size: BigEndian::read_u16(&buf[12..14]), + name_size: BigEndian::read_u16(&buf[14..16]), + date_sec: BigEndian::read_u32(&buf[16..20]), + date_nsec: BigEndian::read_u32(&buf[20..24]), + vm_clock_nsec: BigEndian::read_u64(&buf[24..32]), + vm_state_size: BigEndian::read_u32(&buf[32..36]), + extra_date_size: BigEndian::read_u32(&buf[36..40]), + }) + } +} + +pub struct QcowSnapshotExtraData { + _vm_state_size_large: u64, + disk_size: u64, + icount: u64, +} + +impl QcowSnapshotExtraData { + fn from_vec(buf: &[u8]) -> Result { + let has_icount = match buf.len() { + SNAPSHOT_EXTRA_DATA_LEN_24 => true, + SNAPSHOT_EXTRA_DATA_LEN_16 => false, + _ => bail!("Invalid snapshot extra data length {}.", buf.len()), + }; + + let mut extra = QcowSnapshotExtraData { + _vm_state_size_large: BigEndian::read_u64(&buf[0..8]), + disk_size: BigEndian::read_u64(&buf[8..16]), + icount: u64::MAX, + }; + + if has_icount { + extra.icount = BigEndian::read_u64(&buf[16..24]); + } + + Ok(extra) + } +} diff --git a/block_backend/src/qcow2/table.rs b/block_backend/src/qcow2/table.rs new file mode 100644 index 0000000000000000000000000000000000000000..3abd1fe4978d6e81eb13525427ef35ec177dc3e7 --- /dev/null +++ b/block_backend/src/qcow2/table.rs @@ -0,0 +1,309 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{cell::RefCell, collections::HashMap, rc::Rc}; + +use anyhow::{Context, Result}; +use log::info; + +use super::ENTRY_BITS; +use crate::{ + qcow2::{ + cache::{CacheTable, Qcow2Cache}, + header::QcowHeader, + SyncAioInfo, ENTRY_SIZE, L1_TABLE_OFFSET_MASK, L2_TABLE_OFFSET_MASK, + QCOW2_OFFSET_COMPRESSED, QCOW2_OFLAG_ZERO, + }, + BlockProperty, +}; +use machine_manager::config::MAX_L2_CACHE_SIZE; +use util::num_ops::div_round_up; + +// Default l1 table map length, which can describe 512GiB data for 64KiB cluster. +const L1_TABLE_MAP_LEN: usize = 1024; + +#[derive(PartialEq, Eq, Debug)] +pub enum Qcow2ClusterType { + /// Cluster is unallocated. + Unallocated, + /// Cluster is zero and not allocated. + ZeroPlain, + /// cluster is zero and allocated. + ZeroAlloc, + /// Cluster is allocated. + Normal, + /// Cluster is compressed. 
+ Compressed, +} + +impl Qcow2ClusterType { + pub fn is_allocated(&self) -> bool { + self.eq(&Qcow2ClusterType::Compressed) + || self.eq(&Qcow2ClusterType::Normal) + || self.eq(&Qcow2ClusterType::ZeroAlloc) + } + + pub fn is_read_zero(&self) -> bool { + if self.eq(&Qcow2ClusterType::Unallocated) + || self.eq(&Qcow2ClusterType::ZeroAlloc) + || self.eq(&Qcow2ClusterType::ZeroPlain) + { + return true; + } + false + } + + /// Get cluster type of l2 table entry. + pub fn get_cluster_type(l2_entry: u64) -> Self { + if l2_entry & QCOW2_OFFSET_COMPRESSED != 0 { + return Qcow2ClusterType::Compressed; + } + if l2_entry & QCOW2_OFLAG_ZERO != 0 { + if l2_entry & L2_TABLE_OFFSET_MASK != 0 { + return Qcow2ClusterType::ZeroAlloc; + } + return Qcow2ClusterType::ZeroPlain; + } + if l2_entry & L2_TABLE_OFFSET_MASK == 0 { + return Qcow2ClusterType::Unallocated; + } + Qcow2ClusterType::Normal + } +} + +pub struct Qcow2Table { + cluster_bits: u64, + cluster_size: u64, + pub l1_table: Vec, + pub l1_table_map: HashMap, + pub l1_table_offset: u64, + pub l1_size: u32, + pub l2_table_cache: Qcow2Cache, + sync_aio: Rc>, + l2_bits: u64, + l2_size: u64, +} + +impl Qcow2Table { + pub fn new(sync_aio: Rc>) -> Self { + Self { + sync_aio, + cluster_bits: 0, + cluster_size: 0, + l1_table: Vec::new(), + l1_table_map: HashMap::with_capacity(L1_TABLE_MAP_LEN), + l1_table_offset: 0, + l1_size: 0, + l2_table_cache: Qcow2Cache::default(), + l2_bits: 0, + l2_size: 0, + } + } + + pub fn init_table_info(&mut self, header: &QcowHeader, conf: &BlockProperty) -> Result<()> { + let max_l2_entries = + div_round_up(header.size, header.cluster_size()).with_context(|| { + format!( + "Invalid size {} or cluster size {}", + header.size, + header.cluster_size() + ) + })?; + let max_l2_cache = div_round_up(max_l2_entries * ENTRY_SIZE, header.cluster_size()) + .with_context(|| { + format!( + "Invalid l2 entries {} or cluster size {}", + max_l2_entries * ENTRY_SIZE, + header.cluster_size() + ) + })?; + let cache_size = if let Some(l2_cache) = conf.l2_cache_size { + l2_cache / header.cluster_size() + } else { + std::cmp::min(max_l2_cache, MAX_L2_CACHE_SIZE / header.cluster_size()) + }; + info!("Driver {} l2 cache size {}", conf.id, cache_size); + let l2_table_cache: Qcow2Cache = Qcow2Cache::new(cache_size as usize); + self.cluster_bits = u64::from(header.cluster_bits); + self.cluster_size = header.cluster_size(); + self.l2_bits = u64::from(header.cluster_bits) - ENTRY_BITS; + self.l2_size = header.cluster_size() / ENTRY_SIZE; + self.l2_table_cache = l2_table_cache; + self.l1_table_offset = header.l1_table_offset; + self.l1_size = header.l1_size; + Ok(()) + } + + pub fn load_l1_table(&mut self) -> Result<()> { + self.l1_table = self + .sync_aio + .borrow_mut() + .read_ctrl_cluster(self.l1_table_offset, u64::from(self.l1_size))?; + for l1_entry in &self.l1_table { + let l1_entry_addr = l1_entry & L1_TABLE_OFFSET_MASK; + self.l1_table_map.insert(l1_entry_addr, 1); + } + Ok(()) + } + + pub fn save_l1_table(&mut self) -> Result<()> { + self.sync_aio + .borrow_mut() + .write_ctrl_cluster(self.l1_table_offset, &self.l1_table) + } + + pub fn get_l1_table_index(&self, guest_offset: u64) -> u64 { + guest_offset >> (self.cluster_bits + self.l2_bits) + } + + pub fn get_l2_table_index(&self, guest_offset: u64) -> u64 { + (guest_offset >> self.cluster_bits) & (self.l2_size - 1) + } + + pub fn get_l1_table_entry(&self, guest_offset: u64) -> u64 { + let l1_idx = self.get_l1_table_index(guest_offset); + self.l1_table[l1_idx as usize] + } + + pub fn 
get_l2_table_cache_entry( + &mut self, + guest_offset: u64, + ) -> Option<&Rc>> { + let l1_entry = self.get_l1_table_entry(guest_offset); + let l2_entry_addr = l1_entry & L1_TABLE_OFFSET_MASK; + if l2_entry_addr == 0 { + None + } else { + self.l2_table_cache.get(l2_entry_addr) + } + } + + /// Get max data remaining size after the offset which indexed by l2 table. + pub fn get_l2_table_max_remain_size(&self, guest_offset: u64, offset_in_cluster: u64) -> u64 { + (self.l2_size - self.get_l2_table_index(guest_offset)) * self.cluster_size + - offset_in_cluster + } + + pub fn update_l1_table(&mut self, l1_index: usize, l2_address: u64) { + let old_addr = self.l1_table[l1_index] & L1_TABLE_OFFSET_MASK; + let new_addr = l2_address & L1_TABLE_OFFSET_MASK; + self.l1_table[l1_index] = l2_address; + self.l1_table_map.remove(&old_addr); + self.l1_table_map.insert(new_addr, 1); + } + + pub fn update_l2_table( + &mut self, + table: Rc>, + index: usize, + entry: u64, + ) -> Result<()> { + let is_dirty = table.borrow().dirty_info.is_dirty; + table.borrow_mut().set_entry_map(index, entry)?; + if !is_dirty { + self.l2_table_cache.add_dirty_table(table); + } + + Ok(()) + } + + pub fn cache_l2_table(&mut self, l2_table_entry: Rc>) -> Result<()> { + let l2_entry_addr = l2_table_entry.borrow().addr; + if self.l2_table_cache.contains_keys(l2_entry_addr) { + self.l2_table_cache.cache_map.remove(&l2_entry_addr); + } + if let Some(replaced_entry) = self + .l2_table_cache + .lru_replace(l2_entry_addr, l2_table_entry) + { + let borrowed_entry = replaced_entry.borrow(); + // Flush the dirty entry. + if borrowed_entry.dirty_info.is_dirty { + self.sync_aio.borrow_mut().write_dirty_info( + borrowed_entry.addr, + borrowed_entry.get_value(), + borrowed_entry.dirty_info.start, + borrowed_entry.dirty_info.end, + )?; + } + } + Ok(()) + } + + pub fn flush(&mut self) -> Result<()> { + self.l2_table_cache.flush(self.sync_aio.clone()) + } + + pub fn drop_dirty_caches(&mut self) { + self.l2_table_cache.clean_up_dirty_cache(); + } +} + +#[cfg(test)] +mod test { + use std::{ + cell::RefCell, + io::{Read, Seek, SeekFrom}, + rc::Rc, + }; + + use crate::qcow2::{ + cache::{CacheTable, ENTRY_SIZE_U64}, + test::create_qcow2, + }; + + #[test] + fn test_update_l2_table() { + let path = "/tmp/block_backend_test_update_l2_table.qcow2"; + let (mut image, mut qcow2) = create_qcow2(path); + let cluster_size = qcow2.header.cluster_size() as usize; + let addr = qcow2.alloc_cluster(1, true).unwrap(); + let l2_cluster: Vec = vec![0_u8; cluster_size]; + let l2_table = Rc::new(RefCell::new( + CacheTable::new(addr, l2_cluster, ENTRY_SIZE_U64).unwrap(), + )); + qcow2.table.cache_l2_table(l2_table.clone()).unwrap(); + + let test_val1 = 0xff00ff00_u64; + qcow2 + .table + .update_l2_table(l2_table.clone(), 0, test_val1) + .unwrap(); + let res = l2_table.borrow_mut().get_entry_map(0).unwrap(); + assert_eq!(res, test_val1); + + image.file.seek(SeekFrom::Start(addr)).unwrap(); + let mut buf = vec![0_u8; ENTRY_SIZE_U64]; + image.file.read_exact(&mut buf).unwrap(); + assert_eq!(buf, [0_u8; ENTRY_SIZE_U64]); + + let test_val2 = 0x00ff00ff_u64; + qcow2 + .table + .update_l2_table(l2_table.clone(), 8191, test_val2) + .unwrap(); + let res = l2_table.borrow_mut().get_entry_map(8191).unwrap(); + assert_eq!(res, test_val2); + + qcow2.table.flush().unwrap(); + image.file.seek(SeekFrom::Start(addr)).unwrap(); + let mut buf = vec![0_u8; ENTRY_SIZE_U64]; + image.file.read_exact(&mut buf).unwrap(); + assert_eq!(buf, test_val1.to_be_bytes()); + + let offset = addr + 
ENTRY_SIZE_U64 as u64 * 8191; + image.file.seek(SeekFrom::Start(offset)).unwrap(); + let mut buf = vec![0_u8; ENTRY_SIZE_U64]; + image.file.read_exact(&mut buf).unwrap(); + assert_eq!(buf, test_val2.to_be_bytes()); + } +} diff --git a/block_backend/src/raw.rs b/block_backend/src/raw.rs new file mode 100644 index 0000000000000000000000000000000000000000..d49578641b72eebabb14bb8f99e80d409babc724 --- /dev/null +++ b/block_backend/src/raw.rs @@ -0,0 +1,215 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + fs::File, + os::unix::io::AsRawFd, + sync::{ + atomic::{AtomicBool, AtomicU64}, + Arc, Mutex, + }, +}; + +use anyhow::{bail, Result}; + +use crate::{ + file::{CombineRequest, FileDriver}, + qcow2::is_aligned, + BlockDriverOps, BlockIoErrorCallback, BlockProperty, BlockStatus, CheckResult, CreateOptions, + ImageInfo, SECTOR_SIZE, +}; +use util::{ + aio::{get_iov_size, raw_write, Aio, Iovec}, + file::MAX_FILE_ALIGN, + unix::host_page_size, +}; + +pub struct RawDriver { + driver: FileDriver, + status: Arc>, +} + +// SAFETY: Send and Sync is not auto-implemented for raw pointer type in Aio. +// We use Arc>> to allow used in multi-threading. +unsafe impl Send for RawDriver {} +// SAFETY: The reason is same as above. +unsafe impl Sync for RawDriver {} + +impl RawDriver { + pub fn new(file: Arc, aio: Aio, prop: BlockProperty) -> Self { + Self { + driver: FileDriver::new(file, aio, prop), + status: Arc::new(Mutex::new(BlockStatus::Init)), + } + } + + // Fill the first block with zero. + // get_file_alignment() detects the alignment length by submitting IO to the first sector. + // If this area is fallocated, misaligned IO will also return success, so we pre fill this area. + pub fn alloc_first_block(&mut self, new_size: u64) -> Result<()> { + let write_size = if new_size < u64::from(MAX_FILE_ALIGN) { + SECTOR_SIZE + } else { + u64::from(MAX_FILE_ALIGN) + }; + let max_align = std::cmp::max(u64::from(MAX_FILE_ALIGN), host_page_size()) as usize; + // SAFETY: allocate aligned memory and free it later. + let align_buf = unsafe { libc::memalign(max_align, write_size as usize) }; + if align_buf.is_null() { + bail!("Failed to alloc memory for write."); + } + + // SAFETY: align_buf is valid and large enough. + let ret = unsafe { + raw_write( + self.driver.file.as_raw_fd(), + align_buf as u64, + write_size as usize, + 0, + ) + }; + // SAFETY: the memory is allocated in this function. 
+ unsafe { libc::free(align_buf) }; + + if ret < 0 { + bail!("Failed to alloc first block, ret={}", ret); + } + Ok(()) + } +} + +impl BlockDriverOps for RawDriver { + fn create_image(&mut self, options: &CreateOptions) -> Result { + let raw_options = options.raw()?; + self.resize(raw_options.img_size)?; + let image_info = format!("fmt=raw size={}", raw_options.img_size); + Ok(image_info) + } + + fn query_image(&mut self, info: &mut ImageInfo) -> Result<()> { + info.format = "raw".to_string(); + info.virtual_size = self.disk_size()?; + info.actual_size = self.driver.actual_size()?; + Ok(()) + } + + fn check_image(&mut self, _res: &mut CheckResult, _quite: bool, _fix: u64) -> Result<()> { + bail!("This image format does not support checks"); + } + + fn read_vectored(&mut self, iovec: Vec, offset: usize, completecb: T) -> Result<()> { + let nbytes = get_iov_size(&iovec); + trace::block_read_vectored(&self.driver.block_prop.id, offset, nbytes); + self.driver.read_vectored( + vec![CombineRequest::new(iovec, offset as u64, nbytes)], + completecb, + ) + } + + fn write_vectored(&mut self, iovec: Vec, offset: usize, completecb: T) -> Result<()> { + let nbytes = get_iov_size(&iovec); + trace::block_write_vectored(&self.driver.block_prop.id, offset, nbytes); + self.driver.write_vectored( + vec![CombineRequest::new(iovec, offset as u64, nbytes)], + completecb, + ) + } + + fn write_zeroes( + &mut self, + offset: usize, + nbytes: u64, + completecb: T, + unmap: bool, + ) -> Result<()> { + trace::block_write_zeroes(&self.driver.block_prop.id, offset, nbytes, unmap); + self.driver.write_zeroes( + vec![CombineRequest::new(Vec::new(), offset as u64, nbytes)], + completecb, + unmap, + ) + } + + fn discard(&mut self, offset: usize, nbytes: u64, completecb: T) -> Result<()> { + trace::block_discard(&self.driver.block_prop.id, offset, nbytes); + self.driver.discard( + vec![CombineRequest::new(Vec::new(), offset as u64, nbytes)], + completecb, + ) + } + + fn datasync(&mut self, completecb: T) -> Result<()> { + trace::block_datasync(&self.driver.block_prop.id); + self.driver.datasync(completecb) + } + + fn flush_request(&mut self) -> Result<()> { + trace::block_flush_request(&self.driver.block_prop.id); + self.driver.flush_request() + } + + fn resize(&mut self, new_size: u64) -> Result<()> { + if !is_aligned(SECTOR_SIZE, new_size) { + bail!( + "The new size {} is not aligned to {}", + new_size, + SECTOR_SIZE + ); + } + + let old_size = self.disk_size()?; + if new_size == old_size { + return Ok(()); + } + + let meta_data = self.driver.file.metadata()?; + if !meta_data.is_file() { + bail!("Cannot resize unregular file"); + } + + self.driver.extend_to_len(new_size)?; + if old_size == 0 { + self.alloc_first_block(new_size)?; + } + + Ok(()) + } + + fn drain_request(&self) { + trace::block_drain_request(&self.driver.block_prop.id); + self.driver.drain_request(); + } + + fn get_inflight(&self) -> Arc { + self.driver.incomplete.clone() + } + + fn register_io_event( + &mut self, + broken: Arc, + error_cb: BlockIoErrorCallback, + ) -> Result<()> { + self.driver.register_io_event(broken, error_cb) + } + + fn unregister_io_event(&mut self) -> Result<()> { + self.driver.unregister_io_event() + } + + fn disk_size(&mut self) -> Result { + self.driver.disk_size() + } + + fn get_status(&mut self) -> Arc> { + self.status.clone() + } +} diff --git a/boot_loader/Cargo.toml b/boot_loader/Cargo.toml index 933ff86431d06a60f3669b0f62ddf346f293941e..e2c9c45f69aac4c55dd13eb2fe95eda0a51e27f0 100644 --- a/boot_loader/Cargo.toml +++ 
b/boot_loader/Cargo.toml @@ -1,17 +1,15 @@ [package] name = "boot_loader" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" license = "Mulan PSL v2" [dependencies] -error-chain = "0.12.4" -kvm-bindings = ">=0.3.0" -kvm-ioctls = "0.6.0" -libc = ">=0.2.71" -log = "0.4.8" -vmm-sys-util = ">=0.7.0" +thiserror = "1.0" +anyhow = "1.0" +kvm-bindings = { version = "0.7.0", features = ["fam-wrappers"] } +log = "0.4" address_space = { path = "../address_space" } devices = { path = "../devices" } util = { path = "../util" } diff --git a/boot_loader/src/aarch64/mod.rs b/boot_loader/src/aarch64/mod.rs index c9608cb26f309f5ef04329081ce3e2bcdc81d966..d06a95af65a57a31a6884132f1a0eb99efcd7f84 100644 --- a/boot_loader/src/aarch64/mod.rs +++ b/boot_loader/src/aarch64/mod.rs @@ -15,11 +15,13 @@ use std::io::Read; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; -use address_space::{AddressSpace, GuestAddress}; -use devices::legacy::{errors::ErrorKind as FwcfgErrorKind, FwCfgEntryType, FwCfgOps}; -use util::byte_code::ByteCode; +use anyhow::{anyhow, Context, Result}; +use log::info; -use crate::errors::{ErrorKind, Result, ResultExt}; +use crate::error::BootLoaderError; +use address_space::{AddressAttr, AddressSpace, GuestAddress}; +use devices::legacy::{error::LegacyError as FwcfgErrorKind, FwCfgEntryType, FwCfgOps}; +use util::byte_code::ByteCode; const AARCH64_KERNEL_OFFSET: u64 = 0x8_0000; @@ -52,7 +54,8 @@ fn load_kernel( kernel_path: &Path, sys_mem: &Arc, ) -> Result { - let mut kernel_image = File::open(kernel_path).chain_err(|| ErrorKind::BootLoaderOpenKernel)?; + let mut kernel_image = + File::open(kernel_path).with_context(|| BootLoaderError::BootLoaderOpenKernel)?; let kernel_size = kernel_image.metadata().unwrap().len(); let kernel_end = kernel_start + kernel_size; @@ -65,10 +68,10 @@ fn load_kernel( FwCfgEntryType::KernelSize, (kernel_size as u32).as_bytes().to_vec(), ) - .chain_err(|| FwcfgErrorKind::AddEntryErr("KernelSize".to_string()))?; + .with_context(|| FwcfgErrorKind::AddEntryErr("KernelSize".to_string()))?; lock_dev .add_data_entry(FwCfgEntryType::KernelData, kernel_data) - .chain_err(|| FwcfgErrorKind::AddEntryErr("KernelData".to_string()))?; + .with_context(|| FwcfgErrorKind::AddEntryErr("KernelData".to_string()))?; } else { if sys_mem .memory_end_address() @@ -76,11 +79,19 @@ fn load_kernel( .checked_sub(kernel_end) .is_none() { - return Err(ErrorKind::KernelOverflow(kernel_start, kernel_size).into()); + return Err(anyhow!(BootLoaderError::KernelOverflow( + kernel_start, + kernel_size + ))); } sys_mem - .write(&mut kernel_image, GuestAddress(kernel_start), kernel_size) - .chain_err(|| "Fail to write kernel to guest memory")?; + .write( + &mut kernel_image, + GuestAddress(kernel_start), + kernel_size, + AddressAttr::Ram, + ) + .with_context(|| "Fail to write kernel to guest memory")?; } Ok(kernel_end) } @@ -91,19 +102,16 @@ fn load_initrd( sys_mem: &Arc, kernel_end: u64, ) -> Result<(u64, u64)> { - let mut initrd_image = File::open(initrd_path).chain_err(|| ErrorKind::BootLoaderOpenInitrd)?; + let mut initrd_image = + File::open(initrd_path).with_context(|| BootLoaderError::BootLoaderOpenInitrd)?; let initrd_size = initrd_image.metadata().unwrap().len(); - let initrd_start = if let Some(addr) = sys_mem + let initrd_start = sys_mem .memory_end_address() .raw_value() .checked_sub(initrd_size) .filter(|addr| addr >= &kernel_end) - { - addr - } else { - return Err(ErrorKind::InitrdOverflow(kernel_end, 
initrd_size).into()); - }; + .with_context(|| BootLoaderError::InitrdOverflow(kernel_end, initrd_size))?; if let Some(fw_cfg) = fwcfg { let mut initrd_data = Vec::new(); @@ -114,20 +122,25 @@ fn load_initrd( FwCfgEntryType::InitrdAddr, (initrd_start as u32).as_bytes().to_vec(), ) - .chain_err(|| FwcfgErrorKind::AddEntryErr("InitrdAddr".to_string()))?; + .with_context(|| FwcfgErrorKind::AddEntryErr("InitrdAddr".to_string()))?; lock_dev .add_data_entry( FwCfgEntryType::InitrdSize, (initrd_size as u32).as_bytes().to_vec(), ) - .chain_err(|| FwcfgErrorKind::AddEntryErr("InitrdSize".to_string()))?; + .with_context(|| FwcfgErrorKind::AddEntryErr("InitrdSize".to_string()))?; lock_dev .add_data_entry(FwCfgEntryType::InitrdData, initrd_data) - .chain_err(|| FwcfgErrorKind::AddEntryErr("InitrdData".to_string()))?; + .with_context(|| FwcfgErrorKind::AddEntryErr("InitrdData".to_string()))?; } else { sys_mem - .write(&mut initrd_image, GuestAddress(initrd_start), initrd_size) - .chain_err(|| "Fail to write initrd to guest memory")?; + .write( + &mut initrd_image, + GuestAddress(initrd_start), + initrd_size, + AddressAttr::Ram, + ) + .with_context(|| "Fail to write initrd to guest memory")?; } Ok((initrd_start, initrd_size)) @@ -167,7 +180,9 @@ pub fn load_linux( .filter(|addr| addr >= &config.mem_start) .is_none() { - return Err(ErrorKind::DTBOverflow(sys_mem.memory_end_address().raw_value()).into()); + return Err(anyhow!(BootLoaderError::DTBOverflow( + sys_mem.memory_end_address().raw_value() + ))); } let kernel_start = config.mem_start + AARCH64_KERNEL_OFFSET; @@ -188,13 +203,13 @@ pub fn load_linux( config.kernel.as_ref().unwrap(), sys_mem, ) - .chain_err(|| "Fail to load kernel")?; + .with_context(|| "Fail to load kernel")?; let mut initrd_start = 0_u64; let mut initrd_size = 0_u64; if config.initrd.is_some() { let initrd_tuple = load_initrd(fwcfg, config.initrd.as_ref().unwrap(), sys_mem, kernel_end) - .chain_err(|| "Fail to load initrd")?; + .with_context(|| "Fail to load initrd")?; initrd_start = initrd_tuple.0; initrd_size = initrd_tuple.1; } else { diff --git a/boot_loader/src/error.rs b/boot_loader/src/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..410f0fe6a2a624096992e917519056f9b0593497 --- /dev/null +++ b/boot_loader/src/error.rs @@ -0,0 +1,58 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
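+
+// Boot loader errors are now plain `thiserror` variants; the `#[from]` sources below
+// let `?` convert io, address_space and fwcfg errors into `BootLoaderError` directly.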
+ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum BootLoaderError { + #[error("Io")] + Io { + #[from] + source: std::io::Error, + }, + #[error("AddressSpace")] + AddressSpace { + #[from] + source: address_space::error::AddressSpaceError, + }, + #[error("FwCfg")] + FwCfg { + #[from] + source: devices::legacy::error::LegacyError, + }, + #[allow(clippy::upper_case_acronyms)] + #[cfg(target_arch = "aarch64")] + #[error( + "guest memory size {0} should bigger than {}", + util::device_tree::FDT_MAX_SIZE + )] + DTBOverflow(u64), + #[error("Failed to load kernel image {0} to memory {1}.")] + KernelOverflow(u64, u64), + #[error("Failed to load initrd image {0} to memory {1}.")] + InitrdOverflow(u64, u64), + #[error("Failed to open kernel image")] + BootLoaderOpenKernel, + #[error("Failed to open initrd image")] + BootLoaderOpenInitrd, + #[error("Configure cpu number({0}) above supported max cpu numbers(254)")] + MaxCpus(u8), + #[error("Invalid bzImage kernel file")] + #[cfg(target_arch = "x86_64")] + InvalidBzImage, + #[error("Kernel version is too old.")] + #[cfg(target_arch = "x86_64")] + OldVersionKernel, + #[error("ELF-format kernel is not supported")] + #[cfg(target_arch = "x86_64")] + ElfKernel, +} diff --git a/boot_loader/src/lib.rs b/boot_loader/src/lib.rs index 9a3426442d4416755bc41493f6b102255217dc47..46955e83f8cd040d6995d666460e3160cc02f4bb 100644 --- a/boot_loader/src/lib.rs +++ b/boot_loader/src/lib.rs @@ -36,11 +36,16 @@ //! # extern crate boot_loader; //! //! use address_space::{AddressSpace, Region}; -//! use boot_loader::{BootLoaderConfig, load_linux}; +//! use boot_loader::{load_linux, BootLoaderConfig}; //! -//! #[cfg(target_arch="x86_64")] +//! #[cfg(target_arch = "x86_64")] //! fn main() { -//! let guest_mem = AddressSpace::new(Region::init_container_region(std::u64::MAX)).unwrap(); +//! let guest_mem = AddressSpace::new( +//! Region::init_container_region(std::u64::MAX, "guest_mem"), +//! "guest_mem", +//! None, +//! ) +//! .unwrap(); //! let kernel_file = std::path::PathBuf::from("/path/to/my/kernel"); //! let bootloader_config = BootLoaderConfig { //! kernel: Some(kernel_file), @@ -58,9 +63,14 @@ //! // Now PE linux kernel and kernel cmdline are loaded to guest memory... //! } //! -//! #[cfg(target_arch="aarch64")] +//! #[cfg(target_arch = "aarch64")] //! fn main() { -//! let guest_mem = AddressSpace::new(Region::init_container_region(u64::MAX)).unwrap(); +//! let guest_mem = AddressSpace::new( +//! Region::init_container_region(u64::MAX, "guest_mem"), +//! "guest_mem", +//! None, +//! ) +//! .unwrap(); //! let kernel_file = std::path::PathBuf::from("/path/to/my/kernel"); //! let bootloader_config = BootLoaderConfig { //! kernel: Some(kernel_file), @@ -73,14 +83,10 @@ //! } //! ``` -#[macro_use] -extern crate log; -#[macro_use] -extern crate error_chain; - #[allow(clippy::upper_case_acronyms)] #[cfg(target_arch = "aarch64")] mod aarch64; +pub mod error; #[cfg(target_arch = "x86_64")] mod x86_64; @@ -90,6 +96,7 @@ pub use aarch64::load_linux; pub use aarch64::AArch64BootLoader as BootLoader; #[cfg(target_arch = "aarch64")] pub use aarch64::AArch64BootLoaderConfig as BootLoaderConfig; +pub use error::BootLoaderError; #[cfg(target_arch = "x86_64")] pub use x86_64::load_linux; @@ -97,57 +104,3 @@ pub use x86_64::load_linux; pub use x86_64::X86BootLoader as BootLoader; #[cfg(target_arch = "x86_64")] pub use x86_64::X86BootLoaderConfig as BootLoaderConfig; - -pub mod errors { - error_chain! 
{ - foreign_links { - Io(std::io::Error); - } - links { - AddressSpace(address_space::errors::Error, address_space::errors::ErrorKind); - FwCfg(devices::legacy::errors::Error, devices::legacy::errors::ErrorKind); - } - errors { - #[allow(clippy::upper_case_acronyms)] - #[cfg(target_arch = "aarch64")] DTBOverflow(size: u64) { - display( - "guest memory size {} should bigger than {}", - size, - util::device_tree::FDT_MAX_SIZE - ) - } - KernelOverflow(addr: u64, size: u64) { - display( - "Failed to load kernel image {} to memory {}.", - size, - addr - ) - } - InitrdOverflow(addr: u64, size: u64) { - display( - "Failed to load initrd image {} to memory {}.", - size, - addr - ) - } - BootLoaderOpenKernel { - display("Failed to open kernel image") - } - BootLoaderOpenInitrd { - display("Failed to open initrd image") - } - MaxCpus(cpus: u8) { - display("Configure cpu number({}) above supported max cpu numbers(254)", cpus) - } - #[cfg(target_arch = "x86_64")] InvalidBzImage { - display("Invalid bzImage kernel file") - } - #[cfg(target_arch = "x86_64")] OldVersionKernel { - display("Kernel version is too old.") - } - #[cfg(target_arch = "x86_64")] ElfKernel { - display("ELF-format kernel is not supported") - } - } - } -} diff --git a/boot_loader/src/x86_64/bootparam.rs b/boot_loader/src/x86_64/bootparam.rs index 8f1b35da8bd0bb04e7173b91d0bdfb8012b2db63..b51f792aef66c5fa6d0bc84b58a834c8036d15de 100644 --- a/boot_loader/src/x86_64/bootparam.rs +++ b/boot_loader/src/x86_64/bootparam.rs @@ -12,14 +12,15 @@ use std::sync::Arc; -use address_space::AddressSpace; -use util::byte_code::ByteCode; +use anyhow::{anyhow, Result}; use super::{ X86BootLoaderConfig, EBDA_START, MB_BIOS_BEGIN, REAL_MODE_IVT_BEGIN, VGA_RAM_BEGIN, VMLINUX_RAM_START, }; -use crate::errors::{ErrorKind, Result}; +use crate::error::BootLoaderError; +use address_space::AddressSpace; +use util::byte_code::ByteCode; pub const E820_RAM: u32 = 1; pub const E820_RESERVED: u32 = 2; @@ -40,7 +41,7 @@ pub struct RealModeKernelHeader { root_flags: u16, syssize: u32, ram_size: u16, - vid_mode: u16, + video_mode: u16, root_dev: u16, boot_flag: u16, jump: u16, @@ -91,13 +92,13 @@ impl RealModeKernelHeader { pub fn check_valid_kernel(&self) -> Result<()> { if self.header != HDRS { - return Err(ErrorKind::ElfKernel.into()); + return Err(anyhow!(BootLoaderError::ElfKernel)); } if (self.version < BOOT_VERSION) || ((self.loadflags & 0x1) == 0x0) { - return Err(ErrorKind::InvalidBzImage.into()); + return Err(anyhow!(BootLoaderError::InvalidBzImage)); } if self.version < 0x202 { - return Err(ErrorKind::OldVersionKernel.into()); + return Err(anyhow!(BootLoaderError::OldVersionKernel)); } Ok(()) } @@ -170,6 +171,8 @@ impl ByteCode for BootParams {} impl Default for BootParams { fn default() -> Self { + // SAFETY: The function of default is only used in trait of ByteCode, + // it can be sure all member variables will be initialized later. 
unsafe { ::std::mem::zeroed() } } } @@ -225,15 +228,14 @@ mod test { use std::path::PathBuf; use std::sync::Arc; - use address_space::{AddressSpace, GuestAddress, HostMemMapping, Region}; - use super::super::X86BootLoaderConfig; use super::*; + use address_space::{AddressSpace, GuestAddress, HostMemMapping, Region}; #[test] fn test_boot_param() { - let root = Region::init_container_region(0x2000_0000); - let space = AddressSpace::new(root.clone()).unwrap(); + let root = Region::init_container_region(0x2000_0000, "root"); + let space = AddressSpace::new(root.clone(), "space", None).unwrap(); let ram1 = Arc::new( HostMemMapping::new( GuestAddress(0), @@ -246,7 +248,7 @@ mod test { ) .unwrap(), ); - let region_a = Region::init_ram_region(ram1.clone()); + let region_a = Region::init_ram_region(ram1.clone(), "region_a"); root.add_subregion(region_a, ram1.start_address().raw_value()) .unwrap(); diff --git a/boot_loader/src/x86_64/direct_boot/gdt.rs b/boot_loader/src/x86_64/direct_boot/gdt.rs index 79f7e2c81353486e4bf1ffb0685d945b7dddc14f..07995a061ccaefae082c856131c72d4012ac0743 100644 --- a/boot_loader/src/x86_64/direct_boot/gdt.rs +++ b/boot_loader/src/x86_64/direct_boot/gdt.rs @@ -12,14 +12,14 @@ use std::sync::Arc; -use address_space::{AddressSpace, GuestAddress}; +use anyhow::{Context, Result}; use kvm_bindings::kvm_segment; use super::super::BootGdtSegment; use super::super::{ BOOT_GDT_MAX, BOOT_GDT_OFFSET, BOOT_IDT_OFFSET, GDT_ENTRY_BOOT_CS, GDT_ENTRY_BOOT_DS, }; -use crate::errors::{Result, ResultExt}; +use address_space::{AddressAttr, AddressSpace, GuestAddress}; // /* // * Constructor for a conventional segment GDT (or LDT) entry. @@ -32,10 +32,10 @@ use crate::errors::{Result, ResultExt}; // (((base) & _AC(0x00ffffff,ULL)) << 16) | \ // (((limit) & _AC(0x0000ffff,ULL)))) // -pub struct GdtEntry(pub u64); +struct GdtEntry(pub u64); impl GdtEntry { - pub fn new(flags: u64, base: u64, limit: u64) -> Self { + fn new(flags: u64, base: u64, limit: u64) -> Self { let base = (base & 0xff00_0000) << (56 - 24) | (base & 0x00ff_ffff) << 16; let limit = (limit & 0x000f_0000) << (48 - 16) | (limit & 0x0000_ffff); let flags = (flags & 0x0000_f0ff) << 40; @@ -91,11 +91,11 @@ impl From for u64 { } fn write_gdt_table(table: &[u64], guest_mem: &Arc) -> Result<()> { - let mut boot_gdt_addr = BOOT_GDT_OFFSET as u64; + let mut boot_gdt_addr = BOOT_GDT_OFFSET; for (_, entry) in table.iter().enumerate() { guest_mem - .write_object(entry, GuestAddress(boot_gdt_addr)) - .chain_err(|| format!("Failed to load gdt to 0x{:x}", boot_gdt_addr))?; + .write_object(entry, GuestAddress(boot_gdt_addr), AddressAttr::Ram) + .with_context(|| format!("Failed to load gdt to 0x{:x}", boot_gdt_addr))?; boot_gdt_addr += 8; } Ok(()) @@ -104,14 +104,14 @@ fn write_gdt_table(table: &[u64], guest_mem: &Arc) -> Result<()> { fn write_idt_value(val: u64, guest_mem: &Arc) -> Result<()> { let boot_idt_addr = BOOT_IDT_OFFSET; guest_mem - .write_object(&val, GuestAddress(boot_idt_addr)) - .chain_err(|| format!("Failed to load gdt to 0x{:x}", boot_idt_addr))?; + .write_object(&val, GuestAddress(boot_idt_addr), AddressAttr::Ram) + .with_context(|| format!("Failed to load gdt to 0x{:x}", boot_idt_addr))?; Ok(()) } pub fn setup_gdt(guest_mem: &Arc) -> Result { - let gdt_table: [u64; BOOT_GDT_MAX as usize] = [ + let gdt_table: [u64; BOOT_GDT_MAX] = [ GdtEntry::new(0, 0, 0).into(), // NULL GdtEntry::new(0, 0, 0).into(), // NULL GdtEntry::new(0xa09b, 0, 0xfffff).into(), // CODE @@ -119,9 +119,9 @@ pub fn setup_gdt(guest_mem: &Arc) -> Result { ]; 
let mut code_seg: kvm_segment = GdtEntry(gdt_table[GDT_ENTRY_BOOT_CS as usize]).into(); - code_seg.selector = GDT_ENTRY_BOOT_CS as u16 * 8; + code_seg.selector = u16::from(GDT_ENTRY_BOOT_CS) * 8; let mut data_seg: kvm_segment = GdtEntry(gdt_table[GDT_ENTRY_BOOT_DS as usize]).into(); - data_seg.selector = GDT_ENTRY_BOOT_DS as u16 * 8; + data_seg.selector = u16::from(GDT_ENTRY_BOOT_DS) * 8; write_gdt_table(&gdt_table[..], guest_mem)?; write_idt_value(0, guest_mem)?; @@ -138,9 +138,10 @@ pub fn setup_gdt(guest_mem: &Arc) -> Result { #[cfg(test)] mod test { - use super::*; use kvm_bindings::kvm_segment; + use super::*; + #[test] fn test_gdt_entry() { assert_eq!(GdtEntry::new(0xa09b, 0x0, 0xfffff).0, 0xaf9b000000ffff); diff --git a/boot_loader/src/x86_64/direct_boot/mod.rs b/boot_loader/src/x86_64/direct_boot/mod.rs index da0c12383be353896452114cf1c74031c98ad37e..c910d8225ec772480d8a6cf81d275d321c18ee5a 100644 --- a/boot_loader/src/x86_64/direct_boot/mod.rs +++ b/boot_loader/src/x86_64/direct_boot/mod.rs @@ -17,18 +17,20 @@ use std::fs::File; use std::io::{Read, Seek, SeekFrom}; use std::sync::Arc; -use address_space::{AddressSpace, GuestAddress}; -use util::byte_code::ByteCode; +use anyhow::{Context, Result}; +use log::info; +use self::gdt::setup_gdt; +use self::mptable::setup_isa_mptable; use super::bootparam::{BootParams, RealModeKernelHeader, UNDEFINED_ID}; use super::{X86BootLoader, X86BootLoaderConfig}; use super::{ BOOT_HDR_START, BOOT_LOADER_SP, BZIMAGE_BOOT_OFFSET, CMDLINE_START, EBDA_START, INITRD_ADDR_MAX, PDE_START, PDPTE_START, PML4_START, VMLINUX_STARTUP, ZERO_PAGE_START, }; -use crate::errors::{ErrorKind, Result, ResultExt}; -use gdt::setup_gdt; -use mptable::setup_isa_mptable; +use crate::error::BootLoaderError; +use address_space::{AddressAttr, AddressSpace, GuestAddress}; +use util::byte_code::ByteCode; /// Load bzImage linux kernel to Guest Memory. /// @@ -50,26 +52,26 @@ use mptable::setup_isa_mptable; /// /// * Invalid BzImage header or version. /// * Failed to write bzImage linux kernel to guest memory. -pub fn load_bzimage(kernel_image: &mut File) -> Result { +fn load_bzimage(kernel_image: &mut File) -> Result { let mut boot_hdr = RealModeKernelHeader::new(); kernel_image.seek(SeekFrom::Start(BOOT_HDR_START))?; kernel_image .read_exact(boot_hdr.as_mut_bytes()) - .chain_err(|| "Failed to read boot_hdr from bzImage kernel")?; + .with_context(|| "Failed to read boot_hdr from bzImage kernel")?; boot_hdr.type_of_loader = UNDEFINED_ID; if let Err(e) = boot_hdr.check_valid_kernel() { - kernel_image.seek(SeekFrom::Start(0))?; + kernel_image.rewind()?; return Err(e); } - let mut setup_size = boot_hdr.setup_sects as u64; + let mut setup_size = u64::from(boot_hdr.setup_sects); if setup_size == 0 { setup_size = 4; } setup_size = (setup_size + 1) << 9; - kernel_image.seek(SeekFrom::Start(setup_size as u64))?; + kernel_image.seek(SeekFrom::Start(setup_size))?; Ok(boot_hdr) } @@ -85,11 +87,16 @@ pub fn load_bzimage(kernel_image: &mut File) -> Result { /// /// * Write image to guest memory failed. 
fn load_image(image: &mut File, start_addr: u64, sys_mem: &Arc) -> Result<()> { - let curr_loc = image.seek(SeekFrom::Current(0))?; + let curr_loc = image.stream_position()?; let len = image.seek(SeekFrom::End(0))?; image.seek(SeekFrom::Start(curr_loc))?; - sys_mem.write(image, GuestAddress(start_addr), len - curr_loc)?; + sys_mem.write( + image, + GuestAddress(start_addr), + len - curr_loc, + AddressAttr::Ram, + )?; Ok(()) } @@ -99,13 +106,14 @@ fn load_kernel_image( sys_mem: &Arc, boot_layout: &mut X86BootLoader, ) -> Result { - let mut kernel_image = File::open(kernel_path).chain_err(|| ErrorKind::BootLoaderOpenKernel)?; + let mut kernel_image = + File::open(kernel_path).with_context(|| BootLoaderError::BootLoaderOpenKernel)?; let (boot_hdr, kernel_start, vmlinux_start) = if let Ok(hdr) = load_bzimage(&mut kernel_image) { ( hdr, - hdr.code32_start as u64 + BZIMAGE_BOOT_OFFSET, - hdr.code32_start as u64, + u64::from(hdr.code32_start) + BZIMAGE_BOOT_OFFSET, + u64::from(hdr.code32_start), ) } else { ( @@ -115,7 +123,8 @@ fn load_kernel_image( ) }; - load_image(&mut kernel_image, vmlinux_start, sys_mem).chain_err(|| "Failed to load image")?; + load_image(&mut kernel_image, vmlinux_start, sys_mem) + .with_context(|| "Failed to load image")?; boot_layout.boot_ip = kernel_start; @@ -138,11 +147,11 @@ fn load_initrd( }; let mut initrd_image = File::open(config.initrd.as_ref().unwrap()) - .chain_err(|| ErrorKind::BootLoaderOpenInitrd)?; - let initrd_size = initrd_image.metadata().unwrap().len() as u64; + .with_context(|| BootLoaderError::BootLoaderOpenInitrd)?; + let initrd_size = initrd_image.metadata().unwrap().len(); let initrd_addr = (initrd_addr_max - initrd_size) & !0xfff_u64; - load_image(&mut initrd_image, initrd_addr, sys_mem).chain_err(|| "Failed to load image")?; + load_image(&mut initrd_image, initrd_addr, sys_mem).with_context(|| "Failed to load image")?; header.set_ramdisk(initrd_addr as u32, initrd_size as u32); @@ -159,22 +168,22 @@ fn setup_page_table(sys_mem: &Arc) -> Result { // Entry covering VA [0..512GB) let pdpte = boot_pdpte_addr | 0x03; sys_mem - .write_object(&pdpte, GuestAddress(boot_pml4_addr)) - .chain_err(|| format!("Failed to load PD PTE to 0x{:x}", boot_pml4_addr))?; + .write_object(&pdpte, GuestAddress(boot_pml4_addr), AddressAttr::Ram) + .with_context(|| format!("Failed to load PD PTE to 0x{:x}", boot_pml4_addr))?; // Entry covering VA [0..1GB) let pde = boot_pde_addr | 0x03; sys_mem - .write_object(&pde, GuestAddress(boot_pdpte_addr)) - .chain_err(|| format!("Failed to load PDE to 0x{:x}", boot_pdpte_addr))?; + .write_object(&pde, GuestAddress(boot_pdpte_addr), AddressAttr::Ram) + .with_context(|| format!("Failed to load PDE to 0x{:x}", boot_pdpte_addr))?; // 512 2MB entries together covering VA [0..1GB). Note we are assuming // CPU supports 2MB pages (/proc/cpuinfo has 'pse'). All modern CPUs do. 
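    // As a worked example of the flag bits used in this function: 0x03 sets the
    // PRESENT and WRITABLE bits on the PML4E/PDPTE written above, while 0x83
    // additionally sets the PS bit, so every PD entry written below maps one
    // 2MiB frame. Entry i covers guest-physical [i * 2MiB, (i + 1) * 2MiB);
    // the last entry is therefore (511 << 21) + 0x83 = 0x3fe0_0083.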
for i in 0..512u64 { let pde = (i << 21) + 0x83u64; sys_mem - .write_object(&pde, GuestAddress(boot_pde_addr + i * 8)) - .chain_err(|| format!("Failed to load PDE to 0x{:x}", boot_pde_addr + i * 8))?; + .write_object(&pde, GuestAddress(boot_pde_addr + i * 8), AddressAttr::Ram) + .with_context(|| format!("Failed to load PDE to 0x{:x}", boot_pde_addr + i * 8))?; } Ok(boot_pml4_addr) @@ -188,13 +197,17 @@ fn setup_boot_params( let mut boot_params = BootParams::new(*boot_hdr); boot_params.setup_e820_entries(config, sys_mem); sys_mem - .write_object(&boot_params, GuestAddress(ZERO_PAGE_START)) - .chain_err(|| format!("Failed to load zero page to 0x{:x}", ZERO_PAGE_START))?; + .write_object( + &boot_params, + GuestAddress(ZERO_PAGE_START), + AddressAttr::Ram, + ) + .with_context(|| format!("Failed to load zero page to 0x{:x}", ZERO_PAGE_START))?; Ok(()) } -pub fn setup_kernel_cmdline( +fn setup_kernel_cmdline( config: &X86BootLoaderConfig, sys_mem: &Arc, boot_hdr: &mut RealModeKernelHeader, @@ -205,7 +218,8 @@ pub fn setup_kernel_cmdline( sys_mem.write( &mut config.kernel_cmdline.as_bytes(), GuestAddress(CMDLINE_START), - cmdline_len as u64, + u64::from(cmdline_len), + AddressAttr::Ram, )?; Ok(()) @@ -234,28 +248,25 @@ pub fn load_linux( config: &X86BootLoaderConfig, sys_mem: &Arc, ) -> Result { - if config.kernel.is_none() { - bail!("Kernel is required for direct-boot mode."); - } - + let kernel_path = config + .kernel + .as_ref() + .with_context(|| "Kernel is required for direct-boot mode.")?; let mut boot_loader_layout = X86BootLoader { boot_sp: BOOT_LOADER_SP, zero_page_addr: ZERO_PAGE_START, ..Default::default() }; - let mut boot_header = load_kernel_image( - config.kernel.as_ref().unwrap(), - sys_mem, - &mut boot_loader_layout, - )?; + let mut boot_header = load_kernel_image(kernel_path, sys_mem, &mut boot_loader_layout)?; load_initrd(config, sys_mem, &mut boot_header) - .chain_err(|| "Failed to load initrd to vm memory")?; + .with_context(|| "Failed to load initrd to vm memory")?; setup_kernel_cmdline(config, sys_mem, &mut boot_header) - .chain_err(|| "Failed to setup kernel cmdline")?; + .with_context(|| "Failed to setup kernel cmdline")?; - setup_boot_params(config, sys_mem, &boot_header).chain_err(|| "Failed to setup boot params")?; + setup_boot_params(config, sys_mem, &boot_header) + .with_context(|| "Failed to setup boot params")?; setup_isa_mptable( sys_mem, @@ -266,26 +277,27 @@ pub fn load_linux( )?; boot_loader_layout.boot_pml4_addr = - setup_page_table(sys_mem).chain_err(|| "Failed to setup page table")?; - boot_loader_layout.segments = setup_gdt(sys_mem).chain_err(|| "Failed to setup gdt")?; + setup_page_table(sys_mem).with_context(|| "Failed to setup page table")?; + boot_loader_layout.segments = setup_gdt(sys_mem).with_context(|| "Failed to setup gdt")?; Ok(boot_loader_layout) } #[cfg(test)] mod test { - use super::*; use std::path::PathBuf; use std::sync::Arc; + use kvm_bindings::kvm_segment; + use super::super::BOOT_GDT_MAX; + use super::*; use address_space::*; - use kvm_bindings::kvm_segment; #[test] fn test_x86_bootloader_and_kernel_cmdline() { - let root = Region::init_container_region(0x2000_0000); - let space = AddressSpace::new(root.clone()).unwrap(); + let root = Region::init_container_region(0x2000_0000, "root"); + let space = AddressSpace::new(root.clone(), "space", None).unwrap(); let ram1 = Arc::new( HostMemMapping::new( GuestAddress(0), @@ -298,23 +310,29 @@ mod test { ) .unwrap(), ); - let region_a = Region::init_ram_region(ram1.clone()); + let region_a = 
Region::init_ram_region(ram1.clone(), "region_a"); root.add_subregion(region_a, ram1.start_address().raw_value()) .unwrap(); assert_eq!(setup_page_table(&space).unwrap(), 0x0000_9000); assert_eq!( - space.read_object::(GuestAddress(0x0000_9000)).unwrap(), + space + .read_object::(GuestAddress(0x0000_9000), AddressAttr::Ram) + .unwrap(), 0x0000_a003 ); assert_eq!( - space.read_object::(GuestAddress(0x0000_a000)).unwrap(), + space + .read_object::(GuestAddress(0x0000_a000), AddressAttr::Ram) + .unwrap(), 0x0000_b003 ); let mut page_addr: u64 = 0x0000_b000; let mut tmp_value: u64 = 0x83; for _ in 0..512u64 { assert_eq!( - space.read_object::(GuestAddress(page_addr)).unwrap(), + space + .read_object::(GuestAddress(page_addr), AddressAttr::Ram) + .unwrap(), tmp_value ); page_addr += 8; @@ -335,7 +353,7 @@ mod test { let mut boot_hdr = RealModeKernelHeader::new(); assert!(setup_boot_params(&config, &space, &boot_hdr).is_ok()); - //test setup_gdt function + // test setup_gdt function let c_seg = kvm_segment { base: 0, limit: 1048575, @@ -376,7 +394,11 @@ mod test { let mut arr: Vec = Vec::new(); let mut boot_addr: u64 = 0x500; for _ in 0..BOOT_GDT_MAX { - arr.push(space.read_object(GuestAddress(boot_addr)).unwrap()); + arr.push( + space + .read_object(GuestAddress(boot_addr), AddressAttr::Ram) + .unwrap(), + ); boot_addr += 8; } assert_eq!(arr[0], 0); @@ -384,7 +406,7 @@ mod test { assert_eq!(arr[2], 0xaf9b000000ffff); assert_eq!(arr[3], 0xcf93000000ffff); - //test setup_kernel_cmdline function + // test setup_kernel_cmdline function let cmd_len: u64 = config.kernel_cmdline.len() as u64; let mut read_buffer: [u8; 30] = [0; 30]; assert!(setup_kernel_cmdline(&config, &space, &mut boot_hdr).is_ok()); @@ -393,6 +415,7 @@ mod test { &mut read_buffer.as_mut(), GuestAddress(0x0002_0000), cmd_len, + AddressAttr::Ram, ) .unwrap(); let s = String::from_utf8(read_buffer.to_vec()).unwrap(); diff --git a/boot_loader/src/x86_64/direct_boot/mptable.rs b/boot_loader/src/x86_64/direct_boot/mptable.rs index 4adc3e3d2bc953255922657526f3f1a9adf45a14..8ea1ce2d224f26eff1a656c16bd49000012289e2 100644 --- a/boot_loader/src/x86_64/direct_boot/mptable.rs +++ b/boot_loader/src/x86_64/direct_boot/mptable.rs @@ -12,12 +12,13 @@ use std::sync::Arc; -use address_space::{AddressSpace, GuestAddress}; +use anyhow::{anyhow, Result}; + +use crate::error::BootLoaderError; +use address_space::{AddressAttr, AddressSpace, GuestAddress}; use util::byte_code::ByteCode; use util::checksum::obj_checksum; -use crate::errors::{ErrorKind, Result}; - const SPEC_VERSION: u8 = 4; // version 1.4 const APIC_VERSION: u8 = 0x14; @@ -266,7 +267,7 @@ impl LocalInterruptEntry { macro_rules! 
write_entry { ( $d:expr, $t:ty, $m:expr, $o:expr, $s:expr ) => { let entry = $d; - $m.write_object(&entry, GuestAddress($o))?; + $m.write_object(&entry, GuestAddress($o), AddressAttr::Ram)?; $o += std::mem::size_of::<$t>() as u64; $s = $s.wrapping_add(obj_checksum(&entry)); }; @@ -285,7 +286,7 @@ pub fn setup_isa_mptable( const MPTABLE_IOAPIC_NR: u8 = 16; if u32::from(num_cpus) > MPTABLE_MAX_CPUS { - return Err(ErrorKind::MaxCpus(num_cpus).into()); + return Err(anyhow!(BootLoaderError::MaxCpus(num_cpus))); } let ioapic_id: u8 = num_cpus + 1; @@ -293,13 +294,14 @@ pub fn setup_isa_mptable( sys_mem.write_object( &FloatingPointer::new(header as u32), GuestAddress(start_addr), + AddressAttr::Ram, )?; let mut offset = header + std::mem::size_of::() as u64; let mut sum = 0u8; for cpu_id in 0..num_cpus { write_entry!( - ProcessEntry::new(cpu_id as u8, true, cpu_id == 0), + ProcessEntry::new(cpu_id, true, cpu_id == 0), ProcessEntry, sys_mem, offset, @@ -344,6 +346,7 @@ pub fn setup_isa_mptable( sys_mem.write_object( &ConfigTableHeader::new((offset - header) as u16, sum, lapic_addr), GuestAddress(header), + AddressAttr::Ram, )?; Ok(()) diff --git a/boot_loader/src/x86_64/mod.rs b/boot_loader/src/x86_64/mod.rs index 0c22ced557bc9bde50db8461ae922bb8c5ee840f..36e971a899dd6a01343958781fabec671946c447 100644 --- a/boot_loader/src/x86_64/mod.rs +++ b/boot_loader/src/x86_64/mod.rs @@ -54,17 +54,16 @@ mod bootparam; mod direct_boot; -#[allow(dead_code)] mod standard_boot; use std::path::PathBuf; use std::sync::{Arc, Mutex}; -use address_space::AddressSpace; -use devices::legacy::FwCfgOps; +use anyhow::{Context, Result}; use kvm_bindings::kvm_segment; -use crate::errors::Result; +use address_space::AddressSpace; +use devices::legacy::FwCfgOps; const ZERO_PAGE_START: u64 = 0x0000_7000; const PML4_START: u64 = 0x0000_9000; @@ -144,10 +143,8 @@ pub fn load_linux( if config.prot64_mode { direct_boot::load_linux(config, sys_mem) } else { - if fwcfg.is_none() { - bail!("Failed to load linux: No FwCfg provided"); - } - let mut locked_fwcfg = fwcfg.unwrap().lock().unwrap(); + let fwcfg = fwcfg.with_context(|| "Failed to load linux: No FwCfg provided")?; + let mut locked_fwcfg = fwcfg.lock().unwrap(); standard_boot::load_linux(config, sys_mem, &mut *locked_fwcfg)?; Ok(X86BootLoader { diff --git a/boot_loader/src/x86_64/standard_boot/elf.rs b/boot_loader/src/x86_64/standard_boot/elf.rs index a76e7892071eb41ae5c030e93c65c6800767abcf..9158d4c1c43aca747097ff5301cc285a57cb8cf6 100644 --- a/boot_loader/src/x86_64/standard_boot/elf.rs +++ b/boot_loader/src/x86_64/standard_boot/elf.rs @@ -14,16 +14,14 @@ use std::fs::File; use std::io::{Read, Seek, SeekFrom}; use std::sync::Arc; -use address_space::{AddressSpace, GuestAddress}; +use anyhow::{bail, Context, Result}; + +use address_space::{AddressAttr, AddressSpace, GuestAddress}; use devices::legacy::{FwCfgEntryType, FwCfgOps}; use util::byte_code::ByteCode; use util::num_ops::round_up; -use crate::errors::{Result, ResultExt}; - const EI_MAG0: usize = 0; -const EI_MAG1: usize = 1; -const EI_MAG2: usize = 2; const EI_MAG3: usize = 3; const EI_CLASS: usize = 4; const EI_DATA: usize = 5; @@ -33,11 +31,9 @@ const ELFMAG1: u8 = b'E'; const ELFMAG2: u8 = b'L'; const ELFMAG3: u8 = b'F'; -const ELFCLASS32: u8 = 1; const ELFCLASS64: u8 = 2; const ELFDATA2LSB: u8 = 1; -const ELFDATA2MSB: u8 = 2; const PT_LOAD: u32 = 1; const PT_NOTE: u32 = 4; @@ -137,18 +133,18 @@ pub fn load_elf_kernel( sys_mem: &Arc, fwcfg: &mut dyn FwCfgOps, ) -> Result<()> { - 
kernel_image.seek(SeekFrom::Start(0))?; + kernel_image.rewind()?; let kernel_length = kernel_image.metadata().map(|m| m.len())?; let mut elf_header = Elf64Header::default(); kernel_image.read_exact(elf_header.as_mut_bytes())?; elf_header .is_valid() - .chain_err(|| "ELF header is invalid")?; + .with_context(|| "ELF header is invalid")?; let ep_hdrs = elf_header .parse_prog_hdrs(kernel_image) - .chain_err(|| "Failed to parse ELF program header")?; + .with_context(|| "Failed to parse ELF program header")?; let mut pvh_start_addr: Option = None; let mut addr_low = u64::MAX; @@ -167,7 +163,12 @@ pub fn load_elf_kernel( if ph.p_type == PT_LOAD { kernel_image.seek(SeekFrom::Start(ph.p_offset))?; - sys_mem.write(kernel_image, GuestAddress(ph.p_paddr), ph.p_filesz)?; + sys_mem.write( + kernel_image, + GuestAddress(ph.p_paddr), + ph.p_filesz, + AddressAttr::Ram, + )?; addr_low = std::cmp::min(addr_low, ph.p_paddr); addr_max = std::cmp::max(addr_max, ph.p_paddr); @@ -184,10 +185,14 @@ pub fn load_elf_kernel( offset += note_size; let p_align = ph.p_align; - let aligned_namesz = round_up(note_hdr.namesz as u64, p_align).ok_or(format!( - "Overflows when align up: num 0x{:x}, alignment 0x{:x}", - note_hdr.namesz as u64, p_align, - ))?; + let aligned_namesz = + round_up(u64::from(note_hdr.namesz), p_align).with_context(|| { + format!( + "Overflows when align up: num 0x{:x}, alignment 0x{:x}", + u64::from(note_hdr.namesz), + p_align, + ) + })?; if note_hdr.type_ == XEN_ELFNOTE_PHYS32_ENTRY { kernel_image.seek(SeekFrom::Current(aligned_namesz as i64))?; @@ -196,11 +201,14 @@ pub fn load_elf_kernel( pvh_start_addr = Some(entry_addr); break; } else { - let aligned_descsz = - round_up(note_hdr.descsz as u64, p_align).ok_or(format!( - "Overflows when align up, num 0x{:x}, alignment 0x{:x}", - note_hdr.descsz as u64, p_align, - ))?; + let aligned_descsz = round_up(u64::from(note_hdr.descsz), p_align) + .with_context(|| { + format!( + "Overflows when align up, num 0x{:x}, alignment 0x{:x}", + u64::from(note_hdr.descsz), + p_align, + ) + })?; let tail_size = aligned_namesz + aligned_descsz; kernel_image.seek(SeekFrom::Current(tail_size as i64))?; @@ -210,13 +218,12 @@ pub fn load_elf_kernel( } } } - if pvh_start_addr.is_none() { - bail!("No Note header contains PVH entry info in ELF kernel image."); - } + let pvh_start_addr = pvh_start_addr + .with_context(|| "No Note header contains PVH entry info in ELF kernel image.")?; fwcfg.add_data_entry( FwCfgEntryType::KernelEntry, - (pvh_start_addr.unwrap() as u32).as_bytes().to_vec(), + (pvh_start_addr as u32).as_bytes().to_vec(), )?; fwcfg.add_data_entry( FwCfgEntryType::KernelAddr, diff --git a/boot_loader/src/x86_64/standard_boot/mod.rs b/boot_loader/src/x86_64/standard_boot/mod.rs index 49e8a0302d15a51ba46fd78da935b0adb59b7124..5ad697bf769c4401d6a93f8084e965e0f626505d 100644 --- a/boot_loader/src/x86_64/standard_boot/mod.rs +++ b/boot_loader/src/x86_64/standard_boot/mod.rs @@ -17,17 +17,19 @@ use std::fs::File; use std::io::{Read, Seek, SeekFrom}; use std::sync::Arc; -use address_space::AddressSpace; -use devices::legacy::{FwCfgEntryType, FwCfgOps}; -use util::byte_code::ByteCode; +use anyhow::{bail, Context, Result}; +use log::{error, info}; use self::elf::load_elf_kernel; use super::bootparam::RealModeKernelHeader; use super::X86BootLoaderConfig; use super::{BOOT_HDR_START, CMDLINE_START}; -use crate::errors::{ErrorKind, Result, ResultExt}; +use crate::error::BootLoaderError; use crate::x86_64::bootparam::{E820Entry, E820_RAM, E820_RESERVED, UEFI_OVMF_ID}; use 
crate::x86_64::{INITRD_ADDR_MAX, SETUP_START}; +use address_space::AddressSpace; +use devices::legacy::{FwCfgEntryType, FwCfgOps}; +use util::byte_code::ByteCode; fn load_image( image: &mut File, @@ -57,30 +59,30 @@ fn load_kernel_image( header: &RealModeKernelHeader, fwcfg: &mut dyn FwCfgOps, ) -> Result> { - let mut setup_size = header.setup_sects as u64; + let mut setup_size = u64::from(header.setup_sects); if setup_size == 0 { setup_size = 4; } setup_size = (setup_size + 1) << 9; let mut setup_data = vec![0_u8; setup_size as usize]; - kernel_image.seek(SeekFrom::Start(0))?; + kernel_image.rewind()?; kernel_image.read_exact(setup_data.as_mut_slice())?; let kernel_size = kernel_image.metadata().unwrap().len() - setup_size; load_image(kernel_image, setup_size, FwCfgEntryType::KernelData, fwcfg) - .chain_err(|| "Failed to load kernel image")?; + .with_context(|| "Failed to load kernel image")?; let kernel_start = header.code32_start; // boot_hdr.code32_start = 0x100000 fwcfg .add_data_entry(FwCfgEntryType::KernelAddr, kernel_start.as_bytes().to_vec()) - .chain_err(|| "Failed to add kernel-addr entry to FwCfg")?; + .with_context(|| "Failed to add kernel-addr entry to FwCfg")?; fwcfg .add_data_entry( FwCfgEntryType::KernelSize, (kernel_size as u32).as_bytes().to_vec(), ) - .chain_err(|| "Failed to add kernel-size entry to FwCfg")?; + .with_context(|| "Failed to add kernel-size entry to FwCfg")?; Ok(setup_data) } @@ -101,24 +103,24 @@ fn load_initrd( }; let mut initrd_image = File::open(config.initrd.as_ref().unwrap()) - .chain_err(|| ErrorKind::BootLoaderOpenInitrd)?; - let initrd_size = initrd_image.metadata().unwrap().len() as u64; + .with_context(|| BootLoaderError::BootLoaderOpenInitrd)?; + let initrd_size = initrd_image.metadata().unwrap().len(); let initrd_addr = (initrd_addr_max - initrd_size) & !0xfff_u64; load_image(&mut initrd_image, 0, FwCfgEntryType::InitrdData, fwcfg) - .chain_err(|| "Failed to load initrd")?; + .with_context(|| "Failed to load initrd")?; fwcfg .add_data_entry( FwCfgEntryType::InitrdAddr, (initrd_addr as u32).as_bytes().to_vec(), ) - .chain_err(|| "Failed to add initrd-addr entry to FwCfg")?; + .with_context(|| "Failed to add initrd-addr entry to FwCfg")?; fwcfg .add_data_entry( FwCfgEntryType::InitrdSize, (initrd_size as u32).as_bytes().to_vec(), ) - .chain_err(|| "Failed to add initrd-size to FwCfg")?; + .with_context(|| "Failed to add initrd-size to FwCfg")?; header.set_ramdisk(initrd_addr as u32, initrd_size as u32); Ok(()) @@ -156,7 +158,7 @@ fn setup_e820_table( }); fwcfg .add_file_entry("etc/e820", bytes) - .chain_err(|| "Failed to add e820 file entry to FwCfg")?; + .with_context(|| "Failed to add e820 file entry to FwCfg")?; Ok(()) } @@ -173,17 +175,17 @@ fn load_kernel_cmdline( FwCfgEntryType::CmdlineAddr, (CMDLINE_START as u32).as_bytes().to_vec(), ) - .chain_err(|| "Failed to add cmdline-addr entry to FwCfg")?; + .with_context(|| "Failed to add cmdline-addr entry to FwCfg")?; // The length of cmdline should add the tailing `\0`. 
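    // For example, a kernel_cmdline of "console=ttyS0" gives cmdline_len 13, so
    // the CmdlineSize entry below reports 14 and the size seen by the firmware
    // covers the terminating NUL byte as well.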
fwcfg .add_data_entry( FwCfgEntryType::CmdlineSize, (cmdline_len + 1).as_bytes().to_vec(), ) - .chain_err(|| "Failed to add cmdline-size entry to FwCfg")?; + .with_context(|| "Failed to add cmdline-size entry to FwCfg")?; fwcfg .add_string_entry(FwCfgEntryType::CmdlineData, config.kernel_cmdline.as_ref()) - .chain_err(|| "Failed to add cmdline-data entry to FwCfg")?; + .with_context(|| "Failed to add cmdline-data entry to FwCfg")?; Ok(()) } @@ -206,7 +208,7 @@ pub fn load_linux( } let mut kernel_image = File::open(config.kernel.as_ref().unwrap().clone()) - .chain_err(|| ErrorKind::BootLoaderOpenKernel)?; + .with_context(|| BootLoaderError::BootLoaderOpenKernel)?; let mut boot_header = RealModeKernelHeader::default(); kernel_image.seek(SeekFrom::Start(BOOT_HDR_START))?; @@ -217,12 +219,14 @@ pub fn load_linux( setup_e820_table(config, sys_mem, fwcfg)?; load_initrd(config, sys_mem, &mut boot_header, fwcfg)?; if let Err(e) = boot_header.check_valid_kernel() { - match e.kind() { - ErrorKind::ElfKernel => { - load_elf_kernel(&mut kernel_image, sys_mem, fwcfg)?; - return Ok(()); + if let Some(err) = e.downcast_ref::() { + match err { + BootLoaderError::ElfKernel => { + load_elf_kernel(&mut kernel_image, sys_mem, fwcfg)?; + return Ok(()); + } + _ => return Err(e), } - _ => return Err(e), } } @@ -239,16 +243,16 @@ pub fn load_linux( FwCfgEntryType::SetupAddr, (SETUP_START as u32).as_bytes().to_vec(), ) - .chain_err(|| "Failed to add setup-addr to FwCfg")?; + .with_context(|| "Failed to add setup-addr to FwCfg")?; fwcfg .add_data_entry( FwCfgEntryType::SetupSize, (setup_data.len() as u32).as_bytes().to_vec(), ) - .chain_err(|| "Failed to add setup-size entry to FwCfg")?; + .with_context(|| "Failed to add setup-size entry to FwCfg")?; fwcfg .add_data_entry(FwCfgEntryType::SetupData, setup_data) - .chain_err(|| "Failed to add setup-data entry to FwCfg")?; + .with_context(|| "Failed to add setup-data entry to FwCfg")?; Ok(()) } diff --git a/build.rs b/build.rs new file mode 100644 index 0000000000000000000000000000000000000000..13b5a89850ba2c00a0087c8f80ff47b88c170594 --- /dev/null +++ b/build.rs @@ -0,0 +1,44 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
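+// The println!() directives below are ordinary cargo build-script output:
+// `cargo:rustc-link-arg=...` forwards a flag to the linker,
+// `cargo:rustc-link-search=...` adds a library search path, and
+// `cargo:rerun-if-env-changed=OHOS_SDK` re-runs this script when the SDK
+// location changes. A typical OpenHarmony cross build would then look roughly
+// like the following (the SDK path is only an example):
+//
+//     OHOS_SDK=/opt/ohos-sdk/native \
+//     cargo build --release --target aarch64-unknown-linux-ohos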
+ +fn ohos_env_configure() { + if let Ok(ohos_sdk_path) = std::env::var("OHOS_SDK") { + println!("cargo:rustc-link-arg=--target=aarch64-linux-ohos"); + println!("cargo:rustc-link-arg=--verbose"); + println!("cargo:rustc-link-arg=--sysroot={}/sysroot", ohos_sdk_path); + println!("cargo:rustc-link-arg=-lpixman_static"); + if cfg!(feature = "usb_host") { + println!("cargo:rustc-link-arg=-lusb-1.0"); + } + println!( + "cargo:rustc-link-search={}/sysroot/usr/lib/aarch64-linux-ohos", + ohos_sdk_path + ); + } +} + +fn main() { + let target_env_ohos = matches!(std::env::var("CARGO_CFG_TARGET_ENV"), Ok(ret) if ret == "ohos"); + + if target_env_ohos { + println!("cargo:rerun-if-env-changed=OHOS_SDK"); + ohos_env_configure(); + } else if cfg!(any( + feature = "demo_device", + feature = "gtk", + feature = "ramfb", + feature = "virtio_gpu", + feature = "vnc", + )) { + println!("cargo:rustc-link-arg=-lpixman-1"); + } +} diff --git a/chardev_backend/Cargo.toml b/chardev_backend/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..8a1b15358ac4ed7dc68ddd2dc2ebb6f4339eb7c7 --- /dev/null +++ b/chardev_backend/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "chardev_backend" +version = "2.4.0" +authors = ["Huawei StratoVirt Team"] +edition = "2021" +license = "Mulan PSL v2" + +[dependencies] +vmm-sys-util = "0.12.1" +anyhow = "1.0" +log = "0.4" +libc = "0.2" +nix = { version = "0.26.2", default-features = false, features = ["fs", "feature"] } +machine_manager = { path = "../machine_manager" } +util = { path = "../util" } diff --git a/chardev_backend/src/chardev.rs b/chardev_backend/src/chardev.rs new file mode 100644 index 0000000000000000000000000000000000000000..3f9e58b062e579508fa76061f87d001ffc044bfa --- /dev/null +++ b/chardev_backend/src/chardev.rs @@ -0,0 +1,685 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::collections::VecDeque; +use std::fs::{read_link, File, OpenOptions}; +use std::io::{ErrorKind, Stdin, Stdout}; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; +use std::path::PathBuf; +use std::rc::Rc; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +use anyhow::{bail, Context, Result}; +use log::{error, info, warn}; +use nix::fcntl::{fcntl, FcntlArg, OFlag}; +use nix::pty::openpty; +use nix::sys::termios::{cfmakeraw, tcgetattr, tcsetattr, SetArg, Termios}; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; + +use machine_manager::event_loop::EventLoop; +use machine_manager::machine::{PathInfo, PTY_PATH}; +use machine_manager::{ + config::{ChardevConfig, ChardevType, SocketType}, + temp_cleaner::TempCleaner, +}; +use util::file::clear_file; +use util::loop_context::{ + gen_delete_notifiers, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, +}; +use util::set_termi_raw_mode; +use util::socket::{SocketListener, SocketStream}; +use util::unix::limit_permission; + +const BUF_QUEUE_SIZE: usize = 128; + +/// Provide the trait that helps handle the input data. 
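+///
+/// The contract between `Chardev` and its receiver is flow-controlled: when
+/// `remain_size()` returns 0 the chardev stops polling its input fd and calls
+/// `set_paused()`, and the receiver is then expected to call
+/// `Chardev::unpause_rx()` once buffer space becomes available again.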
+pub trait InputReceiver: Send { + /// Handle the input data and trigger interrupt if necessary. + fn receive(&mut self, buffer: &[u8]); + /// Return the remain space size of receiver buffer. + /// 0 if receiver is not ready or no space in FIFO + fn remain_size(&mut self) -> usize; + /// Tell receiver that RX is paused and receiver + /// must unpause it when it becomes ready + fn set_paused(&mut self); +} + +/// Provide the trait that notifies device the socket is opened or closed. +pub trait ChardevNotifyDevice: Send { + fn chardev_notify(&mut self, status: ChardevStatus); +} + +pub enum ChardevStatus { + Close, + Open, +} + +/// Character device structure. +pub struct Chardev { + /// Id of chardev. + id: String, + /// Type of backend device. + backend: ChardevType, + /// Socket listener for chardev of socket type. + listener: Option, + /// Chardev input. + input: Option>>, + /// Chardev output. + pub output: Option>>, + /// Fd of socket stream. + stream_fd: Option, + /// Input receiver. + receiver: Option>>, + /// Used to notify device the socket is opened or closed. + dev: Option>>, + /// Whether event-handling of device is initialized + /// and we wait for port to become available + wait_port: bool, + /// Scheduled DPC to unpause input stream. + /// Unpause must be done inside event-loop + unpause_timer: Option, + /// output listener to notify when output stream fd can be written + output_listener_fd: Option>, + /// output buffer queue + outbuf: VecDeque>, +} + +impl Chardev { + pub fn new(chardev_cfg: ChardevConfig) -> Self { + Chardev { + id: chardev_cfg.id(), + backend: chardev_cfg.classtype, + listener: None, + input: None, + output: None, + stream_fd: None, + receiver: None, + dev: None, + wait_port: false, + unpause_timer: None, + output_listener_fd: None, + outbuf: VecDeque::with_capacity(BUF_QUEUE_SIZE), + } + } + + pub fn realize(&mut self) -> Result<()> { + match &self.backend { + ChardevType::Stdio { .. } => { + set_termi_raw_mode().with_context(|| "Failed to set terminal to raw mode")?; + self.input = Some(Arc::new(Mutex::new(std::io::stdin()))); + self.output = Some(Arc::new(Mutex::new(std::io::stdout()))); + } + ChardevType::Pty { .. } => { + let (master, path) = + set_pty_raw_mode().with_context(|| "Failed to set pty to raw mode")?; + info!("Pty path is: {:?}", path); + let path_info = PathInfo { + path: format!("pty:{:?}", &path), + label: self.id.clone(), + }; + PTY_PATH.lock().unwrap().push(path_info); + // SAFETY: master was created in the function of set_pty_raw_mode, + // the value can be guaranteed to be legal. + let master_arc = Arc::new(Mutex::new(unsafe { File::from_raw_fd(master) })); + self.input = Some(master_arc.clone()); + self.output = Some(master_arc); + } + ChardevType::Socket { server, nowait, .. 
} => { + if !*server || !*nowait { + bail!( + "Argument \'server\' and \'nowait\' are both required for chardev \'{}\'", + &self.id + ); + } + let socket_type = self.backend.socket_type()?; + if let SocketType::Tcp { host, port } = socket_type { + let listener = SocketListener::bind_by_tcp(&host, port).with_context(|| { + format!( + "Failed to bind socket for chardev \'{}\', address: {}:{}", + &self.id, host, port + ) + })?; + self.listener = Some(listener); + } else if let SocketType::Unix { path } = socket_type { + clear_file(path.clone())?; + let listener = SocketListener::bind_by_uds(&path).with_context(|| { + format!( + "Failed to bind socket for chardev \'{}\', path: {}", + &self.id, path + ) + })?; + self.listener = Some(listener); + + // add file to temporary pool, so it could be cleaned when vm exit. + TempCleaner::add_path(path.clone()); + limit_permission(&path).with_context(|| { + format!( + "Failed to change file permission for chardev \'{}\', path: {}", + &self.id, path + ) + })?; + } + } + ChardevType::File { path, .. } => { + let file = Arc::new(Mutex::new( + OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(path)?, + )); + self.output = Some(file); + } + }; + Ok(()) + } + + pub fn set_receiver(&mut self, dev: &Arc>) { + self.receiver = Some(dev.clone()); + if self.wait_port { + warn!("Serial port for chardev \'{}\' appeared.", &self.id); + self.wait_port = false; + self.unpause_rx(); + } + } + + fn wait_for_port(&mut self, input_fd: RawFd) -> EventNotifier { + // set_receiver() will unpause rx + warn!( + "Serial port for chardev \'{}\' is not ready yet, waiting for port.", + &self.id + ); + + self.wait_port = true; + + EventNotifier::new( + NotifierOperation::Modify, + input_fd, + None, + EventSet::HANG_UP, + vec![], + ) + } + + pub fn set_device(&mut self, dev: Arc>) { + self.dev = Some(dev.clone()); + } + + pub fn unpause_rx(&mut self) { + // Receiver calls this if it returned 0 from remain_size() + // and now it's ready to accept rx-data again + if self.input.is_none() { + error!("unpause called for non-initialized device \'{}\'", &self.id); + return; + } + if self.unpause_timer.is_some() { + return; // already set + } + + let input_fd = self.input.clone().unwrap().lock().unwrap().as_raw_fd(); + + let unpause_fn = Box::new(move || { + let res = EventLoop::update_event( + vec![EventNotifier::new( + NotifierOperation::AddEvents, + input_fd, + None, + EventSet::IN | EventSet::HANG_UP, + vec![], + )], + None, + ); + if let Err(e) = res { + error!("Failed to unpause on fd {input_fd}: {e:?}"); + } + }); + let main_loop = EventLoop::get_ctx(None).unwrap(); + let timer_id = main_loop.timer_add(unpause_fn, Duration::ZERO); + self.unpause_timer = Some(timer_id); + } + + fn cancel_unpause_timer(&mut self) { + if let Some(timer_id) = self.unpause_timer { + let main_loop = EventLoop::get_ctx(None).unwrap(); + main_loop.timer_del(timer_id); + self.unpause_timer = None; + } + } + + fn clear_outbuf(&mut self) { + self.outbuf.clear(); + } + + pub fn outbuf_is_full(&self) -> bool { + self.outbuf.len() == self.outbuf.capacity() + } + + pub fn fill_outbuf(&mut self, buf: Vec, listener_fd: Option>) -> Result<()> { + match self.backend { + ChardevType::File { .. } | ChardevType::Pty { .. } | ChardevType::Stdio { .. } => { + if self.output.is_none() { + bail!("chardev has no output"); + } + return write_buffer_sync(self.output.as_ref().unwrap().clone(), buf); + } + ChardevType::Socket { .. 
} => { + if self.output.is_none() { + return Ok(()); + } + if listener_fd.is_none() { + return write_buffer_sync(self.output.as_ref().unwrap().clone(), buf); + } + } + } + + if self.outbuf_is_full() { + bail!("Failed to append buffer because output buffer queue is full"); + } + self.outbuf.push_back(buf); + self.output_listener_fd = listener_fd; + + let event_notifier = EventNotifier::new( + NotifierOperation::AddEvents, + self.stream_fd.unwrap(), + None, + EventSet::OUT, + Vec::new(), + ); + EventLoop::update_event(vec![event_notifier], None)?; + Ok(()) + } + + fn consume_outbuf(&mut self) -> Result<()> { + if self.output.is_none() { + bail!("no output interface"); + } + let output = self.output.as_ref().unwrap(); + while !self.outbuf.is_empty() { + if write_buffer_async(output.clone(), self.outbuf.front_mut().unwrap())? { + break; + } + self.outbuf.pop_front(); + } + Ok(()) + } +} + +fn write_buffer_sync(writer: Arc>, buf: Vec) -> Result<()> { + let len = buf.len(); + let mut written = 0_usize; + let mut locked_writer = writer.lock().unwrap(); + + while written < len { + match locked_writer.write(&buf[written..len]) { + Ok(n) => written += n, + Err(e) => bail!("chardev failed to write file with error {:?}", e), + } + } + locked_writer + .flush() + .with_context(|| "chardev failed to flush")?; + Ok(()) +} + +// If write is blocked, return true. Otherwise return false. +fn write_buffer_async( + writer: Arc>, + buf: &mut Vec, +) -> Result { + let len = buf.len(); + let mut locked_writer = writer.lock().unwrap(); + let mut written = 0_usize; + + while written < len { + match locked_writer.write(&buf[written..len]) { + Ok(0) => break, + Ok(n) => written += n, + Err(e) => { + let err_type = e.kind(); + if err_type != ErrorKind::WouldBlock && err_type != ErrorKind::Interrupted { + bail!("chardev failed to write data with error {:?}", e); + } + break; + } + } + } + locked_writer + .flush() + .with_context(|| "chardev failed to flush")?; + + if written == len { + return Ok(false); + } + buf.drain(0..written); + Ok(true) +} + +fn set_pty_raw_mode() -> Result<(i32, PathBuf)> { + let (master, slave) = match openpty(None, None) { + Ok(res) => (res.master, res.slave), + Err(e) => bail!("Failed to open pty, error is {:?}", e), + }; + + let proc_path = PathBuf::from(format!("/proc/self/fd/{}", slave)); + let path = read_link(proc_path).with_context(|| "Failed to read slave pty link")?; + + let mut new_termios: Termios = match tcgetattr(slave) { + Ok(tm) => tm, + Err(e) => bail!("Failed to get mode of pty, error is {:?}", e), + }; + + cfmakeraw(&mut new_termios); + + if let Err(e) = tcsetattr(slave, SetArg::TCSAFLUSH, &new_termios) { + bail!("Failed to set pty to raw mode, error is {:?}", e); + } + + let fcnt_arg = FcntlArg::F_SETFL(OFlag::from_bits(libc::O_NONBLOCK).unwrap()); + if let Err(e) = fcntl(master, fcnt_arg) { + bail!( + "Failed to set pty master to nonblocking mode, error is {:?}", + e + ); + } + + Ok((master, path)) +} + +// Notification handling in case of stdio or pty usage. +fn get_terminal_notifier(chardev: Arc>) -> Option { + let locked_chardev = chardev.lock().unwrap(); + let input = locked_chardev.input.clone(); + if input.is_none() { + // Method `realize` expected to be called before we get here because to build event + // notifier we need already valid file descriptors here. 
+ error!( + "Failed to initialize input events for chardev \'{}\', chardev not initialized", + &locked_chardev.id + ); + return None; + } + + let cloned_chardev = chardev.clone(); + let input_fd = input.unwrap().lock().unwrap().as_raw_fd(); + + let event_handler: Rc = Rc::new(move |_, _| { + let mut locked_chardev = cloned_chardev.lock().unwrap(); + if locked_chardev.receiver.is_none() { + let wait_port = locked_chardev.wait_for_port(input_fd); + return Some(vec![wait_port]); + } + + locked_chardev.cancel_unpause_timer(); // it will be rescheduled if needed + + let receiver = locked_chardev.receiver.clone().unwrap(); + let input = locked_chardev.input.clone().unwrap(); + drop(locked_chardev); + + let mut locked_receiver = receiver.lock().unwrap(); + let buff_size = locked_receiver.remain_size(); + if buff_size == 0 { + locked_receiver.set_paused(); + + return Some(vec![EventNotifier::new( + NotifierOperation::Modify, + input_fd, + None, + EventSet::HANG_UP, + vec![], + )]); + } + + let mut buffer = vec![0_u8; buff_size]; + if let Ok(bytes_count) = input.lock().unwrap().chr_read_raw(&mut buffer) { + locked_receiver.receive(&buffer[..bytes_count]); + } else { + let os_error = std::io::Error::last_os_error(); + let locked_chardev = cloned_chardev.lock().unwrap(); + error!( + "Failed to read input data from chardev \'{}\', {}", + &locked_chardev.id, &os_error + ); + } + None + }); + + Some(EventNotifier::new( + NotifierOperation::AddShared, + input_fd, + None, + EventSet::IN, + vec![event_handler], + )) +} + +// Notification handling in case of listening (server) socket. +fn get_socket_notifier(chardev: Arc>) -> Option { + let locked_chardev = chardev.lock().unwrap(); + let listener = &locked_chardev.listener; + if listener.is_none() { + // Method `realize` expected to be called before we get here because to build event + // notifier we need already valid file descriptors here. 
+ error!( + "Failed to setup io-event notifications for chardev \'{}\', device not initialized", + &locked_chardev.id + ); + return None; + } + + let cloned_chardev = chardev.clone(); + let event_handler: Rc = Rc::new(move |_, _| { + let mut locked_chardev = cloned_chardev.lock().unwrap(); + + let stream = locked_chardev.listener.as_ref().unwrap().accept().unwrap(); + let connection_info = stream.link_description(); + info!( + "Chardev \'{}\' event, connection opened: {}", + &locked_chardev.id, connection_info + ); + let stream_fd = stream.as_raw_fd(); + let stream_arc = Arc::new(Mutex::new(stream)); + let listener_fd = locked_chardev.listener.as_ref().unwrap().as_raw_fd(); + let notify_dev = locked_chardev.dev.clone(); + + locked_chardev.stream_fd = Some(stream_fd); + locked_chardev.input = Some(stream_arc.clone()); + locked_chardev.output = Some(stream_arc.clone()); + drop(locked_chardev); + + if let Some(dev) = notify_dev { + dev.lock().unwrap().chardev_notify(ChardevStatus::Open); + } + + let handling_chardev = cloned_chardev.clone(); + let close_connection = Rc::new(move || { + let mut locked_chardev = handling_chardev.lock().unwrap(); + let notify_dev = locked_chardev.dev.clone(); + locked_chardev.input = None; + locked_chardev.output = None; + locked_chardev.stream_fd = None; + locked_chardev.cancel_unpause_timer(); + info!( + "Chardev \'{}\' event, connection closed: {}", + &locked_chardev.id, connection_info + ); + drop(locked_chardev); + + if let Some(dev) = notify_dev { + dev.lock().unwrap().chardev_notify(ChardevStatus::Close); + } + + // Note: we use stream_arc variable here because we want to capture it and prolongate + // its lifetime with this notifier callback lifetime. It allows us to ensure + // that socket fd be valid until we unregister it from epoll_fd subscription. 
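+            // A minimal sketch of that pattern (the names here are
+            // illustrative only):
+            //
+            //     let keepalive = stream_arc.clone();   // refcount += 1
+            //     move || {
+            //         let fd = keepalive.lock().unwrap().as_raw_fd();
+            //         Some(gen_delete_notifiers(&[fd])) // fd still open here
+            //     };
+            //
+            // The stream, and with it the raw fd, stays alive at least until
+            // this callback has produced its delete-notifiers.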
+ let stream_fd = stream_arc.lock().unwrap().as_raw_fd(); + Some(gen_delete_notifiers(&[stream_fd])) + }); + + let handling_chardev = cloned_chardev.clone(); + let input_handler: Rc = Rc::new(move |event, _| { + let mut locked_chardev = handling_chardev.lock().unwrap(); + + let peer_disconnected = event & EventSet::HANG_UP == EventSet::HANG_UP; + if peer_disconnected && locked_chardev.receiver.is_none() { + drop(locked_chardev); + return close_connection(); + } + + let input_ready = event & EventSet::IN == EventSet::IN; + if input_ready { + locked_chardev.cancel_unpause_timer(); + + if locked_chardev.receiver.is_none() { + let wait_port = locked_chardev.wait_for_port(stream_fd); + return Some(vec![wait_port]); + } + + let receiver = locked_chardev.receiver.clone().unwrap(); + let input = locked_chardev.input.clone().unwrap(); + drop(locked_chardev); + + let mut locked_receiver = receiver.lock().unwrap(); + let buff_size = locked_receiver.remain_size(); + if buff_size == 0 { + locked_receiver.set_paused(); + + return Some(vec![EventNotifier::new( + NotifierOperation::DeleteEvents, + stream_fd, + None, + EventSet::IN, + vec![], + )]); + } + + let mut buffer = vec![0_u8; buff_size]; + let mut locked_input = input.lock().unwrap(); + if let Ok(bytes_count) = locked_input.chr_read_raw(&mut buffer) { + if bytes_count > 0 { + locked_receiver.receive(&buffer[..bytes_count]); + } else { + drop(locked_receiver); + drop(locked_input); + return close_connection(); + } + } else { + let os_error = std::io::Error::last_os_error(); + if os_error.kind() != std::io::ErrorKind::WouldBlock { + let locked_chardev = handling_chardev.lock().unwrap(); + error!( + "Failed to read input data from chardev \'{}\', {}", + &locked_chardev.id, &os_error + ); + } + } + } + + None + }); + + let handling_chardev = cloned_chardev.clone(); + let output_handler = Rc::new(move |event, fd| { + if event & EventSet::OUT != EventSet::OUT { + return None; + } + + let mut locked_cdev = handling_chardev.lock().unwrap(); + if let Err(e) = locked_cdev.consume_outbuf() { + error!("Failed to consume outbuf with error {:?}", e); + locked_cdev.clear_outbuf(); + return Some(vec![EventNotifier::new( + NotifierOperation::DeleteEvents, + fd, + None, + EventSet::OUT, + Vec::new(), + )]); + } + + if locked_cdev.output_listener_fd.is_some() { + let fd = locked_cdev.output_listener_fd.as_ref().unwrap(); + if let Err(e) = fd.write(1) { + error!("Failed to write eventfd with error {:?}", e); + return None; + } + locked_cdev.output_listener_fd = None; + } + + if locked_cdev.outbuf.is_empty() { + Some(vec![EventNotifier::new( + NotifierOperation::DeleteEvents, + fd, + None, + EventSet::OUT, + Vec::new(), + )]) + } else { + None + } + }); + + Some(vec![EventNotifier::new( + NotifierOperation::AddShared, + stream_fd, + Some(listener_fd), + EventSet::IN | EventSet::HANG_UP, + vec![input_handler, output_handler], + )]) + }); + + let listener_fd = listener.as_ref().unwrap().as_raw_fd(); + Some(EventNotifier::new( + NotifierOperation::AddShared, + listener_fd, + None, + EventSet::IN, + vec![event_handler], + )) +} + +impl EventNotifierHelper for Chardev { + fn internal_notifiers(chardev: Arc>) -> Vec { + let notifier = { + let backend = chardev.lock().unwrap().backend.clone(); + match backend { + ChardevType::Stdio { .. } => get_terminal_notifier(chardev), + ChardevType::Pty { .. } => get_terminal_notifier(chardev), + ChardevType::Socket { .. } => get_socket_notifier(chardev), + ChardevType::File { .. 
} => None, + } + }; + notifier.map_or(Vec::new(), |value| vec![value]) + } +} + +/// Provide backend trait object receiving the input from the guest. +pub trait CommunicatInInterface: std::marker::Send + std::os::unix::io::AsRawFd { + fn chr_read_raw(&mut self, buf: &mut [u8]) -> Result { + match nix::unistd::read(self.as_raw_fd(), buf) { + Err(e) => bail!("Failed to read buffer: {:?}", e), + Ok(bytes) => Ok(bytes), + } + } +} + +/// Provide backend trait object processing the output from the guest. +pub trait CommunicatOutInterface: std::io::Write + std::marker::Send {} + +impl CommunicatInInterface for SocketStream {} +impl CommunicatInInterface for File {} +impl CommunicatInInterface for Stdin {} + +impl CommunicatOutInterface for SocketStream {} +impl CommunicatOutInterface for File {} +impl CommunicatOutInterface for Stdout {} diff --git a/chardev_backend/src/lib.rs b/chardev_backend/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..9612180007c68e3bc721b702a7e4a19153008b17 --- /dev/null +++ b/chardev_backend/src/lib.rs @@ -0,0 +1,13 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod chardev; diff --git a/cpu/Cargo.toml b/cpu/Cargo.toml index e0e34ce2cfa675b191aa514a07dc6a5150a65b4f..57e4d7e26099f38205c0feb8cca8b735cc0f6092 100644 --- a/cpu/Cargo.toml +++ b/cpu/Cargo.toml @@ -1,23 +1,25 @@ [package] name = "cpu" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" license = "Mulan PSL v2" description = "CPU emulation" [dependencies] -error-chain = "0.12.4" -kvm-bindings = ">=0.3.0" -kvm-ioctls = "0.6.0" -log = "0.4.8" -libc = ">=0.2.71" -vmm-sys-util = ">=0.7.0" -hypervisor = { path = "../hypervisor" } +thiserror = "1.0" +anyhow = "1.0" +kvm-bindings = { version = "0.7.0", features = ["fam-wrappers"] } +nix = { version = "0.26.2", default-features = false, features = ["fs", "feature"] } +log = "0.4" +libc = "0.2" +vmm-sys-util = "0.12.1" machine_manager = { path = "../machine_manager" } migration = { path = "../migration" } -migration_derive = { path = "../migration_derive" } +migration_derive = { path = "../migration/migration_derive" } util = { path = "../util" } +trace = { path = "../trace" } -[dev-dependencies] -serial_test = "0.5.1" +[features] +default = [] +boot_time = [] diff --git a/cpu/src/aarch64/caps.rs b/cpu/src/aarch64/caps.rs index 9da3f1530dec57acb7d17bbb3eeb114ff2d71ad5..5b2a3055338c92509277eb5e4f66095eba3d3489 100644 --- a/cpu/src/aarch64/caps.rs +++ b/cpu/src/aarch64/caps.rs @@ -10,120 +10,32 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
-use std::{convert::TryInto, mem::size_of}; +use machine_manager::config::{CpuConfig, PmuConfig, SveConfig}; -use kvm_bindings::{ - KVM_REG_ARM_COPROC_MASK, KVM_REG_ARM_CORE, KVM_REG_SIZE_MASK, KVM_REG_SIZE_U32, - KVM_REG_SIZE_U64, -}; -use kvm_ioctls::{Cap, Kvm, VcpuFd}; - -use super::core_regs::{get_one_reg_vec, set_one_reg_vec, Result}; - -// Capabilities for ARM cpu. -#[derive(Debug, Clone)] -pub struct ArmCPUCaps { - pub irq_chip: bool, - pub ioevent_fd: bool, - pub irq_fd: bool, - pub user_mem: bool, - pub psci02: bool, - pub mp_state: bool, +#[derive(Copy, Clone, Debug, Default)] +pub struct ArmCPUFeatures { + pub pmu: bool, + pub sve: bool, } -impl ArmCPUCaps { - /// Initialize ArmCPUCaps instance. - pub fn init_capabilities() -> Self { - let kvm = Kvm::new().unwrap(); - - ArmCPUCaps { - irq_chip: kvm.check_extension(Cap::Irqchip), - ioevent_fd: kvm.check_extension(Cap::Ioeventfd), - irq_fd: kvm.check_extension(Cap::Irqfd), - user_mem: kvm.check_extension(Cap::UserMemory), - psci02: kvm.check_extension(Cap::ArmPsci02), - mp_state: kvm.check_extension(Cap::MpState), +impl From<&CpuConfig> for ArmCPUFeatures { + fn from(conf: &CpuConfig) -> Self { + Self { + pmu: match &conf.pmu { + PmuConfig::On => true, + PmuConfig::Off => false, + }, + sve: match &conf.sve { + SveConfig::On => true, + SveConfig::Off => false, + }, } } } /// Entry to cpreg list. -#[derive(Default, Clone, Copy)] +#[derive(Default, Clone, Copy, Debug)] pub struct CpregListEntry { - pub index: u64, - pub value: u64, -} - -impl CpregListEntry { - fn cpreg_tuples_entry(&self) -> bool { - self.index & KVM_REG_ARM_COPROC_MASK as u64 == KVM_REG_ARM_CORE as u64 - } - - fn normal_cpreg_entry(&self) -> bool { - if self.cpreg_tuples_entry() { - return false; - } - - ((self.index & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32) - || ((self.index & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) - } - - /// Validate cpreg_list's tuples entry and normal entry. - pub fn validate(&self) -> bool { - if self.cpreg_tuples_entry() { - return true; - } - - self.normal_cpreg_entry() - } - - /// Get Cpreg value from Kvm. - /// - /// # Arguments - /// - /// * `vcpu_fd` - Vcpu file descriptor in kvm. - pub fn get_cpreg(&mut self, vcpu_fd: &VcpuFd) -> Result<()> { - if self.normal_cpreg_entry() { - let val = get_one_reg_vec(vcpu_fd, self.index)?; - if (self.index & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32 { - self.value = u32::from_be_bytes( - val.as_slice() - .split_at(size_of::()) - .0 - .try_into() - .unwrap(), - ) as u64; - } else if (self.index & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64 { - self.value = u64::from_be_bytes( - val.as_slice() - .split_at(size_of::()) - .0 - .try_into() - .unwrap(), - ) - } - } - - Ok(()) - } - - /// Set Cpreg value to Kvm. - /// - /// # Arguments - /// - /// * `vcpu_fd` - Vcpu file descriptor in kvm. - pub fn set_cpreg(&self, vcpu_fd: &VcpuFd) -> Result<()> { - if self.normal_cpreg_entry() { - let mut value: Vec = self.value.to_be_bytes().to_vec(); - let data = if (self.index & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32 { - value.split_off(size_of::() / size_of::()) - } else { - value - }; - - set_one_reg_vec(vcpu_fd, self.index, &data)?; - } - - Ok(()) - } + pub reg_id: u64, + pub value: u128, } diff --git a/cpu/src/aarch64/core_regs.rs b/cpu/src/aarch64/core_regs.rs deleted file mode 100644 index bc46f69a9f0b4d728cf3f53744bf9e669b2b4489..0000000000000000000000000000000000000000 --- a/cpu/src/aarch64/core_regs.rs +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. 
-// -// StratoVirt is licensed under Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan -// PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. - -use std::mem::size_of; - -use hypervisor::kvm::{KVM_GET_ONE_REG, KVM_SET_ONE_REG}; -use kvm_bindings::{ - kvm_one_reg, kvm_regs, user_fpsimd_state, user_pt_regs, KVM_NR_SPSR, KVM_REG_ARM64, - KVM_REG_ARM_CORE, KVM_REG_SIZE_MASK, KVM_REG_SIZE_SHIFT, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, - KVM_REG_SIZE_U64, -}; -use kvm_ioctls::VcpuFd; -use util::byte_code::ByteCode; -use vmm_sys_util::{ - errno, - ioctl::{ioctl_with_mut_ref, ioctl_with_ref}, -}; - -pub type Result = std::result::Result; - -const KVM_REG_MAX_SIZE: u64 = 256; -const KVM_NR_REGS: u64 = 31; -const KVM_NR_FP_REGS: u64 = 32; - -/// AArch64 cpu core register. -/// See: https://elixir.bootlin.com/linux/v5.6/source/arch/arm64/include/uapi/asm/kvm.h#L50 -/// User structures for general purpose, floating point and debug registers. -/// See: https://elixir.bootlin.com/linux/v5.6/source/arch/arm64/include/uapi/asm/ptrace.h#L75 -pub enum Arm64CoreRegs { - KvmSpEl1, - KvmElrEl1, - KvmSpsr(usize), - UserPTRegRegs(usize), - UserPTRegSp, - UserPTRegPc, - UserPTRegPState, - UserFPSIMDStateVregs(usize), - UserFPSIMDStateFpsr, - UserFPSIMDStateFpcr, -} - -impl From for u64 { - fn from(elem: Arm64CoreRegs) -> Self { - let register_size; - let regid; - match elem { - Arm64CoreRegs::KvmSpEl1 => { - register_size = KVM_REG_SIZE_U64; - regid = offset_of!(kvm_regs, sp_el1) - } - Arm64CoreRegs::KvmElrEl1 => { - register_size = KVM_REG_SIZE_U64; - regid = offset_of!(kvm_regs, elr_el1) - } - Arm64CoreRegs::KvmSpsr(idx) if idx < KVM_NR_SPSR as usize => { - register_size = KVM_REG_SIZE_U64; - regid = offset_of!(kvm_regs, spsr) + idx * 8 - } - Arm64CoreRegs::UserPTRegRegs(idx) if idx < 31 => { - register_size = KVM_REG_SIZE_U64; - regid = offset_of!(kvm_regs, regs, user_pt_regs, regs) + idx * 8 - } - Arm64CoreRegs::UserPTRegSp => { - register_size = KVM_REG_SIZE_U64; - regid = offset_of!(kvm_regs, regs, user_pt_regs, sp) - } - Arm64CoreRegs::UserPTRegPc => { - register_size = KVM_REG_SIZE_U64; - regid = offset_of!(kvm_regs, regs, user_pt_regs, pc) - } - Arm64CoreRegs::UserPTRegPState => { - register_size = KVM_REG_SIZE_U64; - regid = offset_of!(kvm_regs, regs, user_pt_regs, pstate) - } - Arm64CoreRegs::UserFPSIMDStateVregs(idx) if idx < 32 => { - register_size = KVM_REG_SIZE_U128; - regid = offset_of!(kvm_regs, fp_regs, user_fpsimd_state, vregs) + idx * 16 - } - Arm64CoreRegs::UserFPSIMDStateFpsr => { - register_size = KVM_REG_SIZE_U32; - regid = offset_of!(kvm_regs, fp_regs, user_fpsimd_state, fpsr) - } - Arm64CoreRegs::UserFPSIMDStateFpcr => { - register_size = KVM_REG_SIZE_U32; - regid = offset_of!(kvm_regs, fp_regs, user_fpsimd_state, fpcr) - } - _ => panic!("No such Register"), - }; - - KVM_REG_ARM64 as u64 - | register_size as u64 - | u64::from(KVM_REG_ARM_CORE) - | (regid / size_of::()) as u64 - } -} - -/// Returns the 128 bits value of the specified vCPU register. -/// -/// The id of the register is encoded as specified in the kernel documentation -/// for `KVM_GET_ONE_REG`. -/// -/// Max register size is 256 Bytes. 
-/// -/// # Arguments -/// -/// * `vcpu_fd` - The file descriptor of kvm_based vcpu. -/// * `reg_id` - ID of register. -pub fn get_one_reg_vec(vcpu_fd: &VcpuFd, reg_id: u64) -> Result> { - let reg_size = 1_u64 << ((reg_id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT); - if reg_size > KVM_REG_MAX_SIZE { - return Err(errno::Error::new(libc::EINVAL)); - } - let mut reg_value: Vec = vec![0; reg_size as usize]; - reg_value.resize(reg_size as usize, 0); - let mut onereg = kvm_one_reg { - id: reg_id, - addr: reg_value.as_mut_ptr() as *mut u8 as u64, - }; - - // This is safe because we allocated the struct and we know the kernel will read - // exactly the size of the struct. - let ret = unsafe { ioctl_with_mut_ref(vcpu_fd, KVM_GET_ONE_REG(), &mut onereg) }; - if ret < 0 { - return Err(errno::Error::last()); - } - - Ok(reg_value) -} - -/// Sets the value of one register for this vCPU. -/// -/// The id of the register is encoded as specified in the kernel documentation -/// for `KVM_SET_ONE_REG`. -/// -/// Max register size is 256 Bytes. -/// -/// # Arguments -/// -/// * `reg_id` - ID of the register for which we are setting the value. -/// * `data` - value for the specified register. -pub fn set_one_reg_vec(vcpu_fd: &VcpuFd, reg_id: u64, data: &[u8]) -> Result<()> { - let reg_size = 1u64 << ((reg_id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT); - if reg_size > KVM_REG_MAX_SIZE || reg_size as usize > data.len() { - return Err(errno::Error::new(libc::EINVAL)); - }; - let data_ref = data.as_ptr() as *const u8; - let onereg = kvm_one_reg { - id: reg_id, - addr: data_ref as u64, - }; - - // This is safe because we allocated the struct and we know the kernel will read - // exactly the size of the struct. - let ret = unsafe { ioctl_with_ref(vcpu_fd, KVM_SET_ONE_REG(), &onereg) }; - if ret < 0 { - return Err(errno::Error::last()); - } - - Ok(()) -} - -/// Returns the vcpu's current `core_register`. -/// -/// The register state is gotten from `KVM_GET_ONE_REG` api in KVM. -/// -/// # Arguments -/// -/// * `vcpu_fd` - the VcpuFd in KVM mod. 
-pub fn get_core_regs(vcpu_fd: &VcpuFd) -> Result<kvm_regs> {
-    let mut core_regs = kvm_regs::default();
-
-    core_regs.regs.sp = vcpu_fd.get_one_reg(Arm64CoreRegs::UserPTRegSp.into())?;
-    core_regs.sp_el1 = vcpu_fd.get_one_reg(Arm64CoreRegs::KvmSpEl1.into())?;
-    core_regs.regs.pstate = vcpu_fd.get_one_reg(Arm64CoreRegs::UserPTRegPState.into())?;
-    core_regs.regs.pc = vcpu_fd.get_one_reg(Arm64CoreRegs::UserPTRegPc.into())?;
-    core_regs.elr_el1 = vcpu_fd.get_one_reg(Arm64CoreRegs::KvmElrEl1.into())?;
-
-    for i in 0..KVM_NR_REGS as usize {
-        core_regs.regs.regs[i] = vcpu_fd.get_one_reg(Arm64CoreRegs::UserPTRegRegs(i).into())?;
-    }
-
-    for i in 0..KVM_NR_SPSR as usize {
-        core_regs.spsr[i] = vcpu_fd.get_one_reg(Arm64CoreRegs::KvmSpsr(i).into())?;
-    }
-
-    for i in 0..KVM_NR_FP_REGS as usize {
-        let register_value_vec =
-            get_one_reg_vec(vcpu_fd, Arm64CoreRegs::UserFPSIMDStateVregs(i).into())?;
-        core_regs.fp_regs.vregs[i][0] = *u64::from_bytes(&register_value_vec[0..8]).unwrap();
-        core_regs.fp_regs.vregs[i][1] = *u64::from_bytes(&register_value_vec[8..16]).unwrap();
-    }
-
-    let register_value_vec = get_one_reg_vec(vcpu_fd, Arm64CoreRegs::UserFPSIMDStateFpsr.into())?;
-    core_regs.fp_regs.fpsr = *u32::from_bytes(&register_value_vec[0..4]).unwrap();
-
-    let register_value_vec = get_one_reg_vec(vcpu_fd, Arm64CoreRegs::UserFPSIMDStateFpcr.into())?;
-    core_regs.fp_regs.fpcr = *u32::from_bytes(&register_value_vec[0..4]).unwrap();
-
-    Ok(core_regs)
-}
-
-/// Sets the vcpu's current "core_register"
-///
-/// The register state is gotten from `KVM_SET_ONE_REG` api in KVM.
-///
-/// # Arguments
-///
-/// * `vcpu_fd` - the VcpuFd in KVM mod.
-/// * `core_regs` - kvm_regs state to be written.
-pub fn set_core_regs(vcpu_fd: &VcpuFd, core_regs: kvm_regs) -> Result<()> {
-    vcpu_fd.set_one_reg(Arm64CoreRegs::UserPTRegSp.into(), core_regs.regs.sp)?;
-    vcpu_fd.set_one_reg(Arm64CoreRegs::KvmSpEl1.into(), core_regs.sp_el1)?;
-    vcpu_fd.set_one_reg(Arm64CoreRegs::UserPTRegPState.into(), core_regs.regs.pstate)?;
-    vcpu_fd.set_one_reg(Arm64CoreRegs::UserPTRegPc.into(), core_regs.regs.pc)?;
-    vcpu_fd.set_one_reg(Arm64CoreRegs::KvmElrEl1.into(), core_regs.elr_el1)?;
-
-    for i in 0..KVM_NR_REGS as usize {
-        vcpu_fd.set_one_reg(
-            Arm64CoreRegs::UserPTRegRegs(i).into(),
-            core_regs.regs.regs[i] as u64,
-        )?;
-    }
-
-    for i in 0..KVM_NR_SPSR as usize {
-        vcpu_fd.set_one_reg(Arm64CoreRegs::KvmSpsr(i).into(), core_regs.spsr[i])?;
-    }
-
-    for i in 0..KVM_NR_FP_REGS as usize {
-        let mut data: Vec<u8> = Vec::new();
-        data.append(&mut core_regs.fp_regs.vregs[i][0].as_bytes().to_vec());
-        data.append(&mut core_regs.fp_regs.vregs[i][1].as_bytes().to_vec());
-        set_one_reg_vec(
-            vcpu_fd,
-            Arm64CoreRegs::UserFPSIMDStateVregs(i).into(),
-            &data,
-        )?;
-    }
-
-    set_one_reg_vec(
-        vcpu_fd,
-        Arm64CoreRegs::UserFPSIMDStateFpsr.into(),
-        &core_regs.fp_regs.fpsr.as_bytes().to_vec(),
-    )?;
-
-    set_one_reg_vec(
-        vcpu_fd,
-        Arm64CoreRegs::UserFPSIMDStateFpcr.into(),
-        &core_regs.fp_regs.fpcr.as_bytes().to_vec(),
-    )?;
-
-    Ok(())
-}
diff --git a/cpu/src/aarch64/mod.rs b/cpu/src/aarch64/mod.rs
index 1c427e7f67eccd588eac0644579b538ad626f476..9d89b48979feca2b5b75316efb37d9748d83b8b1 100644
--- a/cpu/src/aarch64/mod.rs
+++ b/cpu/src/aarch64/mod.rs
@@ -11,28 +11,25 @@
 // See the Mulan PSL v2 for more details.
pub mod caps; -mod core_regs; + +pub use self::caps::ArmCPUFeatures; +pub use self::caps::CpregListEntry; use std::sync::{Arc, Mutex}; -use hypervisor::kvm::KVM_FDS; +use anyhow::{Context, Result}; use kvm_bindings::{ - kvm_mp_state, kvm_reg_list, kvm_regs, kvm_vcpu_events, kvm_vcpu_init, KVM_MP_STATE_RUNNABLE, - KVM_MP_STATE_STOPPED, + kvm_mp_state as MpState, kvm_regs as Regs, kvm_vcpu_events as VcpuEvents, + KVM_MP_STATE_RUNNABLE as MP_STATE_RUNNABLE, KVM_MP_STATE_STOPPED as MP_STATE_STOPPED, }; -use kvm_ioctls::VcpuFd; -use vmm_sys_util::fam::FamStructWrapper; -use crate::errors::{Result, ResultExt}; use crate::CPU; -pub use caps::ArmCPUCaps; -use caps::CpregListEntry; -use core_regs::{get_core_regs, set_core_regs}; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; +use migration::{ + DeviceStateDesc, FieldDesc, MigrationError, MigrationHook, MigrationManager, StateTransfer, +}; +use migration_derive::{ByteCode, Desc}; use util::byte_code::ByteCode; -type CpregList = FamStructWrapper; - // PSR (Processor State Register) bits. // See: https://elixir.bootlin.com/linux/v5.6/source/arch/arm64/include/uapi/asm/ptrace.h#L34 #[allow(non_upper_case_globals)] @@ -44,10 +41,12 @@ const PSR_D_BIT: u64 = 0x0000_0200; // MPIDR is Multiprocessor Affinity Register // [40:63] bit reserved on AArch64 Architecture, const UNINIT_MPIDR: u64 = 0xFFFF_FF00_0000_0000; -// MPIDR - Multiprocessor Affinity Register. -// See: https://elixir.bootlin.com/linux/v5.6/source/arch/arm64/include/asm/sysreg.h#L130 -const SYS_MPIDR_EL1: u64 = 0x6030_0000_0013_c005; -const KVM_MAX_CPREG_ENTRIES: usize = 1024; + +/// Interrupt ID for pmu. +/// See: https://developer.arm.com/documentation/den0094/b/ +/// And: https://developer.arm.com/documentation/dai0492/b/ +pub const PPI_BASE: u32 = 16; +pub const PMU_INTR: u32 = 7; /// AArch64 CPU booting configure information /// @@ -56,35 +55,61 @@ const KVM_MAX_CPREG_ENTRIES: usize = 1024; /// tree blob (dtb) in system RAM. /// /// See: https://elixir.bootlin.com/linux/v5.6/source/Documentation/arm64/booting.rst -#[derive(Default, Copy, Clone)] +#[derive(Default, Copy, Clone, Debug)] pub struct ArmCPUBootConfig { pub fdt_addr: u64, pub boot_pc: u64, } +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ArmRegsIndex { + CoreRegs, + MpState, + VcpuEvents, + CpregList, + VtimerCount, +} + +#[derive(Default, Copy, Clone, Debug)] +pub struct ArmCPUTopology {} + +impl ArmCPUTopology { + pub fn new() -> Self { + ArmCPUTopology::default() + } + + pub fn set_topology(self, _topology: (u8, u8, u8)) -> Self { + self + } +} + /// AArch64 CPU architect information #[repr(C)] #[derive(Copy, Clone, Desc, ByteCode)] #[desc_version(compat_version = "0.1.0")] pub struct ArmCPUState { /// The vcpu id, `0` means primary CPU. - apic_id: u32, + pub apic_id: u32, /// MPIDR register value of this vcpu, /// The MPIDR provides an additional processor identification mechanism /// for scheduling purposes. - mpidr: u64, - /// Used to pass vcpu target and supported features to kvm. - kvi: kvm_vcpu_init, + pub mpidr: u64, /// Vcpu core registers. - core_regs: kvm_regs, + pub core_regs: Regs, /// Vcpu cpu events register. - cpu_events: kvm_vcpu_events, + pub cpu_events: VcpuEvents, /// Vcpu mpstate register. - mp_state: kvm_mp_state, + pub mp_state: MpState, /// The length of Cpreg. - cpreg_len: usize, + pub cpreg_len: usize, /// The list of Cpreg. 
- cpreg_list: [CpregListEntry; 512], + pub cpreg_list: [CpregListEntry; 512], + /// Vcpu features + pub features: ArmCPUFeatures, + /// Virtual timer count. + pub vtimer_cnt: u64, + /// Virtual timer count valid. + pub vtimer_cnt_valid: bool, } impl ArmCPUState { @@ -94,11 +119,11 @@ impl ArmCPUState { /// /// * `vcpu_id` - ID of this `CPU`. pub fn new(vcpu_id: u32) -> Self { - let mp_state = kvm_mp_state { + let mp_state = MpState { mp_state: if vcpu_id == 0 { - KVM_MP_STATE_RUNNABLE + MP_STATE_RUNNABLE } else { - KVM_MP_STATE_STOPPED + MP_STATE_STOPPED }, }; @@ -114,73 +139,20 @@ impl ArmCPUState { let locked_cpu_state = cpu_state.lock().unwrap(); self.apic_id = locked_cpu_state.apic_id; self.mpidr = locked_cpu_state.mpidr; - self.kvi = locked_cpu_state.kvi; self.core_regs = locked_cpu_state.core_regs; self.cpu_events = locked_cpu_state.cpu_events; self.mp_state = locked_cpu_state.mp_state; self.cpreg_len = locked_cpu_state.cpreg_len; self.cpreg_list = locked_cpu_state.cpreg_list; + self.features = locked_cpu_state.features; } - /// Set register value in `ArmCPUState` according to `boot_config`. + /// Set cpu topology /// /// # Arguments /// - /// * `vcpu_fd` - Vcpu file descriptor in kvm. - /// * `boot_config` - Boot message from boot_loader. - pub fn set_boot_config( - &mut self, - vcpu_fd: &Arc, - boot_config: &ArmCPUBootConfig, - ) -> Result<()> { - KVM_FDS - .load() - .vm_fd - .as_ref() - .unwrap() - .get_preferred_target(&mut self.kvi) - .chain_err(|| "Failed to get kvm vcpu preferred target")?; - - // support PSCI 0.2 - // We already checked that the capability is supported. - self.kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2; - // Non-boot cpus are powered off initially. - if self.apic_id != 0 { - self.kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF; - } - - self.set_core_reg(boot_config); - - vcpu_fd - .vcpu_init(&self.kvi) - .chain_err(|| "Failed to init kvm vcpu")?; - self.mpidr = vcpu_fd - .get_one_reg(SYS_MPIDR_EL1) - .chain_err(|| "Failed to get mpidr")?; - - Ok(()) - } - - /// Reset register value in `Kvm` with `ArmCPUState`. - /// - /// # Arguments - /// - /// * `vcpu_fd` - Vcpu file descriptor in kvm. - pub fn reset_vcpu(&self, vcpu_fd: &Arc) -> Result<()> { - set_core_regs(vcpu_fd, self.core_regs) - .chain_err(|| format!("Failed to set core register for CPU {}", self.apic_id))?; - vcpu_fd - .set_mp_state(self.mp_state) - .chain_err(|| format!("Failed to set mpstate for CPU {}", self.apic_id))?; - for cpreg in self.cpreg_list[0..self.cpreg_len].iter() { - cpreg - .set_cpreg(&vcpu_fd.clone()) - .chain_err(|| format!("Failed to set cpreg for CPU {}", self.apic_id))?; - } - vcpu_fd - .set_vcpu_events(&self.cpu_events) - .chain_err(|| format!("Failed to set vcpu event for CPU {}", self.apic_id))?; - + /// * `topology` - ARM CPU Topology + pub fn set_cpu_topology(&mut self, _topology: &ArmCPUTopology) -> Result<()> { Ok(()) } @@ -190,16 +162,11 @@ impl ArmCPUState { } /// Get core_regs value. - pub fn core_regs(&self) -> kvm_regs { + pub fn core_regs(&self) -> Regs { self.core_regs } - /// Get kvm_vcpu_init. - pub fn kvi(&self) -> kvm_vcpu_init { - self.kvi - } - - fn set_core_reg(&mut self, boot_config: &ArmCPUBootConfig) { + pub fn set_core_reg(&mut self, boot_config: &ArmCPUBootConfig) { // Set core regs. self.core_regs.regs.pstate = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h; self.core_regs.regs.regs[1] = 0; @@ -212,58 +179,40 @@ impl ArmCPUState { self.core_regs.regs.pc = boot_config.boot_pc; } } + + /// Get cpu features. 
+ pub fn get_features(&self) -> &ArmCPUFeatures { + &self.features + } } impl StateTransfer for CPU { - fn get_state_vec(&self) -> migration::errors::Result> { - let mut cpu_state_locked = self.arch_cpu.lock().unwrap(); - - cpu_state_locked.core_regs = get_core_regs(&self.fd)?; - if self.caps.mp_state { - let mut mp_state = self.fd.get_mp_state()?; - if mp_state.mp_state != KVM_MP_STATE_STOPPED { - mp_state.mp_state = KVM_MP_STATE_RUNNABLE; - } - cpu_state_locked.mp_state = mp_state; - } - - let mut cpreg_list = CpregList::new(KVM_MAX_CPREG_ENTRIES); - self.fd.get_reg_list(&mut cpreg_list)?; - cpu_state_locked.cpreg_len = 0; - for (index, cpreg) in cpreg_list.as_slice().iter().enumerate() { - let mut cpreg_entry = CpregListEntry { - index: *cpreg, - value: 0, - }; - if cpreg_entry.validate() { - cpreg_entry.get_cpreg(&self.fd.clone())?; - cpu_state_locked.cpreg_list[index] = cpreg_entry; - cpu_state_locked.cpreg_len += 1; - } - } - cpu_state_locked.cpu_events = self.fd.get_vcpu_events()?; - - Ok(cpu_state_locked.as_bytes().to_vec()) + fn get_state_vec(&self) -> Result> { + self.hypervisor_cpu + .get_regs(self.arch_cpu.clone(), ArmRegsIndex::CoreRegs)?; + self.hypervisor_cpu + .get_regs(self.arch_cpu.clone(), ArmRegsIndex::MpState)?; + self.hypervisor_cpu + .get_regs(self.arch_cpu.clone(), ArmRegsIndex::CpregList)?; + self.hypervisor_cpu + .get_regs(self.arch_cpu.clone(), ArmRegsIndex::VcpuEvents)?; + + Ok(self.arch_cpu.lock().unwrap().as_bytes().to_vec()) } - fn set_state(&self, state: &[u8]) -> migration::errors::Result<()> { + fn set_state(&self, state: &[u8]) -> Result<()> { let cpu_state = *ArmCPUState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("CPU"))?; + .with_context(|| MigrationError::FromBytesError("CPU"))?; let mut cpu_state_locked = self.arch_cpu.lock().unwrap(); *cpu_state_locked = cpu_state; - - self.fd.vcpu_init(&cpu_state.kvi)?; + drop(cpu_state_locked); Ok(()) } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&ArmCPUState::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&ArmCPUState::descriptor().name).unwrap_or(!0) } } diff --git a/cpu/src/error.rs b/cpu/src/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..3f91fffd726dda7dc80e9594caa24065033f1088 --- /dev/null +++ b/cpu/src/error.rs @@ -0,0 +1,51 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+
+use thiserror::Error;
+
+#[derive(Error, Debug)]
+pub enum CpuError {
+    #[error("Signal")]
+    Signal {
+        #[from]
+        source: vmm_sys_util::errno::Error,
+    },
+
+    #[error("Failed to create vcpu: {0}!")]
+    CreateVcpu(String),
+    #[error("Failed to configure vcpu: {0}!")]
+    RealizeVcpu(String),
+    #[error("Failed to start vcpu: {0}!")]
+    StartVcpu(String),
+    #[error("Failed to stop vcpu: {0}!")]
+    StopVcpu(String),
+    #[error("Failed to kick vcpu: {0}!")]
+    KickVcpu(String),
+    #[error("Failed to destroy vcpu: {0}!")]
+    DestroyVcpu(String),
+    #[error("CPU {0} halted!")]
+    VcpuHltEvent(u8),
+    #[error("CPU {0} received an unexpected exit reason: {1}!")]
+    VcpuExitReason(u8, String),
+    #[error("CPU {0} received an unhandled exit event: error {1}!")]
+    UnhandledHypervisorExit(u8, i32),
+    #[error("Vcpu not present in local thread.")]
+    VcpuLocalThreadNotPresent,
+    #[error("No Machine Interface saved in CPU")]
+    NoMachineInterface,
+    #[cfg(target_arch = "aarch64")]
+    #[error("Failed to get system register: {0}!")]
+    GetSysRegister(String),
+    #[cfg(target_arch = "aarch64")]
+    #[error("Failed to set system register: {0}!")]
+    SetSysRegister(String),
+}
diff --git a/cpu/src/lib.rs b/cpu/src/lib.rs
index 64a1fc5ece1a210f81df752afd4a9cf2495a5025..704ef6044f548dacb0d2483706030abfa7566d12 100644
--- a/cpu/src/lib.rs
+++ b/cpu/src/lib.rs
@@ -27,121 +27,81 @@
 //! - `x86_64`
 //! - `aarch64`
-#[macro_use]
-extern crate error_chain;
-#[macro_use]
-extern crate log;
-#[macro_use]
-extern crate machine_manager;
-#[cfg(target_arch = "aarch64")]
-#[macro_use]
-extern crate util;
-#[macro_use]
-extern crate migration_derive;
-#[cfg(target_arch = "aarch64")]
+pub mod error;
+
 #[allow(clippy::upper_case_acronyms)]
 #[cfg(target_arch = "aarch64")]
 mod aarch64;
 #[cfg(target_arch = "x86_64")]
 mod x86_64;
-pub mod errors {
-    error_chain!
{ - foreign_links { - Signal(vmm_sys_util::errno::Error); - } - errors { - CreateVcpu(err_info: String) { - display("Failed to create kvm vcpu: {}!", err_info) - } - RealizeVcpu(err_info: String) { - display("Failed to configure kvm vcpu: {}!", err_info) - } - StartVcpu(err_info: String) { - display("Failed to starting kvm vcpu: {}!", err_info) - } - StopVcpu(err_info: String) { - display("Failed to stopping kvm vcpu: {}!", err_info) - } - KickVcpu(err_info: String) { - display("Failed to kick kvm vcpu: {}!", err_info) - } - DestroyVcpu(err_info: String) { - display("Failed to destroy kvm vcpu: {}!", err_info) - } - VcpuHltEvent(cpu_id: u8) { - display("CPU {}/KVM halted!", cpu_id) - } - VcpuExitReason(cpu_id: u8, err_info: String) { - display("CPU {}/KVM received an unexpected exit reason: {}!", cpu_id, err_info) - } - UnhandledKvmExit(cpu_id: u8) { - display("CPU {}/KVM received an unhandled kvm exit event!", cpu_id) - } - VcpuLocalThreadNotPresent { - display("Vcpu not present in local thread.") - } - NoMachineInterface { - display("No Machine Interface saved in CPU") - } - #[cfg(target_arch = "aarch64")] - GetSysRegister(err_info: String) { - description("Get sys Register error") - display("Failed to get system register: {}!", err_info) - } - #[cfg(target_arch = "aarch64")] - SetSysRegister(err_info: String) { - description("Set sys Register error") - display("Failed to Set system register: {}!", err_info) - } - } - } -} - #[cfg(target_arch = "aarch64")] pub use aarch64::ArmCPUBootConfig as CPUBootConfig; #[cfg(target_arch = "aarch64")] -pub use aarch64::ArmCPUCaps as CPUCaps; +pub use aarch64::ArmCPUFeatures as CPUFeatures; #[cfg(target_arch = "aarch64")] pub use aarch64::ArmCPUState as ArchCPU; -#[cfg(target_arch = "x86_64")] -use x86_64::caps::X86CPUCaps as CPUCaps; +#[cfg(target_arch = "aarch64")] +pub use aarch64::ArmCPUTopology as CPUTopology; +#[cfg(target_arch = "aarch64")] +pub use aarch64::ArmRegsIndex as RegsIndex; +#[cfg(target_arch = "aarch64")] +pub use aarch64::CpregListEntry; +#[cfg(target_arch = "aarch64")] +pub use aarch64::PMU_INTR; +#[cfg(target_arch = "aarch64")] +pub use aarch64::PPI_BASE; +pub use error::CpuError; #[cfg(target_arch = "x86_64")] pub use x86_64::X86CPUBootConfig as CPUBootConfig; #[cfg(target_arch = "x86_64")] pub use x86_64::X86CPUState as ArchCPU; +#[cfg(target_arch = "x86_64")] +pub use x86_64::X86CPUTopology as CPUTopology; +#[cfg(target_arch = "x86_64")] +pub use x86_64::X86RegsIndex as RegsIndex; use std::cell::RefCell; -use std::sync::atomic::fence; -use std::sync::atomic::Ordering; +use std::sync::atomic::{fence, AtomicBool, Ordering}; use std::sync::{Arc, Barrier, Condvar, Mutex, Weak}; use std::thread; use std::time::Duration; +use std::time::Instant; -use kvm_ioctls::{VcpuExit, VcpuFd}; -use libc::{c_int, c_void, siginfo_t}; -use machine_manager::machine::MachineInterface; -use machine_manager::{qmp::qmp_schema as schema, qmp::QmpChannel}; -use vmm_sys_util::signal::{register_signal_handler, Killable}; +use anyhow::{anyhow, bail, Context, Result}; +use log::{error, info, warn}; +use nix::unistd::gettid; -use errors::{ErrorKind, Result, ResultExt}; +use machine_manager::config::ShutdownAction::{ShutdownActionPause, ShutdownActionPoweroff}; +use machine_manager::event; +use machine_manager::machine::{HypervisorType, MachineInterface, VmState}; +use machine_manager::qmp::{qmp_channel::QmpChannel, qmp_schema}; // SIGRTMIN = 34 (GNU, in MUSL is 35) and SIGRTMAX = 64 in linux, VCPU signal // number should be assigned to SIGRTMIN + n, (n = 
0...30). -#[cfg(not(target_env = "musl"))] -const VCPU_TASK_SIGNAL: i32 = 34; -#[cfg(target_env = "musl")] -const VCPU_TASK_SIGNAL: i32 = 35; -#[cfg(not(target_env = "musl"))] -const VCPU_RESET_SIGNAL: i32 = 35; +#[cfg(target_env = "gnu")] +pub const VCPU_TASK_SIGNAL: i32 = 34; #[cfg(target_env = "musl")] -const VCPU_RESET_SIGNAL: i32 = 36; +pub const VCPU_TASK_SIGNAL: i32 = 35; +#[cfg(target_env = "ohos")] +pub const VCPU_TASK_SIGNAL: i32 = 40; + +/// Watch `0x3ff` IO port to record the magic value trapped from guest kernel. +#[cfg(all(target_arch = "x86_64", feature = "boot_time"))] +const MAGIC_SIGNAL_GUEST_BOOT: u64 = 0x3ff; +/// Watch Uart MMIO region to record the magic value trapped from guest kernel. +#[cfg(all(target_arch = "aarch64", feature = "boot_time"))] +const MAGIC_SIGNAL_GUEST_BOOT: u64 = 0x9000f00; +/// The boot start value can be verified before kernel start. +#[cfg(feature = "boot_time")] +const MAGIC_VALUE_SIGNAL_GUEST_BOOT_START: u8 = 0x01; +/// The boot complete value can be verified before init guest userspace. +#[cfg(feature = "boot_time")] +const MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE: u8 = 0x02; /// State for `CPU` lifecycle. -#[derive(Copy, Clone, Debug, PartialEq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum CpuLifecycleState { - /// `CPU` structure is only be initialized, but nothing set. - Nothing = 0, /// `CPU` structure's property is set with configuration. Created = 1, /// `CPU` start to be running. @@ -158,9 +118,14 @@ pub enum CpuLifecycleState { #[allow(clippy::upper_case_acronyms)] pub trait CPUInterface { /// Realize `CPU` structure, set registers value for `CPU`. - fn realize(&self, boot: &CPUBootConfig) -> Result<()>; - - /// Start `CPU` thread and run virtual CPU in kvm. + fn realize( + &self, + boot: &CPUBootConfig, + topology: &CPUTopology, + #[cfg(target_arch = "aarch64")] features: &CPUFeatures, + ) -> Result<()>; + + /// Start `CPU` thread and run virtual CPU in hypervisor. /// /// # Arguments /// @@ -171,9 +136,6 @@ pub trait CPUInterface { where Self: std::marker::Sized; - /// Kick `CPU` to exit kvm emulation. - fn kick(&self) -> Result<()>; - /// Make `CPU` lifecycle from `Running` to `Paused`. fn pause(&self) -> Result<()>; @@ -183,40 +145,86 @@ pub trait CPUInterface { /// Make `CPU` lifecycle to `Stopping`, then `Stopped`. fn destroy(&self) -> Result<()>; - /// Reset registers value for `CPU`. - fn reset(&self) -> Result<()>; - /// Make `CPU` destroy because of guest inner shutdown. fn guest_shutdown(&self) -> Result<()>; /// Make `CPU` destroy because of guest inner reset. fn guest_reset(&self) -> Result<()>; +} + +pub trait CPUHypervisorOps: Send + Sync { + fn get_hypervisor_type(&self) -> HypervisorType; + + fn init_pmu(&self) -> Result<()>; + + fn vcpu_init(&self) -> Result<()>; + + fn set_boot_config( + &self, + arch_cpu: Arc>, + boot_config: &CPUBootConfig, + #[cfg(target_arch = "aarch64")] vcpu_config: &CPUFeatures, + ) -> Result<()>; + + fn get_one_reg(&self, reg_id: u64) -> Result; - /// Handle vcpu event from `kvm`. 
- fn kvm_vcpu_exec(&self) -> Result; + fn get_regs(&self, arch_cpu: Arc>, regs_index: RegsIndex) -> Result<()>; + + fn set_regs(&self, arch_cpu: Arc>, regs_index: RegsIndex) -> Result<()>; + + fn put_register(&self, cpu: Arc) -> Result<()>; + + fn reset_vcpu(&self, cpu: Arc) -> Result<()>; + + fn vcpu_exec( + &self, + cpu_thread_worker: CPUThreadWorker, + thread_barrier: Arc, + ) -> Result<()>; + + fn set_hypervisor_exit(&self) -> Result<()>; + + fn pause( + &self, + task: Arc>>>, + state: Arc<(Mutex, Condvar)>, + pause_signal: Arc, + ) -> Result<()>; + + fn resume( + &self, + state: Arc<(Mutex, Condvar)>, + pause_signal: Arc, + ) -> Result<()>; + + fn destroy( + &self, + task: Arc>>>, + state: Arc<(Mutex, Condvar)>, + ) -> Result<()>; } -/// `CPU` is a wrapper around creating and using a kvm-based VCPU. +/// `CPU` is a wrapper around creating and using a hypervisor-based VCPU. #[allow(clippy::upper_case_acronyms)] pub struct CPU { /// ID of this virtual CPU, `0` means this cpu is primary `CPU`. - id: u8, - /// The file descriptor of this kvm-based VCPU. - fd: Arc, + pub id: u8, /// Architecture special CPU property. - arch_cpu: Arc>, - /// LifeCycle state of kvm-based VCPU. - state: Arc<(Mutex, Condvar)>, + pub arch_cpu: Arc>, + /// LifeCycle state of hypervisor-based VCPU. + pub state: Arc<(Mutex, Condvar)>, /// The thread handler of this virtual CPU. task: Arc>>>, /// The thread tid of this VCPU. tid: Arc>>, /// The VM combined by this VCPU. vm: Weak>, - /// The capability of VCPU. - caps: CPUCaps, /// The state backup of architecture CPU right before boot. boot_state: Arc>, + /// Sync the pause state of vCPU in hypervisor and userspace. + pause_signal: Arc, + /// Interact between the vCPU and hypervisor. + pub hypervisor_cpu: Arc, } impl CPU { @@ -229,38 +237,29 @@ impl CPU { /// * `arch_cpu` - Architecture special `CPU` property. /// * `vm` - The virtual machine this `CPU` gets attached to. pub fn new( - vcpu_fd: Arc, + hypervisor_cpu: Arc, id: u8, arch_cpu: Arc>, vm: Arc>, ) -> Self { CPU { id, - fd: vcpu_fd, arch_cpu, state: Arc::new((Mutex::new(CpuLifecycleState::Created), Condvar::new())), task: Arc::new(Mutex::new(None)), tid: Arc::new(Mutex::new(None)), vm: Arc::downgrade(&vm), - caps: CPUCaps::init_capabilities(), boot_state: Arc::new(Mutex::new(ArchCPU::default())), + pause_signal: Arc::new(AtomicBool::new(false)), + hypervisor_cpu, } } - pub fn set_to_boot_state(&self) { - self.arch_cpu.lock().unwrap().set(&self.boot_state); - } - /// Get this `CPU`'s ID. pub fn id(&self) -> u8 { self.id } - /// Get this `CPU`'s file descriptor. - pub fn fd(&self) -> &Arc { - &self.fd - } - /// Get this `CPU`'s state. pub fn state(&self) -> &(Mutex, Condvar) { self.state.as_ref() @@ -284,47 +283,83 @@ impl CPU { } /// Set thread id for `CPU`. - fn set_tid(&self) { - *self.tid.lock().unwrap() = Some(util::unix::gettid()); + pub fn set_tid(&self, tid: Option) { + if tid.is_none() { + // Cast is safe as tid is not negative. + *self.tid.lock().unwrap() = Some(gettid().as_raw() as u64); + } else { + *self.tid.lock().unwrap() = tid; + } + } + + /// Get the hypervisor of this `CPU`. 
+ pub fn hypervisor_cpu(&self) -> &Arc { + &self.hypervisor_cpu + } + + pub fn vm(&self) -> Weak> { + self.vm.clone() + } + + pub fn boot_state(&self) -> Arc> { + self.boot_state.clone() + } + + pub fn pause_signal(&self) -> Arc { + self.pause_signal.clone() } } impl CPUInterface for CPU { - fn realize(&self, boot: &CPUBootConfig) -> Result<()> { + fn realize( + &self, + boot: &CPUBootConfig, + topology: &CPUTopology, + #[cfg(target_arch = "aarch64")] config: &CPUFeatures, + ) -> Result<()> { + trace::cpu_boot_config(boot); let (cpu_state, _) = &*self.state; if *cpu_state.lock().unwrap() != CpuLifecycleState::Created { - return Err( - ErrorKind::RealizeVcpu(format!("VCPU{} may has realized.", self.id())).into(), - ); + return Err(anyhow!(CpuError::RealizeVcpu(format!( + "VCPU{} may has realized.", + self.id() + )))); } + self.hypervisor_cpu + .set_boot_config( + self.arch_cpu.clone(), + boot, + #[cfg(target_arch = "aarch64")] + config, + ) + .with_context(|| "Failed to realize arch cpu")?; + self.arch_cpu .lock() .unwrap() - .set_boot_config(&self.fd, boot) - .chain_err(|| "Failed to realize arch cpu")?; + .set_cpu_topology(topology) + .with_context(|| "Failed to realize arch cpu")?; + self.boot_state.lock().unwrap().set(&self.arch_cpu); Ok(()) } fn resume(&self) -> Result<()> { - let (cpu_state_locked, cvar) = &*self.state; - let mut cpu_state = cpu_state_locked.lock().unwrap(); - if *cpu_state == CpuLifecycleState::Running { - warn!("vcpu{} in running state, no need to resume", self.id()); - return Ok(()); - } + #[cfg(target_arch = "aarch64")] + self.hypervisor_cpu + .set_regs(self.arch_cpu.clone(), RegsIndex::VtimerCount)?; - *cpu_state = CpuLifecycleState::Running; - drop(cpu_state); - cvar.notify_one(); - Ok(()) + self.hypervisor_cpu + .resume(self.state.clone(), self.pause_signal.clone()) } fn start(cpu: Arc, thread_barrier: Arc, paused: bool) -> Result<()> { let (cpu_state, _) = &*cpu.state; if *cpu_state.lock().unwrap() == CpuLifecycleState::Running { - return Err(ErrorKind::StartVcpu("Cpu is already running".to_string()).into()); + return Err(anyhow!(CpuError::StartVcpu( + "Cpu is already running".to_string() + ))); } if paused { *cpu_state.lock().unwrap() = CpuLifecycleState::Paused; @@ -333,105 +368,72 @@ impl CPUInterface for CPU { } let local_cpu = cpu.clone(); + let cpu_id = cpu.id(); + let hypervisor_cpu = cpu.hypervisor_cpu().clone(); + let hyp_type = hypervisor_cpu.get_hypervisor_type(); let cpu_thread_worker = CPUThreadWorker::new(cpu); let handle = thread::Builder::new() - .name(format!("CPU {}/KVM", local_cpu.id)) + .name(format!("CPU {}/{:?}", local_cpu.id, hyp_type)) .spawn(move || { - if let Err(e) = cpu_thread_worker.handle(thread_barrier) { - error!( - "Some error occurred in cpu{} thread: {}", - cpu_thread_worker.thread_cpu.id, - error_chain::ChainedError::display_chain(&e) - ); + if let Err(e) = hypervisor_cpu.vcpu_exec(cpu_thread_worker, thread_barrier) { + error!("Some error occurred in cpu{} thread: {:?}", cpu_id, e); } }) - .chain_err(|| format!("Failed to create thread for CPU {}/KVM", local_cpu.id()))?; + .with_context(|| { + format!("Failed to create thread for CPU {}/{:?}", cpu_id, hyp_type) + })?; local_cpu.set_task(Some(handle)); Ok(()) } - fn reset(&self) -> Result<()> { - let task = self.task.lock().unwrap(); - match task.as_ref() { - Some(thread) => thread - .kill(VCPU_RESET_SIGNAL) - .chain_err(|| ErrorKind::KickVcpu("Fail to reset vcpu".to_string())), - None => { - warn!("VCPU thread not started, no need to reset"); - Ok(()) - } - } - } - - fn 
kick(&self) -> Result<()> { - let task = self.task.lock().unwrap(); - match task.as_ref() { - Some(thread) => thread - .kill(VCPU_TASK_SIGNAL) - .chain_err(|| ErrorKind::KickVcpu("Fail to kick vcpu".to_string())), - None => { - warn!("VCPU thread not started, no need to kick"); - Ok(()) - } - } - } - fn pause(&self) -> Result<()> { - let task = self.task.lock().unwrap(); - let (cpu_state, cvar) = &*self.state; + self.hypervisor_cpu.pause( + self.task.clone(), + self.state.clone(), + self.pause_signal.clone(), + )?; - if *cpu_state.lock().unwrap() == CpuLifecycleState::Running { - *cpu_state.lock().unwrap() = CpuLifecycleState::Paused; - cvar.notify_one() - } + #[cfg(target_arch = "aarch64")] + self.hypervisor_cpu + .get_regs(self.arch_cpu.clone(), RegsIndex::VtimerCount)?; - match task.as_ref() { - Some(thread) => match thread.kill(VCPU_TASK_SIGNAL) { - Ok(_) => Ok(()), - Err(e) => Err(ErrorKind::StopVcpu(format!("{}", e)).into()), - }, - None => { - warn!("VCPU thread not started, no need to stop"); - Ok(()) - } - } + Ok(()) } fn destroy(&self) -> Result<()> { - let (cpu_state, cvar) = &*self.state; - if *cpu_state.lock().unwrap() == CpuLifecycleState::Running { - *cpu_state.lock().unwrap() = CpuLifecycleState::Stopping; - } else { - *cpu_state.lock().unwrap() = CpuLifecycleState::Stopped; - } - - self.kick()?; - let mut cpu_state = cpu_state.lock().unwrap(); - cpu_state = cvar - .wait_timeout(cpu_state, Duration::from_millis(32)) - .unwrap() - .0; - - if *cpu_state == CpuLifecycleState::Stopped { - *cpu_state = CpuLifecycleState::Nothing; - Ok(()) - } else { - Err(ErrorKind::DestroyVcpu(format!("VCPU still in {:?} state", *cpu_state)).into()) - } + self.hypervisor_cpu + .destroy(self.task.clone(), self.state.clone()) } fn guest_shutdown(&self) -> Result<()> { - let (cpu_state, _) = &*self.state; - *cpu_state.lock().unwrap() = CpuLifecycleState::Stopped; - if let Some(vm) = self.vm.upgrade() { - vm.lock().unwrap().destroy(); + let shutdown_act = vm.lock().unwrap().get_shutdown_action(); + match shutdown_act { + ShutdownActionPoweroff => { + let (cpu_state, _) = &*self.state; + *cpu_state.lock().unwrap() = CpuLifecycleState::Stopped; + vm.lock().unwrap().destroy(); + } + ShutdownActionPause => { + let now = Instant::now(); + while !vm.lock().unwrap().pause() { + thread::sleep(Duration::from_millis(5)); + if now.elapsed() > Duration::from_secs(2) { + // Not use resume() to avoid unnecessary qmp event. 
+ vm.lock() + .unwrap() + .notify_lifecycle(VmState::Paused, VmState::Running); + bail!("Failed to pause VM"); + } + } + } + } } else { - return Err(ErrorKind::NoMachineInterface.into()); + return Err(anyhow!(CpuError::NoMachineInterface)); } if QmpChannel::is_connected() { - let shutdown_msg = schema::Shutdown { + let shutdown_msg = qmp_schema::Shutdown { guest: true, reason: "guest-shutdown".to_string(), }; @@ -443,118 +445,25 @@ impl CPUInterface for CPU { fn guest_reset(&self) -> Result<()> { if let Some(vm) = self.vm.upgrade() { + let (cpu_state, _) = &*self.state; + *cpu_state.lock().unwrap() = CpuLifecycleState::Paused; vm.lock().unwrap().reset(); } else { - return Err(ErrorKind::NoMachineInterface.into()); - } - - if QmpChannel::is_connected() { - let reset_msg = schema::Reset { guest: true }; - event!(Reset; reset_msg); + return Err(anyhow!(CpuError::NoMachineInterface)); } Ok(()) } - - fn kvm_vcpu_exec(&self) -> Result { - let vm = if let Some(vm) = self.vm.upgrade() { - vm - } else { - return Err(ErrorKind::NoMachineInterface.into()); - }; - - match self.fd.run() { - Ok(run) => match run { - #[cfg(target_arch = "x86_64")] - VcpuExit::IoIn(addr, data) => { - vm.lock().unwrap().pio_in(u64::from(addr), data); - } - #[cfg(target_arch = "x86_64")] - VcpuExit::IoOut(addr, data) => { - vm.lock().unwrap().pio_out(u64::from(addr), data); - } - VcpuExit::MmioRead(addr, data) => { - vm.lock().unwrap().mmio_read(addr, data); - } - VcpuExit::MmioWrite(addr, data) => { - vm.lock().unwrap().mmio_write(addr, data); - } - #[cfg(target_arch = "x86_64")] - VcpuExit::Hlt => { - info!("Vcpu{} received KVM_EXIT_HLT signal", self.id()); - return Err(ErrorKind::VcpuHltEvent(self.id()).into()); - } - #[cfg(target_arch = "x86_64")] - VcpuExit::Shutdown => { - info!("Vcpu{} received an KVM_EXIT_SHUTDOWN signal", self.id()); - self.guest_shutdown()?; - - return Ok(false); - } - #[cfg(target_arch = "aarch64")] - VcpuExit::SystemEvent(event, flags) => { - if event == kvm_bindings::KVM_SYSTEM_EVENT_SHUTDOWN { - info!( - "Vcpu{} received an KVM_SYSTEM_EVENT_SHUTDOWN signal", - self.id() - ); - self.guest_shutdown() - .chain_err(|| "Some error occurred in guest shutdown")?; - } else if event == kvm_bindings::KVM_SYSTEM_EVENT_RESET { - info!( - "Vcpu{} received an KVM_SYSTEM_EVENT_RESET signal", - self.id() - ); - self.guest_reset() - .chain_err(|| "Some error occurred in guest reset")?; - return Ok(true); - } else { - error!( - "Vcpu{} received unexpected system event with type 0x{:x}, flags 0x{:x}", - self.id(), - event, - flags - ); - } - - return Ok(false); - } - VcpuExit::FailEntry => { - info!("Vcpu{} received KVM_EXIT_FAIL_ENTRY signal", self.id()); - return Ok(false); - } - VcpuExit::InternalError => { - info!("Vcpu{} received KVM_EXIT_INTERNAL_ERROR signal", self.id()); - return Ok(false); - } - r => { - return Err(ErrorKind::VcpuExitReason(self.id(), format!("{:?}", r)).into()); - } - }, - Err(ref e) => { - match e.errno() { - libc::EAGAIN => {} - libc::EINTR => { - self.fd.set_kvm_immediate_exit(0); - } - _ => { - return Err(ErrorKind::UnhandledKvmExit(self.id()).into()); - } - }; - } - } - Ok(true) - } } /// The struct to handle events in cpu thread. 
#[allow(clippy::upper_case_acronyms)] -struct CPUThreadWorker { - thread_cpu: Arc, +pub struct CPUThreadWorker { + pub thread_cpu: Arc, } impl CPUThreadWorker { - thread_local!(static LOCAL_THREAD_VCPU: RefCell> = RefCell::new(None)); + thread_local!(static LOCAL_THREAD_VCPU: RefCell> = const { RefCell::new(None) }); /// Allocates a new `CPUThreadWorker`. fn new(thread_cpu: Arc) -> Self { @@ -562,7 +471,7 @@ impl CPUThreadWorker { } /// Init vcpu thread static variable. - fn init_local_thread_vcpu(&self) { + pub fn init_local_thread_vcpu(&self) { Self::LOCAL_THREAD_VCPU.with(|thread_vcpu| { *thread_vcpu.borrow_mut() = Some(CPUThreadWorker { thread_cpu: self.thread_cpu.clone(), @@ -570,56 +479,22 @@ impl CPUThreadWorker { }) } - fn run_on_local_thread_vcpu(func: F) -> Result<()> + pub fn run_on_local_thread_vcpu(func: F) -> Result<()> where - F: FnOnce(&CPU), + F: FnOnce(Arc), { Self::LOCAL_THREAD_VCPU.with(|thread_vcpu| { if let Some(local_thread_vcpu) = thread_vcpu.borrow().as_ref() { - func(&local_thread_vcpu.thread_cpu); + func(local_thread_vcpu.thread_cpu.clone()); Ok(()) } else { - Err(ErrorKind::VcpuLocalThreadNotPresent.into()) + Err(anyhow!(CpuError::VcpuLocalThreadNotPresent)) } }) } - /// Init signal for `CPU` event. - fn init_signals() -> Result<()> { - extern "C" fn handle_signal(signum: c_int, _: *mut siginfo_t, _: *mut c_void) { - match signum { - VCPU_TASK_SIGNAL => { - let _ = CPUThreadWorker::run_on_local_thread_vcpu(|vcpu| { - vcpu.fd().set_kvm_immediate_exit(1); - fence(Ordering::Release) - }); - } - VCPU_RESET_SIGNAL => { - let _ = CPUThreadWorker::run_on_local_thread_vcpu(|vcpu| { - if let Err(e) = vcpu.arch_cpu.lock().unwrap().reset_vcpu( - &vcpu.fd, - #[cfg(target_arch = "x86_64")] - &vcpu.caps, - ) { - error!("Failed to reset vcpu state: {}", e.to_string()) - } - fence(Ordering::Release) - }); - } - _ => {} - } - } - - register_signal_handler(VCPU_TASK_SIGNAL, handle_signal) - .chain_err(|| "Failed to register VCPU_TASK_SIGNAL signal.")?; - register_signal_handler(VCPU_RESET_SIGNAL, handle_signal) - .chain_err(|| "Failed to register VCPU_TASK_SIGNAL signal.")?; - - Ok(()) - } - - /// Judge whether the kvm vcpu is ready to emulate. - fn ready_for_running(&self) -> Result { + /// Judge whether the hypervisor vcpu is ready to emulate. + pub fn ready_for_running(&self) -> Result { let mut flag = 0_u32; let (cpu_state_locked, cvar) = &*self.thread_cpu.state; let mut cpu_state = cpu_state_locked.lock().unwrap(); @@ -631,6 +506,9 @@ impl CPUThreadWorker { info!("Vcpu{} paused", self.thread_cpu.id); flag = 1; } + // Setting pause_signal to be `true` if kvm changes vCPU to pause state. + self.thread_cpu.pause_signal().store(true, Ordering::SeqCst); + fence(Ordering::Release); cpu_state = cvar.wait(cpu_state).unwrap(); } CpuLifecycleState::Running => { @@ -647,65 +525,23 @@ impl CPUThreadWorker { } } } - - /// Handle the all events in vcpu thread. - fn handle(&self, thread_barrier: Arc) -> Result<()> { - self.init_local_thread_vcpu(); - if let Err(e) = Self::init_signals() { - error!("Failed to init cpu{} signal:{}", self.thread_cpu.id, e); - } - - self.thread_cpu.set_tid(); - - // The vcpu thread is going to run, - // reset its running environment. - #[cfg(not(test))] - self.thread_cpu - .reset() - .chain_err(|| "Failed to reset for cpu register state")?; - - // Wait for all vcpu to complete the running - // environment initialization. 
- thread_barrier.wait(); - - info!("vcpu{} start running", self.thread_cpu.id); - while let Ok(true) = self.ready_for_running() { - #[cfg(not(test))] - if !self - .thread_cpu - .kvm_vcpu_exec() - .chain_err(|| format!("VCPU {}/KVM emulate error!", self.thread_cpu.id()))? - { - break; - } - - #[cfg(test)] - { - thread::sleep(Duration::from_millis(5)); - } - } - - // The vcpu thread is about to exit, marking the state - // of the CPU state as Stopped. - let (cpu_state, cvar) = &*self.thread_cpu.state; - *cpu_state.lock().unwrap() = CpuLifecycleState::Stopped; - cvar.notify_one(); - - Ok(()) - } } /// The wrapper for topology for VCPU. #[derive(Clone)] pub struct CpuTopology { + /// Number of vcpus in VM. + pub nrcpus: u8, /// Number of sockets in VM. pub sockets: u8, - /// Number of cores in VM. + /// Number of dies in one socket. + pub dies: u8, + /// Number of clusters in one die. + pub clusters: u8, + /// Number of cores in one cluster. pub cores: u8, - /// Number of threads in VM. + /// Number of threads in one core. pub threads: u8, - /// Number of vcpus in VM. - pub nrcpus: u8, /// Number of online vcpus in VM. pub max_cpus: u8, /// Online mask number of all vcpus. @@ -713,19 +549,34 @@ pub struct CpuTopology { } impl CpuTopology { - /// Init CpuTopology structure. - /// - /// # Arguments - /// - /// * `nr_cpus`: Number of vcpus. - pub fn new(nr_cpus: u8) -> Self { - let mask: Vec = vec![1; nr_cpus as usize]; + /// * `nr_cpus`: Number of vcpus in one VM. + /// * `nr_sockets`: Number of sockets in one VM. + /// * `nr_dies`: Number of dies in one socket. + /// * `nr_clusters`: Number of clusters in one die. + /// * `nr_cores`: Number of cores in one cluster. + /// * `nr_threads`: Number of threads in one core. + /// * `max_cpus`: Number of online vcpus in VM. + pub fn new( + nr_cpus: u8, + nr_sockets: u8, + nr_dies: u8, + nr_clusters: u8, + nr_cores: u8, + nr_threads: u8, + max_cpus: u8, + ) -> Self { + let mut mask: Vec = vec![0; max_cpus as usize]; + (0..nr_cpus as usize).for_each(|index| { + mask[index] = 1; + }); Self { - sockets: nr_cpus, - cores: 1, - threads: 1, nrcpus: nr_cpus, - max_cpus: nr_cpus, + sockets: nr_sockets, + dies: nr_dies, + clusters: nr_clusters, + cores: nr_cores, + threads: nr_threads, + max_cpus, online_mask: Arc::new(Mutex::new(mask)), } } @@ -751,177 +602,51 @@ impl CpuTopology { /// # Arguments /// /// * `vcpu_id` - ID of vcpu. - pub fn get_topo(&self, vcpu_id: usize) -> (u8, u8, u8) { - let cpu_per_socket = self.cores * self.threads; - let cpu_per_core = self.threads; - let socketid: u8 = vcpu_id as u8 / cpu_per_socket; - let coreid: u8 = (vcpu_id as u8 % cpu_per_socket) / cpu_per_core; - let threadid: u8 = (vcpu_id as u8 % cpu_per_socket) % cpu_per_core; - (socketid, coreid, threadid) + fn get_topo_item(&self, vcpu_id: u8) -> (u8, u8, u8, u8, u8) { + // nr_cpus is no more than u8::MAX, multiply will not overflow. + // nr_xxx is no less than 1, div and mod operations will not panic. 
+ let socketid: u8 = vcpu_id / (self.dies * self.clusters * self.cores * self.threads); + let dieid: u8 = (vcpu_id / (self.clusters * self.cores * self.threads)) % self.dies; + let clusterid: u8 = (vcpu_id / (self.cores * self.threads)) % self.clusters; + let coreid: u8 = (vcpu_id / self.threads) % self.cores; + let threadid: u8 = vcpu_id % self.threads; + (socketid, dieid, clusterid, coreid, threadid) + } + + pub fn get_topo_instance_for_qmp(&self, cpu_index: u8) -> qmp_schema::CpuInstanceProperties { + let (socketid, _dieid, _clusterid, coreid, threadid) = self.get_topo_item(cpu_index); + qmp_schema::CpuInstanceProperties { + node_id: None, + socket_id: Some(socketid as isize), + #[cfg(target_arch = "x86_64")] + die_id: Some(_dieid as isize), + #[cfg(target_arch = "aarch64")] + cluster_id: Some(_clusterid as isize), + core_id: Some(coreid as isize), + thread_id: Some(threadid as isize), + } + } +} + +/// Capture the boot signal that trap from guest kernel, and then record +/// kernel boot timestamp. +#[cfg(feature = "boot_time")] +pub fn capture_boot_signal(addr: u64, data: &[u8]) { + if addr == MAGIC_SIGNAL_GUEST_BOOT { + if data[0] == MAGIC_VALUE_SIGNAL_GUEST_BOOT_START { + info!("Kernel starts to boot!"); + } else if data[0] == MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE { + info!("Kernel boot complete!"); + } } } #[cfg(test)] mod tests { use std::sync::{Arc, Mutex}; - use std::time::Duration; - - use hypervisor::kvm::{KVMFds, KVM_FDS}; - use machine_manager::machine::{ - KvmVmState, MachineAddressInterface, MachineInterface, MachineLifecycle, - }; - use serial_test::serial; use super::*; - struct TestVm { - #[cfg(target_arch = "x86_64")] - pio_in: Arc)>>>, - #[cfg(target_arch = "x86_64")] - pio_out: Arc)>>>, - mmio_read: Arc)>>>, - mmio_write: Arc)>>>, - } - - impl TestVm { - fn new() -> Self { - TestVm { - #[cfg(target_arch = "x86_64")] - pio_in: Arc::new(Mutex::new(Vec::new())), - #[cfg(target_arch = "x86_64")] - pio_out: Arc::new(Mutex::new(Vec::new())), - mmio_read: Arc::new(Mutex::new(Vec::new())), - mmio_write: Arc::new(Mutex::new(Vec::new())), - } - } - } - - impl MachineLifecycle for TestVm { - fn notify_lifecycle(&self, _old: KvmVmState, _new: KvmVmState) -> bool { - true - } - } - - impl MachineAddressInterface for TestVm { - #[cfg(target_arch = "x86_64")] - fn pio_in(&self, addr: u64, data: &mut [u8]) -> bool { - self.pio_in.lock().unwrap().push((addr, data.to_vec())); - true - } - - #[cfg(target_arch = "x86_64")] - fn pio_out(&self, addr: u64, data: &[u8]) -> bool { - self.pio_out.lock().unwrap().push((addr, data.to_vec())); - true - } - - fn mmio_read(&self, addr: u64, data: &mut [u8]) -> bool { - #[cfg(target_arch = "aarch64")] - { - data[3] = 0x0; - data[2] = 0x0; - data[1] = 0x5; - data[0] = 0x6; - } - self.mmio_read.lock().unwrap().push((addr, data.to_vec())); - true - } - - fn mmio_write(&self, addr: u64, data: &[u8]) -> bool { - self.mmio_write.lock().unwrap().push((addr, data.to_vec())); - true - } - } - - impl MachineInterface for TestVm {} - - #[test] - #[serial] - #[allow(unused)] - fn test_cpu_lifecycle() { - let kvm_fds = KVMFds::new(); - if kvm_fds.vm_fd.is_none() { - return; - } - KVM_FDS.store(Arc::new(kvm_fds)); - - let vm = Arc::new(Mutex::new(TestVm::new())); - let cpu = CPU::new( - Arc::new( - KVM_FDS - .load() - .vm_fd - .as_ref() - .unwrap() - .create_vcpu(0) - .unwrap(), - ), - 0, - Arc::new(Mutex::new(ArchCPU::default())), - vm.clone(), - ); - let (cpu_state, _) = &*cpu.state; - assert_eq!(*cpu_state.lock().unwrap(), CpuLifecycleState::Created); - 
drop(cpu_state); - - let cpus_thread_barrier = Arc::new(Barrier::new(2)); - let cpu_thread_barrier = cpus_thread_barrier.clone(); - - #[cfg(target_arch = "aarch64")] - { - let mut kvi = kvm_bindings::kvm_vcpu_init::default(); - KVM_FDS - .load() - .vm_fd - .as_ref() - .unwrap() - .get_preferred_target(&mut kvi) - .unwrap(); - kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2; - cpu.fd.vcpu_init(&kvi).unwrap(); - } - - // Test cpu life cycle as: - // Created -> Paused -> Running -> Paused -> Running -> Destroy - let cpu_arc = Arc::new(cpu); - CPU::start(cpu_arc.clone(), cpu_thread_barrier, true).unwrap(); - - // Wait for CPU thread init signal hook - std::thread::sleep(Duration::from_millis(50)); - cpus_thread_barrier.wait(); - let (cpu_state, _) = &*cpu_arc.state; - assert_eq!(*cpu_state.lock().unwrap(), CpuLifecycleState::Paused); - drop(cpu_state); - - assert!(cpu_arc.resume().is_ok()); - - // Wait for CPU finish state change. - std::thread::sleep(Duration::from_millis(50)); - let (cpu_state, _) = &*cpu_arc.state; - assert_eq!(*cpu_state.lock().unwrap(), CpuLifecycleState::Running); - drop(cpu_state); - - assert!(cpu_arc.pause().is_ok()); - - // Wait for CPU finish state change. - std::thread::sleep(Duration::from_millis(50)); - let (cpu_state, _) = &*cpu_arc.state; - assert_eq!(*cpu_state.lock().unwrap(), CpuLifecycleState::Paused); - drop(cpu_state); - - assert!(cpu_arc.resume().is_ok()); - // Wait for CPU finish state change. - std::thread::sleep(Duration::from_millis(50)); - - assert!(cpu_arc.destroy().is_ok()); - - // Wait for CPU finish state change. - std::thread::sleep(Duration::from_millis(50)); - let (cpu_state, _) = &*cpu_arc.state; - assert_eq!(*cpu_state.lock().unwrap(), CpuLifecycleState::Nothing); - drop(cpu_state); - } - #[test] fn test_cpu_get_topu() { let test_nr_cpus: u8 = 16; @@ -929,6 +654,8 @@ mod tests { let microvm_cpu_topo = CpuTopology { sockets: test_nr_cpus, + dies: 1, + clusters: 1, cores: 1, threads: 1, nrcpus: test_nr_cpus, @@ -936,15 +663,51 @@ mod tests { online_mask: Arc::new(Mutex::new(mask)), }; - assert_eq!(microvm_cpu_topo.get_topo(0), (0, 0, 0)); - assert_eq!(microvm_cpu_topo.get_topo(4), (4, 0, 0)); - assert_eq!(microvm_cpu_topo.get_topo(8), (8, 0, 0)); - assert_eq!(microvm_cpu_topo.get_topo(15), (15, 0, 0)); + assert_eq!(microvm_cpu_topo.get_topo_item(0), (0, 0, 0, 0, 0)); + assert_eq!(microvm_cpu_topo.get_topo_item(4), (4, 0, 0, 0, 0)); + assert_eq!(microvm_cpu_topo.get_topo_item(8), (8, 0, 0, 0, 0)); + assert_eq!(microvm_cpu_topo.get_topo_item(15), (15, 0, 0, 0, 0)); + + let mask = Vec::with_capacity(test_nr_cpus as usize); + let microvm_cpu_topo_x86 = CpuTopology { + sockets: 1, + dies: 2, + clusters: 1, + cores: 4, + threads: 2, + nrcpus: test_nr_cpus, + max_cpus: test_nr_cpus, + online_mask: Arc::new(Mutex::new(mask)), + }; + + assert_eq!(microvm_cpu_topo_x86.get_topo_item(0), (0, 0, 0, 0, 0)); + assert_eq!(microvm_cpu_topo_x86.get_topo_item(4), (0, 0, 0, 2, 0)); + assert_eq!(microvm_cpu_topo_x86.get_topo_item(8), (0, 1, 0, 0, 0)); + assert_eq!(microvm_cpu_topo_x86.get_topo_item(15), (0, 1, 0, 3, 1)); + + let mask = Vec::with_capacity(test_nr_cpus as usize); + let microvm_cpu_topo_arm = CpuTopology { + sockets: 1, + dies: 1, + clusters: 2, + cores: 4, + threads: 2, + nrcpus: test_nr_cpus, + max_cpus: test_nr_cpus, + online_mask: Arc::new(Mutex::new(mask)), + }; + + assert_eq!(microvm_cpu_topo_arm.get_topo_item(0), (0, 0, 0, 0, 0)); + assert_eq!(microvm_cpu_topo_arm.get_topo_item(4), (0, 0, 0, 2, 0)); + 
assert_eq!(microvm_cpu_topo_arm.get_topo_item(8), (0, 0, 1, 0, 0)); + assert_eq!(microvm_cpu_topo_arm.get_topo_item(15), (0, 0, 1, 3, 1)); let test_nr_cpus: u8 = 32; let mask = Vec::with_capacity(test_nr_cpus as usize); let test_cpu_topo = CpuTopology { sockets: 2, + dies: 1, + clusters: 1, cores: 4, threads: 2, nrcpus: test_nr_cpus, @@ -952,14 +715,14 @@ mod tests { online_mask: Arc::new(Mutex::new(mask)), }; - assert_eq!(test_cpu_topo.get_topo(0), (0, 0, 0)); - assert_eq!(test_cpu_topo.get_topo(4), (0, 2, 0)); - assert_eq!(test_cpu_topo.get_topo(7), (0, 3, 1)); - assert_eq!(test_cpu_topo.get_topo(11), (1, 1, 1)); - assert_eq!(test_cpu_topo.get_topo(15), (1, 3, 1)); - assert_eq!(test_cpu_topo.get_topo(17), (2, 0, 1)); - assert_eq!(test_cpu_topo.get_topo(23), (2, 3, 1)); - assert_eq!(test_cpu_topo.get_topo(29), (3, 2, 1)); - assert_eq!(test_cpu_topo.get_topo(31), (3, 3, 1)); + assert_eq!(test_cpu_topo.get_topo_item(0), (0, 0, 0, 0, 0)); + assert_eq!(test_cpu_topo.get_topo_item(4), (0, 0, 0, 2, 0)); + assert_eq!(test_cpu_topo.get_topo_item(7), (0, 0, 0, 3, 1)); + assert_eq!(test_cpu_topo.get_topo_item(11), (1, 0, 0, 1, 1)); + assert_eq!(test_cpu_topo.get_topo_item(15), (1, 0, 0, 3, 1)); + assert_eq!(test_cpu_topo.get_topo_item(17), (2, 0, 0, 0, 1)); + assert_eq!(test_cpu_topo.get_topo_item(23), (2, 0, 0, 3, 1)); + assert_eq!(test_cpu_topo.get_topo_item(29), (3, 0, 0, 2, 1)); + assert_eq!(test_cpu_topo.get_topo_item(31), (3, 0, 0, 3, 1)); } } diff --git a/cpu/src/x86_64/cpuid.rs b/cpu/src/x86_64/cpuid.rs index 1084b271b796e5393bc96e23cccf682783e63681..f7b8d7525bb9d0027ac6d3e4b9adf5dd5c0b5359 100644 --- a/cpu/src/x86_64/cpuid.rs +++ b/cpu/src/x86_64/cpuid.rs @@ -12,20 +12,17 @@ use core::arch::x86_64::__cpuid_count; -pub fn host_cpuid( +pub unsafe fn host_cpuid( leaf: u32, subleaf: u32, - eax: *mut u32, - ebx: *mut u32, - ecx: *mut u32, - edx: *mut u32, + eax: &mut u32, + ebx: &mut u32, + ecx: &mut u32, + edx: &mut u32, ) { - unsafe { - let cpuid = __cpuid_count(leaf, subleaf); - - *eax = cpuid.eax; - *ebx = cpuid.ebx; - *ecx = cpuid.ecx; - *edx = cpuid.edx; - } + let cpuid = __cpuid_count(leaf, subleaf); + *eax = cpuid.eax; + *ebx = cpuid.ebx; + *ecx = cpuid.ecx; + *edx = cpuid.edx; } diff --git a/cpu/src/x86_64/mod.rs b/cpu/src/x86_64/mod.rs index 37e0f84781b8ffbe03dcb6b86cbdb03cc1252d54..09fdf5db5a5559770b226f84a17529785b031f7d 100644 --- a/cpu/src/x86_64/mod.rs +++ b/cpu/src/x86_64/mod.rs @@ -10,22 +10,27 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
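// The cpuid.rs change above makes host_cpuid an explicit `unsafe fn` over the core::arch
// intrinsic. A standalone sketch of the same pattern (x86_64 only), reading the vendor
// string from leaf 0; example_read_vendor is a hypothetical helper, not part of the patch:
#[cfg(target_arch = "x86_64")]
fn example_read_vendor() -> String {
    // SAFETY: CPUID is available on every x86_64 host this code targets, the same
    // assumption the host_cpuid wrapper relies on.
    let r = unsafe { core::arch::x86_64::__cpuid_count(0, 0) };
    let mut bytes = Vec::with_capacity(12);
    bytes.extend_from_slice(&r.ebx.to_le_bytes()); // vendor string order is EBX, EDX, ECX
    bytes.extend_from_slice(&r.edx.to_le_bytes());
    bytes.extend_from_slice(&r.ecx.to_le_bytes());
    String::from_utf8_lossy(&bytes).into_owned()
}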
-pub mod caps; mod cpuid; use std::sync::{Arc, Mutex}; +use anyhow::{Context, Result}; use kvm_bindings::{ - kvm_debugregs, kvm_fpu, kvm_lapic_state, kvm_mp_state, kvm_msr_entry, kvm_regs, kvm_segment, - kvm_sregs, kvm_vcpu_events, kvm_xcrs, kvm_xsave, Msrs, KVM_MAX_CPUID_ENTRIES, - KVM_MP_STATE_RUNNABLE, KVM_MP_STATE_UNINITIALIZED, + kvm_cpuid_entry2 as CpuidEntry2, kvm_debugregs as DebugRegs, kvm_fpu as Fpu, + kvm_lapic_state as LapicState, kvm_mp_state as MpState, kvm_msr_entry as MsrEntry, + kvm_regs as Regs, kvm_segment as Segment, kvm_sregs as Sregs, kvm_vcpu_events as VcpuEvents, + kvm_xcrs as Xcrs, kvm_xsave as Xsave, CpuId, + KVM_CPUID_FLAG_SIGNIFCANT_INDEX as CPUID_FLAG_SIGNIFICANT_INDEX, + KVM_MP_STATE_RUNNABLE as MP_STATE_RUNNABLE, + KVM_MP_STATE_UNINITIALIZED as MP_STATE_UNINITIALIZED, }; -use kvm_ioctls::{Kvm, VcpuFd}; use self::cpuid::host_cpuid; -use crate::errors::{Result, ResultExt}; use crate::CPU; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; +use migration::{ + DeviceStateDesc, FieldDesc, MigrationError, MigrationHook, MigrationManager, StateTransfer, +}; +use migration_derive::{ByteCode, Desc}; use util::byte_code::ByteCode; const ECX_EPB_SHIFT: u32 = 3; @@ -49,9 +54,28 @@ const MSR_LIST: &[u32] = &[ const MSR_IA32_MISC_ENABLE: u32 = 0x01a0; const MSR_IA32_MISC_ENABLE_FAST_STRING: u64 = 0x1; +const ECX_INVALID: u32 = 0u32 << 8; +const ECX_THREAD: u32 = 1u32 << 8; +const ECX_CORE: u32 = 2u32 << 8; +const ECX_DIE: u32 = 5u32 << 8; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum X86RegsIndex { + Regs, + Sregs, + Fpu, + MpState, + LapicState, + MsrEntry, + VcpuEvents, + Xsave, + Xcrs, + DebugRegs, +} + /// X86 CPU booting configure information #[allow(clippy::upper_case_acronyms)] -#[derive(Default, Clone)] +#[derive(Default, Clone, Debug)] pub struct X86CPUBootConfig { pub prot64_mode: bool, /// Register %rip value @@ -63,8 +87,8 @@ pub struct X86CPUBootConfig { /// zero page address, as the second parameter of __startup_64 /// arch/x86/kernel/head_64.S:86 pub zero_page: u64, - pub code_segment: kvm_segment, - pub data_segment: kvm_segment, + pub code_segment: Segment, + pub data_segment: Segment, pub gdt_base: u64, pub gdt_size: u16, pub idt_base: u64, @@ -72,25 +96,78 @@ pub struct X86CPUBootConfig { pub pml4_start: u64, } +#[allow(clippy::upper_case_acronyms)] +#[derive(Default, Copy, Clone, Debug)] +pub struct X86CPUTopology { + threads: u8, + cores: u8, + dies: u8, +} + +impl X86CPUTopology { + pub fn new() -> Self { + X86CPUTopology::default() + } + + pub fn set_topology(mut self, toplogy: (u8, u8, u8)) -> Self { + self.threads = toplogy.0; + self.cores = toplogy.1; + self.dies = toplogy.2; + self + } +} + /// The state of vCPU's register. 
#[allow(clippy::upper_case_acronyms)] #[repr(C)] -#[derive(Copy, Clone, Desc, ByteCode)] +#[derive(Desc, ByteCode)] #[desc_version(compat_version = "0.1.0")] pub struct X86CPUState { - nr_vcpus: u32, - apic_id: u32, - regs: kvm_regs, - sregs: kvm_sregs, - fpu: kvm_fpu, - mp_state: kvm_mp_state, - lapic: kvm_lapic_state, - msr_len: usize, - msr_list: [kvm_msr_entry; 256], - cpu_events: kvm_vcpu_events, - xsave: kvm_xsave, - xcrs: kvm_xcrs, - debugregs: kvm_debugregs, + max_vcpus: u8, + nr_threads: u8, + nr_cores: u8, + nr_dies: u8, + nr_sockets: u8, + pub apic_id: u32, + pub regs: Regs, + pub sregs: Sregs, + pub fpu: Fpu, + pub mp_state: MpState, + pub lapic: LapicState, + pub msr_len: usize, + pub msr_list: [MsrEntry; 256], + pub cpu_events: VcpuEvents, + pub xsave: Xsave, + pub xcrs: Xcrs, + pub debugregs: DebugRegs, +} + +impl Clone for X86CPUState { + fn clone(&self) -> Self { + let mut xsave: Xsave = Default::default(); + // we just clone xsave.region, because xsave.extra does not save + // valid values and it is not allowed to be cloned. + xsave.region = self.xsave.region; + Self { + max_vcpus: self.max_vcpus, + nr_threads: self.nr_threads, + nr_cores: self.nr_cores, + nr_dies: self.nr_dies, + nr_sockets: self.nr_sockets, + apic_id: self.apic_id, + regs: self.regs, + sregs: self.sregs, + fpu: self.fpu, + mp_state: self.mp_state, + lapic: self.lapic, + msr_len: self.msr_len, + msr_list: self.msr_list, + cpu_events: self.cpu_events, + xsave, + xcrs: self.xcrs, + debugregs: self.debugregs, + } + } } impl X86CPUState { @@ -99,26 +176,30 @@ impl X86CPUState { /// # Arguments /// /// * `vcpu_id` - ID of this `CPU`. - /// * `nr_vcpus` - Number of vcpus. - pub fn new(vcpu_id: u32, nr_vcpus: u32) -> Self { - let mp_state = kvm_mp_state { + /// * `max_vcpus` - Number of vcpus. + pub fn new(vcpu_id: u32, max_vcpus: u8) -> Self { + let mp_state = MpState { mp_state: if vcpu_id == 0 { - KVM_MP_STATE_RUNNABLE + MP_STATE_RUNNABLE } else { - KVM_MP_STATE_UNINITIALIZED + MP_STATE_UNINITIALIZED }, }; X86CPUState { apic_id: vcpu_id, - nr_vcpus, + max_vcpus, mp_state, + nr_threads: 1, + nr_cores: 1, + nr_dies: 1, + nr_sockets: 1, ..Default::default() } } pub fn set(&mut self, cpu_state: &Arc>) { let locked_cpu_state = cpu_state.lock().unwrap(); - self.nr_vcpus = locked_cpu_state.nr_vcpus; + self.max_vcpus = locked_cpu_state.max_vcpus; self.apic_id = locked_cpu_state.apic_id; self.regs = locked_cpu_state.regs; self.sregs = locked_cpu_state.sregs; @@ -128,82 +209,25 @@ impl X86CPUState { self.msr_len = locked_cpu_state.msr_len; self.msr_list = locked_cpu_state.msr_list; self.cpu_events = locked_cpu_state.cpu_events; - self.xsave = locked_cpu_state.xsave; + self.xsave = Default::default(); + self.xsave.region = locked_cpu_state.xsave.region; self.xcrs = locked_cpu_state.xcrs; self.debugregs = locked_cpu_state.debugregs; } - /// Set register value in `X86CPUState` according to `boot_config`. + /// Set cpu topology /// /// # Arguments /// - /// * `vcpu_fd` - Vcpu file descriptor in kvm. - /// * `boot_config` - Boot message from boot_loader. 
- pub fn set_boot_config( - &mut self, - vcpu_fd: &Arc, - boot_config: &X86CPUBootConfig, - ) -> Result<()> { - self.setup_lapic(vcpu_fd)?; - self.setup_regs(boot_config); - self.setup_sregs(vcpu_fd, boot_config)?; - self.setup_fpu(); - self.setup_msrs(); - + /// * `topology` - X86 CPU Topology + pub fn set_cpu_topology(&mut self, topology: &X86CPUTopology) -> Result<()> { + self.nr_threads = topology.threads; + self.nr_cores = topology.cores; + self.nr_dies = topology.dies; Ok(()) } - /// Reset register value with `X86CPUState`. - /// - /// # Arguments - /// - /// * `vcpu_fd` - Vcpu file descriptor in kvm. - /// * `caps` - Vcpu capabilities in kvm. - pub fn reset_vcpu(&self, vcpu_fd: &Arc, caps: &caps::X86CPUCaps) -> Result<()> { - self.setup_cpuid(vcpu_fd) - .chain_err(|| format!("Failed to set cpuid for CPU {}", self.apic_id))?; - - vcpu_fd - .set_mp_state(self.mp_state) - .chain_err(|| format!("Failed to set mpstate for CPU {}", self.apic_id))?; - vcpu_fd - .set_sregs(&self.sregs) - .chain_err(|| format!("Failed to set sregs for CPU {}", self.apic_id))?; - vcpu_fd - .set_regs(&self.regs) - .chain_err(|| format!("Failed to set regs for CPU {}", self.apic_id))?; - if caps.has_xsave { - vcpu_fd - .set_xsave(&self.xsave) - .chain_err(|| format!("Failed to set xsave for CPU {}", self.apic_id))?; - } else { - vcpu_fd - .set_fpu(&self.fpu) - .chain_err(|| format!("Failed to set fpu for CPU {}", self.apic_id))?; - } - if caps.has_xcrs { - vcpu_fd - .set_xcrs(&self.xcrs) - .chain_err(|| format!("Failed to set xcrs for CPU {}", self.apic_id))?; - } - vcpu_fd - .set_debug_regs(&self.debugregs) - .chain_err(|| format!("Failed to set debug register for CPU {}", self.apic_id))?; - vcpu_fd - .set_lapic(&self.lapic) - .chain_err(|| format!("Failed to set lapic for CPU {}", self.apic_id))?; - vcpu_fd - .set_msrs(&Msrs::from_entries(&self.msr_list[0..self.msr_len])) - .chain_err(|| format!("Failed to set msrs for CPU {}", self.apic_id))?; - vcpu_fd - .set_vcpu_events(&self.cpu_events) - .chain_err(|| format!("Failed to set vcpu events for CPU {}", self.apic_id))?; - - Ok(()) - } - - #[allow(clippy::cast_ptr_alignment)] - fn setup_lapic(&mut self, vcpu_fd: &Arc) -> Result<()> { + pub fn setup_lapic(&mut self, lapic: LapicState) -> Result<()> { // Disable nmi and external interrupt before enter protected mode // See: https://elixir.bootlin.com/linux/v4.19.123/source/arch/x86/include/asm/apicdef.h const APIC_LVT0: usize = 0x350; @@ -212,31 +236,29 @@ impl X86CPUState { const APIC_MODE_EXTINT: u32 = 0x7; const APIC_ID: usize = 0x20; - self.lapic = vcpu_fd - .get_lapic() - .chain_err(|| format!("Failed to get lapic for CPU {}/KVM", self.apic_id))?; + self.lapic = lapic; - // The member regs in struct kvm_lapic_state is a u8 array with 1024 entries, - // so it's saft to cast u8 pointer to u32 at position APIC_LVT0 and APIC_LVT1. + // SAFETY: The member regs in struct LapicState is a u8 array with 1024 entries, + // so it's safe to cast u8 pointer to u32 at position APIC_LVT0 and APIC_LVT1. // Safe because all value in this unsafe block is certain. unsafe { let apic_lvt_lint0 = &mut self.lapic.regs[APIC_LVT0..] as *mut [i8] as *mut u32; - *apic_lvt_lint0 &= !0x700; - *apic_lvt_lint0 |= APIC_MODE_EXTINT << 8; + let modified = (apic_lvt_lint0.read_unaligned() & !0x700) | (APIC_MODE_EXTINT << 8); + apic_lvt_lint0.write_unaligned(modified); let apic_lvt_lint1 = &mut self.lapic.regs[APIC_LVT1..] 
as *mut [i8] as *mut u32; - *apic_lvt_lint1 &= !0x700; - *apic_lvt_lint1 |= APIC_MODE_NMI << 8; + let modified = (apic_lvt_lint1.read_unaligned() & !0x700) | (APIC_MODE_NMI << 8); + apic_lvt_lint1.write_unaligned(modified); let apic_id = &mut self.lapic.regs[APIC_ID..] as *mut [i8] as *mut u32; - *apic_id = self.apic_id << 24; + apic_id.write_unaligned(self.apic_id << 24); } Ok(()) } - fn setup_regs(&mut self, boot_config: &X86CPUBootConfig) { - self.regs = kvm_regs { + pub fn setup_regs(&mut self, boot_config: &X86CPUBootConfig) { + self.regs = Regs { rflags: 0x0002, // Means processor has been initialized rip: boot_config.boot_ip, rsp: boot_config.boot_sp, @@ -246,22 +268,20 @@ impl X86CPUState { }; } - fn setup_sregs(&mut self, vcpu_fd: &Arc, boot_config: &X86CPUBootConfig) -> Result<()> { - self.sregs = vcpu_fd - .get_sregs() - .chain_err(|| format!("Failed to get sregs for CPU {}/KVM", self.apic_id))?; + pub fn setup_sregs(&mut self, sregs: Sregs, boot_config: &X86CPUBootConfig) -> Result<()> { + self.sregs = sregs; - self.sregs.cs.base = (boot_config.boot_selector as u64) << 4; + self.sregs.cs.base = u64::from(boot_config.boot_selector) << 4; self.sregs.cs.selector = boot_config.boot_selector; - self.sregs.ds.base = (boot_config.boot_selector as u64) << 4; + self.sregs.ds.base = u64::from(boot_config.boot_selector) << 4; self.sregs.ds.selector = boot_config.boot_selector; - self.sregs.es.base = (boot_config.boot_selector as u64) << 4; + self.sregs.es.base = u64::from(boot_config.boot_selector) << 4; self.sregs.es.selector = boot_config.boot_selector; - self.sregs.fs.base = (boot_config.boot_selector as u64) << 4; + self.sregs.fs.base = u64::from(boot_config.boot_selector) << 4; self.sregs.fs.selector = boot_config.boot_selector; - self.sregs.gs.base = (boot_config.boot_selector as u64) << 4; + self.sregs.gs.base = u64::from(boot_config.boot_selector) << 4; self.sregs.gs.selector = boot_config.boot_selector; - self.sregs.ss.base = (boot_config.boot_selector as u64) << 4; + self.sregs.ss.base = u64::from(boot_config.boot_selector) << 4; self.sregs.ss.selector = boot_config.boot_selector; if boot_config.prot64_mode { @@ -271,7 +291,7 @@ impl X86CPUState { Ok(()) } - fn set_prot64_sregs(&mut self, boot_config: &X86CPUBootConfig) { + pub fn set_prot64_sregs(&mut self, boot_config: &X86CPUBootConfig) { // X86_CR0_PE: Protection Enable // EFER_LME: Long mode enable // EFER_LMA: Long mode active @@ -313,19 +333,19 @@ impl X86CPUState { self.sregs.cr0 |= X86_CR0_PG; } - fn setup_fpu(&mut self) { + pub fn setup_fpu(&mut self) { // Default value for fxregs_state.mxcsr // arch/x86/include/asm/fpu/types.h const MXCSR_DEFAULT: u32 = 0x1f80; - self.fpu = kvm_fpu { + self.fpu = Fpu { fcw: 0x37f, mxcsr: MXCSR_DEFAULT, ..Default::default() }; } - fn setup_msrs(&mut self) { + pub fn setup_msrs(&mut self) { // Enable fasting-string operation to improve string // store operations. for (index, msr) in MSR_LIST.iter().enumerate() { @@ -334,23 +354,50 @@ impl X86CPUState { _ => 0u64, }; - self.msr_list[index] = kvm_msr_entry { + self.msr_list[index] = MsrEntry { index: *msr, data, ..Default::default() }; + // usize is enough for storing msr len. 
self.msr_len += 1; } } - fn setup_cpuid(&self, vcpu_fd: &Arc) -> Result<()> { - let sys_fd = match Kvm::new() { - Ok(fd) => fd, - _ => bail!("setup_cpuid: Open /dev/kvm failed"), - }; - let mut cpuid = sys_fd - .get_supported_cpuid(KVM_MAX_CPUID_ENTRIES) - .chain_err(|| format!("Failed to get supported cpuid for CPU {}/KVM", self.apic_id))?; + pub fn adjust_cpuid(&self, cpuid: &mut CpuId) -> Result<()> { + if self.nr_dies < 2 { + return Ok(()); + } + + // Intel CPU topology with multi-dies support requires CPUID[0x1f]. + let entries = cpuid.as_mut_slice(); + for entry in entries.iter_mut() { + if entry.function == 0 { + if entry.eax >= 0x1f { + return Ok(()); + } else { + entry.eax = 0x1f; + } + break; + } + } + for index in 0..4 { + let entry = CpuidEntry2 { + function: 0x1f, + index, + ..Default::default() + }; + cpuid.push(entry)?; + } + Ok(()) + } + + pub fn setup_cpuid(&self, cpuid: &mut CpuId) -> Result<()> { + // nr_xx is no less than 1. + let core_offset = 32u32 - (self.nr_threads - 1).leading_zeros(); + let die_offset = (32u32 - (self.nr_cores - 1).leading_zeros()) + core_offset; + let pkg_offset = (32u32 - (self.nr_dies - 1).leading_zeros()) + die_offset; + self.adjust_cpuid(cpuid)?; let entries = cpuid.as_mut_slice(); for entry in entries.iter_mut() { @@ -363,29 +410,36 @@ impl X86CPUState { } } 2 => { - host_cpuid( - 2, - 0, - &mut entry.eax, - &mut entry.ebx, - &mut entry.ecx, - &mut entry.edx, - ); + // SAFETY: entry is from KVM_GET_SUPPORTED_CPUID. + unsafe { + host_cpuid( + 2, + 0, + &mut entry.eax, + &mut entry.ebx, + &mut entry.ecx, + &mut entry.edx, + ); + } } 4 => { // cache info: needed for Pentium Pro compatibility // Passthrough host cache info directly to guest - host_cpuid( - 4, - entry.index, - &mut entry.eax, - &mut entry.ebx, - &mut entry.ecx, - &mut entry.edx, - ); + // SAFETY: entry is from KVM_GET_SUPPORTED_CPUID. + unsafe { + host_cpuid( + 4, + entry.index, + &mut entry.eax, + &mut entry.ebx, + &mut entry.ecx, + &mut entry.edx, + ); + } entry.eax &= !0xfc00_0000; - if entry.eax & 0x0001_ffff != 0 && self.nr_vcpus > 1 { - entry.eax |= (self.nr_vcpus - 1) << 26; + if entry.eax & 0x0001_ffff != 0 && self.max_vcpus > 1 { + // max_vcpus is no less than 1. + entry.eax |= (u32::from(self.max_vcpus) - 1) << 26; } } 6 => { @@ -402,199 +456,118 @@ impl X86CPUState { } 0xb => { // Extended Topology Enumeration Leaf - entry.edx = self.apic_id as u32; + entry.edx = self.apic_id; entry.ecx = entry.index & 0xff; match entry.index { 0 => { - entry.eax = 0u32; - entry.ebx = 1u32; - entry.ecx |= 1u32 << 8; + entry.eax = core_offset; + entry.ebx = u32::from(self.nr_threads); + entry.ecx |= ECX_THREAD; } 1 => { - entry.eax = 32u32 - self.nr_vcpus.leading_zeros(); - entry.ebx = self.nr_vcpus; - entry.ecx |= 2u32 << 8; + entry.eax = pkg_offset; + // nr_cpus is no more than u8::MAX, multiply will not overflow. + entry.ebx = u32::from(self.nr_threads * self.nr_cores); + entry.ecx |= ECX_CORE; } _ => { - entry.ebx = 0xff; + entry.eax = 0; + entry.ebx = 0; + entry.ecx |= ECX_INVALID; + } + } + } + 0x1f => { + if self.nr_dies < 2 { + entry.eax = 0; + entry.ebx = 0; + entry.ecx = 0; + entry.edx = 0; + continue; + } + + entry.edx = self.apic_id; + entry.ecx = entry.index & 0xff; + entry.flags = CPUID_FLAG_SIGNIFICANT_INDEX; + + match entry.index { + 0 => { + entry.eax = core_offset; + entry.ebx = u32::from(self.nr_threads); + entry.ecx |= ECX_THREAD; + } + 1 => { + entry.eax = die_offset; + // nr_cpus is no more than u8::MAX, multiply will not overflow. 
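+                        // Purely illustrative example (not from the patch): with nr_threads=2,
+                        // nr_cores=4 and nr_dies=2, the shift widths are core_offset=1,
+                        // die_offset=3 and pkg_offset=4, so this Core level reports EAX=3 and
+                        // EBX=2*4=8 logical processors per die.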
+ entry.ebx = u32::from(self.nr_cores * self.nr_threads); + entry.ecx |= ECX_CORE; + } + 2 => { + entry.eax = pkg_offset; + // nr_cpus is no more than u8::MAX, multiply will not overflow. + entry.ebx = u32::from(self.nr_dies * self.nr_cores * self.nr_threads); + entry.ecx |= ECX_DIE; + } + _ => { + entry.eax = 0; + entry.ebx = 0; + entry.ecx |= ECX_INVALID; } } - entry.ebx &= 0xffff; } 0x8000_0002..=0x8000_0004 => { // Passthrough host cpu model name directly to guest - host_cpuid( - entry.function, - entry.index, - &mut entry.eax, - &mut entry.ebx, - &mut entry.ecx, - &mut entry.edx, - ); + // SAFETY: entry is from KVM_GET_SUPPORTED_CPUID. + unsafe { + host_cpuid( + entry.function, + entry.index, + &mut entry.eax, + &mut entry.ebx, + &mut entry.ecx, + &mut entry.edx, + ); + } } _ => (), } } - vcpu_fd - .set_cpuid2(&cpuid) - .chain_err(|| format!("Failed to set cpuid for CPU {}/KVM", self.apic_id))?; Ok(()) } } impl StateTransfer for CPU { - fn get_state_vec(&self) -> migration::errors::Result> { - let mut msr_entries = self.caps.create_msr_entries(); - let mut cpu_state_locked = self.arch_cpu.lock().unwrap(); - - cpu_state_locked.mp_state = self.fd.get_mp_state()?; - cpu_state_locked.regs = self.fd.get_regs()?; - cpu_state_locked.sregs = self.fd.get_sregs()?; - if self.caps.has_xsave { - cpu_state_locked.xsave = self.fd.get_xsave()?; - } else { - cpu_state_locked.fpu = self.fd.get_fpu()?; - } - if self.caps.has_xcrs { - cpu_state_locked.xcrs = self.fd.get_xcrs()?; - } - cpu_state_locked.debugregs = self.fd.get_debug_regs()?; - cpu_state_locked.lapic = self.fd.get_lapic()?; - cpu_state_locked.msr_len = self.fd.get_msrs(&mut msr_entries)?; - for (i, entry) in msr_entries.as_slice().iter().enumerate() { - cpu_state_locked.msr_list[i] = *entry; - } - cpu_state_locked.cpu_events = self.fd.get_vcpu_events()?; - - Ok(cpu_state_locked.as_bytes().to_vec()) + fn get_state_vec(&self) -> Result> { + let hypervisor_cpu = self.hypervisor_cpu(); + + hypervisor_cpu.get_regs(self.arch_cpu.clone(), X86RegsIndex::MpState)?; + hypervisor_cpu.get_regs(self.arch_cpu.clone(), X86RegsIndex::Regs)?; + hypervisor_cpu.get_regs(self.arch_cpu.clone(), X86RegsIndex::Sregs)?; + hypervisor_cpu.get_regs(self.arch_cpu.clone(), X86RegsIndex::Xsave)?; + hypervisor_cpu.get_regs(self.arch_cpu.clone(), X86RegsIndex::Fpu)?; + hypervisor_cpu.get_regs(self.arch_cpu.clone(), X86RegsIndex::Xcrs)?; + hypervisor_cpu.get_regs(self.arch_cpu.clone(), X86RegsIndex::DebugRegs)?; + hypervisor_cpu.get_regs(self.arch_cpu.clone(), X86RegsIndex::LapicState)?; + hypervisor_cpu.get_regs(self.arch_cpu.clone(), X86RegsIndex::MsrEntry)?; + hypervisor_cpu.get_regs(self.arch_cpu.clone(), X86RegsIndex::VcpuEvents)?; + + Ok(self.arch_cpu.lock().unwrap().as_bytes().to_vec()) } - fn set_state(&self, state: &[u8]) -> migration::errors::Result<()> { - let cpu_state = *X86CPUState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("CPU"))?; + fn set_state(&self, state: &[u8]) -> Result<()> { + let cpu_state = X86CPUState::from_bytes(state) + .with_context(|| MigrationError::FromBytesError("CPU"))?; let mut cpu_state_locked = self.arch_cpu.lock().unwrap(); - *cpu_state_locked = cpu_state; + *cpu_state_locked = cpu_state.clone(); Ok(()) } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&X86CPUState::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&X86CPUState::descriptor().name).unwrap_or(!0) } } impl MigrationHook for CPU {} - -#[cfg(test)] -mod test { - use 
super::*; - use hypervisor::kvm::{KVMFds, KVM_FDS}; - use kvm_bindings::kvm_segment; - use serial_test::serial; - use std::sync::Arc; - - #[test] - #[serial] - fn test_x86_64_cpu() { - let kvm_fds = KVMFds::new(); - if kvm_fds.vm_fd.is_none() { - return; - } - KVM_FDS.store(Arc::new(kvm_fds)); - - let code_seg = kvm_segment { - base: 0, - limit: 1048575, - selector: 16, - type_: 11, - present: 1, - dpl: 0, - db: 0, - s: 1, - l: 1, - g: 1, - avl: 0, - unusable: 0, - padding: 0, - }; - let data_seg = kvm_segment { - base: 0, - limit: 1048575, - selector: 24, - type_: 3, - present: 1, - dpl: 0, - db: 1, - s: 1, - l: 0, - g: 1, - avl: 0, - unusable: 0, - padding: 0, - }; - let cpu_config = X86CPUBootConfig { - prot64_mode: true, - boot_ip: 0, - boot_sp: 0, - boot_selector: 0, - zero_page: 0x0000_7000, - code_segment: code_seg, - data_segment: data_seg, - gdt_base: 0x500u64, - gdt_size: 16, - idt_base: 0x520u64, - idt_size: 8, - pml4_start: 0x0000_9000, - }; - - // For `get_lapic` in realize function to work, - // you need to create a irq_chip for VM before creating the VCPU. - let kvm_fds = KVM_FDS.load(); - let vm_fd = kvm_fds.vm_fd.as_ref().unwrap(); - vm_fd.create_irq_chip().unwrap(); - let vcpu = Arc::new(vm_fd.create_vcpu(0).unwrap()); - let mut x86_cpu = X86CPUState::new(0, 1); - //test `set_boot_config` function - assert!(x86_cpu.set_boot_config(&vcpu, &cpu_config).is_ok()); - - // test setup special registers - let cpu_caps = caps::X86CPUCaps::init_capabilities(); - assert!(x86_cpu.reset_vcpu(&vcpu, &cpu_caps).is_ok()); - let x86_sregs = vcpu.get_sregs().unwrap(); - assert_eq!(x86_sregs.cs, code_seg); - assert_eq!(x86_sregs.ds, data_seg); - assert_eq!(x86_sregs.es, data_seg); - assert_eq!(x86_sregs.fs, data_seg); - assert_eq!(x86_sregs.gs, data_seg); - assert_eq!(x86_sregs.ss, data_seg); - assert_eq!(x86_sregs.gdt.base, cpu_config.gdt_base); - assert_eq!(x86_sregs.gdt.limit, cpu_config.gdt_size); - assert_eq!(x86_sregs.idt.base, cpu_config.idt_base); - assert_eq!(x86_sregs.idt.limit, cpu_config.idt_size); - assert_eq!(x86_sregs.cr0 & 0x1, 1); - assert_eq!((x86_sregs.cr0 & 0x8000_0000) >> 31, 1); - assert_eq!(x86_sregs.cr3, cpu_config.pml4_start); - assert_eq!((x86_sregs.cr4 & 0x20) >> 5, 1); - assert_eq!((x86_sregs.efer & 0x700) >> 8, 5); - - // test setup_regs function - let x86_regs = vcpu.get_regs().unwrap(); - assert_eq!(x86_regs.rflags, 0x0002); - assert_eq!(x86_regs.rip, 0); - assert_eq!(x86_regs.rsp, 0); - assert_eq!(x86_regs.rbp, 0); - assert_eq!(x86_regs.rsi, 0x0000_7000); - - // test setup_fpu function - if !cpu_caps.has_xsave { - let x86_fpu = vcpu.get_fpu().unwrap(); - assert_eq!(x86_fpu.fcw, 0x37f); - } - } -} diff --git a/devices/Cargo.toml b/devices/Cargo.toml index 341d0cc6ea8bad2b9f85be7df626ea506d2c3550..3dca2b4185028a50057e79935c404deccc6da0ce 100644 --- a/devices/Cargo.toml +++ b/devices/Cargo.toml @@ -1,30 +1,59 @@ [package] name = "devices" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" license = "Mulan PSL v2" +description = "Misc device emulation" [dependencies] -error-chain = "0.12.4" -libc = ">=0.2.71" -log = "0.4.8" -kvm-ioctls = "0.6.0" -serde = { version = ">=1.0.114", features = ["derive"] } -vmm-sys-util = ">=0.7.0" +thiserror = "1.0" +anyhow = "1.0" +libc = "0.2" +log = "0.4" +serde = { version = "1.0", features = ["derive"] } +strum = "0.24.1" +strum_macros = "0.24.3" +vmm-sys-util = "0.12.1" +byteorder = "1.4.3" +drm-fourcc = ">=2.2.0" +once_cell = "1.18.0" +v4l2-sys-mit = { version = 
"0.3.0", optional = true } +serde_json = "1.0" +rand = "0.8.5" address_space = { path = "../address_space" } -hypervisor = { path = "../hypervisor" } +cpu = { path = "../cpu" } machine_manager = { path = "../machine_manager" } migration = { path = "../migration" } -migration_derive = { path = "../migration_derive" } -sysbus = { path = "../sysbus" } +migration_derive = { path = "../migration/migration_derive" } util = { path = "../util" } acpi = { path = "../acpi" } -byteorder = "1.3.4" -kvm-bindings = ">=0.3.0" - -[dev-dependencies] -serial_test = "0.5.1" +block_backend = { path = "../block_backend"} +chardev_backend = { path = "../chardev_backend" } +ui = { path = "../ui" } +pulse = { version = "2.27", package = "libpulse-binding", optional = true } +psimple = { version = "2.27", package = "libpulse-simple-binding", optional = true } +alsa = { version = "0.7.0", optional = true } +rusb = { version = "0.9", optional = true } +libusb1-sys = { version = "0.6.5", optional = true } +trace = { path = "../trace" } +clap = { version = "=4.1.4", default-features = false, features = ["std", "derive"] } +hisysevent = { path = "../hisysevent" } [features] default = [] +scream = ["machine_manager/scream"] +scream_alsa = ["scream", "dep:alsa", "machine_manager/scream_alsa"] +scream_pulseaudio = ["scream", "dep:pulse", "dep:psimple", "machine_manager/scream_pulseaudio"] +scream_ohaudio = ["scream", "machine_manager/scream_ohaudio", "util/scream_ohaudio"] +pvpanic = ["machine_manager/pvpanic"] +demo_device = ["machine_manager/demo_device", "ui/console", "util/pixman"] +usb_host = ["dep:libusb1-sys", "dep:rusb", "machine_manager/usb_host", "util/usb_host"] +usb_camera = ["machine_manager/usb_camera"] +usb_camera_v4l2 = ["usb_camera", "dep:v4l2-sys-mit", "machine_manager/usb_camera_v4l2", "util/usb_camera_v4l2"] +usb_camera_oh = ["usb_camera", "machine_manager/usb_camera_oh", "util/usb_camera_oh"] +ramfb = ["ui/console", "util/pixman"] +usb_uas = [] +trace_to_logger = [] +trace_to_ftrace = [] +trace_to_hitrace = [] diff --git a/devices/src/acpi/cpu_controller.rs b/devices/src/acpi/cpu_controller.rs new file mode 100644 index 0000000000000000000000000000000000000000..797f44d465a1ca456dcba9965a8deddd94b356bc --- /dev/null +++ b/devices/src/acpi/cpu_controller.rs @@ -0,0 +1,571 @@ +// Copyright (c) 2023 China Telecom Co.,Ltd. All rights reserved. +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+
+use std::collections::HashMap;
+use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};
+use std::sync::{Arc, Mutex};
+
+use anyhow::{bail, Context, Result};
+use log::{error, info};
+use vmm_sys_util::eventfd::EventFd;
+
+use crate::sysbus::{SysBus, SysBusDevBase, SysBusDevOps};
+use crate::{convert_bus_mut, Device, DeviceBase, MUT_SYS_BUS};
+use acpi::{
+    AcpiError, AcpiLocalApic, AmlAcquire, AmlAddressSpaceType, AmlArg, AmlBuffer, AmlBuilder,
+    AmlCallWithArgs1, AmlCallWithArgs2, AmlCallWithArgs4, AmlDevice, AmlEisaId, AmlEqual, AmlField,
+    AmlFieldAccessType, AmlFieldLockRule, AmlFieldUnit, AmlFieldUpdateRule, AmlIf, AmlInteger,
+    AmlLocal, AmlMethod, AmlMutex, AmlName, AmlNameDecl, AmlNotify, AmlOne, AmlOpRegion,
+    AmlQWordDesc, AmlRelease, AmlResTemplate, AmlReturn, AmlScopeBuilder, AmlStore, AmlString,
+    AmlZero,
+};
+use address_space::GuestAddress;
+use cpu::{CPUBootConfig, CPUInterface, CPUTopology, CpuLifecycleState, CPU};
+use migration::MigrationManager;
+use util::gen_base_func;
+
+const CPU_ENABLE_FLAG: u8 = 1;
+const CPU_INSERTING_FLAG: u8 = 2;
+const CPU_REMOVING_FLAG: u8 = 4;
+const CPU_EJECT_FLAG: u8 = 8;
+
+const CPU_SELECTION_OFFSET: u64 = 0;
+const CPU_STATUS_OFFSET: u64 = 1;
+const CPU_EVENT_CODE_OFFSET: u64 = 2;
+
+const MADT_CPU_ENABLE_FLAG: usize = 0;
+
+#[derive(Clone)]
+pub struct CpuConfig {
+    // Boot config.
+    boot_config: CPUBootConfig,
+    // Cpu topology.
+    cpu_topology: CPUTopology,
+}
+
+impl CpuConfig {
+    pub fn new(boot_config: CPUBootConfig, cpu_topology: CPUTopology) -> Self {
+        CpuConfig {
+            boot_config,
+            cpu_topology,
+        }
+    }
+}
+
+#[derive(Clone, Default)]
+pub struct CpuController {
+    base: SysBusDevBase,
+    max_cpus: u8,
+    // Hotplug options:
+    // true - hotplug a vcpu.
+    // false - hotunplug a vcpu.
+    // None - nothing.
+    hotplug: Option<Arc<AtomicBool>>,
+    // Device id of the vcpu to be hotplugged.
+    device_id: String,
+    // Id of the vcpu to be hotplugged or hotunplugged.
+    vcpu_id: Arc<AtomicU8>,
+    // Map from hotplugged vcpu id to its device id.
+    id_map: HashMap<u8, String>,
+    // Map from vcpu id to every vcpu of the vm.
+    vcpu_map: HashMap<u8, Arc<CPU>>,
+    // Acpi-selected cpu id (for status check).
+    selected_cpu: u8,
+    // Cpu config information.
+    cpu_config: Option<CpuConfig>,
+    // Hotplug cpu request eventfd.
+    hotplug_cpu_req: Option<Arc<EventFd>>,
+}
+
+impl CpuController {
+    pub fn new(
+        max_cpus: u8,
+        sysbus: &Arc<Mutex<SysBus>>,
+        region_base: u64,
+        region_size: u64,
+        cpu_config: CpuConfig,
+        hotplug_cpu_req: Arc<EventFd>,
+        boot_vcpus: Vec<Arc<CPU>>,
+    ) -> Result<Self> {
+        let mut cpu_controller = CpuController {
+            max_cpus,
+            cpu_config: Some(cpu_config),
+            hotplug_cpu_req: Some(hotplug_cpu_req),
+            ..Default::default()
+        };
+        cpu_controller
+            .set_sys_resource(sysbus, region_base, region_size, "CPUController")
+            .with_context(|| AcpiError::Alignment(region_size.try_into().unwrap()))?;
+        cpu_controller.set_boot_vcpu(boot_vcpus)?;
+        cpu_controller.set_parent_bus(sysbus.clone());
+
+        Ok(cpu_controller)
+    }
+
+    fn eject_cpu(&mut self, vcpu_id: u8) -> Result<()> {
+        let vcpu = self.vcpu_map.get(&vcpu_id).unwrap();
+        vcpu.destroy()?;
+        self.id_map.insert(vcpu_id, "".to_string());
+        Ok(())
+    }
+
+    fn get_cpu_state(&self, vcpu_id: u8) -> Result<CpuLifecycleState> {
+        if let Some(vcpu) = self.vcpu_map.get(&vcpu_id) {
+            let (vcpu_state, _) = vcpu.state();
+            Ok(*vcpu_state.lock().unwrap())
+        } else {
+            Ok(CpuLifecycleState::Stopped)
+        }
+    }
+
+    pub fn check_id_existed(&self, input_device_id: &str, input_vcpu_id: u8) -> Result<()> {
+        for (vcpu_id, id) in &self.id_map {
+            if id == input_device_id {
+                bail!("Device id {} already exists.", input_device_id)
+            }
+            // If the vcpu id exists and its device id is not empty, this vcpu is running.
+            if vcpu_id == &input_vcpu_id && !id.is_empty() {
+                bail!("Cpu-id {} is running, device id is {}.", input_vcpu_id, id)
+            }
+        }
+        Ok(())
+    }
+
+    pub fn find_cpu_by_device_id(&self, input_device_id: &str) -> Option<u8> {
+        for (vcpu_id, id) in &self.id_map {
+            if id == input_device_id {
+                return Some(*vcpu_id);
+            }
+        }
+        None
+    }
+
+    pub fn find_reusable_vcpu(&mut self) -> Option<Arc<CPU>> {
+        // If the vcpu id exists and its device id is empty, this vcpu has been
+        // hotunplugged and can be reused.
+ let input_vcpu_id = self.vcpu_id.load(Ordering::SeqCst); + for (vcpu_id, device_id) in &self.id_map { + if vcpu_id == &input_vcpu_id && device_id.is_empty() { + let vcpu = self.vcpu_map.get(vcpu_id).unwrap().clone(); + return Some(vcpu); + } + } + None + } + + pub fn get_boot_config(&self) -> &CPUBootConfig { + &self.cpu_config.as_ref().unwrap().boot_config + } + + pub fn get_hotplug_cpu_info(&self) -> (String, u8) { + let device_id = self.device_id.clone(); + let vcpu_id = self.vcpu_id.load(Ordering::SeqCst); + (device_id, vcpu_id) + } + + pub fn set_hotplug_cpu_info(&mut self, device_id: String, vcpu_id: u8) -> Result<()> { + self.device_id = device_id; + self.vcpu_id.store(vcpu_id, Ordering::SeqCst); + Ok(()) + } + + pub fn get_topology_config(&self) -> &CPUTopology { + &self.cpu_config.as_ref().unwrap().cpu_topology + } + + pub fn setup_reuse_vcpu(&mut self, vcpu: Arc) -> Result<()> { + let device_id = self.device_id.clone(); + let vcpu_id = self.vcpu_id.load(Ordering::SeqCst); + let (state, _) = vcpu.state(); + let mut vcpu_state = state.lock().unwrap(); + *vcpu_state = CpuLifecycleState::Created; + drop(vcpu_state); + + MigrationManager::register_cpu_instance(cpu::ArchCPU::descriptor(), vcpu, vcpu_id); + if let Some(plug) = &self.hotplug { + plug.store(true, Ordering::SeqCst); + } else { + self.hotplug = Some(Arc::new(AtomicBool::new(true))); + } + self.id_map.insert(vcpu_id, device_id); + Ok(()) + } + + pub fn set_boot_vcpu(&mut self, boot_vcpus: Vec>) -> Result<()> { + for (k, v) in boot_vcpus.iter().enumerate() { + self.vcpu_map.insert(k.try_into().unwrap(), v.clone()); + } + Ok(()) + } + + pub fn setup_hotplug_vcpu( + &mut self, + device_id: String, + vcpu_id: u8, + vcpu: Arc, + ) -> Result<()> { + // Register vcpu instance. + MigrationManager::register_cpu_instance(cpu::ArchCPU::descriptor(), vcpu.clone(), vcpu_id); + // Set operate. 
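+        // Record the pending operation as a hotplug (true); the guest sees
+        // CPU_INSERTING_FLAG on its next read of the status register.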
+ if let Some(plug) = &self.hotplug { + plug.store(true, Ordering::SeqCst); + } else { + self.hotplug = Some(Arc::new(AtomicBool::new(true))); + } + self.id_map.insert(vcpu_id, device_id); + self.vcpu_map.insert(vcpu_id, vcpu); + Ok(()) + } + + pub fn set_hotunplug_cpu(&mut self, vcpu_id: u8) -> Result<()> { + if let Some(plug) = &self.hotplug { + plug.store(false, Ordering::SeqCst); + } else { + self.hotplug = Some(Arc::new(AtomicBool::new(false))); + } + self.vcpu_id.store(vcpu_id, Ordering::SeqCst); + Ok(()) + } + + pub fn trigger_hotplug_cpu(&mut self) -> Result<()> { + self.hotplug_cpu_req + .as_ref() + .unwrap() + .write(1) + .with_context(|| "Failed to write cpu hotplug request.")?; + Ok(()) + } +} + +impl Device for CpuController { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn realize(self) -> Result>> { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + MUT_SYS_BUS!(parent_bus, locked_bus, sysbus); + let dev = Arc::new(Mutex::new(self)); + sysbus.attach_device(&dev)?; + Ok(dev) + } +} + +impl SysBusDevOps for CpuController { + gen_base_func!(sysbusdev_base, sysbusdev_base_mut, SysBusDevBase, base); + + fn read(&mut self, data: &mut [u8], _base: GuestAddress, offset: u64) -> bool { + data[0] = 0; + match offset { + CPU_SELECTION_OFFSET => { + let vcpu_id = self.vcpu_id.load(Ordering::SeqCst); + data[0] = vcpu_id; + } + CPU_STATUS_OFFSET => { + let state = self.get_cpu_state(self.selected_cpu).unwrap(); + if state == CpuLifecycleState::Running { + data[0] |= CPU_ENABLE_FLAG; + } + + if let Some(hotplug) = &self.hotplug { + if hotplug.load(Ordering::SeqCst) { + data[0] |= CPU_INSERTING_FLAG; + } else { + data[0] |= CPU_REMOVING_FLAG; + } + } + } + _ => { + error!("Unexpected offset for accessing CpuController: {}", offset); + return false; + } + } + true + } + + fn write(&mut self, data: &[u8], _base: GuestAddress, offset: u64) -> bool { + match offset { + CPU_SELECTION_OFFSET => self.selected_cpu = data[0], + CPU_STATUS_OFFSET => { + match data[0] { + // Reset hotplug flag after cpu inserting notified. + CPU_INSERTING_FLAG => self.hotplug = None, + + // Reset hotplug flag after cpu removing notified. + CPU_REMOVING_FLAG => self.hotplug = None, + + // Eject vcpu after guest os eject cpu device. 
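+                    // (The guest reaches this path when _EJ0 stores One into the CEJ_
+                    // field, i.e. bit 3 of the status byte, which equals CPU_EJECT_FLAG.)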
+ CPU_EJECT_FLAG => { + let vcpu_id = self.vcpu_id.load(Ordering::SeqCst); + if let Err(_e) = self.eject_cpu(vcpu_id) { + error!("Eject cpu-{} failed", vcpu_id) + } + } + _ => { + error!( + "Unexpected data[0] value for cpu status offset: {}", + data[0] + ); + return false; + } + } + } + CPU_EVENT_CODE_OFFSET => { + info!("Receive _OST event code {}", data[0]); + } + _ => { + error!( + "Unexpected offset for write CpuController device: {}", + offset + ); + return false; + } + } + true + } +} + +impl AmlBuilder for CpuController { + fn aml_bytes(&self) -> Vec { + let res = self.base.res.clone(); + let mut cpu_hotplug_controller = AmlDevice::new("PRES"); + cpu_hotplug_controller.append_child(AmlNameDecl::new("_HID", AmlEisaId::new("PNP0A06"))); + cpu_hotplug_controller.append_child(AmlNameDecl::new( + "_UID", + AmlString("CPU Hotplug Controller".into()), + )); + cpu_hotplug_controller.append_child(AmlMutex::new("CPLK", 0)); + let mut crs = AmlResTemplate::new(); + crs.append_child(AmlQWordDesc::new_memory( + acpi::AmlAddressSpaceDecode::Positive, + acpi::AmlCacheable::Cacheable, + acpi::AmlReadAndWrite::ReadWrite, + 0, + res.region_base, + res.region_base + res.region_size - 1, + 0, + res.region_size, + )); + cpu_hotplug_controller.append_child(AmlNameDecl::new("_CRS", crs)); + let prst = AmlOpRegion::new( + "PRST", + AmlAddressSpaceType::SystemMemory, + res.region_base, + res.region_size, + ); + cpu_hotplug_controller.append_child(prst); + + let mut prst_field = AmlField::new( + "PRST", + AmlFieldAccessType::Byte, + AmlFieldLockRule::NoLock, + AmlFieldUpdateRule::WriteAsZeros, + ); + + prst_field.append_child(AmlFieldUnit::new("CPID".into(), 8)); + prst_field.append_child(AmlFieldUnit::new("CPEN".into(), 1)); + prst_field.append_child(AmlFieldUnit::new("CINS".into(), 1)); + prst_field.append_child(AmlFieldUnit::new("CRMV".into(), 1)); + prst_field.append_child(AmlFieldUnit::new("CEJ_".into(), 1)); + prst_field.append_child(AmlFieldUnit::new(None, 4)); + prst_field.append_child(AmlFieldUnit::new("CEVC".into(), 8)); + + cpu_hotplug_controller.append_child(prst_field); + cpu_hotplug_controller.append_child(AmlCpuStatusMethod {}); + cpu_hotplug_controller.append_child(AmlCpuStatusIndicationMethod {}); + cpu_hotplug_controller.append_child(AmlCpuEjectMethod {}); + cpu_hotplug_controller.append_child(AmlCpuNotifyMethod { + cpus_count: self.max_cpus, + }); + cpu_hotplug_controller.append_child(AmlCpuResizeMethod {}); + + for cpu_id in 0..self.max_cpus { + cpu_hotplug_controller.append_child(AmlCpu { + cpu_id, + dynamic: true, + }) + } + cpu_hotplug_controller.aml_bytes() + } +} + +pub struct AmlCpu { + pub cpu_id: u8, + pub dynamic: bool, +} + +impl AmlCpu { + fn generate_mat(&self) -> Vec { + let lapic = AcpiLocalApic { + type_id: 0, + length: 8, + processor_uid: self.cpu_id, + apic_id: self.cpu_id, + flags: 1 << MADT_CPU_ENABLE_FLAG, + }; + + let mut mat_data: Vec = Vec::new(); + mat_data.resize(std::mem::size_of_val(&lapic), 0); + // SAFETY: mat_data is large enough to hold lapic. 
+ unsafe { *(mat_data.as_mut_ptr() as *mut AcpiLocalApic) = lapic }; + + mat_data + } + + fn sta_method(&self, return_value: Option) -> AmlMethod { + let mut sta_method = AmlMethod::new("_STA", 0, false); + if let Some(value) = return_value { + sta_method.append_child(AmlReturn::with_value(AmlInteger(value))); + } else { + let call_method_csta = AmlCallWithArgs1::new("CSTA", AmlInteger(self.cpu_id.into())); + sta_method.append_child(AmlReturn::with_value(call_method_csta)); + } + sta_method + } + + fn mat_name(&self) -> AmlNameDecl { + let mat_buffer = AmlBuffer(self.generate_mat()); + AmlNameDecl::new("_MAT", mat_buffer) + } + + fn ost_method(&self) -> AmlMethod { + let mut ost_method = AmlMethod::new("_OST", 3, false); + ost_method.append_child(AmlReturn::with_value(AmlCallWithArgs4::new( + "COST", + AmlInteger(self.cpu_id.into()), + AmlArg(0), + AmlArg(1), + AmlArg(2), + ))); + ost_method + } + + fn ej0_method(&self) -> AmlMethod { + let mut ej0_method = AmlMethod::new("_EJ0", 1, false); + ej0_method.append_child(AmlCallWithArgs1::new( + "CEJ0", + AmlInteger(self.cpu_id.into()), + )); + ej0_method + } +} + +impl AmlBuilder for AmlCpu { + fn aml_bytes(&self) -> Vec { + let mut cpu_device = AmlDevice::new(format!("C{:03}", self.cpu_id).as_str()); + cpu_device.append_child(AmlNameDecl::new("_HID", AmlString("ACPI0007".into()))); + cpu_device.append_child(AmlNameDecl::new("_UID", AmlInteger(self.cpu_id.into()))); + cpu_device.append_child(AmlNameDecl::new("_PXM", AmlInteger(0u64))); + if self.dynamic { + { + cpu_device.append_child(self.sta_method(None)); + cpu_device.append_child(self.mat_name()); + cpu_device.append_child(self.ost_method()); + cpu_device.append_child(self.ej0_method()); + } + } else { + cpu_device.append_child(self.sta_method(Some(0xfu64))); + cpu_device.append_child(self.mat_name()); + } + cpu_device.aml_bytes() + } +} + +pub struct AmlCpuStatusIndicationMethod {} + +impl AmlBuilder for AmlCpuStatusIndicationMethod { + fn aml_bytes(&self) -> Vec { + let mut cpu_status_indication_method = AmlMethod::new("COST", 4, false); + cpu_status_indication_method + .append_child(AmlAcquire::new(AmlName("\\_SB.PRES.CPLK".into()), 0xffff)); + cpu_status_indication_method + .append_child(AmlStore::new(AmlArg(2), AmlName("\\_SB.PRES.CEVC".into()))); + cpu_status_indication_method + .append_child(AmlRelease::new(AmlName("\\_SB.PRES.CPLK".to_string()))); + cpu_status_indication_method.aml_bytes() + } +} + +pub struct AmlCpuNotifyMethod { + pub cpus_count: u8, +} + +impl AmlBuilder for AmlCpuNotifyMethod { + fn aml_bytes(&self) -> Vec { + let mut cpu_notify_method = AmlMethod::new("CTFY", 2, true); + for cpu_id in 0..self.cpus_count { + let mut if_scope = AmlIf::new(AmlEqual::new(AmlArg(0), AmlInteger(cpu_id.into()))); + if_scope.append_child(AmlNotify::new( + AmlName(format!("C{:03}", cpu_id)), + AmlArg(1), + )); + cpu_notify_method.append_child(if_scope); + } + cpu_notify_method.aml_bytes() + } +} + +pub struct AmlCpuStatusMethod {} + +impl AmlBuilder for AmlCpuStatusMethod { + fn aml_bytes(&self) -> Vec { + let mut csta_method = AmlMethod::new("CSTA", 1, true); + csta_method.append_child(AmlAcquire::new(AmlName("\\_SB.PRES.CPLK".into()), 0xffff)); + csta_method.append_child(AmlStore::new(AmlZero, AmlLocal(0))); + csta_method.append_child(AmlStore::new(AmlArg(0), AmlName("\\_SB.PRES.CPID".into()))); + + let mut if_scope = AmlIf::new(AmlEqual::new(AmlName("\\_SB.PRES.CPEN".into()), AmlOne)); + if_scope.append_child(AmlStore::new(AmlInteger(0xfu64), AmlLocal(0))); + 
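+        // Attach the CPEN check: when the selected CPU is enabled, CSTA returns 0xF
+        // (present | enabled | shown in UI | functioning), otherwise 0.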
csta_method.append_child(if_scope); + csta_method.append_child(AmlRelease::new(AmlName("\\_SB.PRES.CPLK".to_string()))); + csta_method.append_child(AmlReturn::with_value(AmlLocal(0))); + csta_method.aml_bytes() + } +} + +pub struct AmlCpuEjectMethod {} + +impl AmlBuilder for AmlCpuEjectMethod { + fn aml_bytes(&self) -> Vec { + let mut eject_method = AmlMethod::new("CEJ0", 1, true); + eject_method.append_child(AmlAcquire::new(AmlName("\\_SB.PRES.CPLK".into()), 0xffff)); + eject_method.append_child(AmlStore::new(AmlOne, AmlName("\\_SB.PRES.CEJ_".into()))); + eject_method.append_child(AmlRelease::new(AmlName("\\_SB.PRES.CPLK".to_string()))); + eject_method.aml_bytes() + } +} + +pub struct AmlCpuResizeMethod {} + +impl AmlBuilder for AmlCpuResizeMethod { + fn aml_bytes(&self) -> Vec { + let mut cscn_method = AmlMethod::new("CSCN", 1, true); + cscn_method.append_child(AmlAcquire::new(AmlName("\\_SB.PRES.CPLK".into()), 0xffff)); + cscn_method.append_child(AmlStore::new( + AmlName("\\_SB.PRES.CPID".into()), + AmlLocal(0), + )); + + let mut if_plug_scope = + AmlIf::new(AmlEqual::new(AmlName("\\_SB.PRES.CINS".into()), AmlOne)); + if_plug_scope.append_child(AmlCallWithArgs2::new("CTFY", AmlLocal(0), AmlOne)); + if_plug_scope.append_child(AmlStore::new(AmlOne, AmlName("\\_SB.PRES.CINS".into()))); + cscn_method.append_child(if_plug_scope); + + let mut if_unplug_scope = + AmlIf::new(AmlEqual::new(AmlName("\\_SB.PRES.CRMV".into()), AmlOne)); + if_unplug_scope.append_child(AmlCallWithArgs2::new("CTFY", AmlLocal(0), AmlInteger(3u64))); + if_unplug_scope.append_child(AmlStore::new(AmlOne, AmlName("\\_SB.PRES.CRMV".into()))); + cscn_method.append_child(if_unplug_scope); + + cscn_method.append_child(AmlRelease::new(AmlName("\\_SB.PRES.CPLK".to_string()))); + cscn_method.aml_bytes() + } +} diff --git a/devices/src/acpi/ged.rs b/devices/src/acpi/ged.rs new file mode 100644 index 0000000000000000000000000000000000000000..76a15032c40bb50016719b46daa43fb281d45266 --- /dev/null +++ b/devices/src/acpi/ged.rs @@ -0,0 +1,311 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
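+
+//! Generic Event Device (ACPI0013). Guest-visible ACPI events (powerdown, AC
+//! adapter and battery status changes and, on x86_64, CPU resize) are latched in
+//! a 32-bit event-select register and signalled through one edge-triggered
+//! interrupt; the _EVT method dispatches them to Notify() calls (and to
+//! \_SB.PRES.CSCN for CPU resize).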
+ +use std::os::unix::prelude::AsRawFd; +use std::rc::Rc; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::{Arc, Mutex}; + +use anyhow::{Context, Result}; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; + +use crate::sysbus::{SysBus, SysBusDevBase, SysBusDevOps}; +use crate::{convert_bus_mut, Device, DeviceBase, MUT_SYS_BUS}; +use acpi::{ + AcpiError, AmlActiveLevel, AmlAddressSpaceType, AmlAnd, AmlBuilder, AmlDevice, AmlEdgeLevel, + AmlEqual, AmlExtendedInterrupt, AmlField, AmlFieldAccessType, AmlFieldLockRule, AmlFieldUnit, + AmlFieldUpdateRule, AmlIf, AmlIntShare, AmlInteger, AmlLocal, AmlMethod, AmlName, AmlNameDecl, + AmlNotify, AmlOpRegion, AmlResTemplate, AmlResourceUsage, AmlScopeBuilder, AmlStore, AmlString, +}; +#[cfg(target_arch = "x86_64")] +use acpi::{AmlCallWithArgs1, AmlOne}; +#[cfg(target_arch = "aarch64")] +use acpi::{INTERRUPT_PPIS_COUNT, INTERRUPT_SGIS_COUNT}; +use address_space::GuestAddress; +use machine_manager::event; +use machine_manager::event_loop::EventLoop; +use machine_manager::qmp::qmp_channel::QmpChannel; +use util::gen_base_func; +use util::loop_context::{ + create_new_eventfd, read_fd, EventNotifier, NotifierCallback, NotifierOperation, +}; +use util::num_ops::write_data_u32; + +#[derive(Clone, Copy)] +pub enum AcpiEvent { + Nothing = 0, + PowerDown = 1, + AcadSt = 2, + BatteryInf = 4, + BatterySt = 8, + CpuResize = 16, +} + +const AML_GED_EVT_REG: &str = "EREG"; +const AML_GED_EVT_SEL: &str = "ESEL"; + +#[derive(Clone)] +pub struct GedEvent { + power_button: Arc, + #[cfg(target_arch = "x86_64")] + cpu_resize: Arc, +} + +impl GedEvent { + pub fn new( + power_button: Arc, + #[cfg(target_arch = "x86_64")] cpu_resize: Arc, + ) -> GedEvent { + GedEvent { + power_button, + #[cfg(target_arch = "x86_64")] + cpu_resize, + } + } +} + +#[derive(Clone)] +pub struct Ged { + base: SysBusDevBase, + notification_type: Arc, + battery_present: bool, + ged_event: GedEvent, +} + +impl Ged { + pub fn new( + battery_present: bool, + sysbus: &Arc>, + region_base: u64, + region_size: u64, + ged_event: GedEvent, + ) -> Result { + let mut ged = Self { + base: SysBusDevBase::default(), + notification_type: Arc::new(AtomicU32::new(AcpiEvent::Nothing as u32)), + battery_present, + ged_event, + }; + ged.base.interrupt_evt = Some(Arc::new(create_new_eventfd()?)); + ged.set_sys_resource(sysbus, region_base, region_size, "Ged") + .with_context(|| AcpiError::Alignment(region_size as u32))?; + ged.set_parent_bus(sysbus.clone()); + + Ok(ged) + } + + fn register_acpi_powerdown_event(&self, power_button: Arc) -> Result<()> { + let power_down_fd = power_button.as_raw_fd(); + let ged_clone = self.clone(); + let power_down_handler: Rc = Rc::new(move |_, _| { + read_fd(power_down_fd); + ged_clone + .notification_type + .fetch_or(AcpiEvent::PowerDown as u32, Ordering::SeqCst); + ged_clone.inject_interrupt(); + trace::ged_inject_acpi_event(AcpiEvent::PowerDown as u32); + if QmpChannel::is_connected() { + event!(Powerdown); + } + None + }); + + let notifier = EventNotifier::new( + NotifierOperation::AddShared, + power_down_fd, + None, + EventSet::IN, + vec![power_down_handler], + ); + + EventLoop::update_event(vec![notifier], None) + .with_context(|| "Failed to register powerdown notifier.")?; + Ok(()) + } + + #[cfg(target_arch = "x86_64")] + fn register_acpi_cpu_resize_event(&self, cpu_resize: Arc) -> Result<()> { + let cpu_resize_fd = cpu_resize.as_raw_fd(); + let clone_ged = self.clone(); + let cpu_resize_handler: Rc = Rc::new(move |_, _| { + 
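+            // Drain the eventfd, latch the CpuResize event and raise the GED
+            // interrupt so that the guest re-evaluates _EVT.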
read_fd(cpu_resize_fd); + clone_ged + .notification_type + .fetch_or(AcpiEvent::CpuResize as u32, Ordering::SeqCst); + clone_ged.inject_interrupt(); + trace::ged_inject_acpi_event(AcpiEvent::CpuResize as u32); + if QmpChannel::is_connected() { + event!(CpuResize); + } + None + }); + + let notifier = EventNotifier::new( + NotifierOperation::AddShared, + cpu_resize_fd, + None, + EventSet::IN, + vec![cpu_resize_handler], + ); + + EventLoop::update_event(vec![notifier], None) + .with_context(|| "Failed to register cpu resize notifier.")?; + Ok(()) + } + + pub fn inject_acpi_event(&self, evt: AcpiEvent) { + self.notification_type + .fetch_or(evt as u32, Ordering::SeqCst); + self.inject_interrupt(); + trace::ged_inject_acpi_event(evt as u32); + } +} + +impl Device for Ged { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn realize(self) -> Result>> { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + MUT_SYS_BUS!(parent_bus, locked_bus, sysbus); + let ged_event = self.ged_event.clone(); + let dev = Arc::new(Mutex::new(self)); + sysbus.attach_device(&dev)?; + + let ged = dev.lock().unwrap(); + ged.register_acpi_powerdown_event(ged_event.power_button) + .with_context(|| "Failed to register ACPI powerdown event.")?; + #[cfg(target_arch = "x86_64")] + ged.register_acpi_cpu_resize_event(ged_event.cpu_resize) + .with_context(|| "Failed to register ACPI cpu resize event.")?; + Ok(dev.clone()) + } +} + +impl SysBusDevOps for Ged { + gen_base_func!(sysbusdev_base, sysbusdev_base_mut, SysBusDevBase, base); + + fn read(&mut self, data: &mut [u8], _base: GuestAddress, offset: u64) -> bool { + if offset != 0 { + return false; + } + let value = self + .notification_type + .swap(AcpiEvent::Nothing as u32, Ordering::SeqCst); + trace::ged_read(value); + write_data_u32(data, value) + } + + fn write(&mut self, _data: &[u8], _base: GuestAddress, _offset: u64) -> bool { + true + } +} + +impl AmlBuilder for Ged { + fn aml_bytes(&self) -> Vec { + let mut acpi_dev = AmlDevice::new("\\_SB.GED"); + acpi_dev.append_child(AmlNameDecl::new("_HID", AmlString("ACPI0013".to_string()))); + acpi_dev.append_child(AmlNameDecl::new("_UID", AmlString("GED".to_string()))); + + let mut res = AmlResTemplate::new(); + + // SPI start at interrupt number 32 on aarch64 platform. 
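+        // (The GIC reserves interrupt IDs 0-15 for SGIs and 16-31 for PPIs, hence
+        // the INTERRUPT_PPIS_COUNT + INTERRUPT_SGIS_COUNT base.)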
+ #[cfg(target_arch = "aarch64")] + let irq_base = INTERRUPT_PPIS_COUNT + INTERRUPT_SGIS_COUNT; + #[cfg(target_arch = "x86_64")] + let irq_base = 0; + res.append_child(AmlExtendedInterrupt::new( + AmlResourceUsage::Consumer, + AmlEdgeLevel::Edge, + AmlActiveLevel::High, + AmlIntShare::Exclusive, + vec![self.base.res.irq as u32 + irq_base], + )); + acpi_dev.append_child(AmlNameDecl::new("_CRS", res)); + + acpi_dev.append_child(AmlOpRegion::new( + "EREG", + AmlAddressSpaceType::SystemMemory, + self.base.res.region_base, + self.base.res.region_size, + )); + + let mut field = AmlField::new( + AML_GED_EVT_REG, + AmlFieldAccessType::DWord, + AmlFieldLockRule::NoLock, + AmlFieldUpdateRule::WriteAsZeros, + ); + + let element = AmlFieldUnit::new(Some(AML_GED_EVT_SEL), 32); + field.append_child(element); + acpi_dev.append_child(field); + + let mut method = AmlMethod::new("_EVT", 1, true); + let store = AmlStore::new(AmlName(AML_GED_EVT_SEL.to_string()), AmlLocal(0)); + method.append_child(store); + + struct PowerDevEvent(AcpiEvent, &'static str, u64); + let events: [PowerDevEvent; 4] = [ + PowerDevEvent(AcpiEvent::PowerDown, "PWRB", 0x80), + PowerDevEvent(AcpiEvent::AcadSt, "ACAD", 0x80), + PowerDevEvent(AcpiEvent::BatteryInf, "BAT0", 0x81), + PowerDevEvent(AcpiEvent::BatterySt, "BAT0", 0x80), + ]; + + for event in events.into_iter() { + let evt = event.0 as u64; + let dev = event.1; + let notify = event.2; + + if !self.battery_present + && (evt > AcpiEvent::PowerDown as u64 && evt <= AcpiEvent::BatterySt as u64) + { + break; + } + + let mut if_scope = AmlIf::new(AmlEqual::new( + AmlAnd::new(AmlLocal(0), AmlInteger(evt), AmlLocal(1)), + AmlInteger(evt), + )); + if_scope.append_child(AmlNotify::new(AmlName(dev.to_string()), AmlInteger(notify))); + method.append_child(if_scope); + } + + #[cfg(target_arch = "x86_64")] + { + // Call cpu hot(un)plug method. + let mut cpu_if_scope = AmlIf::new(AmlEqual::new( + AmlAnd::new( + AmlLocal(0), + AmlInteger(AcpiEvent::CpuResize as u64), + AmlLocal(1), + ), + AmlInteger(AcpiEvent::CpuResize as u64), + )); + cpu_if_scope.append_child(AmlCallWithArgs1::new("\\_SB.PRES.CSCN", AmlOne)); + method.append_child(cpu_if_scope); + } + + acpi_dev.append_child(method); + + acpi_dev.aml_bytes() + } +} + +pub fn acpi_dsdt_add_power_button() -> AmlDevice { + let mut acpi_dev = AmlDevice::new("PWRB"); + acpi_dev.append_child(AmlNameDecl::new("_HID", AmlString("PNP0C0C".to_string()))); + acpi_dev.append_child(AmlNameDecl::new("_UID", AmlInteger(1))); + + acpi_dev +} diff --git a/devices/src/acpi/mod.rs b/devices/src/acpi/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..a97e097543ff4f0ca70b5650db745ef7e78c43b4 --- /dev/null +++ b/devices/src/acpi/mod.rs @@ -0,0 +1,16 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
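+
+//! ACPI-backed helper devices: the x86_64-only CPU hotplug controller, the
+//! Generic Event Device (GED) and the AC adapter/battery power device.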
+ +#[cfg(target_arch = "x86_64")] +pub mod cpu_controller; +pub mod ged; +pub mod power; diff --git a/devices/src/acpi/power.rs b/devices/src/acpi/power.rs new file mode 100644 index 0000000000000000000000000000000000000000..ae3d0bf61c004e56ea90f7d80f3943ca052c5d87 --- /dev/null +++ b/devices/src/acpi/power.rs @@ -0,0 +1,421 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::path::Path; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +use anyhow::{Context, Result}; +use log::info; + +use crate::acpi::ged::{AcpiEvent, Ged}; +use crate::sysbus::{SysBus, SysBusDevBase, SysBusDevOps}; +use crate::{convert_bus_mut, Device, DeviceBase, MUT_SYS_BUS}; +use acpi::{ + AcpiError, AmlAddressSpaceType, AmlBuilder, AmlDevice, AmlField, AmlFieldAccessType, + AmlFieldLockRule, AmlFieldUnit, AmlFieldUpdateRule, AmlIndex, AmlInteger, AmlMethod, AmlName, + AmlNameDecl, AmlOpRegion, AmlPackage, AmlReturn, AmlScopeBuilder, AmlStore, AmlString, AmlZero, +}; +use address_space::GuestAddress; +use machine_manager::event_loop::EventLoop; +use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; +use migration_derive::{ByteCode, Desc}; +use util::byte_code::ByteCode; +use util::gen_base_func; +use util::num_ops::write_data_u32; + +const AML_ACAD_REG: &str = "ADPM"; +const AML_ACAD_ONLINE: &str = "ADPO"; + +const AML_ACAD_REG_SZ: u64 = 4; + +const AML_BAT0_REG: &str = "BATM"; +const AML_BAT0_DESIGN_CAP: &str = "DCAP"; +const AML_BAT0_LAST_FULL_CAP: &str = "LFC"; +const AML_BAT0_DESIGN_VOLTAGE: &str = "DV"; +const AML_BAT0_STATE: &str = "ST"; +const AML_BAT0_PRESENT_RATE: &str = "PRT"; +const AML_BAT0_REM_CAP: &str = "RCAP"; +const AML_BAT0_PRES_VOLT: &str = "PV"; + +const POWERDEV_REGS_SIZE: usize = 8; +const REG_IDX_ACAD_ON: usize = 0; +const REG_IDX_BAT_DCAP: usize = 1; +const REG_IDX_BAT_FCAP: usize = 2; +const REG_IDX_BAT_DVOLT: usize = 3; +const REG_IDX_BAT_STATE: usize = 4; +const REG_IDX_BAT_PRATE: usize = 5; +const REG_IDX_BAT_RCAP: usize = 6; +const REG_IDX_BAT_PVOLT: usize = 7; + +const ACPI_BATTERY_STATE_DISCHARGING: u32 = 0x1; +const ACPI_BATTERY_STATE_CHARGING: u32 = 0x2; + +const ACAD_SYSFS_DIR: &str = "/sys/class/power_supply/Mains"; +const BAT_SYSFS_DIR: &str = "/sys/class/power_supply/Battery"; + +#[repr(C)] +#[derive(Copy, Clone, Desc, ByteCode)] +#[desc_version(compat_version = "0.1.0")] +struct PowerDevState { + last_acad_st: u32, + last_bat_st: u32, + last_bat_lvl: u32, +} + +#[derive(Clone)] +pub struct PowerDev { + base: SysBusDevBase, + regs: Vec, + state: PowerDevState, + ged: Arc>, +} + +impl PowerDev { + pub fn new( + ged_dev: Arc>, + sysbus: &Arc>, + region_base: u64, + region_size: u64, + ) -> Result { + let mut pdev = Self { + base: SysBusDevBase::default(), + regs: vec![0; POWERDEV_REGS_SIZE], + state: PowerDevState { + last_acad_st: 1, + last_bat_st: ACPI_BATTERY_STATE_CHARGING, + last_bat_lvl: 0xffffffff, + }, + ged: ged_dev, + }; + pdev.set_sys_resource(sysbus, region_base, region_size, "PowerDev") + .with_context(|| 
AcpiError::Alignment(region_size as u32))?; + pdev.set_parent_bus(sysbus.clone()); + Ok(pdev) + } + + fn read_sysfs_power_props( + &self, + dir_name: &str, + sysfs_props: &Vec<&str>, + pdev_props: &mut [u32], + ) -> Result<()> { + for i in 0..sysfs_props.len() { + let df = format!("{}/{}", dir_name, sysfs_props[i]); + let path = Path::new(&df); + let sprop = std::fs::read_to_string(path).with_context(|| { + format!("Can't read power device property: {}", path.display(),) + })?; + let prop = sprop[..sprop.len() - 1].parse::().with_context(|| { + format!( + "Can't parse power device property: {} value: {}", + path.display(), + sprop + ) + })?; + // All the values except "online" property is multiplicated by 1000. + // Only "online" property starts with 'o' character. + pdev_props[i] = if sysfs_props[i].starts_with('o') { + prop.unsigned_abs() as u32 + } else { + (prop.abs() / 1000) as u32 + }; + } + Ok(()) + } + + fn power_battery_init_info(&mut self) -> Result<()> { + let bat_sysfs_props = vec!["energy_full_design", "energy_full", "voltage_max_design"]; + let mut props: Vec = vec![0; bat_sysfs_props.len()]; + self.read_sysfs_power_props(BAT_SYSFS_DIR, &bat_sysfs_props, &mut props)?; + self.regs[REG_IDX_BAT_DCAP] = props[0]; + self.regs[REG_IDX_BAT_FCAP] = props[1]; + self.regs[REG_IDX_BAT_DVOLT] = props[2]; + Ok(()) + } + + fn power_status_read(&mut self) -> Result<()> { + let acad_props = vec!["online"]; + let bat_sysfs_props = vec!["online", "current_now", "energy_now", "voltage_now"]; + let mut props: Vec = vec![0; bat_sysfs_props.len()]; + + self.read_sysfs_power_props(ACAD_SYSFS_DIR, &acad_props, &mut props)?; + self.regs[REG_IDX_ACAD_ON] = props[0]; + + self.read_sysfs_power_props(BAT_SYSFS_DIR, &bat_sysfs_props, &mut props)?; + self.regs[REG_IDX_BAT_STATE] = if props[0] == 1 { + ACPI_BATTERY_STATE_CHARGING + } else { + ACPI_BATTERY_STATE_DISCHARGING + }; + // unit: mA + self.regs[REG_IDX_BAT_PRATE] = props[1]; + self.regs[REG_IDX_BAT_RCAP] = props[2]; + self.regs[REG_IDX_BAT_PVOLT] = props[3]; + // unit: mW + self.regs[REG_IDX_BAT_PRATE] = + (self.regs[REG_IDX_BAT_PRATE] * self.regs[REG_IDX_BAT_PVOLT]) / 1000; + + trace::power_status_read(&self.regs); + Ok(()) + } + + fn power_load_static_status(&mut self) { + info!("Load static power devices status"); + self.regs[REG_IDX_ACAD_ON] = 1; + self.regs[REG_IDX_BAT_DCAP] = 0xffffffff; + self.regs[REG_IDX_BAT_FCAP] = 0xffffffff; + self.regs[REG_IDX_BAT_DVOLT] = 0xffffffff; + self.regs[REG_IDX_BAT_STATE] = ACPI_BATTERY_STATE_CHARGING; + self.regs[REG_IDX_BAT_PRATE] = 0; + self.regs[REG_IDX_BAT_RCAP] = 0xffffffff; + self.regs[REG_IDX_BAT_PVOLT] = 0xffffffff; + } + + fn send_power_event(&self, evt: AcpiEvent) { + self.ged.lock().unwrap().inject_acpi_event(evt); + } +} + +impl StateTransfer for PowerDev { + fn get_state_vec(&self) -> Result> { + Ok(self.state.as_bytes().to_vec()) + } + + fn set_state_mut(&mut self, state: &[u8]) -> Result<()> { + self.state.as_mut_bytes().copy_from_slice(state); + Ok(()) + } + + fn get_device_alias(&self) -> u64 { + MigrationManager::get_desc_alias(&PowerDevState::descriptor().name).unwrap_or(!0) + } +} + +impl MigrationHook for PowerDev { + fn resume(&mut self) -> Result<()> { + self.send_power_event(AcpiEvent::AcadSt); + self.send_power_event(AcpiEvent::BatterySt); + Ok(()) + } +} + +impl Device for PowerDev { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn realize(self) -> Result>> { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + MUT_SYS_BUS!(parent_bus, 
locked_bus, sysbus); + let dev = Arc::new(Mutex::new(self)); + sysbus.attach_device(&dev)?; + + let pdev_available: bool; + { + let mut pdev = dev.lock().unwrap(); + pdev_available = pdev.power_battery_init_info().is_ok(); + if pdev_available { + pdev.send_power_event(AcpiEvent::BatteryInf); + } + } + if pdev_available { + power_status_update(&dev); + } else { + let mut pdev = dev.lock().unwrap(); + pdev.power_load_static_status(); + } + + Ok(dev) + } +} + +impl SysBusDevOps for PowerDev { + gen_base_func!(sysbusdev_base, sysbusdev_base_mut, SysBusDevBase, base); + + fn read(&mut self, data: &mut [u8], _base: GuestAddress, offset: u64) -> bool { + let reg_idx: u64 = offset / 4; + if reg_idx >= self.regs.len() as u64 { + return false; + } + let value = self.regs[reg_idx as usize]; + trace::power_read(reg_idx, value); + write_data_u32(data, value) + } + + fn write(&mut self, _data: &[u8], _base: GuestAddress, _offset: u64) -> bool { + true + } +} + +impl AmlBuilder for PowerDev { + fn aml_bytes(&self) -> Vec { + let mut acpi_acad_dev = AmlDevice::new("ACAD"); + acpi_acad_dev.append_child(AmlNameDecl::new("_HID", AmlString("ACPI0003".to_string()))); + + acpi_acad_dev.append_child(AmlOpRegion::new( + AML_ACAD_REG, + AmlAddressSpaceType::SystemMemory, + self.base.res.region_base, + AML_ACAD_REG_SZ, + )); + + let mut field = AmlField::new( + AML_ACAD_REG, + AmlFieldAccessType::DWord, + AmlFieldLockRule::NoLock, + AmlFieldUpdateRule::WriteAsZeros, + ); + + field.append_child(AmlFieldUnit::new(Some(AML_ACAD_ONLINE), 32)); + acpi_acad_dev.append_child(field); + + let mut pcl_pkg = AmlPackage::new(1); + pcl_pkg.append_child(AmlName("\\_SB".to_string())); + acpi_acad_dev.append_child(AmlNameDecl::new("_PCL", pcl_pkg)); + + let mut method = AmlMethod::new("_STA", 0, false); + method.append_child(AmlReturn::with_value(AmlInteger(0x0F))); + + acpi_acad_dev.append_child(method); + + method = AmlMethod::new("_PSR", 0, false); + method.append_child(AmlReturn::with_value(AmlName(AML_ACAD_ONLINE.to_string()))); + acpi_acad_dev.append_child(method); + + let mut acpi_bat_dev = AmlDevice::new("BAT0"); + acpi_bat_dev.append_child(AmlNameDecl::new("_HID", AmlString("PNP0C0A".to_string()))); + + acpi_bat_dev.append_child(AmlOpRegion::new( + AML_BAT0_REG, + AmlAddressSpaceType::SystemMemory, + self.base.res.region_base + AML_ACAD_REG_SZ, + self.base.res.region_size - AML_ACAD_REG_SZ, + )); + + field = AmlField::new( + AML_BAT0_REG, + AmlFieldAccessType::DWord, + AmlFieldLockRule::NoLock, + AmlFieldUpdateRule::WriteAsZeros, + ); + field.append_child(AmlFieldUnit::new(Some(AML_BAT0_DESIGN_CAP), 32)); + field.append_child(AmlFieldUnit::new(Some(AML_BAT0_LAST_FULL_CAP), 32)); + field.append_child(AmlFieldUnit::new(Some(AML_BAT0_DESIGN_VOLTAGE), 32)); + field.append_child(AmlFieldUnit::new(Some(AML_BAT0_STATE), 32)); + field.append_child(AmlFieldUnit::new(Some(AML_BAT0_PRESENT_RATE), 32)); + field.append_child(AmlFieldUnit::new(Some(AML_BAT0_REM_CAP), 32)); + field.append_child(AmlFieldUnit::new(Some(AML_BAT0_PRES_VOLT), 32)); + acpi_bat_dev.append_child(field); + + pcl_pkg = AmlPackage::new(1); + pcl_pkg.append_child(AmlName("\\_SB".to_string())); + acpi_bat_dev.append_child(AmlNameDecl::new("_PCL", pcl_pkg)); + + method = AmlMethod::new("_STA", 0, false); + method.append_child(AmlInteger(0x1F)); + acpi_bat_dev.append_child(method); + + let mut bif_pkg = AmlPackage::new(13); + bif_pkg.append_child(AmlInteger(0x0)); + bif_pkg.append_child(AmlInteger(0xFFFFFFFF)); + bif_pkg.append_child(AmlInteger(0xFFFFFFFF)); + 
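+        // _BIF package layout (13 entries): power unit (0 = mWh), design capacity,
+        // last full charge capacity, battery technology (1 = rechargeable), design
+        // voltage, warning/low capacity levels, two granularity values, then model,
+        // serial, type and OEM strings. The capacity and voltage entries are patched
+        // at runtime by the _BIF method below.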
bif_pkg.append_child(AmlInteger(0x1)); + bif_pkg.append_child(AmlInteger(0xFFFFFFFF)); + bif_pkg.append_child(AmlInteger(0x00000100)); + bif_pkg.append_child(AmlInteger(0x00000050)); + bif_pkg.append_child(AmlInteger(1)); + bif_pkg.append_child(AmlInteger(1)); + bif_pkg.append_child(AmlString("SVBATM1".to_string())); + bif_pkg.append_child(AmlString("000001".to_string())); + bif_pkg.append_child(AmlString("LI-ON".to_string())); + bif_pkg.append_child(AmlString("SVIRT".to_string())); + acpi_bat_dev.append_child(AmlNameDecl::new("PBIF", bif_pkg)); + + method = AmlMethod::new("_BIF", 0, false); + method.append_child(AmlStore::new( + AmlName(AML_BAT0_DESIGN_CAP.to_string()), + AmlIndex::new(AmlName("PBIF".to_string()), AmlInteger(1), AmlZero), + )); + method.append_child(AmlStore::new( + AmlName(AML_BAT0_LAST_FULL_CAP.to_string()), + AmlIndex::new(AmlName("PBIF".to_string()), AmlInteger(2), AmlZero), + )); + method.append_child(AmlStore::new( + AmlName(AML_BAT0_DESIGN_VOLTAGE.to_string()), + AmlIndex::new(AmlName("PBIF".to_string()), AmlInteger(4), AmlZero), + )); + method.append_child(AmlReturn::with_value(AmlName("PBIF".to_string()))); + acpi_bat_dev.append_child(method); + + let mut bst_pkg = AmlPackage::new(4); + bst_pkg.append_child(AmlInteger(u64::from(ACPI_BATTERY_STATE_CHARGING))); + bst_pkg.append_child(AmlInteger(0xFFFFFFFF)); + bst_pkg.append_child(AmlInteger(0xFFFFFFFF)); + bst_pkg.append_child(AmlInteger(0xFFFFFFFF)); + acpi_bat_dev.append_child(AmlNameDecl::new("PBST", bst_pkg)); + + method = AmlMethod::new("_BST", 0, false); + method.append_child(AmlStore::new( + AmlName(AML_BAT0_STATE.to_string()), + AmlIndex::new(AmlName("PBST".to_string()), AmlInteger(0), AmlZero), + )); + method.append_child(AmlStore::new( + AmlName(AML_BAT0_PRESENT_RATE.to_string()), + AmlIndex::new(AmlName("PBST".to_string()), AmlInteger(1), AmlZero), + )); + method.append_child(AmlStore::new( + AmlName(AML_BAT0_REM_CAP.to_string()), + AmlIndex::new(AmlName("PBST".to_string()), AmlInteger(2), AmlZero), + )); + method.append_child(AmlStore::new( + AmlName(AML_BAT0_PRES_VOLT.to_string()), + AmlIndex::new(AmlName("PBST".to_string()), AmlInteger(3), AmlZero), + )); + method.append_child(AmlReturn::with_value(AmlName("PBST".to_string()))); + acpi_bat_dev.append_child(method); + + acpi_acad_dev + .aml_bytes() + .into_iter() + .chain(acpi_bat_dev.aml_bytes()) + .collect() + } +} + +fn power_status_update(dev: &Arc>) { + let cdev = dev.clone(); + let update_func = Box::new(move || { + power_status_update(&cdev); + }); + + let mut pdev = dev.lock().unwrap(); + + if pdev.power_status_read().is_ok() { + let step2notify: u32 = pdev.regs[REG_IDX_BAT_FCAP] / 100; + let bdiff: u32 = pdev.regs[REG_IDX_BAT_RCAP].abs_diff(pdev.state.last_bat_lvl); + + if pdev.state.last_acad_st != pdev.regs[REG_IDX_ACAD_ON] { + pdev.send_power_event(AcpiEvent::AcadSt); + pdev.state.last_acad_st = pdev.regs[REG_IDX_ACAD_ON]; + } + if pdev.state.last_bat_st != pdev.regs[REG_IDX_BAT_STATE] || bdiff >= step2notify { + pdev.send_power_event(AcpiEvent::BatterySt); + pdev.state.last_bat_st = pdev.regs[REG_IDX_BAT_STATE]; + pdev.state.last_bat_lvl = pdev.regs[REG_IDX_BAT_RCAP]; + } + + EventLoop::get_ctx(None) + .unwrap() + .timer_add(update_func, Duration::from_secs(5)); + } else { + pdev.power_load_static_status(); + } +} diff --git a/devices/src/camera_backend/demo.rs b/devices/src/camera_backend/demo.rs new file mode 100644 index 0000000000000000000000000000000000000000..725d3de84251e164b8a1c7116062021c08c0267f --- /dev/null +++ 
b/devices/src/camera_backend/demo.rs @@ -0,0 +1,646 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +//! Demo backend for vCamera device, that helps for testing. + +use std::fs::read_to_string; +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Context, Result}; +use byteorder::{ByteOrder, LittleEndian}; +use log::{debug, error, info}; +use rand::{thread_rng, Rng}; +use serde::{Deserialize, Serialize}; + +use super::INTERVALS_PER_SEC; +use crate::camera_backend::{ + check_path, CamBasicFmt, CameraBackend, CameraBrokenCallback, CameraFormatList, CameraFrame, + CameraNotifyCallback, FmtType, +}; +use util::aio::{mem_from_buf, Iovec}; + +#[derive(Debug)] +enum RgbColor { + Red, + Orange, + Yellow, + Green, + Blue, + Indigo, + Violet, + White, + Black, +} + +fn get_rgb_color(color: &RgbColor) -> (u8, u8, u8) { + match color { + RgbColor::Red => (0xff, 0x0, 0x0), + RgbColor::Orange => (0xff, 0x80, 0x0), + RgbColor::Yellow => (0xff, 0xff, 0x0), + RgbColor::Green => (0x0, 0xff, 0x0), + RgbColor::Blue => (0x0, 0x0, 0xff), + RgbColor::Indigo => (0x4b, 0x0, 0x82), + RgbColor::Violet => (0xee, 0x82, 0xee), + RgbColor::White => (0xff, 0xff, 0xff), + RgbColor::Black => (0x0, 0x0, 0x0), + } +} + +impl From for RgbColor { + fn from(t: u8) -> Self { + match t { + 0 => RgbColor::Red, + 1 => RgbColor::Orange, + 2 => RgbColor::Yellow, + 3 => RgbColor::Green, + 4 => RgbColor::Blue, + 5 => RgbColor::Indigo, + 6 => RgbColor::Violet, + 7 => RgbColor::White, + _ => RgbColor::Black, + } + } +} + +#[derive(Default)] +struct FrameImage { + image: Vec, + used_len: u64, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum DeviceState { + Uninitialized, + Running, + Exit, +} + +enum ImageMode { + Default, + Random, +} + +impl From<&str> for ImageMode { + fn from(t: &str) -> Self { + match t { + "default" => ImageMode::Default, + "random" => ImageMode::Random, + _ => ImageMode::Default, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct DeviceConfig { + check_interval: u64, + image_mode: String, + force_frame_len: Option, +} + +impl Default for DeviceConfig { + fn default() -> Self { + Self { + check_interval: 50, + image_mode: String::from("default"), + force_frame_len: None, + } + } +} + +/// Demo camera backend used for test. +pub struct DemoCameraBackend { + id: String, + /// Device config path. + config_path: String, + /// Frame image data. + frame_image: Arc>, + /// Callback to used to notify when data is coming. + notify_cb: Option, + /// Callback to used to notify the broken. + broken_cb: Option, + /// Current format. + cur_format: Arc>, + /// Format list supported by the device. + format_list: Vec, + /// Device state. 
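+    /// The worker thread sleeps while the state is `Uninitialized`, produces
+    /// frames while it is `Running`, and terminates once it becomes `Exit`
+    /// (set by `video_stream_off()` and `reset()`).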
+ state: Arc>, +} + +impl DemoCameraBackend { + pub fn new(id: String, config_path: String) -> Result { + let checked_path = check_path(config_path.as_str())?; + Ok(DemoCameraBackend { + id, + config_path: checked_path, + frame_image: Arc::new(Mutex::new(FrameImage::default())), + notify_cb: None, + broken_cb: None, + cur_format: Arc::new(Mutex::new(CamBasicFmt::default())), + format_list: build_format_list(), + state: Arc::new(Mutex::new(DeviceState::Uninitialized)), + }) + } + + fn start_worker(&mut self) -> Result<()> { + let cloned_fmt = self.cur_format.clone(); + let cloned_frame = self.frame_image.clone(); + let cloned_notify = self.notify_cb.clone(); + let cloned_state = self.state.clone(); + let cloned_path = self.config_path.clone(); + + std::thread::Builder::new() + .name("demo camera worker".to_string()) + .spawn(move || { + let mut image_frame = ImageFrame::default(); + let config = read_config(&cloned_path).unwrap_or_else(|_| DeviceConfig::default()); + info!("Demo device config {:?}", config); + loop { + let locked_state = cloned_state.lock().unwrap(); + match *locked_state { + DeviceState::Uninitialized => { + std::thread::sleep(std::time::Duration::from_millis( + config.check_interval, + )); + continue; + } + DeviceState::Running => (), + DeviceState::Exit => break, + } + drop(locked_state); + let mut locked_frame = cloned_frame.lock().unwrap(); + if locked_frame.used_len == 0 { + // Build next frame. + let locked_fmt = cloned_fmt.lock().unwrap(); + if let Some(len) = config.force_frame_len { + locked_frame.used_len = len; + locked_frame.image = vec![0xfe; len as usize]; + debug!("Demo camera force used_len {}", locked_frame.used_len); + } else { + locked_frame.image = match image_frame.build_image( + &ImageMode::from(config.image_mode.as_str()), + &locked_fmt.fmttype, + locked_fmt.width, + locked_fmt.height, + ) { + Ok(img) => img, + Err(e) => { + error!("Failed to build image {:?}", e); + break; + } + }; + locked_frame.used_len = locked_frame.image.len() as u64; + debug!("Demo camera used_len {}", locked_frame.used_len); + } + if let Some(notify) = cloned_notify.as_ref() { + notify(); + } + let interval = if locked_fmt.fps != 0 { + 1000 / u64::from(locked_fmt.fps) + } else { + 20 + }; + drop(locked_frame); + std::thread::sleep(std::time::Duration::from_millis(interval)); + } + } + })?; + Ok(()) + } +} + +#[derive(Default)] +struct ImageFrame { + frame_idx: u64, +} + +impl ImageFrame { + fn build_image( + &mut self, + image_mode: &ImageMode, + format: &FmtType, + width: u32, + height: u32, + ) -> Result> { + const FRAME_IDX_LIMIT: u64 = 1000; + let color = match image_mode { + ImageMode::Default => RgbColor::Red, + ImageMode::Random => RgbColor::from(self.frame_idx as u8 % 8), + }; + debug!("Demo Image color {:?}", color); + let (r, g, b) = get_rgb_color(&color); + let data = init_img(width, height, (r, g, b)); + let image = match format { + FmtType::Mjpg => build_fake_mjpg(width, height), + FmtType::Yuy2 => convert_to_yuy2(&data, width, height), + FmtType::Rgb565 => data, + FmtType::Nv12 => convert_to_nv12(&data, width, height), + }; + self.frame_idx += 1; + if self.frame_idx > FRAME_IDX_LIMIT { + self.frame_idx = 0; + } + Ok(image) + } +} + +fn read_config(path: &str) -> Result { + let str = read_to_string(path)?; + let conf = serde_json::from_str::(&str)?; + Ok(conf) +} + +fn build_format_list() -> Vec { + vec![ + build_yuy2_list(), + build_mjpg_list(), + build_rgb565_list(), + build_nv12_list(), + ] +} + +fn build_yuy2_list() -> CameraFormatList { + CameraFormatList { 
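+        // Frame intervals below are expressed in 100ns units (see
+        // INTERVALS_PER_SEC), so INTERVALS_PER_SEC / 10 advertises a 10 fps
+        // mode and INTERVALS_PER_SEC / 30 a 30 fps mode.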
+ format: FmtType::Yuy2, + fmt_index: 1, + frame: vec![ + CameraFrame { + width: 1280, + height: 720, + interval: INTERVALS_PER_SEC / 10, + index: 1, + }, + CameraFrame { + width: 1920, + height: 1280, + interval: INTERVALS_PER_SEC / 5, + index: 2, + }, + CameraFrame { + width: 960, + height: 540, + interval: INTERVALS_PER_SEC / 30, + index: 3, + }, + CameraFrame { + width: 640, + height: 480, + interval: INTERVALS_PER_SEC / 30, + index: 4, + }, + CameraFrame { + width: 480, + height: 240, + interval: INTERVALS_PER_SEC / 30, + index: 5, + }, + CameraFrame { + width: 160, + height: 120, + interval: INTERVALS_PER_SEC / 60, + index: 6, + }, + ], + } +} + +fn build_mjpg_list() -> CameraFormatList { + CameraFormatList { + format: FmtType::Mjpg, + fmt_index: 2, + frame: vec![ + CameraFrame { + width: 1920, + height: 1080, + interval: INTERVALS_PER_SEC / 30, + index: 1, + }, + CameraFrame { + width: 1280, + height: 720, + interval: INTERVALS_PER_SEC / 30, + index: 2, + }, + CameraFrame { + width: 960, + height: 540, + interval: INTERVALS_PER_SEC / 30, + index: 3, + }, + CameraFrame { + width: 480, + height: 240, + interval: INTERVALS_PER_SEC / 30, + index: 4, + }, + ], + } +} + +fn build_rgb565_list() -> CameraFormatList { + CameraFormatList { + format: FmtType::Rgb565, + fmt_index: 3, + frame: vec![ + CameraFrame { + width: 1280, + height: 720, + interval: INTERVALS_PER_SEC / 10, + index: 1, + }, + CameraFrame { + width: 640, + height: 480, + interval: INTERVALS_PER_SEC / 30, + index: 2, + }, + CameraFrame { + width: 480, + height: 240, + interval: INTERVALS_PER_SEC / 30, + index: 3, + }, + ], + } +} + +fn build_nv12_list() -> CameraFormatList { + CameraFormatList { + format: FmtType::Nv12, + fmt_index: 4, + frame: vec![ + CameraFrame { + width: 1280, + height: 720, + interval: INTERVALS_PER_SEC / 10, + index: 1, + }, + CameraFrame { + width: 640, + height: 480, + interval: INTERVALS_PER_SEC / 30, + index: 2, + }, + CameraFrame { + width: 480, + height: 240, + interval: INTERVALS_PER_SEC / 30, + index: 3, + }, + ], + } +} + +impl CameraBackend for DemoCameraBackend { + fn set_fmt(&mut self, cam_fmt: &CamBasicFmt) -> Result<()> { + *self.cur_format.lock().unwrap() = *cam_fmt; + info!("Demo camera backend set format {:?}", cam_fmt); + Ok(()) + } + + fn set_ctl(&self) -> Result<()> { + Ok(()) + } + + fn video_stream_on(&mut self) -> Result<()> { + if *self.state.lock().unwrap() == DeviceState::Running { + return Ok(()); + } + info!("Demo camera backend {} stream on", self.id); + let mut locked_state = self.state.lock().unwrap(); + *locked_state = DeviceState::Running; + drop(locked_state); + self.start_worker() + } + + fn video_stream_off(&mut self) -> Result<()> { + if *self.state.lock().unwrap() == DeviceState::Exit { + return Ok(()); + } + info!("Demo camera backend {} stream off", self.id); + let mut locked_state = self.state.lock().unwrap(); + *locked_state = DeviceState::Exit; + Ok(()) + } + + fn list_format(&mut self) -> Result> { + Ok(self.format_list.clone()) + } + + fn reset(&mut self) { + info!("Demo camera backend {} reset", self.id); + let mut locked_state = self.state.lock().unwrap(); + *locked_state = DeviceState::Exit; + let mut locked_frame = self.frame_image.lock().unwrap(); + locked_frame.used_len = 0; + } + + fn get_frame_size(&self) -> usize { + self.frame_image.lock().unwrap().used_len as usize + } + + fn next_frame(&mut self) -> Result<()> { + let mut locked_frame = self.frame_image.lock().unwrap(); + locked_frame.used_len = 0; + Ok(()) + } + + fn get_frame(&self, iovecs: 
&[Iovec], frame_offset: usize, len: usize) -> Result { + let locked_frame = self.frame_image.lock().unwrap(); + if frame_offset + len > locked_frame.used_len as usize { + bail!("Invalid frame offset {} or len {}", frame_offset, len); + } + let mut copied = 0; + for iov in iovecs { + if len == copied { + break; + } + let cnt = std::cmp::min(iov.iov_len as usize, len - copied); + let start = frame_offset + copied; + let end = start + cnt; + let tmp = &locked_frame.image[start..end]; + // SAFETY: iovecs is generated by address_space and len is not less than tmp's. + unsafe { mem_from_buf(tmp, iov.iov_base) } + .with_context(|| format!("Failed to write data to {:x}", iov.iov_base))?; + copied += cnt; + } + Ok(copied) + } + + fn get_format_by_index(&self, format_index: u8, frame_index: u8) -> Result { + let mut out = CamBasicFmt::default(); + for fmt in &self.format_list { + if fmt.fmt_index != format_index { + continue; + } + out.fmttype = fmt.format; + for frm in &fmt.frame { + if frm.index != frame_index { + continue; + } + out.width = frm.width; + out.height = frm.height; + out.fps = INTERVALS_PER_SEC + .checked_div(frm.interval) + .with_context(|| { + format!( + "Invalid interval {} for format/frame {}:{}", + frm.interval, format_index, frame_index + ) + })?; + return Ok(out); + } + } + bail!( + "format/frame with idx {}/{} is not found", + format_index, + frame_index + ); + } + + fn register_notify_cb(&mut self, cb: CameraNotifyCallback) { + self.notify_cb = Some(cb); + } + + fn register_broken_cb(&mut self, cb: CameraBrokenCallback) { + self.broken_cb = Some(cb); + } +} + +fn clip(x: i32) -> u8 { + if x > 255 { + 255 + } else if x < 0 { + 0 + } else { + x as u8 + } +} + +fn init_img(width: u32, height: u32, color: (u8, u8, u8)) -> Vec { + let len = height * width; + let (r, g, b) = color; + let mut img: Vec = Vec::with_capacity((len * 4) as usize); + for _ in 0..len { + img.push(b); + img.push(g); + img.push(r); + img.push(255); + } + img +} + +fn convert_to_nv12(source: &[u8], width: u32, height: u32) -> Vec { + let pixel = 4; + let len = height * width; + let mut img_nv12: Vec = Vec::with_capacity(len as usize); + for i in 0..len { + let idx = (i * pixel) as usize; + let (b, g, r) = ( + f32::from(source[idx]), + f32::from(source[idx + 1]), + f32::from(source[idx + 2]), + ); + let y = (0.299 * r + 0.587 * g + 0.114 * b) as u8; + img_nv12.push(y); + } + for i in 0..(width * height / 2) { + let idx = (i * 2 * pixel) as usize; + let (b, g, r) = ( + f32::from(source[idx]), + f32::from(source[idx + 1]), + f32::from(source[idx + 2]), + ); + let u = (-0.147 * r - 0.289 * g + 0.436 * b + 128_f32) as u8; + let v = (0.615 * r - 0.515 * g - 0.100 * b + 128_f32) as u8; + img_nv12.push(u); + img_nv12.push(v); + } + img_nv12 +} + +fn convert_to_yuy2(source: &[u8], width: u32, height: u32) -> Vec { + let pixbytes = 4; + let sz = width * height * 2; + let mut yuv = vec![0; sz as usize]; + for x in 0..height { + for y in 0..(width / 2) { + let offset = x * width * pixbytes + y * pixbytes * 2; + let src = &source[offset as usize..]; + let val = LittleEndian::read_i32(src); + let r1 = (val >> 16) & 0xff; + let g1 = (val >> 8) & 0xff; + let b1 = val & 0xff; + let src = &source[pixbytes as usize..]; + let val = LittleEndian::read_i32(src); + let r2 = (val >> 16) & 0xff; + let g2 = (val >> 8) & 0xff; + let b2 = val & 0xff; + + let y1 = clip(((66 * r1 + 129 * g1 + 25 * b1 + 128) >> 8) + 16); + let u1 = clip( + (((-38 * r1 - 74 * g1 + 112 * b1 + 128) >> 8) + + ((-38 * r2 - 74 * g2 + 112 * b2 + 128) >> 8)) + / 
2 + + 128, + ); + let y2 = clip(((66 * r2 + 129 * g2 + 25 * b2 + 128) >> 8) + 16); + let v1 = clip( + (((112 * r1 - 94 * g1 - 18 * b1 + 128) >> 8) + + ((112 * r2 - 94 * g2 - 18 * b2 + 128) >> 8)) + / 2 + + 128, + ); + let mut dst = (x * width * 2 + y * 4) as usize; + yuv[dst] = y1; + dst += 1; + yuv[dst] = u1; + dst += 1; + yuv[dst] = y2; + dst += 1; + yuv[dst] = v1; + } + } + yuv +} + +// NOTE: Fake mjpg data, which is used to simulate frame data of different lengths. +fn build_fake_mjpg(width: u32, height: u32) -> Vec { + let mut rng = thread_rng(); + let len = rng.gen_range((width * height / 20)..(width * height / 4)); + let start = vec![0xff, 0xd8, 0xff, 0xe0]; + let data = vec![0xfc; len as usize]; + let end = vec![0xff, 0xf9]; + [start, data, end].concat() +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_yuy2() { + let mut frame = ImageFrame::default(); + let buf = frame + .build_image(&ImageMode::Default, &FmtType::Yuy2, 2, 2) + .unwrap(); + assert_eq!(buf, [82, 90, 82, 240, 82, 90, 82, 240]); + } + + #[test] + fn test_rgb() { + let mut frame = ImageFrame::default(); + let buf = frame + .build_image(&ImageMode::Default, &FmtType::Rgb565, 1, 1) + .unwrap(); + assert_eq!(buf, [0, 0, 255, 255]); + } +} diff --git a/devices/src/camera_backend/mod.rs b/devices/src/camera_backend/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..d9fd95f980d7ad05d895faa70667e58653a71df6 --- /dev/null +++ b/devices/src/camera_backend/mod.rs @@ -0,0 +1,218 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +//! The abstract layer that connects different frontend & backend camera devices. +//! Backend devices, such as v4l2, usb, or demo device, etc., shall implement trait +//! CameraBackend. + +pub mod demo; +#[cfg(all(target_env = "ohos", feature = "usb_camera_oh"))] +pub mod ohcam; +#[cfg(feature = "usb_camera_v4l2")] +pub mod v4l2; + +use std::path::Path; +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Context, Result}; + +use self::demo::DemoCameraBackend; +#[cfg(all(target_env = "ohos", feature = "usb_camera_oh"))] +use self::ohcam::OhCameraBackend; +#[cfg(feature = "usb_camera_v4l2")] +use self::v4l2::V4l2CameraBackend; +use crate::usb::camera::UsbCameraConfig; +use machine_manager::config::{CamBackendType, CameraDevConfig, ConfigError}; +use util::aio::Iovec; + +/// Frame interval in 100ns units. 
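+/// For example, a 30 fps stream is described by an interval of
+/// INTERVALS_PER_SEC / 30 = 333_333 (about 33.3 ms), and the frame rate can
+/// be recovered again as INTERVALS_PER_SEC / interval.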
+pub const INTERVALS_PER_SEC: u32 = 10_000_000; + +#[derive(Clone, Copy, Default, Debug)] +pub struct CamBasicFmt { + pub width: u32, + pub height: u32, + fps: u32, + pub fmttype: FmtType, +} + +impl CamBasicFmt { + pub fn get_frame_intervals(&self) -> Result { + if self.fps == 0 { + bail!("Invalid fps!"); + } + Ok(INTERVALS_PER_SEC / self.fps) + } +} + +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, Default)] +pub enum FmtType { + #[default] + Yuy2 = 0, + Rgb565, + Mjpg, + Nv12, +} + +#[derive(Clone, Debug)] +pub struct CameraFrame { + pub width: u32, + pub height: u32, + pub index: u8, + pub interval: u32, +} + +#[derive(Clone)] +pub struct CameraFormatList { + pub format: FmtType, + pub fmt_index: u8, + pub frame: Vec, +} + +pub fn check_path(path: &str) -> Result { + let filepath = path.to_string(); + if !Path::new(path).exists() { + bail!(ConfigError::FileNotExist(filepath)); + } + + Ok(filepath) +} + +pub fn get_video_frame_size(width: u32, height: u32, fmt: FmtType) -> Result { + let pixel_size = width + .checked_mul(height) + .with_context(|| format!("Invalid width {} or height {}", width, height))?; + if pixel_size % 2 != 0 { + bail!("Abnormal width {} or height {}", width, height); + } + match fmt { + // NV12 format: 4 Y values share a pair of UV values, that means every 4 pixels + // need 6 bytes. On average, 1 pixel needs 1.5 bytes. + FmtType::Nv12 => pixel_size + .checked_mul(3) + .with_context(|| { + format!( + "fmt {:?}, Invalid width {} or height {}", + fmt, width, height + ) + })? + .checked_div(2) + .with_context(|| { + format!( + "fmt {:?}, Invalid width {} or height {}", + fmt, width, height + ) + }), + _ => pixel_size.checked_mul(2).with_context(|| { + format!( + "fmt {:?}, Invalid width {} or height {}", + fmt, width, height + ) + }), + } +} + +pub fn get_bit_rate(width: u32, height: u32, interval: u32, fmt: FmtType) -> Result { + let fm_size = get_video_frame_size(width, height, fmt)?; + let size_in_bit = u64::from(fm_size) * u64::from(INTERVALS_PER_SEC) * 8; + let rate = size_in_bit + .checked_div(u64::from(interval)) + .with_context(|| format!("Invalid size {} or interval {}", size_in_bit, interval))?; + Ok(rate as u32) +} + +#[macro_export] +macro_rules! video_fourcc { + ($a:expr, $b:expr, $c:expr, $d:expr) => { + $a as u32 | (($b as u32) << 8) | (($c as u32) << 16) | (($d as u32) << 24) + }; +} + +pub const PIXFMT_RGB565: u32 = video_fourcc!('R', 'G', 'B', 'P'); +pub const PIXFMT_YUYV: u32 = video_fourcc!('Y', 'U', 'Y', 'V'); +pub const PIXFMT_MJPG: u32 = video_fourcc!('M', 'J', 'P', 'G'); +pub const PIXFMT_NV12: u32 = video_fourcc!('N', 'V', '1', '2'); + +/// Callback function which is called when frame data is coming. +pub type CameraNotifyCallback = Arc; + +/// Callback function which is called when backend is broken. +pub type CameraBrokenCallback = Arc; + +pub trait CameraBackend: Send + Sync { + /// Set a specific format. + fn set_fmt(&mut self, fmt: &CamBasicFmt) -> Result<()>; + + /// Set control capabilities and properties. + fn set_ctl(&self) -> Result<()>; + + // Turn stream on to start to receive frame buffer. + fn video_stream_on(&mut self) -> Result<()>; + + // Turn stream off to end receiving frame buffer. + fn video_stream_off(&mut self) -> Result<()>; + + /// List all formats supported by backend. + fn list_format(&mut self) -> Result>; + + /// Reset the device. + fn reset(&mut self); + + /// Get the total size of current frame. + fn get_frame_size(&self) -> usize; + + /// Copy frame data to iovecs. 
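+    ///
+    /// `frame_offset` is the byte offset into the current frame, `len` is the
+    /// number of bytes requested, and the return value is the number of bytes
+    /// actually copied. A frontend typically drains one frame in chunks and
+    /// then advances, roughly (illustrative sketch, names are placeholders):
+    ///   let total = backend.get_frame_size();
+    ///   let mut off = 0;
+    ///   while off < total {
+    ///       off += backend.get_frame(&iovecs, off, total - off)?;
+    ///   }
+    ///   backend.next_frame()?;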
+ fn get_frame(&self, iovecs: &[Iovec], frame_offset: usize, len: usize) -> Result; + + /// Get format/frame info including width/height/interval/fmt according to format/frame index. + fn get_format_by_index(&self, format_index: u8, frame_index: u8) -> Result; + + /// Get next frame when current frame is read complete. + fn next_frame(&mut self) -> Result<()>; + + /// Register notify callback which is called when data is coming. + fn register_notify_cb(&mut self, cb: CameraNotifyCallback); + + /// Register broken callback which is called when backend is broken. + fn register_broken_cb(&mut self, cb: CameraBrokenCallback); + + /// Pause/resume stream. + fn pause(&mut self, _paused: bool) {} +} + +#[allow(unused_variables)] +pub fn create_cam_backend( + config: UsbCameraConfig, + cameradev: CameraDevConfig, + _tokenid: u64, +) -> Result>> { + let cam: Arc> = match cameradev.backend { + #[cfg(feature = "usb_camera_v4l2")] + CamBackendType::V4l2 => Arc::new(Mutex::new(V4l2CameraBackend::new( + cameradev.id, + cameradev.path, + config.iothread, + )?)), + #[cfg(all(target_env = "ohos", feature = "usb_camera_oh"))] + CamBackendType::OhCamera => Arc::new(Mutex::new(OhCameraBackend::new( + cameradev.id, + cameradev.path, + _tokenid, + )?)), + CamBackendType::Demo => Arc::new(Mutex::new(DemoCameraBackend::new( + config.id, + cameradev.path, + )?)), + }; + + Ok(cam) +} diff --git a/devices/src/camera_backend/ohcam.rs b/devices/src/camera_backend/ohcam.rs new file mode 100755 index 0000000000000000000000000000000000000000..075f1c170e04447fd9e826fcb1515ec58c053a60 --- /dev/null +++ b/devices/src/camera_backend/ohcam.rs @@ -0,0 +1,456 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::collections::HashMap; +use std::ffi::CStr; +use std::sync::RwLock; + +use anyhow::{bail, Context, Result}; +use log::error; +use once_cell::sync::Lazy; + +use crate::camera_backend::{ + CamBasicFmt, CameraBackend, CameraBrokenCallback, CameraFormatList, CameraFrame, + CameraNotifyCallback, FmtType, +}; +#[cfg(any( + feature = "trace_to_logger", + feature = "trace_to_ftrace", + all(target_env = "ohos", feature = "trace_to_hitrace") +))] +use trace::trace_scope::Scope; +use util::aio::Iovec; +use util::ohos_binding::camera::*; +use util::ohos_binding::misc::bound_tokenid; + +type OhCamCB = RwLock>; +static OHCAM_CALLBACKS: Lazy = Lazy::new(|| RwLock::new(HashMap::new())); + +// In UVC, interval's unit is 100ns. +// So, fps * interval / 10_000_000 == 1. +const FPS_INTERVAL_TRANS: u32 = 10_000_000; +const RESOLUTION_WHITELIST: [(i32, i32); 2] = [(640, 480), (1280, 720)]; +const FRAME_FORMAT_WHITELIST: [i32; 2] = [CAMERA_FORMAT_YUYV422, CAMERA_FORMAT_NV12]; +const FPS_WHITELIST: [i32; 1] = [30]; + +#[derive(Default)] +struct OhCamCallBack { + /// Callback to used to notify when data is coming. + notify_cb: Option, + /// Callback to used to notify the broken. 
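+    /// It is invoked from the native `on_broken` callback when the OH camera
+    /// session reports a failure, so the frontend can tear the stream down.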
+ broken_cb: Option, + ptr: Option, + buffer_size: u64, +} + +impl OhCamCallBack { + fn set_buffer(&mut self, addr: u64, s: i32) { + self.buffer_size = s as u64; + self.ptr = Some(addr); + } + + fn get_buffer(&self) -> (Option, u64) { + (self.ptr, self.buffer_size) + } + + fn clear_buffer(&mut self) { + self.buffer_size = 0; + self.ptr = None; + } + + fn set_notify_cb(&mut self, cb: CameraNotifyCallback) { + self.notify_cb = Some(cb); + } + + fn set_broken_cb(&mut self, cb: CameraNotifyCallback) { + self.broken_cb = Some(cb); + } + + fn notify(&self) { + if let Some(notify_cb) = &self.notify_cb { + notify_cb(); + } + } + + fn broken(&self) { + if let Some(broken_cb) = &self.broken_cb { + broken_cb(); + } + } +} + +#[cfg(any( + feature = "trace_to_logger", + feature = "trace_to_ftrace", + all(target_env = "ohos", feature = "trace_to_hitrace") +))] +#[derive(Clone, Default)] +struct OhCameraAsyncScope { + next_frame_id: u64, + async_scope: Option, +} + +#[cfg(any( + feature = "trace_to_logger", + feature = "trace_to_ftrace", + all(target_env = "ohos", feature = "trace_to_hitrace") +))] +impl OhCameraAsyncScope { + fn start(&mut self) { + self.async_scope = Some(trace::ohcam_next_frame(true, self.next_frame_id)); + self.next_frame_id += 1; + } + + fn stop(&mut self) { + self.async_scope = None; + } +} + +#[derive(Clone)] +pub struct OhCameraBackend { + // ID for this OhCameraBackend. + id: String, + // ID of OH camera device. + camid: String, + profile_cnt: u8, + ctx: OhCamera, + fmt_list: Vec, + selected_profile: u8, + stream_on: bool, + paused: bool, + #[cfg(any( + feature = "trace_to_logger", + feature = "trace_to_ftrace", + all(target_env = "ohos", feature = "trace_to_hitrace") + ))] + async_scope: Box, + tokenid: u64, +} + +// SAFETY: Send and Sync is not auto-implemented for raw pointer type. +// implementing them is safe because ctx field is access. +unsafe impl Send for OhCameraBackend {} +// SAFETY: Same reason as above. 
+unsafe impl Sync for OhCameraBackend {} + +fn cam_fmt_from_oh(t: i32) -> Result { + let fmt = match t { + CAMERA_FORMAT_YUV420SP => FmtType::Nv12, + CAMERA_FORMAT_NV12 => FmtType::Nv12, + CAMERA_FORMAT_YUYV422 => FmtType::Yuy2, + CAMERA_FORMAT_MJPEG => FmtType::Mjpg, + _ => bail!("OHCAM: No supported type {}", t), + }; + + Ok(fmt) +} + +impl Drop for OhCameraBackend { + fn drop(&mut self) { + OHCAM_CALLBACKS.write().unwrap().remove_entry(&self.camid); + } +} + +impl OhCameraBackend { + pub fn new(id: String, cam_name: String, tokenid: u64) -> Result { + let (ctx, profile_cnt) = OhCamera::new(cam_name.clone())?; + + Ok(OhCameraBackend { + id, + camid: cam_name, + profile_cnt: profile_cnt as u8, + ctx, + fmt_list: vec![], + selected_profile: 0, + stream_on: false, + paused: false, + #[cfg(any( + feature = "trace_to_logger", + feature = "trace_to_ftrace", + all(target_env = "ohos", feature = "trace_to_hitrace") + ))] + async_scope: Box::::default(), + tokenid, + }) + } +} + +impl CameraBackend for OhCameraBackend { + fn set_fmt(&mut self, cam_fmt: &CamBasicFmt) -> Result<()> { + for fmt in &self.fmt_list { + if fmt.format != cam_fmt.fmttype { + continue; + } + for frm in &fmt.frame { + if frm.width != cam_fmt.width || frm.height != cam_fmt.height { + continue; + } + + let fps = FPS_INTERVAL_TRANS + .checked_div(frm.interval) + .with_context(|| format!("OHCAM: Invalid interval {}", frm.interval))?; + if fps != cam_fmt.fps { + continue; + } + + self.selected_profile = fmt.fmt_index - 1; + self.ctx.set_fmt(i32::from(self.selected_profile))?; + return Ok(()); + } + } + Ok(()) + } + + fn set_ctl(&self) -> Result<()> { + Ok(()) + } + + fn video_stream_on(&mut self) -> Result<()> { + if self.tokenid != 0 { + bound_tokenid(self.tokenid)?; + } + self.ctx.start_stream(on_buffer_available, on_broken)?; + self.stream_on = true; + Ok(()) + } + + fn video_stream_off(&mut self) -> Result<()> { + self.ctx.stop_stream(); + if let Some(cb) = OHCAM_CALLBACKS.write().unwrap().get_mut(&self.camid) { + cb.clear_buffer(); + } + self.stream_on = false; + #[cfg(any( + feature = "trace_to_logger", + feature = "trace_to_ftrace", + all(target_env = "ohos", feature = "trace_to_hitrace") + ))] + self.async_scope.stop(); + Ok(()) + } + + fn list_format(&mut self) -> Result> { + let mut fmt_list: Vec = Vec::new(); + + for idx in 0..self.profile_cnt { + match self.ctx.get_profile(i32::from(idx)) { + Ok((fmt, width, height, fps)) => { + if !FRAME_FORMAT_WHITELIST.iter().any(|&x| x == fmt) + || !RESOLUTION_WHITELIST.iter().any(|&x| x == (width, height)) + || !FPS_WHITELIST.iter().any(|&x| x == fps) + { + continue; + } + + let frame = CameraFrame { + width: width as u32, + height: height as u32, + index: 1, + interval: FPS_INTERVAL_TRANS / fps as u32, + }; + fmt_list.push(CameraFormatList { + format: cam_fmt_from_oh(fmt)?, + frame: vec![frame], + fmt_index: idx.checked_add(1).unwrap_or_else(|| { + error!("list_format: too much profile ID"); + u8::MAX + }), + }); + } + Err(e) => error!("{:?}", e), + } + } + self.fmt_list = fmt_list.clone(); + Ok(fmt_list) + } + + fn reset(&mut self) { + if let Some(cb) = OHCAM_CALLBACKS.write().unwrap().get_mut(&self.camid) { + cb.clear_buffer(); + } + if let Err(e) = self.ctx.reset_camera(self.camid.clone()) { + error!("OHCAM: reset failed, err: {e}"); + } + #[cfg(any( + feature = "trace_to_logger", + feature = "trace_to_ftrace", + all(target_env = "ohos", feature = "trace_to_hitrace") + ))] + self.async_scope.stop(); + } + + fn get_format_by_index(&self, format_index: u8, frame_index: u8) -> 
Result { + let mut out = CamBasicFmt::default(); + for fmt in &self.fmt_list { + if fmt.fmt_index != format_index { + continue; + } + out.fmttype = fmt.format; + for frm in &fmt.frame { + if frm.index != frame_index { + continue; + } + out.width = frm.width; + out.height = frm.height; + out.fps = FPS_INTERVAL_TRANS + .checked_div(frm.interval) + .with_context(|| { + format!( + "{}: Invalid interval {} for format/frame {}:{}", + self.id, frm.interval, format_index, frame_index + ) + })?; + return Ok(out); + } + } + bail!( + "{}: format/frame with idx {}/{} is not found", + self.id, + format_index, + frame_index + ); + } + + fn get_frame_size(&self) -> usize { + if let Some(cb) = OHCAM_CALLBACKS.read().unwrap().get(&self.camid) { + return cb.get_buffer().1 as usize; + } + 0 + } + + fn next_frame(&mut self) -> Result<()> { + #[cfg(any( + feature = "trace_to_logger", + feature = "trace_to_ftrace", + all(target_env = "ohos", feature = "trace_to_hitrace") + ))] + self.async_scope.start(); + self.ctx.next_frame(); + if let Some(cb) = OHCAM_CALLBACKS.write().unwrap().get_mut(&self.camid) { + cb.clear_buffer(); + } + Ok(()) + } + + fn get_frame(&self, iovecs: &[Iovec], frame_offset: usize, len: usize) -> Result { + let (src, src_len) = OHCAM_CALLBACKS + .read() + .unwrap() + .get(&self.camid) + .with_context(|| "Invalid camid in callback table")? + .get_buffer(); + + if src.is_none() || src.unwrap() == 0 { + bail!("Invalid frame src") + } + + if src_len == 0_u64 { + bail!("Invalid frame src_len {}", src_len); + } + + trace::trace_scope_start!(ohcam_get_frame, args = (frame_offset, len)); + + let mut copied = 0_usize; + for iov in iovecs { + if len == copied { + break; + } + let cnt = std::cmp::min(iov.iov_len as usize, len - copied); + let src_ptr = src.unwrap() + frame_offset as u64 + copied as u64; + // SAFETY: the address is not out of range. + unsafe { + std::ptr::copy_nonoverlapping(src_ptr as *const u8, iov.iov_base as *mut u8, cnt); + } + copied += cnt; + } + Ok(copied) + } + + fn register_notify_cb(&mut self, cb: CameraNotifyCallback) { + OHCAM_CALLBACKS + .write() + .unwrap() + .entry(self.camid.clone()) + .or_insert(OhCamCallBack::default()) + .set_notify_cb(cb); + } + + fn register_broken_cb(&mut self, cb: CameraBrokenCallback) { + OHCAM_CALLBACKS + .write() + .unwrap() + .entry(self.camid.clone()) + .or_insert(OhCamCallBack::default()) + .set_broken_cb(cb); + } + + fn pause(&mut self, paused: bool) { + if self.paused == paused { + return; + } + + if paused { + // If stream is off, we don't need to set self.paused. + // Because it's not required to re-open stream while + // vm is resuming. + if !self.stream_on { + return; + } + self.paused = true; + self.video_stream_off().unwrap_or_else(|e| { + error!("ohcam pause: failed to pause stream {:?}", e); + }); + } else { + self.paused = false; + self.video_stream_on().unwrap_or_else(|e| { + error!("ohcam resume: failed to resume stream {:?}", e); + }) + } + } +} + +fn cstr_to_string(src: *const u8) -> Result { + if src.is_null() { + bail!("cstr_to_string: src is null"); + } + // SAFETY: we promise that 'src' ends with "null" symbol. + let src_cstr = unsafe { CStr::from_ptr(src) }; + let target_string = src_cstr + .to_str() + .with_context(|| "cstr_to_string: failed to transfer camid")? + .to_owned(); + + Ok(target_string) +} + +// SAFETY: use RW lock to ensure the security of resources. 
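+// The native OH camera layer calls this with the address and length of the
+// newly filled frame buffer plus a NUL-terminated camera id; the matching
+// entry in OHCAM_CALLBACKS caches the buffer and notifies the frontend.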
+unsafe extern "C" fn on_buffer_available(src_buffer: u64, length: i32, camid: *const u8) { + let cam = cstr_to_string(camid).unwrap_or_else(|e| { + error!("{e}"); + "".to_string() + }); + if let Some(cb) = OHCAM_CALLBACKS.write().unwrap().get_mut(&cam) { + cb.set_buffer(src_buffer, length); + cb.notify(); + } +} + +// SAFETY: use RW lock to ensure the security of resources. +unsafe extern "C" fn on_broken(camid: *const u8) { + let cam = cstr_to_string(camid).unwrap_or_else(|e| { + error!("{e}"); + "".to_string() + }); + if let Some(cb) = OHCAM_CALLBACKS.read().unwrap().get(&cam) { + cb.broken(); + } +} diff --git a/devices/src/camera_backend/v4l2.rs b/devices/src/camera_backend/v4l2.rs new file mode 100644 index 0000000000000000000000000000000000000000..f86b13ea8b31856e286b7178e6673c5a34db37ee --- /dev/null +++ b/devices/src/camera_backend/v4l2.rs @@ -0,0 +1,550 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +//! V4L2 backend for vCamera device. /dev/videoX and VIDIOC_XX ioctls are used. + +use std::os::unix::prelude::{AsRawFd, RawFd}; +use std::rc::Rc; +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Context, Result}; +use log::{error, info, warn}; +use v4l2_sys_mit::{ + v4l2_buf_type_V4L2_BUF_TYPE_VIDEO_CAPTURE, v4l2_buffer, v4l2_fmtdesc, v4l2_format, + v4l2_frmivalenum, v4l2_frmsizeenum, v4l2_frmsizetypes_V4L2_FRMSIZE_TYPE_DISCRETE, + v4l2_memory_V4L2_MEMORY_MMAP, v4l2_requestbuffers, v4l2_streamparm, V4L2_CAP_STREAMING, + V4L2_CAP_VIDEO_CAPTURE, V4L2_FMT_FLAG_EMULATED, +}; +use vmm_sys_util::epoll::EventSet; + +use super::{PIXFMT_MJPG, PIXFMT_NV12, PIXFMT_RGB565, PIXFMT_YUYV}; +use crate::camera_backend::{ + check_path, CamBasicFmt, CameraBackend, CameraBrokenCallback, CameraFormatList, CameraFrame, + CameraNotifyCallback, FmtType, INTERVALS_PER_SEC, +}; +use machine_manager::event_loop::{register_event_helper, unregister_event_helper}; +use util::aio::Iovec; +use util::loop_context::{EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation}; +use util::v4l2::{new_init, V4l2Backend}; + +const BUFFER_CNT: usize = 4; + +#[derive(Default)] +pub struct Sample { + /// Sample address. + addr: u64, + /// Sample used length. + used_len: u64, + /// Sample in which buffer. + buf_index: u32, +} + +impl Sample { + fn reset(&mut self) { + self.addr = 0; + self.used_len = 0; + self.buf_index = 0; + } +} + +#[derive(Clone)] +pub struct V4l2CameraBackend { + id: String, + dev_path: String, + /// Sample info. + sample: Arc>, + /// V4l2 backend used to get frame. + backend: Option>, + /// Callback to used to notify when data is coming. + notify_cb: Option, + /// Callback to used to notify the broken. + broken_cb: Option, + /// If the video stream is on or not. + running: bool, + /// If the backend fd is listening or not. 
+ listening: bool, + iothread: Option, + delete_evts: Vec, + fmt_list: Vec, +} + +impl V4l2CameraBackend { + pub fn new(id: String, path: String, iothread: Option) -> Result { + let backend = V4l2Backend::new(path.clone(), BUFFER_CNT)?; + let checked_path = check_path(path.as_str())?; + let cam = V4l2CameraBackend { + id, + dev_path: checked_path, + sample: Arc::new(Mutex::new(Sample::default())), + backend: Some(Arc::new(backend)), + running: false, + listening: false, + notify_cb: None, + broken_cb: None, + iothread, + delete_evts: Vec::new(), + fmt_list: vec![], + }; + cam.check_cap()?; + Ok(cam) + } + + fn check_cap(&self) -> Result<()> { + // SAFETY: backend is inited in outside function. + let cap = self.backend.as_ref().unwrap().query_cap()?; + if cap.capabilities & V4L2_CAP_VIDEO_CAPTURE != V4L2_CAP_VIDEO_CAPTURE { + bail!( + "Device {} not support capture capability {}", + self.id, + cap.capabilities + ); + } + if cap.device_caps & V4L2_CAP_VIDEO_CAPTURE != V4L2_CAP_VIDEO_CAPTURE { + bail!( + "Device caps {} not support capture capability {}", + self.id, + cap.device_caps + ); + } + if cap.capabilities & V4L2_CAP_STREAMING != V4L2_CAP_STREAMING { + bail!( + "Device {} not support streaming capability {}", + self.id, + cap.capabilities + ); + } + Ok(()) + } + + fn register_fd(&mut self) -> Result<()> { + if self.listening { + self.unregister_fd()?; + } + let backend = self.backend.as_ref().with_context(|| "Backend is none")?; + trace::camera_register_fd(&self.id, backend.as_raw_fd()); + // Register event notifier for /dev/videoX. + let handler = Arc::new(Mutex::new(V4l2IoHandler::new( + &self.sample, + backend, + self.notify_cb.clone(), + self.broken_cb.clone(), + ))); + register_event_helper( + EventNotifierHelper::internal_notifiers(handler), + self.iothread.as_ref(), + &mut self.delete_evts, + )?; + self.listening = true; + Ok(()) + } + + fn unregister_fd(&mut self) -> Result<()> { + if !self.listening { + warn!("Camera {} is not listening", self.id); + return Ok(()); + } + let backend = self.backend.as_ref().with_context(|| "Backend is none")?; + trace::camera_unregister_fd(&self.id, backend.as_raw_fd()); + unregister_event_helper(self.iothread.as_ref(), &mut self.delete_evts)?; + self.listening = false; + Ok(()) + } + + fn list_frame_size(&self, pixfmt: u32) -> Result> { + let backend = self.backend.as_ref().with_context(|| "Backend is none")?; + let mut list = Vec::new(); + let mut frmsize = new_init::(); + let mut frm_idx = 1; + frmsize.pixel_format = pixfmt; + const FRAME_SIZE_LIMIT: u32 = 1000; + for i in 0..FRAME_SIZE_LIMIT { + frmsize.index = i; + let frame_size_end = backend.enum_frame_size(&mut frmsize)?; + if frame_size_end { + break; + } + // NOTE: Only support discrete now. + if frmsize.type_ != v4l2_frmsizetypes_V4L2_FRMSIZE_TYPE_DISCRETE { + continue; + } + // SAFETY: There are two enumeration types for v4l2_frmivalenum__bindgen_ty_1: discrete and stepwise. + // Parsing will not result in undefined value. + let width = unsafe { frmsize.__bindgen_anon_1.discrete.width }; + // SAFETY: The reason is same as above. 
+ let height = unsafe { frmsize.__bindgen_anon_1.discrete.height }; + let interval_list = self.list_frame_interval(pixfmt, width, height)?; + for interval in interval_list { + list.push(CameraFrame { + width, + height, + interval, + index: frm_idx, + }); + frm_idx += 1; + } + } + Ok(list) + } + + fn list_frame_interval(&self, pixfmt: u32, width: u32, height: u32) -> Result> { + let backend = self.backend.as_ref().with_context(|| "Backend is none")?; + let mut list = Vec::new(); + let mut frame_val = new_init::(); + frame_val.pixel_format = pixfmt; + frame_val.width = width; + frame_val.height = height; + const FRAME_INTERVAL_LIMIT: u32 = 1000; + for i in 0..FRAME_INTERVAL_LIMIT { + frame_val.index = i; + let interval_end = backend.enum_frame_interval(&mut frame_val)?; + if interval_end { + break; + } + // NOTE: Only support discrete now. + if frame_val.type_ != v4l2_frmsizetypes_V4L2_FRMSIZE_TYPE_DISCRETE { + continue; + } + // SAFETY: There are two enumeration types for v4l2_frmivalenum__bindgen_ty_1: discrete and stepwise. + // Parsing will not result in undefined value. + let numerator = unsafe { frame_val.__bindgen_anon_1.discrete.numerator }; + // SAFETY: The reason is as same above. + let denominator = unsafe { frame_val.__bindgen_anon_1.discrete.denominator }; + if denominator == 0 { + warn!( + "Invalid denominator ignore it, format {} width {} height {}", + frame_val.pixel_format, frame_val.width, frame_val.height + ); + continue; + } + let interval = (u64::from(numerator) * u64::from(INTERVALS_PER_SEC) + / u64::from(denominator)) as u32; + list.push(interval); + } + Ok(list) + } + + fn is_pixfmt_supported(&self, pixelformat: u32) -> bool { + pixelformat == PIXFMT_MJPG + || pixelformat == PIXFMT_RGB565 + || pixelformat == PIXFMT_YUYV + || pixelformat == PIXFMT_NV12 + } +} + +impl CameraBackend for V4l2CameraBackend { + fn set_fmt(&mut self, cam_fmt: &CamBasicFmt) -> Result<()> { + info!("Camera {} set format {:?}", self.id, cam_fmt); + if self.listening { + self.unregister_fd()?; + } + + // NOTE: Reopen backend to avoid Device or Resource busy. + let backend = V4l2Backend::new(self.dev_path.clone(), BUFFER_CNT)?; + trace::camera_set_format(&self.id, backend.as_raw_fd()); + self.backend = Some(Arc::new(backend)); + + let mut fmt = new_init::(); + fmt.type_ = V4L2_CAP_VIDEO_CAPTURE; + fmt.fmt.pix.width = cam_fmt.width; + fmt.fmt.pix.height = cam_fmt.height; + fmt.fmt.pix.pixelformat = cam_fmt_to_v4l2(&cam_fmt.fmttype); + fmt.fmt.pix.field = 4; + // SAFETY: backend is inited before. + let backend = self.backend.as_ref().unwrap(); + backend.set_format(&fmt)?; + + let mut parm = new_init::(); + parm.type_ = v4l2_buf_type_V4L2_BUF_TYPE_VIDEO_CAPTURE; + let interval = cam_fmt.get_frame_intervals()?; + // SAFETY: there are two enumeration types for v4l2_streamparm__bindgen_ty_1: + // v4l2_captureparm and v4l2_outputparm. They have same length in memory and + // parsing will not result in undefined value. 
+ unsafe { + parm.parm.capture.timeperframe.numerator = 30; + parm.parm.capture.timeperframe.denominator = + parm.parm.capture.timeperframe.numerator * INTERVALS_PER_SEC / interval; + } + backend.set_stream_parameter(&parm)?; + Ok(()) + } + + fn set_ctl(&self) -> Result<()> { + Ok(()) + } + + fn video_stream_on(&mut self) -> Result<()> { + if self.running { + warn!("Camera {} already running", self.id); + return Ok(()); + } + info!("Camera {} stream on", self.id); + let mut bufs = new_init::(); + bufs.type_ = v4l2_buf_type_V4L2_BUF_TYPE_VIDEO_CAPTURE; + bufs.memory = v4l2_memory_V4L2_MEMORY_MMAP; + let backend = self.backend.as_ref().with_context(|| "Backend is none")?; + backend.request_buffers(&mut bufs)?; + backend.stream_on(V4L2_CAP_VIDEO_CAPTURE as std::os::raw::c_int)?; + self.register_fd()?; + self.running = true; + Ok(()) + } + + fn video_stream_off(&mut self) -> Result<()> { + info!("Camera {} stream off", self.id); + self.unregister_fd()?; + if let Some(backend) = self.backend.as_ref() { + backend.stream_off(V4L2_CAP_VIDEO_CAPTURE as std::os::raw::c_int)?; + backend.release_buffers()?; + self.backend = None; + } + self.running = false; + Ok(()) + } + + fn list_format(&mut self) -> Result> { + let backend = self.backend.as_ref().with_context(|| "Backend is none")?; + let mut list = Vec::new(); + let mut desc = new_init::(); + desc.type_ = V4L2_CAP_VIDEO_CAPTURE; + const FORMAT_LIMIT: u32 = 1000; + let mut fmt_index = 1; + for i in 0..FORMAT_LIMIT { + desc.index = i; + let format_end = backend.enum_format(&mut desc)?; + if format_end { + break; + } + if desc.flags & V4L2_FMT_FLAG_EMULATED != 0 + || !self.is_pixfmt_supported(desc.pixelformat) + { + continue; + } + list.push(CameraFormatList { + format: cam_fmt_from_v4l2(desc.pixelformat)?, + frame: self.list_frame_size(desc.pixelformat)?, + fmt_index, + }); + fmt_index += 1; + } + + self.fmt_list = list.clone(); + + Ok(list) + } + + fn reset(&mut self) { + info!("device {} reset", self.id); + if self.running { + if let Err(e) = self.unregister_fd() { + warn!("Failed to unregister fd when reset {:?}", e); + } + if let Some(backend) = self.backend.as_ref() { + if let Err(e) = backend.stream_off(V4L2_CAP_VIDEO_CAPTURE as std::os::raw::c_int) { + warn!("Failed to stream off when reset {:?}", e); + } + if let Err(e) = backend.release_buffers() { + warn!("Failed to release buffer when reset {:?}", e); + } + self.backend = None; + } + } + self.listening = false; + self.running = false; + self.sample.lock().unwrap().reset(); + } + + fn get_format_by_index(&self, format_index: u8, frame_index: u8) -> Result { + let mut out = CamBasicFmt::default(); + for fmt in &self.fmt_list { + if fmt.fmt_index != format_index { + continue; + } + out.fmttype = fmt.format; + for frm in &fmt.frame { + if frm.index != frame_index { + continue; + } + out.width = frm.width; + out.height = frm.height; + out.fps = 10000000_u32.checked_div(frm.interval).with_context(|| { + format!( + "Invalid interval {} for format/frame {}:{}", + frm.interval, format_index, frame_index + ) + })?; + trace::camera_get_format_by_index(format_index, frame_index, &out); + return Ok(out); + } + } + + bail!( + "format/frame with idx {}/{} is not found", + format_index, + frame_index + ); + } + + fn get_frame_size(&self) -> usize { + self.sample.lock().unwrap().used_len as usize + } + + fn next_frame(&mut self) -> Result<()> { + let mut locked_sample = self.sample.lock().unwrap(); + locked_sample.used_len = 0; + let backend = self.backend.as_ref().with_context(|| "Backend is none")?; + 
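+        // Hand the just-consumed buffer back to the driver (VIDIOC_QBUF) so
+        // it can be refilled; the next dequeued buffer becomes the new sample.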
let mut buf = new_init::(); + buf.type_ = v4l2_buf_type_V4L2_BUF_TYPE_VIDEO_CAPTURE; + buf.memory = v4l2_memory_V4L2_MEMORY_MMAP; + buf.index = locked_sample.buf_index; + backend.queue_buffer(&buf)?; + Ok(()) + } + + fn get_frame(&self, iovecs: &[Iovec], frame_offset: usize, len: usize) -> Result { + let locked_sample = self.sample.lock().unwrap(); + if frame_offset + len > locked_sample.used_len as usize { + bail!("Invalid frame offset {} or len {}", frame_offset, len); + } + let mut copied = 0; + for iov in iovecs { + if len == copied { + break; + } + let cnt = std::cmp::min(iov.iov_len as usize, len - copied); + let src_ptr = locked_sample.addr + frame_offset as u64 + copied as u64; + // SAFETY: the address is not out of range. + unsafe { + std::ptr::copy(src_ptr as *const u8, iov.iov_base as *mut u8, cnt); + } + copied += cnt; + } + Ok(copied) + } + + fn register_notify_cb(&mut self, cb: CameraNotifyCallback) { + self.notify_cb = Some(cb); + } + + fn register_broken_cb(&mut self, cb: CameraBrokenCallback) { + self.broken_cb = Some(cb); + } +} + +fn cam_fmt_to_v4l2(t: &FmtType) -> u32 { + match t { + FmtType::Yuy2 => PIXFMT_YUYV, + FmtType::Rgb565 => PIXFMT_RGB565, + FmtType::Mjpg => PIXFMT_MJPG, + FmtType::Nv12 => PIXFMT_NV12, + } +} + +fn cam_fmt_from_v4l2(t: u32) -> Result { + let fmt = match t { + PIXFMT_YUYV => FmtType::Yuy2, + PIXFMT_RGB565 => FmtType::Rgb565, + PIXFMT_MJPG => FmtType::Mjpg, + PIXFMT_NV12 => FmtType::Nv12, + _ => bail!("Invalid v4l2 type {}", t), + }; + Ok(fmt) +} + +struct V4l2IoHandler { + sample: Arc>, + backend: Arc, + notify_cb: Option, + broken_cb: Option, +} + +impl V4l2IoHandler { + fn new( + sample: &Arc>, + backend: &Arc, + cb: Option, + broken_cb: Option, + ) -> Self { + V4l2IoHandler { + sample: sample.clone(), + backend: backend.clone(), + notify_cb: cb, + broken_cb, + } + } + + fn handle_sample(&mut self) -> Result<()> { + let mut buf = new_init::(); + buf.type_ = v4l2_buf_type_V4L2_BUF_TYPE_VIDEO_CAPTURE; + buf.memory = v4l2_memory_V4L2_MEMORY_MMAP; + if !self.backend.dequeue_buffer(&buf)? { + // Not ready. + return Ok(()); + } + if buf.bytesused > buf.length { + bail!("Invalid buf used {} length {}", buf.bytesused, buf.length); + } + let locked_buf = self.backend.buffer.lock().unwrap(); + let mut locked_sample = self.sample.lock().unwrap(); + if locked_sample.used_len == 0 { + let iov = locked_buf + .get(buf.index as usize) + .with_context(|| "Buffer index overflow")?; + if u64::from(buf.bytesused) > iov.iov_len { + bail!( + "Buffer overflow, bytesused {} iov len {}", + buf.bytesused, + iov.iov_len + ); + } + locked_sample.addr = iov.iov_base; + locked_sample.used_len = u64::from(buf.bytesused); + locked_sample.buf_index = buf.index; + drop(locked_sample); + // Notify the camera to deal with request. + if let Some(notify_cb) = &self.notify_cb { + notify_cb(); + } + } else { + self.backend + .queue_buffer(&buf) + .with_context(|| "Failed to queue buffer when handle sample")?; + } + Ok(()) + } +} + +impl EventNotifierHelper for V4l2IoHandler { + fn internal_notifiers(v4l2_handler: Arc>) -> Vec { + let cloend_v4l2_handler = v4l2_handler.clone(); + let handler: Rc = Rc::new(move |event, _fd: RawFd| { + let mut locked_handler = cloend_v4l2_handler.lock().unwrap(); + if event & EventSet::HANG_UP == EventSet::HANG_UP { + if let Some(broken_cb) = &locked_handler.broken_cb { + // Backend is broken. 
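+                    // e.g. the camera was unplugged and the fd reported a
+                    // hang-up (see the HANG_UP registration below).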
+ broken_cb(); + } + } else if let Err(e) = locked_handler.handle_sample() { + error!("Failed to handle sample {:?}", e); + } + None + }); + + vec![EventNotifier::new( + NotifierOperation::AddShared, + v4l2_handler.lock().unwrap().backend.as_raw_fd(), + None, + // For unexpected device removal. + EventSet::IN | EventSet::EDGE_TRIGGERED | EventSet::HANG_UP, + vec![handler], + )] + } +} diff --git a/devices/src/interrupt_controller/aarch64/gicv2.rs b/devices/src/interrupt_controller/aarch64/gicv2.rs new file mode 100644 index 0000000000000000000000000000000000000000..4a67fa0c3179da920e0f02448db2943f32eb051d --- /dev/null +++ b/devices/src/interrupt_controller/aarch64/gicv2.rs @@ -0,0 +1,187 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::marker::{Send, Sync}; +use std::sync::{Arc, Mutex}; + +use anyhow::{anyhow, Result}; +use log::error; + +use super::{GICConfig, GICDevice}; +use crate::interrupt_controller::InterruptError; +use address_space::AddressSpace; +use machine_manager::machine::{MachineLifecycle, VmState}; +use util::device_tree::{self, FdtBuilder}; + +// See arch/arm64/include/uapi/asm/kvm.h file from the linux kernel. +const VGIC_V2_DIST_SIZE: u64 = 0x1000; +const VGIC_V2_CPU_SIZE: u64 = 0x2000; + +/// Configure a v2 Interrupt controller. +pub struct GICv2Config { + /// GIC distributor address range. + pub dist_range: (u64, u64), + /// GIC cpu interface address range. + pub cpu_range: (u64, u64), + /// GIC v2m range . + pub v2m_range: Option<(u64, u64)>, + /// GIC system memory. + pub sys_mem: Option>, +} + +/// Access wrapper for GICv2. +pub trait GICv2Access: Send + Sync { + fn init_gic(&self, nr_irqs: u32, dist_base: u64, cpu_if_base: u64) -> Result<()>; + + /// Returns `gicr_attr` of `vCPU`. + fn vcpu_gicr_attr(&self, offset: u64, cpu: usize) -> u64; + + fn access_gic_distributor(&self, offset: u64, gicd_value: &mut u32, write: bool) -> Result<()>; + + fn access_gic_cpu( + &self, + offset: u64, + cpu: usize, + gicc_value: &mut u64, + write: bool, + ) -> Result<()>; + + fn pause(&self) -> Result<()>; +} + +#[derive(Default)] +struct GicCpuInterfaceRegion { + /// Base address. + base: u64, + /// Size of Cpu Interface region. + size: u64, +} + +#[derive(Default)] +struct GicDistGuestRegion { + /// Base address. + base: u64, + /// Size of Cpu Interface region. + size: u64, +} + +/// A wrapper around creating and managing a `GICv2`. +pub struct GICv2 { + /// The handler for the GICv2 device to access the corresponding device in hypervisor. + pub hypervisor_gic: Arc, + /// Maximum irq number. + nr_irqs: u32, + /// GICv2 cpu interface region. + cpu_interface_region: GicCpuInterfaceRegion, + /// Guest physical address space of the GICv2 distributor register mappings. + dist_guest_region: GicDistGuestRegion, + /// Lifecycle state for GICv2. 
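+    /// Set to `Created` in `new()`, to `Running` in `realize()`, and checked
+    /// by `notify_lifecycle()` before a state transition is applied.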
+ state: Arc>, +} + +impl GICv2 { + pub fn new(hypervisor_gic: Arc, config: &GICConfig) -> Result { + let v2config = match config.v2.as_ref() { + Some(v2) => v2, + None => { + return Err(anyhow!(InterruptError::InvalidConfig( + "no v2 config found".to_string() + ))) + } + }; + + let cpu_interface_region = GicCpuInterfaceRegion { + base: v2config.dist_range.0 + VGIC_V2_DIST_SIZE, + size: VGIC_V2_CPU_SIZE, + }; + let dist_guest_region = GicDistGuestRegion { + base: v2config.dist_range.0, + size: v2config.dist_range.1, + }; + + Ok(GICv2 { + hypervisor_gic, + nr_irqs: config.max_irq, + cpu_interface_region, + dist_guest_region, + state: Arc::new(Mutex::new(VmState::Created)), + }) + } +} + +impl MachineLifecycle for GICv2 { + fn pause(&self) -> bool { + if self.hypervisor_gic.pause().is_ok() { + *self.state.lock().unwrap() = VmState::Running; + true + } else { + false + } + } + + fn notify_lifecycle(&self, old: VmState, new: VmState) -> bool { + let state = self.state.lock().unwrap(); + if *state != old { + error!("GICv2 lifecycle error: state check failed."); + return false; + } + drop(state); + + match (old, new) { + (VmState::Running, VmState::Paused) => self.pause(), + _ => true, + } + } +} + +impl GICDevice for GICv2 { + fn realize(&self) -> Result<()> { + let dist_base = self.dist_guest_region.base; + let cpu_if_base = self.cpu_interface_region.base; + self.hypervisor_gic + .init_gic(self.nr_irqs, dist_base, cpu_if_base)?; + + *self.state.lock().unwrap() = VmState::Running; + + Ok(()) + } + + fn generate_fdt(&self, fdt: &mut FdtBuilder) -> Result<()> { + let gic_reg = vec![ + self.dist_guest_region.base, + self.dist_guest_region.size, + self.cpu_interface_region.base, + self.cpu_interface_region.size, + ]; + + let intc_node_dep = fdt.begin_node("intc")?; + fdt.set_property_string("compatible", "arm,cortex-a15-gic")?; + fdt.set_property("interrupt-controller", &Vec::new())?; + fdt.set_property_u32("#interrupt-cells", 0x3)?; + fdt.set_property_u32("phandle", device_tree::GIC_PHANDLE)?; + fdt.set_property_u32("#address-cells", 0x2)?; + fdt.set_property_u32("#size-cells", 0x2)?; + fdt.set_property_array_u64("reg", &gic_reg)?; + + let gic_intr = [ + device_tree::GIC_FDT_IRQ_TYPE_PPI, + 0x9, + device_tree::IRQ_TYPE_LEVEL_HIGH, + ]; + + fdt.set_property_array_u32("interrupts", &gic_intr)?; + + fdt.end_node(intc_node_dep)?; + + Ok(()) + } +} diff --git a/devices/src/interrupt_controller/aarch64/gicv3.rs b/devices/src/interrupt_controller/aarch64/gicv3.rs index 79ebb27b38abb70616e5e9c4c1f1c439421451eb..60babf03de380cd37d2d952cffa5b674cbdfca2f 100644 --- a/devices/src/interrupt_controller/aarch64/gicv3.rs +++ b/devices/src/interrupt_controller/aarch64/gicv3.rs @@ -12,146 +12,139 @@ use std::sync::{Arc, Mutex}; -use kvm_ioctls::DeviceFd; - -use super::{ - state::{GICv3ItsState, GICv3State}, - GICConfig, GICDevice, UtilResult, -}; -use crate::interrupt_controller::errors::{ErrorKind, Result, ResultExt}; -use hypervisor::kvm::KVM_FDS; -use machine_manager::machine::{KvmVmState, MachineLifecycle}; -use migration::{MigrationManager, MigrationRestoreOrder}; +use anyhow::{anyhow, Context, Result}; +use log::{error, info}; + +use super::{GICConfig, GICDevice}; +use crate::interrupt_controller::error::InterruptError; +use machine_manager::machine::{MachineLifecycle, VmState}; +use migration::StateTransfer; use util::device_tree::{self, FdtBuilder}; // See arch/arm64/include/uapi/asm/kvm.h file from the linux kernel. 
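+// Each vCPU's GICv3 redistributor occupies two 64 KiB frames (RD_base and
+// SGI_base), which is why VGIC_V3_REDIST_SIZE below is 2 * SZ_64K.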
const SZ_64K: u64 = 0x0001_0000; -const KVM_VGIC_V3_REDIST_SIZE: u64 = 2 * SZ_64K; - -/// A wrapper for kvm_based device check and access. -pub struct KvmDevice; - -impl KvmDevice { - fn kvm_device_check(fd: &DeviceFd, group: u32, attr: u64) -> Result<()> { - let attr = kvm_bindings::kvm_device_attr { - group, - attr, - addr: 0, - flags: 0, - }; - fd.has_device_attr(&attr) - .chain_err(|| "Failed to check device attributes for GIC.")?; - Ok(()) - } +const VGIC_V3_REDIST_SIZE: u64 = 2 * SZ_64K; + +/// Configure a v3 Interrupt controller. +pub struct GICv3Config { + /// Config msi support + pub msi: bool, + /// GIC distributor address range. + pub dist_range: (u64, u64), + /// GIC redistributor address range, support multiple redistributor regions. + pub redist_region_ranges: Vec<(u64, u64)>, + /// GIC ITS address ranges. + pub its_range: Option<(u64, u64)>, +} - fn kvm_device_access( - fd: &DeviceFd, - group: u32, - attr: u64, - addr: u64, - write: bool, +/// Access wrapper for GICv3. +pub trait GICv3Access: Send + Sync { + fn init_gic( + &self, + _nr_irqs: u32, + _redist_regions: Vec, + _dist_base: u64, ) -> Result<()> { - let attr = kvm_bindings::kvm_device_attr { - group, - attr, - addr, - flags: 0, - }; - - if write { - fd.set_device_attr(&attr) - .chain_err(|| "Failed to set device attributes for GIC.")?; - } else { - let mut attr = attr; - fd.get_device_attr(&mut attr) - .chain_err(|| "Failed to get device attributes for GIC.")?; - }; - Ok(()) } -} -/// Access wrapper for GICv3. -pub trait GICv3Access { - /// Returns `gicr_attr` of `vCPU`. - fn vcpu_gicr_attr(&self, cpu: usize) -> u64; + fn vcpu_gicr_attr(&self, _cpu: usize) -> u64 { + 0 + } - fn access_gic_distributor(&self, offset: u64, gicd_value: &mut u32, write: bool) -> Result<()>; + fn access_gic_distributor( + &self, + _offset: u64, + _gicd_value: &mut u32, + _write: bool, + ) -> Result<()> { + Ok(()) + } fn access_gic_redistributor( &self, - offset: u64, - cpu: usize, - gicr_value: &mut u32, - write: bool, - ) -> Result<()>; + _offset: u64, + _cpu: usize, + _gicr_value: &mut u32, + _write: bool, + ) -> Result<()> { + Ok(()) + } fn access_gic_cpu( &self, - offset: u64, - cpu: usize, - gicc_value: &mut u64, - write: bool, - ) -> Result<()>; + _offset: u64, + _cpu: usize, + _gicc_value: &mut u64, + _write: bool, + ) -> Result<()> { + Ok(()) + } - fn access_gic_line_level(&self, offset: u64, gicll_value: &mut u32, write: bool) -> Result<()>; + fn access_gic_line_level( + &self, + _offset: u64, + _gicll_value: &mut u32, + _write: bool, + ) -> Result<()> { + Ok(()) + } + + fn pause(&self) -> Result<()> { + Ok(()) + } } -struct GicRedistRegion { +#[derive(Clone, Copy)] +pub struct GicRedistRegion { /// Base address. - base: u64, + pub base: u64, /// Size of redistributor region. size: u64, /// Attribute of redistributor region. - base_attr: u64, + pub base_attr: u64, } /// A wrapper around creating and managing a `GICv3`. pub struct GICv3 { - /// The fd for the GICv3 device. - fd: DeviceFd, + /// The handler for the GICv3 device to access the corresponding device in hypervisor. + pub hypervisor_gic: Arc, /// Number of vCPUs, determines the number of redistributor and CPU interface. pub(crate) vcpu_count: u64, /// GICv3 ITS device. - pub(crate) its_dev: Option>, + pub its_dev: Option>, /// Maximum irq number. pub(crate) nr_irqs: u32, /// GICv3 redistributor info, support multiple redistributor regions. 
- redist_regions: Vec, + pub redist_regions: Vec, /// Base address in the guest physical address space of the GICv3 distributor /// register mappings. dist_base: u64, /// GICv3 distributor region size. dist_size: u64, /// Lifecycle state for GICv3. - state: Arc>, + state: Arc>, } impl GICv3 { - fn new(config: &GICConfig) -> Result { - config.check_sanity()?; - - let mut gic_device = kvm_bindings::kvm_create_device { - type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, - fd: 0, - flags: 0, - }; - - let gic_fd = match KVM_FDS - .load() - .vm_fd - .as_ref() - .unwrap() - .create_device(&mut gic_device) - { - Ok(fd) => fd, - Err(e) => return Err(ErrorKind::CreateKvmDevice(e).into()), + pub fn new( + hypervisor_gic: Arc, + its_handler: Arc, + config: &GICConfig, + ) -> Result { + let v3config = match config.v3.as_ref() { + Some(v3) => v3, + None => { + return Err(anyhow!(InterruptError::InvalidConfig( + "no v3 config found".to_string() + ))) + } }; // Calculate GIC redistributor regions' address range according to vcpu count. - let base = config.redist_region_ranges[0].0; - let size = config.redist_region_ranges[0].1; - let redist_capability = size / KVM_VGIC_V3_REDIST_SIZE; + let base = v3config.redist_region_ranges[0].0; + let size = v3config.redist_region_ranges[0].1; + let redist_capability = size / VGIC_V3_REDIST_SIZE; let redist_region_count = std::cmp::min(config.vcpu_count, redist_capability); let mut redist_regions = vec![GicRedistRegion { base, @@ -160,262 +153,149 @@ impl GICv3 { }]; if config.vcpu_count > redist_capability { - let high_redist_base = config.redist_region_ranges[1].0; + let high_redist_base = v3config.redist_region_ranges[1].0; let high_redist_region_count = config.vcpu_count - redist_capability; let high_redist_attr = (high_redist_region_count << 52) | high_redist_base | 0x1; redist_regions.push(GicRedistRegion { base: high_redist_base, - size: high_redist_region_count * KVM_VGIC_V3_REDIST_SIZE, + size: high_redist_region_count * VGIC_V3_REDIST_SIZE, base_attr: high_redist_attr, }) } let mut gicv3 = GICv3 { - fd: gic_fd, + hypervisor_gic, vcpu_count: config.vcpu_count, nr_irqs: config.max_irq, its_dev: None, redist_regions, - dist_base: config.dist_range.0, - dist_size: config.dist_range.1, - state: Arc::new(Mutex::new(KvmVmState::Created)), + dist_base: v3config.dist_range.0, + dist_size: v3config.dist_range.1, + state: Arc::new(Mutex::new(VmState::Created)), }; - if let Some(its_range) = config.its_range { - gicv3.its_dev = Some(Arc::new( - GICv3Its::new(&its_range).chain_err(|| "Failed to create ITS")?, - )); + if let Some(its_range) = v3config.its_range { + gicv3.its_dev = Some(Arc::new(GICv3Its::new(its_handler, &its_range))); } Ok(gicv3) } - fn realize(&self) -> Result<()> { - if self.redist_regions.len() > 1 { - KvmDevice::kvm_device_check( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, - kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION as u64, - ) - .chain_err(|| { - "Multiple redistributors are acquired while KVM does not provide support." 
- })?; - } - - if self.redist_regions.len() == 1 { - KvmDevice::kvm_device_access( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, - u64::from(kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_REDIST), - &self.redist_regions.get(0).unwrap().base as *const u64 as u64, - true, - ) - .chain_err(|| "Failed to set GICv3 attribute: redistributor address")?; - } else { - for redist in &self.redist_regions { - KvmDevice::kvm_device_access( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, - u64::from(kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION), - &redist.base_attr as *const u64 as u64, - true, - ) - .chain_err(|| "Failed to set GICv3 attribute: redistributor region address")?; - } + fn reset_its_state(&self) -> Result<()> { + if let Some(its) = &self.its_dev { + its.reset()?; } - KvmDevice::kvm_device_access( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, - u64::from(kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_DIST), - &self.dist_base as *const u64 as u64, - true, - ) - .chain_err(|| "Failed to set GICv3 attribute: distributor address")?; - - KvmDevice::kvm_device_check(&self.fd, kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0)?; - - // Init the interrupt number support by the GIC. - KvmDevice::kvm_device_access( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, - 0, - &self.nr_irqs as *const u32 as u64, - true, - ) - .chain_err(|| "Failed to set GICv3 attribute: irqs")?; - - // Finalize the GIC. - KvmDevice::kvm_device_access( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, - u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_CTRL_INIT), - 0, - true, - ) - .chain_err(|| "KVM failed to initialize GICv3")?; - - let mut state = self.state.lock().unwrap(); - *state = KvmVmState::Running; - Ok(()) } - fn device_fd(&self) -> &DeviceFd { - &self.fd - } -} - -impl MachineLifecycle for GICv3 { - fn pause(&self) -> bool { - // VM change state will flush REDIST pending tables into guest RAM. 
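// A minimal construction sketch for the refactored GICv3 (illustrative only and
// not part of the patch; `NullGICv3` and `NullIts` are hypothetical stand-ins
// that simply inherit the default methods of GICv3Access / GICv3ItsAccess):
//
// struct NullGICv3;
// impl GICv3Access for NullGICv3 {}
//
// struct NullIts;
// impl GICv3ItsAccess for NullIts {}
//
// fn build_demo_gic() -> Result<Arc<GICv3>> {
//     let config = GICConfig {
//         version: Some(GICVersion::GICv3),
//         vcpu_count: 4,
//         max_irq: GIC_IRQ_MAX,
//         v2: None,
//         v3: Some(GICv3Config {
//             msi: true,
//             dist_range: (0x0800_0000, 0x0001_0000),
//             redist_region_ranges: vec![(0x080A_0000, 0x00F6_0000)],
//             its_range: Some((0x0808_0000, 0x0002_0000)),
//         }),
//     };
//     let gic = Arc::new(GICv3::new(Arc::new(NullGICv3), Arc::new(NullIts), &config)?);
//     gic.realize()?;
//     Ok(gic)
// }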
- if KvmDevice::kvm_device_access( - self.device_fd(), - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, - kvm_bindings::KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES as u64, - 0, - true, - ) - .is_ok() - { - let mut state = self.state.lock().unwrap(); - *state = KvmVmState::Running; - - true - } else { - false - } - } - - fn notify_lifecycle(&self, old: KvmVmState, new: KvmVmState) -> bool { - let state = self.state.lock().unwrap(); - if *state != old { - error!("GICv3 lifecycle error: state check failed."); - return false; - } - drop(state); - - match (old, new) { - (KvmVmState::Running, KvmVmState::Paused) => self.pause(), - _ => true, - } + fn reset_gic_state(&self) -> Result<()> { + let reset_state = self.create_reset_state()?; + self.set_state(&reset_state) + .with_context(|| "Failed to reset gic") } -} - -impl GICv3Access for GICv3 { - fn vcpu_gicr_attr(&self, cpu: usize) -> u64 { - let clustersz = 16; - - let aff1 = (cpu / clustersz) as u64; - let aff0 = (cpu % clustersz) as u64; - - let affid = (aff1 << 8) | aff0; - let cpu_affid: u64 = ((affid & 0xFF_0000_0000) >> 8) | (affid & 0xFF_FFFF); - - let last = if (self.vcpu_count - 1) == cpu as u64 { - 1 - } else { - 0 - }; - ((cpu_affid << 32) | (1 << 24) | (1 << 8) | (last << 4)) - & kvm_bindings::KVM_DEV_ARM_VGIC_V3_MPIDR_MASK as u64 - } - - fn access_gic_distributor(&self, offset: u64, gicd_value: &mut u32, write: bool) -> Result<()> { - KvmDevice::kvm_device_access( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_DIST_REGS, - offset, - gicd_value as *mut u32 as u64, - write, - ) - .chain_err(|| format!("Failed to access gic distributor for offset 0x{:x}", offset)) + pub(crate) fn access_gic_distributor( + &self, + offset: u64, + gicd_value: &mut u32, + write: bool, + ) -> Result<()> { + self.hypervisor_gic + .access_gic_distributor(offset, gicd_value, write) } - fn access_gic_redistributor( + pub(crate) fn access_gic_redistributor( &self, offset: u64, cpu: usize, gicr_value: &mut u32, write: bool, ) -> Result<()> { - KvmDevice::kvm_device_access( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, - self.vcpu_gicr_attr(cpu) | offset, - gicr_value as *mut u32 as u64, - write, - ) - .chain_err(|| { - format!( - "Failed to access gic redistributor for offset 0x{:x}", - offset - ) - }) + self.hypervisor_gic + .access_gic_redistributor(offset, cpu, gicr_value, write) } - fn access_gic_cpu( + pub(crate) fn access_gic_cpu( &self, offset: u64, cpu: usize, gicc_value: &mut u64, write: bool, ) -> Result<()> { - KvmDevice::kvm_device_access( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, - self.vcpu_gicr_attr(cpu) | offset, - gicc_value as *mut u64 as u64, - write, - ) - .chain_err(|| format!("Failed to access gic cpu for offset 0x{:x}", offset)) + self.hypervisor_gic + .access_gic_cpu(offset, cpu, gicc_value, write) } - fn access_gic_line_level(&self, offset: u64, gicll_value: &mut u32, write: bool) -> Result<()> { - KvmDevice::kvm_device_access( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, - self.vcpu_gicr_attr(0) | offset, - gicll_value as *mut u32 as u64, - write, - ) + pub(crate) fn access_gic_line_level( + &self, + offset: u64, + gicll_value: &mut u32, + write: bool, + ) -> Result<()> { + self.hypervisor_gic + .access_gic_line_level(offset, gicll_value, write) } } -impl GICDevice for GICv3 { - fn create_device( - gic_conf: &GICConfig, - ) -> Result> { - let gicv3 = Arc::new(GICv3::new(gic_conf)?); - if gicv3.its_dev.is_some() { - MigrationManager::register_device_instance( - GICv3ItsState::descriptor(), - 
gicv3.its_dev.as_ref().unwrap().clone(), - MigrationRestoreOrder::Gicv3Its, +impl MachineLifecycle for GICv3 { + fn pause(&self) -> bool { + // VM change state will flush REDIST pending tables into guest RAM. + if let Err(e) = self.hypervisor_gic.pause() { + error!( + "Failed to flush REDIST pending tables into guest RAM, error: {:?}", + e ); + return false; } - MigrationManager::register_device_instance( - GICv3State::descriptor(), - gicv3.clone(), - MigrationRestoreOrder::Gicv3, - ); - Ok(gicv3) + // The ITS tables need to be flushed into guest RAM before VM pause. + if let Some(its_dev) = &self.its_dev { + if let Err(e) = its_dev.its_handler.access_gic_its_tables(true) { + error!("Failed to access GIC ITS tables, error: {:?}", e); + return false; + } + } + + let mut state = self.state.lock().unwrap(); + *state = VmState::Running; + + true } + fn notify_lifecycle(&self, old: VmState, new: VmState) -> bool { + let state = self.state.lock().unwrap(); + if *state != old { + error!("GICv3 lifecycle error: state check failed."); + return false; + } + drop(state); + + match (old, new) { + (VmState::Running, VmState::Paused) => self.pause(), + _ => true, + } + } +} + +impl GICDevice for GICv3 { fn realize(&self) -> Result<()> { - self.realize().chain_err(|| "Failed to realize GICv3")?; + self.hypervisor_gic + .init_gic(self.nr_irqs, self.redist_regions.clone(), self.dist_base) + .with_context(|| "Failed to init GICv3")?; if let Some(its) = &self.its_dev { - its.realize().chain_err(|| "Failed to realize ITS")?; + its.realize().with_context(|| "Failed to realize ITS")?; } + let mut state = self.state.lock().unwrap(); + *state = VmState::Running; + Ok(()) } - fn generate_fdt(&self, fdt: &mut FdtBuilder) -> UtilResult<()> { + fn generate_fdt(&self, fdt: &mut FdtBuilder) -> Result<()> { let redist_count = self.redist_regions.len() as u32; let mut gic_reg = vec![self.dist_base, self.dist_size]; @@ -457,11 +337,40 @@ impl GICDevice for GICv3 { Ok(()) } + + fn reset(&self) -> Result<()> { + info!("Reset gicv3its"); + self.reset_its_state()?; + info!("Reset gicv3"); + self.reset_gic_state() + } + + fn get_redist_count(&self) -> u8 { + self.redist_regions.len() as u8 + } } -pub(crate) struct GICv3Its { - /// The fd for the GICv3Its device - fd: DeviceFd, +pub trait GICv3ItsAccess: Send + Sync { + fn init_gic_its(&self, _msi_base: u64) -> Result<()> { + Ok(()) + } + + fn access_gic_its(&self, _attr: u32, _its_value: &mut u64, _write: bool) -> Result<()> { + Ok(()) + } + + fn access_gic_its_tables(&self, _save: bool) -> Result<()> { + Ok(()) + } + + fn reset(&self) -> Result<()> { + Ok(()) + } +} + +pub struct GICv3Its { + /// The handler for the GICv3Its device to access the corresponding device in hypervisor. + pub its_handler: Arc, /// Base address in the guest physical address space of the GICv3 ITS /// control register frame. 
@@ -472,160 +381,29 @@ pub(crate) struct GICv3Its { } impl GICv3Its { - fn new(its_range: &(u64, u64)) -> Result { - let mut its_device = kvm_bindings::kvm_create_device { - type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_ITS, - fd: 0, - flags: 0, - }; - - let its_fd = match KVM_FDS - .load() - .vm_fd - .as_ref() - .unwrap() - .create_device(&mut its_device) - { - Ok(fd) => fd, - Err(e) => return Err(ErrorKind::CreateKvmDevice(e).into()), - }; - - Ok(GICv3Its { - fd: its_fd, + fn new(its_handler: Arc, its_range: &(u64, u64)) -> Self { + GICv3Its { + its_handler, msi_base: its_range.0, msi_size: its_range.1, - }) + } } fn realize(&self) -> Result<()> { - KvmDevice::kvm_device_check( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, - u64::from(kvm_bindings::KVM_VGIC_ITS_ADDR_TYPE), - ) - .chain_err(|| "ITS address attribute is not supported for KVM")?; - - KvmDevice::kvm_device_access( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, - u64::from(kvm_bindings::KVM_VGIC_ITS_ADDR_TYPE), - &self.msi_base as *const u64 as u64, - true, - ) - .chain_err(|| "Failed to set ITS attribute: ITS address")?; - - // Finalize the GIC Its. - KvmDevice::kvm_device_access( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, - u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_CTRL_INIT), - &self.msi_base as *const u64 as u64, - true, - ) - .chain_err(|| "KVM failed to initialize ITS")?; + self.its_handler.init_gic_its(self.msi_base)?; Ok(()) } pub(crate) fn access_gic_its(&self, attr: u32, its_value: &mut u64, write: bool) -> Result<()> { - KvmDevice::kvm_device_access( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ITS_REGS, - attr as u64, - its_value as *const u64 as u64, - write, - ) + self.its_handler.access_gic_its(attr, its_value, write) } pub(crate) fn access_gic_its_tables(&self, save: bool) -> Result<()> { - let attr = if save { - kvm_bindings::KVM_DEV_ARM_ITS_SAVE_TABLES as u64 - } else { - kvm_bindings::KVM_DEV_ARM_ITS_RESTORE_TABLES as u64 - }; - KvmDevice::kvm_device_access( - &self.fd, - kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, - attr, - std::ptr::null::() as u64, - true, - ) + self.its_handler.access_gic_its_tables(save) } -} - -#[cfg(test)] -mod tests { - use hypervisor::kvm::KVMFds; - use serial_test::serial; - - use super::super::GICConfig; - use super::*; - - #[test] - #[serial] - fn test_create_gicv3() { - let kvm_fds = KVMFds::new(); - if kvm_fds.vm_fd.is_none() { - return; - } - KVM_FDS.store(Arc::new(kvm_fds)); - - let gic_conf = GICConfig { - version: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3.into(), - vcpu_count: 4, - max_irq: 192, - msi: false, - dist_range: (0x0800_0000, 0x0001_0000), - redist_region_ranges: vec![(0x080A_0000, 0x00F6_0000)], - its_range: None, - }; - assert!(GICv3::new(&gic_conf).is_ok()); - } - - #[test] - #[serial] - fn test_create_gic_device() { - let kvm_fds = KVMFds::new(); - if kvm_fds.vm_fd.is_none() { - return; - } - KVM_FDS.store(Arc::new(kvm_fds)); - - let gic_config = GICConfig { - version: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, - vcpu_count: 4_u64, - max_irq: 192_u32, - msi: false, - dist_range: (0x0800_0000, 0x0001_0000), - redist_region_ranges: vec![(0x080A_0000, 0x00F6_0000)], - its_range: None, - }; - let gic = GICv3::new(&gic_config).unwrap(); - assert!(gic.its_dev.is_none()); - assert!(GICv3::new(&gic_config).is_err()); - } - - #[test] - #[serial] - fn test_gic_redist_regions() { - let kvm_fds = KVMFds::new(); - if kvm_fds.vm_fd.is_none() { - return; - } - KVM_FDS.store(Arc::new(kvm_fds)); - - let 
gic_config = GICConfig { - version: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, - vcpu_count: 210_u64, - max_irq: 192_u32, - msi: true, - dist_range: (0x0800_0000, 0x0001_0000), - redist_region_ranges: vec![(0x080A_0000, 0x00F6_0000), (256 << 30, 0x200_0000)], - its_range: Some((0x0808_0000, 0x0002_0000)), - }; - let gic = GICv3::new(&gic_config).unwrap(); - assert!(gic.its_dev.is_some()); - assert_eq!(gic.redist_regions.len(), 2); + pub(crate) fn reset(&self) -> Result<()> { + self.its_handler.reset() } } diff --git a/devices/src/interrupt_controller/aarch64/mod.rs b/devices/src/interrupt_controller/aarch64/mod.rs index c4f5aa0ba40247ad4b01a23dc28308e2ee705f57..ab548604c37455a40d18aeb785910eed2b02cbeb 100644 --- a/devices/src/interrupt_controller/aarch64/mod.rs +++ b/devices/src/interrupt_controller/aarch64/mod.rs @@ -10,121 +10,128 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. +mod gicv2; mod gicv3; -#[allow(dead_code)] mod state; +pub use gicv2::GICv2; +pub use gicv2::GICv2Access; +pub use gicv2::GICv2Config; pub use gicv3::GICv3; +pub use gicv3::GICv3Access; +pub use gicv3::GICv3Config; +pub use gicv3::GICv3ItsAccess; +pub use gicv3::GicRedistRegion; +pub use state::{GICv3ItsState, GICv3State}; use std::sync::Arc; -use machine_manager::machine::{KvmVmState, MachineLifecycle}; -use util::{ - device_tree::{self, FdtBuilder}, - errors::Result as UtilResult, -}; +use anyhow::{anyhow, Context, Result}; -use super::errors::{ErrorKind, Result, ResultExt}; +use crate::interrupt_controller::error::InterruptError; +use machine_manager::machine::{MachineLifecycle, VmState}; +use util::device_tree::{self, FdtBuilder}; // First 32 are private to each CPU (SGIs and PPIs). -pub(crate) const GIC_IRQ_INTERNAL: u32 = 32; +pub const GIC_IRQ_INTERNAL: u32 = 32; +// Last usable IRQ on aarch64. +pub const GIC_IRQ_MAX: u32 = 192; + +/// GIC version type. +pub enum GICVersion { + GICv2, + GICv3, +} -/// Configure a Interrupt controller. pub struct GICConfig { /// Config GIC version - pub version: u32, + pub version: Option, /// Config number of CPUs handled by the device pub vcpu_count: u64, /// Config maximum number of irqs handled by the device pub max_irq: u32, - /// Config msi support - pub msi: bool, - /// GIC distributor address range. - pub dist_range: (u64, u64), - /// GIC redistributor address range, support multiple redistributor regions. - pub redist_region_ranges: Vec<(u64, u64)>, - /// GIC ITS address ranges. - pub its_range: Option<(u64, u64)>, + /// v2 config. + pub v2: Option, + /// v3 config. + pub v3: Option, } impl GICConfig { - fn check_sanity(&self) -> Result<()> { - if self.version != kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3 { - return Err(ErrorKind::InvalidConfig("GIC only support GICv3".to_string()).into()); - }; - - if self.vcpu_count > 256 || self.vcpu_count == 0 { - return Err( - ErrorKind::InvalidConfig("GIC only support maximum 256 vcpus".to_string()).into(), - ); - } - + pub fn check_sanity(&self) -> Result<()> { if self.max_irq <= GIC_IRQ_INTERNAL { - return Err( - ErrorKind::InvalidConfig("GIC irq numbers need above 32".to_string()).into(), - ); + return Err(anyhow!(InterruptError::InvalidConfig( + "GIC irq numbers need above 32".to_string() + ))); } - Ok(()) } } /// A wrapper for `GIC` must perform the function. pub trait GICDevice: MachineLifecycle { - /// Constructs a kvm_based `GIC` device. - /// - /// # Arguments - /// - /// * `vm` - File descriptor for vmfd. 
- /// * `gic_conf` - Configuration for `GIC`. - fn create_device( - gic_conf: &GICConfig, - ) -> Result> - where - Self: Sized; - - /// Realize function for kvm_based `GIC` device. + /// Realize function for hypervisor_based `GIC` device. fn realize(&self) -> Result<()>; + /// Reset 'GIC' + fn reset(&self) -> Result<()> { + Ok(()) + } + /// Constructs `fdt` node for `GIC`. /// /// # Arguments /// /// * `fdt` - Device tree presented by bytes. - fn generate_fdt(&self, fdt: &mut FdtBuilder) -> UtilResult<()>; + fn generate_fdt(&self, fdt: &mut FdtBuilder) -> Result<()>; + + /// Get GIC redistributor number. + fn get_redist_count(&self) -> u8 { + 0 + } } -/// A wrapper around creating and using a kvm-based interrupt controller. +#[derive(Clone)] +/// A wrapper around creating and using a hypervisor-based interrupt controller. pub struct InterruptController { gic: Arc, } impl InterruptController { - /// Constructs a new kvm_based `InterruptController`. + /// Constructs a new hypervisor_based `InterruptController`. /// /// # Arguments /// /// * `gic_conf` - Configuration for `GIC`. - pub fn new(gic_conf: &GICConfig) -> Result { - Ok(InterruptController { - gic: GICv3::create_device(gic_conf).chain_err(|| "Failed to realize GIC")?, - }) + pub fn new( + gic: Arc, + ) -> InterruptController { + InterruptController { gic } } pub fn realize(&self) -> Result<()> { - self.gic.realize().chain_err(|| "Failed to realize GIC")?; + self.gic + .realize() + .with_context(|| "Failed to realize GIC")?; Ok(()) } + /// Reset the InterruptController + pub fn reset(&self) -> Result<()> { + self.gic.reset().with_context(|| "Failed to reset GIC") + } + /// Change `InterruptController` lifecycle state to `Stopped`. pub fn stop(&self) { - self.gic - .notify_lifecycle(KvmVmState::Running, KvmVmState::Paused); + self.gic.notify_lifecycle(VmState::Running, VmState::Paused); + } + + pub fn get_redist_count(&self) -> u8 { + self.gic.get_redist_count() } } impl device_tree::CompileFDT for InterruptController { - fn generate_fdt_node(&self, fdt: &mut FdtBuilder) -> UtilResult<()> { + fn generate_fdt_node(&self, fdt: &mut FdtBuilder) -> Result<()> { self.gic.generate_fdt(fdt)?; Ok(()) } @@ -137,30 +144,14 @@ mod tests { #[test] fn test_gic_config() { let mut gic_conf = GICConfig { - version: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3.into(), + version: Some(GICVersion::GICv3), vcpu_count: 4, - max_irq: 192, - msi: false, - dist_range: (0x0800_0000, 0x0001_0000), - redist_region_ranges: vec![(0x080A_0000, 0x00F6_0000)], - its_range: None, + max_irq: GIC_IRQ_MAX, + v2: None, + v3: None, }; assert!(gic_conf.check_sanity().is_ok()); - gic_conf.version = 3; - assert!(gic_conf.check_sanity().is_err()); - gic_conf.version = kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3.into(); - assert!(gic_conf.check_sanity().is_ok()); - - gic_conf.vcpu_count = 257; - assert!(gic_conf.check_sanity().is_err()); - gic_conf.vcpu_count = 0; - assert!(gic_conf.check_sanity().is_err()); - gic_conf.vcpu_count = 24; - assert!(gic_conf.check_sanity().is_ok()); - - assert!(gic_conf.check_sanity().is_ok()); - gic_conf.max_irq = 32; assert!(gic_conf.check_sanity().is_err()); } diff --git a/devices/src/interrupt_controller/aarch64/state.rs b/devices/src/interrupt_controller/aarch64/state.rs index d4da00bd615314e9ec1ddf57ea7e73484fbcf438..a099790bd2925265859ffd00ad4ac2479c8e81bc 100644 --- a/devices/src/interrupt_controller/aarch64/state.rs +++ b/devices/src/interrupt_controller/aarch64/state.rs @@ -12,15 +12,16 @@ use std::mem::size_of; +use 
anyhow::{Context, Result}; use libc::c_uint; -use super::gicv3::{GICv3, GICv3Access, GICv3Its}; +use super::gicv3::{GICv3, GICv3Its}; use super::GIC_IRQ_INTERNAL; -use crate::interrupt_controller::errors::Result; use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; +use migration_derive::{ByteCode, Desc}; use util::byte_code::ByteCode; -/// Register data length can be get by `get_device_attr/set_device_attr` in kvm once. +/// Register data length can be get in hypervisor once. const REGISTER_SIZE: u64 = size_of::() as u64; /// Distributor registers, as offsets from the distributor base address @@ -29,7 +30,6 @@ const GICD_CTLR: u64 = 0x0000; const GICD_STATUSR: u64 = 0x0010; const GICD_IGROUPR: u64 = 0x0080; const GICD_ISENABLER: u64 = 0x0100; -const GICD_ICENABLER: u64 = 0x0180; const GICD_ISPENDR: u64 = 0x0200; const GICD_ISACTIVER: u64 = 0x0300; const GICD_IPRIORITYR: u64 = 0x0400; @@ -46,6 +46,7 @@ const GICR_STATUSR: u64 = 0x0010; const GICR_WAKER: u64 = 0x0014; const GICR_PROPBASER: u64 = 0x0070; const GICR_PENDBASER: u64 = 0x0078; +const NR_GICR_IPRIORITYR: usize = 8; /// SGI and PPI Redistributor registers, offsets from RD_base const GICR_IGROUPR0: u64 = 0x1_0080; @@ -57,7 +58,6 @@ const GICR_ISACTIVER0: u64 = 0x1_0300; const GICR_ICACTIVER0: u64 = 0x1_0380; const GICR_IPRIORITYR: u64 = 0x1_0400; const GICR_ICFGR1: u64 = 0x1_0C04; -const NR_GICR_IPRIORITYR: usize = 8; /// GIC CPU interface registers const ICC_PMR_EL1: u64 = 0xc230; @@ -86,7 +86,6 @@ const GITS_CBASER: u32 = 0x0080; const GITS_CWRITER: u32 = 0x0088; const GITS_CREADR: u32 = 0x0090; const GITS_BASER: u32 = 0x0100; -const NR_GITS_BASER: usize = 8; /// The status of GICv3 redistributor. #[repr(C)] @@ -245,7 +244,7 @@ impl GICv3 { self.access_gic_redistributor(GICR_STATUSR, redist.vcpu, &mut redist.gicr_statusr, true)?; self.access_gic_redistributor(GICR_WAKER, redist.vcpu, &mut redist.gicr_waker, true)?; self.access_gic_redistributor(GICR_IGROUPR0, redist.vcpu, &mut redist.gicr_igroupr0, true)?; - self.access_gic_redistributor(GICR_ISENABLER0, redist.vcpu, &mut !0, true)?; + self.access_gic_redistributor(GICR_ICENABLER0, redist.vcpu, &mut !0, true)?; self.access_gic_redistributor( GICR_ISENABLER0, redist.vcpu, @@ -281,7 +280,7 @@ impl GICv3 { ..Default::default() }; - let offset = dist.irq_base / (GIC_IRQ_INTERNAL as u64 / REGISTER_SIZE); + let offset = dist.irq_base / (u64::from(GIC_IRQ_INTERNAL) / REGISTER_SIZE); self.access_gic_distributor(GICD_IGROUPR + offset, &mut dist.gicd_igroupr, false)?; self.access_gic_distributor(GICD_ISENABLER + offset, &mut dist.gicd_isenabler, false)?; self.access_gic_distributor(dist.irq_base, &mut dist.line_level, false)?; @@ -291,7 +290,7 @@ impl GICv3 { // edge trigger for i in 0..NR_GICD_ICFGR { if ((i * GIC_IRQ_INTERNAL as usize / NR_GICD_ICFGR) as u64 + dist.irq_base) - > self.nr_irqs as u64 + > u64::from(self.nr_irqs) { break; } @@ -300,7 +299,7 @@ impl GICv3 { } for i in 0..NR_GICD_IPRIORITYR { - if (i as u64 * REGISTER_SIZE + dist.irq_base) > self.nr_irqs as u64 { + if (i as u64 * REGISTER_SIZE + dist.irq_base) > u64::from(self.nr_irqs) { break; } let offset = dist.irq_base + REGISTER_SIZE * i as u64; @@ -312,7 +311,7 @@ impl GICv3 { } for i in 0..NR_GICD_IROUTER { - if (i as u64 + dist.irq_base) > self.nr_irqs as u64 { + if (i as u64 + dist.irq_base) > u64::from(self.nr_irqs) { break; } let offset = dist.irq_base + i as u64; @@ -329,12 +328,12 @@ impl GICv3 { } fn set_dist(&self, mut dist: GICv3DistState) -> Result<()> { - let offset = 
dist.irq_base / (GIC_IRQ_INTERNAL as u64 / REGISTER_SIZE); + let offset = dist.irq_base / (u64::from(GIC_IRQ_INTERNAL) / REGISTER_SIZE); self.access_gic_distributor(GICD_ISENABLER + offset, &mut dist.gicd_isenabler, true)?; self.access_gic_distributor(GICD_IGROUPR + offset, &mut dist.gicd_igroupr, true)?; for i in 0..NR_GICD_IROUTER { - if (i as u64 + dist.irq_base) > self.nr_irqs as u64 { + if (i as u64 + dist.irq_base) > u64::from(self.nr_irqs) { break; } let offset = dist.irq_base + i as u64; @@ -350,7 +349,7 @@ impl GICv3 { // edge trigger for i in 0..NR_GICD_ICFGR { if ((i * GIC_IRQ_INTERNAL as usize / NR_GICD_ICFGR) as u64 + dist.irq_base) - > self.nr_irqs as u64 + > u64::from(self.nr_irqs) { break; } @@ -363,7 +362,7 @@ impl GICv3 { self.access_gic_distributor(GICD_ISACTIVER + offset, &mut dist.gicd_isactiver, true)?; for i in 0..NR_GICD_IPRIORITYR { - if (i as u64 * REGISTER_SIZE + dist.irq_base) > self.nr_irqs as u64 { + if (i as u64 * REGISTER_SIZE + dist.irq_base) > u64::from(self.nr_irqs) { break; } let offset = dist.irq_base + REGISTER_SIZE * i as u64; @@ -402,7 +401,8 @@ impl GICv3 { self.access_gic_cpu(ICC_BPR1_EL1, gic_cpu.vcpu, &mut gic_cpu.icc_bpr1_el1, false)?; // ICC_CTLR_EL1.PRIbits is [10:8] in ICC_CTLR_EL1 - // PRIBits indicate the number of priority bits implemented, independently for each target PE. + // PRIBits indicate the number of priority bits implemented, independently for each target + // PE. let icc_ctlr_el1_pri = ((gic_cpu.icc_ctlr_el1 & ICC_CTLR_EL1_PRIBITS_MASK) >> ICC_CTLR_EL1_PRIBITS_SHIFT) + 1; // Save APnR registers based on ICC_CTLR_EL1.PRIBITS @@ -486,7 +486,8 @@ impl GICv3 { self.access_gic_cpu(ICC_BPR1_EL1, gic_cpu.vcpu, &mut gic_cpu.icc_bpr1_el1, true)?; // ICC_CTLR_EL1.PRIbits is [10:8] in ICC_CTLR_EL1 - // PRIBits indicate the number of priority bits implemented, independently for each target PE. + // PRIBits indicate the number of priority bits implemented, independently for each target + // PE. let icc_ctlr_el1_pri = ((gic_cpu.icc_ctlr_el1 & ICC_CTLR_EL1_PRIBITS_MASK) >> ICC_CTLR_EL1_PRIBITS_SHIFT) + 1; // Restore APnR registers based on ICC_CTLR_EL1.PRIBITS @@ -549,6 +550,43 @@ impl GICv3 { Ok(()) } + + pub(crate) fn create_reset_state(&self) -> Result> { + let mut gic_state = GICv3State::default(); + + self.access_gic_redistributor(GICR_TYPER, 0, &mut gic_state.redist_typer_l, false) + .with_context(|| "create_reset_state: redist_typer_l")?; + self.access_gic_redistributor(GICR_TYPER + 4, 0, &mut gic_state.redist_typer_h, false) + .with_context(|| "create_reset_state: redist_typer_h")?; + + // process cpu-state and redistriburor + gic_state.iccr_len = self.vcpu_count as usize; + gic_state.redist_len = self.vcpu_count as usize; + for cpu in 0..self.vcpu_count { + let mut gic_cpu = GICv3CPUState { + vcpu: cpu as usize, + ..Default::default() + }; + + gic_cpu.icc_sre_el1 = 0x7; + + // initialize to hardware supported configuration + self.access_gic_cpu(ICC_CTLR_EL1, cpu as usize, &mut gic_cpu.icc_ctlr_el1, false) + .with_context(|| format!("create_reset_state: VCPU-{} icc_ctlr_el1", cpu))?; + + gic_state.vcpu_iccr[cpu as usize] = gic_cpu; + // setup redist state + gic_state.vcpu_redist[cpu as usize] = GICv3RedistState { + vcpu: cpu as usize, + ..Default::default() + } + } + + // process distributor + gic_state.dist_len = (self.nr_irqs / 32) as usize; + + Ok(gic_state.as_bytes().to_vec()) + } } /// The status of GICv3 interrupt controller. 
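// Orientation note for the reset path added above: GICDevice::reset() first
// resets the ITS through its hypervisor handler, then reset_gic_state() feeds
// the default snapshot produced by create_reset_state() back through
// StateTransfer::set_state(), so a cold reset reuses the same register-restore
// path as a live-migration resume.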
@@ -572,90 +610,86 @@ pub struct GICv3State { } impl StateTransfer for GICv3 { - fn get_state_vec(&self) -> migration::errors::Result> { - use migration::errors::ErrorKind; + fn get_state_vec(&self) -> Result> { + use migration::MigrationError; let mut state = GICv3State::default(); self.access_gic_redistributor(GICR_TYPER, 0, &mut state.redist_typer_l, false) - .map_err(|e| ErrorKind::GetGicRegsError("redist_typer_l", e.to_string()))?; + .map_err(|e| MigrationError::GetGicRegsError("redist_typer_l", e.to_string()))?; self.access_gic_redistributor(GICR_TYPER + 4, 0, &mut state.redist_typer_h, false) - .map_err(|e| ErrorKind::GetGicRegsError("redist_typer_h", e.to_string()))?; + .map_err(|e| MigrationError::GetGicRegsError("redist_typer_h", e.to_string()))?; self.access_gic_distributor(GICD_CTLR, &mut state.gicd_ctlr, false) - .map_err(|e| ErrorKind::GetGicRegsError("gicd_ctlr", e.to_string()))?; + .map_err(|e| MigrationError::GetGicRegsError("gicd_ctlr", e.to_string()))?; let plpis = (state.redist_typer_l & 1) != 0; for cpu in 0..self.vcpu_count { state.vcpu_redist[state.redist_len] = self .get_redist(cpu as usize, plpis) - .map_err(|e| ErrorKind::GetGicRegsError("redist", e.to_string()))?; + .map_err(|e| MigrationError::GetGicRegsError("redist", e.to_string()))?; state.redist_len += 1; } self.access_gic_distributor(GICD_STATUSR, &mut state.gicd_statusr, false) - .map_err(|e| ErrorKind::GetGicRegsError("gicd_statusr", e.to_string()))?; + .map_err(|e| MigrationError::GetGicRegsError("gicd_statusr", e.to_string()))?; for irq in (GIC_IRQ_INTERNAL..self.nr_irqs).step_by(32) { state.irq_dist[state.dist_len] = self - .get_dist(irq as u64) - .map_err(|e| ErrorKind::GetGicRegsError("dist", e.to_string()))?; + .get_dist(u64::from(irq)) + .map_err(|e| MigrationError::GetGicRegsError("dist", e.to_string()))?; state.dist_len += 1; } for cpu in 0..self.vcpu_count { state.vcpu_iccr[state.iccr_len] = self .get_cpu(cpu as usize) - .map_err(|e| ErrorKind::GetGicRegsError("cpu", e.to_string()))?; + .map_err(|e| MigrationError::GetGicRegsError("cpu", e.to_string()))?; state.iccr_len += 1; } Ok(state.as_bytes().to_vec()) } - fn set_state(&self, state: &[u8]) -> migration::errors::Result<()> { - use migration::errors::ErrorKind; + fn set_state(&self, state: &[u8]) -> Result<()> { + use migration::error::MigrationError; let state = GICv3State::from_bytes(state).unwrap(); let mut regu32 = state.redist_typer_l; self.access_gic_redistributor(GICR_TYPER, 0, &mut regu32, false) - .map_err(|e| ErrorKind::SetGicRegsError("gicr_typer_l", e.to_string()))?; + .map_err(|e| MigrationError::SetGicRegsError("gicr_typer_l", e.to_string()))?; let plpis: bool = regu32 & 1 != 0; regu32 = state.redist_typer_h; self.access_gic_redistributor(GICR_TYPER + 4, 0, &mut regu32, false) - .map_err(|e| ErrorKind::SetGicRegsError("gicr_typer_h", e.to_string()))?; + .map_err(|e| MigrationError::SetGicRegsError("gicr_typer_h", e.to_string()))?; regu32 = state.gicd_ctlr; self.access_gic_distributor(GICD_CTLR, &mut regu32, true) - .map_err(|e| ErrorKind::SetGicRegsError("gicd_ctlr", e.to_string()))?; + .map_err(|e| MigrationError::SetGicRegsError("gicd_ctlr", e.to_string()))?; for gicv3_redist in state.vcpu_redist[0..state.redist_len].iter() { self.set_redist(*gicv3_redist, plpis) - .map_err(|e| ErrorKind::SetGicRegsError("redist", e.to_string()))?; + .map_err(|e| MigrationError::SetGicRegsError("redist", e.to_string()))?; } regu32 = state.gicd_statusr; self.access_gic_distributor(GICD_STATUSR, &mut regu32, true) - .map_err(|e| 
ErrorKind::SetGicRegsError("gicd_statusr", e.to_string()))?; + .map_err(|e| MigrationError::SetGicRegsError("gicd_statusr", e.to_string()))?; for gicv3_dist in state.irq_dist[0..state.dist_len].iter() { self.set_dist(*gicv3_dist) - .map_err(|e| ErrorKind::SetGicRegsError("dist", e.to_string()))? + .map_err(|e| MigrationError::SetGicRegsError("dist", e.to_string()))? } for gicv3_iccr in state.vcpu_iccr[0..state.iccr_len].iter() { self.set_cpu(*gicv3_iccr) - .map_err(|e| ErrorKind::SetGicRegsError("cpu", e.to_string()))?; + .map_err(|e| MigrationError::SetGicRegsError("cpu", e.to_string()))?; } Ok(()) } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&GICv3State::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&GICv3State::descriptor().name).unwrap_or(!0) } } @@ -675,65 +709,59 @@ pub struct GICv3ItsState { } impl StateTransfer for GICv3Its { - fn get_state_vec(&self) -> migration::errors::Result> { - use migration::errors::ErrorKind; + fn get_state_vec(&self) -> Result> { + use migration::MigrationError; let mut state = GICv3ItsState::default(); - self.access_gic_its_tables(true) - .map_err(|e| ErrorKind::GetGicRegsError("Its table", e.to_string()))?; for i in 0..8 { self.access_gic_its(GITS_BASER + 8 * i as u32, &mut state.baser[i], false) - .map_err(|e| ErrorKind::GetGicRegsError("Its baser", e.to_string()))?; + .map_err(|e| MigrationError::GetGicRegsError("Its baser", e.to_string()))?; } self.access_gic_its(GITS_CTLR, &mut state.ctlr, false) - .map_err(|e| ErrorKind::GetGicRegsError("Its ctlr", e.to_string()))?; + .map_err(|e| MigrationError::GetGicRegsError("Its ctlr", e.to_string()))?; self.access_gic_its(GITS_CBASER, &mut state.cbaser, false) - .map_err(|e| ErrorKind::GetGicRegsError("Its cbaser", e.to_string()))?; + .map_err(|e| MigrationError::GetGicRegsError("Its cbaser", e.to_string()))?; self.access_gic_its(GITS_CREADR, &mut state.creadr, false) - .map_err(|e| ErrorKind::GetGicRegsError("Its creadr", e.to_string()))?; + .map_err(|e| MigrationError::GetGicRegsError("Its creadr", e.to_string()))?; self.access_gic_its(GITS_CWRITER, &mut state.cwriter, false) - .map_err(|e| ErrorKind::GetGicRegsError("Its cwriter", e.to_string()))?; + .map_err(|e| MigrationError::GetGicRegsError("Its cwriter", e.to_string()))?; self.access_gic_its(GITS_IIDR, &mut state.iidr, false) - .map_err(|e| ErrorKind::GetGicRegsError("Its iidr", e.to_string()))?; + .map_err(|e| MigrationError::GetGicRegsError("Its iidr", e.to_string()))?; Ok(state.as_bytes().to_vec()) } - fn set_state(&self, state: &[u8]) -> migration::errors::Result<()> { - use migration::errors::ErrorKind; + fn set_state(&self, state: &[u8]) -> Result<()> { + use migration::MigrationError; let mut its_state = *GICv3ItsState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("GICv3Its"))?; + .with_context(|| MigrationError::FromBytesError("GICv3Its"))?; self.access_gic_its(GITS_IIDR, &mut its_state.iidr, true) - .map_err(|e| ErrorKind::SetGicRegsError("Its iidr", e.to_string()))?; + .map_err(|e| MigrationError::SetGicRegsError("Its iidr", e.to_string()))?; // It must be written before GITS_CREADR, because GITS_CBASER write access will reset // GITS_CREADR. 
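// Descriptive note on the restore order implemented below: GITS_IIDR first,
// then GITS_CBASER, GITS_CREADR and GITS_CWRITER, then the eight GITS_BASER
// registers, then the in-memory ITS tables, and GITS_CTLR last, so the ITS is
// only re-enabled once every table it points at has been restored.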
self.access_gic_its(GITS_CBASER, &mut its_state.cbaser, true) - .map_err(|e| ErrorKind::SetGicRegsError("Its cbaser", e.to_string()))?; + .map_err(|e| MigrationError::SetGicRegsError("Its cbaser", e.to_string()))?; self.access_gic_its(GITS_CREADR, &mut its_state.creadr, true) - .map_err(|e| ErrorKind::SetGicRegsError("Its readr", e.to_string()))?; + .map_err(|e| MigrationError::SetGicRegsError("Its readr", e.to_string()))?; self.access_gic_its(GITS_CWRITER, &mut its_state.cwriter, true) - .map_err(|e| ErrorKind::SetGicRegsError("Its cwriter", e.to_string()))?; + .map_err(|e| MigrationError::SetGicRegsError("Its cwriter", e.to_string()))?; for i in 0..8 { self.access_gic_its(GITS_BASER + 8 * i as u32, &mut its_state.baser[i], true) - .map_err(|e| ErrorKind::SetGicRegsError("Its baser", e.to_string()))?; + .map_err(|e| MigrationError::SetGicRegsError("Its baser", e.to_string()))?; } self.access_gic_its_tables(false) - .map_err(|e| ErrorKind::SetGicRegsError("Its table", e.to_string()))?; + .map_err(|e| MigrationError::SetGicRegsError("Its table", e.to_string()))?; self.access_gic_its(GITS_CTLR, &mut its_state.ctlr, true) - .map_err(|e| ErrorKind::SetGicRegsError("Its ctlr", e.to_string()))?; + .map_err(|e| MigrationError::SetGicRegsError("Its ctlr", e.to_string()))?; Ok(()) } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&GICv3ItsState::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&GICv3ItsState::descriptor().name).unwrap_or(!0) } } diff --git a/devices/src/interrupt_controller/error.rs b/devices/src/interrupt_controller/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..191d6f7b73ea024bf3addc3d338d2a72fdabcdc8 --- /dev/null +++ b/devices/src/interrupt_controller/error.rs @@ -0,0 +1,19 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum InterruptError { + #[error("Invalid GIC config: {0}")] + InvalidConfig(String), +} diff --git a/devices/src/interrupt_controller/mod.rs b/devices/src/interrupt_controller/mod.rs index e17d5c479c9a966cf817eae03252557b0e35ba2d..d1b04a168168cbed9602a34006654090a406dd22 100644 --- a/devices/src/interrupt_controller/mod.rs +++ b/devices/src/interrupt_controller/mod.rs @@ -17,7 +17,7 @@ //! ## Design //! //! This module offers support for: -//! 1. Create kvm-based interrupt controller. +//! 1. Create hypervisor-based interrupt controller. //! 2. Manager lifecycle for `GIC`. //! //! ## Platform Support @@ -27,23 +27,147 @@ #[allow(clippy::upper_case_acronyms)] #[cfg(target_arch = "aarch64")] mod aarch64; +mod error; #[cfg(target_arch = "aarch64")] -pub use aarch64::GICConfig as InterruptControllerConfig; -#[cfg(target_arch = "aarch64")] -pub use aarch64::InterruptController; - -pub mod errors { - error_chain! 
{
-        errors {
-            #[cfg(target_arch = "aarch64")]
-            InvalidConfig(err_info: String) {
-                display("Invalid GIC config: {}.", err_info)
-            }
-            #[cfg(target_arch = "aarch64")]
-            CreateKvmDevice(err: kvm_ioctls::Error) {
-                display("Failed to create KVM device: {:#?}.", err)
-            }
+pub use aarch64::{
+    GICConfig as ICGICConfig, GICDevice, GICVersion, GICv2, GICv2Access,
+    GICv2Config as ICGICv2Config, GICv3, GICv3Access, GICv3Config as ICGICv3Config, GICv3ItsAccess,
+    GICv3ItsState, GICv3State, GicRedistRegion, InterruptController, GIC_IRQ_INTERNAL, GIC_IRQ_MAX,
+};
+pub use error::InterruptError;
+
+use std::sync::Arc;
+
+use anyhow::Result;
+use vmm_sys_util::eventfd::EventFd;
+
+use super::pci::MsiVector;
+
+#[derive(Default, Debug, Clone, PartialEq, Eq)]
+pub enum TriggerMode {
+    Level,
+    #[default]
+    Edge,
+}
+
+pub trait LineIrqManager: Send + Sync {
+    fn irqfd_enable(&self) -> bool;
+
+    fn register_irqfd(
+        &self,
+        _irq_fd: Arc<EventFd>,
+        _irq: u32,
+        _trigger_mode: TriggerMode,
+    ) -> Result<()> {
+        Ok(())
+    }
+
+    fn unregister_irqfd(&self, _irq_fd: Arc<EventFd>, _irq: u32) -> Result<()> {
+        Ok(())
+    }
+
+    fn set_level_irq(&self, _irq: u32, _level: bool) -> Result<()> {
+        Ok(())
+    }
+
+    fn set_edge_irq(&self, _irq: u32) -> Result<()> {
+        Ok(())
+    }
+
+    fn write_irqfd(&self, _irq_fd: Arc<EventFd>) -> Result<()> {
+        Ok(())
+    }
+}
+
+pub trait MsiIrqManager: Send + Sync {
+    fn irqfd_enable(&self) -> bool;
+
+    fn allocate_irq(&self, _vector: MsiVector) -> Result<u32> {
+        Ok(0)
+    }
+
+    fn release_irq(&self, _irq: u32) -> Result<()> {
+        Ok(())
+    }
+
+    fn register_irqfd(&self, _irq_fd: Arc<EventFd>, _irq: u32) -> Result<()> {
+        Ok(())
+    }
+
+    fn unregister_irqfd(&self, _irq_fd: Arc<EventFd>, _irq: u32) -> Result<()> {
+        Ok(())
+    }
+
+    fn trigger(
+        &self,
+        _irq_fd: Option<Arc<EventFd>>,
+        _vector: MsiVector,
+        _dev_id: u32,
+    ) -> Result<()> {
+        Ok(())
+    }
+
+    fn update_route_table(&self, _gsi: u32, _vector: MsiVector) -> Result<()> {
+        Ok(())
+    }
+}
+
+pub struct IrqManager {
+    pub line_irq_manager: Option<Arc<dyn LineIrqManager>>,
+    pub msi_irq_manager: Option<Arc<dyn MsiIrqManager>>,
+}
+
+#[derive(Default, Clone)]
+pub struct IrqState {
+    pub irq: u32,
+    irq_fd: Option<Arc<EventFd>>,
+    irq_handler: Option<Arc<dyn LineIrqManager>>,
+    trigger_mode: TriggerMode,
+}
+
+impl IrqState {
+    pub fn new(
+        irq: u32,
+        irq_fd: Option<Arc<EventFd>>,
+        irq_handler: Option<Arc<dyn LineIrqManager>>,
+        trigger_mode: TriggerMode,
+    ) -> Self {
+        IrqState {
+            irq,
+            irq_fd,
+            irq_handler,
+            trigger_mode,
+        }
+    }
+
+    pub fn register_irq(&mut self) -> Result<()> {
+        if self.irq_handler.is_none() {
+            return Ok(());
+        }
+
+        let irq_handler = self.irq_handler.as_ref().unwrap();
+        if !irq_handler.irqfd_enable() {
+            self.irq_fd = None;
+            return Ok(());
+        }
+
+        if let Some(irqfd) = self.irq_fd.clone() {
+            irq_handler.register_irqfd(irqfd, self.irq, self.trigger_mode.clone())?;
+        }
+
+        Ok(())
+    }
+
+    pub fn trigger_irq(&self) -> Result<()> {
+        let irq_handler = self.irq_handler.as_ref().unwrap();
+        if let Some(irq_fd) = &self.irq_fd {
+            return irq_handler.write_irqfd(irq_fd.clone());
+        }
+        if self.trigger_mode == TriggerMode::Edge {
+            irq_handler.set_edge_irq(self.irq)
+        } else {
+            irq_handler.set_level_irq(self.irq, true)
         }
     }
 }
diff --git a/devices/src/legacy/chardev.rs b/devices/src/legacy/chardev.rs
deleted file mode 100644
index a1a120593b99d9dfacc2c6bf2cdb09ca97bafd78..0000000000000000000000000000000000000000
--- a/devices/src/legacy/chardev.rs
+++ /dev/null
@@ -1,318 +0,0 @@
-// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved.
-//
-// StratoVirt is licensed under Mulan PSL v2.
-// You can use this software according to the terms and conditions of the Mulan
-// PSL v2.
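// Hypothetical device-side use of the IrqState helper introduced above (a
// sketch, not part of the patch; `line_mgr: Arc<dyn LineIrqManager>` stands for
// whatever backend the hypervisor installs in IrqManager):
//
// let irq_fd = Arc::new(EventFd::new(libc::EFD_NONBLOCK)?);
// let mut pin = IrqState::new(34, Some(irq_fd), Some(line_mgr), TriggerMode::Edge);
// pin.register_irq()?;  // silently drops the eventfd if the backend lacks irqfd support
// pin.trigger_irq()?;   // writes the irqfd, or falls back to set_edge_irq()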
-// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. - -use std::fs::{read_link, File, OpenOptions}; -use std::io::{Stdin, Stdout}; -use std::os::unix::io::{AsRawFd, FromRawFd}; -use std::os::unix::net::{UnixListener, UnixStream}; -use std::path::PathBuf; -use std::sync::{Arc, Mutex}; - -use libc::{cfmakeraw, tcgetattr, tcsetattr, termios}; -use machine_manager::machine::{PathInfo, PTY_PATH}; -use machine_manager::{ - config::{ChardevConfig, ChardevType}, - temp_cleaner::TempCleaner, -}; -use util::loop_context::{EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation}; -use util::set_termi_raw_mode; -use util::unix::limit_permission; -use vmm_sys_util::epoll::EventSet; - -use super::errors::{Result, ResultExt}; - -/// Provide the trait that helps handle the input data. -pub trait InputReceiver: Send { - fn input_handle(&mut self, buffer: &[u8]); - - fn get_remain_space_size(&mut self) -> usize; -} - -/// Character device structure. -pub struct Chardev { - /// Id of chardev. - pub id: String, - /// Type of backend device. - pub backend: ChardevType, - /// UnixListener for socket-type chardev. - pub listener: Option, - /// Chardev input. - pub input: Option>>, - /// Chardev output. - pub output: Option>>, - /// Fd of socket stream. - pub stream_fd: Option, - /// Handle the input data and trigger interrupt if necessary. - receive: Option>, - /// Return the remain space size of receiver buffer. - get_remain_space_size: Option usize + Send + Sync>>, -} - -impl Chardev { - pub fn new(chardev_cfg: ChardevConfig) -> Self { - Chardev { - id: chardev_cfg.id, - backend: chardev_cfg.backend, - listener: None, - input: None, - output: None, - stream_fd: None, - receive: None, - get_remain_space_size: None, - } - } - - pub fn realize(&mut self) -> Result<()> { - match &self.backend { - ChardevType::Stdio => { - set_termi_raw_mode().chain_err(|| "Failed to set terminal to raw mode")?; - self.input = Some(Arc::new(Mutex::new(std::io::stdin()))); - self.output = Some(Arc::new(Mutex::new(std::io::stdout()))); - } - ChardevType::Pty => { - let (master, path) = - set_pty_raw_mode().chain_err(|| "Failed to set pty to raw mode")?; - info!("Pty path is: {:?}", path); - let path_info = PathInfo { - path: format!("pty:{:?}", &path), - label: self.id.clone(), - }; - PTY_PATH.lock().unwrap().push(path_info); - // Safe because `master_arc` is the only one owner for the file descriptor. - let master_arc = unsafe { Arc::new(Mutex::new(File::from_raw_fd(master))) }; - self.input = Some(master_arc.clone()); - self.output = Some(master_arc); - } - ChardevType::Socket(path) => { - let sock = UnixListener::bind(path.clone()) - .chain_err(|| format!("Failed to bind socket for chardev, path:{}", path))?; - self.listener = Some(sock); - // add file to temporary pool, so it could be cleaned when vm exit. 
- TempCleaner::add_path(path.clone()); - limit_permission(path).chain_err(|| { - format!( - "Failed to change file permission for chardev, path:{}", - path - ) - })?; - } - ChardevType::File(path) => { - let file = Arc::new(Mutex::new( - OpenOptions::new() - .read(true) - .write(true) - .create(true) - .open(path)?, - )); - self.output = Some(file); - } - }; - Ok(()) - } - - pub fn set_input_callback(&mut self, dev: &Arc>) { - let cloned_dev = dev.clone(); - self.receive = Some(Arc::new(move |data: &[u8]| { - cloned_dev.lock().unwrap().input_handle(data) - })); - let cloned_dev = dev.clone(); - self.get_remain_space_size = Some(Arc::new(move || { - cloned_dev.lock().unwrap().get_remain_space_size() - })); - } -} - -fn set_pty_raw_mode() -> Result<(i32, PathBuf)> { - let mut master: libc::c_int = 0; - let master_ptr: *mut libc::c_int = &mut master; - let mut slave: libc::c_int = 0; - let slave_ptr: *mut libc::c_int = &mut slave; - // Safe because this only create a new pseudoterminal and set the master and slave fd. - let ret = { - unsafe { - libc::openpty( - master_ptr, - slave_ptr, - std::ptr::null_mut(), - std::ptr::null_mut(), - std::ptr::null_mut(), - ) - } - }; - if ret < 0 { - bail!( - "Failed to open pty, error is {}", - std::io::Error::last_os_error() - ) - } - let proc_path = PathBuf::from(format!("/proc/self/fd/{}", slave)); - let path = read_link(proc_path).chain_err(|| "Failed to read slave pty link")?; - // Safe because this only set the `old_termios` struct to zero. - let mut old_termios: termios = unsafe { std::mem::zeroed() }; - // Safe because this only get the current mode of slave pty and save it. - let ret = unsafe { tcgetattr(slave, &mut old_termios as *mut _) }; - if ret < 0 { - bail!( - "Failed to get mode of pty, error is {}", - std::io::Error::last_os_error() - ); - } - let mut new_termios: termios = old_termios; - // Safe because this function only change the `new_termios` argument. - unsafe { cfmakeraw(&mut new_termios as *mut _) }; - // Safe because this function only set the slave pty to raw mode. 
- let ret = unsafe { tcsetattr(slave, libc::TCSAFLUSH, &new_termios as *const _) }; - if ret < 0 { - bail!( - "Failed to set pty to raw mode, error is {}", - std::io::Error::last_os_error() - ); - } - Ok((master, path)) -} - -fn get_notifier_handler( - chardev: Arc>, - backend: ChardevType, -) -> Box { - match backend { - ChardevType::Stdio | ChardevType::Pty => Box::new(move |_, _| { - let locked_chardev = chardev.lock().unwrap(); - let buff_size = locked_chardev.get_remain_space_size.as_ref().unwrap()(); - let mut buffer = vec![0_u8; buff_size]; - if let Some(input) = locked_chardev.input.clone() { - if let Ok(index) = input.lock().unwrap().chr_read_raw(&mut buffer) { - locked_chardev.receive.as_ref().unwrap()(&mut buffer[..index]); - } else { - error!("Failed to read input data"); - } - } else { - error!("Failed to get chardev input fd"); - } - None - }), - ChardevType::Socket(_) => Box::new(move |_, _| { - let mut locked_chardev = chardev.lock().unwrap(); - let (stream, _) = locked_chardev.listener.as_ref().unwrap().accept().unwrap(); - let listener_fd = locked_chardev.listener.as_ref().unwrap().as_raw_fd(); - let stream_fd = stream.as_raw_fd(); - locked_chardev.stream_fd = Some(stream_fd); - let stream_arc = Arc::new(Mutex::new(stream)); - locked_chardev.input = Some(stream_arc.clone()); - locked_chardev.output = Some(stream_arc); - - let cloned_chardev = chardev.clone(); - let inner_handler = Box::new(move |event, _| { - if event == EventSet::IN { - let locked_chardev = cloned_chardev.lock().unwrap(); - let buff_size = locked_chardev.get_remain_space_size.as_ref().unwrap()(); - let mut buffer = vec![0_u8; buff_size]; - if let Some(input) = locked_chardev.input.clone() { - if let Ok(index) = input.lock().unwrap().chr_read_raw(&mut buffer) { - locked_chardev.receive.as_ref().unwrap()(&mut buffer[..index]); - } else { - error!("Failed to read input data"); - } - } else { - error!("Failed to get chardev input fd"); - } - } - if event & EventSet::HANG_UP == EventSet::HANG_UP { - cloned_chardev.lock().unwrap().input = None; - cloned_chardev.lock().unwrap().output = None; - cloned_chardev.lock().unwrap().stream_fd = None; - Some(vec![EventNotifier::new( - NotifierOperation::Delete, - stream_fd, - Some(listener_fd), - EventSet::IN | EventSet::HANG_UP, - Vec::new(), - )]) - } else { - None - } - }); - Some(vec![EventNotifier::new( - NotifierOperation::AddShared, - stream_fd, - Some(listener_fd), - EventSet::IN | EventSet::HANG_UP, - vec![Arc::new(Mutex::new(inner_handler))], - )]) - }), - ChardevType::File(_) => Box::new(move |_, _| None), - } -} - -impl EventNotifierHelper for Chardev { - fn internal_notifiers(chardev: Arc>) -> Vec { - let mut notifiers = Vec::new(); - let backend = chardev.lock().unwrap().backend.clone(); - let cloned_chardev = chardev.clone(); - match backend { - ChardevType::Stdio | ChardevType::Pty => { - if let Some(input) = chardev.lock().unwrap().input.clone() { - notifiers.push(EventNotifier::new( - NotifierOperation::AddShared, - input.lock().unwrap().as_raw_fd(), - None, - EventSet::IN, - vec![Arc::new(Mutex::new(get_notifier_handler( - cloned_chardev, - backend, - )))], - )); - } - } - ChardevType::Socket(_) => { - if let Some(listener) = chardev.lock().unwrap().listener.as_ref() { - notifiers.push(EventNotifier::new( - NotifierOperation::AddShared, - listener.as_raw_fd(), - None, - EventSet::IN, - vec![Arc::new(Mutex::new(get_notifier_handler( - cloned_chardev, - backend, - )))], - )); - } - } - ChardevType::File(_) => (), - } - notifiers - } -} - -/// Provide 
backend trait object receiving the input from the guest. -pub trait CommunicatInInterface: std::marker::Send + std::os::unix::io::AsRawFd { - fn chr_read_raw(&mut self, buf: &mut [u8]) -> Result { - use libc::read; - // Safe because this only read the bytes from terminal within the buffer. - let ret = unsafe { read(self.as_raw_fd(), buf.as_mut_ptr() as *mut _, buf.len()) }; - if ret < 0 { - bail!("Failed to read buffer"); - } - Ok(ret as usize) - } -} - -/// Provide backend trait object processing the output from the guest. -pub trait CommunicatOutInterface: std::io::Write + std::marker::Send {} - -impl CommunicatInInterface for UnixStream {} -impl CommunicatInInterface for File {} -impl CommunicatInInterface for Stdin {} - -impl CommunicatOutInterface for UnixStream {} -impl CommunicatOutInterface for File {} -impl CommunicatOutInterface for Stdout {} diff --git a/devices/src/legacy/error.rs b/devices/src/legacy/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..3495be54a3d9207ed6a8159e1b192b18d3ed7add --- /dev/null +++ b/devices/src/legacy/error.rs @@ -0,0 +1,60 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum LegacyError { + #[error("SysBus")] + SysBus { + #[from] + source: crate::sysbus::error::SysBusError, + }, + #[error("AddressSpace")] + AddressSpace { + #[from] + source: address_space::error::AddressSpaceError, + }, + #[error("Io")] + Io { + #[from] + source: std::io::Error, + }, + #[error("Failed to allocate system bus resource.")] + SetSysResErr, + #[error("Failed to add FwCfg entry, key is {0}")] + AddEntryErr(String), + #[error("Failed to find FwCfg entry, key is {0}.")] + EntryNotFound(String), + #[error("Duplicate FwCfg file-entry, name is {0}")] + DuplicateFile(String), + #[error("No available FwCfg file-slot for this file entry with filename {0}")] + FileSlotsNotAvailable(String), + #[error("Failed to read DMA request, dma_addr=0x{0:x} size=0x{1:x}")] + ReadDmaRequest(u64, u64), + #[error("Invalid FwCfg entry key {0}")] + InvalidFwCfgEntry(u16), + #[error("Flash size is 0x{0:x}, offset 0x{1:x} and size 0x{2:x} in write request overflows")] + PFlashWriteOverflow(u64, u64, u64), + #[error("Flash size is 0x{0:x}, offset 0x{1:x} and size 0x{2:x} in read request overflows")] + PFlashReadOverflow(u64, u64, u64), + #[error("Failed to seek to offset 0x{0:x} of PFlash file")] + PFlashFileSeekErr(u64), + #[error("Flash CFI table len is 0x{0:x}, request 0x{1:x} overflows")] + PFlashIndexOverflow(u64, usize), + #[error("Unsupported device configuration: device width {0}, bank width {1}")] + PFlashDevConfigErr(u32, u32), + #[error("Failed to write to Flash ROM")] + WritePFlashRomErr, + #[error("Failed to register event notifier.")] + RegNotifierErr, +} diff --git a/devices/src/legacy/fwcfg.rs b/devices/src/legacy/fwcfg.rs index 866bab29725b5eaade15b8cbd3b6134fc1d4f86d..b6a57a278ea08cdbc85d23d9d87e57961167f10f 100644 --- a/devices/src/legacy/fwcfg.rs +++ b/devices/src/legacy/fwcfg.rs @@ -12,23 
+12,26 @@ use std::sync::{Arc, Mutex}; -use acpi::AmlBuilder; +use anyhow::{anyhow, bail, Context, Result}; #[cfg(target_arch = "x86_64")] +use byteorder::LittleEndian; +use byteorder::{BigEndian, ByteOrder}; +use log::{error, warn}; + +use crate::legacy::error::LegacyError; +use crate::sysbus::{SysBus, SysBusDevBase, SysBusDevOps, SysBusDevType}; +use crate::{convert_bus_mut, Device, DeviceBase, MUT_SYS_BUS}; use acpi::{ - AmlDevice, AmlInteger, AmlIoDecode, AmlIoResource, AmlNameDecl, AmlResTemplate, - AmlScopeBuilder, AmlString, + AmlBuilder, AmlDevice, AmlInteger, AmlNameDecl, AmlResTemplate, AmlScopeBuilder, AmlString, }; -use address_space::{AddressSpace, GuestAddress}; #[cfg(target_arch = "x86_64")] -use byteorder::LittleEndian; -use byteorder::{BigEndian, ByteOrder}; -use error_chain::ChainedError; -use sysbus::{errors::Result as SysBusResult, SysBus, SysBusDevOps, SysBusDevType, SysRes}; +use acpi::{AmlIoDecode, AmlIoResource}; +#[cfg(target_arch = "aarch64")] +use acpi::{AmlMemory32Fixed, AmlReadAndWrite}; +use address_space::{AddressAttr, AddressSpace, GuestAddress}; use util::byte_code::ByteCode; use util::num_ops::extract_u64; -use util::offset_of; - -use crate::legacy::errors::{ErrorKind, Result, ResultExt}; +use util::{gen_base_func, offset_of}; #[cfg(target_arch = "x86_64")] const FW_CFG_IO_BASE: u64 = 0x510; @@ -129,7 +132,7 @@ fn get_key_name(key: usize) -> &'static str { /// FwCfg select callback and write callback type definition type FwCfgCallbackType = Arc; -type FwCfgWriteCallbackType = Arc; +type FwCfgWriteCallbackType = Arc>; /// FwCfg select callback pub trait FwCfgCallback { @@ -138,7 +141,7 @@ pub trait FwCfgCallback { /// FwCfg write callback pub trait FwCfgWriteCallback { - fn write_callback(&self, start: u64, len: usize); + fn write_callback(&mut self, data: Vec, start: u64, len: usize); } /// The FwCfgEntry type which holds the firmware item @@ -176,25 +179,6 @@ struct FwCfgFile { name: [u8; 56], } -impl Eq for FwCfgFile {} - -impl PartialEq for FwCfgFile { - fn eq(&self, other: &Self) -> bool { - self.name.to_vec() == other.name.to_vec() - } -} - -impl Default for FwCfgFile { - fn default() -> Self { - FwCfgFile { - size: 0_u32, - select: 0_u16, - reserved: 0_u16, - name: [0_u8; 56], - } - } -} - impl FwCfgFile { fn new(size: u32, select: u16, name: &str) -> Self { let len = std::cmp::min(56, name.len()); @@ -259,12 +243,14 @@ fn write_dma_memory( mut buf: &[u8], len: u64, ) -> Result<()> { - addr_space.write(&mut buf, addr, len).chain_err(|| { - format!( - "Failed to write dma memory of fwcfg at gpa=0x{:x} len=0x{:x}", - addr.0, len - ) - })?; + addr_space + .write(&mut buf, addr, len, AddressAttr::Ram) + .with_context(|| { + format!( + "Failed to write dma memory of fwcfg at gpa=0x{:x} len=0x{:x}", + addr.0, len + ) + })?; Ok(()) } @@ -276,12 +262,14 @@ fn read_dma_memory( mut buf: &mut [u8], len: u64, ) -> Result<()> { - addr_space.read(&mut buf, addr, len).chain_err(|| { - format!( - "Failed to read dma memory of fwcfg at gpa=0x{:x} len=0x{:x}", - addr.0, len - ) - })?; + addr_space + .read(&mut buf, addr, len, AddressAttr::Ram) + .with_context(|| { + format!( + "Failed to read dma memory of fwcfg at gpa=0x{:x} len=0x{:x}", + addr.0, len + ) + })?; Ok(()) } @@ -295,7 +283,7 @@ fn write_dma_result(addr_space: &Arc, addr: GuestAddress, value: u &dma_buf, dma_buf.len() as u64, ) - .chain_err(|| { + .with_context(|| { format!( "Failed to write dma result of fwcfg at gpa=0x{:x} value=0x{:x}", addr.0, value @@ -328,7 +316,7 @@ pub struct FwCfgCommon { impl 
FwCfgCommon { fn new(sys_mem: Arc) -> Self { FwCfgCommon { - file_slots: FW_CFG_FILE_SLOTS_DFLT as u16, + file_slots: FW_CFG_FILE_SLOTS_DFLT, arch_entries: vec![ FwCfgEntry::default(); (FW_CFG_FILE_FIRST + FW_CFG_FILE_SLOTS_DFLT) as usize @@ -360,9 +348,9 @@ impl FwCfgCommon { fn get_entry_mut(&mut self) -> Result<&mut FwCfgEntry> { let key = self.cur_entry & FW_CFG_ENTRY_MASK; if key >= self.max_entry() || self.cur_entry == FW_CFG_INVALID { - return Err( - ErrorKind::EntryNotFound(get_key_name(self.cur_entry as usize).to_owned()).into(), - ); + return Err(anyhow!(LegacyError::EntryNotFound( + get_key_name(self.cur_entry as usize).to_owned() + ))); }; // unwrap is safe because the count of arch_entries and entries is initialized @@ -377,11 +365,14 @@ impl FwCfgCommon { /// Select the entry by the key specified fn select_entry(&mut self, key: u16) { + let ret; self.cur_offset = 0; if (key & FW_CFG_ENTRY_MASK) >= self.max_entry() { self.cur_entry = FW_CFG_INVALID; + ret = 0; } else { self.cur_entry = key; + ret = 1; // unwrap() is safe because we have checked the range of `key`. let selected_entry = self.get_entry_mut().unwrap(); @@ -389,6 +380,8 @@ impl FwCfgCommon { cb.select_callback(); } } + + trace::fwcfg_select_entry(key, get_key_name(key as usize), ret); } fn add_entry( @@ -402,7 +395,7 @@ impl FwCfgCommon { let key = (key as u16) & FW_CFG_ENTRY_MASK; if key >= self.max_entry() || data.len() >= std::u32::MAX as usize { - return Err(ErrorKind::InvalidFwCfgEntry(key).into()); + return Err(anyhow!(LegacyError::InvalidFwCfgEntry(key))); } let entry = if self.is_arch_local() { @@ -420,18 +413,19 @@ impl FwCfgCommon { warn!("Entry not empty, will override"); } - entry.data = data; + entry.data = data.clone(); entry.select_cb = select_cb; entry.allow_write = allow_write; entry.write_cb = write_cb; + trace::fwcfg_add_entry(key, get_key_name(key as usize), data); Ok(()) } // Update a FwCfgEntry fn update_entry_data(&mut self, key: u16, mut data: Vec) -> Result<()> { if key >= self.max_entry() || data.len() >= std::u32::MAX as usize { - return Err(ErrorKind::InvalidFwCfgEntry(key).into()); + return Err(anyhow!(LegacyError::InvalidFwCfgEntry(key))); } let entry = if self.is_arch_local() { @@ -445,7 +439,9 @@ impl FwCfgCommon { e.data.append(&mut data); Ok(()) } else { - Err(ErrorKind::EntryNotFound(get_key_name(key as usize).to_owned()).into()) + Err(anyhow!(LegacyError::EntryNotFound( + get_key_name(key as usize).to_owned() + ))) } } @@ -458,7 +454,9 @@ impl FwCfgCommon { allow_write: bool, ) -> Result<()> { if self.files.len() >= self.file_slots as usize { - return Err(ErrorKind::FileSlotsNotAvailable(filename.to_owned()).into()); + return Err(anyhow!(LegacyError::FileSlotsNotAvailable( + filename.to_owned() + ))); } let file_name_bytes = filename.to_string().as_bytes().to_vec(); @@ -468,7 +466,7 @@ impl FwCfgCommon { .iter() .any(|f| f.name[0..file_name_bytes.len()].to_vec() == file_name_bytes) { - return Err(ErrorKind::DuplicateFile(filename.to_owned()).into()); + return Err(anyhow!(LegacyError::DuplicateFile(filename.to_owned()))); } let mut index = self.files.len(); @@ -479,11 +477,8 @@ impl FwCfgCommon { } } - let file = FwCfgFile::new( - data.len() as u32, - FW_CFG_FILE_FIRST + index as u16, - filename, - ); + let data_len = data.len(); + let file = FwCfgFile::new(data_len as u32, FW_CFG_FILE_FIRST + index as u16, filename); self.files.insert(index, file); self.files.iter_mut().skip(index + 1).for_each(|f| { f.select += 1; @@ -501,6 +496,46 @@ impl FwCfgCommon { FW_CFG_FILE_FIRST as 
usize + index, FwCfgEntry::new(data, select_cb, write_cb, allow_write), ); + + trace::fwcfg_add_file(index, filename, data_len); + Ok(()) + } + + fn modify_file_callback( + &mut self, + filename: &str, + data: Vec, + select_cb: Option, + write_cb: Option, + allow_write: bool, + ) -> Result<()> { + if self.files.len() >= self.file_slots as usize { + return Err(anyhow!(LegacyError::FileSlotsNotAvailable( + filename.to_owned() + ))); + } + + let file_name_bytes = filename.to_string().as_bytes().to_vec(); + // Make sure file entry is existed. + let index = self + .files + .iter() + .position(|f| f.name[0..file_name_bytes.len()].to_vec() == file_name_bytes) + .with_context(|| LegacyError::EntryNotFound(filename.to_owned()))?; + self.files[index].size = data.len() as u32; + + // Update FileDir entry + let mut bytes = Vec::new(); + let file_length_be = BigEndian::read_u32((self.files.len() as u32).as_bytes()); + bytes.append(&mut file_length_be.as_bytes().to_vec()); + for value in self.files.iter() { + bytes.append(&mut value.as_be_bytes()); + } + self.update_entry_data(FwCfgEntryType::FileDir as u16, bytes)?; + + // Update File entry + self.entries[FW_CFG_FILE_FIRST as usize + index] = + FwCfgEntry::new(data, select_cb, write_cb, allow_write); Ok(()) } @@ -519,7 +554,7 @@ impl FwCfgCommon { let size = std::mem::size_of::() as u64; if let Err(_e) = read_dma_memory(&self.mem_space, dma_addr, dma_request, size) { write_dma_result(&self.mem_space, dma_addr, FW_CFG_DMA_CTL_ERROR)?; - return Err(ErrorKind::ReadDmaRequest(dma_addr.0, size).into()); + return Err(anyhow!(LegacyError::ReadDmaRequest(dma_addr.0, size))); } // Build `FwCfgDmaAccess` object from dma_request here @@ -565,7 +600,7 @@ impl FwCfgCommon { &mem_space, GuestAddress(dma.address), data.as_slice(), - len as u64, + u64::from(len), ) .is_err() { @@ -588,7 +623,7 @@ impl FwCfgCommon { &mem_space, GuestAddress(dma.address), &entry.data[offset as usize..], - len as u64, + u64::from(len), ) .is_err() { @@ -598,7 +633,7 @@ impl FwCfgCommon { if is_write { let mut dma_read_error = false; let data = &mut entry.data[offset as usize..]; - if read_dma_memory(&mem_space, GuestAddress(dma.address), data, len as u64) + if read_dma_memory(&mem_space, GuestAddress(dma.address), data, u64::from(len)) .is_err() { dma_read_error = true; @@ -608,18 +643,24 @@ impl FwCfgCommon { dma.control |= FW_CFG_DMA_CTL_ERROR; } else if true { if let Some(cb) = &entry.write_cb { - cb.write_callback(offset as u64, len as usize); + cb.lock().unwrap().write_callback( + data.to_vec(), + u64::from(offset), + len as usize, + ); } } } offset += len; } dma.length -= len; - dma.address += len as u64 + dma.address += u64::from(len) } self.cur_offset = offset; write_dma_result(&self.mem_space, dma_addr, dma.control)?; + + trace::fwcfg_read_data(0); Ok(()) } @@ -634,13 +675,12 @@ impl FwCfgCommon { /// # Errors /// /// Return Error if fail to add the file entry. 
- /// fn dma_mem_write(&mut self, addr: u64, value: u64, size: u32) -> Result<()> { if size == 4 { if addr == 0 { self.dma_addr = GuestAddress(value << 32); } else if addr == 4 { - self.dma_addr = GuestAddress(self.dma_addr.raw_value() | value as u64); + self.dma_addr = GuestAddress(self.dma_addr.raw_value() | value); self.handle_dma_request()?; } } else if size == 8 && addr == 0 { @@ -666,14 +706,13 @@ impl FwCfgCommon { /// # Return /// /// Return the value of the register - /// fn dma_mem_read(&self, addr: u64, size: u32) -> Result { extract_u64( FW_CFG_DMA_SIGNATURE as u64, - ((8 - addr - size as u64) * 8) as u32, - (size * 8) as u32, + ((8 - addr - u64::from(size)) * 8) as u32, + size * 8, ) - .ok_or_else(|| ErrorKind::Msg("Failed to extract bits from u64".to_string()).into()) + .with_context(|| "Failed to extract bits from u64") } /// Read data register @@ -686,7 +725,6 @@ impl FwCfgCommon { /// # Return /// /// Return the value of the register - /// fn read_data_reg(&mut self, _addr: u64, mut size: u32) -> Result { if size == 0 || size > std::mem::size_of::() as u32 { bail!( @@ -705,7 +743,7 @@ impl FwCfgCommon { && cur_offset < entry.data.len() as u32 { loop { - value = (value << 8) | entry.data[cur_offset as usize] as u64; + value = (value << 8) | u64::from(entry.data[cur_offset as usize]); cur_offset += 1; size -= 1; @@ -713,9 +751,11 @@ impl FwCfgCommon { break; } } - value <<= 8 * size as u64; + value <<= 8 * u64::from(size); } self.cur_offset = cur_offset; + + trace::fwcfg_read_data(value); Ok(value) } @@ -800,35 +840,28 @@ fn common_read( /// FwCfg MMIO Device use for AArch64 platform #[cfg(target_arch = "aarch64")] pub struct FwCfgMem { + base: SysBusDevBase, fwcfg: FwCfgCommon, - /// System Resource of device. - res: SysRes, } #[cfg(target_arch = "aarch64")] impl FwCfgMem { - pub fn new(sys_mem: Arc) -> Self { - FwCfgMem { - fwcfg: FwCfgCommon::new(sys_mem), - res: SysRes::default(), - } - } - - pub fn realize( - mut self, - sysbus: &mut SysBus, + pub fn new( + sys_mem: Arc, + sysbus: &Arc>, region_base: u64, region_size: u64, - ) -> Result>> { - self.fwcfg.common_realize()?; - self.set_sys_resource(sysbus, region_base, region_size) - .chain_err(|| "Failed to allocate system resource for FwCfg.")?; + ) -> Result { + let mut fwcfgmem = FwCfgMem { + base: SysBusDevBase::new(SysBusDevType::FwCfg), + fwcfg: FwCfgCommon::new(sys_mem), + }; + fwcfgmem + .set_sys_resource(sysbus, region_base, region_size, "FwCfgMem") + .with_context(|| "Failed to allocate system resource for FwCfg.")?; + fwcfgmem.set_parent_bus(sysbus.clone()); - let dev = Arc::new(Mutex::new(self)); - sysbus - .attach_device(&dev, region_base, region_size) - .chain_err(|| "Failed to attach FwCfg device to system bus.")?; - Ok(dev) + Ok(fwcfgmem) } } @@ -843,10 +876,7 @@ fn read_bytes( 0..=7 => match fwcfg_arch.fwcfg.read_data_reg(offset, data.len() as u32) { Ok(val) => val, Err(e) => { - error!( - "Failed to read from FwCfg data register, error is {}", - e.display_chain() - ); + error!("Failed to read from FwCfg data register, error is {:?}", e); return false; } }, @@ -861,6 +891,7 @@ fn read_bytes( offset - 0x10, data.len() ); + return false; } match fwcfg_arch .fwcfg @@ -868,7 +899,7 @@ fn read_bytes( { Ok(val) => val, Err(e) => { - error!("Failed to handle FwCfg DMA-read, error is {}", e); + error!("Failed to handle FwCfg DMA-read, error is {:?}", e); return false; } } @@ -883,7 +914,7 @@ fn read_bytes( 1 => data[0] = value as u8, 2 => BigEndian::write_u16(data, value as u16), 4 => BigEndian::write_u32(data, 
value as u32), - 8 => BigEndian::write_u64(data, value as u64), + 8 => BigEndian::write_u64(data, value), _ => {} } true @@ -896,8 +927,31 @@ impl FwCfgOps for FwCfgMem { } } +#[cfg(target_arch = "aarch64")] +impl Device for FwCfgMem { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn reset(&mut self, _reset_child_device: bool) -> Result<()> { + self.fwcfg.select_entry(FwCfgEntryType::Signature as u16); + Ok(()) + } + + fn realize(mut self) -> Result<Arc<Mutex<Self>>> { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + MUT_SYS_BUS!(parent_bus, locked_bus, sysbus); + self.fwcfg.common_realize()?; + let dev = Arc::new(Mutex::new(self)); + sysbus + .attach_device(&dev) + .with_context(|| "Failed to attach FwCfg device to system bus.")?; + Ok(dev) + } +} + #[cfg(target_arch = "aarch64")] impl SysBusDevOps for FwCfgMem { + gen_base_func!(sysbusdev_base, sysbusdev_base_mut, SysBusDevBase, base); + fn read(&mut self, data: &mut [u8], base: GuestAddress, offset: u64) -> bool { common_read(self, data, base, offset) } @@ -905,10 +959,10 @@ impl SysBusDevOps for FwCfgMem { fn write(&mut self, data: &[u8], _base: GuestAddress, offset: u64) -> bool { let size = data.len() as u32; let value = match size { - 1 => data[0] as u64, - 2 => BigEndian::read_u16(data) as u64, - 4 => BigEndian::read_u32(data) as u64, - 8 => BigEndian::read_u64(data) as u64, + 1 => u64::from(data[0]), + 2 => u64::from(BigEndian::read_u16(data)), + 4 => u64::from(BigEndian::read_u32(data)), + 8 => BigEndian::read_u64(data), _ => 0, }; match offset { @@ -937,29 +991,15 @@ impl SysBusDevOps for FwCfgMem { true } - fn get_sys_resource(&mut self) -> Option<&mut SysRes> { - Some(&mut self.res) - } - fn set_sys_resource( &mut self, - _sysbus: &mut SysBus, + _sysbus: &Arc<Mutex<SysBus>>, region_base: u64, region_size: u64, - ) -> SysBusResult<()> { - let mut res = self.get_sys_resource().unwrap(); - res.region_base = region_base; - res.region_size = region_size; - Ok(()) - } - - /// Get device type. - fn get_type(&self) -> SysBusDevType { - SysBusDevType::FwCfg - } - - fn reset(&mut self) -> SysBusResult<()> { - self.fwcfg.select_entry(FwCfgEntryType::Signature as u16); + region_name: &str, + ) -> Result<()> { + self.sysbusdev_base_mut() + .set_sys(-1, region_base, region_size, region_name); Ok(()) } } @@ -967,36 +1007,23 @@ impl SysBusDevOps for FwCfgMem { #[allow(clippy::upper_case_acronyms)] #[cfg(target_arch = "x86_64")] pub struct FwCfgIO { + base: SysBusDevBase, fwcfg: FwCfgCommon, - /// System Resource of device.
- res: SysRes, } #[cfg(target_arch = "x86_64")] impl FwCfgIO { - pub fn new(sys_mem: Arc<AddressSpace>) -> Self { - FwCfgIO { + pub fn new(sys_mem: Arc<AddressSpace>, sysbus: &Arc<Mutex<SysBus>>) -> Result<Self> { + let mut fwcfg = FwCfgIO { + base: SysBusDevBase::new(SysBusDevType::FwCfg), fwcfg: FwCfgCommon::new(sys_mem), - res: SysRes { - region_base: FW_CFG_IO_BASE, - region_size: FW_CFG_IO_SIZE, - irq: -1, - }, - } - } - - pub fn realize(mut self, sysbus: &mut SysBus) -> Result<Arc<Mutex<FwCfgIO>>> { - self.fwcfg.common_realize()?; - let region_base = self.res.region_base; - let region_size = self.res.region_size; - self.set_sys_resource(sysbus, region_base, region_size) - .chain_err(|| "Failed to allocate system resource for FwCfg.")?; + }; + fwcfg + .set_sys_resource(sysbus, FW_CFG_IO_BASE, FW_CFG_IO_SIZE, "FwCfgIO") + .with_context(|| "Failed to allocate system resource for FwCfg.")?; + fwcfg.set_parent_bus(sysbus.clone()); - let dev = Arc::new(Mutex::new(self)); - sysbus - .attach_device(&dev, region_base, region_size) - .chain_err(|| "Failed to attach FwCfg device to system bus.")?; - Ok(dev) + Ok(fwcfg) } } @@ -1005,10 +1032,7 @@ fn read_bytes(fwcfg_arch: &mut FwCfgIO, data: &mut [u8], base: GuestAddress, off let value: u64 = match offset { 0..=1 => match fwcfg_arch.fwcfg.read_data_reg(offset, data.len() as u32) { Err(e) => { - error!( - "Failed to read from FwCfg data register, error is {}", - e.display_chain() - ); + error!("Failed to read from FwCfg data register, error is {:?}", e); return false; } Ok(val) => val, }, @@ -1020,10 +1044,11 @@ fn read_bytes(fwcfg_arch: &mut FwCfgIO, data: &mut [u8], base: GuestAddress, off offset - 0x10, data.len() ); + return false; } match fwcfg_arch.fwcfg.dma_mem_read(offset - 4, data.len() as u32) { Err(e) => { - error!("Failed to handle FwCfg DMA-read, error is {}", e); + error!("Failed to handle FwCfg DMA-read, error is {:?}", e); return false; } Ok(val) => val, } } @@ -1043,7 +1068,7 @@ fn read_bytes(fwcfg_arch: &mut FwCfgIO, data: &mut [u8], base: GuestAddress, off 1 => data[0] = value as u8, 2 => BigEndian::write_u16(data, value as u16), 4 => BigEndian::write_u32(data, value as u32), - 8 => BigEndian::write_u64(data, value as u64), + 8 => BigEndian::write_u64(data, value), _ => { warn!( "Failed to read from FwCfg data register, data length {} is invalid", @@ -1062,8 +1087,31 @@ impl FwCfgOps for FwCfgIO { } } +#[cfg(target_arch = "x86_64")] +impl Device for FwCfgIO { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn reset(&mut self, _reset_child_device: bool) -> Result<()> { + self.fwcfg.select_entry(FwCfgEntryType::Signature as u16); + Ok(()) + } + + fn realize(mut self) -> Result<Arc<Mutex<Self>>> { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + MUT_SYS_BUS!(parent_bus, locked_bus, sysbus); + self.fwcfg.common_realize()?; + let dev = Arc::new(Mutex::new(self)); + sysbus + .attach_device(&dev) + .with_context(|| "Failed to attach FwCfg device to system bus.")?; + Ok(dev) + } +} + #[cfg(target_arch = "x86_64")] impl SysBusDevOps for FwCfgIO { + gen_base_func!(sysbusdev_base, sysbusdev_base_mut, SysBusDevBase, base); + fn read(&mut self, data: &mut [u8], base: GuestAddress, offset: u64) -> bool { common_read(self, data, base, offset) } @@ -1083,17 +1131,14 @@ impl SysBusDevOps for FwCfgIO { } 4..=11 => { let value = match size { - 1 => data[0] as u64, - 2 => BigEndian::read_u16(data) as u64, - 4 => BigEndian::read_u32(data) as u64, - 8 => BigEndian::read_u64(data) as u64, + 1 => u64::from(data[0]), + 2 => u64::from(BigEndian::read_u16(data)), + 4 =>
u64::from(BigEndian::read_u32(data)), + 8 => BigEndian::read_u64(data), _ => 0, }; if let Err(e) = self.fwcfg.dma_mem_write(offset - 4, value, size) { - error!( - "Failed to handle FwCfg DMA-write, error is {}", - e.display_chain() - ); + error!("Failed to handle FwCfg DMA-write, error is {:?}", e); return false; } } @@ -1109,33 +1154,20 @@ impl SysBusDevOps for FwCfgIO { true } - fn get_sys_resource(&mut self) -> Option<&mut SysRes> { - Some(&mut self.res) - } - fn set_sys_resource( &mut self, - _sysbus: &mut SysBus, + _sysbus: &Arc>, region_base: u64, region_size: u64, - ) -> SysBusResult<()> { - let mut res = self.get_sys_resource().unwrap(); - res.region_base = region_base; - res.region_size = region_size; - Ok(()) - } - - fn get_type(&self) -> SysBusDevType { - SysBusDevType::FwCfg - } - - fn reset(&mut self) -> SysBusResult<()> { - self.fwcfg.select_entry(FwCfgEntryType::Signature as u16); + region_name: &str, + ) -> Result<()> { + self.sysbusdev_base_mut() + .set_sys(-1, region_base, region_size, region_name); Ok(()) } } -pub trait FwCfgOps { +pub trait FwCfgOps: Send + Sync { fn fw_cfg_common(&mut self) -> &mut FwCfgCommon; /// Add an entry to FwCfg device, with Vector content. @@ -1193,12 +1225,36 @@ pub trait FwCfgOps { self.fw_cfg_common() .add_file_callback(filename, data, None, None, true) } + + /// Modify a file entry to FwCfg device, without callbacks, write-allow. + /// + /// # Arguments + /// + /// * `filename` - Name of the file + /// * `data` - Raw data bytes of the file to be modified + fn modify_file_entry(&mut self, filename: &str, data: Vec) -> Result<()> { + self.fw_cfg_common() + .modify_file_callback(filename, data, None, None, true) + } } #[cfg(target_arch = "aarch64")] impl AmlBuilder for FwCfgMem { fn aml_bytes(&self) -> Vec { - Vec::new() + let mut acpi_dev = AmlDevice::new("FWCF"); + acpi_dev.append_child(AmlNameDecl::new("_HID", AmlString("QEMU0002".to_string()))); + acpi_dev.append_child(AmlNameDecl::new("_STA", AmlInteger(0xB))); + acpi_dev.append_child(AmlNameDecl::new("_CCA", AmlInteger(0x1))); + + let mut res = AmlResTemplate::new(); + res.append_child(AmlMemory32Fixed::new( + AmlReadAndWrite::ReadWrite, + self.base.res.region_base as u32, + self.base.res.region_size as u32, + )); + acpi_dev.append_child(AmlNameDecl::new("_CRS", res)); + + acpi_dev.aml_bytes() } } @@ -1212,10 +1268,10 @@ impl AmlBuilder for FwCfgIO { let mut res = AmlResTemplate::new(); res.append_child(AmlIoResource::new( AmlIoDecode::Decode16, - self.res.region_base as u16, - self.res.region_base as u16, + self.base.res.region_base as u16, + self.base.res.region_base as u16, 0x01, - self.res.region_size as u8, + self.base.res.region_size as u8, )); acpi_dev.append_child(AmlNameDecl::new("_CRS", res)); @@ -1226,50 +1282,8 @@ impl AmlBuilder for FwCfgIO { #[cfg(test)] mod test { use super::*; - use address_space::{AddressSpace, HostMemMapping, Region}; - - fn sysbus_init() -> SysBus { - let sys_mem = AddressSpace::new(Region::init_container_region(u64::max_value())).unwrap(); - #[cfg(target_arch = "x86_64")] - let sys_io = AddressSpace::new(Region::init_container_region(1 << 16)).unwrap(); - #[cfg(target_arch = "x86_64")] - let free_irqs: (i32, i32) = (5, 15); - #[cfg(target_arch = "aarch64")] - let free_irqs: (i32, i32) = (32, 191); - let mmio_region: (u64, u64) = (0x0A00_0000, 0x1000_0000); - SysBus::new( - #[cfg(target_arch = "x86_64")] - &sys_io, - &sys_mem, - free_irqs, - mmio_region, - ) - } - - fn address_space_init() -> Arc { - let root = Region::init_container_region(1 << 36); 
- let sys_space = AddressSpace::new(root).unwrap(); - let host_mmap = Arc::new( - HostMemMapping::new( - GuestAddress(0), - None, - 0x1000_0000, - None, - false, - false, - false, - ) - .unwrap(), - ); - sys_space - .root() - .add_subregion( - Region::init_ram_region(host_mmap.clone()), - host_mmap.start_address().raw_value(), - ) - .unwrap(); - sys_space - } + use crate::sysbus::sysbus_init; + use crate::test::address_space_init; #[test] fn test_entry_functions() { @@ -1362,6 +1376,38 @@ mod test { let entry = fwcfg_common.get_entry_mut().unwrap(); assert_eq!(entry.data, file_data); + let file_data = Vec::new(); + assert_eq!( + fwcfg_common + .add_file_callback("bootorder", file_data.clone(), None, None, false) + .is_ok(), + true + ); + let id = fwcfg_common.files[0].select; + fwcfg_common.select_entry(id); + let entry = fwcfg_common.get_entry_mut().unwrap(); + assert_eq!(entry.data, file_data); + + let modify_file_data = "/pci@ffffffffffffffff/pci-bridge@3/scsi@0,1/disk@0,0" + .as_bytes() + .to_vec(); + assert_eq!( + fwcfg_common + .modify_file_callback("bootorder", modify_file_data.clone(), None, None, true) + .is_ok(), + true + ); + let id = fwcfg_common.files[0].select; + fwcfg_common.select_entry(id); + let entry = fwcfg_common.get_entry_mut().unwrap(); + assert_eq!(entry.data, modify_file_data); + + assert_eq!( + fwcfg_common + .modify_file_callback("abc", modify_file_data.clone(), None, None, true) + .is_err(), + true + ); let file_data = vec![0x33; 500]; assert_eq!( fwcfg_common @@ -1390,7 +1436,12 @@ mod test { let addr = GuestAddress(0x0000); fwcfg_common .mem_space - .write(&mut dma_request.as_ref(), addr, dma_request.len() as u64) + .write( + &mut dma_request.as_ref(), + addr, + dma_request.len() as u64, + AddressAttr::Ram, + ) .unwrap(); // [2]set dma addr. @@ -1400,7 +1451,13 @@ mod test { assert_eq!(fwcfg_common.handle_dma_request().is_ok(), true); // [4]check dma response. - assert_eq!(fwcfg_common.mem_space.read_object::(addr).unwrap(), 0); + assert_eq!( + fwcfg_common + .mem_space + .read_object::(addr, AddressAttr::Ram) + .unwrap(), + 0 + ); // [5]check dma write result. let mut read_dma_buf = Vec::new(); @@ -1408,7 +1465,12 @@ mod test { let len = sig_entry_data.len(); fwcfg_common .mem_space - .read(&mut read_dma_buf, GuestAddress(0xffff), len as u64) + .read( + &mut read_dma_buf, + GuestAddress(0xffff), + len as u64, + AddressAttr::Ram, + ) .unwrap(); assert_eq!(read_dma_buf, sig_entry_data); @@ -1421,7 +1483,12 @@ mod test { let addr = GuestAddress(0x0000); fwcfg_common .mem_space - .write(&mut dma_request.as_ref(), addr, dma_request.len() as u64) + .write( + &mut dma_request.as_ref(), + addr, + dma_request.len() as u64, + AddressAttr::Ram, + ) .unwrap(); fwcfg_common.dma_addr = addr; @@ -1429,14 +1496,25 @@ mod test { assert_eq!(fwcfg_common.handle_dma_request().is_ok(), true); // Result should be all zero. 
- assert_eq!(fwcfg_common.mem_space.read_object::(addr).unwrap(), 0); + assert_eq!( + fwcfg_common + .mem_space + .read_object::(addr, AddressAttr::Ram) + .unwrap(), + 0 + ); let mut read_dma_buf = Vec::new(); let all_zero = vec![0x0_u8; 4]; let len = all_zero.len(); fwcfg_common .mem_space - .read(&mut read_dma_buf, GuestAddress(0xffff), len as u64) + .read( + &mut read_dma_buf, + GuestAddress(0xffff), + len as u64, + AddressAttr::Ram, + ) .unwrap(); assert_eq!(read_dma_buf, all_zero); } @@ -1446,9 +1524,9 @@ mod test { fn test_read_write_aarch64() { let mut sys_bus = sysbus_init(); let sys_mem = address_space_init(); - let fwcfg = FwCfgMem::new(sys_mem); + let fwcfg = FwCfgMem::new(sys_mem, &mut sys_bus, 0x0902_0000, 0x0000_0018).unwrap(); - let fwcfg_dev = FwCfgMem::realize(fwcfg, &mut sys_bus, 0x0902_0000, 0x0000_0018).unwrap(); + let fwcfg_dev = fwcfg.realize().unwrap(); // Read FW_CFG_DMA_SIGNATURE entry. let base = GuestAddress(0x0000); let mut read_data = vec![0xff_u8, 0xff, 0xff, 0xff]; @@ -1486,9 +1564,9 @@ mod test { fn test_read_write_x86_64() { let mut sys_bus = sysbus_init(); let sys_mem = address_space_init(); - let fwcfg = FwCfgIO::new(sys_mem); + let fwcfg = FwCfgIO::new(sys_mem, &mut sys_bus).unwrap(); - let fwcfg_dev = FwCfgIO::realize(fwcfg, &mut sys_bus).unwrap(); + let fwcfg_dev = fwcfg.realize().unwrap(); // Read FW_CFG_DMA_SIGNATURE entry. let base = GuestAddress(0x0000); let mut read_data = vec![0xff_u8, 0xff, 0xff, 0xff]; diff --git a/devices/src/legacy/mod.rs b/devices/src/legacy/mod.rs index 6835a9214e05072e9b4f379e378c328ac2bba2e1..74cc2e4a7a21094610148cd2764db1b5065c702a 100644 --- a/devices/src/legacy/mod.rs +++ b/devices/src/legacy/mod.rs @@ -25,80 +25,23 @@ //! - `x86_64` //! - `aarch64` -pub mod errors { - error_chain! 
{ - links { - SysBus(sysbus::errors::Error, sysbus::errors::ErrorKind); - AddressSpace(address_space::errors::Error, address_space::errors::ErrorKind); - } - foreign_links { - Io(std::io::Error); - } - errors { - SetSysResErr { - display("Failed to allocate system bus resource.") - } - AddEntryErr(key: String) { - display("Failed to add FwCfg entry, key is {}", key) - } - EntryNotFound(key: String) { - display("Failed to find FwCfg entry, key is {}.", key) - } - DuplicateFile(key: String) { - display("Duplicate FwCfg file-entry, name is {}", key) - } - FileSlotsNotAvailable(key: String) { - display("No available FwCfg file-slot for this file entry with filename {}", key) - } - ReadDmaRequest(addr: u64, size: u64) { - display("Failed to read DMA request, dma_addr=0x{:x} size=0x{:x}", addr, size) - } - InvalidFwCfgEntry(key: u16) { - display("Invalid FwCfg entry key {}", key) - } - PFlashWriteOverflow(size:u64, offset: u64, data_len: u64) { - display("Flash size is 0x{:x}, offset 0x{:x} and size 0x{:x} in write request overflows", size, offset, data_len) - } - PFlashReadOverflow(size:u64, offset: u64, data_len: u64) { - display("Flash size is 0x{:x}, offset 0x{:x} and size 0x{:x} in read request overflows", size, offset, data_len) - } - PFlashFileSeekErr(offset: u64) { - display("Failed to seek to offset 0x{:x} of PFlash file", offset) - } - PFlashIndexOverflow(index: u64, len: usize) { - display("Flash CFI table len is 0x{:x}, request 0x{:x} overflows", len, index) - } - PFlashDevConfigErr(dev_width: u32, bank_width: u32) { - display("Unsupported device configuration: device width {}, bank width {}", dev_width, bank_width) - } - WritePFlashRomErr { - display("Failed to write to Flash ROM") - } - RegNotifierErr { - display("Failed to register event notifier.") - } - } - } -} +pub mod error; -mod chardev; -#[allow(dead_code)] mod fwcfg; -#[allow(dead_code)] mod pflash; -#[allow(dead_code)] #[cfg(target_arch = "aarch64")] mod pl011; #[cfg(target_arch = "aarch64")] mod pl031; -#[allow(dead_code)] +#[cfg(all(feature = "ramfb", target_arch = "aarch64"))] +mod ramfb; #[cfg(target_arch = "x86_64")] mod rtc; mod serial; #[cfg(target_arch = "x86_64")] -pub use self::rtc::{RTC, RTC_IRQ, RTC_PORT_INDEX}; -pub use chardev::{Chardev, InputReceiver}; +pub use self::rtc::{RTC, RTC_PORT_INDEX}; +pub use error::LegacyError; #[cfg(target_arch = "x86_64")] pub use fwcfg::FwCfgIO; #[cfg(target_arch = "aarch64")] @@ -108,5 +51,7 @@ pub use pflash::PFlash; #[cfg(target_arch = "aarch64")] pub use pl011::PL011; #[cfg(target_arch = "aarch64")] -pub use pl031::PL031; +pub use pl031::{PL031, RTC_CR, RTC_DR, RTC_IMSC, RTC_LR}; +#[cfg(all(feature = "ramfb", target_arch = "aarch64"))] +pub use ramfb::{Ramfb, RamfbConfig}; pub use serial::{Serial, SERIAL_ADDR}; diff --git a/devices/src/legacy/pflash.rs b/devices/src/legacy/pflash.rs index 36f708dba6569c46e0a0bdf3c183f17b13acb98d..5ea22a3ba86c5202da214d65f0bab03f37c1a9fa 100644 --- a/devices/src/legacy/pflash.rs +++ b/devices/src/legacy/pflash.rs @@ -14,20 +14,22 @@ use std::fs::File; use std::io::Write; use std::sync::{Arc, Mutex}; -use acpi::AmlBuilder; -use address_space::{FileBackend, GuestAddress, HostMemMapping, Region}; -use byteorder::{ByteOrder, LittleEndian}; -use error_chain::ChainedError; -use sysbus::{errors::Result as SysBusResult, SysBus, SysBusDevOps, SysBusDevType, SysRes}; -use util::num_ops::{deposit_u32, extract_u32}; +use anyhow::{anyhow, bail, Context, Result}; +use log::{error, warn}; -use super::errors::{ErrorKind, Result, ResultExt}; +use 
super::error::LegacyError; +use crate::sysbus::{SysBus, SysBusDevBase, SysBusDevOps, SysBusDevType}; +use crate::{convert_bus_mut, Device, DeviceBase, MUT_SYS_BUS}; +use acpi::AmlBuilder; +use address_space::{AddressAttr, FileBackend, GuestAddress, HostMemMapping, Region}; +use util::gen_base_func; +use util::num_ops::{deposit_u32, extract_u32, read_data_u32, round_up, write_data_u32}; +use util::unix::host_page_size; pub struct PFlash { + base: SysBusDevBase, /// Has backend file or not. has_backend: bool, - /// Number of blocks. - blk_num: u32, /// Length of block. block_len: u32, /// The width of PFlash array for vm. @@ -39,7 +41,7 @@ pub struct PFlash { /// This is used to support x16 wide PFlash run in x8 mode. max_device_width: u32, /// If 0, the PFlash is read normally. - write_cycle: i32, + write_cycle: u32, /// PFlash is read only or not. read_only: bool, /// Command to control PFlash. @@ -56,11 +58,31 @@ pub struct PFlash { write_blk_size: u32, /// ROM region of PFlash. rom: Option, - /// System Resource of device. - res: SysRes, + /// backend: Option, + host_mmap: Arc, } impl PFlash { + fn flash_region_size( + region_max_size: u64, + backend: &Option>, + read_only: bool, + ) -> Result { + // We don't have to occupy the whole memory region. + // Expose just real data size, rounded up to page_size. + if let Some(fd) = backend.as_ref() { + let len = fd.as_ref().metadata().unwrap().len(); + if len > region_max_size || len == 0 || (!read_only && len % host_page_size() != 0) { + bail!( + "Invalid flash file: Region size 0x{region_max_size:X}, file size 0x{len:X}; read_only {read_only}" + ); + } + Ok(round_up(len, host_page_size()).unwrap()) + } else { + Ok(region_max_size) + } + } + /// Construct function of PFlash device. /// /// # Arguments @@ -77,33 +99,26 @@ impl PFlash { /// Return Error when /// * block-length is zero. /// * PFlash size is zero. - /// * file size is smaller than PFlash size. + /// * flash is writable and file size is smaller than region_max_size. + #[allow(clippy::too_many_arguments)] pub fn new( - size: u64, - backend: &Option, + region_max_size: u64, + backend: Option>, block_len: u32, bank_width: u32, device_width: u32, read_only: bool, + sysbus: &Arc>, + region_base: u64, ) -> Result { if block_len == 0 { bail!("PFlash: block-length is zero which is invalid."); } + let size = Self::flash_region_size(region_max_size, &backend, read_only)?; let blocks_per_device: u32 = size as u32 / block_len; if blocks_per_device == 0 { bail!("PFlash: num-blocks is zero which is invalid."); } - if let Some(fd) = backend.as_ref() { - let len = fd.metadata().unwrap().len(); - if len < size { - bail!( - "Mmap requires 0x{:X} bytes, given file provides 0x{:X} bytes", - size, - len - ); - } - } - let num_devices: u32 = if device_width == 0 { 1 } else { @@ -176,13 +191,25 @@ impl PFlash { // Number of protection fields. cfi_table[0x3f] = 0x01; - Ok(PFlash { - has_backend: backend.is_some(), + let has_backend = backend.is_some(); + let region_size = Self::flash_region_size(region_max_size, &backend, read_only)?; + let host_mmap = Arc::new(HostMemMapping::new( + GuestAddress(region_base), + None, + region_size, + backend.map(FileBackend::new_common), + false, + true, + read_only, + )?); + + let mut pflash = PFlash { + base: SysBusDevBase::new(SysBusDevType::Flash), + has_backend, block_len, bank_width, // device id for Intel PFlash. 
ident: [0x89, 0x18, 0x00, 0x00], - blk_num: blocks_per_device, device_width, max_device_width: device_width, write_cycle: 0, @@ -193,53 +220,17 @@ impl PFlash { counter: 0, write_blk_size, rom: None, - res: SysRes::default(), - }) - } - - pub fn realize( - mut self, - sysbus: &mut SysBus, - region_base: u64, - region_size: u64, - backend: Option, - ) -> Result<()> { - self.set_sys_resource(sysbus, region_base, region_size) - .chain_err(|| "Failed to allocate system resource for PFlash.")?; - - let host_mmap = Arc::new(HostMemMapping::new( - GuestAddress(region_base), - None, - region_size, - backend.map(FileBackend::new_common), - false, - false, - false, - )?); - - let dev = Arc::new(Mutex::new(self)); - let region_ops = sysbus.build_region_ops(&dev); - let rom_region = Region::init_rom_device_region(host_mmap, region_ops); - dev.lock().unwrap().rom = Some(rom_region.clone()); - sysbus - .sys_mem - .root() - .add_subregion(rom_region, region_base) - .chain_err(|| "Failed to attach PFlash to system bus")?; - sysbus.devices.push(dev); + host_mmap, + }; - Ok(()) + pflash + .set_sys_resource(sysbus, region_base, region_size, "PflashRom") + .with_context(|| "Failed to allocate system resource for PFlash.")?; + pflash.set_parent_bus(sysbus.clone()); + Ok(pflash) } fn set_read_array_mode(&mut self, is_illegal_cmd: bool) -> Result<()> { - self.rom - .as_ref() - .unwrap() - .set_rom_device_romd(true) - .chain_err(|| "Failed to set to read array mode.")?; - self.write_cycle = 0; - self.cmd = 0x00; - if is_illegal_cmd { warn!( "Unimplemented PFlash cmd sequence (write cycle: 0x{:X}, cmd: 0x{:X})", @@ -247,6 +238,15 @@ impl PFlash { ); } + trace::pflash_mode_read_array(); + self.rom + .as_ref() + .unwrap() + .set_rom_device_romd(true) + .with_context(|| "Failed to set to read array mode.")?; + self.write_cycle = 0; + self.cmd = 0x00; + Ok(()) } @@ -257,10 +257,16 @@ impl PFlash { // Mask off upper bits, the rest (ident[2] and ident[3]) is not emulated. let mut resp: u32 = match index & 0xFF { - 0 => self.ident[0], - 1 => self.ident[1], + 0 => { + trace::pflash_manufacturer_id(self.ident[0]); + self.ident[0] + } + 1 => { + trace::pflash_device_id(self.ident[1]); + self.ident[1] + } _ => { - debug!("Device ID 2 and 3 are not supported"); + trace::pflash_device_info(index); return Ok(0); } }; @@ -269,7 +275,7 @@ impl PFlash { let mut i: u32 = self.device_width; while i < self.bank_width { resp = deposit_u32(resp, 8 * i, 8 * self.device_width, resp) - .ok_or("Failed to deposit bits to u32")?; + .with_context(|| "Failed to deposit bits to u32")?; i += self.device_width; } } @@ -284,20 +290,24 @@ impl PFlash { - self.device_width.trailing_zeros()); if index >= self.cfi_table.len() as u64 { - return Err(ErrorKind::PFlashIndexOverflow(index, self.cfi_table.len()).into()); + return Err(anyhow!(LegacyError::PFlashIndexOverflow( + index, + self.cfi_table.len() + ))); } let mut resp: u32 = self.cfi_table[index as usize].into(); if self.device_width != self.max_device_width { if self.device_width != 1 || self.bank_width > 4 { - return Err( - ErrorKind::PFlashDevConfigErr(self.device_width, self.bank_width).into(), - ); + return Err(anyhow!(LegacyError::PFlashDevConfigErr( + self.device_width, + self.bank_width + ))); } // Repeat data for PFlash device which supports x16-mode but works in x8-mode. 
for i in 1..self.max_device_width { - resp = deposit_u32(resp, 8 * i as u32, 8, self.cfi_table[index as usize] as u32) - .ok_or("Failed to deposit bits to u32")?; + resp = deposit_u32(resp, 8 * i, 8, u32::from(self.cfi_table[index as usize])) + .with_context(|| "Failed to deposit bits to u32")?; } } // Responses are repeated for every device in bank. @@ -305,7 +315,7 @@ impl PFlash { let mut i: u32 = self.device_width; while i < self.bank_width { resp = deposit_u32(resp, 8 * i, 8 * self.device_width, resp) - .ok_or("Failed to deposit bits to u32")?; + .with_context(|| "Failed to deposit bits to u32")?; i += self.device_width; } } @@ -319,15 +329,26 @@ impl PFlash { } // Unwrap is safe, because after realize function, rom isn't none. let mr = self.rom.as_ref().unwrap(); - if offset + size as u64 > mr.size() as u64 { - return Err( - ErrorKind::PFlashWriteOverflow(mr.size() as u64, offset, size as u64).into(), - ); + if offset + .checked_add(size as u64) + .map(|sum| sum > mr.size()) + .unwrap_or(true) + { + return Err(anyhow!(LegacyError::PFlashWriteOverflow( + mr.size(), + offset, + u64::from(size) + ))); } - let addr: u64 = mr.get_host_address().ok_or("Failed to get host address.")?; - let ret = unsafe { - // Safe as addr and size are valid. + // SAFETY: size has been checked. + let addr: u64 = unsafe { + mr.get_host_address(AddressAttr::RomDevice) + .with_context(|| "Failed to get host address.") + }?; + let ret = + // SAFETY: addr and size are valid. + unsafe { libc::msync( addr as *mut libc::c_void, size as libc::size_t, @@ -344,36 +365,57 @@ impl PFlash { fn read_data(&mut self, data: &mut [u8], offset: u64) -> Result<()> { // Unwrap is safe, because after realize function, rom isn't none. let mr = self.rom.as_ref().unwrap(); - if offset + data.len() as u64 > mr.size() as u64 { - return Err(ErrorKind::PFlashReadOverflow(mr.size(), offset, data.len() as u64).into()); + if offset + .checked_add(data.len() as u64) + .map(|sum| sum > mr.size()) + .unwrap_or(true) + { + return Err(anyhow!(LegacyError::PFlashReadOverflow( + mr.size(), + offset, + data.len() as u64 + ))); } - let host_addr = mr.get_host_address().unwrap(); - // Safe because host_addr of the region is local allocated and sanity has been checked. - let src = unsafe { - std::slice::from_raw_parts_mut((host_addr + offset) as *mut u8, data.len() as usize) - }; + // SAFETY: size has been checked. + let host_addr = unsafe { mr.get_host_address(AddressAttr::RomDevice).unwrap() }; + let src = + // SAFETY: host_addr of the region is local allocated and sanity has been checked. + unsafe { std::slice::from_raw_parts_mut((host_addr + offset) as *mut u8, data.len()) }; data.as_mut() .write_all(src) - .chain_err(|| "Failed to read data from PFlash Rom")?; + .with_context(|| "Failed to read data from PFlash Rom")?; + trace::pflash_read_data(offset, data.len(), &data[..std::cmp::min(4, data.len())]); Ok(()) } fn write_data(&mut self, data: &[u8], offset: u64) -> Result<()> { + trace::pflash_write_data( + offset, + data.len(), + &data[..std::cmp::min(4, data.len())], + self.counter, + ); // Unwrap is safe, because after realize function, rom isn't none. 
let mr = self.rom.as_ref().unwrap(); - if offset + data.len() as u64 > mr.size() as u64 { - return Err( - ErrorKind::PFlashWriteOverflow(mr.size(), offset, data.len() as u64).into(), - ); + if offset + .checked_add(data.len() as u64) + .map(|sum| sum > mr.size()) + .unwrap_or(true) + { + return Err(anyhow!(LegacyError::PFlashWriteOverflow( + mr.size(), + offset, + data.len() as u64 + ))); } - let host_addr = mr.get_host_address().unwrap(); - // Safe because host_addr of the region is local allocated and sanity has been checked. - let mut dst = unsafe { - std::slice::from_raw_parts_mut((host_addr + offset) as *mut u8, data.len() as usize) - }; + // SAFETY: size has been checked. + let host_addr = unsafe { mr.get_host_address(AddressAttr::RomDevice).unwrap() }; + let mut dst = + // SAFETY: host_addr of the region is local allocated and sanity has been checked. + unsafe { std::slice::from_raw_parts_mut((host_addr + offset) as *mut u8, data.len()) }; dst.write_all(data) - .chain_err(|| "Failed to write data to PFlash Rom")?; + .with_context(|| "Failed to write data to PFlash Rom")?; Ok(()) } @@ -382,32 +424,30 @@ impl PFlash { match cmd { // cmd 0xf0 is for AMD PFlash. 0x00 | 0xf0 | 0xff => { + trace::pflash_write("read array mode".to_string(), cmd); if let Err(e) = self.set_read_array_mode(false) { error!( - "Failed to set read array mode, write cycle 0, cmd 0x{:x}, error is {}", - cmd, - e.display_chain() + "Failed to set read array mode, write cycle 0, cmd 0x{:x}, error is {:?}", + cmd, e ); return false; } return true; } 0x10 | 0x40 => { - debug!("PFlash write: Single Byte Program"); + trace::pflash_write("single byte program (0)".to_string(), cmd); } 0x20 => { - let offset_mask = offset & !(self.block_len as u64 - 1); + let offset_mask = offset & !(u64::from(self.block_len) - 1); + trace::pflash_write_block_erase(offset, self.block_len); if !self.read_only { let all_one = vec![0xff_u8; self.block_len as usize]; if let Err(e) = self.write_data(all_one.as_slice(), offset_mask) { - error!("Failed to write PFlash device: {}.", e.display_chain()); + error!("Failed to write PFlash device: {:?}", e); } if let Err(e) = self.update_content(offset_mask, self.block_len) { - error!( - "Failed to update content for PFlash device: {}.", - e.display_chain() - ); + error!("Failed to update content for PFlash device: {:?}", e); } } else { // Block erase error. @@ -417,44 +457,49 @@ impl PFlash { self.status |= 0x80; } 0x50 => { + trace::pflash_write("clear status bits".to_string(), cmd); self.status = 0x0; if let Err(e) = self.set_read_array_mode(false) { error!( - "Failed to set read array mode, write cycle 0, cmd 0x{:x}, error is {}", - cmd, - e.display_chain() + "Failed to set read array mode, write cycle 0, cmd 0x{:x}, error is {:?}", + cmd, e ); return false; } return true; } 0x60 => { - debug!("PFlash write: Block unlock"); + trace::pflash_write("block unlock".to_string(), cmd); + } + 0x70 => { + trace::pflash_write("read status register".to_string(), cmd); + self.cmd = cmd; + return true; } - 0x70 | 0x90 => { - // 0x70: Status Register, 0x90: Read Device ID. 
+ 0x90 => { + trace::pflash_write("read device information".to_string(), cmd); self.cmd = cmd; return true; } 0x98 => { - debug!("PFlash write: CFI query"); + trace::pflash_write("CFI query".to_string(), cmd); } 0xe8 => { + trace::pflash_write("write to buffer".to_string(), cmd); self.status |= 0x80; } _ => { if let Err(e) = self.set_read_array_mode(true) { error!( - "Failed to set read array mode, write cycle 0, cmd 0x{:x}, error is {}", - cmd, - e.display_chain() + "Failed to set read array mode, write cycle 0, cmd 0x{:x}, error is {:?}", + cmd, e ); return false; } return true; } } - self.write_cycle += 1; + self.write_cycle = self.write_cycle.wrapping_add(1); self.cmd = cmd; true } @@ -469,15 +514,13 @@ impl PFlash { ) -> bool { match self.cmd { 0x10 | 0x40 => { + trace::pflash_write("single byte program (1)".to_string(), self.cmd); if !self.read_only { if let Err(e) = self.write_data(data, offset) { - error!("Failed to write to PFlash device: {}.", e.display_chain()); + error!("Failed to write to PFlash device: {:?}.", e); } if let Err(e) = self.update_content(offset, data_len.into()) { - error!( - "Failed to update content for PFlash device: {}.", - e.display_chain() - ); + error!("Failed to update content for PFlash device: {:?}", e); } } else { self.status |= 0x10; @@ -491,20 +534,18 @@ impl PFlash { self.status |= 0x80; } else if cmd == 0xff { if let Err(e) = self.set_read_array_mode(false) { - error!( - "Failed to set read array mode, write cycle 1, cmd 0x{:x}, error is {}", + error!("Failed to set read array mode, write cycle 1, cmd 0x{:x}, error is {:?}", cmd, - e.display_chain() + e ); return false; } return true; } else { if let Err(e) = self.set_read_array_mode(true) { - error!( - "Failed to set read array mode, write cycle 1, cmd 0x{:x}, error is {}", + error!("Failed to set read array mode, write cycle 1, cmd 0x{:x}, error is {:?}", self.cmd, - e.display_chain() + e ); return false; } @@ -519,14 +560,15 @@ impl PFlash { } else { self.bank_width * 8 }; - value = if let Some(v) = extract_u32(value, 0, length) { - v + if let Some(v) = extract_u32(value, 0, length) { + value = v; } else { error!("Failed to extract bits from u32 value"); return false; }; + trace::pflash_write_block(value); + self.write_cycle = self.write_cycle.wrapping_add(1); self.counter = value; - self.write_cycle += 1; } 0x60 => { if (cmd == 0xd0) || (cmd == 0x01) { @@ -534,20 +576,19 @@ impl PFlash { self.status |= 0x80; } else if cmd == 0xff { if let Err(e) = self.set_read_array_mode(false) { - error!( - "Failed to set read array mode, write cycle 1, cmd 0x{:x}, error is {}", + error!("Failed to set read array mode, write cycle 1, cmd 0x{:x}, error is {:?}", self.cmd, - e.display_chain() + e ); return false; } return true; } else { + trace::pflash_write("unknown (un)blocking command".to_string(), cmd); if let Err(e) = self.set_read_array_mode(true) { - error!( - "Failed to set read array mode, write cycle 1, cmd 0x{:x}, error is {}", + error!("Failed to set read array mode, write cycle 1, cmd 0x{:x}, error is {:?}", self.cmd, - e.display_chain() + e ); return false; } @@ -557,22 +598,21 @@ impl PFlash { 0x98 => { if cmd == 0xff { if let Err(e) = self.set_read_array_mode(false) { - error!( - "Failed to set read array mode, write cycle 1, cmd 0x{:x}, error is {}", + error!("Failed to set read array mode, write cycle 1, cmd 0x{:x}, error is {:?}", self.cmd, - e.display_chain() + e ); return false; } return true; } + trace::pflash_write("leaving query mode".to_string(), cmd); } _ => { if let Err(e) = 
self.set_read_array_mode(true) { error!( - "Failed to set read array mode, write cycle 1, cmd 0x{:x}, error is {}", - self.cmd, - e.display_chain() + "Failed to set read array mode, write cycle 1, cmd 0x{:x}, error is {:?}", + self.cmd, e ); return false; } @@ -587,21 +627,19 @@ impl PFlash { 0xe8 => { if !self.read_only { if let Err(e) = self.write_data(data, offset) { - error!("Failed to write to PFlash device: {}.", e.display_chain()); + error!("Failed to write to PFlash device: {:?}", e); } } else { self.status |= 0x10; } self.status |= 0x80; if self.counter == 0 { - let mask: u64 = !(self.write_blk_size as u64 - 1); - self.write_cycle += 1; + let mask: u64 = !(u64::from(self.write_blk_size) - 1); + trace::pflash_write("block write finished".to_string(), self.cmd); + self.write_cycle = self.write_cycle.wrapping_add(1); if !self.read_only { if let Err(e) = self.update_content(offset & mask, self.write_blk_size) { - error!( - "Failed to update content for PFlash device: {}.", - e.display_chain() - ); + error!("Failed to update content for PFlash device: {:?}", e); } } else { self.status |= 0x10; @@ -613,9 +651,8 @@ impl PFlash { _ => { if let Err(e) = self.set_read_array_mode(true) { error!( - "Failed to set read array mode, write cycle 2, cmd 0x{:x}, error is {}", - self.cmd, - e.display_chain() + "Failed to set read array mode, write cycle 2, cmd 0x{:x}, error is {:?}", + self.cmd, e ); return false; } @@ -633,10 +670,9 @@ impl PFlash { self.status |= 0x80; } else { if let Err(e) = self.set_read_array_mode(false) { - error!( - "Failed to set read array mode, write cycle 3, cmd 0x{:x}, error is {}", + error!("Failed to set read array mode, write cycle 3, cmd 0x{:x}, error is {:?}", self.cmd, - e.display_chain() + e ); return false; } @@ -646,9 +682,8 @@ impl PFlash { _ => { if let Err(e) = self.set_read_array_mode(true) { error!( - "Failed to set read array mode, write cycle 3, cmd 0x{:x}, error is {}", - self.cmd, - e.display_chain() + "Failed to set read array mode, write cycle 3, cmd 0x{:x}, error is {:?}", + self.cmd, e ); return false; } @@ -659,7 +694,44 @@ impl PFlash { } } +impl Device for PFlash { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn reset(&mut self, _reset_child_device: bool) -> Result<()> { + self.rom + .as_ref() + .unwrap() + .set_rom_device_romd(true) + .with_context(|| "Fail to set PFlash rom region read only")?; + self.cmd = 0x00; + self.write_cycle = 0; + self.status = 0x80; + Ok(()) + } + + fn realize(self) -> Result>> { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + MUT_SYS_BUS!(parent_bus, locked_bus, sysbus); + let region_base = self.base.res.region_base; + let host_mmap = self.host_mmap.clone(); + let dev = Arc::new(Mutex::new(self)); + let region_ops = sysbus.build_region_ops(&dev); + let rom_region = Region::init_rom_device_region(host_mmap, region_ops, "PflashRom"); + dev.lock().unwrap().rom = Some(rom_region.clone()); + sysbus + .sys_mem + .root() + .add_subregion(rom_region, region_base) + .with_context(|| "Failed to attach PFlash to system bus")?; + sysbus.sysbus_attach_child(dev.clone())?; + + Ok(dev) + } +} + impl SysBusDevOps for PFlash { + gen_base_func!(sysbusdev_base, sysbusdev_base_mut, SysBusDevBase, base); + fn read(&mut self, data: &mut [u8], _base: GuestAddress, offset: u64) -> bool { let mut index: u64; let mut ret: u32 = 0; @@ -684,16 +756,17 @@ impl SysBusDevOps for PFlash { // 0x70: Status Register. // 0xe8: Write block. 
// Just read status register, return every device status in bank. - ret = self.status as u32; + ret = u32::from(self.status); if self.device_width != 0 && data_len > self.device_width { let mut shift: u32 = self.device_width * 8; while shift + self.device_width * 8 <= data_len * 8 { - ret |= (self.status as u32) << shift; + ret |= u32::from(self.status) << shift; shift += self.device_width * 8; } } else if self.device_width == 0 && data_len > 2 { - ret |= (self.status as u32) << 16; + ret |= u32::from(self.status) << 16; } + trace::pflash_read_status(ret); } 0x90 => { if self.device_width == 0 { @@ -706,9 +779,18 @@ impl SysBusDevOps for PFlash { } match index { - 0 => ret = self.ident[0] << 8 | self.ident[1], - 1 => ret = self.ident[2] << 8 | self.ident[3], - _ => ret = 0, + 0 => { + ret = self.ident[0] << 8 | self.ident[1]; + trace::pflash_manufacturer_id(ret); + } + 1 => { + ret = self.ident[2] << 8 | self.ident[3]; + trace::pflash_device_id(ret); + } + _ => { + ret = 0; + trace::pflash_device_info(index); + } } } else { // If a read request is larger than bank_width of PFlash device, @@ -716,9 +798,9 @@ impl SysBusDevOps for PFlash { // combine serval queries into one response. let mut i: u32 = 0; while i < data_len { - match self.query_devid(offset + (i * self.bank_width) as u64) { + match self.query_devid(offset + u64::from(i * self.bank_width)) { Err(e) => { - error!("Failed to query devid {}.", e.display_chain()); + error!("Failed to query devid {:?}", e); break; } Ok(fieldval) => { @@ -756,9 +838,9 @@ impl SysBusDevOps for PFlash { } else { let mut i: u32 = 0; while i < data_len { - match self.query_cfi(offset + (i * self.bank_width) as u64) { + match self.query_cfi(offset + u64::from(i * self.bank_width)) { Err(e) => { - error!("Failed to query devid, {}.", e.display_chain()); + error!("Failed to query devid, {:?}", e); break; } Ok(fieldval) => { @@ -781,40 +863,27 @@ impl SysBusDevOps for PFlash { } _ => { // This should never happen : reset state & treat it as a read. - error!("PFlash read: unknown command state 0x{:X}", self.cmd); + trace::pflash_read_unknown_state(self.cmd); self.write_cycle = 0; self.cmd = 0x00; if let Err(e) = self.read_data(data, offset) { - error!("Failed to read data from PFlash: {}.", e.display_chain()); + error!("Failed to read data from PFlash: {:?}", e); } } } - match data.len() { - 1 => data[0] = ret as u8, - 2 => LittleEndian::write_u16(data, ret as u16), - 4 => LittleEndian::write_u32(data, ret), - n => { - error!("Invalid data length {}", n); - return false; - } - } - - true + trace::pflash_io_read(offset, data_len, ret, self.cmd, self.write_cycle); + write_data_u32(data, ret) } fn write(&mut self, data: &[u8], _base: GuestAddress, offset: u64) -> bool { - let value: u32 = match data.len() { - 1 => data[0] as u32, - 2 => LittleEndian::read_u16(data).into(), - 4 => LittleEndian::read_u32(data), - n => { - error!("Invalid data length {}", n); - return false; - } - }; + let mut value = 0_u32; + if !read_data_u32(data, &mut value) { + return false; + } let cmd: u8 = data[0]; let data_len: u8 = data.len() as u8; + trace::pflash_io_write(offset, data_len, value, self.write_cycle); if self.write_cycle == 0 && self @@ -831,58 +900,31 @@ impl SysBusDevOps for PFlash { // - PFlash write // * cmd 0x10 | 0x40 represents single Byte Program. // * cmd 0xe8 represents write to buffer. - // - cmd 0x20 | 0x28 represents PFlash erase (write all 1). + // * cmd 0x20 | 0x28 represents PFlash erase (write all 1). 
match self.write_cycle { 0 => self.handle_write_first_pass(cmd, offset), 1 => self.handle_write_second_pass(cmd, offset, data, data_len, value), 2 => self.handle_write_third_pass(offset, data), 3 => self.handle_write_fourth_pass(cmd), _ => { - error!( - "PFlash write: invalid write state: write cycle {}", - self.write_cycle - ); + trace::pflash_write("invalid write state".to_string(), cmd); if let Err(e) = self.set_read_array_mode(false) { - error!( - "Failed to set PFlash to read array mode, error is {}", - e.display_chain() - ); + error!("Failed to set PFlash to read array mode, error is {:?}", e); } false } } } - fn get_sys_resource(&mut self) -> Option<&mut SysRes> { - Some(&mut self.res) - } - fn set_sys_resource( &mut self, - _sysbus: &mut SysBus, + _sysbus: &Arc>, region_base: u64, region_size: u64, - ) -> SysBusResult<()> { - let mut res = self.get_sys_resource().unwrap(); - res.region_base = region_base; - res.region_size = region_size; - res.irq = 0; - Ok(()) - } - - fn get_type(&self) -> SysBusDevType { - SysBusDevType::Flash - } - - fn reset(&mut self) -> SysBusResult<()> { - use sysbus::errors::ResultExt as SysBusResultExt; - - SysBusResultExt::chain_err(self.rom.as_ref().unwrap().set_rom_device_romd(true), || { - "Fail to set PFlash rom region read only" - })?; - self.cmd = 0x00; - self.write_cycle = 0; - self.status = 0x80; + region_name: &str, + ) -> Result<()> { + self.sysbusdev_base_mut() + .set_sys(0, region_base, region_size, region_name); Ok(()) } } @@ -895,28 +937,11 @@ impl AmlBuilder for PFlash { #[cfg(test)] mod test { - use super::*; - use address_space::AddressSpace; use std::fs; - pub use std::fs::File; - - fn sysbus_init() -> SysBus { - let sys_mem = AddressSpace::new(Region::init_container_region(u64::max_value())).unwrap(); - #[cfg(target_arch = "x86_64")] - let sys_io = AddressSpace::new(Region::init_container_region(1 << 16)).unwrap(); - #[cfg(target_arch = "x86_64")] - let free_irqs: (i32, i32) = (5, 15); - #[cfg(target_arch = "aarch64")] - let free_irqs: (i32, i32) = (32, 191); - let mmio_region: (u64, u64) = (0x0A00_0000, 0x1000_0000); - SysBus::new( - #[cfg(target_arch = "x86_64")] - &sys_io, - &sys_mem, - free_irqs, - mmio_region, - ) - } + use std::fs::File; + + use super::*; + use crate::sysbus::sysbus_init; fn pflash_dev_init(file_name: &str) -> Arc> { let sector_len: u32 = 0x40_000; @@ -928,32 +953,20 @@ mod test { fd.set_len(flash_size).unwrap(); drop(fd); - let fd = Some( + let fd = Some(Arc::new( std::fs::OpenOptions::new() .read(true) .write(true) .open(file_name) .unwrap(), - ); - let pflash = PFlash::new(flash_size, &fd, sector_len, 4, 2, read_only).unwrap(); + )); let sysbus = sysbus_init(); - let dev = Arc::new(Mutex::new(pflash)); - let region_ops = sysbus.build_region_ops(&dev); - let host_mmap = Arc::new( - HostMemMapping::new( - GuestAddress(flash_base), - None, - flash_size, - fd.map(FileBackend::new_common), - false, - true, - false, - ) - .unwrap(), - ); + let pflash = PFlash::new( + flash_size, fd, sector_len, 4, 2, read_only, &sysbus, flash_base, + ) + .unwrap(); + let dev = pflash.realize().unwrap(); - let rom_region = Region::init_rom_device_region(host_mmap, region_ops); - dev.lock().unwrap().rom = Some(rom_region); dev } diff --git a/devices/src/legacy/pl011.rs b/devices/src/legacy/pl011.rs index 13343813f85c2a7c2fffc379219bb2865be06af1..465173ba8eea13e76f721e8fb7e4f7041f4b1579 100644 --- a/devices/src/legacy/pl011.rs +++ b/devices/src/legacy/pl011.rs @@ -12,25 +12,30 @@ use std::sync::{Arc, Mutex}; +use anyhow::{Context, 
Result}; +use log::error; + +use super::error::LegacyError; +use crate::sysbus::{SysBus, SysBusDevBase, SysBusDevOps, SysBusDevType}; +use crate::{convert_bus_mut, Device, DeviceBase, MUT_SYS_BUS}; use acpi::{ AmlActiveLevel, AmlBuilder, AmlDevice, AmlEdgeLevel, AmlExtendedInterrupt, AmlIntShare, AmlInteger, AmlMemory32Fixed, AmlNameDecl, AmlReadAndWrite, AmlResTemplate, AmlResourceUsage, - AmlScopeBuilder, AmlString, + AmlScopeBuilder, AmlString, INTERRUPT_PPIS_COUNT, INTERRUPT_SGIS_COUNT, }; use address_space::GuestAddress; -use byteorder::{ByteOrder, LittleEndian}; -use machine_manager::{ - config::{BootSource, Param, SerialConfig}, - event_loop::EventLoop, +use chardev_backend::chardev::{Chardev, InputReceiver}; +use machine_manager::config::SerialConfig; +use machine_manager::event_loop::EventLoop; +use migration::{ + snapshot::PL011_SNAPSHOT_ID, DeviceStateDesc, FieldDesc, MigrationError, MigrationHook, + MigrationManager, StateTransfer, }; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; -use sysbus::{SysBus, SysBusDevOps, SysBusDevType, SysRes}; +use migration_derive::{ByteCode, Desc}; use util::byte_code::ByteCode; -use util::loop_context::EventNotifierHelper; -use vmm_sys_util::eventfd::EventFd; - -use super::chardev::{Chardev, InputReceiver}; -use super::errors::{ErrorKind, Result, ResultExt}; +use util::gen_base_func; +use util::loop_context::{create_new_eventfd, EventNotifierHelper}; +use util::num_ops::read_data_u32; const PL011_FLAG_TXFE: u8 = 0x80; const PL011_FLAG_RXFF: u8 = 0x40; @@ -55,7 +60,7 @@ const PL011_FIFO_SIZE: usize = 16; #[repr(C)] #[derive(Clone, Copy, Desc, ByteCode)] #[desc_version(compat_version = "0.1.0")] -pub struct PL011State { +struct PL011State { /// Read FIFO. PL011_FIFO_SIZE is 16. rfifo: [u32; 16], /// Flag Register. @@ -92,7 +97,7 @@ impl PL011State { fn new() -> Self { PL011State { rfifo: [0; PL011_FIFO_SIZE], - flags: (PL011_FLAG_TXFE | PL011_FLAG_RXFE) as u32, + flags: u32::from(PL011_FLAG_TXFE | PL011_FLAG_RXFE), lcr: 0, rsr: 0, cr: 0x300, @@ -113,105 +118,130 @@ impl PL011State { #[allow(clippy::upper_case_acronyms)] pub struct PL011 { + base: SysBusDevBase, + /// Whether rx paused + paused: bool, /// Device state. state: PL011State, - /// Interrupt event file descriptor. - interrupt_evt: EventFd, - /// System Resource of device. - res: SysRes, /// Character device for redirection. chardev: Arc>, } impl PL011 { /// Create a new `PL011` instance with default parameters. 
- pub fn new(cfg: SerialConfig) -> Result { - Ok(PL011 { + pub fn new( + cfg: SerialConfig, + sysbus: &Arc>, + region_base: u64, + region_size: u64, + ) -> Result { + let mut pl011 = PL011 { + base: SysBusDevBase { + dev_type: SysBusDevType::PL011, + interrupt_evt: Some(Arc::new(create_new_eventfd()?)), + ..Default::default() + }, + paused: false, state: PL011State::new(), - interrupt_evt: EventFd::new(libc::EFD_NONBLOCK)?, - res: SysRes::default(), chardev: Arc::new(Mutex::new(Chardev::new(cfg.chardev))), - }) + }; + pl011 + .set_sys_resource(sysbus, region_base, region_size, "PL011") + .with_context(|| "Failed to set system resource for PL011.")?; + pl011.set_parent_bus(sysbus.clone()); + + Ok(pl011) } - fn interrupt(&self) { + fn interrupt(&mut self) { let irq_mask = INT_E | INT_MS | INT_RT | INT_TX | INT_RX; let flag = self.state.int_level & self.state.int_enabled; if flag & irq_mask != 0 { - if let Err(e) = self.interrupt_evt.write(1) { - error!( - "Failed to trigger interrupt for PL011, flag is 0x{:x}, error is {}", - flag, e, - ) - } + self.inject_interrupt(); + trace::pl011_interrupt(flag); } } - pub fn realize( - mut self, - sysbus: &mut SysBus, - region_base: u64, - region_size: u64, - bs: &Arc>, - ) -> Result<()> { - self.chardev - .lock() - .unwrap() - .realize() - .chain_err(|| "Failed to realize chardev")?; - self.set_sys_resource(sysbus, region_base, region_size) - .chain_err(|| "Failed to set system resource for PL011.")?; - - let dev = Arc::new(Mutex::new(self)); - sysbus - .attach_device(&dev, region_base, region_size) - .chain_err(|| "Failed to attach PL011 to system bus.")?; - - bs.lock().unwrap().kernel_cmdline.push(Param { - param_type: "earlycon".to_string(), - value: format!("pl011,mmio,0x{:08x}", region_base), - }); - MigrationManager::register_device_instance_mutex(PL011State::descriptor(), dev.clone()); - let locked_dev = dev.lock().unwrap(); - locked_dev.chardev.lock().unwrap().set_input_callback(&dev); - EventLoop::update_event( - EventNotifierHelper::internal_notifiers(locked_dev.chardev.clone()), - None, - ) - .chain_err(|| ErrorKind::RegNotifierErr)?; - Ok(()) + fn unpause_rx(&mut self) { + if self.paused { + trace::pl011_unpause_rx(); + self.paused = false; + self.chardev.lock().unwrap().unpause_rx(); + } } } impl InputReceiver for PL011 { - fn input_handle(&mut self, data: &[u8]) { - self.state.flags &= !PL011_FLAG_RXFE as u32; + fn receive(&mut self, data: &[u8]) { + self.state.flags &= u32::from(!PL011_FLAG_RXFE); for val in data { let mut slot = (self.state.read_pos + self.state.read_count) as usize; if slot >= PL011_FIFO_SIZE { slot -= PL011_FIFO_SIZE; } - self.state.rfifo[slot] = *val as u32; + self.state.rfifo[slot] = u32::from(*val); self.state.read_count += 1; + trace::pl011_receive(self.state.rfifo[slot], self.state.read_count); } // If in character-mode, or in FIFO-mode and FIFO is full, trigger the interrupt. 
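            // [Illustrative sketch, not part of this patch] The slot computation above treats the
            // receive FIFO as a fixed-size ring: the next free slot is `read_pos + read_count`
            // wrapped back into [0, PL011_FIFO_SIZE). A standalone model of that indexing (the
            // helper name is hypothetical):
            //
            //     fn rfifo_slot(read_pos: u32, read_count: u32, fifo_size: usize) -> usize {
            //         let mut slot = (read_pos + read_count) as usize;
            //         if slot >= fifo_size {
            //             // one subtraction suffices because read_count never exceeds fifo_size
            //             slot -= fifo_size;
            //         }
            //         slot
            //     }
            //     assert_eq!(rfifo_slot(14, 3, 16), 1); // read_pos 14 with 3 bytes queued wraps to 1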
if ((self.state.lcr & 0x10) == 0) || (self.state.read_count as usize == PL011_FIFO_SIZE) { - self.state.flags |= PL011_FLAG_RXFF as u32; + self.state.flags |= u32::from(PL011_FLAG_RXFF); + trace::pl011_receive_full(); } if self.state.read_count >= self.state.read_trigger { - self.state.int_level |= INT_RX as u32; + self.state.int_level |= INT_RX; self.interrupt(); } } - fn get_remain_space_size(&mut self) -> usize { + fn remain_size(&mut self) -> usize { PL011_FIFO_SIZE - self.state.read_count as usize } + + fn set_paused(&mut self) { + trace::pl011_pause_rx(); + self.paused = true; + } +} + +impl Device for PL011 { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn realize(self) -> Result>> { + self.chardev + .lock() + .unwrap() + .realize() + .with_context(|| "Failed to realize chardev")?; + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + MUT_SYS_BUS!(parent_bus, locked_bus, sysbus); + let dev = Arc::new(Mutex::new(self)); + sysbus + .attach_device(&dev) + .with_context(|| "Failed to attach PL011 to system bus.")?; + drop(locked_bus); + MigrationManager::register_device_instance( + PL011State::descriptor(), + dev.clone(), + PL011_SNAPSHOT_ID, + ); + let locked_dev = dev.lock().unwrap(); + locked_dev.chardev.lock().unwrap().set_receiver(&dev); + EventLoop::update_event( + EventNotifierHelper::internal_notifiers(locked_dev.chardev.clone()), + None, + ) + .with_context(|| LegacyError::RegNotifierErr)?; + drop(locked_dev); + Ok(dev) + } } impl SysBusDevOps for PL011 { + gen_base_func!(sysbusdev_base, sysbusdev_base_mut, SysBusDevBase, base); + fn read(&mut self, data: &mut [u8], _base: GuestAddress, offset: u64) -> bool { if data.len() > 4 { error!("Fail to read PL011, illegal data length {}", data.len()); @@ -222,7 +252,9 @@ impl SysBusDevOps for PL011 { match offset >> 2 { 0 => { // Data register. - self.state.flags &= !(PL011_FLAG_RXFF as u32); + self.unpause_rx(); + + self.state.flags &= !u32::from(PL011_FLAG_RXFF); let c = self.state.rfifo[self.state.read_pos as usize]; if self.state.read_count > 0 { @@ -233,11 +265,12 @@ impl SysBusDevOps for PL011 { } } if self.state.read_count == 0 { - self.state.flags |= PL011_FLAG_RXFE as u32; + self.state.flags |= u32::from(PL011_FLAG_RXFE); } if self.state.read_count == self.state.read_trigger - 1 { - self.state.int_level &= !(INT_RX as u32); + self.state.int_level &= !INT_RX; } + trace::pl011_read_fifo(self.state.read_count); self.state.rsr = c >> 8; self.interrupt(); ret = c; @@ -284,7 +317,7 @@ impl SysBusDevOps for PL011 { 0x3f8..=0x400 => { // Register 0xFE0~0xFFC is UART Peripheral Identification Registers // and PrimeCell Identification Registers. 
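                // [Illustrative arithmetic, not part of this patch] The id table below is indexed
                // with (byte_offset - 0xFE0) >> 2, so 0xFE0 -> id[0], 0xFE4 -> id[1], ... and
                // 0xFFC -> id[7]; e.g. (0xFF0 - 0xFE0) >> 2 == 4.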
- ret = *self.state.id.get(((offset - 0xfe0) >> 2) as usize).unwrap() as u32; + ret = u32::from(*self.state.id.get(((offset - 0xfe0) >> 2) as usize).unwrap()); } _ => { error!("Failed to read pl011: Invalid offset 0x{:x}", offset); @@ -292,36 +325,26 @@ impl SysBusDevOps for PL011 { } } data.copy_from_slice(&ret.as_bytes()[0..data.len()]); + trace::pl011_read(offset, ret); true } fn write(&mut self, data: &[u8], _base: GuestAddress, offset: u64) -> bool { - let value = match data.len() { - 1 => data[0] as u32, - 2 => LittleEndian::read_u16(data) as u32, - 4 => LittleEndian::read_u32(data) as u32, - _ => return false, - }; + let mut value = 0; + if !read_data_u32(data, &mut value) { + return false; + } + trace::pl011_write(offset, value); match offset >> 2 { 0 => { let ch = value as u8; - - if let Some(output) = &mut self.chardev.lock().unwrap().output { - let mut locked_output = output.lock().unwrap(); - if let Err(e) = locked_output.write_all(&[ch]) { - error!("Failed to write to pl011 output fd, error is {}", e); - } - if let Err(e) = locked_output.flush() { - error!("Failed to flush pl011, error is {}", e); - } - } else { - debug!("Failed to get output fd"); + if let Err(e) = self.chardev.lock().unwrap().fill_outbuf(vec![ch], None) { + error!("Failed to append pl011 data to outbuf of chardev, {:?}", e); return false; } - - self.state.int_level |= INT_TX as u32; + self.state.int_level |= INT_TX; self.interrupt(); } 1 => { @@ -332,14 +355,17 @@ impl SysBusDevOps for PL011 { } 9 => { self.state.ibrd = value; + trace::pl011_baudrate_change(self.state.ibrd, self.state.fbrd); } 10 => { self.state.fbrd = value; + trace::pl011_baudrate_change(self.state.ibrd, self.state.fbrd); } 11 => { // PL011 works in two modes: character mode or FIFO mode. // Reset FIFO if the mode is changed. 
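                // [Illustrative sketch, not part of this patch] Bit 4 of this register selects the
                // FIFO mode (the FEN bit on a PL011), and the xor below fires only when that bit
                // actually toggles:
                //
                //     let old_lcr = 0x60u32;                      // 8-bit words, FIFOs disabled
                //     let new_lcr = 0x70u32;                      // 8-bit words, FIFOs enabled
                //     assert_ne!((old_lcr ^ new_lcr) & 0x10, 0);  // toggled -> FIFO state is reset
                //     assert_eq!((new_lcr ^ new_lcr) & 0x10, 0);  // rewriting the same value: no-op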
if (self.state.lcr ^ value) & 0x10 != 0 { + self.unpause_rx(); // fifo cleared, chardev-rx must be unpaused self.state.read_count = 0; self.state.read_pos = 0; } @@ -376,38 +402,23 @@ impl SysBusDevOps for PL011 { true } - - fn interrupt_evt(&self) -> Option<&EventFd> { - Some(&self.interrupt_evt) - } - - fn get_sys_resource(&mut self) -> Option<&mut SysRes> { - Some(&mut self.res) - } - - fn get_type(&self) -> SysBusDevType { - SysBusDevType::PL011 - } } impl StateTransfer for PL011 { - fn get_state_vec(&self) -> migration::errors::Result> { + fn get_state_vec(&self) -> Result> { Ok(self.state.as_bytes().to_vec()) } - fn set_state_mut(&mut self, state: &[u8]) -> migration::errors::Result<()> { + fn set_state_mut(&mut self, state: &[u8]) -> Result<()> { self.state = *PL011State::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("PL011"))?; + .with_context(|| MigrationError::FromBytesError("PL011"))?; + self.unpause_rx(); Ok(()) } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&PL011State::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&PL011State::descriptor().name).unwrap_or(!0) } } @@ -416,23 +427,23 @@ impl MigrationHook for PL011 {} impl AmlBuilder for PL011 { fn aml_bytes(&self) -> Vec { let mut acpi_dev = AmlDevice::new("COM0"); - acpi_dev.append_child(AmlNameDecl::new("_HID", AmlString("ARMH0001".to_string()))); + acpi_dev.append_child(AmlNameDecl::new("_HID", AmlString("ARMH0011".to_string()))); acpi_dev.append_child(AmlNameDecl::new("_UID", AmlInteger(0))); let mut res = AmlResTemplate::new(); res.append_child(AmlMemory32Fixed::new( AmlReadAndWrite::ReadWrite, - self.res.region_base as u32, - self.res.region_size as u32, + self.base.res.region_base as u32, + self.base.res.region_size as u32, )); // SPI start at interrupt number 32 on aarch64 platform. 
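        // [Illustrative arithmetic, not part of this patch] On a GIC, interrupt IDs 0-15 are SGIs
        // and 16-31 are PPIs, so shared peripheral interrupts (SPIs) begin at ID 32. Assuming the
        // two constants below each hold 16, the interrupt number reported through ACPI is
        // irq + 16 + 16, i.e. an SPI wired as irq 1 shows up to the guest as interrupt 33.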
- let irq_base = 32_u32; + let irq_base = INTERRUPT_PPIS_COUNT + INTERRUPT_SGIS_COUNT; res.append_child(AmlExtendedInterrupt::new( AmlResourceUsage::Consumer, - AmlEdgeLevel::Level, + AmlEdgeLevel::Edge, AmlActiveLevel::High, AmlIntShare::Exclusive, - vec![self.res.irq as u32 + irq_base], + vec![self.base.res.irq as u32 + irq_base], )); acpi_dev.append_child(AmlNameDecl::new("_CRS", res)); @@ -443,18 +454,21 @@ impl AmlBuilder for PL011 { #[cfg(test)] mod test { use super::*; + use crate::sysbus::sysbus_init; use machine_manager::config::{ChardevConfig, ChardevType}; #[test] fn test_receive() { let chardev_cfg = ChardevConfig { - id: "chardev".to_string(), - backend: ChardevType::Stdio, + classtype: ChardevType::Stdio { + id: "chardev".to_string(), + }, }; - let mut pl011_dev = PL011::new(SerialConfig { + let config = SerialConfig { chardev: chardev_cfg, - }) - .unwrap(); + }; + let sysbus = sysbus_init(); + let mut pl011_dev = PL011::new(config, &sysbus, 0x0900_0000, 0x0000_1000).unwrap(); assert_eq!(pl011_dev.state.rfifo, [0; PL011_FIFO_SIZE]); assert_eq!(pl011_dev.state.flags, 0x90); assert_eq!(pl011_dev.state.lcr, 0); @@ -476,10 +490,10 @@ mod test { assert_eq!(pl011_dev.state.int_enabled, 0); let data = vec![0x12, 0x34, 0x56, 0x78, 0x90]; - pl011_dev.input_handle(&data); + pl011_dev.receive(&data); assert_eq!(pl011_dev.state.read_count, data.len() as u32); for i in 0..data.len() { - assert_eq!(pl011_dev.state.rfifo[i], data[i] as u32); + assert_eq!(pl011_dev.state.rfifo[i], u32::from(data[i])); } assert_eq!(pl011_dev.state.flags, 0xC0); assert_eq!(pl011_dev.state.int_level, INT_RX); diff --git a/devices/src/legacy/pl031.rs b/devices/src/legacy/pl031.rs index e7f6715065f1f19487c8a8595f21f51b89b3f0b8..d6a9792205e0b0ddb0be20286250127bb8968eff 100644 --- a/devices/src/legacy/pl031.rs +++ b/devices/src/legacy/pl031.rs @@ -13,27 +13,35 @@ use std::sync::{Arc, Mutex}; use std::time::{Instant, SystemTime, UNIX_EPOCH}; +use anyhow::{Context, Result}; +use byteorder::{ByteOrder, LittleEndian}; + +use super::error::LegacyError; +use crate::sysbus::{SysBus, SysBusDevBase, SysBusDevOps, SysBusDevType}; +use crate::{convert_bus_mut, Device, DeviceBase, MUT_SYS_BUS}; use acpi::AmlBuilder; use address_space::GuestAddress; -use byteorder::{ByteOrder, LittleEndian}; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; -use sysbus::{SysBus, SysBusDevOps, SysBusDevType, SysRes}; +use migration::{ + snapshot::PL031_SNAPSHOT_ID, DeviceStateDesc, FieldDesc, MigrationError, MigrationHook, + MigrationManager, StateTransfer, +}; +use migration_derive::{ByteCode, Desc}; use util::byte_code::ByteCode; -use vmm_sys_util::eventfd::EventFd; - -use super::errors::{ErrorKind, Result, ResultExt}; +use util::gen_base_func; +use util::loop_context::create_new_eventfd; +use util::num_ops::write_data_u32; /// Registers for pl031 from ARM PrimeCell Real Time Clock Technical Reference Manual. /// Data Register. -const RTC_DR: u64 = 0x00; +pub const RTC_DR: u64 = 0x00; /// Match Register. const RTC_MR: u64 = 0x04; /// Load Register. -const RTC_LR: u64 = 0x08; +pub const RTC_LR: u64 = 0x08; /// Control Register. -const RTC_CR: u64 = 0x0c; +pub const RTC_CR: u64 = 0x0c; /// Interrupt Mask Set or Clear Register. -const RTC_IMSC: u64 = 0x10; +pub const RTC_IMSC: u64 = 0x10; /// Raw Interrupt Status Register. const RTC_RIS: u64 = 0x14; /// Masked Interrupt Status Register. 
@@ -48,7 +56,7 @@ const RTC_PERIPHERAL_ID: [u8; 8] = [0x31, 0x10, 0x14, 0x00, 0x0d, 0xf0, 0x05, 0x #[repr(C)] #[derive(Copy, Clone, Desc, ByteCode)] #[desc_version(compat_version = "0.1.0")] -pub struct PL031State { +struct PL031State { /// Match register value. mr: u32, /// Load register value. @@ -62,21 +70,19 @@ pub struct PL031State { #[allow(clippy::upper_case_acronyms)] /// PL031 structure. pub struct PL031 { + base: SysBusDevBase, /// State of device PL031. state: PL031State, /// The duplicate of Load register value. tick_offset: u32, /// Record the real time. base_time: Instant, - /// Interrupt eventfd. - interrupt_evt: Option, - /// System resource. - res: SysRes, } -impl Default for PL031 { - fn default() -> Self { - Self { +impl PL031 { + pub fn new(sysbus: &Arc>, region_base: u64, region_size: u64) -> Result { + let mut pl031 = Self { + base: SysBusDevBase::new(SysBusDevType::Rtc), state: PL031State::default(), // since 1970-01-01 00:00:00,it never cause overflow. tick_offset: SystemTime::now() @@ -84,59 +90,49 @@ impl Default for PL031 { .expect("time wrong") .as_secs() as u32, base_time: Instant::now(), - interrupt_evt: None, - res: SysRes::default(), - } - } -} - -impl PL031 { - pub fn realize( - mut self, - sysbus: &mut SysBus, - region_base: u64, - region_size: u64, - ) -> Result<()> { - self.interrupt_evt = Some(EventFd::new(libc::EFD_NONBLOCK)?); - self.set_sys_resource(sysbus, region_base, region_size) - .chain_err(|| ErrorKind::SetSysResErr)?; - - let dev = Arc::new(Mutex::new(self)); - sysbus.attach_device(&dev, region_base, region_size)?; - - MigrationManager::register_device_instance_mutex(PL031State::descriptor(), dev); + }; + pl031.base.interrupt_evt = Some(Arc::new(create_new_eventfd()?)); + pl031 + .set_sys_resource(sysbus, region_base, region_size, "PL031") + .with_context(|| LegacyError::SetSysResErr)?; + pl031.set_parent_bus(sysbus.clone()); - Ok(()) + Ok(pl031) } /// Get current clock value. fn get_current_value(&self) -> u32 { - self.base_time.elapsed().as_secs() as u32 + self.tick_offset + (u128::from(self.base_time.elapsed().as_secs()) + u128::from(self.tick_offset)) as u32 } +} - fn inject_interrupt(&self) { - if let Some(evt_fd) = self.interrupt_evt() { - if let Err(e) = evt_fd.write(1) { - error!("pl031: failed to write interrupt eventfd ({}).", e); - } - return; - } - error!("pl031: failed to get interrupt event fd."); +impl Device for PL031 { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn realize(self) -> Result>> { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + MUT_SYS_BUS!(parent_bus, locked_bus, sysbus); + let dev = Arc::new(Mutex::new(self)); + sysbus.attach_device(&dev)?; + + MigrationManager::register_device_instance( + PL031State::descriptor(), + dev.clone(), + PL031_SNAPSHOT_ID, + ); + + Ok(dev) } } impl SysBusDevOps for PL031 { + gen_base_func!(sysbusdev_base, sysbusdev_base_mut, SysBusDevBase, base); + /// Read data from registers by guest. 
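    // [Illustrative sketch, not part of this patch] get_current_value() above is simply
    // `tick_offset + seconds elapsed on base_time`, truncated to 32 bits; the u128 widening only
    // avoids overflow before the final cast. A standalone model (helper name hypothetical):
    //
    //     fn current_value(tick_offset: u32, elapsed_secs: u64) -> u32 {
    //         (u128::from(elapsed_secs) + u128::from(tick_offset)) as u32
    //     }
    //     assert_eq!(current_value(u32::MAX, 1), 0); // wraps modulo 2^32 rather than panicking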
fn read(&mut self, data: &mut [u8], _base: GuestAddress, offset: u64) -> bool { if (0xFE0..0x1000).contains(&offset) { let value = u32::from(RTC_PERIPHERAL_ID[((offset - 0xFE0) >> 2) as usize]); - match data.len() { - 1 => data[0] = value as u8, - 2 => LittleEndian::write_u16(data, value as u16), - 4 => LittleEndian::write_u32(data, value as u32), - _ => {} - } - return true; + return write_data_u32(data, value); } let mut value: u32 = 0; @@ -150,23 +146,25 @@ impl SysBusDevOps for PL031 { RTC_MIS => value = self.state.risr & self.state.imsr, _ => {} } + trace::pl031_read(offset, value); - match data.len() { - 1 => data[0] = value as u8, - 2 => LittleEndian::write_u16(data, value as u16), - 4 => LittleEndian::write_u32(data, value as u32), - _ => {} - } - - true + write_data_u32(data, value) } /// Write data to registers by guest. fn write(&mut self, data: &[u8], _base: GuestAddress, offset: u64) -> bool { let value = LittleEndian::read_u32(data); + trace::pl031_write(offset, value); match offset { - RTC_MR => self.state.mr = value, + RTC_MR => { + // TODO: The MR register is used for implementing the RTC alarm. A RTC alarm is a + // feature that can be used to allow a computer to 'wake up' after shut down to + // execute tasks every day or on a certain day. It can sometimes be found in the + // 'Power Management' section of motherboard's BIOS setup. This RTC alarm function + // is not implemented yet, here is a reminder just in case. + self.state.mr = value; + } RTC_LR => { self.state.lr = value; self.tick_offset = value; @@ -175,28 +173,18 @@ impl SysBusDevOps for PL031 { RTC_IMSC => { self.state.imsr = value & 1; self.inject_interrupt(); + trace::pl031_inject_interrupt(); } RTC_ICR => { self.state.risr = 0; self.inject_interrupt(); + trace::pl031_inject_interrupt(); } _ => {} } true } - - fn interrupt_evt(&self) -> Option<&EventFd> { - self.interrupt_evt.as_ref() - } - - fn get_sys_resource(&mut self) -> Option<&mut SysRes> { - Some(&mut self.res) - } - - fn get_type(&self) -> SysBusDevType { - SysBusDevType::Rtc - } } impl AmlBuilder for PL031 { @@ -206,26 +194,74 @@ impl AmlBuilder for PL031 { } impl StateTransfer for PL031 { - fn get_state_vec(&self) -> migration::errors::Result> { + fn get_state_vec(&self) -> Result> { let state = self.state; Ok(state.as_bytes().to_vec()) } - fn set_state_mut(&mut self, state: &[u8]) -> migration::errors::Result<()> { + fn set_state_mut(&mut self, state: &[u8]) -> Result<()> { self.state = *PL031State::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("PL031"))?; + .with_context(|| MigrationError::FromBytesError("PL031"))?; Ok(()) } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&PL031State::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&PL031State::descriptor().name).unwrap_or(!0) } } impl MigrationHook for PL031 {} + +#[cfg(test)] +mod test { + use super::*; + use crate::sysbus::sysbus_init; + use util::time::mktime64; + + const WIGGLE: u32 = 2; + + #[test] + fn test_set_year_20xx() { + let sysbus = sysbus_init(); + let mut rtc = PL031::new(&sysbus, 0x0901_0000, 0x0000_1000).unwrap(); + // Set rtc time: 2013-11-13 02:04:56. 
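        // [Illustrative arithmetic, not part of this patch] Assuming mktime64 converts a UTC
        // calendar date into seconds since the Unix epoch, 2013-11-13 02:04:56 corresponds to
        // 1_384_308_296, which still fits comfortably in the u32 written to RTC_LR below.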
+ let mut wtick = mktime64(2013, 11, 13, 2, 4, 56) as u32; + let mut data = [0; 4]; + LittleEndian::write_u32(&mut data, wtick); + PL031::write(&mut rtc, &mut data, GuestAddress(0), RTC_LR); + + PL031::read(&mut rtc, &mut data, GuestAddress(0), RTC_DR); + let mut rtick = LittleEndian::read_u32(&data); + + assert!((rtick - wtick) <= WIGGLE); + + // Set rtc time: 2080-11-13 02:04:56, ensure there is no year-2080 overflow. + wtick = mktime64(2080, 11, 13, 2, 4, 56) as u32; + data = [0; 4]; + LittleEndian::write_u32(&mut data, wtick); + PL031::write(&mut rtc, &mut data, GuestAddress(0), RTC_LR); + + PL031::read(&mut rtc, &mut data, GuestAddress(0), RTC_DR); + rtick = LittleEndian::read_u32(&data); + + assert!((rtick - wtick) <= WIGGLE); + } + + #[test] + fn test_set_year_1970() { + let sysbus = sysbus_init(); + let mut rtc = PL031::new(&sysbus, 0x0901_0000, 0x0000_1000).unwrap(); + // Set rtc time (min): 1970-01-01 00:00:00. + let wtick = mktime64(1970, 1, 1, 0, 0, 0) as u32; + let mut data = [0; 4]; + LittleEndian::write_u32(&mut data, wtick); + PL031::write(&mut rtc, &mut data, GuestAddress(0), RTC_LR); + + PL031::read(&mut rtc, &mut data, GuestAddress(0), RTC_DR); + let rtick = LittleEndian::read_u32(&data); + + assert!((rtick - wtick) <= WIGGLE); + } +} diff --git a/devices/src/legacy/ramfb.rs b/devices/src/legacy/ramfb.rs new file mode 100644 index 0000000000000000000000000000000000000000..569a593647d1e1de20a71700b822d3f81e2c1b61 --- /dev/null +++ b/devices/src/legacy/ramfb.rs @@ -0,0 +1,350 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
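// [Illustrative sketch, not part of this patch] Layout of the 28-byte "etc/ramfb" configuration
// blob that guest firmware writes back through fw_cfg; all fields are big-endian, matching the
// from_be_bytes parsing in write_callback further down:
//
//     offset  0: u64 addr    - guest-physical address of the framebuffer
//     offset  8: u32 fourcc  - DRM pixel format; only XRGB8888 is accepted here
//     offset 12: u32 flags   - currently unused
//     offset 16: u32 width
//     offset 20: u32 height
//     offset 24: u32 stride  - bytes per line; 0 lets the device derive width * bpp / 8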
+ +use std::mem::size_of; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Mutex, Weak}; +use std::time::Duration; + +use anyhow::{Context, Result}; +use clap::{ArgAction, Parser}; +use drm_fourcc::DrmFourcc; +use log::error; + +use super::fwcfg::{FwCfgOps, FwCfgWriteCallback}; +use crate::sysbus::{SysBus, SysBusDevBase, SysBusDevOps, SysBusDevType}; +use crate::{convert_bus_mut, Device, DeviceBase, MUT_SYS_BUS}; +use acpi::AmlBuilder; +use address_space::{AddressAttr, AddressSpace, GuestAddress}; +use machine_manager::config::valid_id; +use machine_manager::event_loop::EventLoop; +use ui::console::{ + console_init, display_graphic_update, display_replace_surface, ConsoleType, DisplayConsole, + DisplaySurface, HardWareOperations, +}; +use ui::input::{key_event, KEYCODE_RET}; +use util::gen_base_func; +use util::pixman::{pixman_format_bpp, pixman_format_code_t, pixman_image_create_bits}; + +const BYTES_PER_PIXELS: u32 = 8; +const WIDTH_MAX: u32 = 16_000; +const HEIGHT_MAX: u32 = 12_000; +const INSTALL_CHECK_INTERVEL_MS: u64 = 500; +const INSTALL_RELEASE_INTERVEL_MS: u64 = 200; +const INSTALL_PRESS_INTERVEL_MS: u64 = 100; + +#[derive(Parser, Debug, Clone)] +#[command(no_binary_name(true))] +pub struct RamfbConfig { + #[arg(long, value_parser = ["ramfb"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long, default_value = "false", action = ArgAction::Append)] + pub install: bool, +} + +#[repr(packed)] +struct RamfbCfg { + _addr: u64, + _fourcc: u32, + _flags: u32, + _width: u32, + _height: u32, + _stride: u32, +} + +#[derive(Clone)] +pub struct RamfbState { + pub surface: Option, + pub con: Option>>, + sys_mem: Arc, + install: Arc, +} + +// SAFETY: The type of image, the field of the struct DisplaySurface +// is the raw pointer. create_display_surface() method will create +// image object. The memory that the image pointer refers to is +// modified by guest OS and accessed by vnc. So implement Sync and +// Send is safe. +unsafe impl Sync for RamfbState {} +// SAFETY: The reason is same as above. 
+unsafe impl Send for RamfbState {} + +impl RamfbState { + pub fn new(sys_mem: Arc, install: bool) -> Self { + let ramfb_opts = Arc::new(RamfbInterface {}); + let con = console_init("ramfb".to_string(), ConsoleType::Graphic, ramfb_opts); + Self { + surface: None, + con, + sys_mem, + install: Arc::new(AtomicBool::new(install)), + } + } + + pub fn setup(&mut self, fw_cfg: &Arc>) -> Result<()> { + let mut locked_fw_cfg = fw_cfg.lock().unwrap(); + let ramfb_state_cb = self.clone(); + let cfg: Vec = [0; size_of::()].to_vec(); + locked_fw_cfg + .add_file_callback_entry( + "etc/ramfb", + cfg, + None, + Some(Arc::new(Mutex::new(ramfb_state_cb))), + true, + ) + .with_context(|| "Failed to set fwcfg")?; + Ok(()) + } + + fn create_display_surface( + &mut self, + width: u32, + height: u32, + format: pixman_format_code_t, + mut stride: u32, + addr: u64, + ) { + if width < 16 || height < 16 || width > WIDTH_MAX || height > HEIGHT_MAX { + error!("The resolution: {}x{} is unsupported.", width, height); + } + + if stride == 0 { + let linesize = width * u32::from(pixman_format_bpp(format as u32)) / BYTES_PER_PIXELS; + stride = linesize; + } + + let fb_addr = match self + .sys_mem + .addr_cache_init(GuestAddress(addr), AddressAttr::Ram) + { + Some((hva, len)) => { + if len < u64::from(stride) { + error!("Insufficient contiguous memory length"); + return; + } + hva + } + None => { + error!("Failed to get the host address of the framebuffer"); + return; + } + }; + + let mut ds = DisplaySurface { + format, + ..Default::default() + }; + // SAFETY: pixman_image_create_bits() is C function. All + // parameters passed of the function have been checked. + // It returns a raw pointer. + unsafe { + ds.image = pixman_image_create_bits( + format, + width as i32, + height as i32, + fb_addr as *mut u32, + stride as i32, + ); + } + + if ds.image.is_null() { + error!("Failed to create the surface of Ramfb!"); + return; + } + + self.surface = Some(ds); + + set_press_event(self.install.clone(), fb_addr as *const u8); + } + + fn reset_ramfb_state(&mut self) { + self.surface = None; + } +} + +impl FwCfgWriteCallback for RamfbState { + fn write_callback(&mut self, data: Vec, _start: u64, _len: usize) { + if data.len() < 28 { + error!("RamfbCfg data format is incorrect"); + return; + } + let addr = u64::from_be_bytes( + data.as_slice() + .split_at(size_of::()) + .0 + .try_into() + .unwrap(), + ); + let fourcc = u32::from_be_bytes( + data.as_slice()[8..] + .split_at(size_of::()) + .0 + .try_into() + .unwrap(), + ); + let width = u32::from_be_bytes( + data.as_slice()[16..] + .split_at(size_of::()) + .0 + .try_into() + .unwrap(), + ); + let height = u32::from_be_bytes( + data.as_slice()[20..] + .split_at(size_of::()) + .0 + .try_into() + .unwrap(), + ); + let stride = u32::from_be_bytes( + data.as_slice()[24..] 
+ .split_at(size_of::()) + .0 + .try_into() + .unwrap(), + ); + + let format: pixman_format_code_t = if fourcc == DrmFourcc::Xrgb8888 as u32 { + pixman_format_code_t::PIXMAN_x8r8g8b8 + } else { + error!("Unsupported drm format: {}", fourcc); + return; + }; + + self.create_display_surface(width, height, format, stride, addr); + display_replace_surface(&self.con, self.surface) + .unwrap_or_else(|e| error!("Error occurs during surface switching: {:?}", e)); + } +} + +pub struct RamfbInterface {} +impl HardWareOperations for RamfbInterface { + fn hw_update(&self, con: Arc>) { + let locked_con = con.lock().unwrap(); + let width = locked_con.width; + let height = locked_con.height; + drop(locked_con); + display_graphic_update(&Some(Arc::downgrade(&con)), 0, 0, width, height) + .unwrap_or_else(|e| error!("Error occurs during graphic updating: {:?}", e)); + } +} + +pub struct Ramfb { + base: SysBusDevBase, + pub ramfb_state: RamfbState, +} + +impl Ramfb { + pub fn new(sys_mem: Arc, sysbus: &Arc>, install: bool) -> Self { + let mut ramfb = Ramfb { + base: SysBusDevBase::new(SysBusDevType::Ramfb), + ramfb_state: RamfbState::new(sys_mem, install), + }; + ramfb.set_parent_bus(sysbus.clone()); + ramfb + } +} + +impl Device for Ramfb { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn reset(&mut self, _reset_child_device: bool) -> Result<()> { + self.ramfb_state.reset_ramfb_state(); + Ok(()) + } + + fn realize(self) -> Result>> { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + MUT_SYS_BUS!(parent_bus, locked_bus, sysbus); + let dev = Arc::new(Mutex::new(self)); + sysbus.attach_device(&dev)?; + Ok(dev) + } +} + +impl SysBusDevOps for Ramfb { + gen_base_func!(sysbusdev_base, sysbusdev_base_mut, SysBusDevBase, base); + + fn read(&mut self, _data: &mut [u8], _base: GuestAddress, _offset: u64) -> bool { + error!("Ramfb can not be read!"); + false + } + + fn write(&mut self, _data: &[u8], _base: GuestAddress, _offset: u64) -> bool { + error!("Ramfb can not be written!"); + false + } +} + +impl AmlBuilder for Ramfb { + fn aml_bytes(&self) -> Vec { + Vec::new() + } +} + +fn set_press_event(install: Arc, data: *const u8) { + let black_screen = + // SAFETY: data is the raw pointer of framebuffer. EDKII has malloc the memory of + // the framebuffer. So dereference the data is safe. + unsafe { !data.is_null() && *data == 0 && *data.offset(1) == 0 && *data.offset(2) == 0 }; + if install.load(Ordering::Acquire) && black_screen { + let set_press_func = Box::new(move || { + set_press_event(install.clone(), data); + }); + let press_func = Box::new(move || { + key_event(KEYCODE_RET, true) + .unwrap_or_else(|e| error!("Ramfb couldn't press return key: {:?}", e)); + }); + let release_func = Box::new(move || { + key_event(KEYCODE_RET, false) + .unwrap_or_else(|e| error!("Ramfb couldn't release return key: {:?}.", e)); + }); + + let ctx = EventLoop::get_ctx(None).unwrap(); + ctx.timer_add( + set_press_func, + Duration::from_millis(INSTALL_CHECK_INTERVEL_MS), + ); + ctx.timer_add(press_func, Duration::from_millis(INSTALL_PRESS_INTERVEL_MS)); + ctx.timer_add( + release_func, + Duration::from_millis(INSTALL_RELEASE_INTERVEL_MS), + ); + } else { + install.store(false, Ordering::Release); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use machine_manager::config::str_slip_to_clap; + + #[test] + fn test_ramfb_config_cmdline_parser() { + // Test1: install. 
+ let ramfb_cmd1 = "ramfb,id=ramfb0,install=true"; + let ramfb_config = + RamfbConfig::try_parse_from(str_slip_to_clap(ramfb_cmd1, true, false)).unwrap(); + assert_eq!(ramfb_config.id, "ramfb0"); + assert_eq!(ramfb_config.install, true); + + // Test2: Default. + let ramfb_cmd2 = "ramfb,id=ramfb0"; + let ramfb_config = + RamfbConfig::try_parse_from(str_slip_to_clap(ramfb_cmd2, true, false)).unwrap(); + assert_eq!(ramfb_config.install, false); + } +} diff --git a/devices/src/legacy/rtc.rs b/devices/src/legacy/rtc.rs index a549bfb03fb5c88f2110afee2ae354d481d73335..4c324bf5e6c22acf464cb1032bb45f914632251e 100644 --- a/devices/src/legacy/rtc.rs +++ b/devices/src/legacy/rtc.rs @@ -11,31 +11,29 @@ // See the Mulan PSL v2 for more details. use std::sync::{Arc, Mutex}; +use std::time::{Instant, SystemTime, UNIX_EPOCH}; +use anyhow::Result; +use log::{debug, error, warn}; + +use crate::sysbus::{SysBus, SysBusDevBase, SysBusDevOps, SysBusDevType}; +use crate::{convert_bus_mut, Device, DeviceBase, MUT_SYS_BUS}; use acpi::{ AmlBuilder, AmlDevice, AmlEisaId, AmlIoDecode, AmlIoResource, AmlIrqNoFlags, AmlNameDecl, AmlResTemplate, AmlScopeBuilder, }; use address_space::GuestAddress; -use sysbus::{errors::Result as SysBusResult, SysBus, SysBusDevOps, SysBusDevType, SysRes}; -use vmm_sys_util::eventfd::EventFd; - -use super::errors::Result; +use util::gen_base_func; +use util::loop_context::create_new_eventfd; +use util::time::{mktime64, NANOSECONDS_PER_SECOND}; /// IO port of RTC device to select Register to read/write. pub const RTC_PORT_INDEX: u64 = 0x70; -/// IO port of RTC device to read/write data from selected register. -pub const RTC_PORT_DATA: u64 = 0x71; -/// IRQ number of RTC device. -pub const RTC_IRQ: u32 = 8; /// Index of register of time in RTC static RAM. const RTC_SECONDS: u8 = 0x00; -const RTC_SECONDS_ALARM: u8 = 0x01; const RTC_MINUTES: u8 = 0x02; -const RTC_MINUTES_ALARM: u8 = 0x03; const RTC_HOURS: u8 = 0x04; -const RTC_HOURS_ARARM: u8 = 0x05; const RTC_DAY_OF_WEEK: u8 = 0x06; const RTC_DAY_OF_MONTH: u8 = 0x07; const RTC_MONTH: u8 = 0x08; @@ -46,6 +44,11 @@ const RTC_REG_C: u8 = 0x0C; const RTC_REG_D: u8 = 0x0D; const RTC_CENTURY_BCD: u8 = 0x32; +// Update in progress (UIP) bit. +const REG_A_UIP: u8 = 0x80; +// UIP bit held for last 244 us of every second. +const UIP_HOLD_LENGTH: u64 = 8 * NANOSECONDS_PER_SECOND / 32768; + // Index of memory data in RTC static RAM. // 0x15/0x16 stores low/high byte below 1MB, range is [0, 640KB]. const CMOS_BASE_MEM: (u8, u8) = (0x15, 0x16); @@ -58,12 +61,7 @@ const CMOS_MEM_BELOW_4GB: (u8, u8) = (0x34, 0x35); // 0x5B/0x5C/0x5D stores low/middle/high byte of memory above 4GB, unit is 64KB. const CMOS_MEM_ABOVE_4GB: (u8, u8, u8) = (0x5B, 0x5C, 0x5D); -fn get_utc_time() -> libc::tm { - let time_val = 0_i64; - - // Safe because `libc::time` only get time. - unsafe { libc::time(time_val as *mut i64) }; - +fn rtc_time_to_tm(time_val: i64) -> libc::tm { let mut dest_tm = libc::tm { tm_sec: 0, tm_min: 0, @@ -78,7 +76,7 @@ fn get_utc_time() -> libc::tm { tm_zone: std::ptr::null_mut(), }; - // Safe because `libc::gmtime_r` just convert calendar time to + // SAFETY: `libc::gmtime_r` just convert calendar time to // broken-down format, and saved to `dest_tm`. unsafe { libc::gmtime_r(&time_val, &mut dest_tm) }; @@ -90,41 +88,62 @@ fn bin_to_bcd(src: u8) -> u8 { ((src / 10) << 4) + (src % 10) } +/// Transfer BCD coded decimal to binary coded decimal. 
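// [Illustrative arithmetic, not part of this patch] BCD keeps one decimal digit per nibble, so
// bin_to_bcd(56) == ((56 / 10) << 4) + (56 % 10) == 0x56, and the inverse below returns
// bcd_to_bin(0x56) == 5 * 10 + 6 == 56; a value with a nibble above 9 (e.g. 0x5A) fails the
// guard and is mapped to 0.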
+fn bcd_to_bin(src: u8) -> u64 { + if (src >> 4) > 9 || (src & 0x0f) > 9 { + warn!("RTC: The BCD coded format is wrong."); + return 0_u64; + } + + u64::from(((src >> 4) * 10) + (src & 0x0f)) +} + #[allow(clippy::upper_case_acronyms)] /// RTC device. pub struct RTC { + base: SysBusDevBase, /// Static CMOS RAM. cmos_data: [u8; 128], /// Index of Selected register. cur_index: u8, - /// Interrupt eventfd. - interrupt_evt: EventFd, - /// Resource of RTC. - res: SysRes, /// Guest memory size. mem_size: u64, /// The start address of gap. gap_start: u64, + /// The tick offset. + tick_offset: u64, + /// Record the real time. + base_time: Instant, } impl RTC { /// Construct function of RTC device. - pub fn new() -> Result { + pub fn new(sysbus: &Arc>) -> Result { let mut rtc = RTC { + base: SysBusDevBase { + dev_type: SysBusDevType::Rtc, + interrupt_evt: Some(Arc::new(create_new_eventfd()?)), + ..Default::default() + }, cmos_data: [0_u8; 128], cur_index: 0_u8, - interrupt_evt: EventFd::new(libc::EFD_NONBLOCK)?, - res: SysRes { - region_base: RTC_PORT_INDEX, - region_size: 8, - irq: RTC_IRQ as i32, - }, mem_size: 0, gap_start: 0, + // Since 1970-01-01 00:00:00, it never cause overflow. + tick_offset: SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("time wrong") + .as_secs(), + base_time: Instant::now(), }; - // Set VRT bit in Register-D, indicates that RAM and time are valid. - rtc.cmos_data[RTC_REG_D as usize] = 0x80; + let tm = rtc_time_to_tm(rtc.get_current_value()); + rtc.set_rtc_cmos(tm); + + rtc.init_rtc_reg(); + + rtc.set_sys_resource(sysbus, RTC_PORT_INDEX, 8, "RTC")?; + rtc.set_parent_bus(sysbus.clone()); Ok(rtc) } @@ -134,8 +153,8 @@ impl RTC { /// # Arguments /// /// * `mem_size` - Guest memory size. - /// * `gap_start` - The start address of gap on x86_64 platform. - /// This value can be found in memory layout. + /// * `gap_start` - The start address of gap on x86_64 platform. This value can be found in + /// memory layout. pub fn set_memory(&mut self, mem_size: u64, gap_start: u64) { self.mem_size = mem_size; self.gap_start = gap_start; @@ -168,43 +187,46 @@ impl RTC { } } - fn read_data(&self, data: &mut [u8]) -> bool { + fn init_rtc_reg(&mut self) { + // Set Time frequency divider and Rate selection frequency in Register-A. + // Bits 6-4 = Time frequency divider (010 = 32.768KHz). + // Bits 3-0 = Rate selection frequency (110 = 1.024KHz, 976.562s). + self.cmos_data[RTC_REG_A as usize] = 0x26; + + // Set 24 hour mode in Register-B. + self.cmos_data[RTC_REG_B as usize] = 0x02; + + // Set VRT bit in Register-D, indicates that RAM and time are valid. + self.cmos_data[RTC_REG_D as usize] = 0x80; + } + + fn read_data(&mut self, data: &mut [u8]) -> bool { if data.len() != 1 { error!("RTC only supports reading data byte by byte."); return false; } - let tm = get_utc_time(); + let tm = rtc_time_to_tm(self.get_current_value()); + self.set_rtc_cmos(tm); match self.cur_index { - RTC_SECONDS => { - data[0] = bin_to_bcd(tm.tm_sec as u8); - } - RTC_MINUTES => { - data[0] = bin_to_bcd(tm.tm_min as u8); - } - RTC_HOURS => { - data[0] = bin_to_bcd(tm.tm_hour as u8); - } - RTC_DAY_OF_WEEK => { - data[0] = bin_to_bcd((tm.tm_wday + 1) as u8); - } - RTC_DAY_OF_MONTH => { - data[0] = bin_to_bcd(tm.tm_mday as u8); + RTC_REG_A => { + data[0] = self.cmos_data[RTC_REG_A as usize]; + // UIP(update in progress) bit will be set at last 244us of every second. 
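                // [Illustrative arithmetic, not part of this patch] Taking NANOSECONDS_PER_SECOND
                // as 10^9, UIP_HOLD_LENGTH above is 8 * 1_000_000_000 / 32768 = 244_140 ns
                // (~244 us), the classic MC146818 window for a 32.768 kHz clock, so
                // update_in_progress() is true only while
                // subsec_nanos() >= 1_000_000_000 - 244_140 = 999_755_860.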
+ if self.update_in_progress() { + data[0] |= REG_A_UIP; + self.inject_interrupt(); + trace::rtc_inject_interrupt(); + } } - RTC_MONTH => { - data[0] = bin_to_bcd((tm.tm_mon + 1) as u8); - } - RTC_YEAR => { - let year = tm.tm_year + 1900; - data[0] = bin_to_bcd((year % 100) as u8); - } - RTC_CENTURY_BCD => { - data[0] = bin_to_bcd(((tm.tm_year + 1900) % 100) as u8); + RTC_REG_C => { + // The interrupt request flag (IRQF), alarm interrupt flag (AF). + data[0] = 1 << 7 | 1 << 5; } _ => { data[0] = self.cmos_data[self.cur_index as usize]; } } + trace::rtc_read(self.cur_index, data[0]); true } @@ -214,8 +236,21 @@ impl RTC { error!("RTC only supports writing data byte by byte."); return false; } + trace::rtc_write(self.cur_index, data[0]); match self.cur_index { + RTC_SECONDS | RTC_MINUTES | RTC_HOURS | RTC_DAY_OF_WEEK | RTC_DAY_OF_MONTH + | RTC_MONTH | RTC_YEAR | RTC_CENTURY_BCD => { + if self.rtc_valid_check(data[0]) { + self.cmos_data[self.cur_index as usize] = data[0]; + self.update_rtc_time(); + } else { + warn!( + "Set invalid RTC time, index {}, data {}", + self.cur_index, data[0] + ); + } + } RTC_REG_C | RTC_REG_D => { warn!( "Failed to write: read-only register, index {}, data {}", @@ -230,18 +265,108 @@ impl RTC { true } - pub fn realize(mut self, sysbus: &mut SysBus) -> Result<()> { - let region_base = self.res.region_base; - let region_size = self.res.region_size; - self.set_sys_resource(sysbus, region_base, region_size)?; + /// Get current clock value. + fn get_current_value(&self) -> i64 { + (i128::from(self.base_time.elapsed().as_secs()) + i128::from(self.tick_offset)) as i64 + } - let dev = Arc::new(Mutex::new(self)); - sysbus.attach_device(&dev, region_base, region_size)?; + fn set_rtc_cmos(&mut self, tm: libc::tm) { + self.cmos_data[RTC_SECONDS as usize] = bin_to_bcd(tm.tm_sec as u8); + self.cmos_data[RTC_MINUTES as usize] = bin_to_bcd(tm.tm_min as u8); + self.cmos_data[RTC_HOURS as usize] = bin_to_bcd(tm.tm_hour as u8); + self.cmos_data[RTC_DAY_OF_WEEK as usize] = bin_to_bcd((tm.tm_wday + 1) as u8); + self.cmos_data[RTC_DAY_OF_MONTH as usize] = bin_to_bcd(tm.tm_mday as u8); + self.cmos_data[RTC_MONTH as usize] = bin_to_bcd((tm.tm_mon + 1) as u8); + self.cmos_data[RTC_YEAR as usize] = bin_to_bcd(((tm.tm_year + 1900) % 100) as u8); + self.cmos_data[RTC_CENTURY_BCD as usize] = bin_to_bcd(((tm.tm_year + 1900) / 100) as u8); + } + + fn rtc_valid_check(&self, val: u8) -> bool { + let range = [ + [0, 59], // Seconds + [0, 59], // Seconds Alarm + [0, 59], // Minutes + [0, 59], // Minutes Alarm + [0, 23], // Hours + [0, 23], // Hours Alarm + [1, 7], // Day of the Week + [1, 31], // Day of the Month + [1, 12], // Month + [0, 99], // Year + ]; + + if (val >> 4) > 9 || (val & 0x0f) > 9 { + return false; + } + + let value = bcd_to_bin(val); + + if self.cur_index <= 9 + && (value < range[self.cur_index as usize][0] + || value > range[self.cur_index as usize][1]) + { + return false; + } + + true + } + + fn update_rtc_time(&mut self) { + let sec = bcd_to_bin(self.cmos_data[RTC_SECONDS as usize]); + let min = bcd_to_bin(self.cmos_data[RTC_MINUTES as usize]); + let hour = bcd_to_bin(self.cmos_data[RTC_HOURS as usize]); + let day = bcd_to_bin(self.cmos_data[RTC_DAY_OF_MONTH as usize]); + let mon = bcd_to_bin(self.cmos_data[RTC_MONTH as usize]); + let year = bcd_to_bin(self.cmos_data[RTC_YEAR as usize]) + + bcd_to_bin(self.cmos_data[RTC_CENTURY_BCD as usize]) * 100; + + // Check rtc time is valid to prevent tick_offset overflow. 
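        // [Illustrative arithmetic, not part of this patch] Example of this guard in action
        // (relied on by test_invalid_rtc_time below): writing century 0x19 and year 0x69 decodes
        // to bcd_to_bin(0x69) + bcd_to_bin(0x19) * 100 = 69 + 1900 = 1969, which fails the
        // `year < 1970` check, so tick_offset is left untouched and the next read regenerates the
        // CMOS time bytes from the unchanged clock.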
+ if year < 1970 + || !(1..=12).contains(&mon) + || !(1..=31).contains(&day) + || !(0..=24).contains(&hour) + || !(0..=60).contains(&min) + || !(0..=60).contains(&sec) + { + warn!( + "RTC: the updated rtc time {}-{}-{} may be invalid.", + year, mon, day + ); + return; + } + + self.tick_offset = mktime64(year, mon, day, hour, min, sec); + + self.base_time = Instant::now(); + } + + fn update_in_progress(&self) -> bool { + self.base_time.elapsed().subsec_nanos() >= (NANOSECONDS_PER_SECOND - UIP_HOLD_LENGTH) as u32 + } +} + +impl Device for RTC { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn reset(&mut self, _reset_child_device: bool) -> Result<()> { + self.cmos_data.fill(0); + self.init_rtc_reg(); + self.set_memory(self.mem_size, self.gap_start); Ok(()) } + + fn realize(self) -> Result>> { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + MUT_SYS_BUS!(parent_bus, locked_bus, sysbus); + let dev = Arc::new(Mutex::new(self)); + sysbus.attach_device(&dev)?; + Ok(dev) + } } impl SysBusDevOps for RTC { + gen_base_func!(sysbusdev_base, sysbusdev_base_mut, SysBusDevBase, base); + fn read(&mut self, data: &mut [u8], base: GuestAddress, offset: u64) -> bool { if offset == 0 { debug!( @@ -263,25 +388,6 @@ impl SysBusDevOps for RTC { self.write_data(data) } } - - fn interrupt_evt(&self) -> Option<&EventFd> { - Some(&self.interrupt_evt) - } - - fn get_sys_resource(&mut self) -> Option<&mut SysRes> { - Some(&mut self.res) - } - - fn get_type(&self) -> SysBusDevType { - SysBusDevType::Rtc - } - - fn reset(&mut self) -> SysBusResult<()> { - self.cmos_data.fill(0); - self.cmos_data[RTC_REG_D as usize] = 0x80; - self.set_memory(self.mem_size, self.gap_start); - Ok(()) - } } impl AmlBuilder for RTC { @@ -292,14 +398,130 @@ impl AmlBuilder for RTC { let mut res = AmlResTemplate::new(); res.append_child(AmlIoResource::new( AmlIoDecode::Decode16, - self.res.region_base as u16, - self.res.region_base as u16, + self.base.res.region_base as u16, + self.base.res.region_base as u16, 0x01, - self.res.region_size as u8, + self.base.res.region_size as u8, )); - res.append_child(AmlIrqNoFlags::new(self.res.irq as u8)); + res.append_child(AmlIrqNoFlags::new(self.base.res.irq as u8)); acpi_dev.append_child(AmlNameDecl::new("_CRS", res)); acpi_dev.aml_bytes() } } + +#[cfg(test)] +mod test { + use anyhow::Context; + + use super::*; + use crate::sysbus::sysbus_init; + use address_space::GuestAddress; + + const WIGGLE: u8 = 2; + + fn cmos_read(rtc: &mut RTC, index: u8) -> u8 { + let mut data: [u8; 1] = [index; 1]; + RTC::write(rtc, &mut data, GuestAddress(0), 0); + RTC::read(rtc, &mut data, GuestAddress(0), 1); + data[0] + } + + fn cmos_write(rtc: &mut RTC, index: u8, val: u8) { + let mut data: [u8; 1] = [index; 1]; + RTC::write(rtc, &mut data, GuestAddress(0), 0); + data[0] = val; + RTC::write(rtc, &mut data, GuestAddress(0), 1); + } + + #[test] + fn test_set_year_20xx() -> Result<()> { + let sysbus = sysbus_init(); + let mut rtc = RTC::new(&sysbus).with_context(|| "Failed to create RTC device")?; + // Set rtc time: 2013-11-13 02:04:56 + cmos_write(&mut rtc, RTC_CENTURY_BCD, 0x20); + cmos_write(&mut rtc, RTC_YEAR, 0x13); + cmos_write(&mut rtc, RTC_MONTH, 0x11); + cmos_write(&mut rtc, RTC_DAY_OF_MONTH, 0x13); + cmos_write(&mut rtc, RTC_HOURS, 0x02); + cmos_write(&mut rtc, RTC_MINUTES, 0x04); + cmos_write(&mut rtc, RTC_SECONDS, 0x56); + + assert!((cmos_read(&mut rtc, RTC_SECONDS) - 0x56) <= WIGGLE); + assert_eq!(cmos_read(&mut rtc, RTC_MINUTES), 0x04); + assert_eq!(cmos_read(&mut 
rtc, RTC_HOURS), 0x02); + assert_eq!(cmos_read(&mut rtc, RTC_DAY_OF_MONTH), 0x13); + assert_eq!(cmos_read(&mut rtc, RTC_MONTH), 0x11); + assert_eq!(cmos_read(&mut rtc, RTC_YEAR), 0x13); + assert_eq!(cmos_read(&mut rtc, RTC_CENTURY_BCD), 0x20); + + // Set rtc time: 2080-11-13 02:04:56, ensure there is no year-2080 overflow. + cmos_write(&mut rtc, RTC_YEAR, 0x80); + + assert!((cmos_read(&mut rtc, RTC_SECONDS) - 0x56) <= WIGGLE); + assert_eq!(cmos_read(&mut rtc, RTC_MINUTES), 0x04); + assert_eq!(cmos_read(&mut rtc, RTC_HOURS), 0x02); + assert_eq!(cmos_read(&mut rtc, RTC_DAY_OF_MONTH), 0x13); + assert_eq!(cmos_read(&mut rtc, RTC_MONTH), 0x11); + assert_eq!(cmos_read(&mut rtc, RTC_YEAR), 0x80); + assert_eq!(cmos_read(&mut rtc, RTC_CENTURY_BCD), 0x20); + + Ok(()) + } + + #[test] + fn test_set_year_1970() -> Result<()> { + let sysbus = sysbus_init(); + let mut rtc = RTC::new(&sysbus).with_context(|| "Failed to create RTC device")?; + // Set rtc time (min): 1970-01-01 00:00:00 + cmos_write(&mut rtc, RTC_CENTURY_BCD, 0x19); + cmos_write(&mut rtc, RTC_YEAR, 0x70); + cmos_write(&mut rtc, RTC_MONTH, 0x01); + cmos_write(&mut rtc, RTC_DAY_OF_MONTH, 0x01); + cmos_write(&mut rtc, RTC_HOURS, 0x00); + cmos_write(&mut rtc, RTC_MINUTES, 0x00); + cmos_write(&mut rtc, RTC_SECONDS, 0x00); + + assert!((cmos_read(&mut rtc, RTC_SECONDS) - 0x00) <= WIGGLE); + assert_eq!(cmos_read(&mut rtc, RTC_MINUTES), 0x00); + assert_eq!(cmos_read(&mut rtc, RTC_HOURS), 0x00); + assert_eq!(cmos_read(&mut rtc, RTC_DAY_OF_MONTH), 0x01); + assert_eq!(cmos_read(&mut rtc, RTC_MONTH), 0x01); + assert_eq!(cmos_read(&mut rtc, RTC_YEAR), 0x70); + assert_eq!(cmos_read(&mut rtc, RTC_CENTURY_BCD), 0x19); + + Ok(()) + } + + #[test] + fn test_invalid_rtc_time() -> Result<()> { + let sysbus = sysbus_init(); + let mut rtc = RTC::new(&sysbus).with_context(|| "Failed to create RTC device")?; + // Set rtc year: 1969 + cmos_write(&mut rtc, RTC_CENTURY_BCD, 0x19); + cmos_write(&mut rtc, RTC_YEAR, 0x69); + assert_ne!(cmos_read(&mut rtc, RTC_YEAR), 0x69); + + // Set rtc month: 13 + cmos_write(&mut rtc, RTC_MONTH, 0x13); + assert_ne!(cmos_read(&mut rtc, RTC_MONTH), 0x13); + + // Set rtc day: 32 + cmos_write(&mut rtc, RTC_DAY_OF_MONTH, 0x32); + assert_ne!(cmos_read(&mut rtc, RTC_DAY_OF_MONTH), 0x32); + + // Set rtc hour: 25 + cmos_write(&mut rtc, RTC_HOURS, 0x25); + assert_ne!(cmos_read(&mut rtc, RTC_HOURS), 0x25); + + // Set rtc minute: 60 + cmos_write(&mut rtc, RTC_MINUTES, 0x60); + assert_ne!(cmos_read(&mut rtc, RTC_MINUTES), 0x60); + + // Set rtc second: 60 + cmos_write(&mut rtc, RTC_SECONDS, 0x60); + assert_ne!(cmos_read(&mut rtc, RTC_SECONDS), 0x60); + + Ok(()) + } +} diff --git a/devices/src/legacy/serial.rs b/devices/src/legacy/serial.rs index 11e3c1b2ed65a1e8796cc57698fa5dc73ffe4edf..e21112b3cf214dcfa7beb85c9e34c253af509b2b 100644 --- a/devices/src/legacy/serial.rs +++ b/devices/src/legacy/serial.rs @@ -13,24 +13,28 @@ use std::collections::VecDeque; use std::sync::{Arc, Mutex}; +use anyhow::{bail, Context, Result}; +use log::{debug, error}; + +use super::error::LegacyError; +use crate::sysbus::{SysBus, SysBusDevBase, SysBusDevOps, SysBusDevType}; +use crate::{convert_bus_mut, Device, DeviceBase, MUT_SYS_BUS}; use acpi::{ AmlActiveLevel, AmlBuilder, AmlDevice, AmlEdgeLevel, AmlEisaId, AmlExtendedInterrupt, AmlIntShare, AmlInteger, AmlIoDecode, AmlIoResource, AmlNameDecl, AmlResTemplate, AmlResourceUsage, AmlScopeBuilder, }; use address_space::GuestAddress; -use hypervisor::kvm::KVM_FDS; -#[cfg(target_arch = "aarch64")] -use 
machine_manager::config::{BootSource, Param}; +use chardev_backend::chardev::{Chardev, InputReceiver}; use machine_manager::{config::SerialConfig, event_loop::EventLoop}; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; -use sysbus::{errors::Result as SysBusResult, SysBus, SysBusDevOps, SysBusDevType, SysRes}; +use migration::{ + snapshot::SERIAL_SNAPSHOT_ID, DeviceStateDesc, FieldDesc, MigrationError, MigrationHook, + MigrationManager, StateTransfer, +}; +use migration_derive::{ByteCode, Desc}; use util::byte_code::ByteCode; -use util::loop_context::EventNotifierHelper; -use vmm_sys_util::eventfd::EventFd; - -use super::chardev::{Chardev, InputReceiver}; -use super::errors::{ErrorKind, Result}; +use util::gen_base_func; +use util::loop_context::{create_new_eventfd, EventNotifierHelper}; pub const SERIAL_ADDR: u64 = 0x3f8; @@ -54,13 +58,16 @@ const UART_MSR_CTS: u8 = 0x10; const UART_MSR_DSR: u8 = 0x20; const UART_MSR_DCD: u8 = 0x80; +/// IRQ number of serial device. +const UART_IRQ: i32 = 4; + const RECEIVER_BUFF_SIZE: usize = 1024; /// Contain register status of serial device. #[repr(C)] #[derive(Copy, Clone, Desc, ByteCode)] #[desc_version(compat_version = "0.1.0")] -pub struct SerialState { +struct SerialState { /// Receiver buffer state. rbr_value: [u8; 1024], /// Length of rbr. @@ -105,63 +112,45 @@ impl SerialState { /// Contain registers status and operation methods of serial. pub struct Serial { + base: SysBusDevBase, + /// Whether rx paused + paused: bool, /// Receiver buffer register. rbr: VecDeque, /// State of Device Serial. state: SerialState, - /// Interrupt event file descriptor. - interrupt_evt: Option, - /// System resource. - res: SysRes, /// Character device for redirection. chardev: Arc>, } impl Serial { - pub fn new(cfg: SerialConfig) -> Self { - Serial { + pub fn new( + cfg: SerialConfig, + sysbus: &Arc>, + region_base: u64, + region_size: u64, + ) -> Result { + let mut serial = Serial { + base: SysBusDevBase::new(SysBusDevType::Serial), + paused: false, rbr: VecDeque::new(), state: SerialState::new(), - interrupt_evt: None, - res: SysRes::default(), chardev: Arc::new(Mutex::new(Chardev::new(cfg.chardev))), - } + }; + serial.base.interrupt_evt = Some(Arc::new(create_new_eventfd()?)); + serial + .set_sys_resource(sysbus, region_base, region_size, "Serial") + .with_context(|| LegacyError::SetSysResErr)?; + serial.set_parent_bus(sysbus.clone()); + Ok(serial) } - pub fn realize( - mut self, - sysbus: &mut SysBus, - region_base: u64, - region_size: u64, - #[cfg(target_arch = "aarch64")] bs: &Arc>, - ) -> Result<()> { - use super::errors::ResultExt; - - self.chardev - .lock() - .unwrap() - .realize() - .chain_err(|| "Failed to realize chardev")?; - self.interrupt_evt = Some(EventFd::new(libc::EFD_NONBLOCK)?); - self.set_sys_resource(sysbus, region_base, region_size) - .chain_err(|| ErrorKind::SetSysResErr)?; - let dev = Arc::new(Mutex::new(self)); - sysbus.attach_device(&dev, region_base, region_size)?; - - MigrationManager::register_device_instance_mutex(SerialState::descriptor(), dev.clone()); - #[cfg(target_arch = "aarch64")] - bs.lock().unwrap().kernel_cmdline.push(Param { - param_type: "earlycon".to_string(), - value: format!("uart,mmio,0x{:08x}", region_base), - }); - let locked_dev = dev.lock().unwrap(); - locked_dev.chardev.lock().unwrap().set_input_callback(&dev); - EventLoop::update_event( - EventNotifierHelper::internal_notifiers(locked_dev.chardev.clone()), - None, - ) - .chain_err(|| ErrorKind::RegNotifierErr)?; - 
Ok(()) + fn unpause_rx(&mut self) { + if self.paused { + trace::serial_unpause_rx(); + self.paused = false; + self.chardev.lock().unwrap().unpause_rx(); + } } /// Update interrupt identification register, @@ -179,14 +168,9 @@ impl Serial { self.state.iir = iir; if iir != UART_IIR_NO_INT { - if let Some(evt) = self.interrupt_evt() { - if let Err(e) = evt.write(1) { - error!("serial: failed to write interrupt eventfd ({}).", e); - } - return; - } - error!("serial: failed to update iir."); + self.inject_interrupt(); } + trace::serial_update_iir(self.state.iir); } // Read one byte data from a certain register selected by `offset`. @@ -206,6 +190,9 @@ impl Serial { if self.state.lcr & UART_LCR_DLAB != 0 { ret = self.state.div as u8; } else { + if self.state.mcr & UART_MCR_LOOP == 0 { + self.unpause_rx(); + } if !self.rbr.is_empty() { ret = self.rbr.pop_front().unwrap_or_default(); } @@ -250,6 +237,7 @@ impl Serial { } _ => {} } + trace::serial_read(offset, ret); ret } @@ -268,8 +256,7 @@ impl Serial { // * fail to write serial. // * fail to flush serial. fn write_internal(&mut self, offset: u64, data: u8) -> Result<()> { - use super::errors::ResultExt; - + trace::serial_write(offset, data); match offset { 0 => { if self.state.lcr & UART_LCR_DLAB != 0 { @@ -289,19 +276,10 @@ impl Serial { self.rbr.push_back(data); self.state.lsr |= UART_LSR_DR; - } else { - let output = self.chardev.lock().unwrap().output.clone(); - if output.is_none() { - self.update_iir(); - bail!("serial: failed to get output fd."); - } - let mut locked_output = output.as_ref().unwrap().lock().unwrap(); - locked_output - .write_all(&[data]) - .chain_err(|| "serial: failed to write.")?; - locked_output - .flush() - .chain_err(|| "serial: failed to flush.")?; + } else if let Err(e) = + self.chardev.lock().unwrap().fill_outbuf(vec![data], None) + { + bail!("Failed to append data to output buffer of chardev, {:?}", e); } self.update_iir(); @@ -323,6 +301,10 @@ impl Serial { self.state.lcr = data; } 4 => { + if data & UART_MCR_LOOP == 0 { + // loopback turned off. 
Unpause rx + self.unpause_rx(); + } self.state.mcr = data; } 7 => { @@ -336,7 +318,7 @@ impl Serial { } impl InputReceiver for Serial { - fn input_handle(&mut self, data: &[u8]) { + fn receive(&mut self, data: &[u8]) { if self.state.mcr & UART_MCR_LOOP == 0 { let len = self.rbr.len(); if len >= RECEIVER_BUFF_SIZE { @@ -350,43 +332,74 @@ impl InputReceiver for Serial { self.rbr.extend(data); self.state.lsr |= UART_LSR_DR; self.update_iir(); + trace::serial_receive(data.len()); } } - fn get_remain_space_size(&mut self) -> usize { - RECEIVER_BUFF_SIZE + fn remain_size(&mut self) -> usize { + if (self.state.mcr & UART_MCR_LOOP == 0) && (self.rbr.len() < RECEIVER_BUFF_SIZE) { + RECEIVER_BUFF_SIZE - self.rbr.len() + } else { + 0 + } + } + + fn set_paused(&mut self) { + trace::serial_pause_rx(); + self.paused = true; + } +} + +impl Device for Serial { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn realize(self) -> Result>> { + self.chardev + .lock() + .unwrap() + .realize() + .with_context(|| "Failed to realize chardev")?; + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + MUT_SYS_BUS!(parent_bus, locked_bus, sysbus); + let dev = Arc::new(Mutex::new(self)); + sysbus.attach_device(&dev)?; + + MigrationManager::register_device_instance( + SerialState::descriptor(), + dev.clone(), + SERIAL_SNAPSHOT_ID, + ); + let locked_dev = dev.lock().unwrap(); + locked_dev.chardev.lock().unwrap().set_receiver(&dev); + EventLoop::update_event( + EventNotifierHelper::internal_notifiers(locked_dev.chardev.clone()), + None, + ) + .with_context(|| LegacyError::RegNotifierErr)?; + drop(locked_dev); + Ok(dev) } } impl SysBusDevOps for Serial { + gen_base_func!(sysbusdev_base, sysbusdev_base_mut, SysBusDevBase, base); + fn read(&mut self, data: &mut [u8], _base: GuestAddress, offset: u64) -> bool { data[0] = self.read_internal(offset); true } fn write(&mut self, data: &[u8], _base: GuestAddress, offset: u64) -> bool { - self.write_internal(offset, data[0]).is_ok() - } - - fn interrupt_evt(&self) -> Option<&EventFd> { - self.interrupt_evt.as_ref() - } - - fn set_irq(&mut self, _sysbus: &mut SysBus) -> SysBusResult { - let mut irq: i32 = -1; - if let Some(e) = self.interrupt_evt() { - irq = 4; - KVM_FDS.load().register_irqfd(e, irq as u32)?; + if let Err(e) = self.write_internal(offset, data[0]) { + debug!("Failed to write serial device {}: {:?}", self.name(), e); + false + } else { + true } - Ok(irq) - } - - fn get_sys_resource(&mut self) -> Option<&mut SysRes> { - Some(&mut self.res) } - fn get_type(&self) -> SysBusDevType { - SysBusDevType::Serial + fn get_irq(&self, _sysbus: &mut SysBus) -> Result { + Ok(UART_IRQ) } } @@ -400,17 +413,17 @@ impl AmlBuilder for Serial { let mut res = AmlResTemplate::new(); res.append_child(AmlIoResource::new( AmlIoDecode::Decode16, - self.res.region_base as u16, - self.res.region_base as u16, + self.base.res.region_base as u16, + self.base.res.region_base as u16, 0x00, - self.res.region_size as u8, + self.base.res.region_size as u8, )); res.append_child(AmlExtendedInterrupt::new( AmlResourceUsage::Consumer, AmlEdgeLevel::Edge, AmlActiveLevel::High, AmlIntShare::Exclusive, - vec![self.res.irq as u32], + vec![self.base.res.irq as u32], )); acpi_dev.append_child(AmlNameDecl::new("_CRS", res)); @@ -419,7 +432,7 @@ impl AmlBuilder for Serial { } impl StateTransfer for Serial { - fn get_state_vec(&self) -> migration::errors::Result> { + fn get_state_vec(&self) -> Result> { let mut state = self.state; let (rbr_state, _) = self.rbr.as_slices(); 
state.rbr_len = rbr_state.len(); @@ -428,25 +441,22 @@ impl StateTransfer for Serial { Ok(state.as_bytes().to_vec()) } - fn set_state_mut(&mut self, state: &[u8]) -> migration::errors::Result<()> { + fn set_state_mut(&mut self, state: &[u8]) -> Result<()> { let serial_state = *SerialState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("SERIAL"))?; + .with_context(|| MigrationError::FromBytesError("SERIAL"))?; let mut rbr = VecDeque::::default(); for i in 0..serial_state.rbr_len { rbr.push_back(serial_state.rbr_value[i]); } self.rbr = rbr; self.state = serial_state; + self.unpause_rx(); Ok(()) } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&SerialState::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&SerialState::descriptor().name).unwrap_or(!0) } } @@ -455,18 +465,22 @@ impl MigrationHook for Serial {} #[cfg(test)] mod test { use super::*; + use crate::sysbus::sysbus_init; use machine_manager::config::{ChardevConfig, ChardevType}; #[test] fn test_methods_of_serial() { // test new method let chardev_cfg = ChardevConfig { - id: "chardev".to_string(), - backend: ChardevType::Stdio, + classtype: ChardevType::Stdio { + id: "chardev".to_string(), + }, }; - let mut usart = Serial::new(SerialConfig { + let sysbus = sysbus_init(); + let config = SerialConfig { chardev: chardev_cfg.clone(), - }); + }; + let mut usart = Serial::new(config, &sysbus, SERIAL_ADDR, 8).unwrap(); assert_eq!(usart.state.ier, 0); assert_eq!(usart.state.iir, 1); assert_eq!(usart.state.lcr, 3); @@ -479,7 +493,7 @@ mod test { // test receive method let data = [0x01, 0x02]; - usart.input_handle(&data); + usart.receive(&data); assert_eq!(usart.rbr.is_empty(), false); assert_eq!(usart.rbr.len(), 2); assert_eq!(usart.rbr.front(), Some(&0x01)); @@ -516,12 +530,15 @@ mod test { #[test] fn test_serial_migration_interface() { let chardev_cfg = ChardevConfig { - id: "chardev".to_string(), - backend: ChardevType::Stdio, + classtype: ChardevType::Stdio { + id: "chardev".to_string(), + }, }; - let mut usart = Serial::new(SerialConfig { + let config = SerialConfig { chardev: chardev_cfg, - }); + }; + let sysbus = sysbus_init(); + let mut usart = Serial::new(config, &sysbus, SERIAL_ADDR, 8).unwrap(); // Get state vector for usart let serial_state_result = usart.get_state_vec(); assert!(serial_state_result.is_ok()); diff --git a/devices/src/lib.rs b/devices/src/lib.rs index 09423adbd1d08a168cf97d759534200c2fb51188..d335d1a7c2b933e0fbecc1dd899cb2c3986fb3dd 100644 --- a/devices/src/lib.rs +++ b/devices/src/lib.rs @@ -16,18 +16,314 @@ //! - interrupt controller (aarch64) //! 
- legacy devices, such as serial devices -#[macro_use] -extern crate log; -#[macro_use] -extern crate error_chain; -#[macro_use] -extern crate migration_derive; - -mod interrupt_controller; +pub mod acpi; +#[cfg(feature = "usb_camera")] +pub mod camera_backend; +pub mod interrupt_controller; pub mod legacy; +pub mod misc; +pub mod pci; +pub mod scsi; +pub mod smbios; +pub mod sysbus; +pub mod usb; #[cfg(target_arch = "aarch64")] pub use interrupt_controller::{ - errors as IntCtrlErrs, InterruptController, InterruptControllerConfig, + GICDevice, GICVersion, GICv2, GICv2Access, GICv3, GICv3Access, GICv3ItsAccess, GICv3ItsState, + GICv3State, GicRedistRegion, ICGICConfig, ICGICv2Config, ICGICv3Config, InterruptController, + InterruptError as IntCtrlErrs, GIC_IRQ_INTERNAL, GIC_IRQ_MAX, }; -pub use legacy::errors as LegacyErrs; +pub use interrupt_controller::{IrqManager, IrqState, LineIrqManager, MsiIrqManager, TriggerMode}; +pub use legacy::error::LegacyError as LegacyErrs; +pub use scsi::bus as ScsiBus; +pub use scsi::disk as ScsiDisk; + +use std::any::Any; +use std::any::TypeId; +use std::collections::BTreeMap; +use std::sync::{Arc, Mutex, Weak}; + +use anyhow::{bail, Context, Result}; +use util::AsAny; + +#[derive(Clone, Default)] +pub struct DeviceBase { + /// Name of this device + pub id: String, + /// Whether it supports hot-plug/hot-unplug. + pub hotpluggable: bool, + /// parent bus. + pub parent: Option>>, + /// Child bus. + pub child: Option>>, +} + +impl DeviceBase { + pub fn new(id: String, hotpluggable: bool, parent: Option>>) -> Self { + DeviceBase { + id, + hotpluggable, + parent, + child: None, + } + } +} + +pub trait Device: Any + AsAny + Send + Sync { + fn device_base(&self) -> &DeviceBase; + + fn device_base_mut(&mut self) -> &mut DeviceBase; + + /// `Any` trait requires a `'static` lifecycle. Error "argument requires that `device` is borrowed for `'static`" + /// will be reported when using `as_any` directly for local variables which don't have `'static` lifecycle. + /// Encapsulation of `as_any` can solve this problem. + fn device_as_any(&mut self) -> &mut dyn Any { + self.as_any_mut() + } + + fn device_type_id(&self) -> TypeId { + self.type_id() + } + + /// Get device name. + fn name(&self) -> String { + self.device_base().id.clone() + } + + /// Query whether it supports hot-plug/hot-unplug. + fn hotpluggable(&self) -> bool { + self.device_base().hotpluggable + } + + /// Get the bus which this device is mounted on. + fn parent_bus(&self) -> Option>> { + self.device_base().parent.clone() + } + + fn set_parent_bus(&mut self, bus: Arc>) { + self.device_base_mut().parent = Some(Arc::downgrade(&bus)); + } + + /// Get the bus which this device has. + fn child_bus(&self) -> Option>> { + self.device_base().child.clone() + } + + fn reset(&mut self, _reset_child_device: bool) -> Result<()> { + Ok(()) + } + + /// Realize device. + fn realize(self) -> Result>> + where + Self: Sized, + { + // Note: Only PciHost does not have its own realization logic, + // but it will not be called. + bail!("Realize of the device {} is not implemented", self.name()); + } + + /// Unrealize device. + fn unrealize(&mut self) -> Result<()> { + bail!("Unrealize of the device {} is not implemented", self.name()); + } +} + +/// Macro `convert_device_ref!`: Convert from Arc> to &$device_type. +/// +/// # Arguments +/// +/// * `$trait_device` - Variable defined as Arc>. +/// * `$lock_device` - Variable used to get MutexGuard<'_, dyn Device>. +/// * `$struct_device` - Variable used to get &$device_type. 
+/// * `$device_type` - Struct corresponding to device type. +#[macro_export] +macro_rules! convert_device_ref { + ($trait_device:expr, $lock_device: ident, $struct_device: ident, $device_type: ident) => { + let mut $lock_device = $trait_device.lock().unwrap(); + let $struct_device = $lock_device + .device_as_any() + .downcast_ref::<$device_type>() + .unwrap(); + }; +} + +/// Macro `convert_device_mut!`: Convert from Arc> to &mut $device_type. +/// +/// # Arguments +/// +/// * `$trait_device` - Variable defined as Arc>. +/// * `$lock_device` - Variable used to get MutexGuard<'_, dyn Device>. +/// * `$struct_device` - Variable used to get &mut $device_type. +/// * `$device_type` - Struct corresponding to device type. +#[macro_export] +macro_rules! convert_device_mut { + ($trait_device:expr, $lock_device: ident, $struct_device: ident, $device_type: ident) => { + let mut $lock_device = $trait_device.lock().unwrap(); + let $struct_device = $lock_device + .device_as_any() + .downcast_mut::<$device_type>() + .unwrap(); + }; +} + +#[derive(Default)] +pub struct BusBase { + /// Name of this bus. + pub name: String, + /// Parent device. + pub parent: Option>>, + /// Children devices. + /// + /// Note: + /// 1. The construction of FDT table needs to strictly follow the order of sysbus, + /// so `BTreemap` needs to be used. + /// 2. every device has a unique address on the bus. Using `u64` is sufficient for we can + /// convert it to u8(devfn) for PCI bus and convert it to (u8, u16)(target, lun) for SCSI bus. + /// SysBus doesn't need this unique `u64` address, so we will incrementally fill in a useless number. + pub children: BTreeMap>>, +} + +impl BusBase { + fn new(name: String) -> BusBase { + Self { + name, + ..Default::default() + } + } +} + +pub trait Bus: Any + AsAny + Send + Sync { + fn bus_base(&self) -> &BusBase; + + fn bus_base_mut(&mut self) -> &mut BusBase; + + /// `Any` trait requires a `'static` lifecycle. Error "argument requires that `bus` is borrowed for `'static`" + /// will be reported when using `as_any` directly for local variables which don't have `'static` lifecycle. + /// Encapsulation of `as_any` can solve this problem. + fn bus_as_any(&mut self) -> &mut dyn Any { + self.as_any_mut() + } + + /// Get the name of this bus. + fn name(&self) -> String { + self.bus_base().name.clone() + } + + /// Get the device that owns this bus. + fn parent_device(&self) -> Option>> { + self.bus_base().parent.clone() + } + + /// Get the devices mounted on this bus. + fn child_devices(&self) -> BTreeMap>> { + self.bus_base().children.clone() + } + + /// Get the specific device mounted on this bus. + fn child_dev(&self, key: u64) -> Option<&Arc>> { + self.bus_base().children.get(&key) + } + + /// Attach device to this bus. + fn attach_child(&mut self, key: u64, dev: Arc>) -> Result<()> { + let children = &mut self.bus_base_mut().children; + if children.get(&key).is_some() { + bail!( + "Location of the device {} is same as one of the bus {}", + dev.lock().unwrap().name(), + self.name() + ); + } + children.insert(key, dev); + + Ok(()) + } + + /// Detach device from this bus. + fn detach_child(&mut self, key: u64) -> Result<()> { + self.bus_base_mut() + .children + .remove(&key) + .with_context(|| format!("No such device using key {} in bus {}.", key, self.name()))?; + + Ok(()) + } + + /// Bus reset means that all devices attached to this bus should reset. 
+ fn reset(&self) -> Result<()> { + for dev in self.child_devices().values() { + let mut locked_dev = dev.lock().unwrap(); + locked_dev + .reset(true) + .with_context(|| format!("Failed to reset device {}", locked_dev.name()))?; + } + + Ok(()) + } +} + +/// Macro `convert_bus_ref!`: Convert from Arc> to &$bus_type. +/// +/// # Arguments +/// +/// * `$trait_bus` - Variable defined as Arc>. +/// * `$lock_bus` - Variable used to get MutexGuard<'_, dyn Bus>. +/// * `$struct_bus` - Variable used to get &$bus_type. +/// * `$bus_type` - Struct corresponding to bus type. +#[macro_export] +macro_rules! convert_bus_ref { + ($trait_bus:expr, $lock_bus: ident, $struct_bus: ident, $bus_type: ident) => { + let mut $lock_bus = $trait_bus.lock().unwrap(); + let $struct_bus = $lock_bus.bus_as_any().downcast_ref::<$bus_type>().unwrap(); + }; +} + +/// Macro `convert_bus_mut!`: Convert from Arc> to &mut $bus_type. +/// +/// # Arguments +/// +/// * `$trait_bus` - Variable defined as Arc>. +/// * `$lock_bus` - Variable used to get MutexGuard<'_, dyn Bus>. +/// * `$struct_bus` - Variable used to get &mut $bus_type. +/// * `$bus_type` - Struct corresponding to bus type. +#[macro_export] +macro_rules! convert_bus_mut { + ($trait_bus:expr, $lock_bus: ident, $struct_bus: ident, $bus_type: ident) => { + let mut $lock_bus = $trait_bus.lock().unwrap(); + let $struct_bus = $lock_bus.bus_as_any().downcast_mut::<$bus_type>().unwrap(); + }; +} + +#[cfg(test)] +pub mod test { + use std::sync::Arc; + + use address_space::{AddressSpace, GuestAddress, HostMemMapping, Region}; + + pub fn address_space_init() -> Arc { + let root = Region::init_container_region(1 << 36, "root"); + let sys_space = AddressSpace::new(root, "sys_space", None).unwrap(); + let host_mmap = Arc::new( + HostMemMapping::new( + GuestAddress(0), + None, + 0x1000_0000, + None, + false, + false, + false, + ) + .unwrap(), + ); + sys_space + .root() + .add_subregion( + Region::init_ram_region(host_mmap.clone(), "region_1"), + host_mmap.start_address().raw_value(), + ) + .unwrap(); + sys_space + } +} diff --git a/devices/src/misc/ivshmem.rs b/devices/src/misc/ivshmem.rs new file mode 100644 index 0000000000000000000000000000000000000000..a98a4a1112eadd9331b28e28ce7e959bcab2cd4e --- /dev/null +++ b/devices/src/misc/ivshmem.rs @@ -0,0 +1,239 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
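+
+//! An inter-VM shared memory (ivshmem) PCI device. BAR0 exposes a small MMIO
+//! register window (the interrupt mask/status, IVPosition and Doorbell offsets
+//! are reserved but currently unimplemented), BAR1 carries the MSI-X table
+//! when interrupt vectors are requested, and BAR2 maps the shared RAM region
+//! that the caller passes in.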
+ +use std::sync::{ + atomic::{AtomicU16, Ordering}, + Arc, Mutex, RwLock, Weak, +}; + +use anyhow::Result; +use log::error; + +use crate::pci::config::{ + PciConfig, RegionType, DEVICE_ID, PCI_CLASS_MEMORY_RAM, PCI_CONFIG_SPACE_SIZE, + PCI_VENDOR_ID_REDHAT_QUMRANET, REVISION_ID, SUB_CLASS_CODE, VENDOR_ID, +}; +use crate::pci::msix::init_msix; +use crate::pci::{le_write_u16, PciBus, PciDevBase, PciDevOps}; +use crate::{convert_bus_ref, Bus, Device, DeviceBase, PCI_BUS}; +use address_space::{GuestAddress, Region, RegionOps}; +use util::gen_base_func; + +const PCI_VENDOR_ID_IVSHMEM: u16 = PCI_VENDOR_ID_REDHAT_QUMRANET; +const PCI_DEVICE_ID_IVSHMEM: u16 = 0x1110; +const PCI_REVIRSION_ID_IVSHMEM: u8 = 1; + +const PCI_BAR_MAX_IVSHMEM: u8 = 3; + +const IVSHMEM_REG_BAR_SIZE: u64 = 0x100; + +const IVSHMEM_BAR0_IRQ_MASK: u64 = 0; +const IVSHMEM_BAR0_IRQ_STATUS: u64 = 4; +const IVSHMEM_BAR0_IVPOSITION: u64 = 8; +const IVSHMEM_BAR0_DOORBELL: u64 = 12; + +type Bar0Write = dyn Fn(&[u8], u64) -> bool + Send + Sync; +type Bar0Read = dyn Fn(&mut [u8], u64) -> bool + Send + Sync; + +#[derive(Default)] +struct Bar0Ops { + write: Option>, + read: Option>, +} + +/// Intel-VM shared memory device structure. +pub struct Ivshmem { + base: PciDevBase, + dev_id: Arc, + ram_mem_region: Region, + vector_nr: u32, + bar0_ops: Arc>, + reset_cb: Option>, +} + +impl Ivshmem { + pub fn new( + name: String, + devfn: u8, + parent_bus: Weak>, + ram_mem_region: Region, + vector_nr: u32, + ) -> Self { + Self { + base: PciDevBase { + base: DeviceBase::new(name, false, Some(parent_bus)), + config: PciConfig::new(devfn, PCI_CONFIG_SPACE_SIZE, PCI_BAR_MAX_IVSHMEM), + devfn, + }, + dev_id: Arc::new(AtomicU16::new(0)), + ram_mem_region, + vector_nr, + bar0_ops: Arc::new(RwLock::new(Bar0Ops::default())), + reset_cb: None, + } + } + + fn register_bars(&mut self) -> Result<()> { + // Currently, ivshmem does not support intx interrupt, ivposition and doorbell. 
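+        // Accesses to those reserved offsets are silently ignored; every other
+        // BAR0 offset is forwarded to the callbacks installed through
+        // set_bar0_ops(), so a front end (e.g. the scream device) can reuse the
+        // register window for its own registers.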
+ let bar0_ops = self.bar0_ops.clone(); + let reg_read = move |data: &mut [u8], _: GuestAddress, offset: u64| -> bool { + if offset >= IVSHMEM_REG_BAR_SIZE { + error!("ivshmem: read offset {} exceeds bar0 size", offset); + return true; + } + match offset { + IVSHMEM_BAR0_IRQ_MASK | IVSHMEM_BAR0_IRQ_STATUS | IVSHMEM_BAR0_IVPOSITION => {} + _ => { + if let Some(rcb) = bar0_ops.read().unwrap().read.as_ref() { + return rcb(data, offset); + } + } + } + true + }; + let bar0_ops = self.bar0_ops.clone(); + let reg_write = move |data: &[u8], _: GuestAddress, offset: u64| -> bool { + if offset >= IVSHMEM_REG_BAR_SIZE { + error!("ivshmem: write offset {} exceeds bar0 size", offset); + return true; + } + match offset { + IVSHMEM_BAR0_IRQ_MASK | IVSHMEM_BAR0_IRQ_STATUS | IVSHMEM_BAR0_DOORBELL => {} + _ => { + if let Some(wcb) = bar0_ops.read().unwrap().write.as_ref() { + return wcb(data, offset); + } + } + } + true + }; + let reg_region_ops = RegionOps { + read: Arc::new(reg_read), + write: Arc::new(reg_write), + }; + + // bar0: mmio register + let mut bar0_region = + Region::init_io_region(IVSHMEM_REG_BAR_SIZE, reg_region_ops, "IvshmemIo"); + bar0_region.set_access_size(4); + self.base.config.register_bar( + 0, + bar0_region, + RegionType::Mem32Bit, + false, + IVSHMEM_REG_BAR_SIZE, + )?; + + // bar1: msix + if self.vector_nr > 0 { + init_msix( + &mut self.base, + 1, + self.vector_nr, + self.dev_id.clone(), + None, + None, + )?; + } + + // bar2: ram + self.base.config.register_bar( + 2, + self.ram_mem_region.clone(), + RegionType::Mem64Bit, + true, + self.ram_mem_region.size(), + ) + } + + pub fn trigger_msix(&self, vector: u16) { + if self.vector_nr == 0 { + return; + } + if let Some(msix) = self.base.config.msix.as_ref() { + msix.lock() + .unwrap() + .notify(vector, self.dev_id.load(Ordering::Acquire)); + } + } + + pub fn set_bar0_ops(&mut self, bar0_ops: (Arc, Arc)) { + self.bar0_ops.write().unwrap().write = Some(bar0_ops.0); + self.bar0_ops.write().unwrap().read = Some(bar0_ops.1); + } + + pub fn register_reset_callback(&mut self, cb: Box) { + self.reset_cb = Some(cb); + } +} + +impl Device for Ivshmem { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn realize(mut self) -> Result>> { + self.init_write_mask(false)?; + self.init_write_clear_mask(false)?; + le_write_u16( + &mut self.base.config.config, + VENDOR_ID as usize, + PCI_VENDOR_ID_IVSHMEM, + )?; + le_write_u16( + &mut self.base.config.config, + DEVICE_ID as usize, + PCI_DEVICE_ID_IVSHMEM, + )?; + self.base.config.config[REVISION_ID] = PCI_REVIRSION_ID_IVSHMEM; + + le_write_u16( + &mut self.base.config.config, + SUB_CLASS_CODE as usize, + PCI_CLASS_MEMORY_RAM, + )?; + + self.register_bars()?; + + // Attach to the PCI bus. 
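+        // generate_dev_id() derives a device id from the devfn on the parent
+        // bus; trigger_msix() later passes this id together with the vector
+        // when raising MSI-X notifications.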
+ let bus = self.parent_bus().unwrap().upgrade().unwrap(); + PCI_BUS!(bus, locked_bus, pci_bus); + self.dev_id + .store(pci_bus.generate_dev_id(self.base.devfn), Ordering::Release); + let dev = Arc::new(Mutex::new(self)); + locked_bus.attach_child(u64::from(dev.lock().unwrap().base.devfn), dev.clone())?; + Ok(dev) + } + + fn reset(&mut self, _reset_child_device: bool) -> Result<()> { + if let Some(cb) = &self.reset_cb { + cb(); + } + Ok(()) + } +} + +impl PciDevOps for Ivshmem { + gen_base_func!(pci_base, pci_base_mut, PciDevBase, base); + + fn write_config(&mut self, offset: usize, data: &[u8]) { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + PCI_BUS!(parent_bus, locked_bus, pci_bus); + + self.base.config.write( + offset, + data, + self.dev_id.load(Ordering::Acquire), + #[cfg(target_arch = "x86_64")] + Some(&pci_bus.io_region), + Some(&pci_bus.mem_region), + ); + } +} diff --git a/devices/src/misc/mod.rs b/devices/src/misc/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..0e0c015a4a70e58c01414969100122fbcb707292 --- /dev/null +++ b/devices/src/misc/mod.rs @@ -0,0 +1,20 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +#[cfg(feature = "scream")] +pub mod scream; + +#[cfg(feature = "scream")] +pub mod ivshmem; + +#[cfg(feature = "pvpanic")] +pub mod pvpanic; diff --git a/devices/src/misc/pvpanic.rs b/devices/src/misc/pvpanic.rs new file mode 100644 index 0000000000000000000000000000000000000000..23e200d0f1f94b3d46a1034ce69c9acd1d21ff3a --- /dev/null +++ b/devices/src/misc/pvpanic.rs @@ -0,0 +1,435 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
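+
+//! The pvpanic PCI device exposes a single byte-wide register in BAR0. Reads
+//! return the event bits the host supports (`PVPANIC_PANICKED`,
+//! `PVPANIC_CRASHLOADED`); the guest writes one of those bits to report a
+//! panic or a crash-loaded kernel, and the write handler logs the event and
+//! reports it through hisysevent.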
+ +use std::sync::{ + atomic::{AtomicU16, Ordering}, + Arc, Mutex, Weak, +}; + +use anyhow::{bail, Context, Result}; +use clap::Parser; +use log::{debug, error, info}; +use serde::{Deserialize, Serialize}; + +use crate::pci::config::{ + PciConfig, RegionType, CLASS_PI, DEVICE_ID, HEADER_TYPE, PCI_CLASS_SYSTEM_OTHER, + PCI_CONFIG_SPACE_SIZE, PCI_DEVICE_ID_REDHAT_PVPANIC, PCI_SUBDEVICE_ID_QEMU, + PCI_VENDOR_ID_REDHAT, PCI_VENDOR_ID_REDHAT_QUMRANET, REVISION_ID, SUBSYSTEM_ID, + SUBSYSTEM_VENDOR_ID, SUB_CLASS_CODE, VENDOR_ID, +}; +use crate::pci::{le_write_u16, PciBus, PciDevBase, PciDevOps}; +use crate::{convert_bus_mut, convert_bus_ref, Bus, Device, DeviceBase, MUT_PCI_BUS, PCI_BUS}; +use address_space::{GuestAddress, Region, RegionOps}; +use machine_manager::config::{get_pci_df, valid_id}; +use util::gen_base_func; + +const PVPANIC_PCI_REVISION_ID: u8 = 1; +const PVPANIC_PCI_VENDOR_ID: u16 = PCI_VENDOR_ID_REDHAT_QUMRANET; + +#[cfg(target_arch = "aarch64")] +// param size in Region::init_io_region must greater than 4 +const PVPANIC_REG_BAR_SIZE: u64 = 0x4; +#[cfg(target_arch = "x86_64")] +const PVPANIC_REG_BAR_SIZE: u64 = 0x1; + +pub const PVPANIC_PANICKED: u32 = 1 << 0; +pub const PVPANIC_CRASHLOADED: u32 = 1 << 1; + +#[derive(Parser, Debug, Clone, Serialize, Deserialize)] +#[command(no_binary_name(true))] +pub struct PvpanicDevConfig { + #[arg(long, value_parser = ["pvpanic"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub bus: String, + #[arg(long, value_parser = get_pci_df)] + pub addr: (u8, u8), + #[arg(long, alias = "supported-features", default_value = "3", value_parser = valid_supported_features)] + pub supported_features: u32, +} + +fn valid_supported_features(f: &str) -> Result { + let features = f.parse::()?; + let supported_features = match features & !(PVPANIC_PANICKED | PVPANIC_CRASHLOADED) { + 0 => features, + _ => bail!("Unsupported pvpanic device features {}", features), + }; + Ok(supported_features) +} + +#[derive(Copy, Clone)] +pub struct PvPanicState { + supported_features: u32, +} + +impl PvPanicState { + fn new(supported_features: u32) -> Self { + Self { supported_features } + } + + fn handle_event(&self, event: u32) -> Result<()> { + if (event & !(PVPANIC_PANICKED | PVPANIC_CRASHLOADED)) != 0 { + error!("pvpanic: unknown event 0x{:X}", event); + bail!("pvpanic: unknown event 0x{:X}", event); + } + + if (event & PVPANIC_PANICKED) == PVPANIC_PANICKED + && (self.supported_features & PVPANIC_PANICKED) == PVPANIC_PANICKED + { + hisysevent::STRATOVIRT_PVPANIC("PANICKED".to_string()); + info!("pvpanic: panicked event"); + } + + if (event & PVPANIC_CRASHLOADED) == PVPANIC_CRASHLOADED + && (self.supported_features & PVPANIC_CRASHLOADED) == PVPANIC_CRASHLOADED + { + hisysevent::STRATOVIRT_PVPANIC("CRASHLOADED".to_string()); + info!("pvpanic: crashloaded event"); + } + + Ok(()) + } +} + +pub struct PvPanicPci { + base: PciDevBase, + dev_id: AtomicU16, + pvpanic: Arc, +} + +impl PvPanicPci { + pub fn new(config: &PvpanicDevConfig, devfn: u8, parent_bus: Weak>) -> Self { + Self { + base: PciDevBase { + base: DeviceBase::new(config.id.clone(), false, Some(parent_bus)), + config: PciConfig::new(devfn, PCI_CONFIG_SPACE_SIZE, 1), + devfn, + }, + dev_id: AtomicU16::new(0), + pvpanic: Arc::new(PvPanicState::new(config.supported_features)), + } + } + + fn register_bar(&mut self) -> Result<()> { + let cloned_pvpanic_read = self.pvpanic.clone(); + let bar0_read = Arc::new(move |data: &mut [u8], _: GuestAddress, _: u64| -> bool { + 
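+            // A read simply returns the supported-features bitmask so the
+            // guest driver can discover which panic events the host accepts.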
debug!( + "pvpanic: read bar0 called event {}", + cloned_pvpanic_read.supported_features + ); + + data[0] = cloned_pvpanic_read.supported_features as u8; + true + }); + + let cloned_pvpanic_write = self.pvpanic.clone(); + let bar0_write = Arc::new(move |data: &[u8], _: GuestAddress, _: u64| -> bool { + debug!("pvpanic: write bar0 called event {:?}", data); + let val = u8::from_le_bytes(match data.try_into() { + Ok(value) => value, + Err(_) => { + return false; + } + }); + + matches!(cloned_pvpanic_write.handle_event(u32::from(val)), Ok(())) + }); + + let bar0_region_ops = RegionOps { + read: bar0_read, + write: bar0_write, + }; + + let mut bar_region = + Region::init_io_region(PVPANIC_REG_BAR_SIZE, bar0_region_ops, "PvPanic"); + bar_region.set_access_size(1); + + self.base.config.register_bar( + 0, + bar_region, + RegionType::Mem64Bit, + false, + PVPANIC_REG_BAR_SIZE, + ) + } +} + +impl Device for PvPanicPci { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn realize(mut self) -> Result>> { + self.init_write_mask(false)?; + self.init_write_clear_mask(false)?; + le_write_u16( + &mut self.base.config.config, + VENDOR_ID as usize, + PCI_VENDOR_ID_REDHAT, + )?; + + le_write_u16( + &mut self.base.config.config, + DEVICE_ID as usize, + PCI_DEVICE_ID_REDHAT_PVPANIC, + )?; + + self.base.config.config[REVISION_ID] = PVPANIC_PCI_REVISION_ID; + + le_write_u16( + &mut self.base.config.config, + SUB_CLASS_CODE as usize, + PCI_CLASS_SYSTEM_OTHER, + )?; + + le_write_u16( + &mut self.base.config.config, + SUBSYSTEM_VENDOR_ID, + PVPANIC_PCI_VENDOR_ID, + )?; + + le_write_u16( + &mut self.base.config.config, + SUBSYSTEM_ID, + PCI_SUBDEVICE_ID_QEMU, + )?; + + self.base.config.config[CLASS_PI as usize] = 0x00; + + self.base.config.config[HEADER_TYPE as usize] = 0x00; + + self.register_bar() + .with_context(|| "pvpanic: device register bar failed")?; + + // Attach to the PCI bus. + let devfn = self.base.devfn; + let dev = Arc::new(Mutex::new(self)); + let bus = dev.lock().unwrap().parent_bus().unwrap().upgrade().unwrap(); + MUT_PCI_BUS!(bus, locked_bus, pci_bus); + let device_id = pci_bus.generate_dev_id(devfn); + dev.lock() + .unwrap() + .dev_id + .store(device_id, Ordering::Release); + locked_bus.attach_child(u64::from(devfn), dev.clone())?; + + Ok(dev) + } + + fn unrealize(&mut self) -> Result<()> { + Ok(()) + } +} + +impl PciDevOps for PvPanicPci { + gen_base_func!(pci_base, pci_base_mut, PciDevBase, base); + + fn write_config(&mut self, offset: usize, data: &[u8]) { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + PCI_BUS!(parent_bus, locked_bus, pci_bus); + + self.base.config.write( + offset, + data, + self.dev_id.load(Ordering::Acquire), + #[cfg(target_arch = "x86_64")] + Some(&pci_bus.io_region), + Some(&pci_bus.mem_region), + ); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::pci::{host::tests::create_pci_host, le_read_u16, PciHost}; + use crate::{convert_bus_ref, convert_device_mut, PCI_BUS}; + use machine_manager::config::str_slip_to_clap; + + /// Convert from Arc> to &mut PvPanicPci. + #[macro_export] + macro_rules! 
MUT_PVPANIC_PCI { + ($trait_device:expr, $lock_device: ident, $struct_device: ident) => { + convert_device_mut!($trait_device, $lock_device, $struct_device, PvPanicPci); + }; + } + + fn init_pvpanic_dev(devfn: u8, supported_features: u32, dev_id: &str) -> Arc> { + let pci_host = create_pci_host(); + let locked_pci_host = pci_host.lock().unwrap(); + let root_bus = Arc::downgrade(&locked_pci_host.child_bus().unwrap()); + + let config = PvpanicDevConfig { + id: dev_id.to_string(), + supported_features, + classtype: "".to_string(), + bus: "pcie.0".to_string(), + addr: (3, 0), + }; + let pvpanic_dev = PvPanicPci::new(&config, devfn, root_bus); + assert_eq!(pvpanic_dev.base.base.id, "pvpanic_test".to_string()); + + pvpanic_dev.realize().unwrap(); + drop(locked_pci_host); + + pci_host + } + + #[test] + fn test_pvpanic_cmdline_parser() { + // Test1: Right. + let cmdline = "pvpanic,id=pvpanic0,bus=pcie.0,addr=0x7,supported-features=0"; + let result = PvpanicDevConfig::try_parse_from(str_slip_to_clap(cmdline, true, false)); + assert_eq!(result.unwrap().supported_features, 0); + + // Test2: Default value. + let cmdline = "pvpanic,id=pvpanic0,bus=pcie.0,addr=0x7"; + let result = PvpanicDevConfig::try_parse_from(str_slip_to_clap(cmdline, true, false)); + assert_eq!(result.unwrap().supported_features, 3); + + // Test3: Illegal value. + let cmdline = "pvpanic,id=pvpanic0,bus=pcie.0,addr=0x7,supported-features=4"; + let result = PvpanicDevConfig::try_parse_from(str_slip_to_clap(cmdline, true, false)); + assert!(result.is_err()); + } + + #[test] + fn test_pvpanic_attached() { + let pci_host = init_pvpanic_dev(7, PVPANIC_PANICKED | PVPANIC_CRASHLOADED, "pvpanic_test"); + let root_bus = pci_host.lock().unwrap().child_bus().unwrap(); + PCI_BUS!(root_bus, locked_bus, pci_bus); + let pvpanic_dev = pci_bus.get_device(0, 7); + drop(locked_bus); + assert!(pvpanic_dev.is_some()); + assert_eq!( + pvpanic_dev.unwrap().lock().unwrap().name(), + "pvpanic_test".to_string() + ); + + let info = PciBus::find_attached_bus(&root_bus, "pvpanic_test"); + assert!(info.is_some()); + let (bus, dev) = info.unwrap(); + assert_eq!(bus.lock().unwrap().name(), "pcie.0"); + assert_eq!(dev.lock().unwrap().name(), "pvpanic_test"); + } + + #[test] + fn test_pvpanic_config() { + let pci_host = init_pvpanic_dev(7, PVPANIC_PANICKED | PVPANIC_CRASHLOADED, "pvpanic_test"); + let root_bus = pci_host.lock().unwrap().child_bus().unwrap(); + PCI_BUS!(root_bus, locked_bus, pci_bus); + let pvpanic_dev = pci_bus.get_device(0, 7).unwrap(); + MUT_PVPANIC_PCI!(pvpanic_dev, locked_dev, pvpanic); + let info = le_read_u16(&pvpanic.pci_base_mut().config.config, VENDOR_ID as usize) + .unwrap_or_else(|_| 0); + assert_eq!(info, PCI_VENDOR_ID_REDHAT); + + let info = le_read_u16(&pvpanic.pci_base_mut().config.config, DEVICE_ID as usize) + .unwrap_or_else(|_| 0); + assert_eq!(info, PCI_DEVICE_ID_REDHAT_PVPANIC); + + let info = le_read_u16( + &pvpanic.pci_base_mut().config.config, + SUB_CLASS_CODE as usize, + ) + .unwrap_or_else(|_| 0); + assert_eq!(info, PCI_CLASS_SYSTEM_OTHER); + + let info = le_read_u16(&pvpanic.pci_base_mut().config.config, SUBSYSTEM_VENDOR_ID) + .unwrap_or_else(|_| 0); + assert_eq!(info, PVPANIC_PCI_VENDOR_ID); + + let info = + le_read_u16(&pvpanic.pci_base_mut().config.config, SUBSYSTEM_ID).unwrap_or_else(|_| 0); + assert_eq!(info, PCI_SUBDEVICE_ID_QEMU); + } + + #[test] + fn test_pvpanic_read_features() { + let pci_host = init_pvpanic_dev(7, PVPANIC_PANICKED | PVPANIC_CRASHLOADED, "pvpanic_test"); + let root_bus = 
pci_host.lock().unwrap().child_bus().unwrap(); + PCI_BUS!(root_bus, locked_bus, pci_bus); + let pvpanic_dev = pci_bus.get_device(0, 7).unwrap(); + MUT_PVPANIC_PCI!(pvpanic_dev, locked_dev, pvpanic); + + // test read supported_features + let mut data_read = [0xffu8; 1]; + let result = &pvpanic.pci_base_mut().config.bars[0] + .region + .as_ref() + .unwrap() + .read(&mut data_read.as_mut(), GuestAddress(0), 0, 1); + assert!(result.is_ok()); + assert_eq!( + data_read.to_vec(), + vec![PVPANIC_PANICKED as u8 | PVPANIC_CRASHLOADED as u8] + ); + } + + #[test] + fn test_pvpanic_write_panicked() { + let pci_host = init_pvpanic_dev(7, PVPANIC_PANICKED | PVPANIC_CRASHLOADED, "pvpanic_test"); + let root_bus = pci_host.lock().unwrap().child_bus().unwrap(); + PCI_BUS!(root_bus, locked_bus, pci_bus); + let pvpanic_dev = pci_bus.get_device(0, 7).unwrap(); + MUT_PVPANIC_PCI!(pvpanic_dev, locked_dev, pvpanic); + + // test write panicked event + let data_write = [PVPANIC_PANICKED as u8; 1]; + let count = data_write.len() as u64; + let result = &pvpanic.pci_base_mut().config.bars[0] + .region + .as_ref() + .unwrap() + .write(&mut data_write.as_ref(), GuestAddress(0), 0, count); + assert!(result.is_ok()); + } + + #[test] + fn test_pvpanic_write_crashload() { + let pci_host = init_pvpanic_dev(7, PVPANIC_PANICKED | PVPANIC_CRASHLOADED, "pvpanic_test"); + let root_bus = pci_host.lock().unwrap().child_bus().unwrap(); + PCI_BUS!(root_bus, locked_bus, pci_bus); + let pvpanic_dev = pci_bus.get_device(0, 7).unwrap(); + MUT_PVPANIC_PCI!(pvpanic_dev, locked_dev, pvpanic); + + // test write crashload event + let data_write = [PVPANIC_CRASHLOADED as u8; 1]; + let count = data_write.len() as u64; + let result = &pvpanic.pci_base_mut().config.bars[0] + .region + .as_ref() + .unwrap() + .write(&mut data_write.as_ref(), GuestAddress(0), 0, count); + assert!(result.is_ok()); + } + + #[test] + fn test_pvpanic_write_unknown() { + let pci_host = init_pvpanic_dev(7, PVPANIC_PANICKED | PVPANIC_CRASHLOADED, "pvpanic_test"); + let locked_pci_host = pci_host.lock().unwrap(); + let root_bus = locked_pci_host.child_bus().unwrap(); + PCI_BUS!(root_bus, locked_bus, pci_bus); + let pvpanic_dev = pci_bus.get_device(0, 7).unwrap(); + MUT_PVPANIC_PCI!(pvpanic_dev, locked_dev, pvpanic); + + // test write unknown event + let data_write = [100u8; 1]; + let count = data_write.len() as u64; + let result = &pvpanic.pci_base_mut().config.bars[0] + .region + .as_ref() + .unwrap() + .write(&mut data_write.as_ref(), GuestAddress(0), 0, count); + assert!(result.is_err()); + } +} diff --git a/devices/src/misc/scream/alsa.rs b/devices/src/misc/scream/alsa.rs new file mode 100644 index 0000000000000000000000000000000000000000..f6a8689eaa6e0d0dca190ceea534b53ffe138faf --- /dev/null +++ b/devices/src/misc/scream/alsa.rs @@ -0,0 +1,299 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
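+
+//! ALSA backend for the scream sound card. `AlsaStreamData` maps a scream
+//! stream direction onto an ALSA playback or capture PCM, re-creates the PCM
+//! whenever the guest changes the stream format, and copies interleaved
+//! frames between the shared-memory audio chunks and the ALSA ring buffer.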
+ +use std::{ + cmp::min, + io::{Read, Write}, + sync::atomic::{fence, Ordering}, +}; + +use alsa::{ + pcm::{Access, Format, HwParams}, + Direction, ValueOr, PCM, +}; +use anyhow::Result; +use log::{debug, error, warn}; + +use super::{ + AudioInterface, AudioStatus, ScreamDirection, ShmemStreamFmt, StreamData, + AUDIO_SAMPLE_RATE_44KHZ, TARGET_LATENCY_MS, +}; + +const MAX_CHANNELS: u8 = 8; +const MIN_CHANNELS: u8 = 1; +const MAX_FRAME_NUM: u32 = 240; + +pub struct AlsaStreamData { + pcm: Option, + dir: Direction, + format: Format, + bytes_per_sample: u32, + stream_fmt: ShmemStreamFmt, + rate: u32, + latency: u32, + app_name: String, + init: bool, +} + +impl ScreamDirection { + fn trans_to_alsa(&self) -> Direction { + match self { + Self::Playback => Direction::Playback, + Self::Record => Direction::Capture, + } + } +} + +impl AlsaStreamData { + pub fn init(name: &str, dir: ScreamDirection) -> Self { + // Init receiver format to track changes. + let stream_fmt = ShmemStreamFmt::default(); + + let alsa_dir = dir.trans_to_alsa(); + + Self { + pcm: None, + dir: alsa_dir, + format: Format::S16LE, + bytes_per_sample: 0, + stream_fmt, + rate: AUDIO_SAMPLE_RATE_44KHZ, + latency: TARGET_LATENCY_MS, + app_name: name.to_string(), + init: false, + } + } + + fn setup(&mut self, channels: u8) -> Result<()> { + let pcm = PCM::new("default", self.dir, false)?; + { + // Set hardware parameters of the stream. + let hwp = HwParams::any(&pcm)?; + hwp.set_rate_resample(true)?; + hwp.set_access(Access::RWInterleaved)?; + hwp.set_format(self.format)?; + hwp.set_channels(u32::from(channels))?; + hwp.set_rate(self.rate, ValueOr::Nearest)?; + // Set the latency in microseconds. + hwp.set_buffer_time_near(self.latency * 1000, ValueOr::Nearest)?; + pcm.hw_params(&hwp)?; + trace::scream_setup_alsa_hwp(&self.app_name, &hwp); + + // Set software parameters of the stream. + let hwp = pcm.hw_params_current()?; + let swp = pcm.sw_params_current()?; + swp.set_start_threshold(hwp.get_buffer_size().unwrap())?; + pcm.sw_params(&swp)?; + trace::scream_setup_alsa_swp(&self.app_name, &swp); + } + self.pcm = Some(pcm); + Ok(()) + } + + fn check_fmt_update(&mut self, recv_data: &StreamData) -> bool { + if self.init && self.stream_fmt.fmt_generation == recv_data.fmt.fmt_generation { + return true; + } + + self.destroy(); + + // If audio format changed, reconfigure. 
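+        // Cache the new format and translate the sample size into the matching
+        // ALSA sample format (16/24/32 bit little-endian). Unsupported sizes
+        // clear `rate`, which keeps the stream uninitialized until the guest
+        // switches to a format that can be handled.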
+ self.stream_fmt = recv_data.fmt; + self.rate = recv_data.fmt.get_rate(); + + match recv_data.fmt.size { + 16 => { + self.format = Format::S16LE; + self.bytes_per_sample = 2; + } + 24 => { + self.format = Format::S243LE; + self.bytes_per_sample = 3; + } + 32 => { + self.format = Format::S32LE; + self.bytes_per_sample = 4; + } + _ => { + warn!( + "Unsupported sample size {} for {}, wait next format switch", + self.app_name, recv_data.fmt.size + ); + self.rate = 0; + } + } + + if self.rate == 0 { + self.init = false; + warn!("Configure wrong rate {} for {}", self.app_name, self.rate); + return false; + } + + if recv_data.fmt.channels < MIN_CHANNELS || recv_data.fmt.channels > MAX_CHANNELS { + self.init = false; + warn!( + "Configure wrong channels {} for {}", + self.app_name, recv_data.fmt.channels + ); + return false; + } + + match self.setup(recv_data.fmt.channels) { + Err(e) => { + error!( + "Failed to set up ALSA HW parameters and SW parameters for {}: {:?}", + self.app_name, e + ); + self.init = false; + } + Ok(_) => self.init = true, + } + self.init + } +} + +impl AudioInterface for AlsaStreamData { + fn send(&mut self, recv_data: &StreamData) { + if !self.check_fmt_update(recv_data) { + self.destroy(); + return; + } + + let mut frames = 0_u32; + let mut io = self.pcm.as_ref().unwrap().io_bytes(); + + // Make sure audio read does not bypass chunk_idx read. + fence(Ordering::Acquire); + + // SAFETY: audio_base is the shared memory. It already verifies the validity + // of the address range during the header check. + let data = unsafe { + std::slice::from_raw_parts( + recv_data.audio_base as *const u8, + recv_data.audio_size as usize, + ) + }; + + let samples = + recv_data.audio_size / (self.bytes_per_sample * u32::from(recv_data.fmt.channels)); + while frames < samples { + let send_frame_num = min(samples - frames, MAX_FRAME_NUM); + let offset = + (frames * self.bytes_per_sample * u32::from(recv_data.fmt.channels)) as usize; + let end = offset + + (send_frame_num * self.bytes_per_sample * u32::from(recv_data.fmt.channels)) + as usize; + match io.write(&data[offset..end]) { + Err(e) => { + debug!("Failed to write data to ALSA buffer: {:?}", e); + match self.pcm.as_ref().unwrap().prepare() { + Err(e) => { + error!("Can't recovery from underrun for playback: {:?}", e); + self.init = false; + } + Ok(_) => continue, + }; + } + Ok(n) => { + trace::scream_alsa_send_frames(frames, offset, end); + frames += + n as u32 / (self.bytes_per_sample * u32::from(recv_data.fmt.channels)); + } + } + } + } + + fn receive(&mut self, recv_data: &StreamData) -> i32 { + if !self.check_fmt_update(recv_data) { + self.destroy(); + return 0; + } + + let mut frames = 0_u32; + let mut io = self.pcm.as_ref().unwrap().io_bytes(); + + // Make sure audio read does not bypass chunk_idx read. + fence(Ordering::Acquire); + + // SAFETY: audio_base is the shared memory. It already verifies the validity + // of the address range during the header check. 
+ let data = unsafe { + std::slice::from_raw_parts_mut( + recv_data.audio_base as *mut u8, + recv_data.audio_size as usize, + ) + }; + + let samples = + recv_data.audio_size / (self.bytes_per_sample * u32::from(recv_data.fmt.channels)); + while frames < samples { + let offset = + (frames * self.bytes_per_sample * u32::from(recv_data.fmt.channels)) as usize; + let end = offset + + ((samples - frames) * self.bytes_per_sample * u32::from(recv_data.fmt.channels)) + as usize; + match io.read(&mut data[offset..end]) { + Err(e) => { + debug!("Failed to read data from ALSA buffer: {:?}", e); + match self.pcm.as_ref().unwrap().prepare() { + Err(e) => { + error!("Can't recovery from overrun for capture: {:?}", e); + self.init = false; + } + Ok(_) => continue, + }; + } + Ok(n) => { + trace::scream_alsa_receive_frames(frames, offset, end); + frames += + n as u32 / (self.bytes_per_sample * u32::from(recv_data.fmt.channels)); + + // During the host headset switchover, io.read is blocked for a long time. + // As a result, the VM recording delay exceeds 1s. Thereforce, check whether + // the delay exceeds 500ms. If the delay exceeds 500ms, start recording again. + let delay = self.pcm.as_ref().unwrap().delay().unwrap_or_else(|e| { + warn!("Scream alsa can't get frames delay: {e:?}"); + 0 + }); + if delay > i64::from(self.rate) >> 1 { + warn!("Scream alsa read audio blocked too long, delay {delay} frames, init again!"); + self.init = false; + } + } + } + } + 1 + } + + fn destroy(&mut self) { + if self.pcm.is_some() { + if self.dir == Direction::Playback { + self.pcm + .as_ref() + .unwrap() + .drain() + .unwrap_or_else(|e| error!("Failed to drain: {:?}", e)); + } + self.pcm = None; + } + + self.init = false; + } + + fn get_status(&self) -> AudioStatus { + if self.init { + AudioStatus::Started + } else { + AudioStatus::Ready + } + } +} diff --git a/devices/src/misc/scream/audio_demo.rs b/devices/src/misc/scream/audio_demo.rs new file mode 100644 index 0000000000000000000000000000000000000000..c709c21644d32d19e799c9d797ad581f3f47db16 --- /dev/null +++ b/devices/src/misc/scream/audio_demo.rs @@ -0,0 +1,137 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
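+
+//! File-backed demo backend for the scream device, meant for testing only:
+//! playback data is appended to the configured playback file and capture data
+//! is read back from the record file. `DemoAudioVolume` keeps the host volume
+//! behind an RwLock and notifies the guest through the volume-sync MSI-X
+//! vector when it is initialized.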
+ +use std::sync::{Arc, Mutex, RwLock}; +use std::{ + fs::{File, OpenOptions}, + io::{Read, Write}, + thread, +}; + +use core::time; +use log::error; + +use super::{AudioExtension, AudioInterface, AudioStatus, ScreamDirection, StreamData}; +use crate::misc::ivshmem::Ivshmem; + +pub const INITIAL_VOLUME_VAL: u32 = 0xaa; +const IVSHMEM_VOLUME_SYNC_VECTOR: u16 = 0; + +pub struct DemoAudioVolume { + shm_dev: Arc>, + vol: RwLock, +} + +// SAFETY: all fields are protected by lock +unsafe impl Send for DemoAudioVolume {} +// SAFETY: all fields are protected by lock +unsafe impl Sync for DemoAudioVolume {} + +impl AudioExtension for DemoAudioVolume { + fn get_host_volume(&self) -> u32 { + *self.vol.read().unwrap() + } + + fn set_host_volume(&self, vol: u32) { + *self.vol.write().unwrap() = vol; + } +} + +impl DemoAudioVolume { + pub fn new(shm_dev: Arc>) -> Arc { + let vol = Arc::new(Self { + shm_dev, + vol: RwLock::new(0), + }); + vol.notify(INITIAL_VOLUME_VAL); + vol + } + + fn notify(&self, vol: u32) { + *self.vol.write().unwrap() = vol; + self.shm_dev + .lock() + .unwrap() + .trigger_msix(IVSHMEM_VOLUME_SYNC_VECTOR); + } +} + +pub struct AudioDemo { + file: File, +} + +impl AudioDemo { + pub fn init(dir: ScreamDirection, playback: String, record: String) -> Self { + let file = match dir { + ScreamDirection::Playback => OpenOptions::new() + .append(true) + .open(playback) + .unwrap_or_else(|e| { + error!("Failed to append open Audio Demo file: {:?}", e); + panic!() + }), + ScreamDirection::Record => File::open(record).unwrap_or_else(|e| { + error!("Failed to append open Audio Demo file: {:?}", e); + panic!() + }), + }; + + Self { file } + } +} + +impl AudioInterface for AudioDemo { + fn send(&mut self, recv_data: &StreamData) { + // SAFETY: Audio demo device is only used for test. + let data = unsafe { + std::slice::from_raw_parts( + recv_data.audio_base as *const u8, + recv_data.audio_size as usize, + ) + }; + + self.file + .write_all(data) + .unwrap_or_else(|e| error!("Failed to write data to file: {:?}", e)); + + self.file + .flush() + .unwrap_or_else(|e| error!("Failed to flush data to file: {:?}", e)); + } + + fn receive(&mut self, recv_data: &StreamData) -> i32 { + thread::sleep(time::Duration::from_millis(20)); + // SAFETY: Audio demo device is only used for test. + let data = unsafe { + std::slice::from_raw_parts_mut( + recv_data.audio_base as *mut u8, + recv_data.audio_size as usize, + ) + }; + let size = self.file.read(data).unwrap_or_else(|e| { + error!("Failed to read data to file: {:?}", e); + 0 + }); + + if size == data.len() { + 1 + } else { + 0 + } + } + + fn destroy(&mut self) {} + + fn get_status(&self) -> AudioStatus { + AudioStatus::Started + } +} diff --git a/devices/src/misc/scream/mod.rs b/devices/src/misc/scream/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..ead35388e30abbaa485ea7f56f40e066314ff83f --- /dev/null +++ b/devices/src/misc/scream/mod.rs @@ -0,0 +1,793 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
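+
+//! The scream device is a virtual sound card built on top of the ivshmem
+//! device. Guest and host share a memory region that begins with a
+//! `ShmemHeader` (magic value plus one playback and one capture stream
+//! header); each stream header describes a ring of fixed-size audio chunks.
+//! Two host worker threads poll `chunk_idx` and hand chunks to an audio
+//! backend (ALSA, PulseAudio, OH audio or the file-based demo), while BAR0 of
+//! the ivshmem device is reused for the volume and status registers.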
+ +#[cfg(feature = "scream_alsa")] +mod alsa; +pub mod audio_demo; +#[cfg(all(target_env = "ohos", feature = "scream_ohaudio"))] +mod ohaudio; +#[cfg(feature = "scream_pulseaudio")] +mod pulseaudio; + +use std::str::FromStr; +use std::sync::atomic::{fence, Ordering}; +use std::sync::{Arc, Condvar, Mutex, RwLock, Weak}; +use std::{mem, thread}; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::{ArgAction, Parser}; +use core::time; +use log::{error, info, warn}; +use once_cell::sync::Lazy; + +#[cfg(feature = "scream_alsa")] +use self::alsa::AlsaStreamData; +use self::audio_demo::{AudioDemo, DemoAudioVolume}; +use super::ivshmem::Ivshmem; +use crate::pci::{le_read_u32, le_write_u32}; +use crate::{Bus, Device}; +use address_space::{GuestAddress, HostMemMapping, Region}; +use machine_manager::config::{get_pci_df, parse_bool, valid_id}; +use machine_manager::notifier::register_vm_pause_notifier; +use machine_manager::state_query::register_state_query_callback; +#[cfg(all(target_env = "ohos", feature = "scream_ohaudio"))] +use ohaudio::{OhAudio, OhAudioVolume}; +#[cfg(feature = "scream_pulseaudio")] +use pulseaudio::PulseStreamData; +#[cfg(all(target_env = "ohos", feature = "scream_ohaudio"))] +use util::ohos_binding::misc::bound_tokenid; + +pub const AUDIO_SAMPLE_RATE_44KHZ: u32 = 44100; +pub const AUDIO_SAMPLE_RATE_48KHZ: u32 = 48000; + +pub const WINDOWS_SAMPLE_BASE_RATE: u8 = 128; + +pub const TARGET_LATENCY_MS: u32 = 50; + +#[cfg(all(target_env = "ohos", feature = "scream_ohaudio"))] +const IVSHMEM_VOLUME_SYNC_VECTOR: u16 = 0; +const IVSHMEM_STATUS_CHANGE_VECTOR: u16 = 1; +const IVSHMEM_VECTORS_NR: u32 = 2; +pub const IVSHMEM_BAR0_VOLUME: u64 = 240; +pub const IVSHMEM_BAR0_STATUS: u64 = 244; + +pub const STATUS_PLAY_BIT: u32 = 0x1; +pub const STATUS_START_BIT: u32 = 0x2; +const STATUS_MIC_AVAIL_BIT: u32 = 0x4; + +// A frame of back-end audio data is 50ms, and the next frame of audio data needs +// to be trained in polling within 50ms. Theoretically, the shorter the polling time, +// the better. However, if the value is too small, the overhead is high. So take a +// compromise: 50 * 1000 / 8 us. +const POLL_DELAY_US: u64 = (TARGET_LATENCY_MS as u64) * 1000 / 8; + +pub const SCREAM_MAGIC: u64 = 0x02032023; + +#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, PartialOrd)] +pub enum AudioStatus { + // Processor is ready and waiting for play/capture. + #[default] + Ready, + // Processor is started and doing job. + Started, + // OH audio framework error occurred. + Error, + // OH audio stream is interrupted. + Intr, +} + +type AuthorityNotify = dyn Fn() + Send + Sync; + +#[derive(Clone)] +pub struct AuthorityInformation { + state: bool, + notify: Option>, +} + +impl AuthorityInformation { + const fn default() -> AuthorityInformation { + AuthorityInformation { + state: true, + notify: None, + } + } +} + +type AuthInfo = RwLock; +static AUTH_INFO: Lazy = Lazy::new(|| RwLock::new(AuthorityInformation::default())); + +/// The scream device defines the audio directions. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ScreamDirection { + Playback, + Record, +} + +/// Audio stream header information in the shared memory. +#[repr(C)] +#[derive(Default, Clone, Copy, Debug)] +pub struct ShmemStreamHeader { + /// Whether audio is started. + pub is_started: u32, + /// Current audio chunk position. + pub chunk_idx: u16, + /// Maximum number of audio chunk. + pub max_chunks: u16, + /// Size of a single audio chunk. 
+ pub chunk_size: u32, + /// Offset of the first audio data based on shared memory. + pub offset: u32, + start_time_ns: i64, + /// Audio stream format. + pub fmt: ShmemStreamFmt, +} + +pub fn set_record_authority(auth: bool) { + AUTH_INFO.write().unwrap().state = auth; + if let Some(auth_notify) = &AUTH_INFO.read().unwrap().notify { + auth_notify(); + } +} + +pub fn set_authority_notify(notify: Option>) { + AUTH_INFO.write().unwrap().notify = notify; +} + +pub fn get_record_authority() -> bool { + AUTH_INFO.read().unwrap().state +} + +impl ShmemStreamHeader { + pub fn check(&self, last_end: u64) -> bool { + if u64::from(self.offset) < last_end { + warn!( + "Guest set bad offset {} exceeds last stream buffer end {}", + self.offset, last_end + ); + } + + if self.chunk_idx > self.max_chunks { + error!( + "Invalid max_chunks: {} or chunk_idx: {}", + self.max_chunks, self.chunk_idx + ); + return false; + } + + if self.fmt.channels == 0 || self.fmt.channel_map == 0 { + error!( + "The fmt channels {} or channel_map {} is invalid", + self.fmt.channels, self.fmt.channel_map + ); + return false; + } + true + } +} + +/// First Header data in the shared memory. +#[repr(C)] +#[derive(Default)] +pub struct ShmemHeader { + magic: u64, + /// PlayBack audio stream header. + play: ShmemStreamHeader, + /// Record audio stream header. + capt: ShmemStreamHeader, +} + +/// Audio stream format in the shared memory. +#[repr(C)] +#[derive(PartialEq, Eq, Clone, Copy, Debug)] +pub struct ShmemStreamFmt { + /// Indicates whether the audio format is changed. + pub fmt_generation: u32, + /// Audio sampling rate. + pub rate: u8, + /// Number of audio sampling bits. + pub size: u8, + /// Number of audio channel. + pub channels: u8, + pad: u8, + /// Mapping of audio channel. + pub channel_map: u32, + pad2: u32, +} + +impl Default for ShmemStreamFmt { + fn default() -> Self { + Self { + fmt_generation: 0, + rate: 0, + size: 0, + channels: 2, + pad: 0, + channel_map: 0x03, + pad2: 0, + } + } +} + +impl ShmemStreamFmt { + pub fn get_rate(&self) -> u32 { + let sample_rate = if self.rate >= WINDOWS_SAMPLE_BASE_RATE { + AUDIO_SAMPLE_RATE_44KHZ + } else { + AUDIO_SAMPLE_RATE_48KHZ + }; + sample_rate * u32::from(self.rate % WINDOWS_SAMPLE_BASE_RATE) + } +} + +struct ScreamCond { + cond: Condvar, + paused: Mutex, +} + +impl ScreamCond { + const STREAM_PAUSE_BIT: u8 = 0x1; + const VM_PAUSE_BIT: u8 = 0x2; + + fn new() -> Arc { + Arc::new(Self { + cond: Condvar::default(), + paused: Mutex::new(Self::STREAM_PAUSE_BIT), + }) + } + + fn wait_if_paused(&self, interface: Arc>) { + let mut locked_pause = self.paused.lock().unwrap(); + while *locked_pause != 0 { + interface.lock().unwrap().destroy(); + locked_pause = self.cond.wait(locked_pause).unwrap(); + } + } + + fn set_value(&self, bv: u8, set: bool) { + let mut locked_pause = self.paused.lock().unwrap(); + let old_val = *locked_pause; + match set { + true => *locked_pause = old_val | bv, + false => *locked_pause = old_val & !bv, + } + if *locked_pause == 0 { + self.cond.notify_all(); + } + } + + fn set_vm_pause(&self, paused: bool) { + self.set_value(Self::VM_PAUSE_BIT, paused); + } + + fn set_stream_pause(&self, paused: bool) { + self.set_value(Self::STREAM_PAUSE_BIT, paused); + } +} + +/// Audio stream data structure. +#[derive(Debug, Default)] +pub struct StreamData { + pub fmt: ShmemStreamFmt, + max_chunks: u16, + chunk_idx: u16, + /// Start address of header implies. + start_addr: u64, + /// Length of total data which header implies. 
+ data_shm_len: u64, + /// Size of the data to be played or recorded. + pub audio_size: u32, + /// Location of the played or recorded audio data in the shared memory. + pub audio_base: u64, + /// VM pause notifier id. + pause_notifier_id: u64, +} + +impl StreamData { + fn init(&mut self, header: &ShmemStreamHeader, hva: u64) { + fence(Ordering::Acquire); + self.fmt = header.fmt; + self.chunk_idx = header.chunk_idx; + self.max_chunks = header.max_chunks; + self.data_shm_len = u64::from(header.chunk_size) * u64::from(self.max_chunks); + self.start_addr = hva + u64::from(header.offset); + self.audio_size = header.chunk_size; + } + + fn register_pause_notifier(&mut self, cond: Arc) { + let pause_notify = Arc::new(move |paused: bool| { + cond.set_vm_pause(paused); + }); + self.pause_notifier_id = register_vm_pause_notifier(pause_notify); + } + + fn wait_for_ready( + &mut self, + interface: Arc>, + dir: ScreamDirection, + hva: u64, + cond: Arc, + ) { + // SAFETY: hva is the shared memory base address. It already verifies the validity + // of the address range during the scream realize. + let mut header = &unsafe { std::slice::from_raw_parts(hva as *const ShmemHeader, 1) }[0]; + + let stream_header = match dir { + ScreamDirection::Playback => &header.play, + ScreamDirection::Record => &header.capt, + }; + + loop { + let mut locked_paused = cond.paused.lock().unwrap(); + while *locked_paused != 0 { + interface.lock().unwrap().destroy(); + locked_paused = cond.cond.wait(locked_paused).unwrap(); + } + + if header.magic != SCREAM_MAGIC || stream_header.is_started == 0 { + *locked_paused |= ScreamCond::STREAM_PAUSE_BIT; + continue; + } + + header = + // SAFETY: hva is allocated by libc:::mmap, it can be guaranteed to be legal. + &unsafe { std::slice::from_raw_parts(hva as *const ShmemHeader, 1) }[0]; + self.init(stream_header, hva); + + let mut last_end = 0_u64; + // The recording buffer is behind the playback buffer. Thereforce, the end position of + // the playback buffer must be calculted to determine whether the two buffers overlap. + if dir == ScreamDirection::Record && header.play.is_started != 0 { + last_end = u64::from(header.play.offset) + + u64::from(header.play.chunk_size) * u64::from(header.play.max_chunks); + } + + if !stream_header.check(last_end) { + *locked_paused |= ScreamCond::STREAM_PAUSE_BIT; + continue; + } + + trace::scream_init(&dir, &stream_header); + + return; + } + } + + fn update_buffer_by_chunk_idx( + &mut self, + hva: u64, + shmem_size: u64, + stream_header: &ShmemStreamHeader, + ) -> bool { + self.audio_base = self + .start_addr + .saturating_add(u64::from(self.audio_size) * u64::from(self.chunk_idx)); + let buf_end = hva + shmem_size; + if self.audio_base.saturating_add(u64::from(self.audio_size)) > buf_end { + error!( + "Scream: wrong header: offset {} chunk_idx {} chunk_size {} max_chunks {}", + stream_header.offset, stream_header.chunk_idx, self.audio_size, self.max_chunks, + ); + return false; + } + true + } + + fn playback_trans( + &mut self, + hva: u64, + shmem_size: u64, + interface: Arc>, + cond: Arc, + ) { + // SAFETY: hva is the shared memory base address. It already verifies the validity + // of the address range during the header check. 
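+        // The playback header is re-borrowed from the shared memory so that
+        // fields updated by the guest driver (chunk_idx, fmt_generation) are
+        // observed on every loop iteration below.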
+ let header = &mut unsafe { std::slice::from_raw_parts_mut(hva as *mut ShmemHeader, 1) }[0]; + let play = &header.play; + + loop { + cond.wait_if_paused(interface.clone()); + + if play.fmt.fmt_generation != self.fmt.fmt_generation { + break; + } + if self.chunk_idx == play.chunk_idx { + thread::sleep(time::Duration::from_micros(POLL_DELAY_US)); + continue; + } + // If the difference between the currently processed chunk_idx and the chunk_idx in + // the shared memory is greater than 4, the processing of the backend device is too + // slow and the backward data is skipped. + if play + .chunk_idx + .wrapping_add(self.max_chunks) + .wrapping_sub(self.chunk_idx) + % self.max_chunks + > 4 + { + self.chunk_idx = + play.chunk_idx.wrapping_add(self.max_chunks).wrapping_sub(1) % self.max_chunks; + } else { + self.chunk_idx = (self.chunk_idx + 1) % self.max_chunks; + } + + if !self.update_buffer_by_chunk_idx(hva, shmem_size, play) { + return; + } + interface.lock().unwrap().send(self); + } + } + + fn capture_trans( + &mut self, + hva: u64, + shmem_size: u64, + interface: Arc>, + cond: Arc, + ) { + // SAFETY: hva is the shared memory base address. It already verifies the validity + // of the address range during the header check. + let header = &mut unsafe { std::slice::from_raw_parts_mut(hva as *mut ShmemHeader, 1) }[0]; + let capt = &mut header.capt; + + while capt.is_started != 0 { + cond.wait_if_paused(interface.clone()); + + if capt.fmt.fmt_generation != self.fmt.fmt_generation { + return; + } + + if !self.update_buffer_by_chunk_idx(hva, shmem_size, capt) { + return; + } + + let recv_chunks_cnt: i32 = if get_record_authority() { + interface.lock().unwrap().receive(self) + } else { + interface.lock().unwrap().destroy(); + 0 + }; + + match recv_chunks_cnt.cmp(&0) { + std::cmp::Ordering::Less => thread::sleep(time::Duration::from_millis(100)), + std::cmp::Ordering::Greater => { + self.chunk_idx = match (self.chunk_idx + recv_chunks_cnt as u16) + .checked_rem(capt.max_chunks) + { + Some(idx) => idx, + None => { + warn!("Scream: capture header might be cleared by driver"); + return; + } + }; + // Make sure chunk_idx write does not bypass audio chunk write. 
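+                    // The guest driver is expected to consume captured chunks
+                    // up to chunk_idx, so the index must not become visible
+                    // before the samples are in place.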
+ fence(Ordering::SeqCst); + capt.chunk_idx = self.chunk_idx; + } + std::cmp::Ordering::Equal => continue, + } + } + } +} + +#[derive(Clone, Debug)] +enum ScreamInterface { + #[cfg(feature = "scream_alsa")] + Alsa, + #[cfg(feature = "scream_pulseaudio")] + PulseAudio, + #[cfg(all(target_env = "ohos", feature = "scream_ohaudio"))] + OhAudio, + Demo, +} + +impl FromStr for ScreamInterface { + type Err = anyhow::Error; + + fn from_str(s: &str) -> std::result::Result { + match s { + #[cfg(feature = "scream_alsa")] + "ALSA" => Ok(ScreamInterface::Alsa), + #[cfg(feature = "scream_pulseaudio")] + "PulseAudio" => Ok(ScreamInterface::PulseAudio), + #[cfg(all(target_env = "ohos", feature = "scream_ohaudio"))] + "OhAudio" => Ok(ScreamInterface::OhAudio), + "Demo" => Ok(ScreamInterface::Demo), + _ => Err(anyhow!("Unknown scream interface")), + } + } +} + +#[derive(Parser, Debug, Clone)] +#[command(no_binary_name(true))] +pub struct ScreamConfig { + #[arg(long)] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + id: String, + #[arg(long)] + pub bus: String, + #[arg(long, value_parser = get_pci_df)] + pub addr: (u8, u8), + #[arg(long)] + pub memdev: String, + #[arg(long)] + interface: ScreamInterface, + #[arg(long, default_value = "")] + playback: String, + #[arg(long, default_value = "")] + record: String, + #[arg(long, default_value = "on", action = ArgAction::Append, value_parser = parse_bool)] + record_auth: bool, +} + +/// Scream sound card device structure. +pub struct Scream { + hva: u64, + size: u64, + config: ScreamConfig, + token_id: Option>>, + interface_resource: Vec>>, +} + +impl Scream { + pub fn new( + size: u64, + config: ScreamConfig, + token_id: Option>>, + ) -> Result { + set_record_authority(config.record_auth); + let header_size = mem::size_of::() as u64; + if size < header_size { + bail!( + "The size {} of the shared memory is smaller than audio header {}", + size, + header_size + ); + } + Ok(Self { + hva: 0, + size, + config, + token_id, + interface_resource: Vec::new(), + }) + } + + #[allow(unused_variables)] + fn interface_init(&self, name: &str, dir: ScreamDirection) -> Arc> { + match self.config.interface { + #[cfg(feature = "scream_alsa")] + ScreamInterface::Alsa => Arc::new(Mutex::new(AlsaStreamData::init(name, dir))), + #[cfg(feature = "scream_pulseaudio")] + ScreamInterface::PulseAudio => Arc::new(Mutex::new(PulseStreamData::init(name, dir))), + #[cfg(all(target_env = "ohos", feature = "scream_ohaudio"))] + ScreamInterface::OhAudio => Arc::new(Mutex::new(OhAudio::init(dir))), + ScreamInterface::Demo => Arc::new(Mutex::new(AudioDemo::init( + dir, + self.config.playback.clone(), + self.config.record.clone(), + ))), + } + } + + fn start_play_thread_fn(&mut self, cond: Arc) -> Result<()> { + let hva = self.hva; + let shmem_size = self.size; + let interface = self.interface_init("ScreamPlay", ScreamDirection::Playback); + self.interface_resource.push(interface.clone()); + let cloned_interface = interface.clone(); + self.register_state_query("scream-play".to_string(), cloned_interface); + thread::Builder::new() + .name("scream audio play worker".to_string()) + .spawn(move || { + let clone_interface = interface.clone(); + let mut play_data = StreamData::default(); + play_data.register_pause_notifier(cond.clone()); + + loop { + play_data.wait_for_ready( + clone_interface.clone(), + ScreamDirection::Playback, + hva, + cond.clone(), + ); + + play_data.playback_trans( + hva, + shmem_size, + clone_interface.clone(), + cond.clone(), + ); + } + }) + .with_context(|| 
"Failed to create thread scream")?; + Ok(()) + } + + fn start_record_thread_fn(&mut self, cond: Arc) -> Result<()> { + let hva = self.hva; + let shmem_size = self.size; + let interface = self.interface_init("ScreamCapt", ScreamDirection::Record); + let _ti = self.token_id.clone(); + self.interface_resource.push(interface.clone()); + let cloned_interface = interface.clone(); + self.register_state_query("scream-record".to_string(), cloned_interface); + thread::Builder::new() + .name("scream audio capt worker".to_string()) + .spawn(move || { + let clone_interface = interface.clone(); + let mut capt_data = StreamData::default(); + capt_data.register_pause_notifier(cond.clone()); + + loop { + capt_data.wait_for_ready( + clone_interface.clone(), + ScreamDirection::Record, + hva, + cond.clone(), + ); + + #[cfg(all(target_env = "ohos", feature = "scream_ohaudio"))] + if let Some(token_id) = &_ti { + bound_tokenid(*token_id.read().unwrap()) + .unwrap_or_else(|e| error!("bound token ID failed: {}", e)); + } + capt_data.capture_trans(hva, shmem_size, clone_interface.clone(), cond.clone()); + } + }) + .with_context(|| "Failed to create thread scream")?; + Ok(()) + } + + fn register_state_query(&self, module: String, interface: Arc>) { + register_state_query_callback( + module, + Arc::new(move || match interface.lock().unwrap().get_status() { + AudioStatus::Started => "On".to_string(), + _ => "Off".to_string(), + }), + ); + } + + pub fn realize(&mut self, parent_bus: Weak>) -> Result<()> { + let host_mmap = Arc::new(HostMemMapping::new( + GuestAddress(0), + None, + self.size, + None, + false, + true, + false, + )?); + self.hva = host_mmap.host_address(); + + let devfn = (self.config.addr.0 << 3) + self.config.addr.1; + let mem_region = Region::init_ram_region(host_mmap, "ivshmem_ram"); + let ivshmem = Ivshmem::new( + "ivshmem".to_string(), + devfn, + parent_bus, + mem_region, + IVSHMEM_VECTORS_NR, + ); + let ivshmem = ivshmem.realize()?; + let ivshmem_cloned = ivshmem.clone(); + + let play_cond = ScreamCond::new(); + let capt_cond = ScreamCond::new(); + self.set_ivshmem_ops(ivshmem, play_cond.clone(), capt_cond.clone()); + + let author_notify = Arc::new(move || { + ivshmem_cloned + .lock() + .unwrap() + .trigger_msix(IVSHMEM_STATUS_CHANGE_VECTOR); + }); + set_authority_notify(Some(author_notify)); + + self.start_play_thread_fn(play_cond)?; + self.start_record_thread_fn(capt_cond) + } + + fn set_ivshmem_ops( + &mut self, + ivshmem: Arc>, + play_cond: Arc, + capt_cond: Arc, + ) { + let cloned_play_cond = play_cond.clone(); + let cloned_capt_cond = capt_cond.clone(); + let cb = Box::new(move || { + info!("Scream: device is reset."); + cloned_play_cond.set_stream_pause(true); + cloned_capt_cond.set_stream_pause(true); + }); + ivshmem.lock().unwrap().register_reset_callback(cb); + + let interface = self.create_audio_extension(ivshmem.clone()); + let interface2 = interface.clone(); + let bar0_write = Arc::new(move |data: &[u8], offset: u64| { + match offset { + IVSHMEM_BAR0_VOLUME => { + interface.set_host_volume(le_read_u32(data, 0).unwrap()); + } + IVSHMEM_BAR0_STATUS => { + let val = le_read_u32(data, 0).unwrap(); + if val & STATUS_PLAY_BIT == STATUS_PLAY_BIT { + play_cond.set_stream_pause(val & STATUS_START_BIT != STATUS_START_BIT); + } else { + capt_cond.set_stream_pause(val & STATUS_START_BIT != STATUS_START_BIT); + } + } + _ => { + info!("ivshmem-scream: unsupported write: {offset}"); + } + } + true + }); + let bar0_read = Arc::new(move |data: &mut [u8], offset: u64| { + match offset { + 
IVSHMEM_BAR0_VOLUME => { + let _ = le_write_u32(data, 0, interface2.get_host_volume()); + } + IVSHMEM_BAR0_STATUS => { + let _ = le_write_u32(data, 0, interface2.get_status_register()); + } + _ => { + info!("ivshmem-scream: unsupported read: {offset}"); + } + } + true + }); + ivshmem + .lock() + .unwrap() + .set_bar0_ops((bar0_write, bar0_read)); + } + + fn create_audio_extension(&self, _ivshmem: Arc>) -> Arc { + match self.config.interface { + #[cfg(all(target_env = "ohos", feature = "scream_ohaudio"))] + ScreamInterface::OhAudio => OhAudioVolume::new(_ivshmem), + ScreamInterface::Demo => DemoAudioVolume::new(_ivshmem), + #[allow(unreachable_patterns)] + _ => Arc::new(AudioExtensionDummy {}), + } + } +} + +pub trait AudioInterface: Send { + fn send(&mut self, recv_data: &StreamData); + fn receive(&mut self, recv_data: &StreamData) -> i32; + fn destroy(&mut self); + fn get_status(&self) -> AudioStatus; +} + +pub trait AudioExtension: Send + Sync { + fn set_host_volume(&self, _vol: u32) {} + fn get_host_volume(&self) -> u32 { + 0 + } + fn get_status_register(&self) -> u32 { + match get_record_authority() { + true => STATUS_MIC_AVAIL_BIT, + false => 0, + } + } +} + +struct AudioExtensionDummy; +impl AudioExtension for AudioExtensionDummy {} +// SAFETY: it is a dummy +unsafe impl Send for AudioExtensionDummy {} +// SAFETY: it is a dummy +unsafe impl Sync for AudioExtensionDummy {} diff --git a/devices/src/misc/scream/ohaudio.rs b/devices/src/misc/scream/ohaudio.rs new file mode 100755 index 0000000000000000000000000000000000000000..a4d8925430ea8f495dfbdf9dc9e2557831e38cda --- /dev/null +++ b/devices/src/misc/scream/ohaudio.rs @@ -0,0 +1,784 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::collections::VecDeque; +use std::os::raw::c_void; +use std::sync::{ + atomic::{fence, AtomicBool, Ordering}, + Arc, Condvar, Mutex, RwLock, +}; +use std::{cmp, io::Read, ptr, thread, time::Duration}; + +use log::{error, info, warn}; + +use crate::misc::ivshmem::Ivshmem; +use crate::misc::scream::{ + AudioExtension, AudioInterface, AudioStatus, ScreamDirection, StreamData, + IVSHMEM_VOLUME_SYNC_VECTOR, +}; +use util::ohos_binding::audio::*; + +const STREAM_DATA_VEC_CAPACITY: usize = 15; +const FLUSH_DELAY_MS: u64 = 5; +const FLUSH_DELAY_CNT: u64 = 200; +const SCREAM_MAX_VOLUME: u32 = 110; +const CAPTURE_WAIT_TIMEOUT: u64 = 500; + +trait OhAudioProcess { + fn init(&mut self, stream: &StreamData) -> bool; + fn destroy(&mut self); + fn process(&mut self, recv_data: &StreamData) -> i32; + fn get_status(&self) -> AudioStatus; +} + +#[derive(Debug, Clone, Copy)] +struct StreamUnit { + addr: usize, + len: usize, +} + +impl Read for StreamUnit { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let len = cmp::min(self.len, buf.len()); + // SAFETY: all the source data are in scream BAR. 
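+        // Only `len` bytes are copied here; `addr` and `len` are advanced below so that a
+        // partial read can be resumed by the next call.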
+ unsafe { ptr::copy_nonoverlapping(self.addr as *const u8, buf.as_mut_ptr(), len) }; + self.len -= len; + self.addr += len; + Ok(len) + } +} + +impl StreamUnit { + #[inline] + fn is_empty(&self) -> bool { + self.len == 0 + } + + fn new(addr: usize, len: usize) -> Self { + Self { addr, len } + } + + #[inline] + fn len(&self) -> usize { + self.len + } +} + +struct StreamQueue { + queue: VecDeque, + data_size: usize, +} + +impl Read for StreamQueue { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let len = buf.len(); + let mut ret = 0_usize; + while ret < len { + if self.queue.is_empty() { + break; + } + let unit = match self.queue.front_mut() { + Some(u) => u, + None => break, + }; + let rlen = unit.read(&mut buf[ret..len]).unwrap(); + ret += rlen; + self.data_size -= rlen; + if unit.is_empty() { + self.pop_front(); + } + } + Ok(ret) + } + + // If there's no enough data, let's fill the whole buffer with 0. + fn read_exact(&mut self, buf: &mut [u8]) -> std::io::Result<()> { + let len = buf.len(); + match self.read(buf) { + Ok(ret) => { + if ret < len { + self.read_zero(&mut buf[ret..len]); + } + Ok(()) + } + Err(e) => Err(e), + } + } +} + +impl StreamQueue { + fn new(capacity: usize) -> Self { + Self { + queue: VecDeque::with_capacity(capacity), + data_size: 0, + } + } + + fn clear(&mut self) { + self.queue.clear(); + } + + #[inline] + fn data_size(&self) -> usize { + self.data_size + } + + fn pop_front(&mut self) { + if let Some(elem) = self.queue.pop_front() { + self.data_size -= elem.len(); + } + } + + fn push_back(&mut self, unit: StreamUnit) { + // When audio data is not consumed in time, this buffer + // might be full. So let's keep the max size by dropping + // the old data. This can guarantee sound playing can't + // be delayed too much and the buffer won't become too + // large. + if self.queue.len() == self.queue.capacity() { + self.pop_front(); + } + self.data_size += unit.len; + self.queue.push_back(unit); + } + + fn read_zero(&mut self, buf: &mut [u8]) { + // SAFETY: the buffer is guaranteed by the caller. 
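+        // Zero-filling on underrun lets the renderer output silence instead of stale samples.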
+ unsafe { + ptr::write_bytes(buf.as_mut_ptr(), 0, buf.len()); + } + } +} + +struct OhAudioRender { + ctx: Option, + stream_data: Arc>, + flushing: AtomicBool, + status: AudioStatus, +} + +impl Default for OhAudioRender { + fn default() -> OhAudioRender { + OhAudioRender { + ctx: None, + stream_data: Arc::new(Mutex::new(StreamQueue::new(STREAM_DATA_VEC_CAPACITY))), + flushing: AtomicBool::new(false), + status: AudioStatus::default(), + } + } +} + +impl OhAudioRender { + fn check_fmt_update(&mut self, recv_data: &StreamData) { + if self.ctx.is_some() + && !self.ctx.as_ref().unwrap().check_fmt( + recv_data.fmt.size, + recv_data.fmt.get_rate(), + recv_data.fmt.channels, + ) + { + self.destroy(); + } + } + + fn flush(&mut self) { + self.set_flushing(true); + let mut cnt = 0_u64; + while cnt < FLUSH_DELAY_CNT { + thread::sleep(Duration::from_millis(FLUSH_DELAY_MS)); + cnt += 1; + if self.stream_data.lock().unwrap().data_size() == 0 { + break; + } + } + } + + fn flush_renderer(&self) { + let _ = self.ctx.as_ref().unwrap().flush_renderer(); + } + + #[inline(always)] + fn is_flushing(&self) -> bool { + self.flushing.load(Ordering::Acquire) + } + + #[inline(always)] + fn set_flushing(&mut self, flush: bool) { + self.flushing.store(flush, Ordering::Release); + } +} + +impl OhAudioProcess for OhAudioRender { + fn init(&mut self, stream: &StreamData) -> bool { + if self.ctx.is_none() { + let mut context = AudioContext::new(AudioStreamType::Render); + match context.init( + stream.fmt.size, + stream.fmt.get_rate(), + stream.fmt.channels, + AudioProcessCb::RendererCb(Some(on_write_data_cb), Some(render_on_interrupt_cb)), + ptr::addr_of!(*self) as *mut c_void, + ) { + Ok(()) => self.ctx = Some(context), + Err(e) => { + error!("failed to create oh audio render context: {}", e); + return false; + } + } + } + match self.ctx.as_ref().unwrap().start() { + Ok(()) => { + info!("Renderer start"); + self.status = AudioStatus::Started; + trace::oh_scream_render_init(&self.ctx); + } + Err(e) => { + error!("failed to start oh audio renderer: {}", e); + } + } + self.status == AudioStatus::Started + } + + fn destroy(&mut self) { + info!("Renderer destroy"); + match self.status { + AudioStatus::Error | AudioStatus::Intr => { + self.ctx = None; + self.status = AudioStatus::Ready; + return; + } + AudioStatus::Started => self.flush(), + _ => {} + } + self.ctx = None; + self.stream_data.lock().unwrap().clear(); + self.set_flushing(false); + self.status = AudioStatus::Ready; + trace::oh_scream_render_destroy(); + } + + fn process(&mut self, recv_data: &StreamData) -> i32 { + self.check_fmt_update(recv_data); + + fence(Ordering::Acquire); + + trace::trace_scope_start!(ohaudio_render_process, args = (recv_data)); + + self.stream_data.lock().unwrap().push_back(StreamUnit::new( + recv_data.audio_base as usize, + recv_data.audio_size as usize, + )); + + if self.status == AudioStatus::Error || self.status == AudioStatus::Intr { + error!( + "Audio server {:?} occurred. 
Destroy and reconnect it.", + self.status + ); + self.destroy(); + } + + if self.status == AudioStatus::Ready && !self.init(recv_data) { + error!("failed to init oh audio"); + self.destroy(); + } + 0 + } + + fn get_status(&self) -> AudioStatus { + self.status + } +} + +struct CaptureStream { + cond: Condvar, + data: Mutex>, + expected: usize, +} + +impl Default for CaptureStream { + fn default() -> Self { + Self { + cond: Condvar::new(), + data: Mutex::new(Vec::with_capacity(1 << 20)), + expected: 0, + } + } +} + +impl CaptureStream { + fn wait_for_data(&mut self, buf: &mut [u8]) -> bool { + let mut locked_data = self.data.lock().unwrap(); + self.expected = buf.len(); + while locked_data.len() < self.expected { + let ret = self + .cond + .wait_timeout(locked_data, Duration::from_millis(CAPTURE_WAIT_TIMEOUT)) + .unwrap(); + if ret.1.timed_out() { + return false; + } + locked_data = ret.0; + } + buf.copy_from_slice(&locked_data[..self.expected]); + *locked_data = locked_data[self.expected..].to_vec(); + self.expected = 0; + true + } + + fn append_data(&mut self, buf: &[u8]) { + let mut locked_data = self.data.lock().unwrap(); + locked_data.extend_from_slice(buf); + if locked_data.len() > self.expected { + self.cond.notify_all(); + } + } + + fn reset(&mut self) { + let mut locked_data = self.data.lock().unwrap(); + locked_data.clear(); + self.expected = 0; + self.cond.notify_all(); + } +} + +#[derive(Default)] +struct OhAudioCapture { + ctx: Option, + status: AudioStatus, + stream: CaptureStream, +} + +impl OhAudioCapture { + fn check_fmt_update(&mut self, recv_data: &StreamData) { + if self.ctx.is_none() + || !self.ctx.as_ref().unwrap().check_fmt( + recv_data.fmt.size, + recv_data.fmt.get_rate(), + recv_data.fmt.channels, + ) + { + self.destroy(); + } + } +} + +impl OhAudioProcess for OhAudioCapture { + fn init(&mut self, stream: &StreamData) -> bool { + let mut context = AudioContext::new(AudioStreamType::Capturer); + match context.init( + stream.fmt.size, + stream.fmt.get_rate(), + stream.fmt.channels, + AudioProcessCb::CapturerCb(Some(on_read_data_cb), Some(capture_on_interrupt_cb)), + ptr::addr_of!(*self) as *mut c_void, + ) { + Ok(()) => self.ctx = Some(context), + Err(e) => { + error!("failed to create oh audio capturer context: {}", e); + return false; + } + } + match self.ctx.as_ref().unwrap().start() { + Ok(()) => { + info!("Capturer start"); + self.status = AudioStatus::Started; + trace::oh_scream_capture_init(&self.ctx); + true + } + Err(e) => { + error!("failed to start oh audio capturer: {}", e); + false + } + } + } + + fn destroy(&mut self) { + info!("Capturer destroy"); + self.status = AudioStatus::Ready; + self.ctx = None; + self.stream.reset(); + trace::oh_scream_capture_destroy(); + } + + fn process(&mut self, recv_data: &StreamData) -> i32 { + self.check_fmt_update(recv_data); + + trace::trace_scope_start!(ohaudio_capturer_process, args = (recv_data)); + + if self.status == AudioStatus::Error || self.status == AudioStatus::Intr { + self.destroy(); + } + + if self.status == AudioStatus::Ready && !self.init(recv_data) { + self.destroy(); + return -1; + } + // SAFETY: the buffer is from ivshmem and the caller ensures its validation. 
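+        // The capturer fills this guest-visible chunk in place; wait_for_data() blocks until
+        // enough recorded data has arrived or the wait times out.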
+ let buf = unsafe { + std::slice::from_raw_parts_mut( + recv_data.audio_base as *mut u8, + recv_data.audio_size as usize, + ) + }; + if !self.stream.wait_for_data(buf) { + warn!("timed out to wait for capture audio data"); + self.status = AudioStatus::Error; + return 0; + } + 1 + } + + fn get_status(&self) -> AudioStatus { + self.status + } +} + +extern "C" fn render_on_interrupt_cb( + _renderer: *mut OhAudioRenderer, + user_data: *mut ::std::os::raw::c_void, + source_type: capi::OHAudioInterruptSourceType, + hint: capi::OHAudioInterruptHint, +) -> i32 { + info!( + "Render interrupts, type is {}, hint is {}", + source_type, hint + ); + // SAFETY: we make sure that it is OhAudioRender when register callback. + let render = unsafe { + (user_data as *mut OhAudioRender) + .as_mut() + .unwrap_unchecked() + }; + if hint == capi::AUDIOSTREAM_INTERRUPT_HINT_PAUSE { + render.status = AudioStatus::Intr; + } + 0 +} + +extern "C" fn capture_on_interrupt_cb( + _capturer: *mut OhAudioCapturer, + user_data: *mut ::std::os::raw::c_void, + source_type: capi::OHAudioInterruptSourceType, + hint: capi::OHAudioInterruptHint, +) -> i32 { + info!( + "Capture interrupts, type is {}, hint is {}", + source_type, hint + ); + + // SAFETY: we make sure that it is OhAudioCapture when register callback. + let capture = unsafe { + (user_data as *mut OhAudioCapture) + .as_mut() + .unwrap_unchecked() + }; + if hint == capi::AUDIOSTREAM_INTERRUPT_HINT_PAUSE { + capture.status = AudioStatus::Intr; + } + 0 +} + +extern "C" fn on_write_data_cb( + _renderer: *mut OhAudioRenderer, + user_data: *mut ::std::os::raw::c_void, + buffer: *mut ::std::os::raw::c_void, + length: i32, +) -> i32 { + if buffer.is_null() || user_data.is_null() { + error!("on_write_data_cb: Invalid input"); + return 0; + } + + // SAFETY: we make sure that it is OhAudioRender when register callback. + let render = unsafe { + (user_data as *mut OhAudioRender) + .as_mut() + .unwrap_unchecked() + }; + + let len = length as usize; + // SAFETY: the buffer is guaranteed by OH audio framework. + let wbuf = unsafe { std::slice::from_raw_parts_mut(buffer as *mut u8, len) }; + + trace::oh_scream_on_write_data_cb(len); + trace::trace_scope_start!(ohaudio_write_cb, args = (len)); + match render.stream_data.lock().unwrap().read_exact(wbuf) { + Ok(()) => { + if render.is_flushing() { + render.flush_renderer(); + } + } + Err(e) => error!("Failed to read stream data {:?}", e), + } + 0 +} + +extern "C" fn on_read_data_cb( + _capturer: *mut OhAudioCapturer, + user_data: *mut ::std::os::raw::c_void, + buffer: *mut ::std::os::raw::c_void, + length: i32, +) -> i32 { + if buffer.is_null() || user_data.is_null() { + error!("on_read_data_cb: Invalid input"); + return 0; + } + + // SAFETY: we make sure that it is OhAudioCapture when register callback. + let capture = unsafe { + (user_data as *mut OhAudioCapture) + .as_mut() + .unwrap_unchecked() + }; + + trace::trace_scope_start!(ohaudio_read_cb, args = (length)); + + if capture.status != AudioStatus::Started { + return 0; + } + + // SAFETY: the buffer is checked above. + let buf = unsafe { std::slice::from_raw_parts(buffer as *mut u8, length as usize) }; + capture.stream.append_data(buf); + 0 +} + +pub struct OhAudio { + processor: Box, +} + +// SAFETY: OhAudio's 'send' trait is guaranteed by Mutex lock. 
+unsafe impl Send for OhAudio {} + +impl OhAudio { + pub fn init(dir: ScreamDirection) -> Self { + match dir { + ScreamDirection::Playback => Self { + processor: Box::::default(), + }, + ScreamDirection::Record => Self { + processor: Box::::default(), + }, + } + } +} + +impl AudioInterface for OhAudio { + fn send(&mut self, recv_data: &StreamData) { + self.processor.process(recv_data); + } + + fn receive(&mut self, recv_data: &StreamData) -> i32 { + self.processor.process(recv_data) + } + + fn destroy(&mut self) { + self.processor.destroy(); + } + + fn get_status(&self) -> AudioStatus { + self.processor.get_status() + } +} + +pub struct OhAudioVolume { + shm_dev: Arc>, + ohos_vol: RwLock, + ohos_vol_max: u32, + ohos_vol_min: u32, +} + +// SAFETY: all unsafe fields are protected by lock +unsafe impl Send for OhAudioVolume {} +// SAFETY: all unsafe fields are protected by lock +unsafe impl Sync for OhAudioVolume {} + +impl GuestVolumeNotifier for OhAudioVolume { + fn notify(&self, vol: u32) { + *self.ohos_vol.write().unwrap() = self.to_guest_vol(vol); + self.shm_dev + .lock() + .unwrap() + .trigger_msix(IVSHMEM_VOLUME_SYNC_VECTOR); + } +} + +impl AudioExtension for OhAudioVolume { + fn get_host_volume(&self) -> u32 { + *self.ohos_vol.read().unwrap() + } + + fn set_host_volume(&self, vol: u32) { + set_ohos_volume(self.to_host_vol(vol)); + } +} + +impl OhAudioVolume { + pub fn new(shm_dev: Arc>) -> Arc { + let vol = Arc::new(Self { + shm_dev, + ohos_vol: RwLock::new(0), + ohos_vol_max: get_ohos_volume_max(), + ohos_vol_min: get_ohos_volume_min(), + }); + *vol.ohos_vol.write().unwrap() = vol.to_guest_vol(get_ohos_volume()); + register_guest_volume_notifier(vol.clone()); + vol + } + + fn to_guest_vol(&self, h_vol: u32) -> u32 { + if self.ohos_vol_max > self.ohos_vol_min { + return SCREAM_MAX_VOLUME * h_vol / (self.ohos_vol_max - self.ohos_vol_min); + } + 0 + } + + fn to_host_vol(&self, v_vol: u32) -> u32 { + if v_vol == 0 || self.ohos_vol_max <= self.ohos_vol_min { + return 0; + } + let res = (self.ohos_vol_max - self.ohos_vol_min) * v_vol / SCREAM_MAX_VOLUME + 1; + if res > self.ohos_vol_max { + return self.ohos_vol_max; + } + res + } +} + +#[cfg(test)] +mod tests { + use crate::misc::scream::ohaudio::{on_read_data_cb, on_write_data_cb, StreamUnit}; + use crate::misc::scream::ohaudio::{OhAudioCapture, OhAudioProcess, OhAudioRender}; + use crate::misc::scream::StreamData; + + use util::ohos_binding::audio::*; + + #[test] + fn test_render_init_and_destroy() { + let mut render = OhAudioRender::default(); + let mut stream_data = StreamData::default(); + + assert!(!render.init(&stream_data)); + + stream_data.fmt.size = 16; + stream_data.fmt.rate = 1; + render.init(&stream_data); + assert!(render.ctx.is_some()); + assert!(render.start); + assert_eq!(render.stream_data.lock().unwrap().len(), 0); + + render.destroy(); + assert!(!render.start); + assert!(render.ctx.is_none()); + assert_eq!(render.stream_data.lock().unwrap().len(), 0); + assert_eq!(render.prepared_data, 0); + } + + #[test] + fn test_render_check_data_ready() { + let mut render = OhAudioRender::default(); + let mut recv_data = StreamData::default(); + recv_data.fmt.size = 16; + recv_data.fmt.rate = 1; + recv_data.fmt.channels = 2; + assert!(!render.check_data_ready(&recv_data)); + + render.prepared_data = 96000; + assert!(render.check_data_ready(&recv_data)); + } + + #[test] + fn test_render_check_fmt_update() { + let mut render = OhAudioRender::default(); + let mut recv_data = StreamData::default(); + recv_data.fmt.size = 16; + 
recv_data.fmt.rate = 158; + recv_data.fmt.channels = 2; + let stream_data = StreamData::default(); + render.init(&stream_data); + render.check_fmt_update(&recv_data); + assert!(render.ctx.is_none()); + } + + #[test] + fn test_capture_init_and_destroy() { + let mut capture = OhAudioCapture::default(); + let stream_data = StreamData::default(); + assert!(!capture.init(&stream_data)); + } + + #[test] + fn test_on_write_data_cb() { + let mut _renderer = OhAudioRenderer::default(); + let mut render = OhAudioRender::default(); + let user_data = std::ptr::addr_of!(render) as *mut ::std::os::raw::c_void; + + let mut dst: Vec = vec![25, 0, 0, 0, 0, 0, 0, 0, 0]; + + let src1: Vec = vec![10, 11, 12, 13, 14]; + let su1 = StreamUnit { + addr: src1.as_ptr() as u64, + len: src1.len() as u64, + }; + let src2: Vec = vec![21, 22, 23, 24, 25]; + let su2 = StreamUnit { + addr: src2.as_ptr() as u64, + len: src2.len() as u64, + }; + + render.stream_data.lock().unwrap().push(su1); + render.stream_data.lock().unwrap().push(su2); + render.start = true; + + // SAFETY: we checked len. + let dst_ptr = unsafe { dst.as_mut_ptr().offset(1) }; + + on_write_data_cb( + &mut _renderer, + user_data, + dst_ptr as *mut ::std::os::raw::c_void, + 8, + ); + + let target = [25, 10, 11, 12, 13, 14, 21, 22, 23]; + assert_eq!(dst, target); + } + + #[test] + fn test_on_read_data_cb() { + let mut _capturer = OhAudioCapturer::default(); + let mut capture = OhAudioCapture::default(); + + let mut src: Vec = vec![10, 11, 12, 13, 14, 15, 16]; + let mut dst: Vec = vec![99, 0, 0, 0, 0, 0, 0, 0]; + + let user_data = std::ptr::addr_of!(capture) as *mut ::std::os::raw::c_void; + + capture.align = dst.len() as u32; + capture.shm_len = dst.len() as u64; + capture.shm_addr = dst.as_mut_ptr() as u64; + capture.start = true; + // SAFETY: we checked len. + capture.cur_pos = unsafe { dst.as_mut_ptr().offset(3) as u64 }; + + on_read_data_cb( + &mut _capturer, + user_data, + src.as_mut_ptr() as *mut ::std::os::raw::c_void, + src.len() as i32, + ); + + assert_eq!(capture.new_chunks.into_inner(), 0); + let target = [15, 16, 0, 10, 11, 12, 13, 14]; + assert_eq!(dst, target); + } +} diff --git a/devices/src/misc/scream/pulseaudio.rs b/devices/src/misc/scream/pulseaudio.rs new file mode 100644 index 0000000000000000000000000000000000000000..221055850bb44e98232e1b2a59bb87fcf1246eac --- /dev/null +++ b/devices/src/misc/scream/pulseaudio.rs @@ -0,0 +1,371 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
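+//! PulseAudio backend for the scream sound card, built on the pulse "simple" API.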
+ +use std::sync::atomic::{fence, Ordering}; + +use log::{error, warn}; +use psimple::Simple; +use pulse::{ + channelmap::{Map, MapDef, Position}, + def::BufferAttr, + sample::{Format, Spec}, + stream::Direction, + time::MicroSeconds, +}; + +use super::{AudioInterface, AudioStatus, AUDIO_SAMPLE_RATE_44KHZ}; +use crate::misc::scream::{ScreamDirection, ShmemStreamFmt, StreamData, TARGET_LATENCY_MS}; + +const MAX_LATENCY_MS: u32 = 100; + +const STREAM_NAME: &str = "Audio"; + +const WINDOWS_POSITION_CNT: usize = 11; +const PULSEAUDIO_POSITION: [Position; WINDOWS_POSITION_CNT] = [ + Position::FrontLeft, + Position::FrontRight, + Position::FrontCenter, + Position::Lfe, + Position::RearLeft, + Position::RearRight, + Position::FrontLeftOfCenter, + Position::FrontRightOfCenter, + Position::RearCenter, + Position::SideLeft, + Position::SideRight, +]; + +impl ScreamDirection { + fn transform(&self) -> Direction { + match self { + Self::Playback => Direction::Playback, + Self::Record => Direction::Record, + } + } +} + +/// Data structure of the audio processed by the pulseaudio. +pub struct PulseStreamData { + simple: Option, + ss: Spec, + channel_map: Map, + buffer_attr: BufferAttr, + stream_fmt: ShmemStreamFmt, + latency: u32, + app_name: String, + stream_name: String, + dir: Direction, +} + +impl PulseStreamData { + pub fn init(name: &str, dir: ScreamDirection) -> Self { + // Map to stereo, it's the default number of channels. + let mut channel_map = Map::default(); + channel_map.init_stereo(); + + // Start with base default format, rate and channels. Will switch to actual format later. + let ss = Spec { + format: Format::S16le, + rate: AUDIO_SAMPLE_RATE_44KHZ, + channels: 2, + }; + + // Init receiver format to track changes. + let stream_fmt = ShmemStreamFmt::default(); + + // Set buffer size for requested latency. + let buffer_attr = BufferAttr { + maxlength: ss.usec_to_bytes(MicroSeconds(u64::from(MAX_LATENCY_MS) * 1000)) as u32, + tlength: ss.usec_to_bytes(MicroSeconds(u64::from(TARGET_LATENCY_MS) * 1000)) as u32, + prebuf: std::u32::MAX, + minreq: std::u32::MAX, + fragsize: std::u32::MAX, + }; + + let pa_dir = dir.transform(); + + #[cfg(not(test))] + let simple = Some( + Simple::new( + None, + name, + pa_dir, + None, + STREAM_NAME, + &ss, + Some(&channel_map), + Some(&buffer_attr), + ) + .unwrap_or_else(|e| panic!("PulseAudio init failed : {}", e)), + ); + #[cfg(test)] + let simple = None; + + Self { + simple, + ss, + channel_map, + buffer_attr, + stream_fmt, + latency: TARGET_LATENCY_MS, + app_name: name.to_string(), + stream_name: STREAM_NAME.to_string(), + dir: pa_dir, + } + } + + fn transfer_channel_map(&mut self, format: &ShmemStreamFmt) { + self.channel_map.init(); + self.channel_map.set_len(format.channels); + let map: &mut [Position] = self.channel_map.get_mut(); + // In Windows, the channel mask shows as following figure. + // 31 11 10 9 8 7 6 5 4 3 2 1 0 + // | | | SR | SL | BC | FRC| FLC| BR | BL | LFE| FC | FR | FL | + // + // Each bit in the channel mask represents a particular speaker position. + // Now, it map a windows SPEAKER_* position to a PA_CHANNEL_POSITION_*. + let mut key: i32 = -1; + for (i, item) in map.iter_mut().enumerate().take(format.channels as usize) { + for j in (key + 1)..32 { + if (format.channel_map >> j) & 0x01 == 1 { + key = j; + break; + } + } + // Map the key value to a pulseaudio channel position. 
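+            // `key` now holds the bit index of the next set bit in the Windows channel mask.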
+ if (key as usize) < WINDOWS_POSITION_CNT { + *item = PULSEAUDIO_POSITION[key as usize]; + } else { + warn!("Channel {} can not be mapped, Falling back to 'center'.", i); + *item = Position::FrontCenter; + } + } + } + + fn check_fmt_update(&mut self, recv_data: &StreamData) { + if self.stream_fmt == recv_data.fmt { + return; + } + + // Flush left data when audio format changed. + self.destroy(); + + // If audio format changed, reconfigure + self.stream_fmt = recv_data.fmt; + self.ss.channels = recv_data.fmt.channels; + self.ss.rate = recv_data.fmt.get_rate(); + + match recv_data.fmt.size { + 16 => self.ss.format = Format::S16le, + 24 => self.ss.format = Format::S24le, + 32 => self.ss.format = Format::S32le, + _ => { + warn!( + "Unsupported sample size {}, not playing until next format switch", + recv_data.fmt.size + ); + self.ss.rate = 0; + } + } + + if recv_data.fmt.channels == 1 { + self.channel_map.init_mono(); + } else if recv_data.fmt.channels == 2 { + self.channel_map.init_stereo(); + } else { + self.transfer_channel_map(&recv_data.fmt); + } + + if !self.channel_map.is_valid() { + warn!("Invalid channel mapping, falling back to MapDef::WAVEEx"); + self.channel_map + .init_extend(recv_data.fmt.channels, MapDef::WAVEEx); + } + if !self.channel_map.is_compatible_with_sample_spec(&self.ss) { + warn!("Incompatible channel mapping."); + self.ss.rate = 0; + } + + if self.ss.rate > 0 { + // Sample spec has changed, so the playback buffer size for the requested latency must + // be recalculated as well. + self.buffer_attr.tlength = self + .ss + .usec_to_bytes(MicroSeconds(u64::from(self.latency) * 1000)) + as u32; + + self.simple = Simple::new( + None, + self.app_name.as_str(), + self.dir, + None, + self.stream_name.as_str(), + &self.ss, + Some(&self.channel_map), + Some(&self.buffer_attr), + ) + .map_or_else( + |_| { + warn!( + "Unable to open PulseAudio with sample rate {}, sample size {} and channels {}", + self.ss.rate, recv_data.fmt.size, recv_data.fmt.channels + ); + None + }, + Some, + ); + } + } +} + +impl AudioInterface for PulseStreamData { + fn send(&mut self, recv_data: &StreamData) { + self.check_fmt_update(recv_data); + + if self.ss.rate == 0 || self.simple.is_none() { + return; + } + + // Make sure audio read does not bypass chunk_idx read. + fence(Ordering::Acquire); + + // SAFETY: audio_base is the shared memory. It already verifies the validity + // of the address range during the header check. + let data = unsafe { + std::slice::from_raw_parts( + recv_data.audio_base as *const u8, + recv_data.audio_size as usize, + ) + }; + + if let Err(e) = self.simple.as_ref().unwrap().write(data) { + error!("PulseAudio write data failed: {:?}", e); + } + } + + fn receive(&mut self, recv_data: &StreamData) -> i32 { + self.check_fmt_update(recv_data); + + if self.simple.is_none() { + return 0; + } + + // SAFETY: audio_base is the shared memory. It already verifies the validity + // of the address range during the header check. 
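+        // Captured samples are read from PulseAudio directly into the shared-memory chunk,
+        // avoiding an extra copy.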
+ let data = unsafe { + std::slice::from_raw_parts_mut( + recv_data.audio_base as *mut u8, + recv_data.audio_size as usize, + ) + }; + + if let Err(e) = self.simple.as_ref().unwrap().read(data) { + error!("PulseAudio read data failed: {:?}", e); + self.ss.rate = 0; + return 0; + } + + 1 + } + + fn destroy(&mut self) { + if self.simple.is_none() { + return; + } + if self.dir == Direction::Playback { + if let Err(e) = self.simple.as_ref().unwrap().drain() { + error!("Failed to drain Playback stream: {:?}", e); + } + } else if let Err(e) = self.simple.as_ref().unwrap().flush() { + error!("Failed to flush Capture stream: {:?}", e); + } + self.simple = None; + } + + fn get_status(&self) -> AudioStatus { + if self.simple.is_some() { + AudioStatus::Started + } else { + AudioStatus::Ready + } + } +} + +#[cfg(test)] +mod tests { + use pulse::{channelmap::Position, sample::Format}; + + use super::PulseStreamData; + use crate::misc::scream::{ + ScreamDirection, StreamData, AUDIO_SAMPLE_RATE_44KHZ, AUDIO_SAMPLE_RATE_48KHZ, + WINDOWS_SAMPLE_BASE_RATE, + }; + + #[test] + fn test_channel_map_transfer() { + let mut pulse = PulseStreamData::init("test", ScreamDirection::Playback); + let mut test_data = StreamData::default(); + + // set 8: BC, 6: FLC, 4: BL, 2: FC, 0: FL + test_data.fmt.channels = 5; + test_data.fmt.channel_map = 0b1_0101_0101; + pulse.transfer_channel_map(&test_data.fmt); + + assert_eq!(pulse.channel_map.len(), 5); + let map = pulse.channel_map.get_mut(); + assert_eq!(map[0], Position::FrontLeft); + assert_eq!(map[1], Position::FrontCenter); + assert_eq!(map[2], Position::RearLeft); + assert_eq!(map[3], Position::FrontLeftOfCenter); + assert_eq!(map[4], Position::RearCenter); + + // The first 12 bits are set to 1. + test_data.fmt.channels = 12; + test_data.fmt.channel_map = 0b1111_1111_1111; + pulse.transfer_channel_map(&test_data.fmt); + + assert_eq!(pulse.channel_map.len(), 12); + let map = pulse.channel_map.get_mut(); + assert_eq!(map[11], Position::FrontCenter); + } + + #[test] + fn test_pulseaudio_fmt_update() { + let mut pulse = PulseStreamData::init("test", ScreamDirection::Playback); + let mut test_data = StreamData::default(); + + // Setting sample rate to AUDIO_SAMPLE_RATE_44KHZ, sample size to 16. + test_data.fmt.rate = WINDOWS_SAMPLE_BASE_RATE + 1; + test_data.fmt.size = 16; + + pulse.check_fmt_update(&test_data); + + assert_eq!(pulse.ss.rate, AUDIO_SAMPLE_RATE_44KHZ); + assert_eq!(pulse.ss.format, Format::S16le); + + // Setting sample rate to AUDIO_SAMPLE_RATE_48KHZ, sample size to 24. + test_data.fmt.rate = 1; + test_data.fmt.size = 24; + + pulse.check_fmt_update(&test_data); + + assert_eq!(pulse.ss.rate, AUDIO_SAMPLE_RATE_48KHZ); + assert_eq!(pulse.ss.format, Format::S24le); + + // Settint invalid sample size to 100. + test_data.fmt.size = 100; + + pulse.check_fmt_update(&test_data); + + assert_eq!(pulse.ss.rate, 0); + } +} diff --git a/pci/src/bus.rs b/devices/src/pci/bus.rs similarity index 38% rename from pci/src/bus.rs rename to devices/src/pci/bus.rs index 8fb9d1e37776e9d1c871dad6a01078bc204770ef..e4f730ab752dd2c083c04a0d2ebda8a3964f2d0f 100644 --- a/pci/src/bus.rs +++ b/devices/src/pci/bus.rs @@ -10,28 +10,30 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
-use std::collections::HashMap; +use std::sync::atomic::{AtomicU16, Ordering}; use std::sync::{Arc, Mutex, Weak}; +use anyhow::{Context, Result}; +use log::debug; + +use super::{ + config::{BRIDGE_CONTROL, BRIDGE_CTL_SEC_BUS_RESET, SECONDARY_BUS_NUM, SUBORDINATE_BUS_NUM}, + hotplug::HotplugOps, + PciDevOps, PciIntxState, +}; +use crate::pci::{to_pcidevops, RootPort}; +use crate::{ + convert_bus_mut, convert_bus_ref, convert_device_mut, convert_device_ref, Bus, BusBase, Device, + MsiIrqManager, MUT_ROOT_PORT, PCI_BUS_DEVICE, ROOT_PORT, +}; use address_space::Region; +use util::gen_base_func; -use super::config::{SECONDARY_BUS_NUM, SUBORDINATE_BUS_NUM}; -use super::hotplug::HotplugOps; -use super::PciDevOps; -use crate::errors::{Result, ResultExt}; - -type DeviceBusInfo = (Arc>, Arc>); +type DeviceBusInfo = (Arc>, Arc>); /// PCI bus structure. pub struct PciBus { - /// Bus name - pub name: String, - /// Devices attached to the bus. - pub devices: HashMap>>, - /// Child buses of the bus. - pub child_buses: Vec>>, - /// Pci bridge which the bus orignates from. - pub parent_bridge: Option>>, + pub base: BusBase, /// IO region which the parent bridge manages. #[cfg(target_arch = "x86_64")] pub io_region: Region, @@ -39,6 +41,45 @@ pub struct PciBus { pub mem_region: Region, /// Hot Plug controller for obtaining hot plug ops. pub hotplug_controller: Option>>, + /// Interrupt info related to INTx. + pub intx_state: Option>>, + pub msi_irq_manager: Option>, +} + +/// Convert from Arc> to &mut PciBus. +#[macro_export] +macro_rules! MUT_PCI_BUS { + ($trait_bus:expr, $lock_bus: ident, $struct_bus: ident) => { + convert_bus_mut!($trait_bus, $lock_bus, $struct_bus, PciBus); + }; +} + +/// Convert from Arc> to &PciBus. +#[macro_export] +macro_rules! PCI_BUS { + ($trait_bus:expr, $lock_bus: ident, $struct_bus: ident) => { + convert_bus_ref!($trait_bus, $lock_bus, $struct_bus, PciBus); + }; +} + +impl Bus for PciBus { + gen_base_func!(bus_base, bus_base_mut, BusBase, base); + + fn reset(&self) -> Result<()> { + for dev in self.child_devices().values() { + PCI_BUS_DEVICE!(dev, locked_dev, pci_dev); + pci_dev + .reset(false) + .with_context(|| format!("Fail to reset pci dev {}", pci_dev.name()))?; + + if let Some(bus) = pci_dev.child_bus() { + MUT_PCI_BUS!(bus, locked_bus, pci_bus); + pci_bus.reset().with_context(|| "Fail to reset child bus")?; + } + } + + Ok(()) + } } impl PciBus { @@ -55,14 +96,13 @@ impl PciBus { mem_region: Region, ) -> Self { Self { - name, - devices: HashMap::new(), - child_buses: Vec::new(), - parent_bridge: None, + base: BusBase::new(name), #[cfg(target_arch = "x86_64")] io_region, mem_region, hotplug_controller: None, + intx_state: None, + msi_irq_manager: None, } } @@ -73,19 +113,9 @@ impl PciBus { /// /// * `offset` - Offset of bus number register. pub fn number(&self, offset: usize) -> u8 { - if self.parent_bridge.is_none() { - return 0; - } - let mut data = vec![0_u8; 1]; - self.parent_bridge - .as_ref() - .unwrap() - .upgrade() - .unwrap() - .lock() - .unwrap() - .read_config(offset, &mut data); + self.get_bridge_control_reg(offset, &mut data); + data[0] } @@ -94,16 +124,20 @@ impl PciBus { /// # Arguments /// /// * `bus_num` - The bus number. - /// * `devfn` - Slot number << 8 | device number. - pub fn get_device(&self, bus_num: u8, devfn: u8) -> Option>> { - if let Some(dev) = self.devices.get(&devfn) { - return Some((*dev).clone()); + /// * `devfn` - Slot number << 3 | Function number. 
+ pub fn get_device(&self, bus_num: u8, devfn: u8) -> Option>> { + if let Some(dev) = self.child_dev(u64::from(devfn)) { + return Some(dev.clone()); } debug!("Can't find device {}:{}", bus_num, devfn); None } fn in_range(&self, bus_num: u8) -> bool { + if self.is_during_reset() { + return false; + } + let secondary_bus_num: u8 = self.number(SECONDARY_BUS_NUM as usize); let subordinate_bus_num: u8 = self.number(SUBORDINATE_BUS_NUM as usize); if bus_num > secondary_bus_num && bus_num <= subordinate_bus_num { @@ -118,15 +152,18 @@ impl PciBus { /// /// * `bus` - Bus to find from. /// * `bus_number` - The bus number. - pub fn find_bus_by_num(bus: &Arc>, bus_num: u8) -> Option>> { - let locked_bus = bus.lock().unwrap(); - if locked_bus.number(SECONDARY_BUS_NUM as usize) == bus_num { - return Some((*bus).clone()); + pub fn find_bus_by_num(bus: &Arc>, bus_num: u8) -> Option>> { + PCI_BUS!(bus, locked_bus, pci_bus); + if pci_bus.number(SECONDARY_BUS_NUM as usize) == bus_num { + return Some(bus.clone()); } - if locked_bus.in_range(bus_num) { - for sub_bus in &locked_bus.child_buses { - if let Some(b) = PciBus::find_bus_by_num(sub_bus, bus_num) { - return Some(b); + if pci_bus.in_range(bus_num) { + for dev in pci_bus.child_devices().values() { + let child_bus = dev.lock().unwrap().child_bus(); + if let Some(sub_bus) = child_bus { + if let Some(b) = PciBus::find_bus_by_num(&sub_bus, bus_num) { + return Some(b); + } } } } @@ -139,14 +176,20 @@ impl PciBus { /// /// * `bus` - Bus to find from. /// * `name` - Bus name. - pub fn find_bus_by_name(bus: &Arc>, bus_name: &str) -> Option>> { + pub fn find_bus_by_name( + bus: &Arc>, + bus_name: &str, + ) -> Option>> { let locked_bus = bus.lock().unwrap(); - if locked_bus.name.as_str() == bus_name { - return Some((*bus).clone()); + if locked_bus.name().as_str() == bus_name { + return Some(bus.clone()); } - for sub_bus in &locked_bus.child_buses { - if let Some(b) = PciBus::find_bus_by_name(sub_bus, bus_name) { - return Some(b); + for dev in locked_bus.child_devices().values() { + let child_bus = dev.lock().unwrap().child_bus(); + if let Some(sub_bus) = child_bus { + if let Some(b) = PciBus::find_bus_by_name(&sub_bus, bus_name) { + return Some(b); + } } } None @@ -156,20 +199,22 @@ impl PciBus { /// /// # Arguments /// - /// * `pci_bus` - On which bus to find. + /// * `bus` - On which bus to find. /// * `name` - Device name. - pub fn find_attached_bus(pci_bus: &Arc>, name: &str) -> Option { - // Device is attached in pci_bus. - let locked_bus = pci_bus.lock().unwrap(); - for dev in locked_bus.devices.values() { + pub fn find_attached_bus(bus: &Arc>, name: &str) -> Option { + // Device is attached in bus. + let locked_bus = bus.lock().unwrap(); + for dev in locked_bus.child_devices().values() { if dev.lock().unwrap().name() == name { - return Some((pci_bus.clone(), dev.clone())); + return Some((bus.clone(), dev.clone())); } - } - // Find in child bus. - for bus in &locked_bus.child_buses { - if let Some(found) = PciBus::find_attached_bus(bus, name) { - return Some(found); + + // Find in child bus. + let child_bus = dev.lock().unwrap().child_bus(); + if let Some(sub_bus) = child_bus { + if let Some(found) = PciBus::find_attached_bus(&sub_bus, name) { + return Some(found); + } } } None @@ -181,198 +226,138 @@ impl PciBus { /// /// * `bus` - Bus to detach from. /// * `dev` - Device attached to the bus. 
- pub fn detach_device(bus: &Arc>, dev: &Arc>) -> Result<()> { - let mut dev_locked = dev.lock().unwrap(); - dev_locked + pub fn detach_device(bus: &Arc>, dev: &Arc>) -> Result<()> { + PCI_BUS_DEVICE!(dev, locked_dev, pci_dev); + pci_dev .unrealize() - .chain_err(|| format!("Failed to unrealize device {}", dev_locked.name()))?; - - let devfn = dev_locked - .devfn() - .chain_err(|| format!("Failed to get devfn: device {}", dev_locked.name()))?; + .with_context(|| format!("Failed to unrealize device {}", pci_dev.name()))?; + let devfn = u64::from(pci_dev.pci_base().devfn); let mut locked_bus = bus.lock().unwrap(); - if locked_bus.devices.get(&devfn).is_some() { - locked_bus.devices.remove(&devfn); - } else { - bail!("Device {} not found in the bus", dev_locked.name()); - } + locked_bus + .detach_child(devfn) + .with_context(|| format!("Device {} not found in the bus", pci_dev.name()))?; Ok(()) } - pub fn reset(&mut self) -> Result<()> { - for (_id, pci_dev) in self.devices.iter() { - pci_dev - .lock() - .unwrap() - .reset(false) - .chain_err(|| "Fail to reset pci dev")?; + fn is_during_reset(&self) -> bool { + let mut data = vec![0_u8; 2]; + self.get_bridge_control_reg(BRIDGE_CONTROL as usize + 1, &mut data); + if data[1] & ((BRIDGE_CTL_SEC_BUS_RESET >> 8) as u8) != 0 { + return true; } + false + } - for child_bus in self.child_buses.iter_mut() { - child_bus - .lock() - .unwrap() - .reset() - .chain_err(|| "Fail to reset child bus")?; + fn get_bridge_control_reg(&self, offset: usize, data: &mut [u8]) { + if let Some(parent_bridge) = self.parent_device() { + let bridge = parent_bridge.upgrade().unwrap(); + MUT_ROOT_PORT!(bridge, locked_bridge, rootport); + rootport.read_config(offset, data); } - - Ok(()) } -} - -#[cfg(test)] -mod tests { - use address_space::{AddressSpace, Region}; - use super::*; - use crate::bus::PciBus; - use crate::config::{PciConfig, PCI_CONFIG_SPACE_SIZE}; - use crate::errors::Result; - use crate::root_port::RootPort; - use crate::PciHost; - - #[derive(Clone)] - struct PciDevice { - name: String, - devfn: u8, - config: PciConfig, - parent_bus: Weak>, + pub fn generate_dev_id(&self, devfn: u8) -> u16 { + let bus_num = self.number(SECONDARY_BUS_NUM as usize); + (u16::from(bus_num) << 8) | u16::from(devfn) } - impl PciDevOps for PciDevice { - fn init_write_mask(&mut self) -> Result<()> { - Ok(()) - } - - fn init_write_clear_mask(&mut self) -> Result<()> { - Ok(()) - } - - fn read_config(&self, offset: usize, data: &mut [u8]) { - self.config.read(offset, data); - } - - fn write_config(&mut self, offset: usize, data: &[u8]) { - self.config.write(offset, data, 0); - } - - fn name(&self) -> String { - self.name.clone() - } - - fn realize(mut self) -> Result<()> { - let devfn = self.devfn; - self.init_write_mask()?; - self.init_write_clear_mask()?; - - let dev = Arc::new(Mutex::new(self)); - dev.lock() - .unwrap() - .parent_bus - .upgrade() - .unwrap() - .lock() - .unwrap() - .devices - .insert(devfn, dev.clone()); - Ok(()) - } - - fn unrealize(&mut self) -> Result<()> { - Ok(()) - } + pub fn update_dev_id(&self, devfn: u8, dev_id: &Arc) { + dev_id.store(self.generate_dev_id(devfn), Ordering::Release); + } - fn devfn(&self) -> Option { - Some(0) + pub fn get_msi_irq_manager(&self) -> Option> { + match self.parent_device().as_ref() { + Some(parent_bridge) => { + let bridge = parent_bridge.upgrade().unwrap(); + ROOT_PORT!(bridge, locked_bridge, rootport); + rootport.get_msi_irq_manager() + } + None => self.msi_irq_manager.clone(), } } +} - pub fn create_pci_host() -> Arc> { - 
#[cfg(target_arch = "x86_64")] - let sys_io = AddressSpace::new(Region::init_container_region(1 << 16)).unwrap(); - let sys_mem = AddressSpace::new(Region::init_container_region(u64::max_value())).unwrap(); - Arc::new(Mutex::new(PciHost::new( - #[cfg(target_arch = "x86_64")] - &sys_io, - &sys_mem, - (0xB000_0000, 0x1000_0000), - (0xC000_0000, 0x3000_0000), - ))) - } +#[cfg(test)] +mod tests { + use super::*; + use crate::pci::bus::PciBus; + use crate::pci::host::tests::create_pci_host; + use crate::pci::root_port::{RootPort, RootPortConfig}; + use crate::pci::tests::TestPciDevice; + use crate::pci::{clean_pcidevops_type, register_pcidevops_type}; #[test] fn test_find_attached_bus() { let pci_host = create_pci_host(); let locked_pci_host = pci_host.lock().unwrap(); - let root_bus = Arc::downgrade(&locked_pci_host.root_bus); - - let root_port = RootPort::new("pcie.1".to_string(), 8, 0, root_bus.clone(), false); + let root_bus = Arc::downgrade(&locked_pci_host.child_bus().unwrap()); + let root_port_config = RootPortConfig { + addr: (1, 0), + id: "pcie.1".to_string(), + ..Default::default() + }; + let root_port = RootPort::new(root_port_config, root_bus.clone()); root_port.realize().unwrap(); // Test device is attached to the root bus. - let pci_dev = PciDevice { - name: String::from("test1"), - devfn: 10, - config: PciConfig::new(PCI_CONFIG_SPACE_SIZE, 0), - parent_bus: root_bus.clone(), - }; + let pci_dev = TestPciDevice::new("test1", 10, root_bus); pci_dev.realize().unwrap(); // Test device is attached to the root port. - let bus = PciBus::find_bus_by_name(&locked_pci_host.root_bus, "pcie.1").unwrap(); - let pci_dev = PciDevice { - name: String::from("test2"), - devfn: 12, - config: PciConfig::new(PCI_CONFIG_SPACE_SIZE, 0), - parent_bus: Arc::downgrade(&bus), - }; + let bus = + PciBus::find_bus_by_name(&locked_pci_host.child_bus().unwrap(), "pcie.1").unwrap(); + let pci_dev = TestPciDevice::new("test2", 12, Arc::downgrade(&bus)); pci_dev.realize().unwrap(); - let info = PciBus::find_attached_bus(&locked_pci_host.root_bus, "test0"); + let info = PciBus::find_attached_bus(&locked_pci_host.child_bus().unwrap(), "test0"); assert!(info.is_none()); - let info = PciBus::find_attached_bus(&locked_pci_host.root_bus, "test1"); + let info = PciBus::find_attached_bus(&locked_pci_host.child_bus().unwrap(), "test1"); assert!(info.is_some()); let (bus, dev) = info.unwrap(); - assert_eq!(bus.lock().unwrap().name, "pcie.0"); + assert_eq!(bus.lock().unwrap().name(), "pcie.0"); assert_eq!(dev.lock().unwrap().name(), "test1"); - let info = PciBus::find_attached_bus(&locked_pci_host.root_bus, "test2"); + let info = PciBus::find_attached_bus(&locked_pci_host.child_bus().unwrap(), "test2"); assert!(info.is_some()); let (bus, dev) = info.unwrap(); - assert_eq!(bus.lock().unwrap().name, "pcie.1"); + assert_eq!(bus.lock().unwrap().name(), "pcie.1"); assert_eq!(dev.lock().unwrap().name(), "test2"); } #[test] fn test_detach_device() { + register_pcidevops_type::().unwrap(); + let pci_host = create_pci_host(); let locked_pci_host = pci_host.lock().unwrap(); - let root_bus = Arc::downgrade(&locked_pci_host.root_bus); + let root_bus = Arc::downgrade(&locked_pci_host.child_bus().unwrap()); - let root_port = RootPort::new("pcie.1".to_string(), 8, 0, root_bus.clone(), false); + let root_port_config = RootPortConfig { + id: "pcie.1".to_string(), + addr: (1, 0), + ..Default::default() + }; + let root_port = RootPort::new(root_port_config, root_bus.clone()); root_port.realize().unwrap(); - let bus = 
PciBus::find_bus_by_name(&locked_pci_host.root_bus, "pcie.1").unwrap(); - let pci_dev = PciDevice { - name: String::from("test1"), - devfn: 0, - config: PciConfig::new(PCI_CONFIG_SPACE_SIZE, 0), - parent_bus: Arc::downgrade(&bus), - }; - let dev = Arc::new(Mutex::new(pci_dev.clone())); - let dev_ops: Arc> = dev; + let bus = + PciBus::find_bus_by_name(&locked_pci_host.child_bus().unwrap(), "pcie.1").unwrap(); + let pci_dev = TestPciDevice::new("test1", 0, Arc::downgrade(&bus)); + let dev_ops: Arc> = Arc::new(Mutex::new(pci_dev.clone())); pci_dev.realize().unwrap(); - let info = PciBus::find_attached_bus(&locked_pci_host.root_bus, "test1"); + let info = PciBus::find_attached_bus(&locked_pci_host.child_bus().unwrap(), "test1"); assert!(info.is_some()); let res = PciBus::detach_device(&bus, &dev_ops); assert!(res.is_ok()); - let info = PciBus::find_attached_bus(&locked_pci_host.root_bus, "test1"); + let info = PciBus::find_attached_bus(&locked_pci_host.child_bus().unwrap(), "test1"); assert!(info.is_none()); + + clean_pcidevops_type(); } } diff --git a/pci/src/config.rs b/devices/src/pci/config.rs similarity index 55% rename from pci/src/config.rs rename to devices/src/pci/config.rs index c2b4c882cdb8d6ab44040ff1a7034eb116ee590a..84bf48d158c034565eaa114a4e9e28ec0cead901 100644 --- a/pci/src/config.rs +++ b/devices/src/pci/config.rs @@ -13,14 +13,18 @@ use std::collections::HashSet; use std::sync::{Arc, Mutex}; -use address_space::Region; +use anyhow::{anyhow, Context, Result}; +use log::{error, info, warn}; -use crate::errors::{ErrorKind, Result, ResultExt}; -use crate::msix::Msix; -use crate::{ +use crate::pci::intx::Intx; +use crate::pci::msix::{Msix, MSIX_TABLE_ENTRY_SIZE}; +use crate::pci::{ le_read_u16, le_read_u32, le_read_u64, le_write_u16, le_write_u32, le_write_u64, - pci_ext_cap_next, PciBus, BDF_FUNC_SHIFT, + pci_ext_cap_next, PciBus, PciError, BDF_FUNC_SHIFT, }; +use crate::{convert_bus_ref, Bus, PCI_BUS}; +use address_space::Region; +use util::num_ops::ranges_overlap; /// Size in bytes of the configuration space of legacy PCI device. pub const PCI_CONFIG_SPACE_SIZE: usize = 256; @@ -38,6 +42,7 @@ pub const DEVICE_ID: u8 = 0x02; /// Command register. pub const COMMAND: u8 = 0x04; pub const REVISION_ID: usize = 0x08; +pub const CLASS_PI: u16 = 0x09; /// Sub-Class Code Register. pub const SUB_CLASS_CODE: u8 = 0x0a; pub const SUBSYSTEM_VENDOR_ID: usize = 0x2c; @@ -60,8 +65,8 @@ pub const MEMORY_BASE: u8 = 0x20; pub const PREF_MEMORY_BASE: u8 = 0x24; /// Prefetchable memory limit register. pub const PREF_MEMORY_LIMIT: u8 = 0x26; -pub const ROM_ADDRESS: usize = 0x30; -pub const ROM_ADDRESS1: usize = 0x38; +const ROM_ADDRESS_ENDPOINT: usize = 0x30; +const ROM_ADDRESS_BRIDGE: usize = 0x38; /// 64-bit prefetchable memory addresses. pub const PREF_MEM_RANGE_64BIT: u8 = 0x01; @@ -77,59 +82,22 @@ pub const CLASS_CODE_HOST_BRIDGE: u16 = 0x0600; pub const CLASS_CODE_ISA_BRIDGE: u16 = 0x0601; /// Class code of PCI-to-PCI bridge. pub const CLASS_CODE_PCI_BRIDGE: u16 = 0x0604; - +/// Type 0 configuration Space Header Layout. +pub const HEADER_TYPE_ENDPOINT: u8 = 0x0; /// Type 1 configuration Space Header Layout. pub const HEADER_TYPE_BRIDGE: u8 = 0x01; /// Multi-function device. pub const HEADER_TYPE_MULTIFUNC: u8 = 0x80; - +/// The vendor ID for Red Hat / Qumranet. +pub const PCI_VENDOR_ID_REDHAT_QUMRANET: u16 = 0x1af4; /// The vendor ID for PCI devices other than virtio. 
pub const PCI_VENDOR_ID_REDHAT: u16 = 0x1b36; - -/// PCI Express capability registers, same as kernel defines -/// Link Training -pub const PCI_EXP_LNKSTA: u16 = 18; -/// Data Link Layer Link Active -pub const PCI_EXP_LNKSTA_DLLLA: u16 = 0x2000; -/// Negotiated Link Width -pub const PCI_EXP_LNKSTA_NLW: u16 = 0x03f0; -/// Slot Control -pub const PCI_EXP_SLTCTL: u16 = 24; -/// Power Controller Control -pub const PCI_EXP_SLTCTL_PCC: u16 = 0x0400; -/// Attention Button Pressed Enable -pub const PCI_EXP_SLTCTL_ABPE: u16 = 0x0001; -/// Presence Detect Changed Enable -pub const PCI_EXP_SLTCTL_PDCE: u16 = 0x0008; -/// Command Completed Interrupt Enable -pub const PCI_EXP_SLTCTL_CCIE: u16 = 0x0010; -/// Power Indicator off -pub const PCI_EXP_SLTCTL_PWR_IND_OFF: u16 = 0x0300; -/// Power Indicator on -pub const PCI_EXP_SLTCTL_PWR_IND_ON: u16 = 0x0100; -/// Slot Status -pub const PCI_EXP_SLTSTA: u16 = 26; -/// Presence Detect Changed -pub const PCI_EXP_SLTSTA_PDC: u16 = 0x0008; -/// Presence Detect State -pub const PCI_EXP_SLTSTA_PDS: u16 = 0x0040; - -/// Hot plug event -/// Presence detect changed -pub const PCI_EXP_HP_EV_PDC: u16 = PCI_EXP_SLTCTL_PDCE; -/// Attention button pressed -pub const PCI_EXP_HP_EV_ABP: u16 = PCI_EXP_SLTCTL_ABPE; -/// Command completed -pub const PCI_EXP_HP_EV_CCI: u16 = PCI_EXP_SLTCTL_CCIE; +/// The sub device ID for Red Hat / Qumranet. +pub const PCI_SUBDEVICE_ID_QEMU: u16 = 0x1100; const PCI_CONFIG_HEAD_END: u8 = 64; const NEXT_CAP_OFFSET: u8 = 0x01; const STATUS_CAP_LIST: u16 = 0x0010; -const PCIE_CAP_VERSION_SHIFT: u8 = 16; -const PCIE_CAP_NEXT_OFFSET_SHIFT: u8 = 20; -const PCIE_CAP_SIZE: u8 = 0x3c; -const PCIE_CAP_VERSION_2: u16 = 0x0002; -const PCIE_CAP_SLOT_IMPLEMENTED: u16 = 0x0100; /// 16 bits PCI Status. pub const STATUS: u8 = 0x06; @@ -141,18 +109,25 @@ const IO_LIMIT: u8 = 0x1d; const PREF_MEM_BASE_UPPER: u8 = 0x28; const CAP_LIST: u8 = 0x34; const INTERRUPT_LINE: u8 = 0x3c; -const BRIDGE_CONTROL: u8 = 0x3e; +pub const INTERRUPT_PIN: u8 = 0x3d; +pub const BRIDGE_CONTROL: u8 = 0x3e; const BRIDGE_CTL_PARITY_ENABLE: u16 = 0x0001; const BRIDGE_CTL_SERR_ENABLE: u16 = 0x0002; const BRIDGE_CTL_ISA_ENABLE: u16 = 0x0004; const BRIDGE_CTL_VGA_ENABLE: u16 = 0x0008; const BRIDGE_CTL_VGA_16BIT_DEC: u16 = 0x0010; -const BRIDGE_CTL_SEC_BUS_RESET: u16 = 0x0040; +pub const BRIDGE_CTL_SEC_BUS_RESET: u16 = 0x0040; +const BRIDGE_CTL_FAST_BACK: u16 = 0x0080; +const BRIDGE_CTL_DISCARD_TIMER: u16 = 0x0100; +const BRIDGE_CTL_SEC_DISCARD_TIMER: u16 = 0x0200; const BRIDGE_CTL_DISCARD_TIMER_STATUS: u16 = 0x0400; +const BRIDGE_CTL_DISCARD_TIMER_SERR_E: u16 = 0x0800; pub const COMMAND_BUS_MASTER: u16 = 0x0004; const COMMAND_SERR_ENABLE: u16 = 0x0100; +#[cfg(test)] +const COMMAND_FAST_BACK: u16 = 0x0200; pub const COMMAND_INTERRUPT_DISABLE: u16 = 0x0400; const STATUS_PARITY_ERROR: u16 = 0x0100; @@ -168,98 +143,195 @@ pub const MEM_BASE_ADDR_MASK: u64 = 0xffff_ffff_ffff_fff0; pub const BAR_MEM_64BIT: u8 = 0x04; const BAR_PREFETCH: u8 = 0x08; pub const BAR_SPACE_UNMAPPED: u64 = 0xffff_ffff_ffff_ffff; +/// The maximum Bar ID numbers of a Type 0 device +const BAR_NUM_MAX_FOR_ENDPOINT: u8 = 6; +/// The maximum Bar ID numbers of a Type 1 device +const BAR_NUM_MAX_FOR_BRIDGE: u8 = 2; +/// mmio bar's minimum size shall be 4KB +pub const MINIMUM_BAR_SIZE_FOR_MMIO: usize = 0x1000; +/// pio bar's minimum size shall be 4B +const MINIMUM_BAR_SIZE_FOR_PIO: usize = 0x4; + +/// PCI Express capability registers, same as kernel defines + +const PCI_EXT_CAP_VER_SHIFT: u8 = 16; +const PCI_EXT_CAP_NEXT_SHIFT: u8 
= 20; +const PCI_EXP_VER2_SIZEOF: u8 = 0x3c; +const PCI_EXP_FLAGS_VER2: u16 = 0x0002; +const PCI_EXP_FLAGS_SLOT: u16 = 0x0100; +// PCIe type flag +const PCI_EXP_FLAGS_TYPE_SHIFT: u16 = 4; +const PCI_EXP_FLAGS_TYPE: u16 = 0x00f0; // Role-Based error reporting. -const PCIE_CAP_RBER: u32 = 0x8000; -// Correctable error reporting. -const PCIE_CAP_DEV_CER: u16 = 0x01; -// Non-Fatal error reporting. -const PCIE_CAP_DEV_NFER: u16 = 0x02; -// Fatal error reporting. -const PCIE_CAP_DEV_FER: u16 = 0x04; -// Unsupported request reporting. -const PCIE_CAP_DEV_URR: u16 = 0x08; -// Max link speed. -const PCIE_CAP_MLS_16GT: u32 = 0x0000_0004; -// Maximum link width. -const PCIE_CAP_MLW_X32: u32 = 0x0000_0200; +const PCI_EXP_DEVCAP_RBER: u32 = 0x8000; + +// Correctable error reporting enable. +const PCI_EXP_DEVCTL_CERE: u16 = 0x01; +// Non-Fatal error reporting enable. +const PCI_EXP_DEVCTL_NFERE: u16 = 0x02; +// Fatal error reporting enable. +const PCI_EXP_DEVCTL_FERE: u16 = 0x04; +// Unsupported request reporting enable. +const PCI_EXP_DEVCTL_URRE: u16 = 0x08; + +// Supported max link speed, 16GT for default. +const PCI_EXP_LNKCAP_MLS_16GT: u32 = 0x0000_0004; +// Supported maximum link width, X32 for default. +const PCI_EXP_LNKCAP_MLW_X32: u32 = 0x0000_0200; // Active state power management support. -const PCIE_CAP_ASPM_L0S: u32 = 0x0000_0400; -// Link bandwidth notification capability -const PCIE_CAP_LINK_LBNC: u32 = 0x0020_0000; -// Data link layer link active reporting capable -const PCIE_CAP_LINK_DLLLARC: u32 = 0x0010_0000; -const PCIE_CAP_PORT_NUM_SHIFT: u8 = 24; -// Current link speed. -const PCIE_CAP_CLS_X1: u16 = 0x0001; -// Negotiated link width. -const PCIE_CAP_NLW_2_5GT: u16 = 0x0010; +const PCI_EXP_LNKCAP_ASPMS_0S: u32 = 0x0000_0400; +// Link bandwidth notification capability. +const PCI_EXP_LNKCAP_LBNC: u32 = 0x0020_0000; +// Data link layer link active reporting capable. +const PCI_EXP_LNKCAP_DLLLARC: u32 = 0x0010_0000; +// Port number reg's shift. +const PCI_EXP_LNKCAP_PN_SHIFT: u8 = 24; + +/// Link Training +pub const PCI_EXP_LNKSTA: u16 = 18; +// Current link speed, 2.5GB for default. +pub const PCI_EXP_LNKSTA_CLS_2_5GB: u16 = 0x0001; +// Negotiated link width, X1 for default. +pub const PCI_EXP_LNKSTA_NLW_X1: u16 = 0x0010; +/// Data Link Layer Link Active +pub const PCI_EXP_LNKSTA_DLLLA: u16 = 0x2000; +/// Negotiated Link Width +pub const PCI_EXP_LNKSTA_NLW: u16 = 0x03f0; + // Attention button present. -const PCIE_CAP_SLOTCAP_ABP: u32 = 0x0000_0001; +const PCI_EXP_SLTCAP_ABP: u32 = 0x0000_0001; // Power controller present. -const PCIE_CAP_SLOTCAP_PCP: u32 = 0x0000_0002; +const PCI_EXP_SLTCAP_PCP: u32 = 0x0000_0002; // Attention indicator present. -const PCIE_CAP_SLOTCAP_AIP: u32 = 0x0000_0008; +const PCI_EXP_SLTCAP_AIP: u32 = 0x0000_0008; // Power indicator present. -const PCIE_CAP_SLOTCAP_PIP: u32 = 0x0000_0010; +const PCI_EXP_SLTCAP_PIP: u32 = 0x0000_0010; // Hot-Plug surprise. -const PCIE_CAP_SLOTCAP_HPS: u32 = 0x0000_0020; +const PCI_EXP_SLTCAP_HPS: u32 = 0x0000_0020; // Hot-Plug capable. -const PCIE_CAP_SLOTCAP_HPC: u32 = 0x0000_0040; -// Electromechanical interlock present. -const PCIE_CAP_SLOTCAP_EIP: u32 = 0x0002_0000; -const PCIE_CAP_SLOT_NUM_SHIFT: u32 = 19; +const PCI_EXP_SLTCAP_HPC: u32 = 0x0000_0040; +// Physical slot number reg's shift. 
+const PCI_EXP_SLTCAP_PSN_SHIFT: u32 = 19; + +/// Slot Control +pub const PCI_EXP_SLTCTL: u16 = 24; +/// Attention Button Pressed Enable +pub const PCI_EXP_SLTCTL_ABPE: u16 = 0x0001; +/// Presence Detect Changed Enable +pub const PCI_EXP_SLTCTL_PDCE: u16 = 0x0008; +/// Command Completed Interrupt Enable +pub const PCI_EXP_SLTCTL_CCIE: u16 = 0x0010; +/// Hot-Plug Interrupt Enable +pub const PCI_EXP_SLTCTL_HPIE: u16 = 0x0020; // Attention Indicator Control. -const PCIE_CAP_SLOT_AIC_MASK: u16 = 0x00c0; -const PCIE_CAP_SLOT_AIC_OFF: u16 = 0x00c0; +const PCI_EXP_SLTCTL_AIC: u16 = 0x00c0; +// Attention Indicator off. +const PCI_EXP_SLTCTL_ATTN_IND_OFF: u16 = 0x00c0; // Power Indicator Control. -const PCIE_CAP_SLOT_PIC_MASK: u16 = 0x0300; -const PCIE_CAP_SLOT_PIC_OFF: u16 = 0x0300; -// Attention button pressed enable. -const PCIE_CAP_SLOT_ABP: u16 = 0x0001; -// Presence detect changed enable. -const PCIE_CAP_SLOT_PDC: u16 = 0x0008; -// Command completed interrupt enable. -const PCIE_CAP_SLOT_CCI: u16 = 0x0010; -// Hot-Plug interrupt enable. -const PCIE_CAP_SLOT_HPI: u16 = 0x0020; -// Power controller control. -const PCIE_CAP_SLOT_PCC: u16 = 0x0400; +pub(crate) const PCI_EXP_SLTCTL_PIC: u16 = 0x0300; +// Power Indicator blinking. +pub(crate) const PCI_EXP_SLTCTL_PWR_IND_BLINK: u16 = 0x200; +/// Power Indicator on +pub const PCI_EXP_SLTCTL_PWR_IND_ON: u16 = 0x0100; +// Power Indicator off. +pub const PCI_EXP_SLTCTL_PWR_IND_OFF: u16 = 0x0300; +/// Power Controller Control +pub const PCI_EXP_SLTCTL_PCC: u16 = 0x0400; +/// Power Off +pub const PCI_EXP_SLTCTL_PWR_OFF: u16 = 0x0400; // Electromechanical interlock control. -const PCIE_CAP_SLOT_EIC: u16 = 0x0800; +const PCI_EXP_SLTCTL_EIC: u16 = 0x0800; + +/// Slot Status +pub const PCI_EXP_SLTSTA: u16 = 26; +/// Attention Button Pressed +pub const PCI_EXP_SLTSTA_ABP: u16 = 0x0001; +/// Power Fault Detected +const PCI_EXP_SLTSTA_PFD: u16 = 0x0002; +/// MRL Sensor Changed +const PCI_EXP_SLTSTA_MRLSC: u16 = 0x0004; +/// Presence Detect Changed +pub const PCI_EXP_SLTSTA_PDC: u16 = 0x0008; +/// Command Completed +pub const PCI_EXP_SLTSTA_CC: u16 = 0x0010; +/// Presence Detect State +pub const PCI_EXP_SLTSTA_PDS: u16 = 0x0040; +pub const PCI_EXP_SLOTSTA_EVENTS: u16 = PCI_EXP_SLTSTA_ABP + | PCI_EXP_SLTSTA_PFD + | PCI_EXP_SLTSTA_MRLSC + | PCI_EXP_SLTSTA_PDC + | PCI_EXP_SLTSTA_CC; +pub const PCI_EXP_HP_EV_SPT: u16 = PCI_EXP_SLTCTL_ABPE | PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_CCIE; + // System error on correctable error enable. -const PCIE_CAP_ROOT_SECEE: u16 = 0x01; +const PCI_EXP_RTCTL_SECEE: u16 = 0x01; // System error on non-fatal error enable. -const PCIE_CAP_ROOT_SENFEE: u16 = 0x02; +const PCI_EXP_RTCTL_SENFEE: u16 = 0x02; // System error on fatal error enable. -const PCIE_CAP_ROOT_SEFEE: u16 = 0x04; -const PCIE_CAP_ARI: u32 = 0x0000_0020; +const PCI_EXP_RTCTL_SEFEE: u16 = 0x04; + +// Alternative Routing-ID. +const PCI_EXP_DEVCAP2_ARI: u32 = 0x0000_0020; // Extended Fmt Field Supported. -const PCIE_CAP_DEV_EFFS: u32 = 0x0010_0000; +const PCI_EXP_DEVCAP2_EFF: u32 = 0x0010_0000; // End-End TLP Prefix Supported. -const PCIE_CAP_DEV_EETPS: u32 = 0x0020_0000; -const PCIE_CAP_ARI_ENABLE: u16 = 0x0020; +const PCI_EXP_DEVCAP2_EETLPP: u32 = 0x0020_0000; +// Alternative Routing-ID. +const PCI_EXP_DEVCTL2_ARI: u16 = 0x0020; // End-End TLP Prefix Blocking -const PCIE_CAP_DEV_EETPB: u16 = 0x8000; +const PCI_EXP_DEVCTL2_EETLPPB: u16 = 0x8000; + // Supported Link Speeds Vector. 
-const PCIE_CAP_LINK_SLSV_2_5GT: u32 = 0x02; -const PCIE_CAP_LINK_SLSV_5GT: u32 = 0x04; -const PCIE_CAP_LINK_SLSV_8GT: u32 = 0x08; -const PCIE_CAP_LINK_SLSV_16GT: u32 = 0x10; -// Target Link Speed. -const PCIE_CAP_LINK_TLS_16GT: u16 = 0x0004; -// PCIe type flag -const PCI_EXP_FLAGS_TYPE_SHIFT: u16 = 4; -const PCI_EXP_FLAGS_TYPE: u16 = 0x00f0; +const PCI_EXP_LNKCAP2_SLS_2_5GB: u32 = 0x02; +const PCI_EXP_LNKCAP2_SLS_5_0GB: u32 = 0x04; +const PCI_EXP_LNKCAP2_SLS_8_0GB: u32 = 0x08; +const PCI_EXP_LNKCAP2_SLS_16_0GB: u32 = 0x10; + +// Target Link Speed, 16GT for default. +const PCI_EXP_LNKCTL2_TLS_16_0GT: u16 = 0x0004; + +/// Hot plug event +/// Presence detect changed +pub const PCI_EXP_HP_EV_PDC: u16 = PCI_EXP_SLTCTL_PDCE; +/// Attention button pressed +pub const PCI_EXP_HP_EV_ABP: u16 = PCI_EXP_SLTCTL_ABPE; +/// Command completed +pub const PCI_EXP_HP_EV_CCI: u16 = PCI_EXP_SLTCTL_CCIE; + +// XHCI device id +pub const PCI_DEVICE_ID_REDHAT_XHCI: u16 = 0x000d; + +// PvPanic device id +pub const PCI_DEVICE_ID_REDHAT_PVPANIC: u16 = 0x0011; + +// Device classes and subclasses +pub const PCI_CLASS_MEMORY_RAM: u16 = 0x0500; +pub const PCI_CLASS_SERIAL_USB: u16 = 0x0c03; +pub const PCI_CLASS_SYSTEM_OTHER: u16 = 0x0880; /// Type of bar region. -#[derive(PartialEq, Debug, Copy, Clone)] +#[derive(PartialEq, Eq, Debug, Copy, Clone)] pub enum RegionType { Io, Mem32Bit, Mem64Bit, } +impl std::fmt::Display for RegionType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + RegionType::Io => "PIO", + RegionType::Mem32Bit => "32 bits MMIO", + RegionType::Mem64Bit => "64 bits MMIO", + } + ) + } +} + /// Registered bar. #[derive(Clone)] pub struct Bar { @@ -267,6 +339,8 @@ pub struct Bar { address: u64, pub size: u64, pub region: Option, + pub parent_io_region: Option>>, + pub parent_mem_region: Option>>, } /// Capbility ID defined by PCIe/PCI spec. @@ -309,6 +383,8 @@ pub enum PcieDevType { /// Configuration space of PCI/PCIe device. #[derive(Clone)] pub struct PciConfig { + /// Device number and function number. + pub devfn: u8, /// Configuration space data. pub config: Vec, /// Mask of writable bits. @@ -325,8 +401,10 @@ pub struct PciConfig { pub last_ext_cap_end: u16, /// MSI-X information. pub msix: Option>>, - /// Offset of the PCIe extended capability. - pub ext_cap_offset: u16, + /// Offset of the PCI express capability. + pub pci_express_cap_offset: u16, + /// INTx information. + pub intx: Option>>, } impl PciConfig { @@ -336,7 +414,7 @@ impl PciConfig { /// /// * `config_size` - Configuration size in bytes. /// * `nr_bar` - Number of BARs. 
- pub fn new(config_size: usize, nr_bar: u8) -> Self { + pub fn new(devfn: u8, config_size: usize, nr_bar: u8) -> Self { let mut bars = Vec::new(); for _ in 0..nr_bar as usize { bars.push(Bar { @@ -344,19 +422,23 @@ impl PciConfig { address: 0, size: 0, region: None, + parent_io_region: None, + parent_mem_region: None, }); } PciConfig { + devfn, config: vec![0; config_size], write_mask: vec![0; config_size], write_clear_mask: vec![0; config_size], bars, - last_cap_end: PCI_CONFIG_HEAD_END as u16, + last_cap_end: u16::from(PCI_CONFIG_HEAD_END), last_ext_cap_offset: 0, last_ext_cap_end: PCI_CONFIG_SPACE_SIZE as u16, msix: None, - ext_cap_offset: PCI_CONFIG_HEAD_END as u16, + pci_express_cap_offset: u16::from(PCI_CONFIG_HEAD_END), + intx: None, } } @@ -403,7 +485,11 @@ impl PciConfig { | BRIDGE_CTL_ISA_ENABLE | BRIDGE_CTL_VGA_ENABLE | BRIDGE_CTL_VGA_16BIT_DEC - | BRIDGE_CTL_SEC_BUS_RESET, + | BRIDGE_CTL_SEC_BUS_RESET + | BRIDGE_CTL_FAST_BACK + | BRIDGE_CTL_DISCARD_TIMER + | BRIDGE_CTL_SEC_DISCARD_TIMER + | BRIDGE_CTL_DISCARD_TIMER_SERR_E, )?; Ok(()) } @@ -437,11 +523,49 @@ impl PciConfig { /// /// * `offset` - Offset in the configuration space from which to read. /// * `data` - Buffer to put read data. - pub fn read(&self, offset: usize, buf: &mut [u8]) { + pub fn read(&mut self, offset: usize, buf: &mut [u8]) { + if let Err(err) = self.validate_config_boundary(offset, buf) { + warn!("invalid read: {:?}", err); + return; + } + let size = buf.len(); + // SAFETY: checked in "validate_config_boundary". + if ranges_overlap(offset, size, STATUS as usize, 1).unwrap() { + if let Some(intx) = &self.intx { + if intx.lock().unwrap().level == 1 { + self.config[STATUS as usize] |= STATUS_INTERRUPT; + } else { + self.config[STATUS as usize] &= !STATUS_INTERRUPT; + } + } + } + buf[..].copy_from_slice(&self.config[offset..(offset + size)]); } + fn validate_config_boundary(&self, offset: usize, data: &[u8]) -> Result<()> { + // According to pcie specification 7.2.2.2 PCI Express Device Requirements: + if data.len() > 4 { + return Err(anyhow!(PciError::InvalidConf( + "data size".to_string(), + format!("{}", data.len()) + ))); + } + + offset + .checked_add(data.len()) + .filter(|&end| end <= self.config.len()) + .with_context(|| { + PciError::InvalidConf( + "config size".to_string(), + format!("offset {} with len {}", offset, data.len()), + ) + })?; + + Ok(()) + } + /// Common writing to configuration space. /// /// # Arguments @@ -449,19 +573,134 @@ impl PciConfig { /// * `offset` - Offset in the configuration space from which to write. /// * `data` - Data to write. /// * `dev_id` - Device id to send MSI/MSI-X. 
- pub fn write(&mut self, mut offset: usize, data: &[u8], dev_id: u16) { + pub fn write( + &mut self, + mut offset: usize, + data: &[u8], + dev_id: u16, + #[cfg(target_arch = "x86_64")] io_region: Option<&Region>, + mem_region: Option<&Region>, + ) { + if let Err(err) = self.validate_config_boundary(offset, data) { + error!("invalid write: {:?}", err); + return; + } + let cloned_data = data.to_vec(); + let old_offset = offset; for data in &cloned_data { self.config[offset] = (self.config[offset] & (!self.write_mask[offset])) | (data & self.write_mask[offset]); self.config[offset] &= !(data & self.write_clear_mask[offset]); offset += 1; } + + let (bar_num, rom_addr) = match self.config[HEADER_TYPE as usize] & HEADER_TYPE_BRIDGE { + HEADER_TYPE_BRIDGE => (BAR_NUM_MAX_FOR_BRIDGE as usize, ROM_ADDRESS_BRIDGE), + _ => (BAR_NUM_MAX_FOR_ENDPOINT as usize, ROM_ADDRESS_ENDPOINT), + }; + + let size = data.len(); + // SAFETY: checked in "validate_config_boundary". + let cmd_overlap = ranges_overlap(old_offset, size, COMMAND as usize, 1).unwrap(); + if cmd_overlap + || ranges_overlap(old_offset, size, BAR_0 as usize, REG_SIZE * bar_num).unwrap() + || ranges_overlap(old_offset, size, rom_addr, 4).unwrap() + { + if let Err(e) = self.update_bar_mapping( + #[cfg(target_arch = "x86_64")] + io_region, + mem_region, + ) { + error!("{:?}", e); + } + } + + if cmd_overlap { + if let Some(intx) = &self.intx { + let cmd = le_read_u16(self.config.as_slice(), COMMAND as usize).unwrap(); + let enabled = cmd & COMMAND_INTERRUPT_DISABLE == 0; + + let mut locked_intx = intx.lock().unwrap(); + if locked_intx.enabled != enabled { + if !enabled { + locked_intx.change_irq_level(-(locked_intx.level as i8)); + } else { + locked_intx.change_irq_level(locked_intx.level as i8); + } + locked_intx.enabled = enabled; + } + } + } + if let Some(msix) = &mut self.msix { - msix.lock().unwrap().write_config(&self.config, dev_id); + if msix.lock().unwrap().is_enabled(&self.config) { + if let Some(intx) = &self.intx { + intx.lock().unwrap().notify(0); + } + } + + msix.lock() + .unwrap() + .write_config(&self.config, dev_id, old_offset, data); } } + /// Reset type1 specific configuration space. + pub fn reset_bridge_regs(&mut self) -> Result<()> { + le_write_u32(&mut self.config, PRIMARY_BUS_NUM as usize, 0)?; + + self.config[IO_BASE as usize] = 0xff; + self.config[IO_LIMIT as usize] = 0; + // set memory/pref memory's base to 0xFFFF and limit to 0. + le_write_u32(&mut self.config, MEMORY_BASE as usize, 0xffff)?; + le_write_u32(&mut self.config, PREF_MEMORY_BASE as usize, 0xffff)?; + le_write_u64(&mut self.config, PREF_MEM_BASE_UPPER as usize, 0)?; + Ok(()) + } + + fn reset_single_writable_reg(&mut self, offset: usize) -> Result<()> { + let writable_command = le_read_u16(&self.write_mask, offset).unwrap() + | le_read_u16(&self.write_clear_mask, offset).unwrap(); + let old_command = le_read_u16(&self.config, offset).unwrap(); + + le_write_u16(&mut self.config, offset, old_command & !writable_command) + } + + /// Reset bits that's writable in the common configuration fields for both type0 and type1 + /// devices. 
+ pub fn reset_common_regs(&mut self) -> Result<()> { + self.reset_single_writable_reg(COMMAND as usize)?; + self.reset_single_writable_reg(STATUS as usize)?; + self.reset_single_writable_reg(INTERRUPT_LINE as usize)?; + self.config[CACHE_LINE_SIZE as usize] = 0; + + Ok(()) + } + + /// General reset process for pci devices + pub fn reset(&mut self) -> Result<()> { + if let Some(intx) = &self.intx { + intx.lock().unwrap().reset(); + } + + self.reset_common_regs()?; + + if let Err(e) = self.update_bar_mapping( + #[cfg(target_arch = "x86_64")] + None, + None, + ) { + error!("{:?}", e); + } + + if let Some(msix) = &self.msix { + msix.lock().unwrap().reset(); + } + + Ok(()) + } + /// Get base offset of the capability in PCIe/PCI configuration space. /// /// # Arguments @@ -498,7 +737,7 @@ impl PciConfig { return BAR_SPACE_UNMAPPED; } let bar_val = le_read_u32(&self.config, offset).unwrap(); - return (bar_val & IO_BASE_ADDR_MASK) as u64; + return u64::from(bar_val & IO_BASE_ADDR_MASK); } if command & COMMAND_MEMORY_SPACE == 0 { @@ -508,7 +747,7 @@ impl PciConfig { RegionType::Io => BAR_SPACE_UNMAPPED, RegionType::Mem32Bit => { let bar_val = le_read_u32(&self.config, offset).unwrap(); - (bar_val & MEM_BASE_ADDR_MASK as u32) as u64 + u64::from(bar_val & MEM_BASE_ADDR_MASK as u32) } RegionType::Mem64Bit => { let bar_val = le_read_u64(&self.config, offset).unwrap(); @@ -533,20 +772,22 @@ impl PciConfig { region_type: RegionType, prefetchable: bool, size: u64, - ) { + ) -> Result<()> { + self.validate_bar_id(id)?; + self.validate_bar_size(region_type, size)?; let offset: usize = BAR_0 as usize + id * REG_SIZE; match region_type { RegionType::Io => { - let write_mask = (!(size - 1) as u32) & 0xffff_fffc; + let write_mask = !(size - 1) as u32; le_write_u32(&mut self.write_mask, offset, write_mask).unwrap(); self.config[offset] = BAR_IO_SPACE; } RegionType::Mem32Bit => { - let write_mask = (!(size - 1) as u32) & 0xffff_fff0; + let write_mask = !(size - 1) as u32; le_write_u32(&mut self.write_mask, offset, write_mask).unwrap(); } RegionType::Mem64Bit => { - let write_mask = !(size - 1) & 0xffff_ffff_ffff_fff0; + let write_mask = !(size - 1); le_write_u64(&mut self.write_mask, offset, write_mask).unwrap(); self.config[offset] = BAR_MEM_64BIT; } @@ -559,6 +800,7 @@ impl PciConfig { self.bars[id].address = BAR_SPACE_UNMAPPED; self.bars[id].size = size; self.bars[id].region = Some(region); + Ok(()) } /// Unregister region in PciConfig::bars. @@ -566,8 +808,8 @@ impl PciConfig { /// # Arguments /// /// * `bus` - The bus which region registered. 
- pub fn unregister_bars(&mut self, bus: &Arc>) -> Result<()> { - let locked_bus = bus.lock().unwrap(); + pub fn unregister_bars(&mut self, bus: &Arc>) -> Result<()> { + PCI_BUS!(bus, locked_bus, pci_bus); for bar in self.bars.iter_mut() { if bar.address == BAR_SPACE_UNMAPPED || bar.size == 0 { continue; @@ -577,18 +819,18 @@ impl PciConfig { { #[cfg(target_arch = "x86_64")] if let Some(region) = bar.region.as_ref() { - locked_bus + pci_bus .io_region .delete_subregion(region) - .chain_err(|| "Failed to unregister io bar")?; + .with_context(|| "Failed to unregister io bar")?; } } _ => { if let Some(region) = bar.region.as_ref() { - locked_bus + pci_bus .mem_region .delete_subregion(region) - .chain_err(|| "Failed to unregister mem bar")?; + .with_context(|| "Failed to unregister mem bar")?; } } } @@ -597,6 +839,23 @@ impl PciConfig { Ok(()) } + fn is_bar_region_empty( + &self, + id: usize, + #[cfg(target_arch = "x86_64")] io_region: Option<&Region>, + mem_region: Option<&Region>, + ) -> bool { + if self.bars[id].region_type == RegionType::Io { + #[cfg(target_arch = "x86_64")] + if io_region.is_none() { + return true; + } + } else if mem_region.is_none() { + return true; + } + false + } + /// Update bar space mapping once the base address is updated by the guest. /// /// # Arguments @@ -605,8 +864,8 @@ impl PciConfig { /// * `mem_region`: Memory space region which the parent bridge manages. pub fn update_bar_mapping( &mut self, - #[cfg(target_arch = "x86_64")] io_region: &Region, - mem_region: &Region, + #[cfg(target_arch = "x86_64")] io_region: Option<&Region>, + mem_region: Option<&Region>, ) -> Result<()> { for id in 0..self.bars.len() { if self.bars[id].size == 0 { @@ -617,32 +876,81 @@ impl PciConfig { if self.bars[id].address == new_addr { continue; } + if self.bars[id].address != BAR_SPACE_UNMAPPED { match self.bars[id].region_type { + #[cfg(target_arch = "x86_64")] RegionType::Io => { - #[cfg(target_arch = "x86_64")] - io_region - .delete_subregion(self.bars[id].region.as_ref().unwrap()) - .chain_err(|| format!("Failed to unmap BAR{} in I/O space.", id))?; + if self.bars[id].parent_io_region.is_some() { + self.bars[id] + .parent_io_region + .as_ref() + .unwrap() + .lock() + .unwrap() + .delete_subregion(self.bars[id].region.as_ref().unwrap()) + .with_context(|| { + format!("Failed to unmap BAR{} in I/O space.", id) + })?; + } + } + _ => { + if self.bars[id].parent_mem_region.is_some() { + self.bars[id] + .parent_mem_region + .as_ref() + .unwrap() + .lock() + .unwrap() + .delete_subregion(self.bars[id].region.as_ref().unwrap()) + .with_context(|| PciError::UnregMemBar(id))? 
+ } } - _ => mem_region - .delete_subregion(self.bars[id].region.as_ref().unwrap()) - .chain_err(|| ErrorKind::UnregMemBar(id))?, } + + info!( + "pci dev {} delete bar {} mapping: addr 0x{:X} size {}", + self.devfn, id, self.bars[id].address, self.bars[id].size + ); + self.bars[id].address = BAR_SPACE_UNMAPPED; } + + if self.is_bar_region_empty( + id, + #[cfg(target_arch = "x86_64")] + io_region, + mem_region, + ) { + return Ok(()); + } + if new_addr != BAR_SPACE_UNMAPPED { match self.bars[id].region_type { + #[cfg(target_arch = "x86_64")] RegionType::Io => { - #[cfg(target_arch = "x86_64")] io_region + .unwrap() + .add_subregion(self.bars[id].region.clone().unwrap(), new_addr) + .with_context(|| format!("Failed to map BAR{} in I/O space.", id))?; + self.bars[id].parent_io_region = + Some(Arc::new(Mutex::new(io_region.unwrap().clone()))); + } + _ => { + mem_region + .unwrap() .add_subregion(self.bars[id].region.clone().unwrap(), new_addr) - .chain_err(|| format!("Failed to map BAR{} in I/O space.", id))?; + .with_context(|| PciError::UnregMemBar(id))?; + self.bars[id].parent_mem_region = + Some(Arc::new(Mutex::new(mem_region.unwrap().clone()))); } - _ => mem_region - .add_subregion(self.bars[id].region.clone().unwrap(), new_addr) - .chain_err(|| ErrorKind::UnregMemBar(id))?, } + + info!( + "pci dev {} update bar {} mapping: addr 0x{:X} size {}", + self.devfn, id, new_addr, self.bars[id].size + ); + self.bars[id].address = new_addr; } } @@ -658,7 +966,7 @@ impl PciConfig { pub fn add_pci_cap(&mut self, id: u8, size: usize) -> Result { let offset = self.last_cap_end as usize; if offset + size > PCI_CONFIG_SPACE_SIZE { - return Err(ErrorKind::AddPciCap(id, size).into()); + return Err(anyhow!(PciError::AddPciCap(id, size))); } self.config[offset] = id; @@ -689,7 +997,7 @@ impl PciConfig { pub fn add_pcie_ext_cap(&mut self, id: u16, size: usize, version: u32) -> Result { let offset = self.last_ext_cap_end as usize; if offset + size > PCIE_CONFIG_SPACE_SIZE { - return Err(ErrorKind::AddPcieExtCap(id, size).into()); + return Err(anyhow!(PciError::AddPcieExtCap(id, size))); } let regs_num = if size % REG_SIZE == 0 { @@ -705,14 +1013,14 @@ impl PciConfig { le_write_u32( &mut self.config, offset, - id as u32 | (version << PCIE_CAP_VERSION_SHIFT), + u32::from(id) | (version << PCI_EXT_CAP_VER_SHIFT), )?; if self.last_ext_cap_offset != 0 { let old_value = le_read_u32(&self.config, self.last_ext_cap_offset as usize)?; le_write_u32( &mut self.config, self.last_ext_cap_offset as usize, - old_value | ((offset as u32) << PCIE_CAP_NEXT_OFFSET_SHIFT), + old_value | ((offset as u32) << PCI_EXT_CAP_NEXT_SHIFT), )?; } self.last_ext_cap_offset = offset as u16; @@ -724,24 +1032,26 @@ impl PciConfig { /// /// # Arguments /// - /// * `devfn` - Slot number << 3 | function number. + /// * `devfn` - Slot number << 3 | Function number. /// * `port_num` - Port number. /// * `dev_type` - Device type. 
pub fn add_pcie_cap(&mut self, devfn: u8, port_num: u8, dev_type: u8) -> Result { - let cap_offset: usize = self.add_pci_cap(CapId::Pcie as u8, PCIE_CAP_SIZE as usize)?; - self.ext_cap_offset = cap_offset as u16; + let cap_offset: usize = + self.add_pci_cap(CapId::Pcie as u8, PCI_EXP_VER2_SIZEOF as usize)?; + self.pci_express_cap_offset = cap_offset as u16; let mut offset: usize = cap_offset + PcieCap::CapReg as usize; - let pci_type = (dev_type << PCI_EXP_FLAGS_TYPE_SHIFT) as u16 & PCI_EXP_FLAGS_TYPE; + let pci_type = u16::from(dev_type << PCI_EXP_FLAGS_TYPE_SHIFT) & PCI_EXP_FLAGS_TYPE; le_write_u16( &mut self.config, offset, - pci_type | PCIE_CAP_VERSION_2 | PCIE_CAP_SLOT_IMPLEMENTED, + pci_type | PCI_EXP_FLAGS_VER2 | PCI_EXP_FLAGS_SLOT, )?; offset = cap_offset + PcieCap::DevCap as usize; - le_write_u32(&mut self.config, offset, PCIE_CAP_RBER)?; + le_write_u32(&mut self.config, offset, PCI_EXP_DEVCAP_RBER)?; offset = cap_offset + PcieCap::DevCtl as usize; - let mask = PCIE_CAP_DEV_CER | PCIE_CAP_DEV_NFER | PCIE_CAP_DEV_FER | PCIE_CAP_DEV_URR; + let mask = + PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE; le_write_u16(&mut self.write_mask, offset, mask)?; offset = cap_offset + PcieCap::DevStat as usize; le_write_u16(&mut self.write_clear_mask, offset, mask)?; @@ -750,18 +1060,18 @@ impl PciConfig { le_write_u32( &mut self.config, offset, - PCIE_CAP_MLS_16GT - | PCIE_CAP_MLW_X32 - | PCIE_CAP_ASPM_L0S - | PCIE_CAP_LINK_LBNC - | PCIE_CAP_LINK_DLLLARC - | ((port_num as u32) << PCIE_CAP_PORT_NUM_SHIFT), + PCI_EXP_LNKCAP_MLS_16GT + | PCI_EXP_LNKCAP_MLW_X32 + | PCI_EXP_LNKCAP_ASPMS_0S + | PCI_EXP_LNKCAP_LBNC + | PCI_EXP_LNKCAP_DLLLARC + | (u32::from(port_num) << PCI_EXP_LNKCAP_PN_SHIFT), )?; offset = cap_offset + PcieCap::LinkStat as usize; le_write_u16( &mut self.config, offset, - PCIE_CAP_CLS_X1 | PCIE_CAP_NLW_2_5GT, + PCI_EXP_LNKSTA_CLS_2_5GB | PCI_EXP_LNKSTA_NLW_X1, )?; let slot: u8 = devfn >> BDF_FUNC_SHIFT; @@ -769,71 +1079,70 @@ impl PciConfig { le_write_u32( &mut self.config, offset, - PCIE_CAP_SLOTCAP_ABP - | PCIE_CAP_SLOTCAP_PCP - | PCIE_CAP_SLOTCAP_AIP - | PCIE_CAP_SLOTCAP_PIP - | PCIE_CAP_SLOTCAP_HPS - | PCIE_CAP_SLOTCAP_HPC - | PCIE_CAP_SLOTCAP_EIP - | ((slot as u32) << PCIE_CAP_SLOT_NUM_SHIFT), + PCI_EXP_SLTCAP_ABP + | PCI_EXP_SLTCAP_PCP + | PCI_EXP_SLTCAP_AIP + | PCI_EXP_SLTCAP_PIP + | PCI_EXP_SLTCAP_HPS + | PCI_EXP_SLTCAP_HPC + | (u32::from(slot) << PCI_EXP_SLTCAP_PSN_SHIFT), )?; offset = cap_offset + PcieCap::SlotCtl as usize; le_write_u16( &mut self.config, offset, - PCIE_CAP_SLOT_AIC_OFF | PCIE_CAP_SLOT_PIC_OFF | PCIE_CAP_SLOT_PCC, + PCI_EXP_SLTCTL_ATTN_IND_OFF | PCI_EXP_SLTCTL_PWR_IND_OFF | PCI_EXP_SLTCTL_PCC, )?; le_write_u16( &mut self.write_mask, offset, - PCIE_CAP_SLOT_ABP - | PCIE_CAP_SLOT_PDC - | PCIE_CAP_SLOT_CCI - | PCIE_CAP_SLOT_HPI - | PCIE_CAP_SLOT_AIC_MASK - | PCIE_CAP_SLOT_PIC_MASK - | PCIE_CAP_SLOT_PCC - | PCIE_CAP_SLOT_EIC, + PCI_EXP_SLTCTL_ABPE + | PCI_EXP_SLTCTL_PDCE + | PCI_EXP_SLTCTL_CCIE + | PCI_EXP_SLTCTL_HPIE + | PCI_EXP_SLTCTL_AIC + | PCI_EXP_SLTCTL_PIC + | PCI_EXP_SLTCTL_PCC + | PCI_EXP_SLTCTL_EIC, )?; offset = cap_offset + PcieCap::SlotStat as usize; le_write_u16( &mut self.write_clear_mask, offset, - PCIE_CAP_SLOT_ABP | PCIE_CAP_SLOT_PDC | PCIE_CAP_SLOT_CCI, + PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC, )?; offset = cap_offset + PcieCap::RootCtl as usize; le_write_u16( &mut self.write_mask, offset, - PCIE_CAP_ROOT_SECEE | PCIE_CAP_ROOT_SENFEE | PCIE_CAP_ROOT_SEFEE, + PCI_EXP_RTCTL_SECEE | 
PCI_EXP_RTCTL_SENFEE | PCI_EXP_RTCTL_SEFEE, )?; offset = cap_offset + PcieCap::DevCap2 as usize; le_write_u32( &mut self.config, offset, - PCIE_CAP_ARI | PCIE_CAP_DEV_EFFS | PCIE_CAP_DEV_EETPS, + PCI_EXP_DEVCAP2_ARI | PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP, )?; offset = cap_offset + PcieCap::DevCtl2 as usize; le_write_u16( &mut self.write_mask, offset, - PCIE_CAP_ARI_ENABLE | PCIE_CAP_DEV_EETPB, + PCI_EXP_DEVCTL2_ARI | PCI_EXP_DEVCTL2_EETLPPB, )?; offset = cap_offset + PcieCap::LinkCap2 as usize; le_write_u32( - &mut self.write_mask, + &mut self.config, offset, - PCIE_CAP_LINK_SLSV_2_5GT - | PCIE_CAP_LINK_SLSV_5GT - | PCIE_CAP_LINK_SLSV_8GT - | PCIE_CAP_LINK_SLSV_16GT, + PCI_EXP_LNKCAP2_SLS_2_5GB + | PCI_EXP_LNKCAP2_SLS_5_0GB + | PCI_EXP_LNKCAP2_SLS_8_0GB + | PCI_EXP_LNKCAP2_SLS_16_0GB, )?; offset = cap_offset + PcieCap::LinkCtl2 as usize; - le_write_u16(&mut self.write_mask, offset, PCIE_CAP_LINK_TLS_16GT)?; + le_write_u16(&mut self.config, offset, PCI_EXP_LNKCTL2_TLS_16_0GT)?; Ok(cap_offset) } @@ -857,20 +1166,61 @@ impl PciConfig { end_pos - pos } + + fn validate_bar_id(&self, id: usize) -> Result<()> { + if (self.config[HEADER_TYPE as usize] == HEADER_TYPE_ENDPOINT + && id >= BAR_NUM_MAX_FOR_ENDPOINT as usize) + || (self.config[HEADER_TYPE as usize] == HEADER_TYPE_BRIDGE + && id >= BAR_NUM_MAX_FOR_BRIDGE as usize) + { + return Err(anyhow!(PciError::InvalidConf( + "Bar id".to_string(), + id.to_string(), + ))); + } + Ok(()) + } + + fn validate_bar_size(&self, bar_type: RegionType, size: u64) -> Result<()> { + if !size.is_power_of_two() + || (bar_type == RegionType::Io && size < MINIMUM_BAR_SIZE_FOR_PIO as u64) + || (bar_type == RegionType::Mem32Bit && size > u64::from(u32::MAX)) + || (bar_type == RegionType::Io && size > u64::from(u16::MAX)) + { + return Err(anyhow!(PciError::InvalidConf( + "Bar size of type ".to_string() + &bar_type.to_string(), + size.to_string(), + ))); + } + Ok(()) + } + + pub fn set_interrupt_pin(&mut self) { + self.config[INTERRUPT_PIN as usize] = 0x01; + } + + pub fn revise_msix_vector(&self, vector_nr: u32) -> bool { + if self.msix.is_none() { + return false; + } + + let table_len = self.msix.as_ref().unwrap().lock().unwrap().table.len(); + let max_vector = table_len / MSIX_TABLE_ENTRY_SIZE as usize; + vector_nr < max_vector as u32 + } } #[cfg(test)] mod tests { - use address_space::{AddressSpace, GuestAddress, RegionOps}; - use super::*; + use address_space::{AddressSpace, GuestAddress, RegionOps}; const MSI_CAP_ID: u8 = 0x05; const MSIX_CAP_ID: u8 = 0x11; #[test] fn test_find_pci_cap() { - let mut pci_config = PciConfig::new(PCI_CONFIG_SPACE_SIZE, 3); + let mut pci_config = PciConfig::new(0, PCI_CONFIG_SPACE_SIZE, 3); let offset = pci_config.find_pci_cap(MSIX_CAP_ID); assert_eq!(offset, 0xff); @@ -899,31 +1249,34 @@ mod tests { read: Arc::new(read_ops), write: Arc::new(write_ops), }; - let region = Region::init_io_region(2048, region_ops); - let mut pci_config = PciConfig::new(PCI_CONFIG_SPACE_SIZE, 3); + let region = Region::init_io_region(8192, region_ops.clone(), "io"); + let mut pci_config = PciConfig::new(0, PCI_CONFIG_SPACE_SIZE, 3); #[cfg(target_arch = "x86_64")] - pci_config.register_bar( - 0, - region.clone(), - RegionType::Io, - false, - IO_BASE_ADDR_MASK as u64, - ); - pci_config.register_bar( - 1, - region.clone(), - RegionType::Mem32Bit, - false, - (MEM_BASE_ADDR_MASK as u32) as u64, - ); - pci_config.register_bar(2, region, RegionType::Mem64Bit, true, MEM_BASE_ADDR_MASK); + assert!(pci_config + .register_bar(0, region.clone(), 
RegionType::Io, false, 8192) + .is_ok()); + assert!(pci_config + .register_bar(1, region.clone(), RegionType::Mem32Bit, false, 8192) + .is_ok()); + assert!(pci_config + .register_bar(2, region.clone(), RegionType::Mem64Bit, true, 8192) + .is_ok()); + // test when bar id is not valid + assert!(pci_config + .register_bar(7, region, RegionType::Mem64Bit, true, 8192) + .is_err()); + // test when bar size is incorrect(not power of 2) + let region_size_not_pow_2 = Region::init_io_region(4238, region_ops, "io2"); + assert!(pci_config + .register_bar(4, region_size_not_pow_2, RegionType::Mem64Bit, true, 4238) + .is_err()); #[cfg(target_arch = "x86_64")] le_write_u32( &mut pci_config.config, BAR_0 as usize, - IO_BASE_ADDR_MASK | BAR_IO_SPACE as u32, + IO_BASE_ADDR_MASK | u32::from(BAR_IO_SPACE), ) .unwrap(); le_write_u32( @@ -935,7 +1288,7 @@ mod tests { le_write_u64( &mut pci_config.config, BAR_0 as usize + 2 * REG_SIZE, - MEM_BASE_ADDR_MASK | (BAR_MEM_64BIT | BAR_PREFETCH) as u64, + MEM_BASE_ADDR_MASK | u64::from(BAR_MEM_64BIT | BAR_PREFETCH), ) .unwrap(); @@ -945,7 +1298,7 @@ mod tests { { // I/O space access is enabled. le_write_u16(&mut pci_config.config, COMMAND as usize, COMMAND_IO_SPACE).unwrap(); - assert_eq!(pci_config.get_bar_address(0), IO_BASE_ADDR_MASK as u64); + assert_eq!(pci_config.get_bar_address(0), u64::from(IO_BASE_ADDR_MASK)); } assert_eq!(pci_config.get_bar_address(1), BAR_SPACE_UNMAPPED); assert_eq!(pci_config.get_bar_address(2), BAR_SPACE_UNMAPPED); @@ -960,7 +1313,7 @@ mod tests { assert_eq!(pci_config.get_bar_address(0), BAR_SPACE_UNMAPPED); assert_eq!( pci_config.get_bar_address(1), - (MEM_BASE_ADDR_MASK as u32) as u64 + u64::from(MEM_BASE_ADDR_MASK as u32) ); assert_eq!(pci_config.get_bar_address(2), MEM_BASE_ADDR_MASK); } @@ -973,26 +1326,32 @@ mod tests { read: Arc::new(read_ops), write: Arc::new(write_ops), }; - let region = Region::init_io_region(2048, region_ops); - let mut pci_config = PciConfig::new(PCI_CONFIG_SPACE_SIZE, 6); + let region = Region::init_io_region(8192, region_ops, "io"); + let mut pci_config = PciConfig::new(0, PCI_CONFIG_SPACE_SIZE, 6); #[cfg(target_arch = "x86_64")] - pci_config.register_bar(0, region.clone(), RegionType::Io, false, 2048); - pci_config.register_bar(1, region.clone(), RegionType::Mem32Bit, false, 2048); - pci_config.register_bar(2, region, RegionType::Mem64Bit, true, 2048); + assert!(pci_config + .register_bar(0, region.clone(), RegionType::Io, false, 8192) + .is_ok()); + assert!(pci_config + .register_bar(1, region.clone(), RegionType::Mem32Bit, false, 8192) + .is_ok()); + assert!(pci_config + .register_bar(2, region, RegionType::Mem64Bit, true, 8192) + .is_ok()); #[cfg(target_arch = "x86_64")] le_write_u32( &mut pci_config.config, BAR_0 as usize, - 2048_u32 | BAR_IO_SPACE as u32, + 2048_u32 | u32::from(BAR_IO_SPACE), ) .unwrap(); le_write_u32(&mut pci_config.config, BAR_0 as usize + REG_SIZE, 2048).unwrap(); le_write_u32( &mut pci_config.config, BAR_0 as usize + 2 * REG_SIZE, - 2048_u32 | BAR_MEM_64BIT as u32 | BAR_PREFETCH as u32, + 2048_u32 | u32::from(BAR_MEM_64BIT) | u32::from(BAR_PREFETCH), ) .unwrap(); le_write_u16( @@ -1003,15 +1362,25 @@ mod tests { .unwrap(); #[cfg(target_arch = "x86_64")] - let sys_io = AddressSpace::new(Region::init_container_region(1 << 16)).unwrap(); - let sys_mem = AddressSpace::new(Region::init_container_region(u64::max_value())).unwrap(); + let sys_io = AddressSpace::new( + Region::init_container_region(1 << 16, "sysio"), + "sysio", + None, + ) + .unwrap(); + let sys_mem = AddressSpace::new( + 
Region::init_container_region(u64::max_value(), "sysmem"), + "sysmem", + None, + ) + .unwrap(); assert_eq!(pci_config.bars[1].address, BAR_SPACE_UNMAPPED); assert_eq!(pci_config.bars[2].address, BAR_SPACE_UNMAPPED); pci_config .update_bar_mapping( #[cfg(target_arch = "x86_64")] - sys_io.root(), - sys_mem.root(), + Some(sys_io.root()), + Some(sys_mem.root()), ) .unwrap(); assert_eq!(pci_config.bars[1].address, 2048); @@ -1021,8 +1390,8 @@ mod tests { pci_config .update_bar_mapping( #[cfg(target_arch = "x86_64")] - sys_io.root(), - sys_mem.root(), + Some(sys_io.root()), + Some(sys_mem.root()), ) .unwrap(); assert_eq!(pci_config.bars[1].address, 2048); @@ -1032,21 +1401,21 @@ mod tests { le_write_u32( &mut pci_config.config, BAR_0 as usize, - 4096_u32 | BAR_IO_SPACE as u32, + 4096_u32 | u32::from(BAR_IO_SPACE), ) .unwrap(); le_write_u32(&mut pci_config.config, BAR_0 as usize + REG_SIZE, 4096).unwrap(); le_write_u32( &mut pci_config.config, BAR_0 as usize + 2 * REG_SIZE, - 4096_u32 | BAR_MEM_64BIT as u32 | BAR_PREFETCH as u32, + 4096_u32 | u32::from(BAR_MEM_64BIT) | u32::from(BAR_PREFETCH), ) .unwrap(); pci_config .update_bar_mapping( #[cfg(target_arch = "x86_64")] - sys_io.root(), - sys_mem.root(), + Some(sys_io.root()), + Some(sys_mem.root()), ) .unwrap(); assert_eq!(pci_config.bars[1].address, pci_config.get_bar_address(1)); @@ -1055,7 +1424,7 @@ mod tests { #[test] fn test_add_pci_cap() { - let mut pci_config = PciConfig::new(PCI_CONFIG_SPACE_SIZE, 2); + let mut pci_config = PciConfig::new(0, PCI_CONFIG_SPACE_SIZE, 2); // Overflow. assert!(pci_config @@ -1067,12 +1436,12 @@ mod tests { // Capbility size is not multiple of DWORD. pci_config.add_pci_cap(0x12, 10).unwrap(); - assert_eq!(pci_config.last_cap_end, PCI_CONFIG_HEAD_END as u16 + 12); + assert_eq!(pci_config.last_cap_end, u16::from(PCI_CONFIG_HEAD_END) + 12); } #[test] fn test_add_pcie_ext_cap() { - let mut pci_config = PciConfig::new(PCIE_CONFIG_SPACE_SIZE, 2); + let mut pci_config = PciConfig::new(0, PCIE_CONFIG_SPACE_SIZE, 2); // Overflow. 
assert!(pci_config @@ -1093,7 +1462,7 @@ mod tests { #[test] fn test_get_ext_cap_size() { - let mut pcie_config = PciConfig::new(PCIE_CONFIG_SPACE_SIZE, 3); + let mut pcie_config = PciConfig::new(0, PCIE_CONFIG_SPACE_SIZE, 3); let offset1 = pcie_config.add_pcie_ext_cap(1, 0x10, 1).unwrap(); let offset2 = pcie_config.add_pcie_ext_cap(1, 0x40, 1).unwrap(); pcie_config.add_pcie_ext_cap(1, 0x20, 1).unwrap(); @@ -1104,6 +1473,24 @@ mod tests { assert_eq!(size2, 0x40); } + #[test] + fn test_reset_common_regs() { + let mut pcie_config = PciConfig::new(0, PCIE_CONFIG_SPACE_SIZE, 3); + pcie_config.init_common_write_mask().unwrap(); + pcie_config.init_common_write_clear_mask().unwrap(); + + le_write_u16( + &mut pcie_config.config, + COMMAND as usize, + COMMAND_MEMORY_SPACE | COMMAND_FAST_BACK, + ) + .unwrap(); + assert!(pcie_config.reset_common_regs().is_ok()); + + let res = le_read_u16(&mut pcie_config.config, COMMAND as usize).unwrap(); + assert_eq!(res, COMMAND_FAST_BACK); + } + #[test] fn test_unregister_bars() { let read_ops = move |_data: &mut [u8], _addr: GuestAddress, _offset: u64| -> bool { true }; @@ -1112,45 +1499,57 @@ mod tests { read: Arc::new(read_ops), write: Arc::new(write_ops), }; - let region = Region::init_io_region(2048, region_ops); - let mut pci_config = PciConfig::new(PCI_CONFIG_SPACE_SIZE, 3); + let region = Region::init_io_region(4096, region_ops, "io"); + let mut pci_config = PciConfig::new(0, PCI_CONFIG_SPACE_SIZE, 3); // bar is unmapped #[cfg(target_arch = "x86_64")] - pci_config.register_bar(0, region.clone(), RegionType::Io, false, 2048); - pci_config.register_bar(1, region.clone(), RegionType::Mem32Bit, false, 2048); - pci_config.register_bar(2, region.clone(), RegionType::Mem64Bit, true, 2048); + assert!(pci_config + .register_bar(0, region.clone(), RegionType::Io, false, 4096) + .is_ok()); + assert!(pci_config + .register_bar(1, region.clone(), RegionType::Mem32Bit, false, 4096) + .is_ok()); + assert!(pci_config + .register_bar(2, region.clone(), RegionType::Mem64Bit, true, 4096) + .is_ok()); #[cfg(target_arch = "x86_64")] - let io_region = Region::init_container_region(1 << 16); - let mem_region = Region::init_container_region(u64::max_value()); + let io_region = Region::init_container_region(1 << 16, "iocon"); + let mem_region = Region::init_container_region(u64::max_value(), "mem"); let bus = Arc::new(Mutex::new(PciBus::new( String::from("bus"), #[cfg(target_arch = "x86_64")] io_region.clone(), mem_region.clone(), - ))); + ))) as Arc>; assert!(pci_config.unregister_bars(&bus).is_ok()); // bar is mapped #[cfg(target_arch = "x86_64")] - pci_config.register_bar(0, region.clone(), RegionType::Io, false, 2048); - pci_config.register_bar(1, region.clone(), RegionType::Mem32Bit, false, 2048); - pci_config.register_bar(2, region.clone(), RegionType::Mem64Bit, true, 2048); + assert!(pci_config + .register_bar(0, region.clone(), RegionType::Io, false, 4096) + .is_ok()); + assert!(pci_config + .register_bar(1, region.clone(), RegionType::Mem32Bit, false, 4096) + .is_ok()); + assert!(pci_config + .register_bar(2, region.clone(), RegionType::Mem64Bit, true, 4096) + .is_ok()); #[cfg(target_arch = "x86_64")] le_write_u32( &mut pci_config.config, BAR_0 as usize, - 2048 | BAR_IO_SPACE as u32, + 2048 | u32::from(BAR_IO_SPACE), ) .unwrap(); le_write_u32(&mut pci_config.config, BAR_0 as usize + REG_SIZE, 2048).unwrap(); le_write_u32( &mut pci_config.config, BAR_0 as usize + 2 * REG_SIZE, - 2048 | BAR_MEM_64BIT as u32 | BAR_PREFETCH as u32, + 2048 | u32::from(BAR_MEM_64BIT) | 
u32::from(BAR_PREFETCH), ) .unwrap(); le_write_u16( @@ -1162,8 +1561,8 @@ mod tests { pci_config .update_bar_mapping( #[cfg(target_arch = "x86_64")] - &io_region, - &mem_region, + Some(&io_region), + Some(&mem_region), ) .unwrap(); diff --git a/devices/src/pci/demo_device/base_device.rs b/devices/src/pci/demo_device/base_device.rs new file mode 100644 index 0000000000000000000000000000000000000000..ba675514f59eb953e09f9b632c94a2141b0757b6 --- /dev/null +++ b/devices/src/pci/demo_device/base_device.rs @@ -0,0 +1,58 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::collections::HashMap; + +use anyhow::Result; + +use super::DeviceTypeOperation; +use address_space::GuestAddress; + +/// BaseDevice is a simplest demo-pci-device. Its function is to +/// multiply data written by two and return it when reading. +#[derive(Default)] +pub struct BaseDevice { + result: HashMap, +} + +impl BaseDevice { + pub fn new() -> Self { + Self { + result: HashMap::new(), + } + } +} + +impl DeviceTypeOperation for BaseDevice { + // The base device can multiply the value with 2 when writing to mmio. + fn write(&mut self, data: &[u8], addr: GuestAddress, _offset: u64) -> Result<()> { + let value = data[0].checked_mul(2).unwrap_or(0); + self.result.insert(addr.raw_value(), value); + Ok(()) + } + + // Rm the data after reading, as we assume that the data becomes useless after the test + // process checked the addr. + fn read(&mut self, data: &mut [u8], addr: GuestAddress, _offset: u64) -> Result<()> { + data[0] = *self.result.get(&addr.raw_value()).unwrap_or(&0); + self.result.remove(&addr.raw_value()); + Ok(()) + } + + fn realize(&mut self) -> Result<()> { + Ok(()) + } + + fn unrealize(&mut self) -> Result<()> { + Ok(()) + } +} diff --git a/devices/src/pci/demo_device/dpy_device.rs b/devices/src/pci/demo_device/dpy_device.rs new file mode 100644 index 0000000000000000000000000000000000000000..8248b9dd3c78fc4302aa3f66671ff3f01d6e962d --- /dev/null +++ b/devices/src/pci/demo_device/dpy_device.rs @@ -0,0 +1,253 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +// Demo Dpy device is a simple pci device. Its purpose is to simulate the display +// of image from GPU to test the basic functions of GPU. During the initialization +// of device, it will register in the console. Users can write different address in +// the bar space of the device, the Demo Dpy device can do corresponding operations. 
+// Currently, the supported operations are: +// Get surface size, Get cursor image size, Get Surface data, Get cursor image data. + +use std::{ + ptr, + sync::{Arc, Mutex}, +}; + +use anyhow::{bail, Ok, Result}; +use byteorder::{ByteOrder, LittleEndian}; +use log::error; +use once_cell::sync::Lazy; + +use super::DeviceTypeOperation; +use address_space::{AddressAttr, AddressSpace, GuestAddress}; +use ui::{ + console::{ + register_display, DisplayChangeListener, DisplayChangeListenerOperations, DisplayMouse, + DisplaySurface, + }, + pixman::{ + get_image_data, get_image_format, get_image_height, ref_pixman_image, unref_pixman_image, + }, +}; +use util::pixman::{pixman_format_bpp, pixman_image_get_stride, pixman_image_t}; + +// SAFETY: Demo device is only used for test. +unsafe impl Send for Surface {} +// SAFETY: Demo device is only used for test. +unsafe impl Sync for Surface {} +pub struct Surface { + pub pixman_image: *mut pixman_image_t, + image: Vec, + cursor: Vec, +} + +impl Default for Surface { + fn default() -> Self { + Self { + pixman_image: ptr::null_mut(), + image: vec![], + cursor: vec![], + } + } +} + +pub static DISPLAY: Lazy>>>> = Lazy::new(|| Mutex::new(Vec::new())); + +pub struct DemoDisplay { + sys_mem: Arc, +} + +impl DemoDisplay { + pub fn new(sys_mem: Arc) -> Self { + Self { sys_mem } + } +} + +#[derive(Default)] +pub struct DpyInterface {} + +impl DisplayChangeListenerOperations for DpyInterface { + fn dpy_switch(&self, surface: &DisplaySurface) -> Result<()> { + if DISPLAY.lock().unwrap().is_empty() { + error!("Demo Display is empty, check initialize"); + return Ok(()); + } + + let ds_clone = DISPLAY.lock().unwrap()[0].clone(); + let mut ds = ds_clone.lock().unwrap(); + unref_pixman_image(ds.pixman_image); + ds.pixman_image = ref_pixman_image(surface.image); + + let res_data_ptr = get_image_data(surface.image) as *mut u8; + let height = get_image_height(surface.image); + let stride; + // SAFETY: Demo device is only used for test. + unsafe { + stride = pixman_image_get_stride(surface.image); + } + + let size = height * stride; + + let mut data: Vec = vec![0u8; size as usize]; + // SAFETY: Demo device is only used for test. + unsafe { + ptr::copy(res_data_ptr, data.as_mut_ptr(), size as usize); + } + ds.image = data; + Ok(()) + } + + fn dpy_refresh(&self, _dcl: &Arc>) -> Result<()> { + Ok(()) + } + + fn dpy_image_update(&self, x: i32, y: i32, w: i32, h: i32) -> Result<()> { + if DISPLAY.lock().unwrap().is_empty() { + error!("Demo Display is empty, check initialize"); + return Ok(()); + } + + let ds_clone = DISPLAY.lock().unwrap()[0].clone(); + let mut ds = ds_clone.lock().unwrap(); + let res_data_ptr = get_image_data(ds.pixman_image) as *mut u8; + + let bpp = pixman_format_bpp(get_image_format(ds.pixman_image) as u32); + let stride; + // SAFETY: Demo device is only used for test. + unsafe { + stride = pixman_image_get_stride(ds.pixman_image); + } + + let mut i = 0; + let mut offset = y * stride + x * i32::from(bpp) / 8; + let count = w * i32::from(bpp) / 8; + while i < h { + error!( + "update from {} to {}, before is {}", + offset, + offset + count, + ds.image[offset as usize] + ); + // SAFETY: Demo device is only used for test. 
+ unsafe { + ptr::copy( + res_data_ptr.add(offset as usize), + ds.image.as_mut_ptr().add(offset as usize), + count as usize, + ); + } + error!( + "update from {} to {}, after is {}", + offset, + offset + count, + ds.image[offset as usize] + ); + offset += stride; + i += 1; + } + Ok(()) + } + + fn dpy_cursor_update(&self, cursor: &DisplayMouse) -> Result<()> { + if DISPLAY.lock().unwrap().is_empty() { + error!("Demo Display is empty, check initialize"); + return Ok(()); + } + + let ds_clone = DISPLAY.lock().unwrap()[0].clone(); + let mut ds = ds_clone.lock().unwrap(); + + ds.cursor = cursor.data.clone(); + Ok(()) + } +} + +#[derive(Debug, Clone, Copy)] +pub enum DpyEvent { + QuerySurface = 0, + QueryCursor = 1, + GetSurface = 2, + GetCursor = 3, + Deactive = 4, +} + +// Support at max 64 * 64 image which format is 4 bytes. +// 0x0 WO for surface size +// 0x1 WO for cursor size +// 0x2 WO, surface data +// 0x3 WO, cursor data +impl DeviceTypeOperation for DemoDisplay { + fn read(&mut self, _data: &mut [u8], _addr: GuestAddress, _offset: u64) -> Result<()> { + bail!("Read is not support"); + } + + fn write(&mut self, data: &[u8], _addr: GuestAddress, offset: u64) -> Result<()> { + if DISPLAY.lock().unwrap().is_empty() { + error!("Demo Display is empty, write after get image"); + return Ok(()); + } + + let ds_clone = DISPLAY.lock().unwrap()[0].clone(); + let ds = ds_clone.lock().unwrap(); + + let mem_addr = LittleEndian::read_u64(data); + let mut buf: Vec = vec![]; + + match offset { + 0 => { + buf.push(ds.image.len() as u8); + buf.push((ds.image.len() as u16 >> 8) as u8); + } + 1 => { + buf.push(ds.cursor.len() as u8); + buf.push((ds.cursor.len() as u32 >> 8) as u8); + buf.push((ds.cursor.len() as u32 >> 16) as u8); + buf.push((ds.cursor.len() as u32 >> 24) as u8); + } + 2 => { + buf = ds.image.clone(); + } + 3 => { + buf = ds.cursor.clone(); + } + _ => { + return self.unrealize(); + } + } + return self.sys_mem.write( + &mut buf.as_slice(), + address_space::GuestAddress(mem_addr), + buf.len() as u64, + AddressAttr::Ram, + ); + } + + fn realize(&mut self) -> Result<()> { + DISPLAY + .lock() + .unwrap() + .push(Arc::new(Mutex::new(Surface::default()))); + let opts = Arc::new(DpyInterface::default()); + let dcl = Arc::new(Mutex::new(DisplayChangeListener::new(None, opts))); + register_display(&dcl)?; + Ok(()) + } + + fn unrealize(&mut self) -> Result<()> { + if DISPLAY.lock().unwrap().is_empty() { + error!("Demo Display is empty, write after get image"); + return Ok(()); + } + DISPLAY.lock().unwrap().pop(); + Ok(()) + } +} diff --git a/devices/src/pci/demo_device/gpu_device.rs b/devices/src/pci/demo_device/gpu_device.rs new file mode 100644 index 0000000000000000000000000000000000000000..8b7c1775d7f7a24222f872ac521fd890ac9b5289 --- /dev/null +++ b/devices/src/pci/demo_device/gpu_device.rs @@ -0,0 +1,256 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +// Demo GPU device is a simple pci device. Its purpose is to simulate the input of image +// to test the basic functions of VNC. 
During the initialization of the device, +// it will register in the console. Users can write a rom address in the mmio +// configuration space of the device. The Demo GPU device can do corresponding +// operations by reading the commands. Currently, the supported operations are: +// Replace surface, Update surface, +// Set dirty for target area of the surface, +// Update the cursor image. + +use std::{ + ptr, + sync::{Arc, Mutex, Weak}, +}; + +use anyhow::{bail, Result}; +use byteorder::{ByteOrder, LittleEndian}; +use log::info; + +use super::DeviceTypeOperation; +use address_space::{AddressAttr, AddressSpace, GuestAddress}; +use ui::{ + console::{ + console_close, console_init, display_cursor_define, display_graphic_update, + display_replace_surface, ConsoleType, DisplayConsole, DisplayMouse, DisplaySurface, + HardWareOperations, + }, + pixman::{ + create_pixman_image, get_image_data, get_image_format, get_image_stride, ref_pixman_image, + }, +}; +use util::pixman::pixman_format_code_t; + +pub const UPDATE_FACTOR: [u8; 7] = [0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40]; + +#[derive(Debug)] +pub enum GpuEvent { + ReplaceSurface = 0, + ReplaceCursor = 1, + GraphicUpdateArea = 2, + GraphicUpdateDirty = 3, + DeactiveEvent = 4, +} + +impl From<u8> for GpuEvent { + fn from(v: u8) -> Self { + match v { + 0 => GpuEvent::ReplaceSurface, + 1 => GpuEvent::ReplaceCursor, + 2 => GpuEvent::GraphicUpdateArea, + 3 => GpuEvent::GraphicUpdateDirty, + _ => GpuEvent::DeactiveEvent, + } + } +} + +pub struct DemoGpu { + dev_id: String, + sys_mem: Arc<AddressSpace>, + con: Option<Weak<Mutex<DisplayConsole>>>, + width: u32, + height: u32, + pub surface: Option<DisplaySurface>, + mouse: Option<DisplayMouse>, +} +// SAFETY: Demo device is only used for test. +unsafe impl Send for DemoGpu {} + +impl DemoGpu { + pub fn new(sys_mem: Arc<AddressSpace>, dev_id: String) -> Self { + Self { + dev_id, + sys_mem, + con: None, + width: 0, + height: 0, + surface: None, + mouse: None, + } + } +} + +impl DemoGpu { + /// Create a new surface, and replace the surface. + pub fn hw_replace_surface(&mut self, width: u32, height: u32, format: u32) -> Result<()> { + let pixman_format = match format { + 1 => pixman_format_code_t::PIXMAN_x2r10g10b10, + 2 => pixman_format_code_t::PIXMAN_r8g8b8, + 3 => pixman_format_code_t::PIXMAN_a1, + 4 => pixman_format_code_t::PIXMAN_yuy2, + _ => pixman_format_code_t::PIXMAN_a8b8g8r8, + }; + + // Create Image. + self.width = width; + self.height = height; + let image = create_pixman_image( + pixman_format, + self.width as i32, + self.height as i32, + ptr::null_mut(), + self.width as i32 * 4, + ); + let surface = DisplaySurface { + format: get_image_format(image), + image: ref_pixman_image(image), + }; + self.surface = Some(surface); + self.graphic_replace_surface() + } + + /// Create a new cursor image, and update it. + pub fn hw_replace_cursor( + &mut self, + width: u32, + height: u32, + hot_x: u32, + hot_y: u32, + mouse_data: u32, + ) -> Result<()> { + let mouse = DisplayMouse { + width, + height, + hot_x, + hot_y, + data: vec![0_u8; mouse_data as usize], + }; + display_cursor_define(&self.con, &mouse)?; + self.mouse = Some(mouse); + Ok(()) + } + + /// Change the pixels of the specified area in the image. 
+ pub fn update_image_area(&mut self, x: u32, y: u32, w: u32, h: u32) -> Result<()> { + let image = match self.surface { + Some(s) => s.image, + None => bail!("Surface is null"), + }; + let image_ptr = get_image_data(image) as *mut u8; + let stride = get_image_stride(image); + for i in y..y + h { + let ptr = (image_ptr as usize + i as usize * stride as usize) as *mut u8; + for j in x..x + w { + let tmp_ptr = ptr as usize + 4 * j as usize; + let rand_factor = (i * j) as usize; + let len = UPDATE_FACTOR.len(); + // SAFETY: Demo device is only used for test. + unsafe { + // byte reverse by ^. + *(tmp_ptr as *mut u8) ^= UPDATE_FACTOR[rand_factor % len]; + *((tmp_ptr + 1) as *mut u8) ^= UPDATE_FACTOR[(rand_factor + 1) % len]; + *((tmp_ptr + 2) as *mut u8) ^= UPDATE_FACTOR[(rand_factor + 2) % len]; + *((tmp_ptr + 3) as *mut u8) ^= UPDATE_FACTOR[(rand_factor + 3) % len]; + } + } + } + self.graphic_update(x, y, w, h) + } + + /// Set a area dirty. + pub fn graphic_update(&mut self, x: u32, y: u32, w: u32, h: u32) -> Result<()> { + display_graphic_update(&self.con, x as i32, y as i32, w as i32, h as i32) + } + + /// Update the cursor image. + pub fn graphic_cursor_define(&mut self) -> Result<()> { + if let Some(mouse) = &self.mouse { + display_cursor_define(&self.con, mouse)?; + } + Ok(()) + } + + /// Change surface in display. + pub fn graphic_replace_surface(&mut self) -> Result<()> { + display_replace_surface(&self.con, self.surface) + } +} + +impl DeviceTypeOperation for DemoGpu { + fn read(&mut self, _data: &mut [u8], _addr: GuestAddress, _offset: u64) -> Result<()> { + bail!("read is not support"); + } + + fn write(&mut self, data: &[u8], _addr: GuestAddress, _offset: u64) -> Result<()> { + let mem_addr = LittleEndian::read_u64(data); + // Event Type. + let mut buf: Vec = vec![]; + self.sys_mem.read( + &mut buf, + address_space::GuestAddress(mem_addr), + 21, + AddressAttr::Ram, + )?; + let event_type = GpuEvent::from(buf[0]); + let x = LittleEndian::read_u32(&buf[1..5]); + let y = LittleEndian::read_u32(&buf[5..9]); + let w = LittleEndian::read_u32(&buf[9..13]); + let h = LittleEndian::read_u32(&buf[13..17]); + let data_len = LittleEndian::read_u32(&buf[17..21]); + info!( + "GpuEvent: {:?}, x: {}, y: {}, w: {}, h: {}, data_len: {}", + event_type, x, y, w, h, data_len + ); + match event_type { + GpuEvent::ReplaceSurface => self.hw_replace_surface(w, h, data_len), + GpuEvent::ReplaceCursor => self.hw_replace_cursor(w, h, x, y, data_len), + GpuEvent::GraphicUpdateArea => self.update_image_area(x, y, w, h), + GpuEvent::GraphicUpdateDirty => self.graphic_update(x, y, w, h), + _ => self.unrealize(), + } + } + + fn realize(&mut self) -> Result<()> { + let con_opts = Arc::new(HwOpts {}); + self.con = console_init(self.dev_id.clone(), ConsoleType::Graphic, con_opts); + + // Create Image. + self.width = 640; + self.height = 480; + let image = create_pixman_image( + pixman_format_code_t::PIXMAN_a8b8g8r8, + self.width as i32, + self.height as i32, + ptr::null_mut(), + self.width as i32 * 4, + ); + let surface = DisplaySurface { + format: get_image_format(image), + image: ref_pixman_image(image), + }; + self.surface = Some(surface); + + // Create image. 
+ self.mouse = Some(DisplayMouse::new(64, 64, 4, 4)); + Ok(()) + } + + fn unrealize(&mut self) -> Result<()> { + let con = self.con.clone(); + console_close(&con) + } +} + +pub struct HwOpts {} +impl HardWareOperations for HwOpts {} diff --git a/devices/src/pci/demo_device/kbd_pointer_device.rs b/devices/src/pci/demo_device/kbd_pointer_device.rs new file mode 100644 index 0000000000000000000000000000000000000000..c4c036bade115e32b3a0d2cb80a7c09899f041c0 --- /dev/null +++ b/devices/src/pci/demo_device/kbd_pointer_device.rs @@ -0,0 +1,221 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +// Demo keyboard-pointer device is a simple pci device. It can be used to test whether VNC can +// correctly receive the input from the client and transmit it to the keyboard and pointer device. +// Users can write a rom address in the mmio configuration space of the device. Then if an input +// event occurs, the event information will be recorded to the corresponding memory by this device. + +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Result}; +use byteorder::{ByteOrder, LittleEndian}; +use once_cell::sync::Lazy; + +use super::DeviceTypeOperation; +use address_space::{AddressAttr, AddressSpace, GuestAddress}; +use ui::input::{register_keyboard, register_pointer, Axis, InputType, KeyboardOpts, PointerOpts}; + +static MEM_ADDR: Lazy>> = Lazy::new(|| { + Arc::new(Mutex::new(MemSpace { + sys_mem: None, + addr: None, + })) +}); + +pub struct MemSpace { + pub sys_mem: Option>, + pub addr: Option, +} + +impl MemSpace { + fn send_kbdmouse_message(&mut self, msg: &PointerMessage) -> Result<()> { + let sys_mem = match &self.sys_mem { + Some(m) => m, + None => { + bail!("Memory space is not initialized!") + } + }; + let addr = match self.addr { + Some(a) => a, + None => { + bail!("No memory allocated!") + } + }; + sys_mem.write_object( + &(msg.event_type as u8), + address_space::GuestAddress(addr), + AddressAttr::Ram, + )?; + sys_mem.write_object( + &msg.keycode, + address_space::GuestAddress(addr + 1), + AddressAttr::Ram, + )?; + sys_mem.write_object( + &msg.down, + address_space::GuestAddress(addr + 3), + AddressAttr::Ram, + )?; + sys_mem.write_object( + &msg.button, + address_space::GuestAddress(addr + 4), + AddressAttr::Ram, + )?; + sys_mem.write_object( + &msg.x, + address_space::GuestAddress(addr + 8), + AddressAttr::Ram, + )?; + sys_mem.write_object( + &msg.y, + address_space::GuestAddress(addr + 12), + AddressAttr::Ram, + )?; + + Ok(()) + } +} + +pub struct DemoKbdMouse { + pub sys_mem: Arc, + pub kbd_name: String, + pub pointer_name: String, + pub test_kbd: Arc>, + pub test_pointer: Arc>, +} + +impl DemoKbdMouse { + pub fn new(sys_mem: Arc) -> Self { + MEM_ADDR.lock().unwrap().sys_mem = Some(sys_mem.clone()); + Self { + sys_mem, + kbd_name: "test-pci-keyboard".to_string(), + pointer_name: "test-pci-pointer".to_string(), + test_kbd: Arc::new(Mutex::new(TestPciKbd {})), + test_pointer: Arc::new(Mutex::new(TestPciPointer { + x: 0, + y: 0, + button: 0, + })), + } + } +} + +pub struct TestPciKbd {} 
+ +impl KeyboardOpts for TestPciKbd { + fn do_key_event(&mut self, keycode: u16, down: bool) -> Result<()> { + let msg = PointerMessage { + event_type: InputEvent::KbdEvent, + keycode, + down: u8::from(down), + ..Default::default() + }; + MEM_ADDR.lock().unwrap().send_kbdmouse_message(&msg) + } +} + +pub struct TestPciPointer { + pub x: u32, + pub y: u32, + pub button: u32, +} + +impl PointerOpts for TestPciPointer { + fn update_point_state(&mut self, input_event: ui::input::InputEvent) -> Result<()> { + match input_event.input_type { + InputType::MoveEvent => match input_event.move_event.axis { + Axis::X => self.x = input_event.move_event.data, + Axis::Y => self.y = input_event.move_event.data, + }, + InputType::ButtonEvent => { + if input_event.button_event.down { + self.button |= input_event.button_event.button & 0x7; + } else { + self.button &= !(input_event.button_event.button & 0x7); + } + } + _ => { + bail!("Input type: {:?} is unsupported", input_event.input_type); + } + } + Ok(()) + } + + fn sync(&mut self) -> Result<()> { + let msg = PointerMessage { + event_type: InputEvent::PointerEvent, + button: self.button, + x: self.x, + y: self.y, + ..Default::default() + }; + self.x = 0; + self.y = 0; + self.button = 0; + + MEM_ADDR.lock().unwrap().send_kbdmouse_message(&msg) + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub enum InputEvent { + KbdEvent = 0, + PointerEvent = 1, + #[default] + InvalidEvent = 2, +} + +impl From for InputEvent { + fn from(v: u8) -> Self { + match v { + 0 => InputEvent::KbdEvent, + 1 => InputEvent::PointerEvent, + _ => InputEvent::InvalidEvent, + } + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct PointerMessage { + pub event_type: InputEvent, + pub keycode: u16, + pub down: u8, + pub button: u32, + pub x: u32, + pub y: u32, +} + +impl DeviceTypeOperation for DemoKbdMouse { + fn read(&mut self, _data: &mut [u8], _addr: GuestAddress, _offset: u64) -> Result<()> { + Ok(()) + } + + fn write(&mut self, data: &[u8], _addr: GuestAddress, _offset: u64) -> Result<()> { + let mem_addr = LittleEndian::read_u64(data); + MEM_ADDR.lock().unwrap().addr = Some(mem_addr); + Ok(()) + } + + fn realize(&mut self) -> Result<()> { + let test_kbd = self.test_kbd.clone(); + let test_pointer = self.test_pointer.clone(); + register_keyboard(&self.kbd_name, test_kbd); + register_pointer(&self.pointer_name, test_pointer); + Ok(()) + } + + fn unrealize(&mut self) -> Result<()> { + Ok(()) + } +} diff --git a/devices/src/pci/demo_device/mod.rs b/devices/src/pci/demo_device/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..e8264b0d95a209d1d1a4e7acde1507cc402096be --- /dev/null +++ b/devices/src/pci/demo_device/mod.rs @@ -0,0 +1,257 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +/// DemoDev is a demo PCIe device, that can have device properties configurable, eg. +/// bar num, max msix vector num, etc. +/// It can have 0-6 bars, if set, msix always lives in bar 0, data handling in bar 1. +/// 1. 
its functionality is to read and write data for the guest, meanwhile, do a little +/// mathmetic logic(multiply data[0] with 2) with the write op. +/// 2. After r/w, it sends back a msix interrupt to the guest, which means that it has also +/// msix capability. We assume msix bar is in bar 0. +/// 3. Finally, it supports hotplug/hotunplug. +/// As that it has device memory, it means it has a bar space, we assume the +/// bar size is 4KB in bar 1. +/// As that it has device memory, it means it has a bar space other than the msix one.( +/// therotically they can share the same bar as well). +/// +/// Note: developers can also add yourself mmio r/w ops for this device by changing the +/// callback fn write_data_internal_func(), using trait to expand this function is recommended. +/// +/// The example cmdline for the device is: +/// "-device pcie-demo-dev,addr=0x5,bus=pcie.0,id=demo0,bar_num=3,bar_size=4096" +pub mod base_device; +pub mod dpy_device; +pub mod gpu_device; +pub mod kbd_pointer_device; + +use std::sync::atomic::{AtomicU16, Ordering}; +use std::sync::{Arc, Mutex, Weak}; + +use anyhow::Result; +use clap::Parser; +use log::error; + +use crate::pci::config::{ + PciConfig, RegionType, DEVICE_ID, HEADER_TYPE, HEADER_TYPE_ENDPOINT, PCIE_CONFIG_SPACE_SIZE, + SUB_CLASS_CODE, VENDOR_ID, +}; +use crate::pci::demo_device::{ + base_device::BaseDevice, dpy_device::DemoDisplay, gpu_device::DemoGpu, + kbd_pointer_device::DemoKbdMouse, +}; +use crate::pci::{init_msix, le_write_u16, PciBus, PciDevBase, PciDevOps}; +use crate::{convert_bus_ref, Bus, Device, DeviceBase, PCI_BUS}; +use address_space::{AddressSpace, GuestAddress, Region, RegionOps}; +use machine_manager::config::{get_pci_df, valid_id}; +use util::gen_base_func; + +/// Config struct for `demo_dev`. +/// Contains demo_dev device's attr. +#[derive(Parser, Debug, Clone)] +#[command(no_binary_name(true))] +pub struct DemoDevConfig { + #[arg(long, value_parser = ["pcie-demo-dev"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub bus: String, + #[arg(long, value_parser = get_pci_df)] + pub addr: (u8, u8), + // Different device implementations can be configured based on this parameter + #[arg(long, alias = "device_type")] + pub device_type: Option, + #[arg(long, alias = "bar_num", default_value = "0")] + pub bar_num: u8, + // Every bar has the same size just for simplification. + #[arg(long, alias = "bar_size", default_value = "0")] + pub bar_size: u64, +} + +pub struct DemoDev { + base: PciDevBase, + cmd_cfg: DemoDevConfig, + mem_region: Region, + dev_id: Arc, + device: Arc>, +} + +impl DemoDev { + pub fn new( + cfg: DemoDevConfig, + devfn: u8, + sys_mem: Arc, + parent_bus: Weak>, + ) -> Self { + // You can choose different device function based on the parameter of device_type. 
+ let device_type = cfg.device_type.clone().unwrap_or_default(); + let device: Arc> = match device_type.as_str() { + "demo-gpu" => Arc::new(Mutex::new(DemoGpu::new(sys_mem, cfg.id.clone()))), + "demo-input" => Arc::new(Mutex::new(DemoKbdMouse::new(sys_mem))), + "demo-display" => Arc::new(Mutex::new(DemoDisplay::new(sys_mem))), + _ => Arc::new(Mutex::new(BaseDevice::new())), + }; + DemoDev { + base: PciDevBase { + base: DeviceBase::new(cfg.id.clone(), false, Some(parent_bus)), + config: PciConfig::new(devfn, PCIE_CONFIG_SPACE_SIZE, cfg.bar_num), + devfn, + }, + cmd_cfg: cfg, + mem_region: Region::init_container_region(u64::from(u32::MAX), "DemoDev"), + dev_id: Arc::new(AtomicU16::new(0)), + device, + } + } + + fn init_pci_config(&mut self) -> Result<()> { + self.init_write_mask(false)?; + self.init_write_clear_mask(false)?; + + let config = &mut self.base.config.config; + le_write_u16(config, DEVICE_ID as usize, DEVICE_ID_DEMO)?; + le_write_u16(config, VENDOR_ID as usize, VENDOR_ID_DEMO)?; + le_write_u16(config, SUB_CLASS_CODE as usize, CLASS_CODE_DEMO)?; + config[HEADER_TYPE as usize] = HEADER_TYPE_ENDPOINT; + + Ok(()) + } + + fn register_data_handling_bar(&mut self) -> Result<()> { + let device = self.device.clone(); + let write_ops = move |data: &[u8], addr: GuestAddress, offset: u64| -> bool { + device + .lock() + .unwrap() + .write(data, addr, offset) + .unwrap_or_else(|e| error!("Some error occur in writing: {:?}", e)); + true + }; + + let device = self.device.clone(); + let read_ops = move |data: &mut [u8], addr: GuestAddress, offset: u64| -> bool { + device + .lock() + .unwrap() + .read(data, addr, offset) + .unwrap_or_else(|e| error!("Some error occur in reading: {:?}", e)); + true + }; + + let region_ops = RegionOps { + read: Arc::new(read_ops), + write: Arc::new(write_ops), + }; + + let region = Region::init_io_region(self.cmd_cfg.bar_size, region_ops, "DemoRegion"); + + self.mem_region.add_subregion(region, 0)?; + self.base.config.register_bar( + 0, + self.mem_region.clone(), + RegionType::Mem64Bit, + false, + (self.cmd_cfg.bar_size * u64::from(self.cmd_cfg.bar_num)).next_power_of_two(), + )?; + + Ok(()) + } +} + +// reference to https://pci-ids.ucw.cz/read/PC?restrict=1 +// "DEAD BEEF" seems will not be used for a long time. 
+const VENDOR_ID_DEMO: u16 = 0xDEAD; +const DEVICE_ID_DEMO: u16 = 0xBEEF; +// reference to https://pci-ids.ucw.cz/read/PD/ +const CLASS_CODE_DEMO: u16 = 0xEE; + +impl Device for DemoDev { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn reset(&mut self, _reset_child_device: bool) -> Result<()> { + self.base.config.reset_common_regs() + } + + fn realize(mut self) -> Result>> { + self.init_pci_config()?; + if self.cmd_cfg.bar_num > 0 { + init_msix(&mut self.base, 0, 1, self.dev_id.clone(), None, None)?; + } + + self.register_data_handling_bar()?; + self.device.lock().unwrap().realize()?; + + let devfn = u64::from(self.base.devfn); + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + let mut locked_bus = parent_bus.lock().unwrap(); + let demo_pci_dev = Arc::new(Mutex::new(self)); + locked_bus.attach_child(devfn, demo_pci_dev.clone())?; + + Ok(demo_pci_dev) + } + + fn unrealize(&mut self) -> Result<()> { + self.device.lock().unwrap().unrealize() + } +} + +impl PciDevOps for DemoDev { + gen_base_func!(pci_base, pci_base_mut, PciDevBase, base); + + /// write the pci configuration space + fn write_config(&mut self, offset: usize, data: &[u8]) { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + PCI_BUS!(parent_bus, locked_bus, pci_bus); + + self.base.config.write( + offset, + data, + self.dev_id.load(Ordering::Acquire), + #[cfg(target_arch = "x86_64")] + None, + Some(&pci_bus.mem_region), + ); + } +} + +pub trait DeviceTypeOperation: Send { + fn read(&mut self, data: &mut [u8], addr: GuestAddress, offset: u64) -> Result<()>; + fn write(&mut self, data: &[u8], addr: GuestAddress, offset: u64) -> Result<()>; + fn realize(&mut self) -> Result<()>; + fn unrealize(&mut self) -> Result<()>; +} + +#[cfg(test)] +mod tests { + use super::*; + use machine_manager::config::str_slip_to_clap; + #[test] + fn test_parse_demo_dev() { + // Test1: Right. + let demo_cmd1 = "pcie-demo-dev,bus=pcie.0,addr=0x4,id=test_0,device_type=demo-gpu,bar_num=3,bar_size=4096"; + let result = DemoDevConfig::try_parse_from(str_slip_to_clap(demo_cmd1, true, false)); + assert!(result.is_ok()); + let demo_cfg = result.unwrap(); + assert_eq!(demo_cfg.id, "test_0".to_string()); + assert_eq!(demo_cfg.device_type, Some("demo-gpu".to_string())); + assert_eq!(demo_cfg.bar_num, 3); + assert_eq!(demo_cfg.bar_size, 4096); + + // Test2: Default bar_num/bar_size. + let demo_cmd2 = "pcie-demo-dev,bus=pcie.0,addr=4.0,id=test_0,device_type=demo-gpu"; + let result = DemoDevConfig::try_parse_from(str_slip_to_clap(demo_cmd2, true, false)); + assert!(result.is_ok()); + let demo_cfg = result.unwrap(); + assert_eq!(demo_cfg.bar_num, 0); + assert_eq!(demo_cfg.bar_size, 0); + } +} diff --git a/devices/src/pci/error.rs b/devices/src/pci/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..fa3c140b0b98545b1cc40ffb7e16ab252b805ac4 --- /dev/null +++ b/devices/src/pci/error.rs @@ -0,0 +1,40 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
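+
+// PciError gathers the error variants used across the PCI subsystem (capability setup,
+// BAR mapping, configuration-space access, hotplug and queue enabling). It is defined
+// with thiserror and supersedes the error_chain-based definitions formerly declared in
+// pci/src/lib.rs.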
+ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum PciError { + #[error("AddressSpace")] + AddressSpace { + #[from] + source: address_space::error::AddressSpaceError, + }, + #[error("Failed to add PCI capability: id 0x{0:x}, size: 0x{1:x}.")] + AddPciCap(u8, usize), + #[error("Failed to add PCIe extended capability: id 0x{0:x}, size: 0x{1:x}.")] + AddPcieExtCap(u16, usize), + #[error("Failed to unmap BAR {0} in memory space.")] + UnregMemBar(usize), + #[error("Invalid device status 0x{0:x}")] + DeviceStatus(u32), + #[error("Unsupported pci register, 0x{0:x}")] + PciRegister(u64), + #[error("Invalid features select 0x{0:x}")] + FeaturesSelect(u32), + #[error("HotPlug is not supported for device with devfn {0}")] + HotplugUnsupported(u8), + #[error("Invalid PCI configuration, key:{0}, value:{1}")] + InvalidConf(String, String), + #[error("Failed to enable queue, value is 0x{0:x}")] + QueueEnable(u32), +} diff --git a/pci/src/host.rs b/devices/src/pci/host.rs similarity index 50% rename from pci/src/host.rs rename to devices/src/pci/host.rs index 1713c2e952fb4fc4c9a17805cfc11bded79ba0b4..7f3b3d78f6be07e974ff727bc5609e40f88d717e 100644 --- a/pci/src/host.rs +++ b/devices/src/pci/host.rs @@ -12,23 +12,29 @@ use std::sync::{Arc, Mutex}; -use acpi::{ - AmlAddressSpaceDecode, AmlBuilder, AmlByte, AmlCacheable, AmlDWord, AmlDWordDesc, AmlDevice, - AmlEisaId, AmlNameDecl, AmlPackage, AmlReadAndWrite, AmlResTemplate, AmlScopeBuilder, - AmlWordDesc, AmlZero, -}; +use anyhow::{Context, Result}; + +#[cfg(target_arch = "aarch64")] +use crate::pci::PCI_INTR_BASE; +use crate::pci::{bus::PciBus, to_pcidevops, PCI_PIN_NUM, PCI_SLOT_MAX}; #[cfg(target_arch = "x86_64")] +use crate::pci::{le_read_u32, le_write_u32}; +use crate::sysbus::{SysBusDevBase, SysBusDevOps}; +use crate::{Device, DeviceBase, PCI_BUS_DEVICE}; use acpi::{ - AmlAnd, AmlArg, AmlCreateDWordField, AmlElse, AmlEqual, AmlISARanges, AmlIf, AmlInteger, - AmlIoDecode, AmlIoResource, AmlLNot, AmlLocal, AmlMethod, AmlName, AmlOr, AmlReturn, AmlStore, - AmlToUuid, + AmlActiveLevel, AmlAddressSpaceDecode, AmlAnd, AmlArg, AmlBuilder, AmlCacheable, + AmlCreateDWordField, AmlDWord, AmlDWordDesc, AmlDevice, AmlEdgeLevel, AmlEisaId, AmlElse, + AmlEqual, AmlExtendedInterrupt, AmlISARanges, AmlIf, AmlIntShare, AmlInteger, AmlLNot, + AmlLocal, AmlMethod, AmlName, AmlNameDecl, AmlOr, AmlPackage, AmlReadAndWrite, AmlResTemplate, + AmlResourceUsage, AmlReturn, AmlScopeBuilder, AmlStore, AmlString, AmlToUuid, AmlWordDesc, + AmlZero, }; -use address_space::{AddressSpace, GuestAddress, RegionOps}; -use sysbus::{errors::Result as SysBusResult, SysBusDevOps}; - -use crate::{bus::PciBus, PciDevOps}; #[cfg(target_arch = "x86_64")] -use crate::{le_read_u32, le_write_u32}; +use acpi::{AmlIoDecode, AmlIoResource}; +#[cfg(target_arch = "aarch64")] +use acpi::{AmlOne, AmlQWordDesc}; +use address_space::{AddressSpace, GuestAddress, RegionOps}; +use util::gen_base_func; #[cfg(target_arch = "x86_64")] const CONFIG_ADDRESS_ENABLE_MASK: u32 = 0x8000_0000; @@ -41,22 +47,22 @@ const PIO_OFFSET_MASK: u32 = 0xff; const CONFIG_BUS_MASK: u32 = 0xff; const CONFIG_DEVFN_MASK: u32 = 0xff; -#[allow(dead_code)] const ECAM_BUS_SHIFT: u32 = 20; -#[allow(dead_code)] const ECAM_DEVFN_SHIFT: u32 = 12; -#[allow(dead_code)] const ECAM_OFFSET_MASK: u64 = 0xfff; #[derive(Clone)] pub struct PciHost { - pub root_bus: Arc>, - #[allow(dead_code)] - device: Option>>, + base: SysBusDevBase, #[cfg(target_arch = "x86_64")] config_addr: u32, pcie_ecam_range: (u64, u64), pcie_mmio_range: (u64, 
u64), + #[cfg(target_arch = "aarch64")] + pcie_pio_range: (u64, u64), + #[cfg(target_arch = "aarch64")] + high_pcie_mmio_range: (u64, u64), + pub intx_gsi_base: i32, } impl PciHost { @@ -66,11 +72,19 @@ impl PciHost { /// /// * `sys_io` - IO space which the host bridge maps (only on x86_64). /// * `sys_mem`- Memory space which the host bridge maps. + /// * `pcie_ecam_range` - PCIe ECAM base address and length. + /// * `pcie_mmio_range` - PCIe MMIO base address and length. + /// * `pcie_pio_range` - PCIe PIO base addreass and length (only on aarch64). + /// * `high_pcie_mmio_range` - PCIe high MMIO base address and length (only on aarch64). + /// * `intx_gsi_base` - PCIe INTx gsi base. pub fn new( #[cfg(target_arch = "x86_64")] sys_io: &Arc, sys_mem: &Arc, pcie_ecam_range: (u64, u64), pcie_mmio_range: (u64, u64), + #[cfg(target_arch = "aarch64")] pcie_pio_range: (u64, u64), + #[cfg(target_arch = "aarch64")] high_pcie_mmio_range: (u64, u64), + intx_gsi_base: i32, ) -> Self { #[cfg(target_arch = "x86_64")] let io_region = sys_io.root().clone(); @@ -81,24 +95,37 @@ impl PciHost { io_region, mem_region, ); + let mut base = SysBusDevBase::default(); + base.base.child = Some(Arc::new(Mutex::new(root_bus))); PciHost { - root_bus: Arc::new(Mutex::new(root_bus)), - device: None, + base, #[cfg(target_arch = "x86_64")] config_addr: 0, pcie_ecam_range, pcie_mmio_range, + #[cfg(target_arch = "aarch64")] + pcie_pio_range, + #[cfg(target_arch = "aarch64")] + high_pcie_mmio_range, + intx_gsi_base, } } - pub fn find_device(&self, bus_num: u8, devfn: u8) -> Option>> { - let locked_root_bus = self.root_bus.lock().unwrap(); + pub fn find_device(&self, bus_num: u8, devfn: u8) -> Option>> { + let root_bus = self.child_bus().unwrap(); + let locked_root_bus = root_bus.lock().unwrap(); if bus_num == 0 { - return locked_root_bus.get_device(0, devfn); + let dev = locked_root_bus.child_dev(u64::from(devfn))?; + return Some(dev.clone()); } - for bus in &locked_root_bus.child_buses { - if let Some(b) = PciBus::find_bus_by_num(bus, bus_num) { - return b.lock().unwrap().get_device(bus_num, devfn); + + for dev in locked_root_bus.child_devices().values() { + let child_bus = dev.lock().unwrap().child_bus(); + if let Some(bus) = child_bus { + if let Some(b) = PciBus::find_bus_by_num(&bus, bus_num) { + let dev = b.lock().unwrap().child_dev(u64::from(devfn))?.clone(); + return Some(dev); + } } } None @@ -109,7 +136,6 @@ impl PciHost { /// # Arguments /// /// * `host_bridge` - Host brdige device. - #[allow(dead_code)] pub fn build_mmconfig_ops(host_bridge: Arc>) -> RegionOps { let cloned_hb = host_bridge.clone(); let read = move |data: &mut [u8], addr: GuestAddress, offset: u64| -> bool { @@ -129,7 +155,6 @@ impl PciHost { /// # Arguments /// /// * `host_bridge` - Host brdige device. - #[allow(dead_code)] #[cfg(target_arch = "x86_64")] pub fn build_pio_addr_ops(host_bridge: Arc>) -> RegionOps { let cloned_hb = host_bridge.clone(); @@ -159,7 +184,6 @@ impl PciHost { /// # Arguments /// /// * `host_bridge` - Host brdige device. 
- #[allow(dead_code)] #[cfg(target_arch = "x86_64")] pub fn build_pio_data_ops(host_bridge: Arc>) -> RegionOps { let cloned_hb = host_bridge.clone(); @@ -175,12 +199,13 @@ impl PciHost { let mut offset: u32 = (locked_hb.config_addr & !CONFIG_ADDRESS_ENABLE_MASK) + offset as u32; - let bus_num = ((offset as u32 >> PIO_BUS_SHIFT) & CONFIG_BUS_MASK) as u8; - let devfn = ((offset as u32 >> PIO_DEVFN_SHIFT) & CONFIG_DEVFN_MASK) as u8; + let bus_num = ((offset >> PIO_BUS_SHIFT) & CONFIG_BUS_MASK) as u8; + let devfn = ((offset >> PIO_DEVFN_SHIFT) & CONFIG_DEVFN_MASK) as u8; match locked_hb.find_device(bus_num, devfn) { Some(dev) => { offset &= PIO_OFFSET_MASK; - dev.lock().unwrap().read_config(offset as usize, data); + PCI_BUS_DEVICE!(dev, locked_dev, pci_dev); + pci_dev.read_config(offset as usize, data); } None => { for d in data.iter_mut() { @@ -198,11 +223,12 @@ impl PciHost { let mut offset: u32 = (locked_hb.config_addr & !CONFIG_ADDRESS_ENABLE_MASK) + offset as u32; - let bus_num = ((offset as u32 >> PIO_BUS_SHIFT) & CONFIG_BUS_MASK) as u8; - let devfn = ((offset as u32 >> PIO_DEVFN_SHIFT) & CONFIG_DEVFN_MASK) as u8; + let bus_num = ((offset >> PIO_BUS_SHIFT) & CONFIG_BUS_MASK) as u8; + let devfn = ((offset >> PIO_DEVFN_SHIFT) & CONFIG_DEVFN_MASK) as u8; if let Some(dev) = locked_hb.find_device(bus_num, devfn) { offset &= PIO_OFFSET_MASK; - dev.lock().unwrap().write_config(offset as usize, data); + PCI_BUS_DEVICE!(dev, locked_dev, pci_dev); + pci_dev.write_config(offset as usize, data); } true }; @@ -214,14 +240,34 @@ impl PciHost { } } +impl Device for PciHost { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn reset(&mut self, _reset_child_device: bool) -> Result<()> { + let root_bus = self.child_bus().unwrap(); + for dev in root_bus.lock().unwrap().child_devices().values() { + PCI_BUS_DEVICE!(dev, locked_dev, pci_dev); + pci_dev + .reset(true) + .with_context(|| "Fail to reset pci device under pci host")?; + } + + Ok(()) + } +} + impl SysBusDevOps for PciHost { + gen_base_func!(sysbusdev_base, sysbusdev_base_mut, SysBusDevBase, base); + fn read(&mut self, data: &mut [u8], _base: GuestAddress, offset: u64) -> bool { let bus_num = ((offset as u32 >> ECAM_BUS_SHIFT) & CONFIG_BUS_MASK) as u8; let devfn = ((offset as u32 >> ECAM_DEVFN_SHIFT) & CONFIG_DEVFN_MASK) as u8; match self.find_device(bus_num, devfn) { Some(dev) => { let addr: usize = (offset & ECAM_OFFSET_MASK) as usize; - dev.lock().unwrap().read_config(addr, data); + PCI_BUS_DEVICE!(dev, locked_dev, pci_dev); + trace::pci_read_config(&pci_dev.name(), addr, data); + pci_dev.read_config(addr, data); } None => { for d in data.iter_mut() { @@ -238,23 +284,156 @@ impl SysBusDevOps for PciHost { match self.find_device(bus_num, devfn) { Some(dev) => { let addr: usize = (offset & ECAM_OFFSET_MASK) as usize; - dev.lock().unwrap().write_config(addr, data); + PCI_BUS_DEVICE!(dev, locked_dev, pci_dev); + trace::pci_write_config(&pci_dev.name(), addr, data); + pci_dev.write_config(addr, data); true } None => true, } } +} - fn reset(&mut self) -> SysBusResult<()> { - use sysbus::errors::ResultExt as SysBusResultExt; +#[cfg(target_arch = "x86_64")] +fn build_osc_for_aml(pci_host_bridge: &mut AmlDevice) { + let mut method = AmlMethod::new("_OSC", 4, false); + method.append_child(AmlCreateDWordField::new(AmlArg(3), AmlInteger(0), "CDW1")); + let mut if_obj_0 = AmlIf::new(AmlEqual::new( + AmlArg(0), + AmlToUuid::new("33db4d5b-1ff7-401c-9657-7441c03dd766"), + )); + if_obj_0.append_child(AmlCreateDWordField::new(AmlArg(3), 
AmlInteger(4), "CDW2")); + if_obj_0.append_child(AmlCreateDWordField::new(AmlArg(3), AmlInteger(8), "CDW3")); + let cdw3 = AmlName("CDW3".to_string()); + if_obj_0.append_child(AmlStore::new(cdw3.clone(), AmlLocal(0))); + // Hotplug: We now support PCIe native hotplug(bit 0) with PCI Express Capability + // Structure(bit 4), other bits: bit1: SHPC; bit2: PME; bit3: AER. + if_obj_0.append_child(AmlAnd::new(AmlLocal(0), AmlInteger(0x11), AmlLocal(0))); + let mut if_obj_1 = AmlIf::new(AmlLNot::new(AmlEqual::new(AmlArg(1), AmlInteger(1)))); + let cdw1 = AmlName("CDW1".to_string()); + if_obj_1.append_child(AmlOr::new(cdw1.clone(), AmlInteger(0x08), cdw1.clone())); + if_obj_0.append_child(if_obj_1); + let mut if_obj_2 = AmlIf::new(AmlLNot::new(AmlEqual::new(cdw3.clone(), AmlLocal(0)))); + if_obj_2.append_child(AmlOr::new(cdw1.clone(), AmlInteger(0x10), cdw1.clone())); + if_obj_0.append_child(if_obj_2); + if_obj_0.append_child(AmlStore::new(AmlLocal(0), cdw3)); + method.append_child(if_obj_0); + let mut else_obj_0 = AmlElse::new(); + else_obj_0.append_child(AmlOr::new(cdw1.clone(), AmlInteger(0x04), cdw1)); + method.append_child(else_obj_0); + method.append_child(AmlReturn::with_value(AmlArg(3))); + pci_host_bridge.append_child(method); +} - for (_id, pci_dev) in self.root_bus.lock().unwrap().devices.iter_mut() { - SysBusResultExt::chain_err(pci_dev.lock().unwrap().reset(true), || { - "Fail to reset pci device under pci host" - })?; - } +#[cfg(target_arch = "aarch64")] +fn build_osc_for_aml(pci_host_bridge: &mut AmlDevice) { + // _OSC means Operating System Capabilities. + pci_host_bridge.append_child(AmlNameDecl::new("SUPP", AmlInteger(0))); + pci_host_bridge.append_child(AmlNameDecl::new("CTRL", AmlInteger(0))); + let mut method = AmlMethod::new("_OSC", 4, false); + method.append_child(AmlCreateDWordField::new(AmlArg(3), AmlInteger(0), "CDW1")); + // The id is for PCI Host Bridge Device. + let mut if_obj_0 = AmlIf::new(AmlEqual::new( + AmlArg(0), + AmlToUuid::new("33db4d5b-1ff7-401c-9657-7441c03dd766"), + )); + // Get value from argument for SUPP and CTRL. + if_obj_0.append_child(AmlCreateDWordField::new(AmlArg(3), AmlInteger(4), "CDW2")); + if_obj_0.append_child(AmlCreateDWordField::new(AmlArg(3), AmlInteger(8), "CDW3")); + if_obj_0.append_child(AmlStore::new( + AmlName("CDW2".to_string()), + AmlName("SUPP".to_string()), + )); + if_obj_0.append_child(AmlStore::new( + AmlName("CDW3".to_string()), + AmlName("CTRL".to_string()), + )); + // Hotplug: We now support PCIe native hotplug(bit 0) with PCI Express Capability + // Structure(bit4), other bits: bit1: SHPC; bit2: PME; bit3: AER. + if_obj_0.append_child(AmlStore::new( + AmlAnd::new(AmlName("CTRL".to_string()), AmlInteger(0x11), AmlLocal(0)), + AmlName("CTRL".to_string()), + )); + let mut if_obj_1 = AmlIf::new(AmlLNot::new(AmlEqual::new(AmlArg(1), AmlInteger(1)))); + if_obj_1.append_child(AmlAnd::new( + AmlName("CDW1".to_string()), + AmlInteger(0x08), + AmlName("CDW1".to_string()), + )); + if_obj_0.append_child(if_obj_1); + let mut if_obj_2 = AmlIf::new(AmlLNot::new(AmlEqual::new( + AmlName("CDW3".to_string()), + AmlName("CTRL".to_string()), + ))); + if_obj_2.append_child(AmlOr::new( + AmlName("CDW1".to_string()), + AmlInteger(0x10), + AmlName("CDW1".to_string()), + )); + if_obj_0.append_child(if_obj_2); + if_obj_0.append_child(AmlStore::new( + AmlName("CTRL".to_string()), + AmlName("CDW3".to_string()), + )); + // For pci host, kernel will use _OSC return value to determine whether + // native_pcie_hotplug is enabled or not. 
+ if_obj_0.append_child(AmlReturn::with_value(AmlArg(3))); + method.append_child(if_obj_0); + let mut else_obj_0 = AmlElse::new(); + else_obj_0.append_child(AmlOr::new( + AmlName("CDW1".to_string()), + AmlInteger(0x04), + AmlName("CDW1".to_string()), + )); + else_obj_0.append_child(AmlReturn::with_value(AmlArg(3))); + method.append_child(else_obj_0); + pci_host_bridge.append_child(method); +} - Ok(()) +fn build_prt_for_aml(pci_bus: &mut AmlDevice, irq: i32) { + let mut prt_pkg = AmlPackage::new(PCI_SLOT_MAX * PCI_PIN_NUM); + (0..PCI_SLOT_MAX).for_each(|slot| { + (0..PCI_PIN_NUM).for_each(|pin| { + let gsi = (pin + slot) % PCI_PIN_NUM; + let mut pkg = AmlPackage::new(4); + pkg.append_child(AmlDWord(u32::from(slot) << 16 | 0xFFFF)); + pkg.append_child(AmlDWord(u32::from(pin))); + pkg.append_child(AmlName(format!("GSI{}", gsi))); + pkg.append_child(AmlZero); + prt_pkg.append_child(pkg); + }); + }); + pci_bus.append_child(AmlNameDecl::new("_PRT", prt_pkg)); + + for i in 0..PCI_PIN_NUM { + #[cfg(target_arch = "x86_64")] + let irqs = irq as u8 + i; + #[cfg(target_arch = "aarch64")] + let irqs = irq as u8 + PCI_INTR_BASE + i; + let mut gsi = AmlDevice::new(format!("GSI{}", i).as_str()); + gsi.append_child(AmlNameDecl::new("_HID", AmlEisaId::new("PNP0C0F"))); + gsi.append_child(AmlNameDecl::new("_UID", AmlString(i.to_string()))); + let mut crs = AmlResTemplate::new(); + crs.append_child(AmlExtendedInterrupt::new( + AmlResourceUsage::Consumer, + AmlEdgeLevel::Level, + AmlActiveLevel::High, + AmlIntShare::Exclusive, + vec![u32::from(irqs)], + )); + gsi.append_child(AmlNameDecl::new("_PRS", crs)); + let mut crs = AmlResTemplate::new(); + crs.append_child(AmlExtendedInterrupt::new( + AmlResourceUsage::Consumer, + AmlEdgeLevel::Level, + AmlActiveLevel::High, + AmlIntShare::Exclusive, + vec![u32::from(irqs)], + )); + gsi.append_child(AmlNameDecl::new("_CRS", crs)); + let method = AmlMethod::new("_SRS", 1, false); + gsi.append_child(method); + pci_bus.append_child(gsi); } } @@ -265,36 +444,17 @@ impl AmlBuilder for PciHost { pci_host_bridge.append_child(AmlNameDecl::new("_CID", AmlEisaId::new("PNP0A03"))); pci_host_bridge.append_child(AmlNameDecl::new("_ADR", AmlZero)); pci_host_bridge.append_child(AmlNameDecl::new("_UID", AmlZero)); - - #[cfg(target_arch = "x86_64")] + #[cfg(target_arch = "aarch64")] { - let mut method = AmlMethod::new("_OSC", 4, false); - method.append_child(AmlCreateDWordField::new(AmlArg(3), AmlInteger(0), "CDW1")); - let mut if_obj_0 = AmlIf::new(AmlEqual::new( - AmlArg(0), - AmlToUuid::new("33db4d5b-1ff7-401c-9657-7441c03dd766"), - )); - if_obj_0.append_child(AmlCreateDWordField::new(AmlArg(3), AmlInteger(4), "CDW2")); - if_obj_0.append_child(AmlCreateDWordField::new(AmlArg(3), AmlInteger(8), "CDW3")); - let cdw3 = AmlName("CDW3".to_string()); - if_obj_0.append_child(AmlStore::new(cdw3.clone(), AmlLocal(0))); - if_obj_0.append_child(AmlAnd::new(AmlLocal(0), AmlInteger(0x1f), AmlLocal(0))); - let mut if_obj_1 = AmlIf::new(AmlLNot::new(AmlEqual::new(AmlArg(1), AmlInteger(1)))); - let cdw1 = AmlName("CDW1".to_string()); - if_obj_1.append_child(AmlOr::new(cdw1.clone(), AmlInteger(0x08), cdw1.clone())); - if_obj_0.append_child(if_obj_1); - let mut if_obj_2 = AmlIf::new(AmlLNot::new(AmlEqual::new(cdw3.clone(), AmlLocal(0)))); - if_obj_2.append_child(AmlOr::new(cdw1.clone(), AmlInteger(0x10), cdw1.clone())); - if_obj_0.append_child(if_obj_2); - if_obj_0.append_child(AmlStore::new(AmlLocal(0), cdw3)); - method.append_child(if_obj_0); - let mut else_obj_0 = AmlElse::new(); - 
else_obj_0.append_child(AmlOr::new(cdw1.clone(), AmlInteger(0x04), cdw1)); - method.append_child(else_obj_0); - method.append_child(AmlReturn::with_value(AmlArg(3))); - pci_host_bridge.append_child(method); + // CCA: Cache Coherency Attribute, which determines whether + // guest supports DMA features in pci host on aarch64 platform. + pci_host_bridge.append_child(AmlNameDecl::new("_CCA", AmlOne)); + // SEG: The PCI segment number. + pci_host_bridge.append_child(AmlNameDecl::new("_SEG", AmlZero)); } + build_osc_for_aml(&mut pci_host_bridge); + let pcie_ecam = self.pcie_ecam_range; let pcie_mmio = self.pcie_mmio_range; // Build and append "\_SB.PCI0._CRS" to PCI host bridge node. @@ -336,6 +496,30 @@ impl AmlBuilder for PciHost { 0xf300, )); } + #[cfg(target_arch = "aarch64")] + { + let pcie_pio = self.pcie_pio_range; + crs.append_child(AmlDWordDesc::new_io( + AmlAddressSpaceDecode::Positive, + AmlISARanges::EntireRange, + 0, + pcie_pio.0 as u32, + (pcie_pio.0 + pcie_pio.1) as u32 - 1, + 0, + pcie_pio.1 as u32, + )); + let high_pcie_mmio = self.high_pcie_mmio_range; + crs.append_child(AmlQWordDesc::new_memory( + AmlAddressSpaceDecode::Positive, + AmlCacheable::NonCacheable, + AmlReadAndWrite::ReadWrite, + 0, + high_pcie_mmio.0, + high_pcie_mmio.0 + high_pcie_mmio.1 - 1, + 0, + high_pcie_mmio.1, + )); + } crs.append_child(AmlDWordDesc::new_memory( AmlAddressSpaceDecode::Positive, AmlCacheable::NonCacheable, @@ -348,19 +532,7 @@ impl AmlBuilder for PciHost { )); pci_host_bridge.append_child(AmlNameDecl::new("_CRS", crs)); - // Build and append pci-routing-table to PCI host bridge node. - let slot_num = 32_u8; - let mut prt_pkg = AmlPackage::new(slot_num); - let pci_irq_base = 16_u32; - (0..slot_num).for_each(|slot| { - let mut pkg = AmlPackage::new(4); - pkg.append_child(AmlDWord(((slot as u32) << 16) as u32 | 0xFFFF)); - pkg.append_child(AmlByte(0)); - pkg.append_child(AmlByte(0)); - pkg.append_child(AmlDWord(pci_irq_base + (slot as u32 % 8))); - prt_pkg.append_child(pkg); - }); - pci_host_bridge.append_child(AmlNameDecl::new("_PRT", prt_pkg)); + build_prt_for_aml(&mut pci_host_bridge, self.intx_gsi_base); pci_host_bridge.aml_bytes() } @@ -368,92 +540,60 @@ impl AmlBuilder for PciHost { #[cfg(test)] pub mod tests { - use std::sync::Weak; - - use address_space::Region; + #[cfg(target_arch = "x86_64")] use byteorder::{ByteOrder, LittleEndian}; use super::*; - use crate::bus::PciBus; - use crate::config::{PciConfig, PCI_CONFIG_SPACE_SIZE, SECONDARY_BUS_NUM}; - use crate::errors::Result; - use crate::root_port::RootPort; - - struct PciDevice { - devfn: u8, - config: PciConfig, - parent_bus: Weak>, - } - - impl PciDevOps for PciDevice { - fn init_write_mask(&mut self) -> Result<()> { - let mut offset = 0_usize; - while offset < self.config.config.len() { - LittleEndian::write_u32( - &mut self.config.write_mask[offset..offset + 4], - 0xffff_ffff, - ); - offset += 4; - } - Ok(()) - } - - fn init_write_clear_mask(&mut self) -> Result<()> { - Ok(()) - } - - fn read_config(&self, offset: usize, data: &mut [u8]) { - self.config.read(offset, data); - } - - fn write_config(&mut self, offset: usize, data: &[u8]) { - self.config.write(offset, data, 0); - } - - fn name(&self) -> String { - "PCI device".to_string() - } - - fn realize(mut self) -> Result<()> { - let devfn = self.devfn; - self.init_write_mask()?; - self.init_write_clear_mask()?; - - let dev = Arc::new(Mutex::new(self)); - dev.lock() - .unwrap() - .parent_bus - .upgrade() - .unwrap() - .lock() - .unwrap() - .devices - .insert(devfn, 
dev.clone()); - Ok(()) - } - } + use crate::pci::bus::PciBus; + use crate::pci::config::SECONDARY_BUS_NUM; + use crate::pci::root_port::{RootPort, RootPortConfig}; + use crate::pci::tests::TestPciDevice; + use crate::pci::{clean_pcidevops_type, register_pcidevops_type, PciDevOps}; + use address_space::Region; pub fn create_pci_host() -> Arc> { #[cfg(target_arch = "x86_64")] - let sys_io = AddressSpace::new(Region::init_container_region(1 << 16)).unwrap(); - let sys_mem = AddressSpace::new(Region::init_container_region(u64::max_value())).unwrap(); + let sys_io = AddressSpace::new( + Region::init_container_region(1 << 16, "sysio"), + "sysio", + None, + ) + .unwrap(); + let sys_mem = AddressSpace::new( + Region::init_container_region(u64::max_value(), "sysmem"), + "sysmem", + None, + ) + .unwrap(); Arc::new(Mutex::new(PciHost::new( #[cfg(target_arch = "x86_64")] &sys_io, &sys_mem, (0xB000_0000, 0x1000_0000), (0xC000_0000, 0x3000_0000), + #[cfg(target_arch = "aarch64")] + (0xF000_0000, 0x1000_0000), + #[cfg(target_arch = "aarch64")] + (512 << 30, 512 << 30), + 16, ))) } #[test] #[cfg(target_arch = "x86_64")] fn test_pio_ops() { + register_pcidevops_type::().unwrap(); + let pci_host = create_pci_host(); - let root_bus = Arc::downgrade(&pci_host.lock().unwrap().root_bus); + let root_bus = Arc::downgrade(&pci_host.lock().unwrap().child_bus().unwrap()); let pio_addr_ops = PciHost::build_pio_addr_ops(pci_host.clone()); let pio_data_ops = PciHost::build_pio_data_ops(pci_host.clone()); - let root_port = RootPort::new("pcie.1".to_string(), 8, 0, root_bus, false); + let root_port_config = RootPortConfig { + addr: (1, 0), + id: "pcie.1".to_string(), + ..Default::default() + }; + let root_port = RootPort::new(root_port_config, root_bus.clone()); root_port.realize().unwrap(); let mut data = [0_u8; 4]; @@ -470,7 +610,6 @@ pub mod tests { assert_eq!(buf, data); // Non-DWORD access on CONFIG_ADDR - let mut config = [0_u8; 4]; (pio_addr_ops.read)(&mut config, GuestAddress(0), 0); let data = [0x12, 0x34]; @@ -522,36 +661,48 @@ pub mod tests { let mut buf = [0_u8; 4]; (pio_data_ops.read)(&mut buf, GuestAddress(0), 0); assert_eq!(buf, [0xff_u8; 4]); + + clean_pcidevops_type(); } #[test] fn test_mmio_ops() { + register_pcidevops_type::().unwrap(); + register_pcidevops_type::().unwrap(); + let pci_host = create_pci_host(); - let root_bus = Arc::downgrade(&pci_host.lock().unwrap().root_bus); + let root_bus = pci_host.lock().unwrap().child_bus().unwrap(); + let weak_root_bus = Arc::downgrade(&root_bus); let mmconfig_region_ops = PciHost::build_mmconfig_ops(pci_host.clone()); - let mut root_port = RootPort::new("pcie.1".to_string(), 8, 0, root_bus.clone(), false); + let root_port_config = RootPortConfig { + addr: (1, 0), + id: "pcie.1".to_string(), + ..Default::default() + }; + let mut root_port = RootPort::new(root_port_config, weak_root_bus.clone()); root_port.write_config(SECONDARY_BUS_NUM as usize, &[1]); root_port.realize().unwrap(); - let mut root_port = RootPort::new("pcie.2".to_string(), 16, 0, root_bus, false); + let root_port_config = RootPortConfig { + addr: (2, 0), + id: "pcie.2".to_string(), + ..Default::default() + }; + let mut root_port = RootPort::new(root_port_config, weak_root_bus); root_port.write_config(SECONDARY_BUS_NUM as usize, &[2]); root_port.realize().unwrap(); - let bus = PciBus::find_bus_by_name(&pci_host.lock().unwrap().root_bus, "pcie.2").unwrap(); - let pci_dev = PciDevice { - devfn: 8, - config: PciConfig::new(PCI_CONFIG_SPACE_SIZE, 0), - parent_bus: Arc::downgrade(&bus), - }; + let 
bus = PciBus::find_bus_by_name(&root_bus, "pcie.2").unwrap(); + let pci_dev = TestPciDevice::new("PCI device", 8, Arc::downgrade(&bus)); pci_dev.realize().unwrap(); - let addr: u64 = 8_u64 << ECAM_DEVFN_SHIFT | SECONDARY_BUS_NUM as u64; + let addr: u64 = 8_u64 << ECAM_DEVFN_SHIFT | u64::from(SECONDARY_BUS_NUM); let data = [1_u8]; (mmconfig_region_ops.write)(&data, GuestAddress(0), addr); let mut buf = [0_u8]; (mmconfig_region_ops.read)(&mut buf, GuestAddress(0), addr); assert_eq!(buf, data); - let addr: u64 = 16_u64 << ECAM_DEVFN_SHIFT | SECONDARY_BUS_NUM as u64; + let addr: u64 = 16_u64 << ECAM_DEVFN_SHIFT | u64::from(SECONDARY_BUS_NUM); let data = [2_u8]; (mmconfig_region_ops.write)(&data, GuestAddress(0), addr); let mut buf = [0_u8]; @@ -570,5 +721,7 @@ pub mod tests { let mut buf = [0_u8; 2]; (mmconfig_region_ops.read)(&mut buf, GuestAddress(0), addr); assert_eq!(buf, data); + + clean_pcidevops_type(); } } diff --git a/pci/src/hotplug.rs b/devices/src/pci/hotplug.rs similarity index 59% rename from pci/src/hotplug.rs rename to devices/src/pci/hotplug.rs index 9edd0fa2913e35e620826ec8094fb7e64db50917..7f13de932233210c61b7b8b063d4a0368b66b677 100644 --- a/pci/src/hotplug.rs +++ b/devices/src/pci/hotplug.rs @@ -12,18 +12,21 @@ use std::sync::{Arc, Mutex}; -use crate::{errors::Result, PciBus, PciDevOps}; +use anyhow::{bail, Context, Result}; + +use crate::pci::PciBus; +use crate::{convert_bus_ref, Bus, Device, PCI_BUS}; pub trait HotplugOps: Send { /// Plug device, usually called when hot plug device in device_add. - fn plug(&mut self, dev: &Arc>) -> Result<()>; + fn plug(&mut self, dev: &Arc>) -> Result<()>; /// Unplug device request, usually called when hot unplug device in device_del. /// Only send unplug request to the guest OS, without actually removing the device. - fn unplug_request(&mut self, dev: &Arc>) -> Result<()>; + fn unplug_request(&mut self, dev: &Arc>) -> Result<()>; /// Remove the device. - fn unplug(&mut self, dev: &Arc>) -> Result<()>; + fn unplug(&mut self, dev: &Arc>) -> Result<()>; } /// Plug the device into the bus. @@ -38,14 +41,14 @@ pub trait HotplugOps: Send { /// Return Error if /// * No hot plug controller found. /// * Device plug failed. -pub fn handle_plug(bus: &Arc>, dev: &Arc>) -> Result<()> { - let locked_bus = bus.lock().unwrap(); - if let Some(hpc) = locked_bus.hotplug_controller.as_ref() { +pub fn handle_plug(bus: &Arc>, dev: &Arc>) -> Result<()> { + PCI_BUS!(bus, locked_bus, pci_bus); + if let Some(hpc) = pci_bus.hotplug_controller.as_ref() { hpc.upgrade().unwrap().lock().unwrap().plug(dev) } else { bail!( "No hot plug controller found for bus {} when plug", - locked_bus.name + pci_bus.name() ); } } @@ -62,17 +65,22 @@ pub fn handle_plug(bus: &Arc>, dev: &Arc>) -> /// Return Error if /// * No hot plug controller found. /// * Device unplug request failed. -pub fn handle_unplug_request( - bus: &Arc>, - dev: &Arc>, +pub fn handle_unplug_pci_request( + bus: &Arc>, + dev: &Arc>, ) -> Result<()> { - let locked_bus = bus.lock().unwrap(); - if let Some(hpc) = locked_bus.hotplug_controller.as_ref() { - hpc.upgrade().unwrap().lock().unwrap().unplug_request(dev) - } else { - bail!( - "No hot plug controller found for bus {} when unplug request", - locked_bus.name - ); - } + PCI_BUS!(bus, locked_bus, pci_bus); + let hpc = pci_bus + .hotplug_controller + .as_ref() + .cloned() + .with_context(|| { + format!( + "No hot plug controller found for bus {} when unplug request", + pci_bus.name() + ) + })?; + // No need to hold the lock. 
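+    // (unplug_request() may itself need to take the bus lock, so it is released first.)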
+ drop(locked_bus); + hpc.upgrade().unwrap().lock().unwrap().unplug_request(dev) } diff --git a/devices/src/pci/intx.rs b/devices/src/pci/intx.rs new file mode 100644 index 0000000000000000000000000000000000000000..9b62e0bda81d03fa2c61ff9738b20688fb6b6ef0 --- /dev/null +++ b/devices/src/pci/intx.rs @@ -0,0 +1,167 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::sync::{Arc, Mutex, Weak}; + +use anyhow::Result; +use log::error; + +use super::{PciDevOps, RootPort}; +use crate::interrupt_controller::LineIrqManager; +use crate::pci::{swizzle_map_irq, PciBus, PciConfig, INTERRUPT_PIN, PCI_PIN_NUM}; +use crate::{convert_bus_ref, convert_device_ref, Bus, PCI_BUS, ROOT_PORT}; + +pub type InterruptHandler = Box Result<()> + Send + Sync>; + +/// PCI INTx information. +pub struct PciIntxState { + /// Gsi of PCI bus INT#A. + pub gsi_base: u32, + /// INTx IRQ numbers to be asserted of every INTx virtual interrupt line. + pub irq_count: [i8; PCI_PIN_NUM as usize], + /// Handler of asserting the INTx IRQ. + pub irq_handler: Arc, +} + +impl PciIntxState { + pub fn new(gsi_base: u32, irq_handler: Arc) -> Self { + Self { + gsi_base, + irq_count: [0; PCI_PIN_NUM as usize], + irq_handler, + } + } +} + +/// INTx structure. +pub struct Intx { + /// Device name. + pub device_name: String, + /// Physical interrupt pin. + pub irq_pin: u32, + /// Interrupt level. + pub level: u8, + /// Driver enable status. + pub enabled: bool, + /// Interrupt info related to INTx. 
+ pub intx_state: Option>>, +} + +impl Intx { + pub fn new(name: String, irq_pin: u32, intx_state: Option>>) -> Self { + Self { + device_name: name, + irq_pin, + level: 0, + enabled: true, + intx_state, + } + } + + pub fn notify(&mut self, level: u8) { + assert!(level == 0 || level == 1); + if self.level == level { + return; + }; + + let change: i8 = level as i8 - self.level as i8; + self.level = level; + if !self.enabled { + error!( + "INTx is disabled, failed to set irq INTx interrupt for {}.", + self.device_name + ); + return; + } + + self.change_irq_level(change); + } + + pub fn change_irq_level(&self, change: i8) { + if let Some(intx_state) = &self.intx_state { + let mut locked_intx_state = intx_state.lock().unwrap(); + locked_intx_state.irq_count[self.irq_pin as usize] += change; + if locked_intx_state.irq_count[self.irq_pin as usize] < 0 { + locked_intx_state.irq_count[self.irq_pin as usize] = 0; + } + + let irq = locked_intx_state.gsi_base + self.irq_pin; + let level = locked_intx_state.irq_count[self.irq_pin as usize] != 0; + + let irq_handler = &locked_intx_state.irq_handler; + if let Err(e) = irq_handler.set_level_irq(irq, level) { + error!( + "Failed to set irq {} level {} of device {}: {}.", + irq, level, self.device_name, e + ); + } + } else { + error!( + "Can't set irq pin {} for {}, the INTx handler is not initialized", + self.irq_pin, self.device_name + ); + }; + } + + pub fn reset(&mut self) { + self.notify(0); + self.enabled = true; + } +} + +pub fn init_intx( + name: String, + config: &mut PciConfig, + parent_bus: Weak>, + devfn: u8, +) -> Result<()> { + if config.config[INTERRUPT_PIN as usize] == 0 { + let (irq, intx_state) = (std::u32::MAX, None); + let intx = Arc::new(Mutex::new(Intx::new(name, irq, intx_state))); + config.intx = Some(intx); + return Ok(()); + } + + let (irq, intx_state) = if let Some(bus) = parent_bus.upgrade() { + PCI_BUS!(bus, locked_bus, pci_bus); + let pin = config.config[INTERRUPT_PIN as usize] - 1; + + let (irq, intx_state) = match &pci_bus.parent_device() { + Some(parent_bridge) => { + let parent_bridge = parent_bridge.upgrade().unwrap(); + ROOT_PORT!(parent_bridge, locked_bridge, bridge); + ( + swizzle_map_irq(bridge.pci_base().devfn, pin), + bridge.get_intx_state(), + ) + } + None => { + if pci_bus.intx_state.is_some() { + ( + swizzle_map_irq(devfn, pin), + Some(pci_bus.intx_state.as_ref().unwrap().clone()), + ) + } else { + (std::u32::MAX, None) + } + } + }; + (irq, intx_state) + } else { + (std::u32::MAX, None) + }; + + let intx = Arc::new(Mutex::new(Intx::new(name, irq, intx_state))); + + config.intx = Some(intx); + Ok(()) +} diff --git a/pci/src/lib.rs b/devices/src/pci/mod.rs similarity index 40% rename from pci/src/lib.rs rename to devices/src/pci/mod.rs index 87eb1637306c958d8ada7d35e136198b855732f7..e2b64ff85e3ff04578bbadd445c822cac1977e49 100644 --- a/pci/src/lib.rs +++ b/devices/src/pci/mod.rs @@ -10,64 +10,50 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -#[macro_use] -extern crate error_chain; -#[macro_use] -extern crate log; -#[macro_use] -extern crate migration_derive; -#[macro_use] -extern crate machine_manager; - -pub mod errors { - error_chain! 
{ - links { - AddressSpace(address_space::errors::Error, address_space::errors::ErrorKind); - } - errors { - AddPciCap(id: u8, size: usize) { - display("Failed to add PCI capability: id 0x{:x}, size: 0x{:x}.", id, size) - } - AddPcieExtCap(id: u16, size: usize) { - display("Failed to add PCIe extended capability: id 0x{:x}, size: 0x{:x}.", id, size) - } - UnregMemBar(id: usize) { - display("Failed to unmap BAR {} in memory space.", id) - } - DeviceStatus(status: u32) { - display("Invalid device status 0x{:x}", status) - } - PciRegister(offset: u64) { - display("Unsupported pci register, 0x{:x}", offset) - } - } - } -} - pub mod config; +#[cfg(feature = "demo_device")] +pub mod demo_device; +pub mod error; +pub mod host; pub mod hotplug; +pub mod intx; pub mod msix; mod bus; -mod host; mod root_port; pub use bus::PciBus; +pub use config::{PciConfig, INTERRUPT_PIN}; +pub use error::PciError; pub use host::PciHost; -pub use msix::init_msix; -pub use root_port::RootPort; +pub use intx::{init_intx, InterruptHandler, PciIntxState}; +pub use msix::{init_msix, MsiVector}; +pub use root_port::{RootPort, RootPortConfig}; -use std::{ - mem::size_of, - sync::{Mutex, Weak}, -}; +use std::any::{Any, TypeId}; +use std::collections::HashMap; +use std::mem::size_of; +use std::sync::{Arc, Mutex, Weak}; +use anyhow::{bail, Result}; use byteorder::{ByteOrder, LittleEndian}; -use config::{HEADER_TYPE, HEADER_TYPE_MULTIFUNC, MAX_FUNC}; -use errors::Result; +#[cfg(feature = "scream")] +use crate::misc::ivshmem::Ivshmem; +#[cfg(feature = "pvpanic")] +use crate::misc::pvpanic::PvPanicPci; +use crate::pci::config::{HEADER_TYPE, HEADER_TYPE_MULTIFUNC, MAX_FUNC}; +use crate::usb::xhci::xhci_pci::XhciPciDevice; +use crate::{ + convert_bus_ref, convert_device_ref, Bus, Device, DeviceBase, MsiIrqManager, PCI_BUS, ROOT_PORT, +}; +#[cfg(feature = "demo_device")] +use demo_device::DemoDev; const BDF_FUNC_SHIFT: u8 = 3; +pub const PCI_SLOT_MAX: u8 = 32; +pub const PCI_PIN_NUM: u8 = 4; +pub const PCI_INTR_BASE: u8 = 32; /// Macros that write data in little endian. macro_rules! le_write { @@ -126,15 +112,15 @@ fn le_write_clear_value_u16(buf: &mut [u8], offset: usize, data: u16) -> Result< le_write_u16(buf, offset, val & !data) } -pub fn pci_devfn(slot: u8, func: u8) -> u8 { +fn pci_devfn(slot: u8, func: u8) -> u8 { ((slot & 0x1f) << 3) | (func & 0x07) } -pub fn pci_slot(devfn: u8) -> u8 { +fn pci_slot(devfn: u8) -> u8 { devfn >> 3 & 0x1f } -pub fn pci_func(devfn: u8) -> u8 { +fn pci_func(devfn: u8) -> u8 { devfn & 0x07 } @@ -150,19 +136,40 @@ pub fn pci_ext_cap_next(header: u32) -> usize { ((header >> 20) & 0xffc) as usize } -pub trait PciDevOps: Send { +#[derive(Clone)] +pub struct PciDevBase { + pub base: DeviceBase, + /// Pci config space. + pub config: PciConfig, + /// Devfn. + pub devfn: u8, +} + +pub trait PciDevOps: Device + Send { + /// Get base property of pci device. + fn pci_base(&self) -> &PciDevBase; + + /// Get mutable base property of pci device. + fn pci_base_mut(&mut self) -> &mut PciDevBase; + /// Init writable bit mask. - fn init_write_mask(&mut self) -> Result<()>; + fn init_write_mask(&mut self, is_bridge: bool) -> Result<()> { + self.pci_base_mut().config.init_common_write_mask()?; + if is_bridge { + self.pci_base_mut().config.init_bridge_write_mask()?; + } - /// Init write-and-clear bit mask. - fn init_write_clear_mask(&mut self) -> Result<()>; + Ok(()) + } - /// Realize PCI/PCIe device. - fn realize(self) -> Result<()>; + /// Init write-and-clear bit mask. 
+ fn init_write_clear_mask(&mut self, is_bridge: bool) -> Result<()> { + self.pci_base_mut().config.init_common_write_clear_mask()?; + if is_bridge { + self.pci_base_mut().config.init_bridge_write_clear_mask()?; + } - /// Unrealize PCI/PCIe device. - fn unrealize(&mut self) -> Result<()> { - bail!("Unrealize of the pci device is not implemented"); + Ok(()) } /// Configuration space read. @@ -171,7 +178,9 @@ pub trait PciDevOps: Send { /// /// * `offset` - Offset in configuration space. /// * `data` - Data buffer for reading. - fn read_config(&self, offset: usize, data: &mut [u8]); + fn read_config(&mut self, offset: usize, data: &mut [u8]) { + self.pci_base_mut().config.read(offset, data); + } /// Configuration space write. /// @@ -186,28 +195,128 @@ pub trait PciDevOps: Send { /// # Arguments /// /// * `bus_num` - Bus number. - /// * `devfn` - Slot number << 8 | Function number. + /// * `devfn` - Slot number << 3 | Function number. /// /// # Returns /// /// Device id to send MSI/MSI-X. fn set_dev_id(&self, bus_num: u8, devfn: u8) -> u16 { let bus_shift: u16 = 8; - ((bus_num as u16) << bus_shift) | (devfn as u16) + (u16::from(bus_num) << bus_shift) | u16::from(devfn) + } + + /// Get the path of the PCI bus where the device resides. + fn get_parent_dev_path(&self, parent_bus: Arc>) -> String { + PCI_BUS!(parent_bus, locked_bus, pci_bus); + + if pci_bus.name().eq("pcie.0") { + String::from("/pci@ffffffffffffffff") + } else { + // This else branch will not be executed currently, + // which is mainly to be compatible with new PCI bridge devices. + // unwrap is safe because pci bus under root port will not return null. + let parent_bridge = pci_bus.parent_device().unwrap().upgrade().unwrap(); + ROOT_PORT!(parent_bridge, locked_bridge, rootport); + rootport.get_dev_path().unwrap() + } } - /// Get device name. - fn name(&self) -> String; + /// Fill the device path according to parent device path and device function. + fn populate_dev_path(&self, parent_dev_path: String, devfn: u8, dev_type: &str) -> String { + let slot = pci_slot(devfn); + let function = pci_func(devfn); - /// Reset device - fn reset(&mut self, _reset_child_device: bool) -> Result<()> { + let slot_function = if function != 0 { + format!("{:x},{:x}", slot, function) + } else { + format!("{:x}", slot) + }; + + format!("{}{}{}", parent_dev_path, dev_type, slot_function) + } + + /// Get firmware device path. + fn get_dev_path(&self) -> Option { + None + } + + fn change_irq_level(&self, _irq_pin: u32, _level: i8) -> Result<()> { Ok(()) } - /// Get device devfn - fn devfn(&self) -> Option { + fn get_intx_state(&self) -> Option>> { None } + + fn get_msi_irq_manager(&self) -> Option> { + None + } +} + +pub type ToPciDevOpsFunc = fn(&mut dyn Any) -> &mut dyn PciDevOps; + +static mut PCIDEVOPS_HASHMAP: Option> = None; + +pub fn convert_to_pcidevops(item: &mut dyn Any) -> &mut dyn PciDevOps { + // SAFETY: The typeid of `T` is the typeid recorded in the hashmap. The target structure type of + // the conversion is its own structure type, so the conversion result will definitely not be `None`. + let t = item.downcast_mut::().unwrap(); + t as &mut dyn PciDevOps +} + +pub fn register_pcidevops_type() -> Result<()> { + let type_id = TypeId::of::(); + // SAFETY: PCIDEVOPS_HASHMAP will be built in `type_init` function sequentially in the main thread. + // And will not be changed after `type_init`. 
+ unsafe { + if PCIDEVOPS_HASHMAP.is_none() { + PCIDEVOPS_HASHMAP = Some(HashMap::new()); + } + let types = PCIDEVOPS_HASHMAP.as_mut().unwrap(); + if types.get(&type_id).is_some() { + bail!("Type Id {:?} has been registered.", type_id); + } + types.insert(type_id, convert_to_pcidevops::); + } + + Ok(()) +} + +pub fn devices_register_pcidevops_type() -> Result<()> { + #[cfg(feature = "scream")] + register_pcidevops_type::()?; + #[cfg(feature = "pvpanic")] + register_pcidevops_type::()?; + register_pcidevops_type::()?; + #[cfg(feature = "demo_device")] + register_pcidevops_type::()?; + register_pcidevops_type::() +} + +#[cfg(test)] +pub fn clean_pcidevops_type() { + unsafe { + PCIDEVOPS_HASHMAP = None; + } +} + +pub fn to_pcidevops(dev: &mut dyn Device) -> Option<&mut dyn PciDevOps> { + // SAFETY: PCIDEVOPS_HASHMAP has been built. And this function is called without changing hashmap. + unsafe { + let types = PCIDEVOPS_HASHMAP.as_mut().unwrap(); + let func = types.get(&dev.device_type_id())?; + let pcidev = func(dev.as_any_mut()); + Some(pcidev) + } +} + +/// Convert from Arc> to &mut dyn PciDevOps. +#[macro_export] +macro_rules! PCI_BUS_DEVICE { + ($trait_device:expr, $lock_device: ident, $trait_pcidevops: ident) => { + let mut $lock_device = $trait_device.lock().unwrap(); + let $trait_pcidevops = to_pcidevops(&mut *$lock_device).unwrap(); + }; } /// Init multifunction for pci devices. @@ -220,16 +329,16 @@ pub trait PciDevOps: Send { /// * `parent_bus` - Parent bus of pci devices. pub fn init_multifunction( multifunction: bool, - config: &mut Vec, + config: &mut [u8], devfn: u8, - parent_bus: Weak>, + parent_bus: Weak>, ) -> Result<()> { let mut header_type = - le_read_u16(config, HEADER_TYPE as usize)? & (!HEADER_TYPE_MULTIFUNC as u16); + le_read_u16(config, HEADER_TYPE as usize)? & u16::from(!HEADER_TYPE_MULTIFUNC); if multifunction { - header_type |= HEADER_TYPE_MULTIFUNC as u16; + header_type |= u16::from(HEADER_TYPE_MULTIFUNC); } - le_write_u16(config, HEADER_TYPE as usize, header_type as u16)?; + le_write_u16(config, HEADER_TYPE as usize, header_type)?; // Allow two ways of multifunction bit: // 1. The multifunction bit of all devices must be set; @@ -237,24 +346,21 @@ pub fn init_multifunction( // leave the bit to 0. let slot = pci_slot(devfn); let bus = parent_bus.upgrade().unwrap(); - let locked_bus = bus.lock().unwrap(); + PCI_BUS!(bus, locked_bus, pci_bus); if pci_func(devfn) != 0 { - let pci_dev = locked_bus.devices.get(&pci_devfn(slot, 0)); - if pci_dev.is_none() { + let dev = pci_bus.child_dev(u64::from(pci_devfn(slot, 0))); + if dev.is_none() { return Ok(()); } let mut data = vec![0_u8; 2]; - pci_dev - .unwrap() - .lock() - .unwrap() - .read_config(HEADER_TYPE as usize, data.as_mut_slice()); - if LittleEndian::read_u16(&data) & HEADER_TYPE_MULTIFUNC as u16 == 0 { + PCI_BUS_DEVICE!(dev.unwrap(), locked_dev, pci_dev); + pci_dev.read_config(HEADER_TYPE as usize, data.as_mut_slice()); + if LittleEndian::read_u16(&data) & u16::from(HEADER_TYPE_MULTIFUNC) == 0 { // Function 0 should set multifunction bit. bail!( "PCI: single function device can't be populated in bus {} function {}.{}", - &locked_bus.name, + &pci_bus.name(), slot, devfn & 0x07 ); @@ -268,7 +374,10 @@ pub fn init_multifunction( // If function 0 is set to single function, the rest function should be None. 
for func in 1..MAX_FUNC { - if locked_bus.devices.get(&pci_devfn(slot, func)).is_some() { + if pci_bus + .child_dev(u64::from(pci_devfn(slot, func))) + .is_some() + { bail!( "PCI: {}.0 indicates single function, but {}.{} is already populated", slot, @@ -280,24 +389,91 @@ pub fn init_multifunction( Ok(()) } -/// Check whether two regions overlap with each other. -/// -/// # Arguments -/// -/// * `start` - Start address of the first region. -/// * `end` - End address of the first region. -/// * `region_start` - Start address of the second region. -/// * `region_end` - End address of the second region. -pub fn ranges_overlap(start: usize, end: usize, range_start: usize, range_end: usize) -> bool { - if start >= range_end || range_start >= end { - return false; - } - true +/// 0 <= pin <= 3. 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD. +/// PCI-to-PCI bridge specification 9.1: Interrupt routing. +pub fn swizzle_map_irq(devfn: u8, pin: u8) -> u32 { + let pci_slot = devfn >> 3 & 0x1f; + u32::from((pci_slot + pin) % PCI_PIN_NUM) } #[cfg(test)] mod tests { use super::*; + use crate::pci::config::{PciConfig, PCI_CONFIG_SPACE_SIZE}; + use crate::DeviceBase; + use address_space::{AddressSpace, Region}; + use util::gen_base_func; + + #[derive(Clone)] + pub struct TestPciDevice { + base: PciDevBase, + } + + impl TestPciDevice { + pub fn new(name: &str, devfn: u8, parent_bus: Weak>) -> Self { + Self { + base: PciDevBase { + base: DeviceBase::new(name.to_string(), false, Some(parent_bus)), + config: PciConfig::new(devfn, PCI_CONFIG_SPACE_SIZE, 0), + devfn, + }, + } + } + } + + impl Device for TestPciDevice { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn realize(mut self) -> Result>> { + let devfn = u64::from(self.base.devfn); + self.init_write_mask(false)?; + self.init_write_clear_mask(false)?; + + let dev = Arc::new(Mutex::new(self)); + let parent_bus = dev.lock().unwrap().parent_bus().unwrap().upgrade().unwrap(); + parent_bus + .lock() + .unwrap() + .attach_child(devfn, dev.clone())?; + + Ok(dev) + } + + fn unrealize(&mut self) -> Result<()> { + Ok(()) + } + } + + impl PciDevOps for TestPciDevice { + gen_base_func!(pci_base, pci_base_mut, PciDevBase, base); + + fn write_config(&mut self, offset: usize, data: &[u8]) { + self.base.config.write( + offset, + data, + 0, + #[cfg(target_arch = "x86_64")] + None, + None, + ); + } + + fn init_write_mask(&mut self, _is_bridge: bool) -> Result<()> { + let mut offset = 0_usize; + while offset < self.base.config.config.len() { + LittleEndian::write_u32( + &mut self.base.config.write_mask[offset..offset + 4], + 0xffff_ffff, + ); + offset += 4; + } + Ok(()) + } + + fn init_write_clear_mask(&mut self, _is_bridge: bool) -> Result<()> { + Ok(()) + } + } #[test] fn test_le_write_u16_01() { @@ -338,47 +514,22 @@ mod tests { assert!(le_write_u64(&mut buf, 1, 0x1234_5678_9abc_deff).is_err()); } - #[test] - fn test_ranges_overlap() { - assert!(ranges_overlap(100, 200, 150, 250)); - assert!(ranges_overlap(100, 200, 150, 200)); - assert!(!ranges_overlap(100, 200, 200, 250)); - assert!(ranges_overlap(100, 200, 100, 150)); - assert!(!ranges_overlap(100, 200, 50, 100)); - assert!(ranges_overlap(100, 200, 50, 150)); - } - #[test] fn set_dev_id() { - struct PciDev { - name: String, - } - - impl PciDevOps for PciDev { - fn init_write_mask(&mut self) -> Result<()> { - Ok(()) - } - - fn init_write_clear_mask(&mut self) -> Result<()> { - Ok(()) - } - - fn read_config(&self, _offset: usize, _data: &mut [u8]) {} - - fn write_config(&mut self, _offset: usize, 
_data: &[u8]) {} - - fn name(&self) -> String { - self.name.clone() - } - - fn realize(self) -> Result<()> { - Ok(()) - } - } - - let dev = PciDev { - name: "PCI device".to_string(), - }; + let sys_mem = AddressSpace::new( + Region::init_container_region(u64::max_value(), "sysmem"), + "sysmem", + None, + ) + .unwrap(); + let parent_bus = Arc::new(Mutex::new(PciBus::new( + String::from("test bus"), + #[cfg(target_arch = "x86_64")] + Region::init_container_region(1 << 16, "parent_bus"), + sys_mem.root().clone(), + ))) as Arc>; + + let dev = TestPciDevice::new("PCI device", 0, Arc::downgrade(&parent_bus)); assert_eq!(dev.set_dev_id(1, 2), 258); } } diff --git a/pci/src/msix.rs b/devices/src/pci/msix.rs similarity index 49% rename from pci/src/msix.rs rename to devices/src/pci/msix.rs index cbf27ea1aad730728e47367a691123146eb7fc73..428c4e83112777108c7f962ff2d7b24826e7f4c2 100644 --- a/pci/src/msix.rs +++ b/devices/src/pci/msix.rs @@ -10,18 +10,29 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. +use std::cmp::max; +use std::collections::HashMap; use std::sync::atomic::{AtomicU16, Ordering}; -use std::sync::{Arc, Mutex, Weak}; +use std::sync::{Arc, Mutex}; -use address_space::{GuestAddress, Region, RegionOps}; -use hypervisor::kvm::{MsiVector, KVM_FDS}; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; -use util::{byte_code::ByteCode, num_ops::round_up, unix::host_page_size}; +use anyhow::{bail, Context, Result}; +use log::{error, warn}; +use vmm_sys_util::eventfd::EventFd; -use crate::config::{CapId, PciConfig, RegionType, SECONDARY_BUS_NUM}; -use crate::errors::{Result, ResultExt}; -use crate::{ +use crate::pci::config::{CapId, RegionType, MINIMUM_BAR_SIZE_FOR_MMIO}; +use crate::pci::{ le_read_u16, le_read_u32, le_read_u64, le_write_u16, le_write_u32, le_write_u64, PciBus, + PciDevBase, +}; +use crate::{convert_bus_ref, MsiIrqManager, PCI_BUS}; +use address_space::{GuestAddress, Region, RegionOps}; +use migration::{ + DeviceStateDesc, FieldDesc, MigrationError, MigrationHook, MigrationManager, StateTransfer, +}; +use migration_derive::{ByteCode, Desc}; +use util::{ + byte_code::ByteCode, + num_ops::{ranges_overlap, round_up}, }; pub const MSIX_TABLE_ENTRY_SIZE: u16 = 16; @@ -39,9 +50,21 @@ pub const MSIX_CAP_FUNC_MASK: u16 = 0x4000; pub const MSIX_CAP_SIZE: u8 = 12; pub const MSIX_CAP_ID: u8 = 0x11; pub const MSIX_CAP_TABLE: u8 = 0x04; -pub const MSIX_CAP_PBA: u8 = 0x08; +const MSIX_CAP_PBA: u8 = 0x08; + +/// Basic data for msi vector. +#[derive(Copy, Clone, Default)] +pub struct MsiVector { + pub msg_addr_lo: u32, + pub msg_addr_hi: u32, + pub msg_data: u32, + pub masked: bool, + #[cfg(target_arch = "aarch64")] + pub dev_id: u32, +} /// MSI-X message structure. +#[derive(Copy, Clone)] pub struct Message { /// Lower 32bit address of MSI-X address. pub address_lo: u32, @@ -51,6 +74,13 @@ pub struct Message { pub data: u32, } +/// GSI information for routing msix. +struct GsiMsiRoute { + irq_fd: Arc, + gsi: i32, + msg: Message, +} + /// The state of msix device. #[repr(C)] #[derive(Copy, Clone, Desc, ByteCode)] @@ -75,6 +105,9 @@ pub struct Msix { pub enabled: bool, pub msix_cap_offset: u16, pub dev_id: Arc, + /// Maintains a list of GSI with irqfds that are registered to kvm. + gsi_msi_routes: HashMap, + pub msi_irq_manager: Option>, } impl Msix { @@ -86,27 +119,53 @@ impl Msix { /// * `pba_size` - Size in bytes of MSI-X PBA. 
/// * `msix_cap_offset` - Offset of MSI-X capability in configuration space. /// * `dev_id` - Dev_id for device. - pub fn new(table_size: u32, pba_size: u32, msix_cap_offset: u16, dev_id: u16) -> Self { + pub fn new( + table_size: u32, + pba_size: u32, + msix_cap_offset: u16, + dev_id: Arc, + msi_irq_manager: Option>, + ) -> Self { let mut msix = Msix { table: vec![0; table_size as usize], pba: vec![0; pba_size as usize], func_masked: true, enabled: true, msix_cap_offset, - dev_id: Arc::new(AtomicU16::new(dev_id)), + dev_id, + gsi_msi_routes: HashMap::new(), + msi_irq_manager, }; msix.mask_all_vectors(); msix } pub fn reset(&mut self) { - self.table.resize_with(self.table.len(), || 0); - self.pba.resize_with(self.pba.len(), || 0); + self.table.fill(0); + self.pba.fill(0); self.func_masked = true; self.enabled = true; self.mask_all_vectors(); } + pub fn is_enabled(&self, config: &[u8]) -> bool { + let offset: usize = self.msix_cap_offset as usize + MSIX_CAP_CONTROL as usize; + let msix_ctl = le_read_u16(config, offset).unwrap(); + if msix_ctl & MSIX_CAP_ENABLE > 0 { + return true; + } + false + } + + pub fn is_func_masked(&self, config: &[u8]) -> bool { + let offset: usize = self.msix_cap_offset as usize + MSIX_CAP_CONTROL as usize; + let msix_ctl = le_read_u16(config, offset).unwrap(); + if msix_ctl & MSIX_CAP_FUNC_MASK > 0 { + return true; + } + false + } + fn mask_all_vectors(&mut self) { let nr_vectors: usize = self.table.len() / MSIX_TABLE_ENTRY_SIZE as usize; for v in 0..nr_vectors { @@ -129,7 +188,7 @@ impl Msix { fn is_vector_pending(&self, vector: u16) -> bool { let offset: usize = vector as usize / 64; - let pending_bit: u64 = 1 << (vector as u64 % 64); + let pending_bit: u64 = 1 << (u64::from(vector) % 64); let value = le_read_u64(&self.pba, offset).unwrap(); if value & pending_bit > 0 { return true; @@ -139,22 +198,102 @@ impl Msix { fn set_pending_vector(&mut self, vector: u16) { let offset: usize = vector as usize / 64; - let pending_bit: u64 = 1 << (vector as u64 % 64); + let pending_bit: u64 = 1 << (u64::from(vector) % 64); let old_val = le_read_u64(&self.pba, offset).unwrap(); le_write_u64(&mut self.pba, offset, old_val | pending_bit).unwrap(); } fn clear_pending_vector(&mut self, vector: u16) { let offset: usize = vector as usize / 64; - let pending_bit: u64 = !(1 << (vector as u64 % 64)); + let pending_bit: u64 = !(1 << (u64::from(vector) % 64)); let old_val = le_read_u64(&self.pba, offset).unwrap(); le_write_u64(&mut self.pba, offset, old_val & pending_bit).unwrap(); } + pub fn clear_pending_vectors(&mut self) { + let max_vector_nr = self.table.len() as u16 / MSIX_TABLE_ENTRY_SIZE; + for v in 0..max_vector_nr { + self.clear_pending_vector(v); + } + } + + fn update_irq_routing(&mut self, vector: u16, is_masked: bool) -> Result<()> { + let entry = self.get_message(vector); + let route = if let Some(route) = self.gsi_msi_routes.get_mut(&vector) { + route + } else { + return Ok(()); + }; + + let msix_vector = MsiVector { + msg_addr_lo: entry.address_lo, + msg_addr_hi: entry.address_hi, + msg_data: entry.data, + masked: false, + #[cfg(target_arch = "aarch64")] + dev_id: u32::from(self.dev_id.load(Ordering::Acquire)), + }; + + let irq_manager = self.msi_irq_manager.as_ref().unwrap(); + + if is_masked { + irq_manager.unregister_irqfd(route.irq_fd.clone(), route.gsi as u32)?; + } else { + let msg = &route.msg; + if msg.data != entry.data + || msg.address_lo != entry.address_lo + || msg.address_hi != entry.address_hi + { + irq_manager.update_route_table(route.gsi as u32, 
msix_vector)?; + route.msg = entry; + } + + irq_manager.register_irqfd(route.irq_fd.clone(), route.gsi as u32)?; + } + Ok(()) + } + + pub fn register_irqfd(&mut self, vector: u16, call_fd: Arc) -> Result<()> { + let entry = self.get_message(vector); + let msix_vector = MsiVector { + msg_addr_lo: entry.address_lo, + msg_addr_hi: entry.address_hi, + msg_data: entry.data, + masked: false, + #[cfg(target_arch = "aarch64")] + dev_id: u32::from(self.dev_id.load(Ordering::Acquire)), + }; + + let irq_manager = self.msi_irq_manager.as_ref().unwrap(); + + let gsi = irq_manager.allocate_irq(msix_vector)?; + irq_manager.register_irqfd(call_fd.clone(), gsi)?; + + let gsi_route = GsiMsiRoute { + irq_fd: call_fd, + gsi: gsi as i32, + msg: entry, + }; + self.gsi_msi_routes.insert(vector, gsi_route); + Ok(()) + } + + pub fn unregister_irqfd(&mut self) -> Result<()> { + let irq_manager = &self.msi_irq_manager.as_ref().unwrap(); + for (_, route) in self.gsi_msi_routes.iter() { + irq_manager.unregister_irqfd(route.irq_fd.clone(), route.gsi as u32)?; + irq_manager.release_irq(route.gsi as u32)?; + } + self.gsi_msi_routes.clear(); + Ok(()) + } + fn register_memory_region( msix: Arc>, region: &Region, dev_id: Arc, + table_offset: u64, + pba_offset: u64, ) -> Result<()> { let locked_msix = msix.lock().unwrap(); let table_size = locked_msix.table.len() as u64; @@ -164,9 +303,10 @@ impl Msix { let table_read = move |data: &mut [u8], _addr: GuestAddress, offset: u64| -> bool { if offset as usize + data.len() > cloned_msix.lock().unwrap().table.len() { error!( - "Fail to read msi table, illegal data length {}, offset {}", - data.len(), - offset + "It's forbidden to read out of the msix table(size: {}), with offset of {} and size of {}", + cloned_msix.lock().unwrap().table.len(), + offset, + data.len() ); return false; } @@ -176,6 +316,15 @@ impl Msix { }; let cloned_msix = msix.clone(); let table_write = move |data: &[u8], _addr: GuestAddress, offset: u64| -> bool { + if offset as usize + data.len() > cloned_msix.lock().unwrap().table.len() { + error!( + "It's forbidden to write out of the msix table(size: {}), with offset of {} and size of {}", + cloned_msix.lock().unwrap().table.len(), + offset, + data.len() + ); + return false; + } let mut locked_msix = cloned_msix.lock().unwrap(); let vector: u16 = offset as u16 / MSIX_TABLE_ENTRY_SIZE; let was_masked: bool = locked_msix.is_vector_masked(vector); @@ -183,7 +332,14 @@ impl Msix { locked_msix.table[offset..(offset + 4)].copy_from_slice(data); let is_masked: bool = locked_msix.is_vector_masked(vector); - if was_masked && !is_masked { + if was_masked != is_masked && locked_msix.update_irq_routing(vector, is_masked).is_err() + { + return false; + } + + // Clear the pending vector just when it is pending. Otherwise, it + // will cause unknown error. 
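+ // When the guest unmasks a vector whose pending bit is set, the branch below
+ // first clears the pending bit and then delivers the interrupt via notify().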
+ if was_masked && !is_masked && locked_msix.is_vector_pending(vector) { locked_msix.clear_pending_vector(vector); locked_msix.notify(vector, dev_id.load(Ordering::Acquire)); } @@ -194,10 +350,10 @@ impl Msix { read: Arc::new(table_read), write: Arc::new(table_write), }; - let table_region = Region::init_io_region(table_size, table_region_ops); + let table_region = Region::init_io_region(table_size, table_region_ops, "MsixTable"); region - .add_subregion(table_region, 0) - .chain_err(|| "Failed to register MSI-X table region.")?; + .add_subregion(table_region, table_offset) + .with_context(|| "Failed to register MSI-X table region.")?; let cloned_msix = msix.clone(); let pba_read = move |data: &mut [u8], _addr: GuestAddress, offset: u64| -> bool { @@ -218,10 +374,10 @@ impl Msix { read: Arc::new(pba_read), write: Arc::new(pba_write), }; - let pba_region = Region::init_io_region(pba_size, pba_region_ops); + let pba_region = Region::init_io_region(pba_size, pba_region_ops, "MsixPba"); region - .add_subregion(pba_region, table_size) - .chain_err(|| "Failed to register MSI-X PBA region.")?; + .add_subregion(pba_region, pba_offset) + .with_context(|| "Failed to register MSI-X PBA region.")?; Ok(()) } @@ -242,9 +398,27 @@ impl Msix { } } + pub fn send_msix(&self, vector: u16, dev_id: u16) { + let msg = self.get_message(vector); + + let msix_vector = MsiVector { + msg_addr_lo: msg.address_lo, + msg_addr_hi: msg.address_hi, + msg_data: msg.data, + masked: false, + #[cfg(target_arch = "aarch64")] + dev_id: u32::from(dev_id), + }; + + let irq_manager = self.msi_irq_manager.as_ref().unwrap(); + if let Err(e) = irq_manager.trigger(None, msix_vector, u32::from(dev_id)) { + error!("Send msix error: {:?}", e); + }; + } + pub fn notify(&mut self, vector: u16, dev_id: u16) { if vector >= self.table.len() as u16 / MSIX_TABLE_ENTRY_SIZE { - error!("Invaild msix vector {}.", vector); + warn!("Invalid msix vector {}.", vector); return; } @@ -253,29 +427,42 @@ impl Msix { return; } - send_msix(self.get_message(vector), dev_id); + self.send_msix(vector, dev_id); } - pub fn write_config(&mut self, config: &[u8], dev_id: u16) { - let func_masked: bool = is_msix_func_masked(self.msix_cap_offset as usize, config); - let enabled: bool = is_msix_enabled(self.msix_cap_offset as usize, config); + pub fn write_config(&mut self, config: &[u8], dev_id: u16, offset: usize, data: &[u8]) { + let len = data.len(); + let msix_cap_control_off: usize = self.msix_cap_offset as usize + MSIX_CAP_CONTROL as usize; + // Only care about the bits Masked(14) & Enabled(15) in msix control register. + // SAFETY: msix_cap_control_off is less than u16::MAX. + // Offset and len have been checked in call function PciConfig::write. 
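+ // The Masked(14) and Enabled(15) bits live in the upper byte of the 16-bit control
+ // register, so the overlap check below only covers the byte at msix_cap_control_off + 1.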
+ if !ranges_overlap(offset, len, msix_cap_control_off + 1, 1).unwrap() { + return; + } + + let masked: bool = self.is_func_masked(config); + let enabled: bool = self.is_enabled(config); + trace::msix_write_config(self.dev_id.load(Ordering::Relaxed), masked, enabled); + + let mask_state_changed = !((self.func_masked == masked) && (self.enabled == enabled)); + + self.func_masked = masked; + self.enabled = enabled; - if enabled && self.func_masked && !func_masked { + if mask_state_changed && (self.enabled && !self.func_masked) { let max_vectors_nr: u16 = self.table.len() as u16 / MSIX_TABLE_ENTRY_SIZE; for v in 0..max_vectors_nr { if !self.is_vector_masked(v) && self.is_vector_pending(v) { self.clear_pending_vector(v); - send_msix(self.get_message(v), dev_id); + self.send_msix(v, dev_id); } } } - self.func_masked = func_masked; - self.enabled = enabled; } } impl StateTransfer for Msix { - fn get_state_vec(&self) -> migration::errors::Result> { + fn get_state_vec(&self) -> Result> { let mut state = MsixState::default(); for (idx, table_byte) in self.table.iter().enumerate() { @@ -292,9 +479,9 @@ impl StateTransfer for Msix { Ok(state.as_bytes().to_vec()) } - fn set_state_mut(&mut self, state: &[u8]) -> migration::errors::Result<()> { + fn set_state_mut(&mut self, state: &[u8]) -> Result<()> { let msix_state = *MsixState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("MSIX_DEVICE"))?; + .with_context(|| MigrationError::FromBytesError("MSIX_DEVICE"))?; let table_length = self.table.len(); let pba_length = self.pba.len(); @@ -309,16 +496,12 @@ impl StateTransfer for Msix { } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&MsixState::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&MsixState::descriptor().name).unwrap_or(!0) } } impl MigrationHook for Msix { - fn resume(&mut self) -> migration::errors::Result<()> { + fn resume(&mut self) -> Result<()> { if self.enabled && !self.func_masked { for vector in 0..self.table.len() as u16 / MSIX_TABLE_ENTRY_SIZE { if self.is_vector_masked(vector) { @@ -328,32 +511,20 @@ impl MigrationHook for Msix { let msg = self.get_message(vector); // update and commit irq routing - { - let kvm_fds = KVM_FDS.load_signal_safe(); - let mut locked_irq_table = kvm_fds.irq_route_table.lock().unwrap(); - let allocated_gsi = match locked_irq_table.allocate_gsi() { - Ok(gsi) => gsi, - Err(e) => bail!("Failed to allocate new gsi: {}", e), - }; - let msi_vector = MsiVector { - msg_addr_hi: msg.address_hi, - msg_addr_lo: msg.address_lo, - msg_data: msg.data, - masked: false, - #[cfg(target_arch = "aarch64")] - dev_id: self.dev_id.load(Ordering::Acquire) as u32, - }; - if let Err(e) = locked_irq_table.add_msi_route(allocated_gsi, msi_vector) { - bail!("Failed to add msi route to global irq routing table: {}", e); - } - } - if let Err(e) = KVM_FDS.load().commit_irq_routing() { - bail!("Failed to commit irq routing: {}", e); - } + let msi_vector = MsiVector { + msg_addr_hi: msg.address_hi, + msg_addr_lo: msg.address_lo, + msg_data: msg.data, + masked: false, + #[cfg(target_arch = "aarch64")] + dev_id: u32::from(self.dev_id.load(Ordering::Acquire)), + }; + let irq_manager = self.msi_irq_manager.as_ref().unwrap(); + irq_manager.allocate_irq(msi_vector)?; if self.is_vector_pending(vector) { self.clear_pending_vector(vector); - send_msix(msg, self.dev_id.load(Ordering::Acquire)); + self.send_msix(vector, self.dev_id.load(Ordering::Acquire)); } } } @@ -362,53 +533,32 @@ impl 
MigrationHook for Msix { } } -pub fn is_msix_enabled(msix_cap_offset: usize, config: &[u8]) -> bool { - let offset: usize = msix_cap_offset + MSIX_CAP_CONTROL as usize; - let msix_ctl = le_read_u16(config, offset).unwrap(); - if msix_ctl & MSIX_CAP_ENABLE > 0 { - return true; - } - false -} - -fn is_msix_func_masked(msix_cap_offset: usize, config: &[u8]) -> bool { - let offset: usize = msix_cap_offset + MSIX_CAP_CONTROL as usize; - let msix_ctl = le_read_u16(config, offset).unwrap(); - if msix_ctl & MSIX_CAP_FUNC_MASK > 0 { - return true; - } - false -} - -fn send_msix(msg: Message, dev_id: u16) { - #[cfg(target_arch = "aarch64")] - let flags: u32 = kvm_bindings::KVM_MSI_VALID_DEVID; - #[cfg(target_arch = "x86_64")] - let flags: u32 = 0; - - let kvm_msi = kvm_bindings::kvm_msi { - address_lo: msg.address_lo, - address_hi: msg.address_hi, - data: msg.data, - flags, - devid: dev_id as u32, - pad: [0; 12], - }; - if let Err(e) = KVM_FDS.load().vm_fd.as_ref().unwrap().signal_msi(kvm_msi) { - error!("Send msix error: {}", e); - }; -} - /// MSI-X initialization. +/// +/// # Arguments +/// +/// * `pcidev_base ` - The Base of PCI device +/// * `bar_id` - BAR id. +/// * `vector_nr` - The number of vector. +/// * `dev_id` - Dev id. +/// * `parent_region` - Parent region which the MSI-X region registered. If none, registered in BAR. +/// * `offset_opt` - Offset of table(table_offset) and Offset of pba(pba_offset). Set the +/// table_offset and pba_offset together. pub fn init_msix( + pcidev_base: &mut PciDevBase, bar_id: usize, vector_nr: u32, - config: &mut PciConfig, dev_id: Arc, - _id: &str, + parent_region: Option<&Region>, + offset_opt: Option<(u32, u32)>, ) -> Result<()> { - if vector_nr > MSIX_TABLE_SIZE_MAX as u32 + 1 { - bail!("Too many msix vectors."); + let config = &mut pcidev_base.config; + let parent_bus = pcidev_base.base.parent.as_ref().unwrap(); + if vector_nr == 0 || vector_nr > u32::from(MSIX_TABLE_SIZE_MAX) + 1 { + bail!( + "invalid msix vectors, which should be in [1, {}]", + MSIX_TABLE_SIZE_MAX + 1 + ); } let msix_cap_offset: usize = config.add_pci_cap(CapId::Msix as u8, MSIX_CAP_SIZE as usize)?; @@ -420,64 +570,103 @@ pub fn init_msix( MSIX_CAP_FUNC_MASK | MSIX_CAP_ENABLE, )?; offset = msix_cap_offset + MSIX_CAP_TABLE as usize; - le_write_u32(&mut config.config, offset, bar_id as u32)?; + let table_size = vector_nr * u32::from(MSIX_TABLE_ENTRY_SIZE); + let pba_size = ((round_up(u64::from(vector_nr), 64).unwrap() / 64) * 8) as u32; + let (table_offset, pba_offset) = offset_opt.unwrap_or((0, table_size)); + if ranges_overlap( + table_offset as usize, + table_size as usize, + pba_offset as usize, + pba_size as usize, + ) + .unwrap() + { + bail!("msix table and pba table overlapped."); + } + le_write_u32(&mut config.config, offset, table_offset | bar_id as u32)?; offset = msix_cap_offset + MSIX_CAP_PBA as usize; - let table_size = vector_nr * MSIX_TABLE_ENTRY_SIZE as u32; - le_write_u32(&mut config.config, offset, table_size | bar_id as u32)?; + le_write_u32(&mut config.config, offset, pba_offset | bar_id as u32)?; + + let msi_irq_manager = if let Some(bus) = parent_bus.upgrade() { + PCI_BUS!(bus, locked_bus, pci_bus); + pci_bus.get_msi_irq_manager() + } else { + error!("Msi irq controller is none"); + None + }; - let pba_size = ((round_up(vector_nr as u64, 64).unwrap() / 64) * 8) as u32; let msix = Arc::new(Mutex::new(Msix::new( table_size, pba_size, msix_cap_offset as u16, - dev_id.load(Ordering::Acquire), + dev_id.clone(), + msi_irq_manager, ))); - let mut bar_size = 
((table_size + pba_size) as u64).next_power_of_two(); - bar_size = round_up(bar_size, host_page_size()).unwrap(); - let region = Region::init_container_region(bar_size); - Msix::register_memory_region(msix.clone(), ®ion, dev_id)?; - config.register_bar(bar_id, region, RegionType::Mem32Bit, false, bar_size); + if let Some(region) = parent_region { + Msix::register_memory_region( + msix.clone(), + region, + dev_id, + u64::from(table_offset), + u64::from(pba_offset), + )?; + } else { + let mut bar_size = u64::from(table_size + pba_size).next_power_of_two(); + bar_size = max(bar_size, MINIMUM_BAR_SIZE_FOR_MMIO as u64); + let region = Region::init_container_region(bar_size, "Msix_region"); + Msix::register_memory_region( + msix.clone(), + ®ion, + dev_id, + u64::from(table_offset), + u64::from(pba_offset), + )?; + config.register_bar(bar_id, region, RegionType::Mem32Bit, false, bar_size)?; + } + config.msix = Some(msix.clone()); #[cfg(not(test))] - MigrationManager::register_device_instance_mutex_with_id(MsixState::descriptor(), msix, _id); + MigrationManager::register_device_instance(MsixState::descriptor(), msix, &pcidev_base.base.id); Ok(()) } -pub fn update_dev_id(parent_bus: &Weak>, devfn: u8, dev_id: &Arc) { - let bus_num = parent_bus - .upgrade() - .unwrap() - .lock() - .unwrap() - .number(SECONDARY_BUS_NUM as usize); - let device_id = ((bus_num as u16) << 8) | (devfn as u16); - dev_id.store(device_id, Ordering::Release); -} - #[cfg(test)] mod tests { use super::*; - use crate::config::PCI_CONFIG_SPACE_SIZE; + use crate::pci::config::{PciConfig, PCI_CONFIG_SPACE_SIZE}; + use crate::pci::host::tests::create_pci_host; + use crate::{Device, DeviceBase}; #[test] fn test_init_msix() { - let mut pci_config = PciConfig::new(PCI_CONFIG_SPACE_SIZE, 2); - + let pci_host = create_pci_host(); + let locked_pci_host = pci_host.lock().unwrap(); + let root_bus = Arc::downgrade(&locked_pci_host.child_bus().unwrap()); + let mut base = PciDevBase { + base: DeviceBase::new("msix".to_string(), false, Some(root_bus)), + config: PciConfig::new(1, PCI_CONFIG_SPACE_SIZE, 2), + devfn: 1, + }; // Too many vectors. assert!(init_msix( + &mut base, 0, - MSIX_TABLE_SIZE_MAX as u32 + 2, - &mut pci_config, + u32::from(MSIX_TABLE_SIZE_MAX) + 2, Arc::new(AtomicU16::new(0)), - "msix" + None, + None, ) .is_err()); - init_msix(1, 2, &mut pci_config, Arc::new(AtomicU16::new(0)), "msix").unwrap(); + // No vector. 
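+ // A vector count of 0 is rejected just like a count above the maximum; the accepted
+ // range is [1, MSIX_TABLE_SIZE_MAX + 1].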
+ assert!(init_msix(&mut base, 0, 0, Arc::new(AtomicU16::new(0)), None, None,).is_err()); + + init_msix(&mut base, 1, 2, Arc::new(AtomicU16::new(0)), None, None).unwrap(); + let pci_config = base.config; let msix_cap_start = 64_u8; - assert_eq!(pci_config.last_cap_end, 64 + MSIX_CAP_SIZE as u16); + assert_eq!(pci_config.last_cap_end, 64 + u16::from(MSIX_CAP_SIZE)); // Capabilities pointer assert_eq!(pci_config.config[0x34], msix_cap_start); assert_eq!( @@ -500,7 +689,13 @@ mod tests { #[test] fn test_mask_vectors() { let nr_vector = 2_u32; - let mut msix = Msix::new(nr_vector * MSIX_TABLE_ENTRY_SIZE as u32, 64, 64, 0); + let mut msix = Msix::new( + nr_vector * u32::from(MSIX_TABLE_ENTRY_SIZE), + 64, + 64, + Arc::new(AtomicU16::new(0)), + None, + ); assert!(msix.table[MSIX_TABLE_VEC_CTL as usize] & MSIX_TABLE_MASK_BIT > 0); assert!( @@ -516,7 +711,13 @@ mod tests { #[test] fn test_pending_vectors() { - let mut msix = Msix::new(MSIX_TABLE_ENTRY_SIZE as u32, 64, 64, 0); + let mut msix = Msix::new( + u32::from(MSIX_TABLE_ENTRY_SIZE), + 64, + 64, + Arc::new(AtomicU16::new(0)), + None, + ); msix.set_pending_vector(0); assert!(msix.is_vector_pending(0)); @@ -526,7 +727,13 @@ mod tests { #[test] fn test_get_message() { - let mut msix = Msix::new(MSIX_TABLE_ENTRY_SIZE as u32, 64, 64, 0); + let mut msix = Msix::new( + u32::from(MSIX_TABLE_ENTRY_SIZE), + 64, + 64, + Arc::new(AtomicU16::new(0)), + None, + ); le_write_u32(&mut msix.table, 0, 0x1000_0000).unwrap(); le_write_u32(&mut msix.table, 4, 0x2000_0000).unwrap(); le_write_u32(&mut msix.table, 8, 0x3000_0000).unwrap(); @@ -539,16 +746,28 @@ mod tests { #[test] fn test_write_config() { - let mut pci_config = PciConfig::new(PCI_CONFIG_SPACE_SIZE, 2); - init_msix(0, 2, &mut pci_config, Arc::new(AtomicU16::new(0)), "msix").unwrap(); - let msix = pci_config.msix.as_ref().unwrap(); + let pci_host = create_pci_host(); + let locked_pci_host = pci_host.lock().unwrap(); + let root_bus = Arc::downgrade(&locked_pci_host.child_bus().unwrap()); + let mut base = PciDevBase { + base: DeviceBase::new("msix".to_string(), false, Some(root_bus)), + config: PciConfig::new(1, PCI_CONFIG_SPACE_SIZE, 2), + devfn: 1, + }; + init_msix(&mut base, 0, 2, Arc::new(AtomicU16::new(0)), None, None).unwrap(); + let msix = base.config.msix.as_ref().unwrap(); let mut locked_msix = msix.lock().unwrap(); locked_msix.enabled = false; let offset = locked_msix.msix_cap_offset as usize + MSIX_CAP_CONTROL as usize; - let val = le_read_u16(&pci_config.config, offset).unwrap(); - le_write_u16(&mut pci_config.config, offset, val | MSIX_CAP_ENABLE).unwrap(); + let val = le_read_u16(&base.config.config, offset).unwrap(); + le_write_u16(&mut base.config.config, offset, val | MSIX_CAP_ENABLE).unwrap(); locked_msix.set_pending_vector(0); - locked_msix.write_config(&pci_config.config, 0); + locked_msix.write_config( + &base.config.config, + 0, + offset, + &[0, val as u8 | MSIX_CAP_ENABLE as u8], + ); assert!(!locked_msix.func_masked); assert!(locked_msix.enabled); diff --git a/pci/src/root_port.rs b/devices/src/pci/root_port.rs similarity index 30% rename from pci/src/root_port.rs rename to devices/src/pci/root_port.rs index 3dc97dd4c8c16d91c90ef5399c62318ab0a709c9..168083447c9baefa73a4b08f024cdd7eef953aa5 100644 --- a/pci/src/root_port.rs +++ b/devices/src/pci/root_port.rs @@ -13,42 +13,75 @@ use std::sync::atomic::{AtomicU16, Ordering}; use std::sync::{Arc, Mutex, Weak}; +use anyhow::{anyhow, bail, Context, Result}; +use clap::{ArgAction, Parser}; +use log::{error, info}; use 
once_cell::sync::OnceCell; -use address_space::Region; -use error_chain::ChainedError; -use machine_manager::qmp::{qmp_schema as schema, QmpChannel}; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; -use util::byte_code::ByteCode; - use super::config::{ - PciConfig, PcieDevType, BAR_0, CLASS_CODE_PCI_BRIDGE, COMMAND, COMMAND_IO_SPACE, - COMMAND_MEMORY_SPACE, DEVICE_ID, HEADER_TYPE, HEADER_TYPE_BRIDGE, IO_BASE, MEMORY_BASE, - PCIE_CONFIG_SPACE_SIZE, PCI_EXP_HP_EV_ABP, PCI_EXP_HP_EV_CCI, PCI_EXP_HP_EV_PDC, - PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_DLLLA, PCI_EXP_LNKSTA_NLW, PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PCC, - PCI_EXP_SLTCTL_PWR_IND_OFF, PCI_EXP_SLTCTL_PWR_IND_ON, PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC, - PCI_EXP_SLTSTA_PDS, PCI_VENDOR_ID_REDHAT, PREF_MEMORY_BASE, PREF_MEMORY_LIMIT, - PREF_MEM_RANGE_64BIT, REG_SIZE, SUB_CLASS_CODE, VENDOR_ID, + PciConfig, PcieDevType, CLASS_CODE_PCI_BRIDGE, COMMAND, COMMAND_IO_SPACE, COMMAND_MEMORY_SPACE, + DEVICE_ID, HEADER_TYPE, HEADER_TYPE_BRIDGE, IO_BASE, MEMORY_BASE, PCIE_CONFIG_SPACE_SIZE, + PCI_EXP_HP_EV_ABP, PCI_EXP_HP_EV_CCI, PCI_EXP_HP_EV_PDC, PCI_EXP_HP_EV_SPT, PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_CLS_2_5GB, PCI_EXP_LNKSTA_DLLLA, PCI_EXP_LNKSTA_NLW_X1, PCI_EXP_SLOTSTA_EVENTS, + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_HPIE, PCI_EXP_SLTCTL_PCC, PCI_EXP_SLTCTL_PIC, + PCI_EXP_SLTCTL_PWR_IND_BLINK, PCI_EXP_SLTCTL_PWR_IND_OFF, PCI_EXP_SLTCTL_PWR_IND_ON, + PCI_EXP_SLTCTL_PWR_OFF, PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC, PCI_EXP_SLTSTA_PDS, + PCI_VENDOR_ID_REDHAT, PREF_MEMORY_BASE, PREF_MEMORY_LIMIT, PREF_MEM_RANGE_64BIT, + SUB_CLASS_CODE, VENDOR_ID, +}; +use crate::pci::bus::PciBus; +use crate::pci::config::{BRIDGE_CONTROL, BRIDGE_CTL_SEC_BUS_RESET}; +use crate::pci::hotplug::HotplugOps; +use crate::pci::intx::init_intx; +use crate::pci::msix::init_msix; +use crate::pci::{ + init_multifunction, le_read_u16, le_write_clear_value_u16, le_write_set_value_u16, + le_write_u16, to_pcidevops, PciDevBase, PciDevOps, PciError, PciIntxState, INTERRUPT_PIN, }; -use crate::bus::PciBus; -use crate::errors::{Result, ResultExt}; -use crate::hotplug::HotplugOps; -use crate::init_multifunction; -use crate::msix::init_msix; use crate::{ - le_read_u16, le_write_clear_value_u16, le_write_set_value_u16, le_write_u16, ranges_overlap, - PciDevOps, + convert_bus_mut, convert_bus_ref, Bus, Device, DeviceBase, MsiIrqManager, MUT_PCI_BUS, PCI_BUS, + PCI_BUS_DEVICE, +}; +use address_space::Region; +use machine_manager::config::{get_pci_df, parse_bool, valid_id}; +use machine_manager::qmp::qmp_channel::send_device_deleted_msg; +use migration::{ + DeviceStateDesc, FieldDesc, MigrationError, MigrationHook, MigrationManager, StateTransfer, }; +use migration_derive::{ByteCode, Desc}; +use util::byte_code::ByteCode; +use util::gen_base_func; +use util::num_ops::{ranges_overlap, str_to_num}; const DEVICE_ID_RP: u16 = 0x000c; static FAST_UNPLUG_FEATURE: OnceCell = OnceCell::new(); +/// Basic information of RootPort like port number. 
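+///
+/// A typical (illustrative) root port device option looks like:
+/// `-device pcie-root-port,id=pcie.1,port=0x1,bus=pcie.0,addr=0x1,multifunction=on`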
+#[derive(Parser, Debug, Clone, Default)] +#[command(no_binary_name(true))] +pub struct RootPortConfig { + #[arg(long, value_parser = ["pcie-root-port"])] + pub classtype: String, + #[arg(long, value_parser = str_to_num::)] + pub port: u8, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub bus: String, + #[arg(long, value_parser = get_pci_df)] + pub addr: (u8, u8), + #[arg(long, default_value = "off", value_parser = parse_bool, action = ArgAction::Append)] + pub multifunction: bool, + #[arg(long, default_value = "0")] + pub chassis: u8, +} + /// Device state root port. #[repr(C)] #[derive(Copy, Clone, Desc, ByteCode)] #[desc_version(compat_version = "0.1.0")] -pub struct RootPortState { +struct RootPortState { /// Max length of config_space is 4096. config_space: [u8; 4096], write_mask: [u8; 4096], @@ -59,17 +92,14 @@ pub struct RootPortState { } pub struct RootPort { - name: String, - devfn: u8, + base: PciDevBase, port_num: u8, - config: PciConfig, - parent_bus: Weak>, - sec_bus: Arc>, #[cfg(target_arch = "x86_64")] io_region: Region, mem_region: Region, dev_id: Arc, multifunction: bool, + hpev_notified: bool, } impl RootPort { @@ -77,79 +107,110 @@ impl RootPort { /// /// # Arguments /// - /// * `name` - Root port name. - /// * `devfn` - Device number << 3 | Function number. - /// * `port_num` - Root port number. + /// * `cfg` - Root port config. /// * `parent_bus` - Weak reference to the parent bus. - #[allow(dead_code)] - pub fn new( - name: String, - devfn: u8, - port_num: u8, - parent_bus: Weak>, - multifunction: bool, - ) -> Self { + pub fn new(cfg: RootPortConfig, parent_bus: Weak>) -> Self { + let devfn = cfg.addr.0 << 3 | cfg.addr.1; #[cfg(target_arch = "x86_64")] - let io_region = Region::init_container_region(1 << 16); - let mem_region = Region::init_container_region(u64::max_value()); - let sec_bus = Arc::new(Mutex::new(PciBus::new( - name.clone(), + let io_region = Region::init_container_region(1 << 16, "RootPortIo"); + let mem_region = Region::init_container_region(u64::max_value(), "RootPortMem"); + let child_bus = Arc::new(Mutex::new(PciBus::new( + cfg.id.clone(), #[cfg(target_arch = "x86_64")] io_region.clone(), mem_region.clone(), ))); + let mut dev_base = DeviceBase::new(cfg.id, true, Some(parent_bus)); + dev_base.child = Some(child_bus); Self { - name, - devfn, - port_num, - config: PciConfig::new(PCIE_CONFIG_SPACE_SIZE, 2), - parent_bus, - sec_bus, + base: PciDevBase { + base: dev_base, + config: PciConfig::new(devfn, PCIE_CONFIG_SPACE_SIZE, 2), + devfn, + }, + port_num: cfg.port, #[cfg(target_arch = "x86_64")] io_region, mem_region, dev_id: Arc::new(AtomicU16::new(0)), - multifunction, + multifunction: cfg.multifunction, + hpev_notified: false, } } fn hotplug_command_completed(&mut self) { if let Err(e) = le_write_set_value_u16( - &mut self.config.config, - (self.config.ext_cap_offset + PCI_EXP_SLTSTA) as usize, + &mut self.base.config.config, + (self.base.config.pci_express_cap_offset + PCI_EXP_SLTSTA) as usize, PCI_EXP_HP_EV_CCI, ) { - error!("{}", e.display_chain()); + error!("{}", format!("{:?}", e)); error!("Failed to write command completed"); } } + fn update_hp_event_status(&mut self) { + let cap_offset = self.base.config.pci_express_cap_offset; + let slot_status = le_read_u16( + &self.base.config.config, + (cap_offset + PCI_EXP_SLTSTA) as usize, + ) + .unwrap(); + let slot_control = le_read_u16( + &self.base.config.config, + (cap_offset + PCI_EXP_SLTCTL) as usize, + ) + .unwrap(); + + self.hpev_notified = (slot_control & 
PCI_EXP_SLTCTL_HPIE != 0) + && (slot_status & slot_control & PCI_EXP_HP_EV_SPT != 0); + } + fn hotplug_event_notify(&mut self) { - if let Some(msix) = self.config.msix.as_mut() { - msix.lock() - .unwrap() - .notify(0, self.dev_id.load(Ordering::Acquire)); - } else { - error!("Failed to send interrupt: msix does not exist"); + let last_event = self.hpev_notified; + self.update_hp_event_status(); + if last_event == self.hpev_notified { + return; + } + + let msix = self.base.config.msix.as_ref().unwrap(); + let intx = self.base.config.intx.as_ref().unwrap(); + let mut locked_msix = msix.lock().unwrap(); + if locked_msix.enabled { + locked_msix.notify(0, self.dev_id.load(Ordering::Acquire)); + } else if self.base.config.config[INTERRUPT_PIN as usize] != 0 { + intx.lock().unwrap().notify(u8::from(self.hpev_notified)); + } + } + + fn hotplug_event_clear(&mut self) { + self.update_hp_event_status(); + + let msix = self.base.config.msix.as_ref().unwrap(); + let intx = self.base.config.intx.as_ref().unwrap(); + let locked_msix = msix.lock().unwrap(); + let intr_pin = self.base.config.config[INTERRUPT_PIN as usize]; + if !locked_msix.enabled && intr_pin != 0 && !self.hpev_notified { + intx.lock().unwrap().notify(0); } } /// Update register when the guest OS trigger the removal of the device. fn update_register_status(&mut self) -> Result<()> { - let cap_offset = self.config.ext_cap_offset; + let cap_offset = self.base.config.pci_express_cap_offset; le_write_clear_value_u16( - &mut self.config.config, + &mut self.base.config.config, (cap_offset + PCI_EXP_SLTSTA) as usize, PCI_EXP_SLTSTA_PDS, )?; le_write_clear_value_u16( - &mut self.config.config, + &mut self.base.config.config, (cap_offset + PCI_EXP_LNKSTA) as usize, PCI_EXP_LNKSTA_DLLLA, )?; le_write_set_value_u16( - &mut self.config.config, + &mut self.base.config.config, (cap_offset + PCI_EXP_SLTSTA) as usize, PCI_EXP_SLTSTA_PDC, )?; @@ -161,91 +222,110 @@ impl RootPort { // Store device in a temp vector and unlock the bus. // If the device unrealize called when the bus is locked, a deadlock occurs. // This is because the device unrealize also requires the bus lock. - let devices = self.sec_bus.lock().unwrap().devices.clone(); + let bus = self.child_bus().unwrap(); + let devices = bus.lock().unwrap().child_devices(); for dev in devices.values() { - let mut locked_dev = dev.lock().unwrap(); - if let Err(e) = locked_dev.unrealize() { - error!("{}", e.display_chain()); + PCI_BUS_DEVICE!(dev, locked_dev, pci_dev); + if let Err(e) = pci_dev.unrealize() { + error!("{}", format!("{:?}", e)); error!("Failed to unrealize device {}.", locked_dev.name()); } - info!("Device {} unplug from {}", locked_dev.name(), self.name); + info!("Device {} unplug from {}", locked_dev.name(), self.name()); // Send QMP event for successful hot unplugging. 
- if QmpChannel::is_connected() { - let device_del = schema::DeviceDeleted { - device: Some(locked_dev.name()), - path: format!("/machine/peripheral/{}", &locked_dev.name()), - }; - event!(DeviceDeleted; device_del); - } + send_device_deleted_msg(&locked_dev.name()); } - self.sec_bus.lock().unwrap().devices.clear(); + bus.lock().unwrap().bus_base_mut().children.clear(); } fn register_region(&mut self) { - let command: u16 = le_read_u16(&self.config.config, COMMAND as usize).unwrap(); + let bus = self.parent_bus().unwrap().upgrade().unwrap(); + PCI_BUS!(bus, locked_bus, pci_bus); + + let command: u16 = le_read_u16(&self.base.config.config, COMMAND as usize).unwrap(); if command & COMMAND_IO_SPACE != 0 { #[cfg(target_arch = "x86_64")] - if let Err(e) = self - .parent_bus - .upgrade() - .unwrap() - .lock() - .unwrap() + if let Err(e) = pci_bus .io_region .add_subregion(self.io_region.clone(), 0) - .chain_err(|| "Failed to add IO container region.") + .with_context(|| "Failed to add IO container region.") { - error!("{}", e.display_chain()); + error!("{}", format!("{:?}", e)); } } if command & COMMAND_MEMORY_SPACE != 0 { - if let Err(e) = self - .parent_bus - .upgrade() - .unwrap() - .lock() - .unwrap() + if let Err(e) = pci_bus .mem_region .add_subregion(self.mem_region.clone(), 0) - .chain_err(|| "Failed to add memory container region.") + .with_context(|| "Failed to add memory container region.") { - error!("{}", e.display_chain()); + error!("{}", format!("{:?}", e)); } } } - fn do_unplug(&mut self, offset: usize, end: usize, old_ctl: u16) { - let cap_offset = self.config.ext_cap_offset; + fn correct_race_unplug(&mut self, offset: usize, data: &[u8], old_status: u16) { + let size = data.len(); + let cap_offset = self.base.config.pci_express_cap_offset; + // SAFETY: Checked in write_config. + if !ranges_overlap(offset, size, (cap_offset + PCI_EXP_SLTSTA) as usize, 2).unwrap() { + return; + } + + let status = le_read_u16( + &self.base.config.config, + (cap_offset + PCI_EXP_SLTSTA) as usize, + ) + .unwrap(); + let val: u16 = u16::from(data[0]) + (u16::from(data[1]) << 8); + if (val & !old_status & PCI_EXP_SLOTSTA_EVENTS) != 0 { + let tmpstat = + (status & !PCI_EXP_SLOTSTA_EVENTS) | (old_status & PCI_EXP_SLOTSTA_EVENTS); + le_write_u16( + &mut self.base.config.config, + (cap_offset + PCI_EXP_SLTSTA) as usize, + tmpstat, + ) + .unwrap(); + } + } + + fn do_unplug(&mut self, offset: usize, data: &[u8], old_ctl: u16, old_status: u16) { + self.correct_race_unplug(offset, data, old_status); + + let size = data.len(); + let cap_offset = self.base.config.pci_express_cap_offset; // Only care the write config about slot control - if !ranges_overlap( - offset, - end, - (cap_offset + PCI_EXP_SLTCTL) as usize, - (cap_offset + PCI_EXP_SLTCTL + 2) as usize, - ) { + // SAFETY: Checked in write_config. + if !ranges_overlap(offset, size, (cap_offset + PCI_EXP_SLTCTL) as usize, 2).unwrap() { return; } - let status = - le_read_u16(&self.config.config, (cap_offset + PCI_EXP_SLTSTA) as usize).unwrap(); - let val = le_read_u16(&self.config.config, offset).unwrap(); + let status = le_read_u16( + &self.base.config.config, + (cap_offset + PCI_EXP_SLTSTA) as usize, + ) + .unwrap(); + let val = le_read_u16(&self.base.config.config, offset).unwrap(); // Only unplug device when the slot is on // Don't unplug when slot is off for guest OS overwrite the off status before slot on. 
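// That is: presence is still detected (PDS set), the written control value powers the
// slot off with the power indicator off, and the previous control value had not
// already requested that combination.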
if (status & PCI_EXP_SLTSTA_PDS != 0) - && (val as u16 & PCI_EXP_SLTCTL_PCC == PCI_EXP_SLTCTL_PCC) - && (val as u16 & PCI_EXP_SLTCTL_PWR_IND_OFF == PCI_EXP_SLTCTL_PWR_IND_OFF) + && (val & PCI_EXP_SLTCTL_PCC == PCI_EXP_SLTCTL_PCC) + && (val & PCI_EXP_SLTCTL_PWR_IND_OFF == PCI_EXP_SLTCTL_PWR_IND_OFF) && (old_ctl & PCI_EXP_SLTCTL_PCC != PCI_EXP_SLTCTL_PCC || old_ctl & PCI_EXP_SLTCTL_PWR_IND_OFF != PCI_EXP_SLTCTL_PWR_IND_OFF) { self.remove_devices(); if let Err(e) = self.update_register_status() { - error!("{}", e.display_chain()); + error!("{}", format!("{:?}", e)); error!("Failed to update register status"); } } + // According to the PCIe specification 6.7.3, CCI events is different from others. + // To avoid mixing them together, trigger a notify for each. + self.hotplug_event_notify(); self.hotplug_command_completed(); self.hotplug_event_notify(); } @@ -257,101 +337,138 @@ impl RootPort { } } -impl PciDevOps for RootPort { - fn init_write_mask(&mut self) -> Result<()> { - self.config.init_common_write_mask()?; - self.config.init_bridge_write_mask() - } +impl Device for RootPort { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + /// Only set slot status to on, and no other device reset actions are implemented. + fn reset(&mut self, reset_child_device: bool) -> Result<()> { + if reset_child_device { + let child_bus = self.child_bus().unwrap(); + MUT_PCI_BUS!(child_bus, locked_child_bus, child_pci_bus); + child_pci_bus + .reset() + .with_context(|| "Fail to reset child_bus in root port")?; + } else { + let cap_offset = self.base.config.pci_express_cap_offset; + le_write_u16( + &mut self.base.config.config, + (cap_offset + PCI_EXP_SLTSTA) as usize, + PCI_EXP_SLTSTA_PDS, + )?; + le_write_u16( + &mut self.base.config.config, + (cap_offset + PCI_EXP_SLTCTL) as usize, + !PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PWR_IND_ON, + )?; + le_write_u16( + &mut self.base.config.config, + (cap_offset + PCI_EXP_LNKSTA) as usize, + PCI_EXP_LNKSTA_DLLLA, + )?; + } - fn init_write_clear_mask(&mut self) -> Result<()> { - self.config.init_common_write_clear_mask()?; - self.config.init_bridge_write_clear_mask() + self.base.config.reset_bridge_regs()?; + self.base.config.reset() } - fn realize(mut self) -> Result<()> { - self.init_write_mask()?; - self.init_write_clear_mask()?; + fn realize(mut self) -> Result>> { + let parent_bus = self.parent_bus().unwrap(); + self.init_write_mask(true)?; + self.init_write_clear_mask(true)?; - let config_space = &mut self.config.config; + let config_space = &mut self.base.config.config; le_write_u16(config_space, VENDOR_ID as usize, PCI_VENDOR_ID_REDHAT)?; le_write_u16(config_space, DEVICE_ID as usize, DEVICE_ID_RP)?; le_write_u16(config_space, SUB_CLASS_CODE as usize, CLASS_CODE_PCI_BRIDGE)?; config_space[HEADER_TYPE as usize] = HEADER_TYPE_BRIDGE; config_space[PREF_MEMORY_BASE as usize] = PREF_MEM_RANGE_64BIT; config_space[PREF_MEMORY_LIMIT as usize] = PREF_MEM_RANGE_64BIT; + init_multifunction( self.multifunction, config_space, - self.devfn, - self.parent_bus.clone(), + self.base.devfn, + parent_bus.clone(), )?; - self.config - .add_pcie_cap(self.devfn, self.port_num, PcieDevType::RootPort as u8)?; - self.dev_id.store(self.devfn as u16, Ordering::SeqCst); - init_msix(0, 1, &mut self.config, self.dev_id.clone(), &self.name)?; + #[cfg(target_arch = "aarch64")] + self.base.config.set_interrupt_pin(); - let parent_bus = self.parent_bus.upgrade().unwrap(); - let mut locked_parent_bus = parent_bus.lock().unwrap(); + self.base.config.add_pcie_cap( + 
self.base.devfn, + self.port_num, + PcieDevType::RootPort as u8, + )?; + + self.dev_id + .store(u16::from(self.base.devfn), Ordering::SeqCst); + init_msix(&mut self.base, 0, 1, self.dev_id.clone(), None, None)?; + + init_intx( + self.name(), + &mut self.base.config, + parent_bus.clone(), + self.base.devfn, + )?; + + let arc_parent_bus = parent_bus.upgrade().unwrap(); + MUT_PCI_BUS!(arc_parent_bus, locked_parent_bus, parent_pci_bus); + let child_bus = self.child_bus().unwrap(); + MUT_PCI_BUS!(child_bus, locked_child_bus, child_pci_bus); #[cfg(target_arch = "x86_64")] - locked_parent_bus + parent_pci_bus .io_region - .add_subregion(self.sec_bus.lock().unwrap().io_region.clone(), 0) - .chain_err(|| "Failed to register subregion in I/O space.")?; - locked_parent_bus + .add_subregion(child_pci_bus.io_region.clone(), 0) + .with_context(|| "Failed to register subregion in I/O space.")?; + parent_pci_bus .mem_region - .add_subregion(self.sec_bus.lock().unwrap().mem_region.clone(), 0) - .chain_err(|| "Failed to register subregion in memory space.")?; + .add_subregion(child_pci_bus.mem_region.clone(), 0) + .with_context(|| "Failed to register subregion in memory space.")?; + drop(locked_child_bus); - let name = self.name.clone(); + let name = self.name(); let root_port = Arc::new(Mutex::new(self)); - #[allow(unused_mut)] - let mut locked_root_port = root_port.lock().unwrap(); - locked_root_port.sec_bus.lock().unwrap().parent_bridge = - Some(Arc::downgrade(&root_port) as Weak>); - locked_root_port.sec_bus.lock().unwrap().hotplug_controller = + let locked_root_port = root_port.lock().unwrap(); + let child_bus = locked_root_port.child_bus().unwrap(); + MUT_PCI_BUS!(child_bus, locked_child_bus, child_pci_bus); + child_pci_bus.base.parent = Some(Arc::downgrade(&root_port) as Weak>); + child_pci_bus.hotplug_controller = Some(Arc::downgrade(&root_port) as Weak>); - let pci_device = locked_parent_bus.devices.get(&locked_root_port.devfn); - if pci_device.is_none() { - locked_parent_bus - .child_buses - .push(locked_root_port.sec_bus.clone()); - locked_parent_bus - .devices - .insert(locked_root_port.devfn, root_port.clone()); - } else { - bail!( - "Devfn {:?} has been used by {:?}", - locked_root_port.devfn, - pci_device.unwrap().lock().unwrap().name() - ); - } + parent_pci_bus.attach_child(u64::from(locked_root_port.base.devfn), root_port.clone())?; // Need to drop locked_root_port in order to register root_port instance. drop(locked_root_port); - MigrationManager::register_device_instance_mutex_with_id( + MigrationManager::register_device_instance( RootPortState::descriptor(), - root_port, + root_port.clone(), &name, ); - Ok(()) + Ok(root_port) } +} - fn read_config(&self, offset: usize, data: &mut [u8]) { - let size = data.len(); - if offset + size > PCIE_CONFIG_SPACE_SIZE || size > 4 { - error!( - "Failed to read pcie config space at offset {} with data size {}", - offset, size - ); - return; - } +/// Convert from Arc> to &mut RootPort. +#[macro_export] +macro_rules! MUT_ROOT_PORT { + ($trait_device:expr, $lock_device: ident, $struct_device: ident) => { + convert_device_mut!($trait_device, $lock_device, $struct_device, RootPort); + }; +} - self.config.read(offset, data); - } +/// Convert from Arc> to &RootPort. +#[macro_export] +macro_rules! 
ROOT_PORT { + ($trait_device:expr, $lock_device: ident, $struct_device: ident) => { + convert_device_ref!($trait_device, $lock_device, $struct_device, RootPort); + }; +} + +impl PciDevOps for RootPort { + gen_base_func!(pci_base, pci_base_mut, PciDevBase, base); fn write_config(&mut self, offset: usize, data: &[u8]) { let size = data.len(); + // SAFETY: offset is no more than 0xfff. let end = offset + size; if end > PCIE_CONFIG_SPACE_SIZE || size > 4 { error!( @@ -361,177 +478,223 @@ impl PciDevOps for RootPort { return; } - let cap_offset = self.config.ext_cap_offset; - let old_ctl = - le_read_u16(&self.config.config, (cap_offset + PCI_EXP_SLTCTL) as usize).unwrap(); + let cap_offset = self.base.config.pci_express_cap_offset; + let old_ctl = le_read_u16( + &self.base.config.config, + (cap_offset + PCI_EXP_SLTCTL) as usize, + ) + .unwrap(); + let old_status = le_read_u16( + &self.base.config.config, + (cap_offset + PCI_EXP_SLTSTA) as usize, + ) + .unwrap(); - self.config - .write(offset, data, self.dev_id.load(Ordering::Acquire)); - if ranges_overlap(offset, end, COMMAND as usize, (COMMAND + 1) as usize) - || ranges_overlap(offset, end, BAR_0 as usize, BAR_0 as usize + REG_SIZE * 2) - { - if let Err(e) = self.config.update_bar_mapping( - #[cfg(target_arch = "x86_64")] - &self.io_region, - &self.mem_region, - ) { - error!("{}", e.display_chain()); + let old_br_ctl = le_read_u16(&self.base.config.config, BRIDGE_CONTROL.into()).unwrap(); + + self.base.config.write( + offset, + data, + self.dev_id.load(Ordering::Acquire), + #[cfg(target_arch = "x86_64")] + Some(&self.io_region), + Some(&self.mem_region), + ); + + let new_br_ctl = le_read_u16(&self.base.config.config, BRIDGE_CONTROL.into()).unwrap(); + if (!old_br_ctl & new_br_ctl & BRIDGE_CTL_SEC_BUS_RESET) != 0 { + if let Err(e) = self.reset(true) { + error!( + "Failed to reset child devices under root port {}: {:?}", + self.name(), + e + ) } } - if ranges_overlap(offset, end, COMMAND as usize, (COMMAND + 1) as usize) - || ranges_overlap(offset, end, IO_BASE as usize, (IO_BASE + 2) as usize) - || ranges_overlap( - offset, - end, - MEMORY_BASE as usize, - (MEMORY_BASE + 20) as usize, - ) + + if ranges_overlap(offset, size, COMMAND as usize, 1).unwrap() + || ranges_overlap(offset, size, IO_BASE as usize, 2).unwrap() + || ranges_overlap(offset, size, MEMORY_BASE as usize, 20).unwrap() { self.register_region(); } - self.do_unplug(offset, end, old_ctl); + let mut status = le_read_u16( + &self.base.config.config, + (cap_offset + PCI_EXP_SLTSTA) as usize, + ) + .unwrap(); + let exp_slot_status = (cap_offset + PCI_EXP_SLTSTA) as usize; + if ranges_overlap(offset, size, exp_slot_status, 2).unwrap() { + let new_status = le_read_u16(data, 0).unwrap(); + if new_status & !old_status & PCI_EXP_SLOTSTA_EVENTS != 0 { + status = (status & !PCI_EXP_SLOTSTA_EVENTS) | (old_status & PCI_EXP_SLOTSTA_EVENTS); + if let Err(e) = le_write_u16( + &mut self.base.config.config, + (cap_offset + PCI_EXP_SLTSTA) as usize, + status, + ) { + error!("Failed to write config: {:?}", e); + } + } + self.hotplug_event_clear(); + } + self.do_unplug(offset, data, old_ctl, old_status); } - fn name(&self) -> String { - self.name.clone() + fn get_dev_path(&self) -> Option { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + let parent_dev_path = self.get_parent_dev_path(parent_bus); + let dev_path = self.populate_dev_path(parent_dev_path, self.base.devfn, "/pci-bridge@"); + Some(dev_path) } - /// Only set slot status to on, and no other device reset actions are 
implemented. - fn reset(&mut self, reset_child_device: bool) -> Result<()> { - if reset_child_device { - self.sec_bus - .lock() - .unwrap() - .reset() - .chain_err(|| "Fail to reset sec_bus in root port") - } else { - let cap_offset = self.config.ext_cap_offset; - le_write_u16( - &mut self.config.config, - (cap_offset + PCI_EXP_SLTSTA) as usize, - PCI_EXP_SLTSTA_PDS, - )?; - le_write_u16( - &mut self.config.config, - (cap_offset + PCI_EXP_SLTCTL) as usize, - !PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PWR_IND_ON, - )?; - le_write_u16( - &mut self.config.config, - (cap_offset + PCI_EXP_LNKSTA) as usize, - PCI_EXP_LNKSTA_DLLLA, - )?; - Ok(()) + fn get_intx_state(&self) -> Option>> { + let intx = self.base.config.intx.as_ref().unwrap(); + if intx.lock().unwrap().intx_state.is_some() { + let intx_state = intx.lock().unwrap().intx_state.as_ref().unwrap().clone(); + return Some(intx_state); } + + None + } + + fn get_msi_irq_manager(&self) -> Option> { + let msix = self.base.config.msix.as_ref().unwrap(); + msix.lock().unwrap().msi_irq_manager.clone() } } impl HotplugOps for RootPort { - fn plug(&mut self, dev: &Arc>) -> Result<()> { - let devfn = dev - .lock() - .unwrap() - .devfn() - .chain_err(|| "Failed to get devfn")?; + fn plug(&mut self, dev: &Arc>) -> Result<()> { + if !dev.lock().unwrap().hotpluggable() { + bail!("Don't support hot-plug!"); + } + PCI_BUS_DEVICE!(dev, locked_dev, pci_dev); + let devfn = pci_dev.pci_base().devfn; + drop(locked_dev); // Only if devfn is equal to 0, hot plugging is supported. - if devfn == 0 { - let offset = self.config.ext_cap_offset; - le_write_set_value_u16( - &mut self.config.config, - (offset + PCI_EXP_SLTSTA) as usize, - PCI_EXP_SLTSTA_PDS | PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP, - )?; - le_write_set_value_u16( - &mut self.config.config, - (offset + PCI_EXP_LNKSTA) as usize, - PCI_EXP_LNKSTA_NLW | PCI_EXP_LNKSTA_DLLLA, - )?; - self.hotplug_event_notify(); + if devfn != 0 { + return Err(anyhow!(PciError::HotplugUnsupported(devfn))); } + + let offset = self.base.config.pci_express_cap_offset; + le_write_set_value_u16( + &mut self.base.config.config, + (offset + PCI_EXP_SLTSTA) as usize, + PCI_EXP_SLTSTA_PDS | PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP, + )?; + le_write_set_value_u16( + &mut self.base.config.config, + (offset + PCI_EXP_LNKSTA) as usize, + PCI_EXP_LNKSTA_CLS_2_5GB | PCI_EXP_LNKSTA_NLW_X1 | PCI_EXP_LNKSTA_DLLLA, + )?; + self.hotplug_event_notify(); + Ok(()) } - fn unplug_request(&mut self, dev: &Arc>) -> Result<()> { - let devfn = dev - .lock() - .unwrap() - .devfn() - .chain_err(|| "Failed to get devfn")?; + fn unplug_request(&mut self, dev: &Arc>) -> Result<()> { + let pcie_cap_offset = self.base.config.pci_express_cap_offset; + let sltctl = le_read_u16( + &self.base.config.config, + (pcie_cap_offset + PCI_EXP_SLTCTL) as usize, + ) + .unwrap(); + + if (sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_BLINK { + bail!("Guest is still on the fly of another (un)plugging"); + } + + if !dev.lock().unwrap().hotpluggable() { + bail!("Don't support hot-unplug request!"); + } + PCI_BUS_DEVICE!(dev, locked_dev, pci_dev); + let devfn = pci_dev.pci_base().devfn; + drop(locked_dev); if devfn != 0 { return self.unplug(dev); } - let offset = self.config.ext_cap_offset; + let offset = self.base.config.pci_express_cap_offset; le_write_clear_value_u16( - &mut self.config.config, + &mut self.base.config.config, (offset + PCI_EXP_LNKSTA) as usize, PCI_EXP_LNKSTA_DLLLA, )?; - let mut slot_status = PCI_EXP_HP_EV_ABP; + let mut slot_status = 0; if let Some(&true) = 
FAST_UNPLUG_FEATURE.get() { slot_status |= PCI_EXP_HP_EV_PDC; } le_write_set_value_u16( - &mut self.config.config, + &mut self.base.config.config, (offset + PCI_EXP_SLTSTA) as usize, slot_status, )?; + + if ((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF) + && ((sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF) + { + // if the slot has already been unpluged, skip notifing the guest. + return Ok(()); + } + + le_write_set_value_u16( + &mut self.base.config.config, + (offset + PCI_EXP_SLTSTA) as usize, + slot_status | PCI_EXP_HP_EV_ABP, + )?; self.hotplug_event_notify(); Ok(()) } - fn unplug(&mut self, dev: &Arc>) -> Result<()> { - let devfn = dev - .lock() - .unwrap() - .devfn() - .chain_err(|| "Failed to get devfn")?; - let mut locked_dev = dev.lock().unwrap(); - locked_dev.unrealize()?; - self.sec_bus.lock().unwrap().devices.remove(&devfn); + fn unplug(&mut self, dev: &Arc>) -> Result<()> { + if !dev.lock().unwrap().hotpluggable() { + bail!("Don't support hot-unplug!"); + } + PCI_BUS_DEVICE!(dev, locked_dev, pci_dev); + let devfn = u64::from(pci_dev.pci_base().devfn); + pci_dev.unrealize()?; + let child_bus = self.child_bus().unwrap(); + child_bus.lock().unwrap().detach_child(devfn)?; Ok(()) } } impl StateTransfer for RootPort { - fn get_state_vec(&self) -> migration::errors::Result> { + fn get_state_vec(&self) -> Result> { let mut state = RootPortState::default(); - for idx in 0..self.config.config.len() { - state.config_space[idx] = self.config.config[idx]; - state.write_mask[idx] = self.config.write_mask[idx]; - state.write_clear_mask[idx] = self.config.write_clear_mask[idx]; + for idx in 0..self.base.config.config.len() { + state.config_space[idx] = self.base.config.config[idx]; + state.write_mask[idx] = self.base.config.write_mask[idx]; + state.write_clear_mask[idx] = self.base.config.write_clear_mask[idx]; } - state.last_cap_end = self.config.last_cap_end; - state.last_ext_cap_end = self.config.last_ext_cap_end; - state.last_ext_cap_offset = self.config.last_ext_cap_offset; + state.last_cap_end = self.base.config.last_cap_end; + state.last_ext_cap_end = self.base.config.last_ext_cap_end; + state.last_ext_cap_offset = self.base.config.last_ext_cap_offset; Ok(state.as_bytes().to_vec()) } - fn set_state_mut(&mut self, state: &[u8]) -> migration::errors::Result<()> { + fn set_state_mut(&mut self, state: &[u8]) -> Result<()> { let root_port_state = *RootPortState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("ROOT_PORT"))?; + .with_context(|| MigrationError::FromBytesError("ROOT_PORT"))?; - let length = self.config.config.len(); - self.config.config = root_port_state.config_space[..length].to_vec(); - self.config.write_mask = root_port_state.write_mask[..length].to_vec(); - self.config.write_clear_mask = root_port_state.write_clear_mask[..length].to_vec(); - self.config.last_cap_end = root_port_state.last_cap_end; - self.config.last_ext_cap_end = root_port_state.last_ext_cap_end; - self.config.last_ext_cap_offset = root_port_state.last_ext_cap_offset; + let length = self.base.config.config.len(); + self.base.config.config = root_port_state.config_space[..length].to_vec(); + self.base.config.write_mask = root_port_state.write_mask[..length].to_vec(); + self.base.config.write_clear_mask = root_port_state.write_clear_mask[..length].to_vec(); + self.base.config.last_cap_end = root_port_state.last_cap_end; + self.base.config.last_ext_cap_end = root_port_state.last_ext_cap_end; + self.base.config.last_ext_cap_offset = 
root_port_state.last_ext_cap_offset; Ok(()) } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&RootPortState::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&RootPortState::descriptor().name).unwrap_or(!0) } } @@ -540,43 +703,46 @@ impl MigrationHook for RootPort {} #[cfg(test)] mod tests { use super::*; - use crate::host::tests::create_pci_host; + use crate::{convert_device_mut, pci::host::tests::create_pci_host, MUT_ROOT_PORT}; #[test] fn test_read_config() { let pci_host = create_pci_host(); - let root_bus = Arc::downgrade(&pci_host.lock().unwrap().root_bus); - let root_port = RootPort::new("pcie.1".to_string(), 8, 0, root_bus, false); + let root_bus = Arc::downgrade(&pci_host.lock().unwrap().child_bus().unwrap()); + let root_port_config = RootPortConfig { + addr: (1, 0), + id: "pcie.1".to_string(), + ..Default::default() + }; + let root_port = RootPort::new(root_port_config, root_bus.clone()); root_port.realize().unwrap(); - let root_port = pci_host.lock().unwrap().find_device(0, 8).unwrap(); + let dev = pci_host.lock().unwrap().find_device(0, 8).unwrap(); let mut buf = [1_u8; 4]; - root_port - .lock() - .unwrap() - .read_config(PCIE_CONFIG_SPACE_SIZE - 1, &mut buf); + MUT_ROOT_PORT!(dev, locked_dev, root_port); + root_port.read_config(PCIE_CONFIG_SPACE_SIZE - 1, &mut buf); assert_eq!(buf, [1_u8; 4]); } #[test] fn test_write_config() { let pci_host = create_pci_host(); - let root_bus = Arc::downgrade(&pci_host.lock().unwrap().root_bus); - let root_port = RootPort::new("pcie.1".to_string(), 8, 0, root_bus, false); + let root_bus = Arc::downgrade(&pci_host.lock().unwrap().child_bus().unwrap()); + let root_port_config = RootPortConfig { + addr: (1, 0), + id: "pcie.1".to_string(), + ..Default::default() + }; + let root_port = RootPort::new(root_port_config, root_bus.clone()); root_port.realize().unwrap(); - let root_port = pci_host.lock().unwrap().find_device(0, 8).unwrap(); + let dev = pci_host.lock().unwrap().find_device(0, 8).unwrap(); + MUT_ROOT_PORT!(dev, locked_dev, root_port); // Invalid write. let data = [1_u8; 4]; - root_port - .lock() - .unwrap() - .write_config(PCIE_CONFIG_SPACE_SIZE - 1, &data); + root_port.write_config(PCIE_CONFIG_SPACE_SIZE - 1, &data); let mut buf = [0_u8]; - root_port - .lock() - .unwrap() - .read_config(PCIE_CONFIG_SPACE_SIZE - 1, &mut buf); + root_port.read_config(PCIE_CONFIG_SPACE_SIZE - 1, &mut buf); assert_eq!(buf, [0_u8]); } } diff --git a/devices/src/scsi/bus.rs b/devices/src/scsi/bus.rs new file mode 100644 index 0000000000000000000000000000000000000000..ceaa1e5bb50bbba608a82065c8ad2bb7f03fe088 --- /dev/null +++ b/devices/src/scsi/bus.rs @@ -0,0 +1,1762 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
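+//
+// This module implements the emulated SCSI bus. The constants below cover the SCSI
+// operation codes, SAM status codes, sense keys and the fixed sense-code values used
+// when reporting command status.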
+ +use std::cmp; +use std::io::Write; +use std::sync::{Arc, Mutex}; + +use anyhow::{anyhow, bail, Context, Result}; +use byteorder::{BigEndian, ByteOrder}; +use log::info; + +use crate::ScsiDisk::{ + ScsiDevice, DEFAULT_SECTOR_SIZE, SCSI_CDROM_DEFAULT_BLOCK_SIZE_SHIFT, + SCSI_DISK_DEFAULT_BLOCK_SIZE_SHIFT, SCSI_DISK_F_DPOFUA, SCSI_DISK_F_REMOVABLE, SCSI_TYPE_DISK, + SCSI_TYPE_ROM, SECTOR_SHIFT, +}; +use crate::{convert_bus_ref, convert_device_ref, Bus, BusBase, Device, SCSI_DEVICE}; +use util::aio::{AioCb, AioReqResult, Iovec}; +use util::{gen_base_func, AsAny}; + +/// Scsi Operation code. +pub const TEST_UNIT_READY: u8 = 0x00; +pub const REWIND: u8 = 0x01; +pub const REQUEST_SENSE: u8 = 0x03; +pub const FORMAT_UNIT: u8 = 0x04; +pub const READ_BLOCK_LIMITS: u8 = 0x05; +pub const INITIALIZE_ELEMENT_STATUS: u8 = 0x07; +pub const REASSIGN_BLOCKS: u8 = 0x07; +pub const READ_6: u8 = 0x08; +pub const WRITE_6: u8 = 0x0a; +pub const SET_CAPACITY: u8 = 0x0b; +pub const READ_REVERSE: u8 = 0x0f; +pub const WRITE_FILEMARKS: u8 = 0x10; +pub const SPACE: u8 = 0x11; +pub const INQUIRY: u8 = 0x12; +pub const RECOVER_BUFFERED_DATA: u8 = 0x14; +pub const MODE_SELECT: u8 = 0x15; +pub const RESERVE: u8 = 0x16; +pub const RELEASE: u8 = 0x17; +pub const COPY: u8 = 0x18; +pub const ERASE: u8 = 0x19; +pub const MODE_SENSE: u8 = 0x1a; +pub const LOAD_UNLOAD: u8 = 0x1b; +pub const SCAN: u8 = 0x1b; +pub const START_STOP: u8 = 0x1b; +pub const RECEIVE_DIAGNOSTIC: u8 = 0x1c; +pub const SEND_DIAGNOSTIC: u8 = 0x1d; +pub const ALLOW_MEDIUM_REMOVAL: u8 = 0x1e; +pub const SET_WINDOW: u8 = 0x24; +pub const READ_CAPACITY_10: u8 = 0x25; +pub const GET_WINDOW: u8 = 0x25; +pub const READ_10: u8 = 0x28; +pub const WRITE_10: u8 = 0x2a; +pub const SEND: u8 = 0x2a; +pub const SEEK_10: u8 = 0x2b; +pub const LOCATE_10: u8 = 0x2b; +pub const POSITION_TO_ELEMENT: u8 = 0x2b; +pub const WRITE_VERIFY_10: u8 = 0x2e; +pub const VERIFY_10: u8 = 0x2f; +pub const SEARCH_HIGH: u8 = 0x30; +pub const SEARCH_EQUAL: u8 = 0x31; +pub const OBJECT_POSITION: u8 = 0x31; +pub const SEARCH_LOW: u8 = 0x32; +pub const SET_LIMITS: u8 = 0x33; +pub const PRE_FETCH: u8 = 0x34; +pub const READ_POSITION: u8 = 0x34; +pub const GET_DATA_BUFFER_STATUS: u8 = 0x34; +pub const SYNCHRONIZE_CACHE: u8 = 0x35; +pub const LOCK_UNLOCK_CACHE: u8 = 0x36; +pub const INITIALIZE_ELEMENT_STATUS_WITH_RANGE: u8 = 0x37; +pub const READ_DEFECT_DATA: u8 = 0x37; +pub const MEDIUM_SCAN: u8 = 0x38; +pub const COMPARE: u8 = 0x39; +pub const COPY_VERIFY: u8 = 0x3a; +pub const WRITE_BUFFER: u8 = 0x3b; +pub const READ_BUFFER: u8 = 0x3c; +pub const UPDATE_BLOCK: u8 = 0x3d; +pub const READ_LONG_10: u8 = 0x3e; +pub const WRITE_LONG_10: u8 = 0x3f; +pub const CHANGE_DEFINITION: u8 = 0x40; +pub const WRITE_SAME_10: u8 = 0x41; +pub const UNMAP: u8 = 0x42; +/// The Read TOC command requests that the Drive read data from a table of contexts. 
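+/// "TOC" stands for the disc's Table Of Contents.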
+pub const READ_TOC: u8 = 0x43; +pub const REPORT_DENSITY_SUPPORT: u8 = 0x44; +pub const GET_CONFIGURATION: u8 = 0x46; +pub const SANITIZE: u8 = 0x48; +pub const GET_EVENT_STATUS_NOTIFICATION: u8 = 0x4a; +pub const LOG_SELECT: u8 = 0x4c; +pub const LOG_SENSE: u8 = 0x4d; +pub const READ_DISC_INFORMATION: u8 = 0x51; +pub const RESERVE_TRACK: u8 = 0x53; +pub const MODE_SELECT_10: u8 = 0x55; +pub const RESERVE_10: u8 = 0x56; +pub const RELEASE_10: u8 = 0x57; +pub const MODE_SENSE_10: u8 = 0x5a; +pub const SEND_CUE_SHEET: u8 = 0x5d; +pub const PERSISTENT_RESERVE_IN: u8 = 0x5e; +pub const PERSISTENT_RESERVE_OUT: u8 = 0x5f; +pub const VARLENGTH_CDB: u8 = 0x7f; +pub const WRITE_FILEMARKS_16: u8 = 0x80; +pub const READ_REVERSE_16: u8 = 0x81; +pub const ALLOW_OVERWRITE: u8 = 0x82; +pub const EXTENDED_COPY: u8 = 0x83; +pub const ATA_PASSTHROUGH_16: u8 = 0x85; +pub const ACCESS_CONTROL_IN: u8 = 0x86; +pub const ACCESS_CONTROL_OUT: u8 = 0x87; +pub const READ_16: u8 = 0x88; +pub const COMPARE_AND_WRITE: u8 = 0x89; +pub const WRITE_16: u8 = 0x8a; +pub const WRITE_VERIFY_16: u8 = 0x8e; +pub const VERIFY_16: u8 = 0x8f; +pub const PRE_FETCH_16: u8 = 0x90; +pub const SPACE_16: u8 = 0x91; +pub const SYNCHRONIZE_CACHE_16: u8 = 0x91; +pub const LOCATE_16: u8 = 0x92; +pub const WRITE_SAME_16: u8 = 0x93; +pub const ERASE_16: u8 = 0x93; +pub const SERVICE_ACTION_IN_16: u8 = 0x9e; +pub const WRITE_LONG_16: u8 = 0x9f; +pub const REPORT_LUNS: u8 = 0xa0; +pub const ATA_PASSTHROUGH_12: u8 = 0xa1; +pub const MAINTENANCE_IN: u8 = 0xa3; +pub const MAINTENANCE_OUT: u8 = 0xa4; +pub const MOVE_MEDIUM: u8 = 0xa5; +pub const EXCHANGE_MEDIUM: u8 = 0xa6; +pub const SET_READ_AHEAD: u8 = 0xa7; +pub const READ_12: u8 = 0xa8; +pub const WRITE_12: u8 = 0xaa; +pub const SERVICE_ACTION_IN_12: u8 = 0xab; +pub const ERASE_12: u8 = 0xac; +pub const READ_DVD_STRUCTURE: u8 = 0xad; +pub const WRITE_VERIFY_12: u8 = 0xae; +pub const VERIFY_12: u8 = 0xaf; +pub const SEARCH_HIGH_12: u8 = 0xb0; +pub const SEARCH_EQUAL_12: u8 = 0xb1; +pub const SEARCH_LOW_12: u8 = 0xb2; +pub const READ_ELEMENT_STATUS: u8 = 0xb8; +pub const SEND_VOLUME_TAG: u8 = 0xb6; +pub const READ_DEFECT_DATA_12: u8 = 0xb7; +pub const SET_CD_SPEED: u8 = 0xbb; +pub const MECHANISM_STATUS: u8 = 0xbd; +pub const READ_CD: u8 = 0xbe; +pub const SEND_DVD_STRUCTURE: u8 = 0xbf; + +/// SAM Status codes. +pub const GOOD: u8 = 0x00; +pub const CHECK_CONDITION: u8 = 0x02; +pub const CONDITION_GOOD: u8 = 0x04; +pub const BUSY: u8 = 0x08; +pub const INTERMEDIATE_GOOD: u8 = 0x10; +pub const INTERMEDIATE_C_GOOD: u8 = 0x14; +pub const RESERVATION_CONFLICT: u8 = 0x18; +pub const COMMAND_TERMINATED: u8 = 0x22; +pub const TASK_SET_FULL: u8 = 0x28; +pub const ACA_ACTIVE: u8 = 0x30; +pub const TASK_ABORTED: u8 = 0x40; + +pub const STATUS_MASK: u8 = 0x3e; + +/// Scsi cdb length will be 6/10/12/16 bytes. +pub const SCSI_CMD_BUF_SIZE: usize = 16; +pub const SCSI_SENSE_BUF_SIZE: usize = 252; + +/// SERVICE ACTION IN subcodes. +pub const SUBCODE_READ_CAPACITY_16: u8 = 0x10; + +/// Sense Keys. +pub const NO_SENSE: u8 = 0x00; +pub const RECOVERED_ERROR: u8 = 0x01; +pub const NOT_READY: u8 = 0x02; +pub const MEDIUM_ERROR: u8 = 0x03; +pub const HARDWARE_ERROR: u8 = 0x04; +pub const ILLEGAL_REQUEST: u8 = 0x05; +pub const UNIT_ATTENTION: u8 = 0x06; +pub const DATA_PROTECT: u8 = 0x07; +pub const BLANK_CHECK: u8 = 0x08; +pub const COPY_ABORTED: u8 = 0x0a; +pub const ABORTED_COMMAND: u8 = 0x0b; +pub const VOLUME_OVERFLOW: u8 = 0x0d; +pub const MISCOMPARE: u8 = 0x0e; + +macro_rules! 
scsisense { + ( $key:expr, $asc: expr, $ascq:expr) => { + ScsiSense { + key: $key, + asc: $asc, + ascq: $ascq, + } + }; +} + +/// Sense Code. +pub const SCSI_SENSE_NO_SENSE: ScsiSense = scsisense!(NO_SENSE, 0x00, 0x00); +pub const SCSI_SENSE_LUN_NOT_READY: ScsiSense = scsisense!(NOT_READY, 0x04, 0x03); +pub const SCSI_SENSE_NO_MEDIUM: ScsiSense = scsisense!(NOT_READY, 0x3a, 0x00); +pub const SCSI_SENSE_NOT_READY_REMOVAL_PREVENTED: ScsiSense = scsisense!(NOT_READY, 0x53, 0x02); +pub const SCSI_SENSE_TARGET_FAILURE: ScsiSense = scsisense!(HARDWARE_ERROR, 0x44, 0x00); +pub const SCSI_SENSE_INVALID_OPCODE: ScsiSense = scsisense!(ILLEGAL_REQUEST, 0x20, 0x00); +pub const SCSI_SENSE_LBA_OUT_OF_RANGE: ScsiSense = scsisense!(ILLEGAL_REQUEST, 0x21, 0x00); +pub const SCSI_SENSE_INVALID_FIELD: ScsiSense = scsisense!(ILLEGAL_REQUEST, 0x24, 0x00); +pub const SCSI_SENSE_INVALID_PARAM: ScsiSense = scsisense!(ILLEGAL_REQUEST, 0x26, 0x00); +pub const SCSI_SENSE_INVALID_PARAM_VALUE: ScsiSense = scsisense!(ILLEGAL_REQUEST, 0x26, 0x01); +pub const SCSI_SENSE_INVALID_PARAM_LEN: ScsiSense = scsisense!(ILLEGAL_REQUEST, 0x1a, 0x00); +pub const SCSI_SENSE_LUN_NOT_SUPPORTED: ScsiSense = scsisense!(ILLEGAL_REQUEST, 0x25, 0x00); +pub const SCSI_SENSE_SAVING_PARAMS_NOT_SUPPORTED: ScsiSense = + scsisense!(ILLEGAL_REQUEST, 0x39, 0x00); +pub const SCSI_SENSE_INCOMPATIBLE_FORMAT: ScsiSense = scsisense!(ILLEGAL_REQUEST, 0x30, 0x00); +pub const SCSI_SENSE_ILLEGAL_REQ_REMOVAL_PREVENTED: ScsiSense = + scsisense!(ILLEGAL_REQUEST, 0x53, 0x02); +pub const SCSI_SENSE_INVALID_TAG: ScsiSense = scsisense!(ILLEGAL_REQUEST, 0x4b, 0x01); +pub const SCSI_SENSE_IO_ERROR: ScsiSense = scsisense!(ABORTED_COMMAND, 0x00, 0x06); +pub const SCSI_SENSE_I_T_NEXUS_LOSS: ScsiSense = scsisense!(ABORTED_COMMAND, 0x29, 0x07); +pub const SCSI_SENSE_LUN_FAILURE: ScsiSense = scsisense!(ABORTED_COMMAND, 0x3e, 0x01); +pub const SCSI_SENSE_OVERLAPPED_COMMANDS: ScsiSense = scsisense!(ABORTED_COMMAND, 0x4e, 0x00); +pub const SCSI_SENSE_LUN_COMM_FAILURE: ScsiSense = scsisense!(ABORTED_COMMAND, 0x08, 0x00); +pub const SCSI_SENSE_LUN_NOT_RESPONDING: ScsiSense = scsisense!(ABORTED_COMMAND, 0x05, 0x00); +pub const SCSI_SENSE_COMMAND_TIMEOUT: ScsiSense = scsisense!(ABORTED_COMMAND, 0x2e, 0x02); +pub const SCSI_SENSE_COMMAND_ABORTED: ScsiSense = scsisense!(ABORTED_COMMAND, 0x2f, 0x02); +pub const SCSI_SENSE_READ_ERROR: ScsiSense = scsisense!(MEDIUM_ERROR, 0x11, 0x00); +pub const SCSI_SENSE_NOT_READY: ScsiSense = scsisense!(NOT_READY, 0x04, 0x00); +pub const SCSI_SENSE_CAPACITY_CHANGED: ScsiSense = scsisense!(UNIT_ATTENTION, 0x2a, 0x09); +pub const SCSI_SENSE_RESET: ScsiSense = scsisense!(UNIT_ATTENTION, 0x29, 0x00); +pub const SCSI_SENSE_SCSI_BUS_RESET: ScsiSense = scsisense!(UNIT_ATTENTION, 0x29, 0x02); +pub const SCSI_SENSE_UNIT_ATTENTION_NO_MEDIUM: ScsiSense = scsisense!(UNIT_ATTENTION, 0x3a, 0x00); +pub const SCSI_SENSE_MEDIUM_CHANGED: ScsiSense = scsisense!(UNIT_ATTENTION, 0x28, 0x00); +pub const SCSI_SENSE_REPORTED_LUNS_CHANGED: ScsiSense = scsisense!(UNIT_ATTENTION, 0x3f, 0x0e); +pub const SCSI_SENSE_DEVICE_INTERNAL_RESET: ScsiSense = scsisense!(UNIT_ATTENTION, 0x29, 0x04); +pub const SCSI_SENSE_WRITE_PROTECTED: ScsiSense = scsisense!(DATA_PROTECT, 0x27, 0x00); +pub const SCSI_SENSE_SPACE_ALLOC_FAILED: ScsiSense = scsisense!(DATA_PROTECT, 0x27, 0x07); + +#[derive(Default)] +pub struct ScsiSense { + /// Sense key. + pub key: u8, + /// Additional sense code. + pub asc: u8, + /// Additional sense code qualifier. 
+ pub ascq: u8, +} + +/// Mode page codes for mode sense/set. +pub const MODE_PAGE_R_W_ERROR: u8 = 0x01; +pub const MODE_PAGE_HD_GEOMETRY: u8 = 0x04; +pub const MODE_PAGE_FLEXIBLE_DISK_GEOMETRY: u8 = 0x05; +pub const MODE_PAGE_CACHING: u8 = 0x08; +pub const MODE_PAGE_AUDIO_CTL: u8 = 0x0e; +pub const MODE_PAGE_POWER: u8 = 0x1a; +pub const MODE_PAGE_FAULT_FAIL: u8 = 0x1c; +pub const MODE_PAGE_TO_PROTECT: u8 = 0x1d; +pub const MODE_PAGE_CAPABILITIES: u8 = 0x2a; +pub const MODE_PAGE_ALLS: u8 = 0x3f; + +pub const SCSI_MAX_INQUIRY_LEN: u64 = 256; +pub const SCSI_INQUIRY_PRODUCT_MAX_LEN: usize = 16; +pub const SCSI_INQUIRY_VENDOR_MAX_LEN: usize = 8; +pub const SCSI_INQUIRY_VERSION_MAX_LEN: usize = 4; +pub const SCSI_INQUIRY_VPD_SERIAL_NUMBER_MAX_LEN: usize = 32; + +const SCSI_TARGET_INQUIRY_LEN: u64 = 36; + +/// | bit7 - bit 5 | bit 4 - bit 0 | +/// | Peripheral Qualifier | Peripheral Device Type | +/// Unknown or no device type. +const TYPE_UNKNOWN: u8 = 0x1f; +/// A peripheral device having the specified peripheral device type is not connected to this logical +/// unit. +const TYPE_INACTIVE: u8 = 0x20; +/// Scsi target device is not capable of supporting a peripheral device connected to this logical +/// unit. +const TYPE_NO_LUN: u8 = 0x7f; + +/// Notification Classes for GET EVENT STATUS NOTIFICATION. +/// 000b: No requested Event Classes are supported. +pub const GESN_NO_REQUESTED_EVENT: u8 = 0; +/// 001b: Operational Change Request/Notification. +pub const GESN_OPERATIONAL_CHANGE: u8 = 1; +/// 010b: Power Management. +pub const GESN_POWER_MANAGEMENT: u8 = 2; +/// 011b: External Request. +pub const GESN_EXTERNAL_REQUEST: u8 = 3; +/// 100b: Media. +pub const GESN_MEDIA: u8 = 4; +/// 101b: Multiple Hosts. +pub const GESN_MULTIPLE_HOSTS: u8 = 5; +/// 110b: Device Busy. +/// 111b: Reserved. +pub const GESN_DEVICE_BUSY: u8 = 6; + +/// Media Status in Get Event Status Notification. +/// If the Media Present bit is set to zero, no media is present in the Drive. +/// If the Media Present bit is set to one, media is present in the Drive. +pub const GESN_MS_DOOR_OR_TRAY_OPEN_BIT: u8 = 0; +/// If the Door or Tray Open bit is set to zero, the Tray or Door mechanism is in the closed state. +/// If the Door or Tray Open bit is set to one, the Tray or Door mechanism is in the open state. +/// If the Drive does not have either a tray or a door, this bit shall be set to zero. +pub const GESN_MS_MEDIA_PRESENT_BIT: u8 = 1; + +/// Event Code in Get Event Status Notification. +/// Media status is unchanged. +pub const GESN_EC_NOCHG: u8 = 0; +/// The Drive has received a request from the user(usually through a mechanical switch on the Drive) +/// to eject the specified slot or media. +pub const GESN_EC_EJECTREQUEST: u8 = 1; +/// The specified slot(or the Drive) has received new media, and is ready to access it. +pub const GESN_EC_NEWMEDIA: u8 = 2; +/// The media has been removed from the specified slot, and the Drive is unable to access the media +/// without user intervention. This applies to media changers only. +pub const GESN_EC_MEDIAREMOVAL: u8 = 3; +/// The user has requested that the media in the specified slot be loaded. This applies to media +/// changers only. +pub const GESN_EC_MEDIACHANGED: u8 = 4; +/// A DVD+RW background format has completed. Since DVD+RW Drives are capable of generationg +/// multiple media events concurrently, such Drives shall be capable of queuing media events. 
+pub const GESN_EC_BGFORMATCOMPLETED: u8 = 5; +/// A DVD+RW background format has been automatically restarted by the Drive. Since DVD+RW Drives +/// are capable of generationg multiple media events concurrently, such Drives shall be capable of +/// queuing media event. +pub const GESN_EC_BGFORMATRESTARTED: u8 = 6; + +/// Some generally useful CD-ROM information. From +/// Max. minutes per CD. +pub const CD_MINS: u32 = 74; +/// Seconds per minute. +pub const CD_SECS: u32 = 60; +/// Frames per second. +pub const CD_FRAMES: u32 = 75; +/// Bytes per frame, "cooked" mode. +pub const CD_FRAME_SIZE: u32 = 2048; +/// MSF numbering offset of the first frame. +pub const CD_MSF_OFFSET: u32 = 150; +/// Max bytes supported for CD in stratovirt now. +pub const CD_MAX_BYTES: u32 = CD_MINS * CD_SECS * CD_FRAMES * CD_FRAME_SIZE; +pub const CD_MAX_SECTORS: u32 = CD_MAX_BYTES / DEFAULT_SECTOR_SIZE; + +/// Profile Number for GET CONFIGURATION command in MMC-6. +/// Read only Compact Disc capable. +const GC_PROFILE_CD_ROM: u16 = 0x0008; +/// Read only DVD. +const GC_PROFILE_DVD_ROM: u16 = 0x0010; + +/// Features Codes for GET CONFIGURATION command in MMC-6. +/// A list of all Profiles supported by the Drive. +const GC_FC_PROFILE_LIST: u16 = 0x0000; +/// Mandatory behavior for all devices. +const GC_FC_CORE: u16 = 0x0001; +/// The medium may be removed from the device. +const GC_FC_REMOVABLE_MEDIUM: u16 = 0x0003; + +// BusBase.Children uses `u64` for device's unique address. We use bits [32-39] in `u64` +// to represent the target number and bits[0-15] in `u64` to the lun number. +const TARGET_ID_SHIFT: u64 = 32; +const LUN_ID_MASK: u64 = 0xFFFF; + +#[derive(Clone, PartialEq, Eq)] +pub enum ScsiXferMode { + /// TEST_UNIT_READY, ... + ScsiXferNone, + /// READ, INQUIRY, MODE_SENSE, ... + ScsiXferFromDev, + /// WRITE, MODE_SELECT, ... + ScsiXferToDev, +} + +// Convert from (target, lun) to unique address in BusBase. +pub fn get_scsi_key(target: u8, lun: u16) -> u64 { + u64::from(target) << TARGET_ID_SHIFT | u64::from(lun) +} + +// Convert from unique address in BusBase to (target, lun). +fn parse_scsi_key(key: u64) -> (u8, u16) { + ((key >> TARGET_ID_SHIFT) as u8, (key & LUN_ID_MASK) as u16) +} + +pub struct ScsiBus { + pub base: BusBase, +} + +impl Bus for ScsiBus { + gen_base_func!(bus_base, bus_base_mut, BusBase, base); +} + +/// Convert from Arc> to &ScsiBus. +#[macro_export] +macro_rules! SCSI_BUS { + ($trait_bus:expr, $lock_bus: ident, $struct_bus: ident) => { + convert_bus_ref!($trait_bus, $lock_bus, $struct_bus, ScsiBus); + }; +} + +impl ScsiBus { + pub fn new(bus_name: String) -> ScsiBus { + ScsiBus { + base: BusBase::new(bus_name), + } + } + + /// Get device by the target number and the lun number. + /// If the device requested by the target number and the lun number is non-existen, + /// return the first device in ScsiBus's devices list. It's OK because we will not + /// use this "random" device, we will just use it to prove that the target is existen. + pub fn get_device(&self, target: u8, lun: u16) -> Option>> { + if let Some(device) = self.child_dev(get_scsi_key(target, lun)) { + return Some(device.clone()); + } + + // If lun device requested in CDB's LUNS bytes is not found, it may be a target request. + // Target request means if there is any lun in this scsi target, it will response some + // scsi commands. And, if there is no lun found in this scsi target, it means such target + // is non-existent. 
So, we should find if there exists a lun which has the same id with + // target id in CBD's LUNS bytes. And, if there exist two or more luns which have the same + // target id, just return the first one is OK enough. + for (key, device) in self.child_devices() { + let (target_id, lun_id) = parse_scsi_key(key); + if target_id == target { + trace::scsi_bus_get_device(target_id, lun, lun_id); + return Some(device.clone()); + } + } + + // No lun found in requested target. It seems there is no such target requested in + // CDB's LUNS bytes. + trace::scsi_bus_get_no_device(target, lun); + None + } +} + +fn scsi_bus_parse_req_cdb( + cdb: [u8; SCSI_CMD_BUF_SIZE], + dev: Arc>, +) -> Option { + let op = cdb[0]; + let len = scsi_cdb_length(&cdb); + if len < 0 { + return None; + } + + // When CDB's Group Code is vendor specific or reserved, len/xfer/lba will be negative. + // So, don't need to check again after checking in cdb length. + let xfer = scsi_cdb_xfer(&cdb, dev) as u64; + let lba = scsi_cdb_lba(&cdb) as u64; + + Some(ScsiCommand { + buf: cdb, + op, + len: len as u32, + xfer, + lba, + mode: scsi_cdb_xfer_mode(&cdb), + }) +} + +#[derive(Clone)] +pub struct ScsiCommand { + /// The Command Descriptor Block(CDB). + pub buf: [u8; SCSI_CMD_BUF_SIZE], + /// Scsi Operation Code. + pub op: u8, + /// Length of CDB. + pub len: u32, + /// Transfer length. + pub xfer: u64, + /// Logical Block Address. + pub lba: u64, + /// Transfer direction. + pub mode: ScsiXferMode, +} + +#[derive(Clone)] +pub struct ScsiCompleteCb { + pub req: Arc>, +} + +pub fn aio_complete_cb(aiocb: &AioCb, mut ret: i64) -> Result<()> { + match aiocb.req_is_completed(ret) { + AioReqResult::Inflight => return Ok(()), + AioReqResult::Error(v) => ret = v, + AioReqResult::Done => (), + } + + let (status, sense) = if ret < 0 { + (CHECK_CONDITION, Some(SCSI_SENSE_IO_ERROR)) + } else { + (GOOD, None) + }; + + let sreq = &mut aiocb.iocompletecb.req.lock().unwrap(); + sreq.upper_req + .as_mut() + .scsi_request_complete_cb(status, sense)?; + Ok(()) +} + +pub trait ScsiRequestOps: Send + Sync + AsAny { + // Will be called in the end of this scsi instruction execution. + fn scsi_request_complete_cb(&mut self, status: u8, scsisense: Option) -> Result<()>; +} + +pub struct ScsiRequest { + pub cmd: ScsiCommand, + // Requested lun id for scsi request. It may be not equal to scsi device's lun id when it's a + // scsi target request. + pub req_lun: u16, + pub opstype: u32, + // For datain and dataout. Can be empty when it's a ScsiXferMode::ScsiXferNone request. + pub iovec: Vec, + // Provided buffer's length. + pub datalen: u32, + pub dev: Arc>, + // Upper level request which contains this ScsiRequest. 
+ pub upper_req: Box, +} + +impl ScsiRequest { + pub fn new( + cdb: [u8; SCSI_CMD_BUF_SIZE], + req_lun: u16, + iovec: Vec, + datalen: u32, + device: Arc>, + upper_req: Box, + ) -> Result { + let cmd = scsi_bus_parse_req_cdb(cdb, device.clone()).with_context(|| "Error cdb!")?; + let op = cmd.op; + let opstype = scsi_operation_type(op); + + if op == WRITE_10 || op == READ_10 { + SCSI_DEVICE!(device, locked_dev, scsi_dev); + let disk_size = scsi_dev.disk_sectors << SECTOR_SHIFT; + let disk_type = scsi_dev.scsi_type; + let offset_shift = match disk_type { + SCSI_TYPE_DISK => SCSI_DISK_DEFAULT_BLOCK_SIZE_SHIFT, + _ => SCSI_CDROM_DEFAULT_BLOCK_SIZE_SHIFT, + }; + let offset = cmd + .lba + .checked_shl(offset_shift) + .with_context(|| "Too large offset IO!")?; + + offset + .checked_add(u64::from(datalen)) + .filter(|&off| off <= disk_size) + .with_context(|| { + format!( + "op 0x{:x} read/write length {} from {} is larger than disk size {}", + op, datalen, offset, disk_size + ) + })?; + } + + Ok(ScsiRequest { + cmd, + req_lun, + opstype, + iovec, + datalen, + dev: device, + upper_req, + }) + } + + pub fn execute(self) -> Result>> { + let mode = self.cmd.mode.clone(); + let op = self.cmd.op; + let dev = self.dev.clone(); + SCSI_DEVICE!(dev, locked_dev, scsi_dev); + // SAFETY: the block_backend is assigned after device realized. + let block_backend = scsi_dev.block_backend.as_ref().unwrap(); + let mut locked_backend = block_backend.lock().unwrap(); + let s_req = Arc::new(Mutex::new(self)); + + let scsicompletecb = ScsiCompleteCb { req: s_req.clone() }; + let offset_bits = match scsi_dev.scsi_type { + SCSI_TYPE_DISK => SCSI_DISK_DEFAULT_BLOCK_SIZE_SHIFT, + _ => SCSI_CDROM_DEFAULT_BLOCK_SIZE_SHIFT, + }; + let locked_req = s_req.lock().unwrap(); + let iovecs = locked_req.iovec.clone(); + let offset = (locked_req.cmd.lba << offset_bits) as usize; + drop(locked_req); + + if op == SYNCHRONIZE_CACHE { + locked_backend + .datasync(scsicompletecb) + .with_context(|| "Failed to process scsi request for flushing")?; + locked_backend.flush_request()?; + + return Ok(s_req); + } + + match mode { + ScsiXferMode::ScsiXferFromDev => { + locked_backend + .read_vectored(iovecs, offset, scsicompletecb) + .with_context(|| "Failed to process scsi request for reading")?; + } + ScsiXferMode::ScsiXferToDev => { + locked_backend + .write_vectored(iovecs, offset, scsicompletecb) + .with_context(|| "Failed to process scsi request for writing")?; + } + _ => { + info!("xfer none"); + } + } + + locked_backend.flush_request()?; + Ok(s_req) + } + + fn emulate_target_execute( + &self, + not_supported_flag: &mut bool, + sense: &mut Option, + ) -> Result> { + match self.cmd.op { + REPORT_LUNS => scsi_command_emulate_report_luns(&self.cmd, &self.dev), + INQUIRY => scsi_command_emulate_target_inquiry(self.req_lun, &self.cmd), + REQUEST_SENSE => { + if self.req_lun != 0 { + *sense = Some(SCSI_SENSE_LUN_NOT_SUPPORTED); + } + // Scsi Device does not realize sense buffer now, so just return. 
+ Ok(Vec::new()) + } + TEST_UNIT_READY => Ok(Vec::new()), + _ => { + *not_supported_flag = true; + *sense = Some(SCSI_SENSE_INVALID_OPCODE); + Err(anyhow!("Invalid emulation target scsi command")) + } + } + } + + fn emulate_device_execute( + &self, + not_supported_flag: &mut bool, + sense: &mut Option, + ) -> Result> { + match self.cmd.op { + REQUEST_SENSE => { + *sense = Some(SCSI_SENSE_NO_SENSE); + Ok(Vec::new()) + } + TEST_UNIT_READY => { + SCSI_DEVICE!(self.dev, locked_dev, scsi_dev); + if scsi_dev.block_backend.is_none() { + Err(anyhow!("No scsi backend!")) + } else { + Ok(Vec::new()) + } + } + // Do not support SCSI_DISK_F_REMOVABLE now. + // Return Ok is enough for START_STOP/ALLOW_MEDIUM_REMOVAL. + // TODO: implement SCSI_DISK_F_REMOVABLE. + START_STOP => Ok(Vec::new()), + ALLOW_MEDIUM_REMOVAL => Ok(Vec::new()), + INQUIRY => scsi_command_emulate_inquiry(&self.cmd, &self.dev), + READ_CAPACITY_10 => scsi_command_emulate_read_capacity_10(&self.cmd, &self.dev), + MODE_SENSE | MODE_SENSE_10 => scsi_command_emulate_mode_sense(&self.cmd, &self.dev), + SERVICE_ACTION_IN_16 => scsi_command_emulate_service_action_in_16(&self.cmd, &self.dev), + READ_DISC_INFORMATION => { + scsi_command_emulate_read_disc_information(&self.cmd, &self.dev) + } + GET_EVENT_STATUS_NOTIFICATION => { + scsi_command_emulate_get_event_status_notification(&self.cmd, &self.dev) + } + READ_TOC => scsi_command_emulate_read_toc(&self.cmd, &self.dev), + GET_CONFIGURATION => scsi_command_emulate_get_configuration(&self.cmd, &self.dev), + _ => { + *not_supported_flag = true; + Err(anyhow!("Emulation scsi command is not supported now!")) + } + } + } + + pub fn emulate_execute(mut self) -> Result>> { + trace::scsi_emulate_execute(self.cmd.op); + let mut not_supported_flag = false; + let mut sense = None; + let mut status = GOOD; + SCSI_DEVICE!(self.dev, locked_dev, scsi_dev); + let found_lun = scsi_dev.dev_cfg.lun; + drop(locked_dev); + + // Requested lun id is not equal to found device id means it may be a target request. + // REPORT LUNS is also a target request command. + let result = if self.req_lun != found_lun || self.cmd.op == REPORT_LUNS { + self.emulate_target_execute(&mut not_supported_flag, &mut sense) + } else { + // It's not a target request. + self.emulate_device_execute(&mut not_supported_flag, &mut sense) + }; + + match result { + Ok(outbuf) => { + outbuf_to_iov(self.cmd.op, &outbuf, &self.iovec)?; + } + Err(ref e) => { + if not_supported_flag { + trace::scsi_emulate_execute_error(self.cmd.op, &"not supported"); + status = CHECK_CONDITION; + sense = Some(SCSI_SENSE_INVALID_OPCODE); + } else { + trace::scsi_emulate_execute_error(self.cmd.op, e); + status = CHECK_CONDITION; + sense = Some(SCSI_SENSE_INVALID_FIELD); + } + } + } + + self.upper_req + .as_mut() + .scsi_request_complete_cb(status, sense)?; + + Ok(Arc::new(Mutex::new(self))) + } +} + +fn write_buf_mem(buf: &[u8], max: u64, hva: u64) -> Result { + let mut slice = + // SAFETY: The hva is managed by Address Space, it can be guaranteed to be legal. 
+ unsafe { + std::slice::from_raw_parts_mut(hva as *mut u8, cmp::min(buf.len(), max as usize)) + }; + let size = (&mut slice) + .write(buf) + .with_context(|| format!("Failed to write buf(hva:{})", hva))?; + + Ok(size) +} + +fn outbuf_to_iov(command: u8, outbuf: &[u8], iovec: &[Iovec]) -> Result<()> { + let mut start = 0; + for (idx, iov) in iovec.iter().enumerate() { + if start >= outbuf.len() { + return Ok(()); + } + + trace::scsi_outbuf_to_iov(command, outbuf.len(), iov.iov_len, idx, iovec.len()); + + start += write_buf_mem(&outbuf[start..], iov.iov_len, iov.iov_base) + .with_context(|| "Failed to write buf for scsi command result iov")?; + } + + Ok(()) +} + +// Scsi Commands which are emulated in stratovirt and do noting to the backend. +pub const EMULATE_SCSI_OPS: u32 = 0; +// Scsi Commands which will do something(eg: read and write) to the backend. +pub const NON_EMULATE_SCSI_OPS: u32 = 1; + +fn scsi_operation_type(op: u8) -> u32 { + match op { + READ_6 | READ_10 | READ_12 | READ_16 | WRITE_6 | WRITE_10 | WRITE_12 | WRITE_16 + | WRITE_VERIFY_10 | WRITE_VERIFY_12 | WRITE_VERIFY_16 | SYNCHRONIZE_CACHE => { + NON_EMULATE_SCSI_OPS + } + _ => EMULATE_SCSI_OPS, + } +} + +fn scsi_cdb_length(cdb: &[u8; SCSI_CMD_BUF_SIZE]) -> i32 { + match cdb[0] >> 5 { + // CDB[0]: Operation Code Byte. Bits[0-4]: Command Code. Bits[5-7]: Group Code. + // Group Code | Meaning | + // 000b | 6 bytes commands. | + // 001b | 10 bytes commands. | + // 010b | 10 bytes commands. | + // 011b | reserved. | + // 100b | 16 bytes commands. | + // 101b | 12 bytes commands. | + // 110b | vendor specific. | + // 111b | vendor specific. | + 0 => 6, + 1 | 2 => 10, + 4 => 16, + 5 => 12, + _ => -1, + } +} + +pub fn scsi_cdb_xfer(cdb: &[u8; SCSI_CMD_BUF_SIZE], dev: Arc>) -> i64 { + SCSI_DEVICE!(dev, locked_dev, scsi_dev); + let block_size = scsi_dev.block_size as i64; + drop(locked_dev); + + let mut xfer: i64 = match cdb[0] >> 5 { + // Group Code | Transfer length. | + // 000b | Byte[4]. | + // 001b | Bytes[7-8]. | + // 010b | Bytes[7-8]. | + // 100b | Bytes[10-13]. | + // 101b | Bytes[6-9]. | + 0 => i64::from(cdb[4]), + 1 | 2 => i64::from(BigEndian::read_u16(&cdb[7..])), + 4 => i64::from(BigEndian::read_u32(&cdb[10..])), + 5 => i64::from(BigEndian::read_u32(&cdb[6..])), + _ => -1, + }; + + match cdb[0] { + TEST_UNIT_READY | START_STOP | SYNCHRONIZE_CACHE | SYNCHRONIZE_CACHE_16 => { + xfer = 0; + } + READ_CAPACITY_10 => { + xfer = 8; + } + WRITE_6 | READ_6 => { + // length 0 means 256 blocks. + if xfer == 0 { + // Safety: block_size is 2048 or 512. + xfer = 256 * block_size; + } + } + WRITE_10 | WRITE_12 | WRITE_16 | READ_10 | READ_12 | READ_16 => { + // Safety: xfer is less than u32::max now. + xfer *= block_size; + } + INQUIRY => { + xfer = i64::from(cdb[4]) | i64::from(cdb[3]) << 8; + } + _ => {} + } + xfer +} + +fn scsi_cdb_lba(cdb: &[u8; SCSI_CMD_BUF_SIZE]) -> i64 { + match cdb[0] >> 5 { + // Group Code | Logical Block Address. | + // 000b | Byte[1].bits[0-4]~Byte[3]. | + // 001b | Bytes[2-5]. | + // 010b | Bytes[2-5]. | + // 100b | Bytes[2-9]. | + // 101b | Bytes[2-5]. 
| + 0 => i64::from(BigEndian::read_u32(&cdb[0..]) & 0x1fffff), + 1 | 2 | 5 => i64::from(BigEndian::read_u32(&cdb[2..])), + 4 => BigEndian::read_u64(&cdb[2..]) as i64, + _ => -1, + } +} + +fn scsi_cdb_xfer_mode(cdb: &[u8; SCSI_CMD_BUF_SIZE]) -> ScsiXferMode { + match cdb[0] { + WRITE_6 + | WRITE_10 + | WRITE_VERIFY_10 + | WRITE_12 + | WRITE_VERIFY_12 + | WRITE_16 + | WRITE_VERIFY_16 + | VERIFY_10 + | VERIFY_12 + | VERIFY_16 + | COPY + | COPY_VERIFY + | COMPARE + | CHANGE_DEFINITION + | LOG_SELECT + | MODE_SELECT + | MODE_SELECT_10 + | SEND_DIAGNOSTIC + | WRITE_BUFFER + | FORMAT_UNIT + | REASSIGN_BLOCKS + | SEARCH_EQUAL + | SEARCH_HIGH + | SEARCH_LOW + | UPDATE_BLOCK + | WRITE_LONG_10 + | WRITE_SAME_10 + | WRITE_SAME_16 + | UNMAP + | SEARCH_HIGH_12 + | SEARCH_EQUAL_12 + | SEARCH_LOW_12 + | MEDIUM_SCAN + | SEND_VOLUME_TAG + | SEND_CUE_SHEET + | SEND_DVD_STRUCTURE + | PERSISTENT_RESERVE_OUT + | MAINTENANCE_OUT + | SET_WINDOW + | SCAN => ScsiXferMode::ScsiXferToDev, + + ATA_PASSTHROUGH_12 | ATA_PASSTHROUGH_16 => match cdb[2] & 0x8 { + 0 => ScsiXferMode::ScsiXferToDev, + _ => ScsiXferMode::ScsiXferFromDev, + }, + + _ => ScsiXferMode::ScsiXferFromDev, + } +} + +/// VPD: Vital Product Data. +fn scsi_command_emulate_vpd_page( + cmd: &ScsiCommand, + dev: &Arc>, +) -> Result> { + let buflen: usize; + let mut outbuf: Vec = vec![0; 4]; + SCSI_DEVICE!(dev, locked_dev, scsi_dev); + let page_code = cmd.buf[2]; + + outbuf[0] = scsi_dev.scsi_type as u8 & 0x1f; + outbuf[1] = page_code; + + match page_code { + 0x00 => { + // Supported VPD Pages. + outbuf.push(0_u8); + if !scsi_dev.state.serial.is_empty() { + // 0x80: Unit Serial Number. + outbuf.push(0x80); + } + // 0x83: Device Identification. + outbuf.push(0x83); + if scsi_dev.scsi_type == SCSI_TYPE_DISK { + // 0xb0: Block Limits. + outbuf.push(0xb0); + // 0xb1: Block Device Characteristics. + outbuf.push(0xb1); + // 0xb2: Logical Block Provisioning. + outbuf.push(0xb2); + } + buflen = outbuf.len(); + } + 0x80 => { + // Unit Serial Number. + let len = scsi_dev.state.serial.len(); + if len == 0 { + bail!("Missed serial number!"); + } + + let l = cmp::min(SCSI_INQUIRY_VPD_SERIAL_NUMBER_MAX_LEN, len); + let mut serial_vec = scsi_dev.state.serial.as_bytes().to_vec(); + serial_vec.truncate(l); + outbuf.append(&mut serial_vec); + buflen = outbuf.len(); + } + 0x83 => { + // Device Identification. + let mut len: u8 = scsi_dev.state.device_id.len() as u8; + if len > (255 - 8) { + len = 255 - 8; + } + + if len > 0 { + // 0x2: Code Set: ASCII, Protocol Identifier: reserved. + // 0: Identifier Type, Association, Reserved, Piv. + // 0: Reserved. + // len: identifier length. + outbuf.append(&mut [0x2_u8, 0_u8, 0_u8, len].to_vec()); + + let mut device_id_vec = scsi_dev.state.device_id.as_bytes().to_vec(); + device_id_vec.truncate(len as usize); + outbuf.append(&mut device_id_vec); + } + buflen = outbuf.len(); + } + 0xb0 => { + // Block Limits. + if scsi_dev.scsi_type == SCSI_TYPE_ROM { + bail!("Invalid scsi type: SCSI_TYPE_ROM !"); + } + outbuf.resize(64, 0); + + // Byte[4]: bit 0: wsnz: Write Same Non-zero. + // Byte[5] = 0: Maximum compare and write length (COMPARE_AND_WRITE not supported). + // Byte[6-7] = 0: Optimal transfer length granularity. + // Byte[8-11]: Maximum transfer length. + // Byte[12-15] = 0: Optimal Transfer Length. + // Byte[16-19] = 0: Maximum Prefetch Length. + // Byte[20-23]: Maximum unmap lba count. + // Byte[24-27]: Maximum unmap block descriptor count. + // Byte[28-31]: Optimal unmap granulatity. 
+ // Byte[32-35] = 0: Unmap Granularity alignment. + // Byte[36-43]: Maximum write same length. + // Bytes[44-47] = 0: Maximum atomic Transfer length. + // Bytes[48-51] = 0: Atomic Alignment. + // Bytes[52-55] = 0: Atomic Transfer Length Granularity. + // Bytes[56-59] = 0: Maximum Atomic Transfer Length With Atomic Boundary. + // Bytes[60-63] = 0: Maximum Atomic Boundary Size. + outbuf[4] = 1; + let max_xfer_length: u32 = u32::MAX / 512; + BigEndian::write_u32(&mut outbuf[8..12], max_xfer_length); + BigEndian::write_u64(&mut outbuf[36..44], u64::from(max_xfer_length)); + buflen = outbuf.len(); + } + 0xb1 => { + // Block Device Characteristics. + // 0: Medium Rotation Rate: 2Bytes. + // 0: Product Type. + // 0: Nominal Form Factor, Wacereq, Wabereq. + // 0: Vbuls, Fuab, Bocs, Reserved, Zoned, Reserved. + outbuf.append(&mut [0_u8, 0_u8, 0_u8, 0_u8, 0_u8].to_vec()); + buflen = 0x40; + } + 0xb2 => { + // Logical Block Provisioning. + // 0: Threshold exponent. + // 0xe0: LBPU(bit 7) | LBPWS | LBPWS10 | LBPRZ | ANC_SUP | DP. + // 0: Threshold percentage | Provisioning Type. + // 0: Threshold percentage. + outbuf.append(&mut [0_u8, 0x60_u8, 1_u8, 0_u8].to_vec()); + buflen = 8; + } + _ => { + bail!("Invalid INQUIRY pagecode {}", page_code); + } + } + + // It's OK for just using outbuf byte 3, because all page_code's buflen in stratovirt is less + // than 255 now. + outbuf[3] = buflen as u8 - 4; + Ok(outbuf) +} + +fn scsi_command_emulate_target_inquiry(lun: u16, cmd: &ScsiCommand) -> Result> { + let mut outbuf: Vec = vec![0; 4]; + + // Byte1: bit0: EVPD (Enable Vital product bit). + if cmd.buf[1] == 0x1 { + // Vital Product Data. + // Byte2: Page Code. + let page_code = cmd.buf[2]; + outbuf[1] = page_code; + match page_code { + 0x00 => { + // Supported page codes. + // Page Length: outbuf.len() - 4. Supported VPD page list only has 0x00 item. + outbuf[3] = 0x1; + // Supported VPD page list. Only support this page. + outbuf.push(0x00); + } + _ => { + bail!("Emulate target inquiry invalid page code {:x}", page_code); + } + } + return Ok(outbuf); + } + + // EVPD = 0 means it's a Standard INQUIRY command. + // Byte2: page code. + if cmd.buf[2] != 0 { + bail!("Invalid standard inquiry command!"); + } + + outbuf.resize(SCSI_TARGET_INQUIRY_LEN as usize, 0); + let len = cmp::min(cmd.xfer, SCSI_TARGET_INQUIRY_LEN); + + // outbuf. + // Byte0: Peripheral Qualifier | peripheral device type. + // Byte1:RMB. + // Byte2: VERSION. + // Byte3: NORMACA | HISUP | Response Data Format. + // Byte4: Additional length(outbuf.len() - 5). + // Byte5: SCCS | ACC | TPGS | 3PC | RESERVED | PROTECT. + // Byte6: ENCSERV | VS | MULTIP | ADDR16. + // Byte7: WBUS16 | SYNC | CMDQUE | VS. + if lun != 0 { + outbuf[0] = TYPE_NO_LUN; + } else { + outbuf[0] = TYPE_UNKNOWN | TYPE_INACTIVE; + // scsi version. + outbuf[2] = 5; + // HISUP(hierarchical support). Response Data Format(must be 2). + outbuf[3] = 0x12; + outbuf[4] = if len <= 5 { + bail!("Invalid xfer field in INQUIRY command"); + } else { + len as u8 - 5 + }; + // SYNC, CMDQUE(the logical unit supports the task management model). + outbuf[7] = 0x12; + } + + Ok(outbuf) +} + +fn scsi_command_emulate_inquiry( + cmd: &ScsiCommand, + dev: &Arc>, +) -> Result> { + // Byte1 bit0: EVPD(enable vital product data). 
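+    // EVPD = 1: return the requested Vital Product Data page instead of the
+    // standard INQUIRY data.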
+ if cmd.buf[1] == 0x1 { + return scsi_command_emulate_vpd_page(cmd, dev); + } + + if cmd.buf[2] != 0 { + bail!("Invalid INQUIRY!"); + } + + let buflen = cmp::min(cmd.xfer, SCSI_MAX_INQUIRY_LEN); + let mut outbuf: Vec = vec![0; SCSI_MAX_INQUIRY_LEN as usize]; + + SCSI_DEVICE!(dev, locked_dev, scsi_dev); + + outbuf[0] = (scsi_dev.scsi_type & 0x1f) as u8; + outbuf[1] = match scsi_dev.state.features & SCSI_DISK_F_REMOVABLE { + 1 => 0x80, + _ => 0, + }; + + let product_bytes = scsi_dev.state.product.as_bytes(); + let product_len = cmp::min(product_bytes.len(), SCSI_INQUIRY_PRODUCT_MAX_LEN); + let vendor_bytes = scsi_dev.state.vendor.as_bytes(); + let vendor_len = cmp::min(vendor_bytes.len(), SCSI_INQUIRY_VENDOR_MAX_LEN); + let version_bytes = scsi_dev.state.version.as_bytes(); + let vension_len = cmp::min(version_bytes.len(), SCSI_INQUIRY_VERSION_MAX_LEN); + + outbuf[16..16 + product_len].copy_from_slice(product_bytes); + outbuf[8..8 + vendor_len].copy_from_slice(vendor_bytes); + outbuf[32..32 + vension_len].copy_from_slice(version_bytes); + + drop(locked_dev); + + // outbuf: + // Byte2: Version. + // Byte3: bits[0-3]: Response Data Format; bit 4:Hisup. + // Byte4: Additional Length(outbuf.len()-5). + // Byte7: bit2: Cmdque; bit4: SYNC. + outbuf[2] = 5; + outbuf[3] = (2 | 0x10) as u8; + + if buflen > 36 { + outbuf[4] = (buflen - 5) as u8; + } else { + outbuf[4] = 36 - 5; + } + + outbuf[7] = 0x12; + + Ok(outbuf) +} + +fn scsi_command_emulate_read_capacity_10( + cmd: &ScsiCommand, + dev: &Arc>, +) -> Result> { + if cmd.buf[8] & 1 == 0 && cmd.lba != 0 { + // PMI(Partial Medium Indicator) + bail!("Invalid scsi cmd READ_CAPACITY_10!"); + } + + SCSI_DEVICE!(dev, locked_dev, scsi_dev); + let block_size = scsi_dev.block_size; + let mut outbuf: Vec = vec![0; 8]; + let mut nb_sectors = cmp::min(scsi_dev.disk_sectors as u32, u32::MAX); + nb_sectors /= block_size / DEFAULT_SECTOR_SIZE; + nb_sectors -= 1; + + // Bytes[0-3]: Returned Logical Block Address(the logical block address of the last logical + // block). + // Bytes[4-7]: Logical Block Length In Bytes. + BigEndian::write_u32(&mut outbuf[0..4], nb_sectors); + BigEndian::write_u32(&mut outbuf[4..8], block_size); + + Ok(outbuf) +} + +fn scsi_command_emulate_mode_sense( + cmd: &ScsiCommand, + dev: &Arc>, +) -> Result> { + // disable block descriptors(DBD) bit. + let mut dbd: bool = cmd.buf[1] & 0x8 != 0; + let page_code = cmd.buf[2] & 0x3f; + let page_control = (cmd.buf[2] & 0xc0) >> 6; + let mut outbuf: Vec = vec![0]; + SCSI_DEVICE!(dev, locked_dev, scsi_dev); + let mut dev_specific_parameter: u8 = 0; + let mut nb_sectors = scsi_dev.disk_sectors as u32; + let scsi_type = scsi_dev.scsi_type; + let block_size = scsi_dev.block_size; + nb_sectors /= block_size / DEFAULT_SECTOR_SIZE; + + trace::scsi_emulate_mode_sense( + page_code, + page_control, + cmd.buf[3], + cmd.buf[1] & 0x8, + cmd.buf[4], + ); + + // Device specific paramteter field for direct access block devices: + // Bit 7: WP(Write Protect); bit 4: DPOFUA; + if scsi_type == SCSI_TYPE_DISK { + if scsi_dev.state.features & (1 << SCSI_DISK_F_DPOFUA) != 0 { + dev_specific_parameter = 0x10; + } + if scsi_dev.drive_cfg.readonly { + // Readonly. + dev_specific_parameter |= 0x80; + } + } else { + dbd = true; + } + drop(locked_dev); + + if cmd.op == MODE_SENSE { + outbuf.resize(4, 0); + // Device Specific Parameter. + outbuf[2] = dev_specific_parameter; + } else { + // MODE_SENSE_10. + outbuf.resize(8, 0); + // Device Specific Parameter. 
+ outbuf[3] = dev_specific_parameter; + } + + if !dbd && nb_sectors > 0 { + if cmd.op == MODE_SENSE { + // Block Descriptor Length. + outbuf[3] = 8; + } else { + // Block Descriptor Length. + outbuf[7] = 8; + } + + // Block descriptors. + // Byte[0]: density code. + // Bytes[1-3]: number of blocks. + // Byte[4]: Reserved. + // Byte[5-7]: Block Length. + let mut block_desc: Vec = vec![0; 8]; + BigEndian::write_u32(&mut block_desc[0..4], nb_sectors & 0xffffff); + BigEndian::write_u32(&mut block_desc[4..8], block_size); + outbuf.append(&mut block_desc); + } + + if page_control == 3 { + bail!("Invalid Mode Sense command, Page control 0x11 is not supported!"); + } + + if page_code == 0x3f { + // 3Fh Return all pages not including subpages. + for pg in 0..page_code { + let _ = scsi_command_emulate_mode_sense_page(pg, page_control, &mut outbuf, scsi_type); + } + } else { + scsi_command_emulate_mode_sense_page(page_code, page_control, &mut outbuf, scsi_type)?; + } + + // The Mode Data Length field indicates the length in bytes of the following data + // that is available to be transferred. The Mode data length does not include the + // number of bytes in the Mode Data Length field. + let buflen = outbuf.len(); + if cmd.op == MODE_SENSE { + outbuf[0] = (buflen - 1) as u8; + } else { + outbuf[0] = (((buflen - 2) >> 8) & 0xff) as u8; + outbuf[1] = ((buflen - 2) & 0xff) as u8; + } + + Ok(outbuf) +} + +fn scsi_command_emulate_mode_sense_page( + page: u8, + page_control: u8, + outbuf: &mut Vec, + scsi_type: u32, +) -> Result> { + if scsi_type == SCSI_TYPE_DISK + && ![ + MODE_PAGE_HD_GEOMETRY, + MODE_PAGE_FLEXIBLE_DISK_GEOMETRY, + MODE_PAGE_CACHING, + MODE_PAGE_R_W_ERROR, + ] + .contains(&page) + || scsi_type == SCSI_TYPE_ROM + && ![ + MODE_PAGE_CACHING, + MODE_PAGE_R_W_ERROR, + MODE_PAGE_AUDIO_CTL, + MODE_PAGE_CAPABILITIES, + ] + .contains(&page) + { + bail!( + "Invalid Mode Sense command, page control ({:x}), page ({:x}), scsi device type ({})", + page_control, + page, + scsi_type + ); + } + let buflen = outbuf.len(); + match page { + MODE_PAGE_CACHING => { + // Caching Mode Page. + outbuf.resize(buflen + 20, 0); + outbuf[buflen] = page; + outbuf[buflen + 1] = 18; + // 0x4: WCE(Write Cache Enable). + outbuf[buflen + 2] = 0x4; + } + MODE_PAGE_R_W_ERROR => { + // Read-Write Error Recovery mode page. + outbuf.resize(buflen + 12, 0); + outbuf[buflen] = page; + outbuf[buflen + 1] = 10; + + if page_control != 1 { + // 0x80: AWRE(Automatic Write Reallocation Enabled). + outbuf[buflen + 2] = 0x80; + } + } + MODE_PAGE_CAPABILITIES => { + // MM Capabilities and Mechanical Status Page(Page Code 0x2A). + // This mode page is legacy and was most recently defined in MMC-3. + // Outbuf in CD/DVD Capabilities and Mechanical Status Page: + // Byte[buflen + 0]: PS | Reserved | Bits[0-5]: Page Code(0x2A). + // Byte[buflen + 1]: Page Length(28 + 4 * (maximum number of n)). + // Byte[buflen + 2]: Bits[6-7]: Reserved | DVD-RAW Read(1) | DVD-R READ(1) | + // DVD-ROM READ(1) | Method 2 | CD-RW Read(1) | CD-R Read(1). + // Byte[buflen + 3]: Bits[6-7]: Reserved | DVD-RAW WRITE | DVD-R WRITE | + // Reserved | Test Write | CD-R/RW Write | CD-R Write. + // Byte[buflen + 4]: BUF | Multi Session(1) | Mode 2 Form 2(1) | Mode 2 Form 1(1) | + // Digital Port 2(1) | Digital Port 1(1) | Composite(1) | + // Audio Play(1). + // Byte[buflen + 5]: Read Bar Code(1) | UPC(1) | ISRC(1) | C2 Pointers supported(1) | + // R-W Deinterleaved & corrected(1) | R-W supported(1) | + // CD-DA Stream is Accurate(1) | CD-DA Cmds supported(1). 
+ // Byte[buflen + 6]: Bits[5-7]: Loading Mechanism Type(1) | Reserved | Eject(1) | + // Prevent Jumper(1) | Lock State | Lock(1). + // Byte[buflen + 7]: Bits[6-7]: Reserved | R-W in Lead-in | Side Change Capable | SSS | + // Changer Supports Disc Present | Separate Channel Mute | + // Separate volume levels. + // Bytes[buflen + 8 - buflen + 9]: Obsolete. + // Bytes[buflen + 10 - buflen + 11]: Number of Volume Levels Supported. + // Bytes[buflen + 12 - buflen + 13]: Buffer Size Supported. + // Bytes[buflen + 14 - buflen + 15]: Obsolete. + // Byte[buflen + 16]: Reserved. + // Byte[buflen + 17]: Bits[6-7]: Reserved | Bits[4-5]: Length | LSBF | RCK | BCKF | + // Reserved. + // Bytes[buflen + 18 - buflen + 21]: Obsolete. + // Bytes[buflen + 22 - buflen + 23]: Copy Management Revision Supported. + // Bytes[buflen + 24 - buflen + 26]: Reserved. + // Byte[buflen + 27]: Bits[2-7]: Reserved. Bits[0-1]: Rotation Control Selected. + // Bytes[buflen + 28 - buflen + 29]: Current Write Speed Selected. + // Bytes[buflen + 31]: Number of Logical Unit Write Speed Performance Descriptor + // Tables(n). + outbuf.resize(buflen + 32, 0); + outbuf[buflen] = page; + outbuf[buflen + 1] = 28; + + if page_control == 1 { + bail!("Not supported page control"); + } + + outbuf[buflen + 2] = 0x3b; + outbuf[buflen + 4] = 0x7f; + outbuf[buflen + 5] = 0xff; + // Stratovirt does not implement tray for CD, so set "Lock State" to 0. + outbuf[buflen + 6] = 0x2d; + BigEndian::write_u16(&mut outbuf[(buflen + 10)..(buflen + 12)], 2); + BigEndian::write_u16(&mut outbuf[(buflen + 12)..(buflen + 14)], 2048); + } + _ => { + bail!( + "Invalid Mode Sense command, page control ({:x}), page ({:x})", + page_control, + page + ); + } + } + + Ok(outbuf.to_vec()) +} + +fn scsi_command_emulate_report_luns( + cmd: &ScsiCommand, + dev: &Arc>, +) -> Result> { + SCSI_DEVICE!(dev, locked_dev, scsi_dev); + // Byte 0-3: Lun List Length. Byte 4-7: Reserved. + let mut outbuf: Vec = vec![0; 8]; + let target = scsi_dev.dev_cfg.target; + + if cmd.xfer < 16 { + bail!("scsi REPORT LUNS xfer {} too short!", cmd.xfer); + } + + // Byte2: SELECT REPORT:00h/01h/02h. 03h to FFh is reserved. + if cmd.buf[2] > 2 { + bail!( + "Invalid REPORT LUNS cmd, SELECT REPORT Byte is {}", + cmd.buf[2] + ); + } + + let bus = scsi_dev.parent_bus().unwrap().upgrade().unwrap(); + SCSI_BUS!(bus, locked_bus, scsi_bus); + drop(locked_dev); + + for device in scsi_bus.child_devices().values() { + SCSI_DEVICE!(device, locked_dev, scsi_dev); + if scsi_dev.dev_cfg.target != target { + continue; + } + let len = outbuf.len(); + if scsi_dev.dev_cfg.lun < 256 { + outbuf.push(0); + outbuf.push(scsi_dev.dev_cfg.lun as u8); + } else { + outbuf.push(0x40 | ((scsi_dev.dev_cfg.lun >> 8) & 0xff) as u8); + outbuf.push((scsi_dev.dev_cfg.lun & 0xff) as u8); + } + outbuf.resize(len + 8, 0); + } + + let len: u32 = outbuf.len() as u32 - 8; + BigEndian::write_u32(&mut outbuf[0..4], len); + Ok(outbuf) +} + +fn scsi_command_emulate_service_action_in_16( + cmd: &ScsiCommand, + dev: &Arc>, +) -> Result> { + // Read Capacity(16) Command. + // Byte 0: Operation Code(0x9e) + // Byte 1: bit0 - bit4: Service Action(0x10), bit 5 - bit 7: Reserved. 
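+    // Only the READ CAPACITY (16) service action (0x10) is emulated; any other
+    // service action code is rejected below.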
+ if cmd.buf[1] & 0x1f == SUBCODE_READ_CAPACITY_16 { + SCSI_DEVICE!(dev, locked_dev, scsi_dev); + let block_size = scsi_dev.block_size; + let mut outbuf: Vec = vec![0; 32]; + let mut nb_sectors = scsi_dev.disk_sectors; + nb_sectors /= u64::from(block_size / DEFAULT_SECTOR_SIZE); + nb_sectors -= 1; + drop(locked_dev); + + // Byte[0-7]: Returned Logical BLock Address(the logical block address of the last logical + // block). + // Byte[8-11]: Logical Block Length in Bytes. + BigEndian::write_u64(&mut outbuf[0..8], nb_sectors); + BigEndian::write_u32(&mut outbuf[8..12], block_size); + + return Ok(outbuf); + } + + bail!( + "Invalid combination Scsi Command, operation code ({:x}), service action ({:x})", + SERVICE_ACTION_IN_16, + cmd.buf[1] & 31 + ); +} + +fn scsi_command_emulate_read_disc_information( + cmd: &ScsiCommand, + dev: &Arc>, +) -> Result> { + // Byte1: Bits[0-2]: Data type. + // Data Type | Returned Data. | + // 000b | Standard Disc Information. | + // 001b | Track Resources Information. | + // 010b | POW Resources Information. | + // 011b-111b | Reserved | + let data_type = cmd.buf[1] & 7; + + // Types 001b/010b are only defined for Blu-Ray. + if data_type != 0 { + bail!("Unsupported read disc information data type {}!", data_type); + } + SCSI_DEVICE!(dev, locked_dev, scsi_dev); + if scsi_dev.scsi_type != SCSI_TYPE_ROM { + bail!("Read disc information command is only for scsi multi-media device!"); + } + drop(locked_dev); + + // Outbuf: + // Bytes[0-1]: Disc Information Length(32). + // Byte2: Disc Information Data Type(000b) | Erasable(0) | State of last Session(01b) | + // Disc Status(11b). + // Byte3: Number of First Track on Disc. + // Byte4: Number of Sessions. + // Byte5: First Track Number in Last Session(Least Significant Byte). + // Byte6: Last Track Number in Last Session(Last Significant Byte). + // Byte7: DID_V | DBC_V | URU:Unrestricted Use Disc(1) | DAC_V | Reserved | Legacy | + // BG Format Status. + // Byte8: Disc Type(00h: CD-DA or CD-ROM Disc). + // Byte9: Number of sessions(Most Significant Byte). + // Byte10: First Trace Number in Last Session(Most Significant Byte). + // Byte11: Last Trace Number in Last Session(Most Significant Byte). + // Bytes12-15: Disc Identification. + // Bytes16-19: Last Session Lead-in Start Address. + // Bytes20-23: Last Possible Lead-Out Start Address. + // Bytes24-31: Disc Bar Code. + // Byte32: Disc Application Code. + // Byte33: Number of OPC Tables.(0) + let mut outbuf: Vec = vec![0; 34]; + outbuf[1] = 32; + outbuf[2] = 0xe; + outbuf[3] = 1; + outbuf[4] = 1; + outbuf[5] = 1; + outbuf[6] = 1; + outbuf[7] = 0x20; + + Ok(outbuf) +} + +/// Format field for READ TOC command. +/// The Track/Session Number field specifies starting track number for which the data is returned. +/// For multi-session discs, TOC data is returned for all sessions. Track number Aah is reported +/// only for the Lead-out area of the last complete session. +const RT_FORMATTED_TOC: u8 = 0x0000; +/// This format returns the first complete session number, last complete session number and last +/// complete session starting address. +const RT_MULTI_SESSION_INFORMATION: u8 = 0x0001; +/// This format returns all Q sub-code data in the Lead-IN(TOC) areas starting from a session number +/// as specified in the Track/Session Number field. 
+const RT_RAW_TOC: u8 = 0x0010; + +fn scsi_command_emulate_read_toc( + cmd: &ScsiCommand, + dev: &Arc>, +) -> Result> { + // Byte1: Bit1: MSF.(MSF: Minute, Second, Frame) + // MSF = 1: the address fields in some returned data formats shall be in MSF form. + // MSF = 0: the address fields in some returned data formats shall be in LBA form. + let msf = cmd.buf[1] & 2; + // Byte2: Bits[0-3]: Format(Select specific returned data format)(CD: 0,1,2). + let format = cmd.buf[2] & 0xf; + // Byte6: Track/Session Number. + let track_number = cmd.buf[6]; + let mut outbuf: Vec = vec![0; 0]; + + match format { + RT_FORMATTED_TOC => { + SCSI_DEVICE!(dev, locked_dev, scsi_dev); + let nb_sectors = scsi_dev.disk_sectors as u32; + let mut buf = cdrom_read_formatted_toc(nb_sectors, msf, track_number)?; + outbuf.append(&mut buf); + } + RT_MULTI_SESSION_INFORMATION => { + outbuf.resize(12, 0); + outbuf[1] = 0x0a; + outbuf[2] = 0x01; + outbuf[3] = 0x01; + } + RT_RAW_TOC => {} + _ => { + bail!("Invalid read toc format {}", format); + } + } + + Ok(outbuf) +} + +fn scsi_command_emulate_get_configuration( + _cmd: &ScsiCommand, + dev: &Arc>, +) -> Result> { + SCSI_DEVICE!(dev, locked_dev, scsi_dev); + if scsi_dev.scsi_type != SCSI_TYPE_ROM { + bail!("Invalid scsi type {}", scsi_dev.scsi_type); + } + + // 8 bytes(Feature Header) + 12 bytes(Profile List Feature) + + // 12bytes(Core Feature) + 8bytes(Removable media feature) = 40 bytes. + let mut outbuf = vec![0; 40]; + + // Outbuf: + // Bytes[0-7]: Feature Header. + // Bytes[0-3]: Data Length(36 = 40 - 4). + // Bytes[4-5]: Reserved. + // Bytes[6-7]: Current Profile. + BigEndian::write_u32(&mut outbuf[0..4], 36); + let current = if scsi_dev.disk_sectors > u64::from(CD_MAX_SECTORS) { + GC_PROFILE_DVD_ROM + } else { + GC_PROFILE_CD_ROM + }; + BigEndian::write_u16(&mut outbuf[6..8], current); + + // Bytes[8-n]: Feature Descriptor(s): + // Bytes[8-19]: Feature 0: Profile List Feature: + // Bytes[8-9]: Feature code(0000h). + // Byte[10]: Bits[6-7]: Reserved. Bits[2-5]: Version. Bit 1: Persistent. Bit 0: Current(1). + // Byte[11]: Additional Length. + // Byte[12-19]: Profile Descriptors.(2 descriptors: CD and DVD) + // Byte[12-13]: Profile Number(CD). + // Byte[14]: Bits[1-7]: Reserved. Bit 0: CurrentP. + // Byte[15]: Reserved. + // Byte[16-17]: Profile Number(DVD). + // Byte[18]: Bits[1-7]: Reserved. Bit 0: CurrentP. + // Byte[19]: Reserved. + BigEndian::write_u16(&mut outbuf[8..10], GC_FC_PROFILE_LIST); + outbuf[10] = 0x03; + outbuf[11] = 8; + BigEndian::write_u16(&mut outbuf[12..14], GC_PROFILE_CD_ROM); + outbuf[14] |= u8::from(current == GC_PROFILE_CD_ROM); + BigEndian::write_u16(&mut outbuf[16..18], GC_PROFILE_DVD_ROM); + outbuf[18] |= u8::from(current == GC_PROFILE_DVD_ROM); + + // Bytes[8-n]: Feature Descriptor(s): + // Bytes[20-31]: Feature 1: Core Feature: + // Bytes[20-21]: Feature Code(0001h). + // Byte[22]: Bits[6-7]: Reserved. Bits[2-5]: Version(0010b). Bit 1: Persistent(1). + // Bit 0: Current(1). + // Byte[23]: Additional Length(8). + // Bytes[24-27]: Physical Interface Standard. (Scsi Family: 00000001h) + // Byte[28]: Bits[2-7]: Reserved. Bit 1: INQ2. Bit 0: DBE(1). + // Bytes[29-31]: Reserved. + BigEndian::write_u16(&mut outbuf[20..22], GC_FC_CORE); + outbuf[22] = 0x0b; + outbuf[23] = 8; + BigEndian::write_u32(&mut outbuf[24..28], 1); + outbuf[28] = 1; + + // Bytes[8-n]: Feature Descriptor(s): + // Bytes[32-40]: Feature 2: Removable media feature: + // Bytes[32-33]: Feature Code(0003h). + // Byte[34]: Bits[6-7]: Reserved. Bit[2-5]: Version(0010b). 
Bit 1: Persistent(1). + // Bit 0: Current(1). + // Byte[35]: Additional Length(4). + // Byte[36]: Bits[5-7]: Loading Mechanism Type(001b). Bit4: Load(1). Bit 3: Eject(1). + // Bit 2: Pvnt Jmpr. Bit 1: DBML. Bit 0: Lock(1). + // Byte[37-39]: Reserved. + BigEndian::write_u16(&mut outbuf[32..34], GC_FC_REMOVABLE_MEDIUM); + outbuf[34] = 0x0b; + outbuf[35] = 4; + outbuf[36] = 0x39; + + Ok(outbuf) +} + +fn scsi_command_emulate_get_event_status_notification( + cmd: &ScsiCommand, + dev: &Arc>, +) -> Result> { + // Byte4: Notification Class Request. + let notification_class_request = cmd.buf[4]; + SCSI_DEVICE!(dev, locked_dev, scsi_dev); + + if scsi_dev.scsi_type != SCSI_TYPE_ROM { + bail!("Invalid scsi type {}", scsi_dev.scsi_type); + } + + // Byte1: Bit0: Polled. + // Polled = 1: the Host is requesting polled operation. + // Polled = 0: the Host is requesting asynchronous operation. + if cmd.buf[1] & 1 == 0 { + bail!("Asynchronous. Do not support."); + } + + // Outbuf: + // Bytes[0-3]: Event Header. + // Bytes[4-n]: Event Descriptor. + // Bytes[0-1]: Event Descriptor Length. + // Byte2: Bit7: NEC(No Event Available). Bits[0-2]: Notification Class. + // NEC = 1: The Drive supports none of the requested notification classes. + // NEC = 0: At least one of the requested notification classes is supported. + // Byte3: Supported Event Class. + let mut outbuf: Vec = vec![0; 4]; + + outbuf[3] = 1 << GESN_MEDIA; + if notification_class_request & (1 << GESN_MEDIA) != 0 { + // NCE = 0, notification class = media. + outbuf[2] = GESN_MEDIA; + outbuf.resize(8, 0); + // Bytes[4-7]: Media Event Descriptor. + // Byte4: Bits[4-7]: reserved. Bits[0-3]: Event Code. + // Byte5: Media Status. Bits[2-7] reserved. Bit 1: Media Present. Bit 0: Door or Tray open. + // Byte6: Start Slot. + // Byte7: End Slot. + + // Do not support hot-plug/hot-unplug scsi cd which will be present all the time once vm + // starts. To do: this outbuf event code and media status should be changed after + // allowing hot-plug. + outbuf[4] = GESN_EC_NOCHG; + outbuf[5] = 1 << GESN_MS_MEDIA_PRESENT_BIT; + } else { + // NCE = 1. + outbuf[2] = 0x80; + } + + let len = outbuf.len() as u16 - 2; + BigEndian::write_u16(&mut outbuf[0..2], len); + + Ok(outbuf) +} + +/// LBA to MSF translation is defined in MMC6 Table 647. +/// MSF values are converted to LBA values via such formula: +/// lba = ((m * CD_SECS) + s) * CD_FRAMES + f) - CD_MSF_OFFSET. +fn lba_to_msf(lba: u32) -> Vec { + // Note: lba is logical block address and it is in sectors. + // Max lba is u32::MAX * 512byte / 1024 / 1024 / 1024 = 2047GB. + // But, dvd size is less than 4.7G usually and cd size is less than 700M usually. + // So it will not overflow here. + let minute = ((lba + CD_MSF_OFFSET) / CD_FRAMES / CD_SECS) as u8; + let second = ((lba + CD_MSF_OFFSET) / CD_FRAMES % CD_SECS) as u8; + let frame = ((lba + CD_MSF_OFFSET) % CD_FRAMES) as u8; + + vec![minute, second, frame] +} + +fn cdrom_read_formatted_toc(nb_sectors: u32, msf: u8, track_number: u8) -> Result> { + // Track number 0xaa is reported only for the Lead-out area of the last complete session. + if track_number > 1 && track_number != 0xaa { + bail!("Invalid track number!"); + } + + let mut outbuf: Vec = vec![0; 4]; + + // Outbuf: + // Bytes[0-1]: TOC Data Length. + // Byte[2]: First Track Number(1). + // Byte[3]: Last Track Number(1). + outbuf[2] = 1; + outbuf[3] = 1; + if track_number <= 1 { + // Byte[4]: Reserved. + // Byte[5]: Bits[5-7]: ADR, Bits[0-4]: CONTROL. + // Byte[6]: Track Number. + // Byte[7]: Reserved. 
+ // Bytes[8-11]: Track Start Address(LBA form = 000000h, MSF form = 00:00:02:00). + outbuf.append(&mut [0, 0x14, 1, 0].to_vec()); + if msf != 0 { + // MSF form. + outbuf.push(0); + outbuf.append(&mut lba_to_msf(0)); + } else { + outbuf.append(&mut [0, 0, 0, 0].to_vec()); + } + } + + // Lead Out Track. + // Byte[temporary buflen]: Reserved. + // Byte[temporary buflen + 1]: Bits[5-7]: ADR, Bits[0-4]: CONTROL. + // Byte[temporary buflen + 2]: Track Number. + // Byte[temporary buflen + 3]: Reserved. + // Bytes[temporary buflen + 4 - temporary buflen + 7]: Track Start Address. + outbuf.append(&mut [0, 0x14, 0xaa, 0].to_vec()); + if msf != 0 { + outbuf.push(0); + outbuf.append(&mut lba_to_msf(nb_sectors)); + } else { + let pos = outbuf.len(); + outbuf.resize(pos + 4, 0); + BigEndian::write_u32(&mut outbuf[pos..pos + 4], nb_sectors); + } + + let len = outbuf.len() as u16; + BigEndian::write_u16(&mut outbuf[0..2], len - 2); + + Ok(outbuf) +} diff --git a/devices/src/scsi/disk.rs b/devices/src/scsi/disk.rs new file mode 100644 index 0000000000000000000000000000000000000000..a3106890b56ebad25593fc82a4dba5dcd31e9a95 --- /dev/null +++ b/devices/src/scsi/disk.rs @@ -0,0 +1,321 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Result}; +use clap::Parser; + +use crate::ScsiBus::{aio_complete_cb, ScsiCompleteCb}; +use crate::{Device, DeviceBase}; +use block_backend::{create_block_backend, BlockDriverOps, BlockProperty}; +use machine_manager::config::{valid_id, DriveConfig, DriveFile, VmConfig}; +use machine_manager::event_loop::EventLoop; +use util::aio::{Aio, AioEngine, WriteZeroesState}; +use util::gen_base_func; + +use super::bus::ScsiBus; + +/// SCSI DEVICE TYPES. +pub const SCSI_TYPE_DISK: u32 = 0x00; +pub const SCSI_TYPE_TAPE: u32 = 0x01; +pub const SCSI_TYPE_PRINTER: u32 = 0x02; +pub const SCSI_TYPE_PROCESSOR: u32 = 0x03; +pub const SCSI_TYPE_WORM: u32 = 0x04; +pub const SCSI_TYPE_ROM: u32 = 0x05; +pub const SCSI_TYPE_SCANNER: u32 = 0x06; +pub const SCSI_TYPE_MOD: u32 = 0x07; +pub const SCSI_TYPE_MEDIUM_CHANGER: u32 = 0x08; +pub const SCSI_TYPE_STORAGE_ARRAY: u32 = 0x0c; +pub const SCSI_TYPE_ENCLOSURE: u32 = 0x0d; +pub const SCSI_TYPE_RBC: u32 = 0x0e; +pub const SCSI_TYPE_OSD: u32 = 0x11; +pub const SCSI_TYPE_ZBC: u32 = 0x14; +pub const SCSI_TYPE_WLUN: u32 = 0x1e; +pub const SCSI_TYPE_NOT_PRESENT: u32 = 0x1f; +pub const SCSI_TYPE_INACTIVE: u32 = 0x20; +pub const SCSI_TYPE_NO_LUN: u32 = 0x7f; + +pub const SCSI_DISK_F_REMOVABLE: u32 = 0; +pub const SCSI_DISK_F_DPOFUA: u32 = 1; + +/// Used to compute the number of sectors. +pub const SECTOR_SHIFT: u8 = 9; +pub const DEFAULT_SECTOR_SIZE: u32 = 1_u32 << SECTOR_SHIFT; + +/// Scsi disk's block size is 512 Bytes. +pub const SCSI_DISK_DEFAULT_BLOCK_SIZE_SHIFT: u32 = 9; +pub const SCSI_DISK_DEFAULT_BLOCK_SIZE: u32 = 1 << SCSI_DISK_DEFAULT_BLOCK_SIZE_SHIFT; + +/// Scsi media device's block size is 2048 Bytes. 
+pub const SCSI_CDROM_DEFAULT_BLOCK_SIZE_SHIFT: u32 = 11; +pub const SCSI_CDROM_DEFAULT_BLOCK_SIZE: u32 = 1 << SCSI_CDROM_DEFAULT_BLOCK_SIZE_SHIFT; + +// Stratovirt uses scsi mod in only virtio-scsi and usb-storage. Scsi's channel/target/lun +// of usb-storage are both 0. Scsi's channel/target/lun of virtio-scsi is no more than 0/255/16383. +// Set valid range of channel/target according to the range of virtio-scsi as 0/255. +// +// For stratovirt doesn't support `Flat space addressing format`(14 bits for lun) and only supports +// `peripheral device addressing format`(8 bits for lun) now, lun should be less than 255(2^8 - 1) temporarily. +const SCSI_MAX_CHANNEL: i64 = 0; +const SCSI_MAX_TARGET: i64 = 255; +const SUPPORT_SCSI_MAX_LUN: i64 = 255; + +#[derive(Parser, Clone, Debug, Default)] +#[command(no_binary_name(true))] +pub struct ScsiDevConfig { + #[arg(long, value_parser = ["scsi-cd", "scsi-hd"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long, value_parser = valid_scsi_bus)] + pub bus: String, + /// Scsi four level hierarchical address(host, channel, target, lun). + #[arg(long, default_value = "0", value_parser = clap::value_parser!(u8).range(..=SCSI_MAX_CHANNEL))] + pub channel: u8, + #[arg(long, alias = "scsi-id", value_parser = clap::value_parser!(u8).range(..=SCSI_MAX_TARGET))] + pub target: u8, + #[arg(long, value_parser = clap::value_parser!(u16).range(..=SUPPORT_SCSI_MAX_LUN))] + pub lun: u16, + #[arg(long)] + pub drive: String, + #[arg(long)] + pub serial: Option, + #[arg(long)] + pub bootindex: Option, +} + +// Scsi device should has bus named as "$parent_cntlr_name.0". +fn valid_scsi_bus(bus: &str) -> Result { + let strs = bus.split('.').collect::>(); + if strs.len() != 2 || strs[1] != "0" { + bail!("Invalid scsi bus {}", bus); + } + Ok(bus.to_string()) +} + +#[derive(Clone, Default)] +pub struct ScsiDevState { + /// Features which the scsi device supports. + pub features: u32, + /// Scsi device vendor identification. + pub vendor: String, + /// Scsi device product identification. + pub product: String, + /// Scsi device id. + pub device_id: String, + /// The standard version which the scsi device complies to. + pub version: String, + /// Scsi device serial number. + pub serial: String, +} + +impl ScsiDevState { + fn new() -> Self { + ScsiDevState { + features: 0, + vendor: "STRA".to_string(), + product: "".to_string(), + device_id: "".to_string(), + version: "".to_string(), + serial: "".to_string(), + } + } +} + +impl Device for ScsiDevice { + gen_base_func!(device_base, device_base_mut, DeviceBase, base); + + fn realize(mut self) -> Result>> { + match self.scsi_type { + SCSI_TYPE_DISK => { + self.block_size = SCSI_DISK_DEFAULT_BLOCK_SIZE; + self.state.product = "STRA HARDDISK".to_string(); + } + SCSI_TYPE_ROM => { + self.block_size = SCSI_CDROM_DEFAULT_BLOCK_SIZE; + self.state.product = "STRA CDROM".to_string(); + } + _ => { + bail!("Scsi type {} does not support now", self.scsi_type); + } + } + + if let Some(serial) = &self.dev_cfg.serial { + self.state.serial = serial.clone(); + } + + let drive_files = self.drive_files.lock().unwrap(); + // File path can not be empty string. And it has also been checked in command parsing by using `Clap`. 
+ let file = VmConfig::fetch_drive_file(&drive_files, &self.drive_cfg.path_on_host)?; + + let alignments = VmConfig::fetch_drive_align(&drive_files, &self.drive_cfg.path_on_host)?; + self.req_align = alignments.0; + self.buf_align = alignments.1; + let drive_id = VmConfig::get_drive_id(&drive_files, &self.drive_cfg.path_on_host)?; + drop(drive_files); + + let mut thread_pool = None; + if self.drive_cfg.aio != AioEngine::Off { + thread_pool = Some(EventLoop::get_ctx(None).unwrap().thread_pool.clone()); + } + let aio = Aio::new(Arc::new(aio_complete_cb), self.drive_cfg.aio, thread_pool)?; + let conf = BlockProperty { + id: drive_id, + format: self.drive_cfg.format, + iothread: self.iothread.clone(), + direct: self.drive_cfg.direct, + req_align: self.req_align, + buf_align: self.buf_align, + discard: false, + write_zeroes: WriteZeroesState::Off, + l2_cache_size: self.drive_cfg.l2_cache_size, + refcount_cache_size: self.drive_cfg.refcount_cache_size, + }; + let backend = create_block_backend(file, aio, conf)?; + let disk_size = backend.lock().unwrap().disk_size()?; + self.block_backend = Some(backend); + self.disk_sectors = disk_size >> SECTOR_SHIFT; + + let dev = Arc::new(Mutex::new(self)); + Ok(dev) + } +} + +/// Convert from Arc> to &ScsiDevice. +#[macro_export] +macro_rules! SCSI_DEVICE { + ($trait_device:expr, $lock_device: ident, $struct_device: ident) => { + convert_device_ref!($trait_device, $lock_device, $struct_device, ScsiDevice); + }; +} + +#[derive(Default)] +pub struct ScsiDevice { + pub base: DeviceBase, + /// Configuration of the scsi device. + pub dev_cfg: ScsiDevConfig, + /// Configuration of the scsi device's drive. + pub drive_cfg: DriveConfig, + /// State of the scsi device. + pub state: ScsiDevState, + /// Block backend opened by scsi device. + pub block_backend: Option>>>, + /// The align requirement of request(offset/len). + pub req_align: u32, + /// The align requirement of buffer(iova_base). + pub buf_align: u32, + /// Number of sectors of the image file. + pub disk_sectors: u64, + /// Scsi Device block size. + pub block_size: u32, + /// Scsi device type. + pub scsi_type: u32, + /// Drive backend files. + drive_files: Arc>>, + /// Aio context. + pub aio: Option>>>, + pub iothread: Option, +} + +// SAFETY: the devices attached in one scsi controller will process IO in the same thread. +unsafe impl Send for ScsiDevice {} +// SAFETY: The reason is same as above. +unsafe impl Sync for ScsiDevice {} + +impl ScsiDevice { + pub fn new( + dev_cfg: ScsiDevConfig, + drive_cfg: DriveConfig, + drive_files: Arc>>, + iothread: Option, + scsi_bus: Arc>, + ) -> ScsiDevice { + let scsi_type = match dev_cfg.classtype.as_str() { + "scsi-hd" => SCSI_TYPE_DISK, + _ => SCSI_TYPE_ROM, + }; + + let mut scsi_dev = ScsiDevice { + base: DeviceBase::new(dev_cfg.id.clone(), false, None), + dev_cfg, + drive_cfg, + state: ScsiDevState::new(), + req_align: 1, + buf_align: 1, + scsi_type, + drive_files, + iothread, + ..Default::default() + }; + scsi_dev.set_parent_bus(scsi_bus); + scsi_dev + } +} + +#[cfg(test)] +mod tests { + use super::*; + use machine_manager::config::str_slip_to_clap; + + #[test] + fn test_scsi_device_cmdline_parser() { + // Test1: Right. 
+ let cmdline1 = "scsi-hd,bus=scsi0.0,scsi-id=0,lun=0,drive=drive-0-0-0-0,id=scsi0-0-0-0,serial=123456,bootindex=1"; + let config = + ScsiDevConfig::try_parse_from(str_slip_to_clap(cmdline1, true, false)).unwrap(); + assert_eq!(config.id, "scsi0-0-0-0"); + assert_eq!(config.bus, "scsi0.0"); + assert_eq!(config.target, 0); + assert_eq!(config.lun, 0); + assert_eq!(config.drive, "drive-0-0-0-0"); + assert_eq!(config.serial.unwrap(), "123456"); + assert_eq!(config.bootindex.unwrap(), 1); + + // Test2: Default value. + let cmdline2 = "scsi-cd,bus=scsi0.0,scsi-id=0,lun=0,drive=drive-0-0-0-0,id=scsi0-0-0-0"; + let config = + ScsiDevConfig::try_parse_from(str_slip_to_clap(cmdline2, true, false)).unwrap(); + assert_eq!(config.channel, 0); + assert_eq!(config.serial, None); + assert_eq!(config.bootindex, None); + + // Test3: Illegal value. + let cmdline3 = "scsi-hd,bus=scsi0.0,scsi-id=256,lun=0,drive=drive-0-0-0-0,id=scsi0-0-0-0"; + let result = ScsiDevConfig::try_parse_from(str_slip_to_clap(cmdline3, true, false)); + assert!(result.is_err()); + let cmdline3 = "scsi-hd,bus=scsi0.0,scsi-id=0,lun=256,drive=drive-0-0-0-0,id=scsi0-0-0-0"; + let result = ScsiDevConfig::try_parse_from(str_slip_to_clap(cmdline3, true, false)); + assert!(result.is_err()); + let cmdline3 = "illegal,bus=scsi0.0,scsi-id=0,lun=0,drive=drive-0-0-0-0,id=scsi0-0-0-0"; + let result = ScsiDevConfig::try_parse_from(str_slip_to_clap(cmdline3, true, false)); + assert!(result.is_err()); + + // Test4: Missing necessary parameters. + let cmdline4 = "scsi-hd,scsi-id=0,lun=0,drive=drive-0-0-0-0,id=scsi0-0-0-0"; + let result = ScsiDevConfig::try_parse_from(str_slip_to_clap(cmdline4, true, false)); + assert!(result.is_err()); + let cmdline4 = "scsi-hd,bus=scsi0.0,lun=0,drive=drive-0-0-0-0,id=scsi0-0-0-0"; + let result = ScsiDevConfig::try_parse_from(str_slip_to_clap(cmdline4, true, false)); + assert!(result.is_err()); + let cmdline4 = "scsi-hd,bus=scsi0.0,scsi-id=0,drive=drive-0-0-0-0,id=scsi0-0-0-0"; + let result = ScsiDevConfig::try_parse_from(str_slip_to_clap(cmdline4, true, false)); + assert!(result.is_err()); + let cmdline4 = "scsi-hd,bus=scsi0.0,scsi-id=0,lun=0,id=scsi0-0-0-0"; + let result = ScsiDevConfig::try_parse_from(str_slip_to_clap(cmdline4, true, false)); + assert!(result.is_err()); + let cmdline4 = "scsi-hd,bus=scsi0.0,scsi-id=0,lun=0,drive=drive-0-0-0-0"; + let result = ScsiDevConfig::try_parse_from(str_slip_to_clap(cmdline4, true, false)); + assert!(result.is_err()); + } +} diff --git a/devices/src/scsi/mod.rs b/devices/src/scsi/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..466de40753afc289755d589dd79a70f2fdcf9cd1 --- /dev/null +++ b/devices/src/scsi/mod.rs @@ -0,0 +1,14 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
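+
+//! SCSI bus and disk/CD-ROM device emulation.
+//!
+//! A minimal usage sketch for attaching a device, mirroring the option
+//! string exercised by the unit tests in `disk.rs` (the surrounding VM
+//! setup is assumed and not shown here):
+//!
+//! ```ignore
+//! use clap::Parser;
+//! use machine_manager::config::str_slip_to_clap;
+//!
+//! let args = "scsi-hd,bus=scsi0.0,scsi-id=0,lun=0,drive=drive-0-0-0-0,id=scsi0-0-0-0";
+//! let cfg = disk::ScsiDevConfig::try_parse_from(str_slip_to_clap(args, true, false)).unwrap();
+//! assert_eq!(cfg.bus, "scsi0.0"); // bus must be named "<controller id>.0"
+//! ```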
+ +pub mod bus; +pub mod disk; diff --git a/devices/src/smbios/mod.rs b/devices/src/smbios/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..f5aa812ee71e8d62a565cc373e433fc125b65dd6 --- /dev/null +++ b/devices/src/smbios/mod.rs @@ -0,0 +1,18 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod smbios_table; + +// The name of corresponding file-entry in FwCfg device that represents smbios table data. +pub const SMBIOS_TABLE_FILE: &str = "etc/smbios/smbios-tables"; +// The name of corresponding file-entry in FwCfg device that represents smbios table anchor. +pub const SMBIOS_ANCHOR_FILE: &str = "etc/smbios/smbios-anchor"; diff --git a/devices/src/smbios/smbios_table.rs b/devices/src/smbios/smbios_table.rs new file mode 100644 index 0000000000000000000000000000000000000000..2751e6c9312785775b411a7335a50702d542216e --- /dev/null +++ b/devices/src/smbios/smbios_table.rs @@ -0,0 +1,1061 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
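+
+//! Builders for the guest SMBIOS tables (and their `_SM3_` entry point)
+//! that are handed to firmware through the FwCfg files declared in
+//! `smbios/mod.rs`.
+//!
+//! Every structure is a packed formatted header followed by its string-set:
+//! each referenced string is appended NUL-terminated and the structure ends
+//! with one extra NUL, so a structure without strings ends in two NUL bytes.
+//! A minimal sketch of that encoding rule, which the per-type `set_str()` /
+//! `finish()` helpers below implement incrementally:
+//!
+//! ```ignore
+//! fn encode_structure(header: &[u8], strings: &[&str]) -> Vec<u8> {
+//!     let mut out = header.to_vec();
+//!     for s in strings {
+//!         out.extend_from_slice(s.as_bytes());
+//!         out.push(0); // every string is NUL-terminated
+//!     }
+//!     out.push(0); // structure terminator
+//!     if strings.is_empty() {
+//!         out.push(0); // an empty string-set still needs the double NUL
+//!     }
+//!     out
+//! }
+//! ```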
+ +use std::mem::size_of; + +use machine_manager::config::{ + MachineConfig, SmbiosConfig, SmbiosType0Config, SmbiosType17Config, SmbiosType1Config, + SmbiosType2Config, SmbiosType3Config, SmbiosType4Config, +}; +use util::byte_code::ByteCode; + +const TYPE0_HANDLE: u16 = 0x0; +const TYPE1_HANDLE: u16 = 0x100; +const TYPE2_HANDLE: u16 = 0x200; +const TYPE3_HANDLE: u16 = 0x300; +const TYPE4_HANDLE: u16 = 0x400; +const TYPE16_HANDLE: u16 = 0x1000; +const TYPE17_HANDLE: u16 = 0x1100; +const TYPE19_HANDLE: u16 = 0x1300; +const TYPE32_HANDLE: u16 = 0x2000; +const TYPE127_HANDLE: u16 = 0x7F00; + +const GB_SIZE: u64 = 1_u64 << 30; +const KB_2T_SIZE: u32 = 0x80000000; +const HYPERVISOR_STR: &str = "StratoVirt"; + +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +struct SmbiosHeader { + type_num: u8, + len: u8, + handle: u16, +} + +impl SmbiosHeader { + fn new(type_num: u8, len: u8, handle: u16) -> SmbiosHeader { + SmbiosHeader { + type_num, + len, + handle, + } + } +} + +/// Type0: BIOS information +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +struct SmbiosType0 { + header: SmbiosHeader, + vendor_idx: u8, + bios_version_idx: u8, + bios_starting_addr_seg: [u8; 2], + bios_release_date_idx: u8, + bios_rom_size: u8, + bios_characteristics: [u8; 8], + bios_characteristics_ext: [u8; 2], + system_bios_major_release: u8, + system_bios_minor_release: u8, + embedded_controller_major_release: u8, + embedded_controller_minor_release: u8, +} + +impl ByteCode for SmbiosType0 {} + +impl SmbiosType0 { + fn new() -> SmbiosType0 { + SmbiosType0 { + header: SmbiosHeader::new(0_u8, size_of::() as u8, TYPE0_HANDLE), + bios_starting_addr_seg: 0xE800_u16.to_le_bytes(), + bios_rom_size: 0_u8, + bios_characteristics: 0x08_u64.to_le_bytes(), + bios_characteristics_ext: [0, 0x1C], + embedded_controller_major_release: 0xFF, + embedded_controller_minor_release: 0xFF, + ..Default::default() + } + } +} +#[derive(Default, Clone)] +struct SmbiosType0Table { + header: SmbiosType0, + body: Vec, + str_index: u8, +} + +impl SmbiosType0Table { + fn new() -> SmbiosType0Table { + SmbiosType0Table { + header: SmbiosType0::new(), + body: Vec::new(), + str_index: 0_u8, + } + } + + fn set_str(&mut self, str: String) { + self.str_index += 1; + self.body.append(&mut str.as_bytes().to_vec()); + self.body.append(&mut vec![0]); + } + + fn finish(&mut self) { + if self.str_index == 0 { + self.body.append(&mut vec![0; 2]); + } else { + self.body.append(&mut vec![0]); + } + } +} + +/// Type1: System information +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +struct SmbiosType1 { + header: SmbiosHeader, + manufacturer: u8, + product_name: u8, + version: u8, + serial_num: u8, + uuid: [u8; 16], + wake_up_type: u8, + sku_num: u8, + family: u8, +} + +impl ByteCode for SmbiosType1 {} + +impl SmbiosType1 { + fn new() -> SmbiosType1 { + SmbiosType1 { + header: SmbiosHeader::new(1_u8, size_of::() as u8, TYPE1_HANDLE), + wake_up_type: 0x6_u8, + ..Default::default() + } + } +} + +#[derive(Default, Clone)] +struct SmbiosType1Table { + header: SmbiosType1, + body: Vec, + str_index: u8, +} + +impl SmbiosType1Table { + fn new() -> SmbiosType1Table { + SmbiosType1Table { + header: SmbiosType1::new(), + body: Vec::new(), + str_index: 0_u8, + } + } + + fn set_str(&mut self, str: String) { + self.str_index += 1; + self.body.append(&mut str.as_bytes().to_vec()); + self.body.append(&mut vec![0]); + } + + fn finish(&mut self) { + if self.str_index == 0 { + self.body.append(&mut vec![0; 2]); + } else { + self.body.append(&mut vec![0]); + } + } +} + +/// 
Type2: Baseboard information +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +struct SmbiosType2 { + header: SmbiosHeader, + manufacturer: u8, + product_name: u8, + version: u8, + serial_num: u8, + asset_tag_num: u8, + feature_flags: u8, + location: u8, + chassis_handle: [u8; 2], + board_type: u8, + contained_element_count: u8, +} + +impl ByteCode for SmbiosType2 {} + +impl SmbiosType2 { + fn new() -> SmbiosType2 { + SmbiosType2 { + header: SmbiosHeader::new(2_u8, size_of::() as u8, TYPE2_HANDLE), + feature_flags: 1_u8, + chassis_handle: 0x300_u16.to_le_bytes(), + board_type: 0x0A_u8, + ..Default::default() + } + } +} + +#[derive(Default, Clone)] +struct SmbiosType2Table { + header: SmbiosType2, + body: Vec, + str_index: u8, +} + +impl SmbiosType2Table { + fn new() -> SmbiosType2Table { + SmbiosType2Table { + header: SmbiosType2::new(), + body: Vec::new(), + str_index: 0_u8, + } + } + + fn set_str(&mut self, str: String) { + self.str_index += 1; + self.body.append(&mut str.as_bytes().to_vec()); + self.body.append(&mut vec![0]); + } + + fn finish(&mut self) { + if self.str_index == 0 { + self.body.append(&mut vec![0; 2]); + } else { + self.body.append(&mut vec![0]); + } + } +} + +/// Type3: System enclosure information +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +struct SmbiosType3 { + header: SmbiosHeader, + manufacturer: u8, + type_id: u8, + version: u8, + serial_num: u8, + asset_tag_num: u8, + boot_up_state: u8, + power_supply_state: u8, + thermal_state: u8, + security_status: u8, + oem_defined: [u8; 4], + height: u8, + number_of_power_cords: u8, + contained_element_count: u8, + contained_element_record_length: u8, + sku_num: u8, +} + +impl ByteCode for SmbiosType3 {} + +impl SmbiosType3 { + fn new() -> SmbiosType3 { + SmbiosType3 { + header: SmbiosHeader::new(3_u8, size_of::() as u8, TYPE3_HANDLE), + type_id: 0x1_u8, + boot_up_state: 0x03_u8, + power_supply_state: 0x03_u8, + thermal_state: 0x03_u8, + security_status: 0x02_u8, + ..Default::default() + } + } +} + +#[derive(Default, Clone)] +struct SmbiosType3Table { + header: SmbiosType3, + body: Vec, + str_index: u8, +} + +impl SmbiosType3Table { + fn new() -> SmbiosType3Table { + SmbiosType3Table { + header: SmbiosType3::new(), + body: Vec::new(), + str_index: 0_u8, + } + } + + fn set_str(&mut self, str: String) { + self.str_index += 1; + self.body.append(&mut str.as_bytes().to_vec()); + self.body.append(&mut vec![0]); + } + + fn finish(&mut self) { + if self.str_index == 0 { + self.body.append(&mut vec![0; 2]); + } else { + self.body.append(&mut vec![0]); + } + } +} + +/// Type4: Processor information +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +struct SmbiosType4 { + header: SmbiosHeader, + socket_design: u8, + processor_type: u8, + processor_family: u8, + processor_manufacturer: u8, + processor_id0: [u8; 4], + processor_id1: [u8; 4], + processor_version: u8, + voltage: u8, + external_clock: [u8; 2], + max_speed: [u8; 2], + current_speed: [u8; 2], + status: u8, + processor_upgrade: u8, + l1_cache_handle: [u8; 2], + l2_cache_handle: [u8; 2], + l3_cache_handle: [u8; 2], + serial_num: u8, + asset_tag_num: u8, + part_num: u8, + core_count: u8, + core_enabled: u8, + thread_count: u8, + processor_characteristics: [u8; 2], + processor_family2: [u8; 2], + core_count2: [u8; 2], + core_enabled2: [u8; 2], + thread_count2: [u8; 2], +} + +impl ByteCode for SmbiosType4 {} + +impl SmbiosType4 { + fn new(instance: u16) -> SmbiosType4 { + SmbiosType4 { + header: SmbiosHeader::new( + 4_u8, + size_of::() as u8, + TYPE4_HANDLE + 
instance, + ), + processor_type: 0x03_u8, + processor_family: 0x01_u8, + status: 0x41_u8, + processor_upgrade: 0x01_u8, + l1_cache_handle: 0xFFFF_u16.to_le_bytes(), + l2_cache_handle: 0xFFFF_u16.to_le_bytes(), + l3_cache_handle: 0xFFFF_u16.to_le_bytes(), + processor_characteristics: 0x02_u16.to_le_bytes(), + processor_family2: 0x01_u16.to_le_bytes(), + ..Default::default() + } + } +} + +#[derive(Default, Clone)] +struct SmbiosType4Table { + header: SmbiosType4, + body: Vec, + str_index: u8, +} + +impl SmbiosType4Table { + fn new(instance: u16) -> SmbiosType4Table { + SmbiosType4Table { + header: SmbiosType4::new(instance), + body: Vec::new(), + str_index: 0_u8, + } + } + + fn set_str(&mut self, str: String) { + self.str_index += 1; + self.body.append(&mut str.as_bytes().to_vec()); + self.body.append(&mut vec![0]); + } + + fn finish(&mut self) { + if self.str_index == 0 { + self.body.append(&mut vec![0; 2]); + } else { + self.body.append(&mut vec![0]); + } + } +} + +/// Type16: Physical memory array information +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +struct SmbiosType16 { + header: SmbiosHeader, + location: u8, + used: u8, + error_correction: u8, + maximum_capacity: [u8; 4], + memory_error_information_handle: [u8; 2], + number_of_memory_devices: [u8; 2], + extended_maximum_capacity: [u8; 8], +} + +impl ByteCode for SmbiosType16 {} + +impl SmbiosType16 { + fn new(cnt: u16) -> SmbiosType16 { + SmbiosType16 { + header: SmbiosHeader::new(16_u8, size_of::() as u8, TYPE16_HANDLE), + location: 0x01, + used: 0x03, + error_correction: 0x06, + memory_error_information_handle: 0xFFFE_u16.to_le_bytes(), + number_of_memory_devices: cnt.to_le_bytes(), + ..Default::default() + } + } +} + +#[derive(Default, Clone)] +struct SmbiosType16Table { + header: SmbiosType16, + body: Vec, +} + +impl SmbiosType16Table { + fn new(cnt: u16) -> SmbiosType16Table { + SmbiosType16Table { + header: SmbiosType16::new(cnt), + body: Vec::new(), + } + } + + fn finish(&mut self) { + self.body.append(&mut vec![0; 2]); + } +} + +/// Type17: memory device +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +struct SmbiosType17 { + header: SmbiosHeader, + physical_memory_array_handle: [u8; 2], + memory_error_information_handle: [u8; 2], + total_width: [u8; 2], + data_width: [u8; 2], + size: [u8; 2], + form_factor: u8, + device_set: u8, + device_locator_str: u8, + bank_locator_str: u8, + memory_type: u8, + type_detail: [u8; 2], + speed: [u8; 2], + manufacturer_str: u8, + serial_number_str: u8, + asset_tag_number_str: u8, + part_number_str: u8, + attributes: u8, + extended_size: [u8; 4], + configured_clock_speed: [u8; 2], + minimum_voltage: [u8; 2], + maximum_voltage: [u8; 2], + configured_voltage: [u8; 2], +} + +impl ByteCode for SmbiosType17 {} + +impl SmbiosType17 { + fn new(ins: u16) -> SmbiosType17 { + SmbiosType17 { + header: SmbiosHeader::new(17_u8, size_of::() as u8, TYPE17_HANDLE + ins), + physical_memory_array_handle: 0x1000_u16.to_le_bytes(), + memory_error_information_handle: 0xFFFE_u16.to_le_bytes(), + total_width: 0xFFFF_u16.to_le_bytes(), + data_width: 0xFFFF_u16.to_le_bytes(), + form_factor: 0x09, + memory_type: 0x07, + type_detail: 0x02_u16.to_le_bytes(), + ..Default::default() + } + } +} + +#[derive(Default, Clone)] +struct SmbiosType17Table { + header: SmbiosType17, + body: Vec, + str_index: u8, +} + +impl SmbiosType17Table { + fn new(ins: u16) -> SmbiosType17Table { + SmbiosType17Table { + header: SmbiosType17::new(ins), + body: Vec::new(), + str_index: 0_u8, + } + } + + fn set_str(&mut self, str: 
String) { + self.str_index += 1; + self.body.append(&mut str.as_bytes().to_vec()); + self.body.append(&mut vec![0]); + } + + fn finish(&mut self) { + if self.str_index == 0 { + self.body.append(&mut vec![0; 2]); + } else { + self.body.append(&mut vec![0]); + } + } +} + +/// Type19: Memory device information +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +struct SmbiosType19 { + header: SmbiosHeader, + starting_address: [u8; 4], + ending_address: [u8; 4], + memory_array_handle: [u8; 2], + partition_width: u8, + extended_starting_address: [u8; 8], + extended_ending_address: [u8; 8], +} + +impl ByteCode for SmbiosType19 {} + +impl SmbiosType19 { + fn new(ins: u16) -> SmbiosType19 { + SmbiosType19 { + header: SmbiosHeader::new(19_u8, size_of::() as u8, TYPE19_HANDLE + ins), + memory_array_handle: 0x1000_u16.to_le_bytes(), + partition_width: 1, + ..Default::default() + } + } +} + +#[derive(Default, Clone)] +struct SmbiosType19Table { + header: SmbiosType19, + body: Vec, +} + +impl SmbiosType19Table { + fn new(ins: u16) -> SmbiosType19Table { + SmbiosType19Table { + header: SmbiosType19::new(ins), + body: Vec::new(), + } + } + + fn finish(&mut self) { + self.body.append(&mut vec![0; 2]); + } +} + +/// Type32: boot information +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +struct SmbiosType32 { + header: SmbiosHeader, + reserved: [u8; 6], + boot_status: u8, +} + +impl ByteCode for SmbiosType32 {} + +impl SmbiosType32 { + fn new() -> SmbiosType32 { + SmbiosType32 { + header: SmbiosHeader::new(32_u8, size_of::() as u8, TYPE32_HANDLE), + ..Default::default() + } + } +} + +#[derive(Default, Clone)] +struct SmbiosType32Table { + header: SmbiosType32, + body: Vec, +} + +impl SmbiosType32Table { + fn new() -> SmbiosType32Table { + SmbiosType32Table { + header: SmbiosType32::new(), + body: Vec::new(), + } + } + + fn finish(&mut self) { + self.body.append(&mut vec![0; 2]); + } +} + +/// Type127: End of table +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +struct SmbiosType127 { + header: SmbiosHeader, +} + +impl SmbiosType127 { + fn new() -> SmbiosType127 { + SmbiosType127 { + header: SmbiosHeader::new(127_u8, size_of::() as u8, TYPE127_HANDLE), + } + } +} + +impl ByteCode for SmbiosType127 {} + +#[derive(Default, Clone)] +struct SmbiosType127Table { + header: SmbiosType127, + body: Vec, +} + +impl SmbiosType127Table { + fn new() -> SmbiosType127Table { + SmbiosType127Table { + header: SmbiosType127::new(), + body: Vec::new(), + } + } + + fn finish(&mut self) { + self.body.append(&mut vec![0; 2]); + } +} + +#[derive(Default)] +pub struct SmbiosTable { + entries: Vec, +} + +impl SmbiosTable { + pub fn new() -> SmbiosTable { + SmbiosTable { + entries: Vec::new(), + } + } + + fn build_type0(&mut self, type0: SmbiosType0Config) { + let mut table0: SmbiosType0Table = SmbiosType0Table::new(); + + if let Some(vendor) = type0.vendor { + table0.header.vendor_idx = table0.str_index + 1; + table0.set_str(vendor); + } + + if let Some(version) = type0.version { + table0.header.bios_version_idx = table0.str_index + 1; + table0.set_str(version); + } + + if let Some(date) = type0.date { + table0.header.bios_release_date_idx = table0.str_index + 1; + table0.set_str(date); + } + table0.finish(); + + self.entries.append(&mut table0.header.as_bytes().to_vec()); + self.entries.append(&mut table0.body); + } + + fn build_type1(&mut self, type1: SmbiosType1Config) { + let mut table1: SmbiosType1Table = SmbiosType1Table::new(); + + table1.header.manufacturer = table1.str_index + 1; + if let Some(manufacturer) = 
type1.manufacturer { + table1.set_str(manufacturer); + } else { + table1.set_str(String::from(HYPERVISOR_STR)); + } + + table1.header.product_name = table1.str_index + 1; + if let Some(product) = type1.product { + table1.set_str(product); + } else { + table1.set_str(String::from("Virtual Machine")); + } + + table1.header.version = table1.str_index + 1; + if let Some(version) = type1.version { + table1.set_str(version); + } else { + table1.set_str(String::from(HYPERVISOR_STR)); + } + + if let Some(serial) = type1.serial { + table1.header.serial_num = table1.str_index + 1; + table1.set_str(serial); + } + + if let Some(sku) = type1.sku { + table1.header.sku_num = table1.str_index + 1; + table1.set_str(sku); + } + + if let Some(family) = type1.family { + table1.header.family = table1.str_index + 1; + table1.set_str(family); + } + + if let Some(uuid) = type1.uuid { + for (idx, data) in uuid.name.iter().enumerate() { + table1.header.uuid[idx] = *data; + } + } + table1.finish(); + + self.entries.append(&mut table1.header.as_bytes().to_vec()); + self.entries.append(&mut table1.body); + } + + fn build_type2(&mut self, type2: SmbiosType2Config) { + if !type2.added { + return; + } + let mut table2 = SmbiosType2Table::new(); + + table2.header.manufacturer = table2.str_index + 1; + if let Some(manufacturer) = type2.manufacturer { + table2.set_str(manufacturer); + } else { + table2.set_str(String::from(HYPERVISOR_STR)); + } + + table2.header.product_name = table2.str_index + 1; + if let Some(product) = type2.product { + table2.set_str(product); + } else { + table2.set_str(String::from("Virtual Machine")); + } + + table2.header.version = table2.str_index + 1; + if let Some(version) = type2.version { + table2.set_str(version); + } else { + table2.set_str(String::from(HYPERVISOR_STR)); + } + + if let Some(serial) = type2.serial { + table2.header.serial_num = table2.str_index + 1; + table2.set_str(serial); + } + + if let Some(location) = type2.location { + table2.header.location = table2.str_index + 1; + table2.set_str(location); + } + + if let Some(asset) = type2.asset { + table2.header.asset_tag_num = table2.str_index + 1; + table2.set_str(asset); + } + + table2.finish(); + + self.entries.append(&mut table2.header.as_bytes().to_vec()); + self.entries.append(&mut table2.body); + } + + fn build_type3(&mut self, type3: SmbiosType3Config) { + let mut table3 = SmbiosType3Table::new(); + + table3.header.manufacturer = table3.str_index + 1; + if let Some(manufacturer) = type3.manufacturer { + table3.set_str(manufacturer); + } else { + table3.set_str(String::from(HYPERVISOR_STR)); + } + + table3.header.version = table3.str_index + 1; + if let Some(version) = type3.version { + table3.set_str(version); + } else { + table3.set_str(String::from(HYPERVISOR_STR)); + } + + if let Some(serial) = type3.serial { + table3.header.serial_num = table3.str_index + 1; + table3.set_str(serial); + } + + if let Some(sku) = type3.sku { + table3.header.sku_num = table3.str_index + 1; + table3.set_str(sku); + } + + if let Some(asset) = type3.asset { + table3.header.asset_tag_num = table3.str_index + 1; + table3.set_str(asset); + } + + table3.finish(); + + self.entries.append(&mut table3.header.as_bytes().to_vec()); + self.entries.append(&mut table3.body); + } + + fn build_type4(&mut self, type4: SmbiosType4Config, instance: u16, mach_cfg: &MachineConfig) { + let mut table4 = SmbiosType4Table::new(instance); + + table4.header.socket_design = table4.str_index + 1; + if let Some(sock_str) = type4.sock_pfx { + 
table4.set_str(std::format!("{}{:2x}", sock_str, instance)); + } else { + table4.set_str(std::format!("CPU{:2x}", instance)); + } + + table4.header.processor_manufacturer = table4.str_index + 1; + if let Some(manufacturer) = type4.manufacturer { + table4.set_str(manufacturer); + } else { + table4.set_str(String::from(HYPERVISOR_STR)); + } + + table4.header.processor_version = table4.str_index + 1; + if let Some(version) = type4.version { + table4.set_str(version); + } else { + table4.set_str(String::from(HYPERVISOR_STR)); + } + + if let Some(serial) = type4.serial { + table4.header.serial_num = table4.str_index + 1; + table4.set_str(serial); + } + + if let Some(asset) = type4.asset { + table4.header.asset_tag_num = table4.str_index + 1; + table4.set_str(asset); + } + + if let Some(part) = type4.part { + table4.header.part_num = table4.str_index + 1; + table4.set_str(part); + } + + if let Some(max_speed) = type4.max_speed { + table4.header.max_speed = (max_speed as u16).to_le_bytes(); + } else { + table4.header.max_speed = 2000_u16.to_le_bytes(); + } + + if let Some(current_speed) = type4.current_speed { + table4.header.current_speed = (current_speed as u16).to_le_bytes(); + } else { + table4.header.current_speed = 2000_u16.to_le_bytes(); + } + + table4.header.core_count = mach_cfg.nr_cores; + table4.header.core_enabled = mach_cfg.nr_cores; + + table4.header.core_count2 = u16::from(mach_cfg.nr_cores).to_le_bytes(); + table4.header.core_enabled2 = u16::from(mach_cfg.nr_cores).to_le_bytes(); + + table4.header.thread_count = mach_cfg.nr_threads; + table4.header.thread_count2 = u16::from(mach_cfg.nr_threads).to_le_bytes(); + table4.finish(); + + self.entries.append(&mut table4.header.as_bytes().to_vec()); + self.entries.append(&mut table4.body); + } + + fn build_type16(&mut self, size: u64, number_device: u16) { + let mut table16 = SmbiosType16Table::new(1); + + let size_kb = (size / 1024) as u32; + if size_kb < KB_2T_SIZE { + table16.header.maximum_capacity = size_kb.to_le_bytes(); + } else { + table16.header.maximum_capacity = KB_2T_SIZE.to_le_bytes(); + table16.header.extended_maximum_capacity = size.to_le_bytes(); + } + table16.header.number_of_memory_devices = number_device.to_le_bytes(); + table16.finish(); + + self.entries.append(&mut table16.header.as_bytes().to_vec()); + self.entries.append(&mut table16.body); + } + + fn build_type17(&mut self, type17: SmbiosType17Config, ins: u16, size: u64) { + let mut table17 = SmbiosType17Table::new(ins); + + let size_mb = (size / 1024 / 1024) as u16; + table17.header.size = size_mb.to_le_bytes(); + + table17.header.manufacturer_str = table17.str_index + 1; + if let Some(manufacturer) = type17.manufacturer { + table17.set_str(manufacturer); + } else { + table17.set_str(String::from(HYPERVISOR_STR)); + } + table17.header.device_locator_str = table17.str_index + 1; + if let Some(loc_pfx) = type17.loc_pfx { + table17.set_str(std::format!("{} {}", loc_pfx, ins)); + } else { + table17.set_str(std::format!("DIMM {}", ins)); + } + + if let Some(bank) = type17.bank { + table17.header.bank_locator_str = table17.str_index + 1; + table17.set_str(bank); + } + + if let Some(serial) = type17.serial { + table17.header.serial_number_str = table17.str_index + 1; + table17.set_str(serial); + } + + if let Some(part) = type17.part { + table17.header.part_number_str = table17.str_index + 1; + table17.set_str(part); + } + + if let Some(asset) = type17.asset { + table17.header.asset_tag_number_str = table17.str_index + 1; + table17.set_str(asset); + } + 
table17.header.speed = type17.speed.to_le_bytes(); + table17.header.configured_clock_speed = type17.speed.to_le_bytes(); + table17.finish(); + + self.entries.append(&mut table17.header.as_bytes().to_vec()); + self.entries.append(&mut table17.body); + } + + fn build_type19(&mut self, ins: u16, start: u64, size: u64) { + let mut table19 = SmbiosType19Table::new(ins); + + let start_kb = start / 1024; + let end_kb = (start + size - 1) / 1024; + + if start_kb < u64::from(u32::MAX) && end_kb < u64::from(u32::MAX) { + table19.header.starting_address = (start_kb as u32).to_le_bytes(); + table19.header.ending_address = (end_kb as u32).to_le_bytes(); + } else { + table19.header.starting_address = u32::MAX.to_le_bytes(); + table19.header.ending_address = u32::MAX.to_le_bytes(); + table19.header.extended_starting_address = start.to_le_bytes(); + table19.header.extended_ending_address = (start + size - 1).to_le_bytes(); + } + + table19.finish(); + + self.entries.append(&mut table19.header.as_bytes().to_vec()); + self.entries.append(&mut table19.body); + } + + fn build_type32(&mut self) { + let mut table32 = SmbiosType32Table::new(); + + table32.finish(); + + self.entries.append(&mut table32.header.as_bytes().to_vec()); + self.entries.append(&mut table32.body); + } + + fn build_type127(&mut self) { + let mut table127 = SmbiosType127Table::new(); + + table127.finish(); + + self.entries + .append(&mut table127.header.as_bytes().to_vec()); + self.entries.append(&mut table127.body); + } + + pub fn build_smbios_tables( + &mut self, + smbios: SmbiosConfig, + mach_cfg: &MachineConfig, + mem_arr: Vec<(u64, u64)>, + ) -> Vec { + self.build_type0(smbios.type0); + self.build_type1(smbios.type1); + self.build_type2(smbios.type2); + self.build_type3(smbios.type3); + + let smbios_sockets = mach_cfg.nr_cpus / (mach_cfg.nr_cores * mach_cfg.nr_threads); + for i in 0..smbios_sockets { + self.build_type4(smbios.type4.clone(), u16::from(i), mach_cfg); + } + let mem_num = ((mach_cfg.mem_config.mem_size + 16 * GB_SIZE - 1) / (16 * GB_SIZE)) as u16; + self.build_type16(mach_cfg.mem_config.mem_size, mem_num); + + for i in 0..mem_num { + let memdev_size = if i < mem_num - 1 { + 16 * GB_SIZE + } else { + (mach_cfg.mem_config.mem_size - 1) % (16 * GB_SIZE) + 1 + }; + self.build_type17(smbios.type17.clone(), i, memdev_size); + } + + let offset = if mem_num > (TYPE19_HANDLE - TYPE17_HANDLE) { + mem_num - (TYPE19_HANDLE - TYPE17_HANDLE) + } else { + 0_u16 + }; + for (index, (start, size)) in mem_arr.iter().enumerate() { + self.build_type19(offset + index as u16, *start, *size); + } + self.build_type32(); + self.build_type127(); + + self.entries.clone() + } +} + +#[repr(C, packed)] +#[derive(Default, Copy, Clone)] +struct SmbiosEntryPoint30 { + anchor_str: [u8; 5], + checksum: u8, + len: u8, + smbios_major_version: u8, + smbios_minor_version: u8, + smbios_doc_rev: u8, + entry_point_revision: u8, + reserved: u8, + structure_table_max_size: [u8; 4], + structure_table_address: u64, +} + +impl ByteCode for SmbiosEntryPoint30 {} +impl SmbiosEntryPoint30 { + fn new(table_len: u32) -> SmbiosEntryPoint30 { + let anchor: [u8; 5] = [b'_', b'S', b'M', b'3', b'_']; + SmbiosEntryPoint30 { + anchor_str: anchor, + len: size_of::() as u8, + entry_point_revision: 1_u8, + smbios_major_version: 3_u8, + smbios_minor_version: 0_u8, + structure_table_max_size: table_len.to_le_bytes(), + ..Default::default() + } + } +} + +pub fn build_smbios_ep30(table_len: u32) -> Vec { + let ep = SmbiosEntryPoint30::new(table_len); + + ep.as_bytes().to_vec() +} diff 
--git a/devices/src/sysbus/error.rs b/devices/src/sysbus/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..1d233b6be21582140b3bb94d5197eaa562ba185b --- /dev/null +++ b/devices/src/sysbus/error.rs @@ -0,0 +1,24 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum SysBusError { + #[error("AddressSpace")] + AddressSpace { + #[from] + source: address_space::error::AddressSpaceError, + }, + #[error("Failed to register region in {0} space: offset={1:#x},size={2:#x}")] + AddRegionErr(&'static str, u64, u64), +} diff --git a/devices/src/sysbus/mod.rs b/devices/src/sysbus/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..01de827dd7337eefe2ebc83b8c3041c4c0f02c8f --- /dev/null +++ b/devices/src/sysbus/mod.rs @@ -0,0 +1,451 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod error; + +pub use error::SysBusError; + +use std::any::{Any, TypeId}; +use std::collections::HashMap; +use std::fmt; +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Context, Result}; +use vmm_sys_util::eventfd::EventFd; + +#[cfg(target_arch = "x86_64")] +use crate::acpi::cpu_controller::CpuController; +use crate::acpi::ged::Ged; +#[cfg(target_arch = "aarch64")] +use crate::acpi::power::PowerDev; +#[cfg(all(feature = "ramfb", target_arch = "aarch64"))] +use crate::legacy::Ramfb; +#[cfg(target_arch = "x86_64")] +use crate::legacy::{FwCfgIO, RTC}; +#[cfg(target_arch = "aarch64")] +use crate::legacy::{FwCfgMem, PL011, PL031}; +use crate::legacy::{PFlash, Serial}; +use crate::pci::PciHost; +use crate::{Bus, BusBase, Device, DeviceBase, IrqState, LineIrqManager, TriggerMode}; +use acpi::{AmlBuilder, AmlScope}; +use address_space::{AddressSpace, GuestAddress, Region, RegionIoEventFd, RegionOps}; +use util::gen_base_func; + +// Now that the serial device use a hardcoded IRQ number (4), and the starting +// free IRQ number can be 5. +#[cfg(target_arch = "x86_64")] +pub const IRQ_BASE: i32 = 5; +#[cfg(target_arch = "x86_64")] +pub const IRQ_MAX: i32 = 15; + +// 0-31 is private to each CPU (SGIs and PPIs). +#[cfg(target_arch = "aarch64")] +pub const IRQ_BASE: i32 = 32; +#[cfg(target_arch = "aarch64")] +pub const IRQ_MAX: i32 = 191; + +pub struct SysBus { + pub base: BusBase, + // Record the largest key used in the BTreemap of the busbase(children field). 
+ max_key: u64, + #[cfg(target_arch = "x86_64")] + pub sys_io: Arc, + pub sys_mem: Arc, + pub free_irqs: (i32, i32), + pub min_free_irq: i32, + pub mmio_region: (u64, u64), + pub min_free_base: u64, + pub irq_manager: Option>, +} + +impl fmt::Debug for SysBus { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut debug = f.debug_struct("SysBus"); + + #[cfg(target_arch = "x86_64")] + let debug = debug.field("sys_io", &self.sys_io); + + debug + .field("sys_mem", &self.sys_mem) + .field("free_irqs", &self.free_irqs) + .field("min_free_irq", &self.min_free_irq) + .field("mmio_region", &self.mmio_region) + .field("min_free_base", &self.min_free_base) + .finish() + } +} + +impl SysBus { + pub fn new( + #[cfg(target_arch = "x86_64")] sys_io: &Arc, + sys_mem: &Arc, + free_irqs: (i32, i32), + mmio_region: (u64, u64), + ) -> Self { + Self { + base: BusBase::new("sysbus".to_string()), + max_key: 0, + #[cfg(target_arch = "x86_64")] + sys_io: sys_io.clone(), + sys_mem: sys_mem.clone(), + free_irqs, + min_free_irq: free_irqs.0, + mmio_region, + min_free_base: mmio_region.0, + irq_manager: None, + } + } + + pub fn build_region_ops(&self, dev: &Arc>) -> RegionOps { + let cloned_dev = dev.clone(); + let read_ops = move |data: &mut [u8], addr: GuestAddress, offset: u64| -> bool { + cloned_dev.lock().unwrap().read(data, addr, offset) + }; + + let cloned_dev = dev.clone(); + let write_ops = move |data: &[u8], addr: GuestAddress, offset: u64| -> bool { + cloned_dev.lock().unwrap().write(data, addr, offset) + }; + + RegionOps { + read: Arc::new(read_ops), + write: Arc::new(write_ops), + } + } + + pub fn attach_device(&mut self, dev: &Arc>) -> Result<()> { + let res = dev.lock().unwrap().get_sys_resource().clone(); + let region_base = res.region_base; + let region_size = res.region_size; + let region_name = res.region_name; + + // region_base/region_size are both 0 means this device doesn't have its own memory layout. + // The normally allocated device region_base is above the `MEM_LAYOUT[LayoutEntryType::Mmio as usize].0`. + if region_base != 0 && region_size != 0 { + let region_ops = self.build_region_ops(dev); + let region = Region::init_io_region(region_size, region_ops, ®ion_name); + let locked_dev = dev.lock().unwrap(); + + region.set_ioeventfds(&locked_dev.ioeventfds()); + match locked_dev.sysbusdev_base().dev_type { + #[cfg(target_arch = "x86_64")] + SysBusDevType::Serial | SysBusDevType::FwCfg | SysBusDevType::Rtc => { + self.sys_io + .root() + .add_subregion(region, region_base) + .with_context(|| { + SysBusError::AddRegionErr("I/O", region_base, region_size) + })?; + } + _ => self + .sys_mem + .root() + .add_subregion(region, region_base) + .with_context(|| { + SysBusError::AddRegionErr("memory", region_base, region_size) + })?, + } + } + + self.sysbus_attach_child(dev.clone())?; + Ok(()) + } + + pub fn sysbus_attach_child(&mut self, dev: Arc>) -> Result<()> { + self.attach_child(self.max_key, dev.clone())?; + // Note: Incrementally generate a number that has no substantive effect, and is only used for the + // key of Btreemap in the busbase(children field). + // The number of system-bus devices is limited, and it is also difficult to reach the `u64` range for + // hot-plug times. So, `u64` is currently sufficient for using and don't consider overflow issues for now. + self.max_key += 1; + Ok(()) + } +} + +impl Bus for SysBus { + gen_base_func!(bus_base, bus_base_mut, BusBase, base); +} + +/// Convert from Arc> to &mut SysBus. +#[macro_export] +macro_rules! 
MUT_SYS_BUS { + ($trait_bus:expr, $lock_bus: ident, $struct_bus: ident) => { + convert_bus_mut!($trait_bus, $lock_bus, $struct_bus, SysBus); + }; +} + +#[derive(Clone)] +pub struct SysRes { + // Note: region_base/region_size are both 0 means that this device doesn't have its own memory layout. + // The normally allocated device memory region is above the `MEM_LAYOUT[LayoutEntryType::Mmio as usize].0`. + pub region_base: u64, + pub region_size: u64, + pub region_name: String, + pub irq: i32, +} + +impl Default for SysRes { + fn default() -> Self { + Self { + region_base: 0, + region_size: 0, + region_name: "".to_string(), + irq: -1, + } + } +} + +#[allow(clippy::upper_case_acronyms)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub enum SysBusDevType { + Serial, + Rtc, + VirtioMmio, + #[cfg(target_arch = "aarch64")] + PL011, + FwCfg, + Flash, + #[cfg(all(feature = "ramfb", target_arch = "aarch64"))] + Ramfb, + Others, +} + +#[derive(Clone)] +pub struct SysBusDevBase { + pub base: DeviceBase, + /// System bus device type. + pub dev_type: SysBusDevType, + /// System resource. + pub res: SysRes, + /// Interrupt event file descriptor. + pub interrupt_evt: Option>, + /// Interrupt state. + pub irq_state: IrqState, +} + +impl Default for SysBusDevBase { + fn default() -> Self { + SysBusDevBase { + base: DeviceBase::default(), + dev_type: SysBusDevType::Others, + res: SysRes::default(), + interrupt_evt: None, + irq_state: IrqState::default(), + } + } +} + +impl SysBusDevBase { + pub fn new(dev_type: SysBusDevType) -> SysBusDevBase { + Self { + dev_type, + ..Default::default() + } + } + + pub fn set_sys(&mut self, irq: i32, region_base: u64, region_size: u64, region_name: &str) { + self.res.irq = irq; + self.res.region_base = region_base; + self.res.region_size = region_size; + self.res.region_name = region_name.to_string(); + } +} + +/// Operations for sysbus devices. +pub trait SysBusDevOps: Device + Send + AmlBuilder { + fn sysbusdev_base(&self) -> &SysBusDevBase; + + fn sysbusdev_base_mut(&mut self) -> &mut SysBusDevBase; + + /// Read function of device. + /// + /// # Arguments + /// + /// * `data` - A u8-type array. + /// * `base` - Base address of this device. + /// * `offset` - Offset from base address. + fn read(&mut self, data: &mut [u8], base: GuestAddress, offset: u64) -> bool; + + /// Write function of device. + /// + /// # Arguments + /// + /// * `data` - A u8-type array. + /// * `base` - Base address of this device. + /// * `offset` - Offset from base address. 
+ fn write(&mut self, data: &[u8], base: GuestAddress, offset: u64) -> bool; + + fn ioeventfds(&self) -> Vec { + Vec::new() + } + + fn interrupt_evt(&self) -> Option> { + self.sysbusdev_base().interrupt_evt.clone() + } + + fn get_irq(&self, sysbus: &mut SysBus) -> Result { + let irq = sysbus.min_free_irq; + if irq > sysbus.free_irqs.1 { + bail!("IRQ number exhausted."); + } + + sysbus.min_free_irq = irq + 1; + Ok(irq) + } + + fn get_sys_resource(&mut self) -> &mut SysRes { + &mut self.sysbusdev_base_mut().res + } + + fn set_sys_resource( + &mut self, + sysbus: &Arc>, + region_base: u64, + region_size: u64, + region_name: &str, + ) -> Result<()> { + let mut locked_sysbus = sysbus.lock().unwrap(); + let irq = self.get_irq(&mut locked_sysbus)?; + let interrupt_evt = self.sysbusdev_base().interrupt_evt.clone(); + let irq_manager = locked_sysbus.irq_manager.clone(); + drop(locked_sysbus); + + self.sysbusdev_base_mut().irq_state = + IrqState::new(irq as u32, interrupt_evt, irq_manager, TriggerMode::Edge); + let irq_state = &mut self.sysbusdev_base_mut().irq_state; + irq_state.register_irq()?; + + self.sysbusdev_base_mut() + .set_sys(irq, region_base, region_size, region_name); + Ok(()) + } + + fn inject_interrupt(&self) { + let irq_state = &self.sysbusdev_base().irq_state; + irq_state.trigger_irq().unwrap_or_else(|e| { + log::error!( + "Device {:?} failed to inject interrupt: {:?}", + self.sysbusdev_base().dev_type, + e + ) + }); + } +} + +/// Convert from Arc> to &mut dyn SysBusDevOps. +#[macro_export] +macro_rules! SYS_BUS_DEVICE { + ($trait_device:expr, $lock_device: ident, $trait_sysbusdevops: ident) => { + let mut $lock_device = $trait_device.lock().unwrap(); + let $trait_sysbusdevops = to_sysbusdevops(&mut *$lock_device).unwrap(); + }; +} + +impl AmlBuilder for SysBus { + fn aml_bytes(&self) -> Vec { + let mut scope = AmlScope::new("_SB"); + let child_devices = self.base.children.clone(); + for dev in child_devices.values() { + SYS_BUS_DEVICE!(dev, locked_dev, sysbusdev); + scope.append(&sysbusdev.aml_bytes()); + } + + scope.aml_bytes() + } +} + +pub type ToSysBusDevOpsFunc = fn(&mut dyn Any) -> &mut dyn SysBusDevOps; + +static mut SYSBUSDEVTYPE_HASHMAP: Option> = None; + +pub fn convert_to_sysbusdevops(item: &mut dyn Any) -> &mut dyn SysBusDevOps { + // SAFETY: The typeid of `T` is the typeid recorded in the hashmap. The target structure type of + // the conversion is its own structure type, so the conversion result will definitely not be `None`. + let t = item.downcast_mut::().unwrap(); + t as &mut dyn SysBusDevOps +} + +pub fn register_sysbusdevops_type() -> Result<()> { + let type_id = TypeId::of::(); + // SAFETY: SYSBUSDEVTYPE_HASHMAP will be built in `type_init` function sequentially in the main thread. + // And will not be changed after `type_init`. 
+ unsafe { + if SYSBUSDEVTYPE_HASHMAP.is_none() { + SYSBUSDEVTYPE_HASHMAP = Some(HashMap::new()); + } + let types = SYSBUSDEVTYPE_HASHMAP.as_mut().unwrap(); + if types.get(&type_id).is_some() { + bail!("Type Id {:?} has been registered.", type_id); + } + types.insert(type_id, convert_to_sysbusdevops::); + } + + Ok(()) +} + +pub fn devices_register_sysbusdevops_type() -> Result<()> { + #[cfg(target_arch = "x86_64")] + { + register_sysbusdevops_type::()?; + register_sysbusdevops_type::()?; + register_sysbusdevops_type::()?; + } + #[cfg(target_arch = "aarch64")] + { + register_sysbusdevops_type::()?; + #[cfg(all(feature = "ramfb"))] + register_sysbusdevops_type::()?; + register_sysbusdevops_type::()?; + register_sysbusdevops_type::()?; + register_sysbusdevops_type::()?; + } + register_sysbusdevops_type::()?; + register_sysbusdevops_type::()?; + register_sysbusdevops_type::()?; + register_sysbusdevops_type::() +} + +pub fn to_sysbusdevops(dev: &mut dyn Device) -> Option<&mut dyn SysBusDevOps> { + // SAFETY: SYSBUSDEVTYPE_HASHMAP has been built. And this function is called without changing hashmap. + unsafe { + let types = SYSBUSDEVTYPE_HASHMAP.as_mut().unwrap(); + let func = types.get(&dev.device_type_id())?; + let sysbusdev = func(dev.as_any_mut()); + Some(sysbusdev) + } +} + +#[cfg(test)] +pub fn sysbus_init() -> Arc> { + let sys_mem = AddressSpace::new( + Region::init_container_region(u64::max_value(), "sys_mem"), + "sys_mem", + None, + ) + .unwrap(); + #[cfg(target_arch = "x86_64")] + let sys_io = AddressSpace::new( + Region::init_container_region(1 << 16, "sys_io"), + "sys_io", + None, + ) + .unwrap(); + let free_irqs: (i32, i32) = (IRQ_BASE, IRQ_MAX); + let mmio_region: (u64, u64) = (0x0A00_0000, 0x1000_0000); + Arc::new(Mutex::new(SysBus::new( + #[cfg(target_arch = "x86_64")] + &sys_io, + &sys_mem, + free_irqs, + mmio_region, + ))) +} diff --git a/devices/src/usb/camera.rs b/devices/src/usb/camera.rs new file mode 100644 index 0000000000000000000000000000000000000000..e05f89b91881afc72179cb829b3ee7dd1c9af26f --- /dev/null +++ b/devices/src/usb/camera.rs @@ -0,0 +1,1290 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +//! Emulated camera device that based on UVC(USB video class) protocol. 
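+//!
+//! A single video payload transfer is capped at `MAX_PAYLOAD`, i.e. one
+//! 1280x720 frame of an uncompressed 2-bytes-per-pixel format (that factor
+//! of 2 is what `FRAME_SIZE_1280_720` encodes). A minimal sketch of the
+//! sizing rule, with the helper name chosen only for illustration:
+//!
+//! ```ignore
+//! fn uncompressed_frame_bytes(width: u32, height: u32) -> u32 {
+//!     // 2 bytes per pixel, e.g. 1280 * 720 * 2 = 1_843_200 bytes.
+//!     width * height * 2
+//! }
+//! assert_eq!(uncompressed_frame_bytes(1280, 720), 1280 * 720 * 2);
+//! ```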
+ +use std::collections::LinkedList; +use std::os::unix::prelude::{AsRawFd, RawFd}; +use std::rc::Rc; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Mutex, Weak}; + +use anyhow::{bail, Context, Result}; +use clap::Parser; +use log::{error, info, warn}; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; + +use super::camera_media_type_guid::MEDIA_TYPE_GUID_HASHMAP; +use super::xhci::xhci_controller::XhciDevice; +use crate::camera_backend::{ + create_cam_backend, get_bit_rate, get_video_frame_size, CamBasicFmt, CameraBackend, + CameraBrokenCallback, CameraFormatList, CameraFrame, CameraNotifyCallback, FmtType, +}; +use crate::usb::config::*; +use crate::usb::descriptor::*; +use crate::usb::{UsbDevice, UsbDeviceBase, UsbDeviceRequest, UsbPacket, UsbPacketStatus}; +use machine_manager::config::camera::CameraDevConfig; +use machine_manager::config::valid_id; +use machine_manager::event_loop::{register_event_helper, unregister_event_helper}; +use machine_manager::notifier::{register_vm_pause_notifier, unregister_vm_pause_notifier}; +use util::aio::{iov_discard_front_direct, Iovec}; +use util::byte_code::ByteCode; +use util::gen_base_func; +use util::loop_context::{ + create_new_eventfd, read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, + NotifierOperation, +}; + +const INTERFACE_ID_CONTROL: u8 = 0; +const INTERFACE_ID_STREAMING: u8 = 1; + +const TERMINAL_ID_INPUT_TERMINAL: u8 = 1; +const TERMINAL_ID_OUTPUT_TERMINAL: u8 = 2; + +const ENDPOINT_ID_STREAMING: u8 = 0x1; +const VS_INTERFACE_NUM: u8 = 1; + +// According to UVC specification 1.5 +// A.2. Video Interface Subclass Codes +const SC_VIDEOCONTROL: u8 = 0x01; +const SC_VIDEOSTREAMING: u8 = 0x02; +const SC_VIDEO_INTERFACE_COLLECTION: u8 = 0x03; +// A.3. Video Interface Protocol Codes +const PC_PROTOCOL_UNDEFINED: u8 = 0x0; +// A.4. Video Class-Specific Descriptor Types +const CS_INTERFACE: u8 = 0x24; +// A.5. Video Class-Specific VC Interface Descriptor Subtypes +const VC_HEADER: u8 = 0x01; +const VC_INPUT_TERMINAL: u8 = 0x02; +const VC_OUTPUT_TERMINAL: u8 = 0x03; +// A.6 Video Class-Specific VS Interface Descriptor Subtypes +const VS_INPUT_HEADER: u8 = 0x01; +const VS_FORMAT_UNCOMPRESSED: u8 = 0x04; +const VS_FRAME_UNCOMPRESSED: u8 = 0x05; +const VS_FORMAT_MJPEG: u8 = 0x06; +const VS_FRAME_MJPEG: u8 = 0x07; +const VS_COLORFORMAT: u8 = 0x0D; +// A.8. Video Class-Specific Request Codes +const SET_CUR: u8 = 0x1; +const GET_CUR: u8 = 0x81; +const GET_MIN: u8 = 0x82; +const GET_MAX: u8 = 0x83; +const GET_INFO: u8 = 0x86; +const GET_DEF: u8 = 0x87; +const UVC_FID: u8 = 1; +// A.9.8. 
VideoStreaming Interface Control Selectors +const VS_PROBE_CONTROL: u8 = 1; +const VS_COMMIT_CONTROL: u8 = 2; + +const MAX_PAYLOAD: u32 = FRAME_SIZE_1280_720; +const FRAME_SIZE_1280_720: u32 = 1280 * 720 * 2; +const USB_CAMERA_BUFFER_LEN: usize = 12 * 1024; + +#[derive(Parser, Debug, Clone)] +#[command(no_binary_name(true))] +pub struct UsbCameraConfig { + #[arg(long)] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub iothread: Option, + #[arg(long)] + pub cameradev: String, +} + +pub struct UsbCamera { + base: UsbDeviceBase, // general usb device object + vs_control: VideoStreamingControl, // video stream control info + camera_fd: Arc, // camera io fd + camera_backend: Arc>, // backend device + packet_list: Arc>>>>, // packet to be processed + payload: Arc>, // uvc payload + listening: bool, // if the camera is listening or not + broken: Arc, // if the device broken or not + iothread: Option, + delete_evts: Vec, + notifier_id: u64, +} + +#[derive(Debug)] +enum UsbCameraStringIDs { + #[allow(unused)] + Invalid = 0, + Manufacture, + Product, + SerialNumber, + Configuration, + Iad, + VideoControl, + InputTerminal, + OutputTerminal, + #[allow(unused)] + SelectUnit, + #[allow(unused)] + ProcessingUnit, + VideoStreaming, +} + +const UVC_CAMERA_STRINGS: [&str; 12] = [ + "", + "StratoVirt", + "USB Camera", + "4", + "USB Camera Configuration", + "STRATO CAM", + "Video Control", + "Input Terminal", + "Output Terminal", + "Select Unit", + "Processing Unit", + "Video Streaming", +]; + +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +struct VCHeaderDescriptor { + pub bLength: u8, + pub bDescriptorType: u8, + pub bDescriptorSubType: u8, + pub bcdUVC: u16, + pub wTotalLength: u16, + pub dwClockFrequency: u32, + pub bInCollection: u8, + pub baInterfaceNr: u8, +} + +impl ByteCode for VCHeaderDescriptor {} + +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +struct InputTerminalDescriptor { + pub bLength: u8, + pub bDescriptorType: u8, + pub bDescriptorSubType: u8, + pub bTerminalID: u8, + pub wTerminalType: u16, + pub bAssocTerminal: u8, + pub iTerminal: u8, + pub wObjectiveFocalLengthMin: u16, + pub wObjectiveFocalLengthMax: u16, + pub wOcularFocalLength: u16, + pub bControlSize: u8, + pub bmControls: [u8; 3], +} + +impl ByteCode for InputTerminalDescriptor {} + +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +struct OutputTerminalDescriptor { + pub bLength: u8, + pub bDescriptorType: u8, + pub bDescriptorSubType: u8, + pub bTerminalID: u8, + pub wTerminalType: u16, + pub bAssocTerminal: u8, + pub bSourceID: u8, + pub iTerminal: u8, +} + +impl ByteCode for OutputTerminalDescriptor {} + +fn gen_desc_interface_camera_vc() -> Result> { + // VideoControl Interface Descriptor + let desc = Arc::new(UsbDescIface { + interface_desc: UsbInterfaceDescriptor { + bLength: USB_DT_INTERFACE_SIZE, + bDescriptorType: USB_DT_INTERFACE, + bInterfaceNumber: INTERFACE_ID_CONTROL, + bAlternateSetting: 0, + bNumEndpoints: 0, + bInterfaceClass: USB_CLASS_VIDEO, + bInterfaceSubClass: SC_VIDEOCONTROL, + bInterfaceProtocol: 0, + iInterface: UsbCameraStringIDs::VideoControl as u8, + }, + other_desc: vec![ + Arc::new(UsbDescOther { + data: VCHeaderDescriptor { + bLength: 0x0d, + bDescriptorType: CS_INTERFACE, + bDescriptorSubType: VC_HEADER, + bcdUVC: 0x100, + wTotalLength: 40, + dwClockFrequency: 0x02dc6c00, + bInCollection: 0x1, + baInterfaceNr: 0x1, + } + 
.as_bytes() + .to_vec(), + }), + // Input Terminal Descriptor + Arc::new(UsbDescOther { + data: InputTerminalDescriptor { + bLength: 0x12, + bDescriptorType: CS_INTERFACE, + bDescriptorSubType: VC_INPUT_TERMINAL, + bTerminalID: TERMINAL_ID_INPUT_TERMINAL, + wTerminalType: 0x0201, + bAssocTerminal: 0, + iTerminal: UsbCameraStringIDs::InputTerminal as u8, + wObjectiveFocalLengthMin: 0, + wObjectiveFocalLengthMax: 0, + wOcularFocalLength: 0, + bControlSize: 0x3, + bmControls: [0; 3], + } + .as_bytes() + .to_vec(), + }), + // Output Terminal Descriptor + Arc::new(UsbDescOther { + data: OutputTerminalDescriptor { + bLength: 0x9, + bDescriptorType: CS_INTERFACE, + bDescriptorSubType: VC_OUTPUT_TERMINAL, + bTerminalID: TERMINAL_ID_OUTPUT_TERMINAL, + wTerminalType: 0x0101, + bAssocTerminal: 0, + bSourceID: 1, + iTerminal: UsbCameraStringIDs::OutputTerminal as u8, + } + .as_bytes() + .to_vec(), + }), + ], + endpoints: vec![], + }); + + Ok(desc) +} + +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Clone, Copy, Default)] +struct VsDescInputHeader { + bLength: u8, + bDescriptorType: u8, + bDescriptorSubtype: u8, + bNumFormats: u8, + wTotalLength: u16, + bEndpointAddress: u8, + bmInfo: u8, + bTerminalLink: u8, + bStillCaptureMethod: u8, + bTriggerSupport: u8, + bTriggerUsage: u8, + bControlSize: u8, +} + +impl ByteCode for VsDescInputHeader {} + +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Clone, Copy, Default)] +struct VsDescUncompressedFmt { + bLength: u8, + bDescriptorType: u8, + bDescriptorSubtype: u8, + bFormatIndex: u8, + bNumFrameDescriptors: u8, + guidFormat: [u8; 16], + bBitsPerPixel: u8, + bDefaultFrameIndex: u8, + bAspectRatioX: u8, + bAspectRatioY: u8, + bmInterlaceFlags: u8, + bCopyProtect: u8, +} + +impl ByteCode for VsDescUncompressedFmt {} + +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Clone, Copy, Default)] +struct VsDescMjpgFmt { + bLength: u8, + bDescriptorType: u8, + bDescriptorSubtype: u8, + bFormatIndex: u8, + bNumFrameDescriptors: u8, + bmFlags: u8, + bDefaultFrameIndex: u8, + bAspectRatioX: u8, + bAspectRatioY: u8, + bmInterlaceFlags: u8, + bCopyProtect: u8, +} + +impl ByteCode for VsDescMjpgFmt {} + +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Clone, Copy, Default)] +struct VsDescFrm { + bLength: u8, + bDescriptorType: u8, + bDescriptorSubtype: u8, + bFrameIndex: u8, + bmCapabilities: u8, + wWidth: u16, + wHeight: u16, + dwMinBitRate: u32, + dwMaxBitRate: u32, + dwMaxVideoFrameBufSize: u32, + dwDefaultFrameInterval: u32, + bFrameIntervalType: u8, + dwIntervalVals: u32, +} + +impl ByteCode for VsDescFrm {} + +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Clone, Copy, Default)] +struct VsDescColorMatching { + bLength: u8, + bDescriptorType: u8, + bDescriptorSubtype: u8, + bColorPrimaries: u8, + bTransferCharacteristics: u8, + bMatrixCoefficients: u8, +} + +impl ByteCode for VsDescColorMatching {} + +fn gen_desc_interface_camera_vs(fmt_list: Vec) -> Result> { + let desc = Arc::new(UsbDescIface { + interface_desc: UsbInterfaceDescriptor { + bLength: USB_DT_INTERFACE_SIZE, + bDescriptorType: USB_DT_INTERFACE, + bInterfaceNumber: INTERFACE_ID_STREAMING, + bAlternateSetting: 0, + bNumEndpoints: 1, + bInterfaceClass: USB_CLASS_VIDEO, + bInterfaceSubClass: SC_VIDEOSTREAMING, + bInterfaceProtocol: 0, + iInterface: UsbCameraStringIDs::VideoStreaming as u8, + }, + other_desc: gen_fmt_desc(fmt_list)?, + endpoints: vec![Arc::new(UsbDescEndpoint { + endpoint_desc: UsbEndpointDescriptor { + bLength: USB_DT_ENDPOINT_SIZE, + bDescriptorType: 
USB_DT_ENDPOINT, + bEndpointAddress: USB_DIRECTION_DEVICE_TO_HOST | ENDPOINT_ID_STREAMING, + bmAttributes: USB_ENDPOINT_ATTR_BULK, + wMaxPacketSize: 0x400, + bInterval: 0x0, + }, + extra: UsbSuperSpeedEndpointCompDescriptor { + bLength: USB_DT_SS_EP_COMP_SIZE, + bDescriptorType: USB_DT_ENDPOINT_COMPANION, + bMaxBurst: 0, + bmAttributes: 0, + wBytesPerInterval: 0, + } + .as_bytes() + .to_vec(), + })], + }); + + Ok(desc) +} + +fn gen_desc_iad_camera(fmt_list: Vec) -> Result> { + let desc = Arc::new(UsbDescIAD { + iad_desc: UsbIadDescriptor { + bLength: 0x8, + bDescriptorType: USB_DT_INTERFACE_ASSOCIATION, + bFirstInterface: INTERFACE_ID_CONTROL, + bInterfaceCount: 2, + bFunctionClass: USB_CLASS_VIDEO, + bFunctionSubClass: SC_VIDEO_INTERFACE_COLLECTION, + bFunctionProtocol: PC_PROTOCOL_UNDEFINED, + iFunction: UsbCameraStringIDs::Iad as u8, + }, + itfs: vec![ + gen_desc_interface_camera_vc()?, + gen_desc_interface_camera_vs(fmt_list)?, + ], + }); + + Ok(desc) +} + +/// UVC Camera device descriptor +fn gen_desc_device_camera(fmt_list: Vec) -> Result> { + let desc = Arc::new(UsbDescDevice { + device_desc: UsbDeviceDescriptor { + bLength: USB_DT_DEVICE_SIZE, + bDescriptorType: USB_DT_DEVICE, + idVendor: USB_VENDOR_ID_STRATOVIRT, + idProduct: USB_PRODUCT_ID_UVC, + bcdDevice: 0, + iManufacturer: UsbCameraStringIDs::Manufacture as u8, + iProduct: UsbCameraStringIDs::Product as u8, + iSerialNumber: UsbCameraStringIDs::SerialNumber as u8, + bcdUSB: 0x0300, + bDeviceClass: USB_CLASS_MISCELLANEOUS, + // Refer to https://www.usb.org/defined-class-codes for details. + bDeviceSubClass: 2, + bDeviceProtocol: 1, // Interface Association + bMaxPacketSize0: 64, + bNumConfigurations: 1, + }, + configs: vec![Arc::new(UsbDescConfig { + config_desc: UsbConfigDescriptor { + bLength: USB_DT_CONFIG_SIZE, + bDescriptorType: USB_DT_CONFIGURATION, + wTotalLength: 0, + bNumInterfaces: 2, + bConfigurationValue: 1, + iConfiguration: UsbCameraStringIDs::Configuration as u8, + bmAttributes: USB_CONFIGURATION_ATTR_ONE, + bMaxPower: 50, + }, + iad_desc: vec![gen_desc_iad_camera(fmt_list)?], + interfaces: vec![], + })], + }); + + Ok(desc) +} + +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +pub struct VideoStreamingControl { + pub bmHint: u16, + pub bFormatIndex: u8, + pub bFrameIndex: u8, + pub dwFrameInterval: u32, + pub wKeyFrameRate: u16, + pub wPFrameRate: u16, + pub wCompQuality: u16, + pub wCompWindowSize: u16, + pub wDelay: u16, + pub dwMaxVideoFrameSize: u32, + pub dwMaxPayloadTransferSize: u32, +} + +impl ByteCode for VideoStreamingControl {} + +impl VideoStreamingControl { + fn reset(&mut self, fmt: &CamBasicFmt) { + self.bFormatIndex = 1; + self.bFrameIndex = 1; + self.dwFrameInterval = fmt.get_frame_intervals().unwrap_or_else(|e| { + error!("Invalid interval {:?}", e); + 0 + }); + self.dwMaxVideoFrameSize = get_video_frame_size(fmt.width, fmt.height, fmt.fmttype) + .unwrap_or_else(|e| { + error!("Invalid frame size {:?}", e); + 0 + }); + self.dwMaxPayloadTransferSize = MAX_PAYLOAD; + } +} + +impl UsbCamera { + pub fn new(config: UsbCameraConfig, cameradev: CameraDevConfig, tokenid: u64) -> Result { + let camera = create_cam_backend(config.clone(), cameradev, tokenid)?; + Ok(Self { + base: UsbDeviceBase::new(config.id, USB_CAMERA_BUFFER_LEN), + vs_control: VideoStreamingControl::default(), + camera_fd: Arc::new(create_new_eventfd()?), + camera_backend: camera, + packet_list: Arc::new(Mutex::new(LinkedList::new())), + payload: Arc::new(Mutex::new(UvcPayload::new())), + 
listening: false, + broken: Arc::new(AtomicBool::new(false)), + iothread: config.iothread, + delete_evts: Vec::new(), + notifier_id: 0, + }) + } + + fn register_cb(&mut self) { + let clone_fd = self.camera_fd.clone(); + let notify_cb: CameraNotifyCallback = Arc::new(move || { + if let Err(e) = clone_fd.write(1) { + error!( + "Failed to write camera device fd {} {:?}", + clone_fd.as_raw_fd(), + e + ); + } + }); + let clone_broken = self.broken.clone(); + let clone_id = self.device_id().to_string(); + let clone_fd = self.camera_fd.clone(); + let broken_cb: CameraBrokenCallback = Arc::new(move || { + clone_broken.store(true, Ordering::SeqCst); + error!("USB Camera {} device broken", clone_id); + // Notify the camera to process the packet. + if let Err(e) = clone_fd.write(1) { + error!("Failed to notify camera fd {:?}", e); + } + }); + let mut locked_camera = self.camera_backend.lock().unwrap(); + locked_camera.register_notify_cb(notify_cb); + locked_camera.register_broken_cb(broken_cb); + } + + fn activate(&mut self, fmt: &CamBasicFmt) -> Result<()> { + info!("USB Camera {} activate", self.device_id()); + self.camera_backend.lock().unwrap().reset(); + self.payload.lock().unwrap().reset(); + let mut locked_camera = self.camera_backend.lock().unwrap(); + locked_camera.set_fmt(fmt)?; + locked_camera.video_stream_on()?; + drop(locked_camera); + self.register_camera_fd()?; + Ok(()) + } + + fn register_camera_fd(&mut self) -> Result<()> { + if self.listening { + return Ok(()); + } + let cam_handler = Arc::new(Mutex::new(CameraIoHandler::new( + &self.camera_fd, + &self.packet_list, + &self.camera_backend, + &self.payload, + &self.broken, + ))); + register_event_helper( + EventNotifierHelper::internal_notifiers(cam_handler), + self.iothread.as_ref(), + &mut self.delete_evts, + )?; + self.listening = true; + Ok(()) + } + + fn deactivate(&mut self) -> Result<()> { + info!("USB Camera {} deactivate", self.device_id()); + if self.broken.load(Ordering::Acquire) { + info!( + "USB Camera {} broken when deactivate, reset it.", + self.device_id() + ); + self.camera_backend.lock().unwrap().reset(); + self.broken.store(false, Ordering::SeqCst); + } else { + self.camera_backend.lock().unwrap().video_stream_off()?; + } + self.unregister_camera_fd()?; + self.packet_list.lock().unwrap().clear(); + Ok(()) + } + + fn unregister_camera_fd(&mut self) -> Result<()> { + if !self.listening { + return Ok(()); + } + unregister_event_helper(self.iothread.as_ref(), &mut self.delete_evts)?; + self.listening = false; + Ok(()) + } + + fn handle_uvc_request( + &mut self, + packet: &mut UsbPacket, + device_req: &UsbDeviceRequest, + ) -> Result<()> { + let inter_num = (device_req.index & 0xff) as u8; + match device_req.request_type { + USB_INTERFACE_IN_REQUEST => { + if device_req.request == USB_REQUEST_GET_STATUS { + self.base.data_buf[0] = 0; + self.base.data_buf[1] = 0; + packet.actual_length = 2; + return Ok(()); + } + } + USB_INTERFACE_OUT_REQUEST => { + if device_req.request == USB_REQUEST_SET_FEATURE { + return Ok(()); + } + } + USB_INTERFACE_CLASS_IN_REQUEST => { + if inter_num == VS_INTERFACE_NUM { + return self.do_vs_interface_in_request(packet, device_req); + } + } + USB_INTERFACE_CLASS_OUT_REQUEST => { + if inter_num == VS_INTERFACE_NUM { + return self.do_vs_interface_out_request(device_req); + } + } + USB_ENDPOINT_OUT_REQUEST => { + if device_req.request == USB_REQUEST_CLEAR_FEATURE { + return self + .deactivate() + .with_context(|| "Failed to deactivate device"); + } + } + _ => (), + } + bail!("Unknown UVC request 
{:?}", device_req.request); + } + + fn do_vs_interface_in_request( + &mut self, + packet: &mut UsbPacket, + device_req: &UsbDeviceRequest, + ) -> Result<()> { + match device_req.request { + GET_INFO => { + self.base.data_buf[0] = 1 | 2; + packet.actual_length = 1; + } + GET_CUR | GET_MIN | GET_MAX | GET_DEF => { + self.video_stream_in_request(packet, device_req)?; + } + _ => { + bail!( + "Unsupported VS interface in request {:?}", + device_req.request + ); + } + } + Ok(()) + } + + fn video_stream_in_request( + &mut self, + packet: &mut UsbPacket, + device_req: &UsbDeviceRequest, + ) -> Result<()> { + let cs = (device_req.value >> 8) as u8; + if cs != VS_PROBE_CONTROL { + bail!("Invalid VS Control Selector {}", cs); + } + let len = self.vs_control.as_bytes().len(); + self.base.data_buf[0..len].copy_from_slice(self.vs_control.as_bytes()); + packet.actual_length = len as u32; + Ok(()) + } + + fn do_vs_interface_out_request(&mut self, device_req: &UsbDeviceRequest) -> Result<()> { + let mut vs_control = VideoStreamingControl::default(); + let len = vs_control.as_mut_bytes().len(); + vs_control + .as_mut_bytes() + .copy_from_slice(&self.base.data_buf[0..len]); + let cs = (device_req.value >> 8) as u8; + trace::usb_camera_vs_control_request(cs, &vs_control); + match device_req.request { + SET_CUR => match cs { + VS_PROBE_CONTROL => { + let fmt = self + .camera_backend + .lock() + .unwrap() + .get_format_by_index(vs_control.bFormatIndex, vs_control.bFrameIndex)?; + self.update_vs_control(&fmt, &vs_control)?; + } + VS_COMMIT_CONTROL => { + let fmt = self + .camera_backend + .lock() + .unwrap() + .get_format_by_index(vs_control.bFormatIndex, vs_control.bFrameIndex)?; + self.update_vs_control(&fmt, &vs_control)?; + self.activate(&fmt) + .with_context(|| "Failed to activate device")?; + } + _ => { + bail!("Invalid VS control selector {}", cs); + } + }, + _ => { + bail!("Unsupported VS interface out request {:?}", device_req); + } + } + Ok(()) + } + + fn update_vs_control( + &mut self, + fmt: &CamBasicFmt, + vs_control: &VideoStreamingControl, + ) -> Result<()> { + self.vs_control.bFormatIndex = vs_control.bFormatIndex; + self.vs_control.bFrameIndex = vs_control.bFrameIndex; + self.vs_control.dwMaxVideoFrameSize = + get_video_frame_size(fmt.width, fmt.height, fmt.fmttype)?; + self.vs_control.dwFrameInterval = vs_control.dwFrameInterval; + Ok(()) + } + + fn reset_vs_control(&mut self) { + let default_fmt = self + .camera_backend + .lock() + .unwrap() + .get_format_by_index(1, 1) + .unwrap_or_default(); + self.vs_control.reset(&default_fmt); + } + + fn generate_iad(&self, prefix: &str) -> String { + format!("{} ({})", prefix, self.device_id()) + } +} + +impl UsbDevice for UsbCamera { + gen_base_func!(usb_device_base, usb_device_base_mut, UsbDeviceBase, base); + + fn realize(mut self) -> Result>> { + let fmt_list = self.camera_backend.lock().unwrap().list_format()?; + self.base.reset_usb_endpoint(); + self.base.speed = USB_SPEED_SUPER; + let mut s: Vec = UVC_CAMERA_STRINGS.iter().map(|&s| s.to_string()).collect(); + let prefix = &s[UsbCameraStringIDs::SerialNumber as usize]; + s[UsbCameraStringIDs::SerialNumber as usize] = self.base.generate_serial_number(prefix); + let iad = &s[UsbCameraStringIDs::Iad as usize]; + s[UsbCameraStringIDs::Iad as usize] = self.generate_iad(iad); + let device_desc = gen_desc_device_camera(fmt_list)?; + self.base.init_descriptor(device_desc, s)?; + self.register_cb(); + + let camera = Arc::new(Mutex::new(self)); + let cloned_camera = camera.clone(); + let pause_notify = 
Arc::new(move |paused: bool| { + let locked_cam = cloned_camera.lock().unwrap(); + locked_cam.camera_backend.lock().unwrap().pause(paused); + }); + camera.lock().unwrap().notifier_id = register_vm_pause_notifier(pause_notify); + + Ok(camera) + } + + fn unrealize(&mut self) -> Result<()> { + info!("Camera {} unrealize", self.device_id()); + self.unregister_camera_fd()?; + self.camera_backend.lock().unwrap().reset(); + unregister_vm_pause_notifier(self.notifier_id); + self.notifier_id = 0; + Ok(()) + } + + fn cancel_packet(&mut self, _packet: &Arc>) {} + + fn reset(&mut self) { + info!("Camera {} device reset", self.device_id()); + self.base.addr = 0; + if let Err(e) = self.unregister_camera_fd() { + error!("Failed to unregister fd when reset {:?}", e); + } + self.reset_vs_control(); + self.payload.lock().unwrap().reset(); + self.camera_backend.lock().unwrap().reset(); + self.packet_list.lock().unwrap().clear(); + self.broken.store(false, Ordering::SeqCst); + } + + fn handle_control(&mut self, packet: &Arc>, device_req: &UsbDeviceRequest) { + let mut locked_packet = packet.lock().unwrap(); + match self + .base + .handle_control_for_descriptor(&mut locked_packet, device_req) + { + Ok(handled) => { + if handled { + trace::usb_camera_handle_control(); + return; + } + } + Err(e) => { + warn!("Received incorrect USB Camera descriptor message: {:?}", e); + locked_packet.status = UsbPacketStatus::Stall; + return; + } + } + + if let Err(e) = self.handle_uvc_request(&mut locked_packet, device_req) { + error!("Camera uvc descriptor error {:?}", e); + locked_packet.status = UsbPacketStatus::Stall; + } + } + + fn handle_data(&mut self, packet: &Arc>) { + if packet.lock().unwrap().ep_number == ENDPOINT_ID_STREAMING { + packet.lock().unwrap().is_async = true; + let mut locked_list = self.packet_list.lock().unwrap(); + locked_list.push_back(packet.clone()); + // Notify the camera to deal with the request. + if let Err(e) = self.camera_fd.write(1) { + error!( + "Failed to write fd when handle data for {} {:?}", + self.device_id(), + e + ); + // SAFETY: packet is push before, and no other thread modify the list. + let p = locked_list.pop_back().unwrap(); + let mut locked_p = p.lock().unwrap(); + locked_p.status = UsbPacketStatus::Stall; + // Async request failed, let controller report the error. + locked_p.is_async = false; + } + } else { + error!("Invalid ep number {}", packet.lock().unwrap().ep_number); + packet.lock().unwrap().status = UsbPacketStatus::Stall; + } + } + + fn set_controller(&mut self, _cntlr: Weak>) {} + + fn get_controller(&self) -> Option>> { + None + } +} + +/// UVC payload +struct UvcPayload { + header: Vec, + frame_offset: usize, + payload_offset: usize, +} + +impl UvcPayload { + fn new() -> Self { + Self { + header: vec![2, 0], + frame_offset: 0, + payload_offset: 0, + } + } + + fn reset(&mut self) { + self.header[0] = 2; + self.header[1] = 0; + self.frame_offset = 0; + self.payload_offset = 0; + } + + fn get_frame_data_size(&self, current_frame_size: usize, iov_size: u64) -> Result { + let mut frame_data_size = iov_size; + let header_len = self.header.len(); + // Within the scope of the frame. + if self + .frame_offset + .checked_add(frame_data_size as usize) + .with_context(|| "get_frame_data_size: invalid frame data")? 
+ >= current_frame_size + { + if self.frame_offset > current_frame_size { + bail!( + "Invalid frame offset {} {}", + self.frame_offset, + current_frame_size + ); + } + frame_data_size = (current_frame_size - self.frame_offset) as u64; + } + // Within the scope of the payload. + if self + .payload_offset + .checked_add(frame_data_size as usize) + .with_context(|| "get_frame_data_size: invalid payload data")? + >= MAX_PAYLOAD as usize + { + if self.payload_offset > MAX_PAYLOAD as usize { + bail!( + "Invalid payload offset {} {}", + self.payload_offset, + MAX_PAYLOAD + ); + } + frame_data_size = u64::from(MAX_PAYLOAD) - self.payload_offset as u64; + } + // payload start, reserve the header. + if self.payload_offset == 0 + && frame_data_size + .checked_add(header_len as u64) + .with_context(|| "get_frame_data_size: invalid header_len")? + > iov_size + { + if iov_size <= header_len as u64 { + bail!("Invalid iov size {}", iov_size); + } + frame_data_size = iov_size - header_len as u64; + } + Ok(frame_data_size) + } + + fn next_frame(&mut self) { + self.frame_offset = 0; + self.payload_offset = 0; + self.header[1] ^= UVC_FID; + } +} + +/// Camere handler for copying frame data to usb packet. +struct CameraIoHandler { + camera: Arc>, + fd: Arc, + packet_list: Arc>>>>, + payload: Arc>, + broken: Arc, +} + +impl CameraIoHandler { + fn new( + fd: &Arc, + list: &Arc>>>>, + camera: &Arc>, + payload: &Arc>, + broken: &Arc, + ) -> Self { + CameraIoHandler { + camera: camera.clone(), + fd: fd.clone(), + packet_list: list.clone(), + payload: payload.clone(), + broken: broken.clone(), + } + } + + fn handle_io(&mut self) { + const REQUEST_LIMIT: u32 = 100; + for _ in 0..REQUEST_LIMIT { + let len = self.camera.lock().unwrap().get_frame_size(); + if len == 0 { + break; + } + let mut locked_list = self.packet_list.lock().unwrap(); + if locked_list.is_empty() { + break; + } + // SAFETY: packet list is not empty. + let p = locked_list.pop_front().unwrap(); + drop(locked_list); + let mut locked_p = p.lock().unwrap(); + if let Err(e) = self.handle_payload(&mut locked_p) { + error!("Failed handle uvc data {:?}", e); + locked_p.status = UsbPacketStatus::IoError; + } + if let Some(transfer) = locked_p.xfer_ops.as_ref() { + if let Some(ops) = transfer.clone().upgrade() { + drop(locked_p); + ops.lock().unwrap().submit_transfer(); + } + } + } + } + + fn handle_payload(&mut self, pkt: &mut UsbPacket) -> Result<()> { + let mut locked_camera = self.camera.lock().unwrap(); + let current_frame_size = locked_camera.get_frame_size(); + let mut locked_payload = self.payload.lock().unwrap(); + let header_len = locked_payload.header.len(); + let pkt_size = pkt.get_iovecs_size(); + let frame_data_size = locked_payload.get_frame_data_size(current_frame_size, pkt_size)?; + if frame_data_size == 0 { + bail!( + "Invalid frame data size, frame offset {} payload offset {} packet size {}", + locked_payload.frame_offset, + locked_payload.payload_offset, + pkt.get_iovecs_size(), + ); + } + let mut iovecs: &mut [Iovec] = &mut pkt.iovecs; + if locked_payload.payload_offset == 0 { + // Payload start, add header. 
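+            // Editor's note (descriptive only): the 2-byte header written below follows
+            // the UVC payload header layout. header[0] is bHeaderLength (always 2 here,
+            // since no PTS/SCR fields are carried) and header[1] is bmHeaderInfo, which
+            // carries the Frame ID (FID) bit that next_frame() toggles via
+            // `header[1] ^= UVC_FID` so the host can detect frame boundaries.
+            // The header is written once at the start of each payload; the remaining
+            // packets of the same payload carry raw frame data only.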
+ pkt.transfer_packet(&mut locked_payload.header, header_len); + locked_payload.payload_offset += header_len; + iovecs = iov_discard_front_direct(&mut pkt.iovecs, u64::from(pkt.actual_length)) + .with_context(|| format!("Invalid iov size {}", pkt_size))?; + } + let copied = locked_camera.get_frame( + iovecs, + locked_payload.frame_offset, + frame_data_size as usize, + )?; + pkt.actual_length += copied as u32; + trace::usb_camera_handle_payload( + locked_payload.frame_offset, + locked_payload.payload_offset, + frame_data_size, + copied, + ); + locked_payload.frame_offset += frame_data_size as usize; + locked_payload.payload_offset += frame_data_size as usize; + + if locked_payload.payload_offset >= MAX_PAYLOAD as usize { + locked_payload.payload_offset = 0; + } + if locked_payload.frame_offset >= current_frame_size { + locked_payload.next_frame(); + locked_camera.next_frame()?; + } + Ok(()) + } +} + +impl EventNotifierHelper for CameraIoHandler { + fn internal_notifiers(io_handler: Arc>) -> Vec { + let cloned_io_handler = io_handler.clone(); + let handler: Rc = Rc::new(move |_event, fd: RawFd| { + read_fd(fd); + let mut locked_handler = cloned_io_handler.lock().unwrap(); + if locked_handler.broken.load(Ordering::Acquire) { + let mut locked_list = locked_handler.packet_list.lock().unwrap(); + while let Some(p) = locked_list.pop_front() { + let mut locked_p = p.lock().unwrap(); + locked_p.status = UsbPacketStatus::IoError; + if let Some(transfer) = locked_p.xfer_ops.as_ref() { + if let Some(ops) = transfer.clone().upgrade() { + drop(locked_p); + ops.lock().unwrap().submit_transfer(); + } + } + } + return None; + } + locked_handler.handle_io(); + None + }); + vec![EventNotifier::new( + NotifierOperation::AddShared, + io_handler.lock().unwrap().fd.as_raw_fd(), + None, + EventSet::IN, + vec![handler], + )] + } +} + +fn gen_fmt_desc(fmt_list: Vec) -> Result>> { + let mut body = vec![]; + let mut buf = vec![]; + + let mut header_struct = gen_intface_header_desc(fmt_list.len() as u8); + + for fmt in fmt_list { + let data = gen_fmt_header(&fmt)?; + body.push(Arc::new(UsbDescOther { data: data.clone() })); + + for frm in &fmt.frame { + let data = gen_frm_desc(fmt.format, frm)?; + body.push(Arc::new(UsbDescOther { data: data.clone() })); + } + let data = gen_color_matching_desc()?; + body.push(Arc::new(UsbDescOther { data })); + } + + header_struct.wTotalLength = u16::from(header_struct.bLength) + + body.clone().iter().fold(0, |len, x| len + x.data.len()) as u16; + + let mut vec = header_struct.as_bytes().to_vec(); + // SAFETY: bLength is 0xb + fmt_list.len(). + vec.resize(header_struct.bLength as usize, 0); + buf.push(Arc::new(UsbDescOther { data: vec })); + buf.append(&mut body); + + Ok(buf) +} + +fn gen_intface_header_desc(fmt_num: u8) -> VsDescInputHeader { + VsDescInputHeader { + bLength: 0xd_u8.checked_add(fmt_num).unwrap_or_else(|| { + error!("gen_intface_header_desc: too large fmt num"); + u8::MAX + }), + bDescriptorType: CS_INTERFACE, + bDescriptorSubtype: VS_INPUT_HEADER, + bNumFormats: fmt_num, + wTotalLength: 0x00, // Shall be filled later after all members joined together. 
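+        // (Editor's note: gen_fmt_desc() later sets it to bLength plus the summed
+        // lengths of every format, frame and color-matching descriptor that follows.)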
+ bEndpointAddress: USB_DIRECTION_DEVICE_TO_HOST | ENDPOINT_ID_STREAMING, // EP 1 IN + bmInfo: 0x00, + bTerminalLink: 2, + bStillCaptureMethod: 0x00, + bTriggerSupport: 0x00, + bTriggerUsage: 0x00, + bControlSize: 0x01, + } +} + +fn gen_fmt_header(fmt: &CameraFormatList) -> Result> { + let bits_per_pixel = match fmt.format { + FmtType::Yuy2 | FmtType::Rgb565 => 0x10_u8, + FmtType::Nv12 => 0xc_u8, + _ => 0, + }; + let header = match fmt.format { + FmtType::Yuy2 | FmtType::Rgb565 | FmtType::Nv12 => VsDescUncompressedFmt { + bLength: 0x1B, + bDescriptorType: CS_INTERFACE, + bDescriptorSubtype: VS_FORMAT_UNCOMPRESSED, + bFormatIndex: fmt.fmt_index, + bNumFrameDescriptors: fmt.frame.len() as u8, + guidFormat: *MEDIA_TYPE_GUID_HASHMAP + .get(&fmt.format) + .with_context(|| "unsupported video format.")?, + bBitsPerPixel: bits_per_pixel, + bDefaultFrameIndex: 1, + bAspectRatioX: 0, + bAspectRatioY: 0, + bmInterlaceFlags: 0, + bCopyProtect: 0, + } + .as_bytes() + .to_vec(), + FmtType::Mjpg => VsDescMjpgFmt { + bLength: 0xb, + bDescriptorType: CS_INTERFACE, + bDescriptorSubtype: VS_FORMAT_MJPEG, + bFormatIndex: fmt.fmt_index, + bNumFrameDescriptors: fmt.frame.len() as u8, + bmFlags: 0x01, + bDefaultFrameIndex: 0x01, + bAspectRatioX: 0x00, + bAspectRatioY: 0x00, + bmInterlaceFlags: 0x00, + bCopyProtect: 0x00, + } + .as_bytes() + .to_vec(), + }; + + Ok(header) +} + +#[inline(always)] +fn get_subtype(pixfmt: FmtType) -> u8 { + match pixfmt { + FmtType::Yuy2 | FmtType::Rgb565 | FmtType::Nv12 => VS_FRAME_UNCOMPRESSED, + FmtType::Mjpg => VS_FRAME_MJPEG, + } +} + +fn gen_frm_desc(pixfmt: FmtType, frm: &CameraFrame) -> Result> { + let bitrate = get_bit_rate(frm.width, frm.height, frm.interval, pixfmt)?; + let desc = VsDescFrm { + bLength: 0x1e, // TODO: vary with interval number. 
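+        // (Editor's note: 0x1e = 30 bytes = 26 fixed bytes + 4 bytes per discrete
+        // frame interval; exactly one interval is advertised here, see
+        // bFrameIntervalType/dwIntervalVals below, hence the TODO.)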
+ bDescriptorType: CS_INTERFACE, + bDescriptorSubtype: get_subtype(pixfmt), + bFrameIndex: frm.index, + bmCapabilities: 0x1, + wWidth: frm.width as u16, + wHeight: frm.height as u16, + dwMinBitRate: bitrate, + dwMaxBitRate: bitrate, + dwMaxVideoFrameBufSize: get_video_frame_size(frm.width, frm.height, pixfmt)?, + dwDefaultFrameInterval: frm.interval, + bFrameIntervalType: 1, + dwIntervalVals: frm.interval, + } + .as_bytes() + .to_vec(); + + Ok(desc) +} + +fn gen_color_matching_desc() -> Result> { + Ok(VsDescColorMatching { + bLength: 0x06, + bDescriptorType: CS_INTERFACE, + bDescriptorSubtype: VS_COLORFORMAT, + bColorPrimaries: 0x01, // BT.709,sRGB + bTransferCharacteristics: 0x01, // BT.709 + bMatrixCoefficients: 0x04, // SMPTE 170M (BT.601) + } + .as_bytes() + .to_vec()) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::camera_backend::{CameraFormatList, CameraFrame, FmtType}; + + fn test_interface_table_data_len(interface: Arc, size_offset: usize) { + let descs = &interface.other_desc; + let mut total_len_exact: usize = 0; + + let total_len_set: usize = (descs[0].data[size_offset] as usize + + ((descs[0].data[size_offset + 1] as usize) << 8)) + as usize; // field 'wTotalLength' in the 1st data desc + + for desc in descs { + let sub_len_set = desc.data[0] as usize; // field 'bLength' + let sub_len_exact = desc.data.len(); + assert_eq!(sub_len_set, sub_len_exact); + + total_len_exact += sub_len_exact; + } + + assert_eq!(total_len_set, total_len_exact); + } + + fn list_format() -> Vec { + vec![ + CameraFormatList { + format: FmtType::Yuy2, + fmt_index: 1, + frame: vec![ + CameraFrame { + width: 1980, + height: 720, + interval: 30, + index: 1, + }, + CameraFrame { + width: 640, + height: 480, + interval: 30, + index: 2, + }, + ], + }, + CameraFormatList { + format: FmtType::Mjpg, + frame: vec![ + CameraFrame { + width: 1980, + height: 720, + interval: 30, + index: 1, + }, + CameraFrame { + width: 640, + height: 680, + interval: 20, + index: 2, + }, + ], + fmt_index: 2, + }, + ] + } + + #[test] + fn test_interfaces_table_data_len() { + // VC and VS's header difference, their wTotalSize field's offset are the bit 5 and 4 + // respectively in their data[0] vector. The rest data follow the same principle that the + // 1st element is the very data vector's length. + test_interface_table_data_len(gen_desc_interface_camera_vc().unwrap(), 5); + test_interface_table_data_len(gen_desc_interface_camera_vs(list_format()).unwrap(), 4); + } +} diff --git a/devices/src/usb/camera_media_type_guid.rs b/devices/src/usb/camera_media_type_guid.rs new file mode 100644 index 0000000000000000000000000000000000000000..409e20c2f6c74b3f0cdf6dba75bdc7536d320415 --- /dev/null +++ b/devices/src/usb/camera_media_type_guid.rs @@ -0,0 +1,50 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +//! Media Type GUID. referred with uvc.h in linux kernel. 
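+//!
+//! Every entry follows the standard UVC format GUID pattern
+//! xxxxxxxx-0000-0010-8000-00AA00389B71, with the format's FourCC ("YUY2",
+//! "RGBP", "NV12") stored in the first four bytes.
+//!
+//! Lookup sketch (illustrative only):
+//!
+//!     use crate::camera_backend::FmtType;
+//!
+//!     // Fetch the 16-byte GUID advertised in the VS format descriptor.
+//!     let guid = MEDIA_TYPE_GUID_HASHMAP
+//!         .get(&FmtType::Yuy2)
+//!         .expect("YUY2 is part of MEDIA_TYPE_GUID");
+//!     assert_eq!(&guid[..4], b"YUY2");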
+ +use std::collections::HashMap; + +use once_cell::sync::Lazy; + +use crate::camera_backend::FmtType; + +pub const MEDIA_TYPE_GUID: [(FmtType, [u8; 16]); 3] = [ + ( + FmtType::Yuy2, + [ + b'Y', b'U', b'Y', b'2', 0x00, 0x00, 0x10, 0x00, 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, + 0x9b, 0x71, + ], + ), + ( + FmtType::Rgb565, + [ + b'R', b'G', b'B', b'P', 0x00, 0x00, 0x10, 0x00, 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, + 0x9b, 0x71, + ], + ), + ( + FmtType::Nv12, + [ + b'N', b'V', b'1', b'2', 0x00, 0x00, 0x10, 0x00, 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, + 0x9b, 0x71, + ], + ), +]; + +pub static MEDIA_TYPE_GUID_HASHMAP: Lazy> = + Lazy::new(gen_mediatype_hashmap); + +fn gen_mediatype_hashmap() -> HashMap { + HashMap::from(MEDIA_TYPE_GUID) +} diff --git a/devices/src/usb/config.rs b/devices/src/usb/config.rs new file mode 100644 index 0000000000000000000000000000000000000000..aef89932a7a14ad4d202c152795692c2cf96bb8f --- /dev/null +++ b/devices/src/usb/config.rs @@ -0,0 +1,269 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +/// USB Command +/// Run/Stop +pub const USB_CMD_RUN: u32 = 1 << 0; +/// Host Controller Reset +pub const USB_CMD_HCRST: u32 = 1 << 1; +/// Interrupter Enable +pub const USB_CMD_INTE: u32 = 1 << 2; +/// Host System Error Enable +pub const USB_CMD_HSEE: u32 = 1 << 3; +/// Light Host Controller Reset +pub const USB_CMD_LHCRST: u32 = 1 << 7; +/// Controller Save State +pub const USB_CMD_CSS: u32 = 1 << 8; +/// Controller Restore State +pub const USB_CMD_CRS: u32 = 1 << 9; +/// Enable Wrap Event +pub const USB_CMD_EWE: u32 = 1 << 10; +/// Enable U3 MFINDEX Stop +pub const USB_CMD_EU3S: u32 = 1 << 11; + +/// USB status +/// HC Halted +pub const USB_STS_HCH: u32 = 1 << 0; +/// Host System Error +pub const USB_STS_HSE: u32 = 1 << 2; +/// Event Interrupt +pub const USB_STS_EINT: u32 = 1 << 3; +/// Port Change Detect +pub const USB_STS_PCD: u32 = 1 << 4; +/// Save State Status +pub const USB_STS_SSS: u32 = 1 << 8; +/// Restore State Status +pub const USB_STS_RSS: u32 = 1 << 9; +/// Save/Restore Error +pub const USB_STS_SRE: u32 = 1 << 10; +/// Controller Not Ready +pub const USB_STS_CNR: u32 = 1 << 11; +/// Host Controller Error +pub const USB_STS_HCE: u32 = 1 << 12; + +/// Command Ring Control +/// Ring Cycle State +pub const CMD_RING_CTRL_RCS: u32 = 1 << 0; +/// Command Stop +pub const CMD_RING_CTRL_CS: u32 = 1 << 1; +/// Command Abort +pub const CMD_RING_CTRL_CA: u32 = 1 << 2; +/// Command Ring Running +pub const CMD_RING_CTRL_CRR: u32 = 1 << 3; +/// Interrupt Pending +pub const IMAN_IP: u32 = 1 << 0; +/// Interrupt Enable +pub const IMAN_IE: u32 = 1 << 1; +/// Event Handler Busy +pub const ERDP_EHB: u32 = 1 << 3; + +/// Port Status and Control Register +/// Current Connect Status +pub const PORTSC_CCS: u32 = 1 << 0; +/// Port Enabled/Disabled +pub const PORTSC_PED: u32 = 1 << 1; +/// Over-current Active +pub const PORTSC_OCA: u32 = 1 << 3; +/// Port Reset +pub const PORTSC_PR: u32 = 1 << 4; +/// Port Power +pub const PORTSC_PP: u32 = 1 << 9; +/// Port Speed +pub const 
PORTSC_SPEED_SHIFT: u32 = 10; +pub const PORTSC_SPEED_FULL: u32 = 1 << PORTSC_SPEED_SHIFT; +pub const PORTSC_SPEED_LOW: u32 = 2 << PORTSC_SPEED_SHIFT; +pub const PORTSC_SPEED_HIGH: u32 = 3 << PORTSC_SPEED_SHIFT; +pub const PORTSC_SPEED_SUPER: u32 = 4 << PORTSC_SPEED_SHIFT; +/// Port Indicator Control +pub const PORTSC_PLS_SHIFT: u32 = 5; +pub const PORTSC_PLS_MASK: u32 = 0xf; +/// Port Link State Write Strobe +pub const PORTSC_LWS: u32 = 1 << 16; +/// Connect Status Change +pub const PORTSC_CSC: u32 = 1 << 17; +/// Port Enabled/Disabled Change +pub const PORTSC_PEC: u32 = 1 << 18; +/// Warm Port Reset Change +pub const PORTSC_WRC: u32 = 1 << 19; +/// Over-current Change +pub const PORTSC_OCC: u32 = 1 << 20; +/// Port Reset Change +pub const PORTSC_PRC: u32 = 1 << 21; +/// Port Link State Change +pub const PORTSC_PLC: u32 = 1 << 22; +/// Port Config Error Change +pub const PORTSC_CEC: u32 = 1 << 23; +/// Cold Attach Status +pub const PORTSC_CAS: u32 = 1 << 24; +/// Wake on Connect Enable +pub const PORTSC_WCE: u32 = 1 << 25; +/// Wake on Disconnect Enable +pub const PORTSC_WDE: u32 = 1 << 26; +/// Wake on Over-current Enable +pub const PORTSC_WOE: u32 = 1 << 27; +/// Device Removable +pub const PORTSC_DR: u32 = 1 << 30; +/// Warm Port Reset +pub const PORTSC_WPR: u32 = 1 << 31; +/// Port Link State +pub const PLS_U0: u32 = 0; +pub const PLS_U1: u32 = 1; +pub const PLS_U2: u32 = 2; +pub const PLS_U3: u32 = 3; +pub const PLS_DISABLED: u32 = 4; +pub const PLS_RX_DETECT: u32 = 5; +pub const PLS_INACTIVE: u32 = 6; +pub const PLS_POLLING: u32 = 7; +pub const PLS_RECOVERY: u32 = 8; +pub const PLS_HOT_RESET: u32 = 9; +pub const PLS_COMPLIANCE_MODE: u32 = 10; +pub const PLS_TEST_MODE: u32 = 11; +pub const PLS_RESUME: u32 = 15; + +/// USB speed +pub const USB_SPEED_LOW: u32 = 0; +pub const USB_SPEED_FULL: u32 = 1; +pub const USB_SPEED_HIGH: u32 = 2; +pub const USB_SPEED_SUPER: u32 = 3; +pub const USB_SPEED_MASK_LOW: u32 = 1 << USB_SPEED_LOW; +pub const USB_SPEED_MASK_FULL: u32 = 1 << USB_SPEED_FULL; +pub const USB_SPEED_MASK_HIGH: u32 = 1 << USB_SPEED_HIGH; +pub const USB_SPEED_MASK_SUPER: u32 = 1 << USB_SPEED_SUPER; + +/// See the spec section 8.3.1 Packet Identifier Field. +pub const USB_TOKEN_SETUP: u8 = 0x2d; +pub const USB_TOKEN_IN: u8 = 0x69; +pub const USB_TOKEN_OUT: u8 = 0xe1; + +/// See the spec section 9.3 USB Device Requests. Setup Data. 
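+/// bmRequestType is one byte: bit 7 selects the direction, bits 6..5 the type
+/// (standard/class/vendor) and bits 4..0 the recipient, so the combined request
+/// constants further below are plain bitwise ORs. Worked example (values follow
+/// directly from the definitions in this file):
+///
+///     USB_INTERFACE_CLASS_IN_REQUEST
+///         = USB_DIRECTION_DEVICE_TO_HOST | USB_TYPE_CLASS | USB_RECIPIENT_INTERFACE
+///         = 0x80 | 0x20 | 0x01
+///         = 0xA1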
+pub const USB_DIRECTION_HOST_TO_DEVICE: u8 = 0 << 7; +pub const USB_DIRECTION_DEVICE_TO_HOST: u8 = 0x80; +pub const USB_TYPE_STANDARD: u8 = 0x00 << 5; +pub const USB_TYPE_CLASS: u8 = 1 << 5; +pub const USB_TYPE_VENDOR: u8 = 2 << 5; +pub const USB_TYPE_RESERVED: u8 = 3 << 5; +pub const USB_RECIPIENT_DEVICE: u8 = 0; +pub const USB_RECIPIENT_INTERFACE: u8 = 1; +pub const USB_RECIPIENT_ENDPOINT: u8 = 2; +pub const USB_RECIPIENT_OTHER: u8 = 3; +pub const USB_TYPE_MASK: u8 = 3 << 5; +pub const USB_RECIPIENT_MASK: u8 = 0x1F; + +/// USB device request combination +pub const USB_DEVICE_IN_REQUEST: u8 = + USB_DIRECTION_DEVICE_TO_HOST | USB_TYPE_STANDARD | USB_RECIPIENT_DEVICE; +pub const USB_DEVICE_OUT_REQUEST: u8 = + USB_DIRECTION_HOST_TO_DEVICE | USB_TYPE_STANDARD | USB_RECIPIENT_DEVICE; +pub const USB_INTERFACE_IN_REQUEST: u8 = + USB_DIRECTION_DEVICE_TO_HOST | USB_TYPE_STANDARD | USB_RECIPIENT_INTERFACE; +pub const USB_INTERFACE_OUT_REQUEST: u8 = + USB_DIRECTION_HOST_TO_DEVICE | USB_TYPE_STANDARD | USB_RECIPIENT_INTERFACE; +pub const USB_INTERFACE_CLASS_IN_REQUEST: u8 = + USB_DIRECTION_DEVICE_TO_HOST | USB_TYPE_CLASS | USB_RECIPIENT_INTERFACE; +pub const USB_INTERFACE_CLASS_OUT_REQUEST: u8 = + USB_DIRECTION_HOST_TO_DEVICE | USB_TYPE_CLASS | USB_RECIPIENT_INTERFACE; +pub const USB_ENDPOINT_IN_REQUEST: u8 = + USB_DIRECTION_DEVICE_TO_HOST | USB_TYPE_STANDARD | USB_RECIPIENT_ENDPOINT; +pub const USB_ENDPOINT_OUT_REQUEST: u8 = + USB_DIRECTION_HOST_TO_DEVICE | USB_TYPE_STANDARD | USB_RECIPIENT_ENDPOINT; + +/// USB Standard Request Code. 9.4 Standard Device Requests +pub const USB_REQUEST_GET_STATUS: u8 = 0; +pub const USB_REQUEST_CLEAR_FEATURE: u8 = 1; +pub const USB_REQUEST_SET_FEATURE: u8 = 3; +pub const USB_REQUEST_SET_ADDRESS: u8 = 5; +pub const USB_REQUEST_GET_DESCRIPTOR: u8 = 6; +pub const USB_REQUEST_SET_DESCRIPTOR: u8 = 7; +pub const USB_REQUEST_GET_CONFIGURATION: u8 = 8; +pub const USB_REQUEST_SET_CONFIGURATION: u8 = 9; +pub const USB_REQUEST_GET_INTERFACE: u8 = 10; +pub const USB_REQUEST_SET_INTERFACE: u8 = 11; +pub const USB_REQUEST_SYNCH_FRAME: u8 = 12; +pub const USB_REQUEST_SET_SEL: u8 = 48; +pub const USB_REQUEST_SET_ISOCH_DELAY: u8 = 49; + +/// See the spec section 9.4.5 Get Status +pub const USB_DEVICE_SELF_POWERED: u32 = 0; +pub const USB_DEVICE_REMOTE_WAKEUP: u32 = 1; + +/// USB Descriptor Type +pub const USB_DT_DEVICE: u8 = 1; +pub const USB_DT_CONFIGURATION: u8 = 2; +pub const USB_DT_STRING: u8 = 3; +pub const USB_DT_INTERFACE: u8 = 4; +pub const USB_DT_ENDPOINT: u8 = 5; +pub const USB_DT_DEVICE_QUALIFIER: u8 = 6; +pub const USB_DT_INTERFACE_POWER: u8 = 8; +pub const USB_DT_OTG: u8 = 9; +pub const USB_DT_DEBUG: u8 = 10; +pub const USB_DT_INTERFACE_ASSOCIATION: u8 = 11; +pub const USB_DT_BOS: u8 = 15; +pub const USB_DT_DEVICE_CAPABILITY: u8 = 16; +pub const USB_DT_PIPE_USAGE: u8 = 36; +pub const USB_DT_ENDPOINT_COMPANION: u8 = 48; + +/// USB SuperSpeed Device Capability. 
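+/// These values populate the UsbSuperSpeedCapDescriptor that descriptor.rs packs
+/// into the BOS descriptor when the device speed is USB_SPEED_SUPER.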
+pub const USB_SS_DEVICE_CAP: u8 = 0x3; +pub const USB_SS_DEVICE_SPEED_SUPPORTED_HIGH: u16 = 1 << 2; +pub const USB_SS_DEVICE_SPEED_SUPPORTED_SUPER: u16 = 1 << 3; +pub const USB_SS_DEVICE_FUNCTIONALITY_SUPPORT_HIGH: u8 = 2; +pub const USB_SS_DEVICE_FUNCTIONALITY_SUPPORT_SUPER: u8 = 3; + +/// USB Descriptor size +pub const USB_DT_DEVICE_SIZE: u8 = 18; +pub const USB_DT_CONFIG_SIZE: u8 = 9; +pub const USB_DT_INTERFACE_SIZE: u8 = 9; +pub const USB_DT_ENDPOINT_SIZE: u8 = 7; +pub const USB_DT_DEVICE_QUALIFIER_SIZE: u8 = 10; +pub const USB_DT_BOS_SIZE: u8 = 5; +pub const USB_DT_SS_CAP_SIZE: u8 = 10; +pub const USB_DT_PIPE_USAGE_SIZE: u8 = 4; +pub const USB_DT_SS_EP_COMP_SIZE: u8 = 6; + +/// USB Endpoint Descriptor +pub const USB_ENDPOINT_ATTR_CONTROL: u8 = 0; +pub const USB_ENDPOINT_ATTR_ISOC: u8 = 1; +pub const USB_ENDPOINT_ATTR_BULK: u8 = 2; +pub const USB_ENDPOINT_ATTR_INT: u8 = 3; +pub const USB_ENDPOINT_ATTR_TRANSFER_TYPE_MASK: u8 = 0x3; +pub const USB_ENDPOINT_ATTR_INVALID: u8 = 255; +pub const USB_ENDPOINT_ADDRESS_NUMBER_MASK: u8 = 0xf; + +/// See the spec section 9.6.3 Configuration. Standard Configuration Descriptor. +pub const USB_CONFIGURATION_ATTR_ONE: u8 = 1 << 7; +pub const USB_CONFIGURATION_ATTR_SELF_POWER: u8 = 1 << 6; +pub const USB_CONFIGURATION_ATTR_REMOTE_WAKEUP: u8 = 1 << 5; + +/// USB Class +pub const USB_CLASS_HID: u8 = 3; +pub const USB_CLASS_MASS_STORAGE: u8 = 8; +pub const USB_CLASS_VIDEO: u8 = 0xe; +pub const USB_CLASS_MISCELLANEOUS: u8 = 0xef; + +/// USB Subclass +pub const USB_SUBCLASS_BOOT: u8 = 0x01; +pub const USB_SUBCLASS_SCSI: u8 = 0x06; + +/// USB Interface Protocol +pub const USB_IFACE_PROTOCOL_KEYBOARD: u8 = 0x01; +pub const USB_IFACE_PROTOCOL_BOT: u8 = 0x50; +pub const USB_IFACE_PROTOCOL_UAS: u8 = 0x62; + +/// CRC16 of "STRATOVIRT" +pub const USB_VENDOR_ID_STRATOVIRT: u16 = 0xB74C; + +/// USB Product IDs +pub const USB_PRODUCT_ID_UVC: u16 = 0x0001; +pub const USB_PRODUCT_ID_KEYBOARD: u16 = 0x0002; +pub const USB_PRODUCT_ID_STORAGE: u16 = 0x0003; +pub const USB_PRODUCT_ID_TABLET: u16 = 0x0004; +pub const USB_PRODUCT_ID_UAS: u16 = 0x0005; diff --git a/devices/src/usb/descriptor.rs b/devices/src/usb/descriptor.rs new file mode 100644 index 0000000000000000000000000000000000000000..55858cd4dc7d2a7d9043a9849a9ad882b151fe2a --- /dev/null +++ b/devices/src/usb/descriptor.rs @@ -0,0 +1,564 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::sync::Arc; + +use anyhow::{bail, Context, Result}; + +use super::config::*; +use super::UsbDeviceBase; +use util::byte_code::ByteCode; + +pub const USB_MAX_INTERFACES: u32 = 16; +const USB_DESCRIPTOR_TYPE_SHIFT: u32 = 8; +const USB_DESCRIPTOR_INDEX_MASK: u32 = 0xff; +// The max length of the string descriptor is 255. +// And the header occupies 2 bytes, and each character occupies 2 bytes. 
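+// That leaves (255 - 2) / 2 = 126 characters (integer division), each stored as a
+// 2-byte little-endian UTF-16 code unit in get_string_descriptor() below.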
+const USB_STRING_MAX_LEN: usize = 126; + +/// USB device descriptor for transfer +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +pub struct UsbDeviceDescriptor { + pub bLength: u8, + pub bDescriptorType: u8, + pub bcdUSB: u16, + pub bDeviceClass: u8, + pub bDeviceSubClass: u8, + pub bDeviceProtocol: u8, + pub bMaxPacketSize0: u8, + pub idVendor: u16, + pub idProduct: u16, + pub bcdDevice: u16, + pub iManufacturer: u8, + pub iProduct: u8, + pub iSerialNumber: u8, + pub bNumConfigurations: u8, +} + +impl ByteCode for UsbDeviceDescriptor {} + +/// USB device qualifier descriptor for transfer +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +pub struct UsbDeviceQualifierDescriptor { + pub bLength: u8, + pub bDescriptorType: u8, + pub bcdUSB: u16, + pub bDeviceClass: u8, + pub bDeviceSubClass: u8, + pub bDeviceProtocol: u8, + pub bMaxPacketSize0: u8, + pub bNumConfigurations: u8, + pub bReserved: u8, +} +impl ByteCode for UsbDeviceQualifierDescriptor {} + +/// USB config descriptor for transfer +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +pub struct UsbConfigDescriptor { + pub bLength: u8, + pub bDescriptorType: u8, + pub wTotalLength: u16, + pub bNumInterfaces: u8, + pub bConfigurationValue: u8, + pub iConfiguration: u8, + pub bmAttributes: u8, + pub bMaxPower: u8, +} + +impl ByteCode for UsbConfigDescriptor {} + +/// USB interface descriptor for transfer +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +pub struct UsbInterfaceDescriptor { + pub bLength: u8, + pub bDescriptorType: u8, + pub bInterfaceNumber: u8, + pub bAlternateSetting: u8, + pub bNumEndpoints: u8, + pub bInterfaceClass: u8, + pub bInterfaceSubClass: u8, + pub bInterfaceProtocol: u8, + pub iInterface: u8, +} + +impl ByteCode for UsbInterfaceDescriptor {} + +/// USB endpoint descriptor for transfer +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +pub struct UsbEndpointDescriptor { + pub bLength: u8, + pub bDescriptorType: u8, + pub bEndpointAddress: u8, + pub bmAttributes: u8, + pub wMaxPacketSize: u16, + pub bInterval: u8, +} + +impl ByteCode for UsbEndpointDescriptor {} + +/// USB binary device object store descriptor for transfer. +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +struct UsbBOSDescriptor { + pub bLength: u8, + pub bDescriptorType: u8, + pub wTotalLength: u16, + pub bNumDeviceCaps: u8, +} + +impl ByteCode for UsbBOSDescriptor {} + +/// USB super speed capability descriptor for transfer. +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +pub struct UsbSuperSpeedCapDescriptor { + pub bLength: u8, + pub bDescriptorType: u8, + pub bDevCapabilityType: u8, + pub bmAttributes: u8, + pub wSpeedsSupported: u16, + pub bFunctionalitySupport: u8, + pub bU1DevExitLat: u8, + pub wU2DevExitLat: u16, +} + +impl ByteCode for UsbSuperSpeedCapDescriptor {} + +/// USB super speed endpoint companion descriptor for transfer. +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +pub struct UsbSuperSpeedEndpointCompDescriptor { + pub bLength: u8, + pub bDescriptorType: u8, + pub bMaxBurst: u8, + pub bmAttributes: u8, + pub wBytesPerInterval: u16, +} + +impl ByteCode for UsbSuperSpeedEndpointCompDescriptor {} + +/// USB device descriptor. 
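+///
+/// The descriptor tree is assembled top-down: a device owns configurations, a
+/// configuration owns IADs and/or interfaces, and an interface owns its
+/// class-specific "other" blobs plus endpoints. Minimal construction sketch
+/// (field values are placeholders, not defaults taken from any real device):
+///
+///     let device = Arc::new(UsbDescDevice {
+///         device_desc: UsbDeviceDescriptor {
+///             bLength: USB_DT_DEVICE_SIZE,
+///             bDescriptorType: USB_DT_DEVICE,
+///             bNumConfigurations: 1,
+///             ..Default::default()
+///         },
+///         configs: vec![Arc::new(UsbDescConfig {
+///             config_desc: UsbConfigDescriptor::default(),
+///             iad_desc: vec![],
+///             interfaces: vec![],
+///         })],
+///     });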
+pub struct UsbDescDevice { + pub device_desc: UsbDeviceDescriptor, + pub configs: Vec>, +} + +/// USB config descriptor. +pub struct UsbDescConfig { + pub config_desc: UsbConfigDescriptor, + pub iad_desc: Vec>, + pub interfaces: Vec>, +} + +/// USB Interface Association Descriptor, and related interfaces +pub struct UsbDescIAD { + pub iad_desc: UsbIadDescriptor, + pub itfs: Vec>, +} + +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +pub struct UsbIadDescriptor { + pub bLength: u8, + pub bDescriptorType: u8, + pub bFirstInterface: u8, + pub bInterfaceCount: u8, + pub bFunctionClass: u8, + pub bFunctionSubClass: u8, + pub bFunctionProtocol: u8, + pub iFunction: u8, +} + +impl ByteCode for UsbIadDescriptor {} + +/// USB interface descriptor. +pub struct UsbDescIface { + pub interface_desc: UsbInterfaceDescriptor, + pub other_desc: Vec>, + pub endpoints: Vec>, +} + +/// USB other descriptor. +#[derive(Debug)] +pub struct UsbDescOther { + pub data: Vec, +} + +/// USB endpoint descriptor. +pub struct UsbDescEndpoint { + pub endpoint_desc: UsbEndpointDescriptor, + pub extra: Vec, +} + +/// USB Descriptor. +pub struct UsbDescriptor { + pub device_desc: Option>, + pub configuration_selected: Option>, + pub interfaces: Vec>>, + pub altsetting: Vec, + pub interface_number: u32, + pub strings: Vec, + pub capabilities: Vec, +} + +impl UsbDescriptor { + pub fn new() -> Self { + Self { + device_desc: None, + configuration_selected: None, + interfaces: vec![None; USB_MAX_INTERFACES as usize], + altsetting: vec![0; USB_MAX_INTERFACES as usize], + interface_number: 0, + strings: Vec::new(), + capabilities: Vec::new(), + } + } + + fn get_device_descriptor(&self) -> Result> { + if let Some(desc) = self.device_desc.as_ref() { + Ok(desc.device_desc.as_bytes().to_vec()) + } else { + bail!("Device descriptor not found"); + } + } + + fn get_config_descriptor(&self, index: u32) -> Result> { + let confs = self + .device_desc + .as_ref() + .map(|desc| &desc.configs) + .with_context(|| "Device descriptor not found")?; + let conf = confs + .get(index as usize) + .with_context(|| format!("Config descriptor index {} is invalid", index))?; + let mut config_desc = conf.config_desc; + let mut iads = self.get_iads_descriptor(conf.iad_desc.as_ref())?; + let mut ifs = self.get_interfaces_descriptor(conf.interfaces.as_ref())?; + + config_desc.wTotalLength = + u16::from(config_desc.bLength) + iads.len() as u16 + ifs.len() as u16; + + let mut buf = config_desc.as_bytes().to_vec(); + buf.append(&mut iads); + buf.append(&mut ifs); + Ok(buf) + } + + fn get_iads_descriptor(&self, iad_desc: &[Arc]) -> Result> { + let mut iads = Vec::new(); + for iad in iad_desc { + let mut buf = self.get_single_iad_descriptor(iad.as_ref())?; + iads.append(&mut buf); + } + Ok(iads) + } + + fn get_single_iad_descriptor(&self, iad: &UsbDescIAD) -> Result> { + let mut buf = iad.iad_desc.as_bytes().to_vec(); + + let mut ifs = self.get_interfaces_descriptor(iad.itfs.as_ref())?; + buf.append(&mut ifs); + + Ok(buf) + } + + fn get_interfaces_descriptor(&self, ifaces: &[Arc]) -> Result> { + let mut ifs = Vec::new(); + for iface in ifaces { + let mut buf = self.get_single_interface_descriptor(iface.as_ref())?; + ifs.append(&mut buf); + } + + Ok(ifs) + } + + fn get_single_interface_descriptor(&self, iface: &UsbDescIface) -> Result> { + let desc = iface.interface_desc; + let mut buf = desc.as_bytes().to_vec(); + for i in 0..iface.other_desc.len() { + let desc = iface.other_desc[i].as_ref(); + for x in &desc.data { + 
buf.push(*x); + } + } + for i in 0..desc.bNumEndpoints as usize { + let mut ep = self.get_endpoint_descriptor(iface.endpoints[i].as_ref())?; + buf.append(&mut ep); + } + Ok(buf) + } + + fn get_endpoint_descriptor(&self, ep: &UsbDescEndpoint) -> Result> { + let desc = ep.endpoint_desc; + let mut buf = desc.as_bytes().to_vec(); + buf.append(&mut ep.extra.clone()); + Ok(buf) + } + + fn get_string_descriptor(&self, index: u32) -> Result> { + if index == 0 { + // Language ID + let str: [u8; 4] = [4, 3, 9, 4]; + return Ok(str.to_vec()); + } + let found_str = self + .strings + .get(index as usize) + .with_context(|| format!("String descriptor index {} is invalid", index))?; + let str_max_len = std::cmp::min(USB_STRING_MAX_LEN, found_str.len()); + let len = str_max_len as u8 * 2 + 2; + let mut vec = vec![0_u8; len as usize]; + vec[0] = len; + vec[1] = USB_DT_STRING; + + let mut pos = 2; + for i in 0..str_max_len { + vec[pos] = found_str.as_bytes()[i]; + vec[pos + 1] = 0; + pos += 2; + } + Ok(vec) + } + + fn get_device_qualifier_descriptor(&self) -> Result> { + if self.device_desc.is_none() { + bail!("device qualifier descriptor not found"); + } + + // SAFETY: device_desc has just been checked + let device_desc = &self.device_desc.as_ref().unwrap().device_desc; + let device_qualifier_desc = UsbDeviceQualifierDescriptor { + bLength: USB_DT_DEVICE_QUALIFIER_SIZE, + bDescriptorType: USB_DT_DEVICE_QUALIFIER, + bcdUSB: device_desc.bcdUSB, + bDeviceClass: device_desc.bDeviceClass, + bDeviceSubClass: device_desc.bDeviceSubClass, + bDeviceProtocol: device_desc.bDeviceProtocol, + bMaxPacketSize0: device_desc.bMaxPacketSize0, + bNumConfigurations: device_desc.bNumConfigurations, + bReserved: 0, + }; + + Ok(device_qualifier_desc.as_bytes().to_vec()) + } + + fn get_debug_descriptor(&self) -> Result> { + log::debug!("usb DEBUG descriptor"); + Ok(vec![]) + } + + fn get_bos_descriptor(&self, speed: u32) -> Result> { + let mut total = u16::from(USB_DT_BOS_SIZE); + let mut cap = Vec::new(); + let mut cap_num = 0; + + if speed == USB_SPEED_SUPER { + let default_cap = if self.capabilities.is_empty() { + vec![UsbSuperSpeedCapDescriptor { + bLength: USB_DT_SS_CAP_SIZE, + bDescriptorType: USB_DT_DEVICE_CAPABILITY, + bDevCapabilityType: USB_SS_DEVICE_CAP, + bmAttributes: 0, + wSpeedsSupported: USB_SS_DEVICE_SPEED_SUPPORTED_SUPER, + bFunctionalitySupport: USB_SS_DEVICE_FUNCTIONALITY_SUPPORT_SUPER, + bU1DevExitLat: 0xa, + wU2DevExitLat: 0x20, + }] + } else { + Vec::new() + }; + + for desc in default_cap.iter().chain(self.capabilities.iter()) { + let mut super_buf = (*desc).as_bytes().to_vec(); + cap_num += 1; + total += super_buf.len() as u16; + cap.append(&mut super_buf); + } + } + + let bos = UsbBOSDescriptor { + bLength: USB_DT_BOS_SIZE, + bDescriptorType: USB_DT_BOS, + wTotalLength: total, + bNumDeviceCaps: cap_num, + }; + let mut buf = bos.as_bytes().to_vec(); + buf.append(&mut cap); + Ok(buf) + } + + fn find_interface(&self, nif: u32, alt: u32) -> Option> { + let conf = self.configuration_selected.as_ref()?; + + for i in 0..conf.iad_desc.len() { + let ifaces = &conf.iad_desc[i].as_ref().itfs; + for iface in ifaces { + if u32::from(iface.interface_desc.bInterfaceNumber) == nif + && u32::from(iface.interface_desc.bAlternateSetting) == alt + { + return Some(iface.clone()); + } + } + } + for i in 0..conf.interfaces.len() { + let iface = conf.interfaces[i].as_ref(); + if u32::from(iface.interface_desc.bInterfaceNumber) == nif + && u32::from(iface.interface_desc.bAlternateSetting) == alt + { + return 
Some(conf.interfaces[i].clone()); + } + } + None + } +} + +impl Default for UsbDescriptor { + fn default() -> Self { + Self::new() + } +} + +/// USB descriptor ops including get/set descriptor. +pub trait UsbDescriptorOps { + /// Get device/configuration/string descriptor. + fn get_descriptor(&self, value: u32) -> Result>; + + /// Set configuration descriptor with the Configuration Value. + fn set_config_descriptor(&mut self, v: u8) -> Result<()>; + + /// Set interface descriptor with the Interface and Alternate Setting. + fn set_interface_descriptor(&mut self, index: u32, v: u32) -> Result<()>; + + /// Set super speed capability descriptors. + fn set_capability_descriptors(&mut self, caps: Vec); + + /// Init all endpoint descriptors and reset the USB endpoint. + fn init_endpoint(&mut self) -> Result<()>; + + /// Init descriptor with the device descriptor and string descriptors. + fn init_descriptor(&mut self, desc: Arc, str: Vec) -> Result<()>; +} + +impl UsbDescriptorOps for UsbDeviceBase { + fn get_descriptor(&self, value: u32) -> Result> { + let desc_type = value >> USB_DESCRIPTOR_TYPE_SHIFT; + let index = value & USB_DESCRIPTOR_INDEX_MASK; + let vec = match desc_type as u8 { + USB_DT_DEVICE => self.descriptor.get_device_descriptor()?, + USB_DT_CONFIGURATION => self.descriptor.get_config_descriptor(index)?, + USB_DT_STRING => self.descriptor.get_string_descriptor(index)?, + USB_DT_DEVICE_QUALIFIER => self.descriptor.get_device_qualifier_descriptor()?, + USB_DT_DEBUG => self.descriptor.get_debug_descriptor()?, + USB_DT_BOS => self.descriptor.get_bos_descriptor(self.speed)?, + _ => { + bail!("Unknown descriptor type {}", desc_type); + } + }; + Ok(vec) + } + + fn set_config_descriptor(&mut self, v: u8) -> Result<()> { + if v == 0 { + self.descriptor.interface_number = 0; + self.descriptor.configuration_selected = None; + } else { + let desc = self + .descriptor + .device_desc + .as_ref() + .with_context(|| "Device Descriptor not found")?; + let num = desc.device_desc.bNumConfigurations; + let mut found = false; + for i in 0..num as usize { + if desc.configs[i].config_desc.bConfigurationValue == v { + self.descriptor.interface_number = + u32::from(desc.configs[i].config_desc.bNumInterfaces); + self.descriptor.configuration_selected = Some(desc.configs[i].clone()); + found = true; + } + } + if !found { + bail!("Invalid bConfigurationValue {}", v); + } + } + for i in 0..self.descriptor.interface_number { + self.set_interface_descriptor(i, 0)?; + } + for i in self.descriptor.altsetting.iter_mut() { + *i = 0; + } + for it in self.descriptor.interfaces.iter_mut() { + *it = None; + } + Ok(()) + } + + fn set_interface_descriptor(&mut self, index: u32, v: u32) -> Result<()> { + let iface = self.descriptor.find_interface(index, v).with_context(|| { + format!( + "Interface descriptor not found. 
index {} value {}", + index, v + ) + })?; + self.descriptor.altsetting[index as usize] = v; + self.descriptor.interfaces[index as usize] = Some(iface); + self.init_endpoint()?; + Ok(()) + } + + fn set_capability_descriptors(&mut self, caps: Vec) { + self.descriptor.capabilities = caps; + } + + fn init_endpoint(&mut self) -> Result<()> { + self.reset_usb_endpoint(); + for i in 0..self.descriptor.interface_number { + let iface = self.descriptor.interfaces[i as usize].as_ref(); + if iface.is_none() { + continue; + } + let iface = iface.unwrap().clone(); + for e in 0..iface.interface_desc.bNumEndpoints { + let in_direction = iface.endpoints[e as usize].endpoint_desc.bEndpointAddress + & USB_DIRECTION_DEVICE_TO_HOST + == USB_DIRECTION_DEVICE_TO_HOST; + let ep = iface.endpoints[e as usize].endpoint_desc.bEndpointAddress + & USB_ENDPOINT_ADDRESS_NUMBER_MASK; + let usb_ep = self.get_mut_endpoint(in_direction, ep); + usb_ep.ep_type = iface.endpoints[e as usize].endpoint_desc.bmAttributes + & USB_ENDPOINT_ATTR_TRANSFER_TYPE_MASK; + } + } + Ok(()) + } + + fn init_descriptor(&mut self, device_desc: Arc, str: Vec) -> Result<()> { + self.descriptor.device_desc = Some(device_desc); + self.descriptor.strings = str; + self.set_config_descriptor(0)?; + Ok(()) + } +} diff --git a/devices/src/usb/error.rs b/devices/src/usb/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..b3b21297b91430668d1036bb2afe225ec7c4b4c2 --- /dev/null +++ b/devices/src/usb/error.rs @@ -0,0 +1,34 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum UsbError { + #[error("PciErr")] + PciErr { + #[from] + source: crate::pci::error::PciError, + }, + #[error("AddressSpace")] + AddressSpace { + #[from] + source: address_space::error::AddressSpaceError, + }, + #[error("Io")] + Io { + #[from] + source: std::io::Error, + }, + #[error("Memory access overflow, addr: 0x{0:X} offset: 0x{1:X}")] + MemoryAccessOverflow(u64, u64), +} diff --git a/devices/src/usb/hid.rs b/devices/src/usb/hid.rs new file mode 100644 index 0000000000000000000000000000000000000000..513005ef8d71ae8e99016551d79f26b3ee7ff824 --- /dev/null +++ b/devices/src/usb/hid.rs @@ -0,0 +1,541 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
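+
+//! Emulated USB HID state (keyboard and pointer/tablet): key codes and pointer
+//! events queued by the UI layer are translated through the HID_CODE table and
+//! the report descriptors below, then drained into HID input reports for the guest.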
+ +use std::fmt::{Display, Formatter, Result as FmtResult}; + +use log::error; + +use super::config::*; +use super::{UsbDeviceRequest, UsbPacket, UsbPacketStatus}; +use ui::input::set_kbd_led_state; + +/// HID keycode +const HID_KEYBOARD_LEFT_CONTROL: u8 = 0xe0; +#[allow(unused)] +const HID_KEYBOARD_LEFT_SHIFT: u8 = 0xe1; +#[allow(unused)] +const HID_KEYBOARD_LEFT_ALT: u8 = 0xe2; +#[allow(unused)] +const HID_KEYBOARD_LEFT_GUI: u8 = 0xe3; +#[allow(unused)] +const HID_KEYBOARD_RIGHT_CONTROL: u8 = 0xe4; +#[allow(unused)] +const HID_KEYBOARD_RIGHT_SHIFT: u8 = 0xe5; +#[allow(unused)] +const HID_KEYBOARD_RIGHT_ALT: u8 = 0xe6; +const HID_KEYBOARD_RIGHT_GUI: u8 = 0xe7; + +/// See the spec section 7.2 Class-Specific Requests +pub const HID_GET_REPORT: u8 = 0x01; +pub const HID_GET_IDLE: u8 = 0x02; +pub const HID_GET_PROTOCOL: u8 = 0x03; +pub const HID_SET_REPORT: u8 = 0x09; +pub const HID_SET_IDLE: u8 = 0x0a; +pub const HID_SET_PROTOCOL: u8 = 0x0b; + +/// See the spec section 7.2.5 Get Protocol Request +#[allow(unused)] +const HID_PROTOCTL_BOOT: u8 = 0; +const HID_PROTOCOL_REPORT: u8 = 1; +const KEYCODE_UP: u32 = 0x80; +pub const QUEUE_LENGTH: u32 = 16; +pub const QUEUE_MASK: u32 = QUEUE_LENGTH - 1; +const HID_USAGE_ERROR_ROLLOVER: u8 = 0x1; + +/// QKeyCode to HID code table +const HID_CODE: [u8; 0x100] = [ + 0x00, 0x29, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x2d, 0x2e, 0x2a, 0x2b, + 0x14, 0x1a, 0x08, 0x15, 0x17, 0x1c, 0x18, 0x0c, 0x12, 0x13, 0x2f, 0x30, 0x28, 0xe0, 0x04, 0x16, + 0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x0f, 0x33, 0x34, 0x35, 0xe1, 0x31, 0x1d, 0x1b, 0x06, 0x19, + 0x05, 0x11, 0x10, 0x36, 0x37, 0x38, 0xe5, 0x55, 0xe2, 0x2c, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, + 0x3f, 0x40, 0x41, 0x42, 0x43, 0x53, 0x47, 0x5f, 0x60, 0x61, 0x56, 0x5c, 0x5d, 0x5e, 0x57, 0x59, + 0x5a, 0x5b, 0x62, 0x63, 0x46, 0x00, 0x64, 0x44, 0x45, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, + 0xe8, 0xe9, 0x71, 0x72, 0x73, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x00, + 0x88, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0x00, 0x8b, 0x00, 0x89, 0xe7, 0x65, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x58, 0xe4, 0x00, 0x00, + 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x81, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x00, 0x54, 0x00, 0x46, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x48, 0x4a, 0x52, 0x4b, 0x00, 0x50, 0x00, 0x4f, 0x00, 0x4d, + 0x51, 0x4e, 0x49, 0x4c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe3, 0xe7, 0x65, 0x66, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +]; +/// Tablet report descriptor +const TABLET_REPORT_DESCRIPTOR: [u8; 89] = [ + 0x05, 0x01, // Usage Page (Generic Desktop) + 0x09, 0x02, // Usage (Mouse) + 0xa1, 0x01, // Collection (Application) + 0x09, 0x01, // Usage (Pointer) + 0xa1, 0x00, // Collection (Physical) + 0x05, 0x09, // Usage Page (Button) + 0x19, 0x01, // Usage Minimum (1) + 0x29, 0x05, // Usage Maximum (5) + 0x15, 0x00, // Logical Minimum (0) + 0x25, 0x01, // Logical Maximum (1) + 0x95, 0x05, // Report Count (5) + 0x75, 0x01, // Report Size (1) + 0x81, 0x02, // Input (Data, Variable, Absolute) + 0x95, 0x01, // Report Count (1) + 0x75, 0x03, // Report Size (3) + 0x81, 0x01, // Input 
(Constant) + 0x05, 0x01, // Usage Page (Generic Desktop) + 0x09, 0x30, // Usage (X) + 0x09, 0x31, // Usage (Y) + 0x15, 0x00, // Logical Minimum (0) + 0x26, 0xff, 0x7f, // Logical Maximum (0x7fff) + 0x35, 0x00, // Physical Minimum (0) + 0x46, 0xff, 0x7f, // Physical Maximum (0x7fff) + 0x75, 0x10, // Report Size (16) + 0x95, 0x02, // Report Count (2) + 0x81, 0x02, // Input (Data, Variable, Absolute) + 0x05, 0x01, // Usage Page (Generic Desktop) + 0x09, 0x38, // Usage (Wheel) + 0x15, 0x81, // Logical Minimum (-0x7f) + 0x25, 0x7f, // Logical Maximum (0x7f) + 0x35, 0x00, // Physical Minimum (same as logical) + 0x45, 0x00, // Physical Maximum (same as logical) + 0x75, 0x08, // Report Size (8) + 0x95, 0x01, // Report Count (1) + 0x81, 0x06, // Input (Data, Variable, Relative) + 0x05, 0x0c, // Usage Page (Consumer Device) + 0x0a, 0x38, 0x02, // Usage (AC Pan) + 0x15, 0x81, // Logical Minimum (-0x7f) + 0x25, 0x7f, // Logical Maximum (0x7f) + 0x75, 0x08, // Report Size (8) + 0x95, 0x01, // Report Count (1) + 0x81, 0x06, // Input (Data, Variable, Relative) + 0xc0, 0xc0, // End Collection +]; +/// Keyboard report descriptor +const KEYBOARD_REPORT_DESCRIPTOR: [u8; 63] = [ + 0x05, 0x01, // Usage Page (Generic Desktop) + 0x09, 0x06, // Usage (Keyboard) + 0xa1, 0x01, // Collection (Application) + 0x75, 0x01, // Report Size (1) + 0x95, 0x08, // Report Count (8) + 0x05, 0x07, // Usage Page (Key Codes) + 0x19, 0xe0, // Usage Minimum (224) + 0x29, 0xe7, // Usage Maximum (231) + 0x15, 0x00, // Logical Minimum (0) + 0x25, 0x01, // Logical Maximum (1) + 0x81, 0x02, // Input (Data, Variable, Absolute) + 0x95, 0x01, // Report Count (1) + 0x75, 0x08, // Report Size (8) + 0x81, 0x01, // Input (Constant) + 0x95, 0x05, // Report Count (5) + 0x75, 0x01, // Report Size (1) + 0x05, 0x08, // Usage Page (LEDs) + 0x19, 0x01, // Usage Minimum (1) + 0x29, 0x05, // Usage Maximum (5) + 0x91, 0x02, // Output (Data, Variable, Absolute) + 0x95, 0x01, // Report Count (1) + 0x75, 0x03, // Report Size (3) + 0x91, 0x01, // Output (Constant) + 0x95, 0x06, // Report Count (6) + 0x75, 0x08, // Report Size (8) + 0x15, 0x00, // Logical Minimum (0) + 0x25, 0xff, // Logical Maximum (255) + 0x05, 0x07, // Usage Page (Key Codes) + 0x19, 0x00, // Usage Minimum (0) + 0x29, 0xff, // Usage Maximum (255) + 0x81, 0x00, // Input (Data, Array) + 0xc0, // End Collection +]; + +/// HID type +#[derive(Debug)] +pub enum HidType { + Mouse, + Tablet, + Keyboard, + UnKnown, +} + +/// HID keyboard including keycode and modifier. +pub struct HidKeyboard { + /// Receive keycode from VNC. + pub keycodes: [u32; QUEUE_LENGTH as usize], + pub modifiers: u16, + /// Send keycode to driver. + pub key_buf: [u8; QUEUE_LENGTH as usize], + pub key_num: u32, +} + +impl HidKeyboard { + fn new() -> HidKeyboard { + HidKeyboard { + keycodes: [0; QUEUE_LENGTH as usize], + modifiers: 0, + key_buf: [0; QUEUE_LENGTH as usize], + key_num: 0, + } + } + + fn reset(&mut self) { + self.keycodes.iter_mut().for_each(|x| *x = 0); + self.modifiers = 0; + self.key_buf.iter_mut().for_each(|x| *x = 0); + self.key_num = 0; + } +} + +/// HID pointer event including position and button state. +#[derive(Debug, Clone, Copy, Default)] +pub struct HidPointerEvent { + /// Direction: left to right. + pub pos_x: u32, + /// Direction: up to down. + pub pos_y: u32, + /// Vertical scroll wheel. + pub v_wheel: i32, + /// Horizontal scroll wheel. + pub h_wheel: i32, + pub button_state: u32, +} + +/// HID pointer which include hid pointer event. 
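+///
+/// Pointer events are kept in a fixed-size ring buffer: `Hid::head` marks the
+/// next event to consume, `Hid::num` counts the queued events and indices are
+/// always wrapped with `QUEUE_MASK`.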
+pub struct HidPointer { + pub queue: [HidPointerEvent; QUEUE_LENGTH as usize], +} + +impl HidPointer { + fn new() -> Self { + HidPointer { + queue: [HidPointerEvent::default(); QUEUE_LENGTH as usize], + } + } + + fn reset(&mut self) { + self.queue + .iter_mut() + .for_each(|x| *x = HidPointerEvent::default()); + } +} + +/// Human Interface Device. +pub struct Hid { + pub(crate) head: u32, + pub(crate) num: u32, + pub(crate) kind: HidType, + protocol: u8, + idle: u8, + pub(crate) keyboard: HidKeyboard, + pub(crate) pointer: HidPointer, +} + +impl Hid { + pub fn new(kind: HidType) -> Self { + Hid { + head: 0, + num: 0, + kind, + protocol: 0, + idle: 0, + keyboard: HidKeyboard::new(), + pointer: HidPointer::new(), + } + } + + pub fn reset(&mut self) { + self.head = 0; + self.num = 0; + self.protocol = HID_PROTOCOL_REPORT; + self.idle = 0; + self.keyboard.reset(); + self.pointer.reset(); + } + + fn convert_to_hid_code(&mut self) { + if self.num == 0 { + return; + } + let slot = self.head & QUEUE_MASK; + self.increase_head(); + self.num -= 1; + let keycode = self.keyboard.keycodes[slot as usize]; + let key = keycode & 0x7f; + let index = key | ((u32::from(self.keyboard.modifiers) & (1 << 8)) >> 1); + let hid_code = HID_CODE[index as usize]; + self.keyboard.modifiers &= !(1 << 8); + trace::usb_convert_to_hid_code(&hid_code, &index, &key); + + if hid_code == 0x0 { + return; + } + if hid_code == HID_KEYBOARD_LEFT_CONTROL && self.keyboard.modifiers & (1 << 9) == (1 << 9) { + self.keyboard.modifiers ^= (1 << 8) | (1 << 9); + return; + } + if (HID_KEYBOARD_LEFT_CONTROL..=HID_KEYBOARD_RIGHT_GUI).contains(&hid_code) + && keycode & KEYCODE_UP == KEYCODE_UP + { + self.keyboard.modifiers &= !(1 << (hid_code & 0x0f)); + return; + } + if (HID_KEYBOARD_LEFT_CONTROL..=0xe9).contains(&hid_code) { + self.keyboard.modifiers |= 1 << (hid_code & 0x0f); + return; + } + // Invalid code. 
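+ // The HID_CODE table is not expected to produce values in this range, so
+ // log the unexpected code and drop the event instead of queueing it.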
+ if (0xea..=0xef).contains(&hid_code) { + error!("Convert error, invalid code {}", hid_code); + return; + } + if keycode & KEYCODE_UP == KEYCODE_UP { + let mut i = self.keyboard.key_num as i32 - 1; + while i >= 0 { + if self.keyboard.key_buf[i as usize] == hid_code { + self.keyboard.key_num -= 1; + self.keyboard.key_buf[i as usize] = + self.keyboard.key_buf[self.keyboard.key_num as usize]; + self.keyboard.key_buf[self.keyboard.key_num as usize] = 0x0; + break; + } + i -= 1; + } + } else { + let mut i = self.keyboard.key_num as i32 - 1; + while i >= 0 { + if self.keyboard.key_buf[i as usize] == hid_code { + break; + } + i -= 1; + } + if i < 0 && self.keyboard.key_num < self.keyboard.key_buf.len() as u32 { + self.keyboard.key_buf[self.keyboard.key_num as usize] = hid_code; + self.keyboard.key_num += 1; + } + } + } + + fn keyboard_poll(&mut self) -> Vec { + let mut data = vec![0; 8]; + self.convert_to_hid_code(); + data[0] = self.keyboard.modifiers as u8; + data[1] = 0; + let len = data.len() - 2; + if self.keyboard.key_num > 6 { + for i in 0..len { + data[i + 2] = HID_USAGE_ERROR_ROLLOVER; + } + } else { + data[2..(len + 2)].clone_from_slice(&self.keyboard.key_buf[..len]); + } + data + } + + fn pointer_poll(&mut self) -> Vec { + let index = self.head; + if self.num != 0 { + self.increase_head(); + self.num -= 1; + } + let evt = &mut self.pointer.queue[(index & QUEUE_MASK) as usize]; + vec![ + evt.button_state as u8, + evt.pos_x as u8, + (evt.pos_x >> 8) as u8, + evt.pos_y as u8, + (evt.pos_y >> 8) as u8, + evt.v_wheel as u8, + evt.h_wheel as u8, + ] + } + + fn increase_head(&mut self) { + if self.head + 1 >= QUEUE_LENGTH { + self.head = 0; + } else { + self.head += 1; + } + } + + /// USB HID device handle control packet. + pub fn handle_control_packet( + &mut self, + packet: &mut UsbPacket, + device_req: &UsbDeviceRequest, + data: &mut [u8], + ) { + match device_req.request_type { + USB_INTERFACE_IN_REQUEST => { + self.do_interface_in_request(packet, device_req, data); + } + USB_INTERFACE_CLASS_IN_REQUEST => { + self.do_interface_class_in_request(packet, device_req, data); + } + USB_INTERFACE_CLASS_OUT_REQUEST => { + self.do_interface_class_out_request(packet, device_req, data); + } + _ => { + error!("Unhandled request {}", device_req.request); + packet.status = UsbPacketStatus::Stall; + } + } + } + + fn do_interface_in_request( + &mut self, + packet: &mut UsbPacket, + device_req: &UsbDeviceRequest, + data: &mut [u8], + ) { + match device_req.request { + USB_REQUEST_GET_DESCRIPTOR => match device_req.value >> 8 { + 0x22 => match self.kind { + HidType::Tablet => { + data[..TABLET_REPORT_DESCRIPTOR.len()] + .clone_from_slice(&TABLET_REPORT_DESCRIPTOR[..]); + packet.actual_length = TABLET_REPORT_DESCRIPTOR.len() as u32; + } + HidType::Keyboard => { + data[..KEYBOARD_REPORT_DESCRIPTOR.len()] + .clone_from_slice(&KEYBOARD_REPORT_DESCRIPTOR[..]); + packet.actual_length = KEYBOARD_REPORT_DESCRIPTOR.len() as u32; + } + _ => { + error!("Unknown HID type"); + packet.status = UsbPacketStatus::Stall; + } + }, + _ => { + error!("Invalid value: {:?}", device_req); + packet.status = UsbPacketStatus::Stall; + } + }, + _ => { + error!("Unhandled request {}", device_req.request); + packet.status = UsbPacketStatus::Stall; + } + } + } + + fn do_interface_class_in_request( + &mut self, + packet: &mut UsbPacket, + device_req: &UsbDeviceRequest, + data: &mut [u8], + ) { + match device_req.request { + HID_GET_REPORT => match self.kind { + HidType::Tablet => { + let buf = self.pointer_poll(); + 
data[0..buf.len()].copy_from_slice(buf.as_slice()); + packet.actual_length = buf.len() as u32; + } + HidType::Keyboard => { + let buf = self.keyboard_poll(); + data[0..buf.len()].copy_from_slice(buf.as_slice()); + packet.actual_length = buf.len() as u32; + } + _ => { + error!("Unsupported HID type for report"); + packet.status = UsbPacketStatus::Stall; + } + }, + HID_GET_PROTOCOL => { + data[0] = self.protocol; + packet.actual_length = 1; + } + HID_GET_IDLE => { + data[0] = self.idle; + packet.actual_length = 1; + } + _ => { + error!("Unhandled request {}", device_req.request); + packet.status = UsbPacketStatus::Stall; + } + } + } + + fn do_interface_class_out_request( + &mut self, + packet: &mut UsbPacket, + device_req: &UsbDeviceRequest, + data: &[u8], + ) { + match device_req.request { + HID_SET_REPORT => match self.kind { + HidType::Keyboard => { + trace::usb_keyboard_set_report(&data[0]); + set_kbd_led_state(data[0]); + } + _ => { + error!("Unsupported to set report"); + packet.status = UsbPacketStatus::Stall; + } + }, + HID_SET_PROTOCOL => { + self.protocol = device_req.value as u8; + } + HID_SET_IDLE => { + self.idle = (device_req.value >> 8) as u8; + } + _ => { + error!("Unhandled request {}", device_req.request); + packet.status = UsbPacketStatus::Stall; + } + } + } + + /// USB HID device handle data packet. + pub fn handle_data_packet(&mut self, p: &mut UsbPacket) { + match p.pid as u8 { + USB_TOKEN_IN => { + self.handle_token_in(p); + } + _ => { + error!("Unhandled packet {}", p.pid); + p.status = UsbPacketStatus::Stall; + } + }; + } + + fn handle_token_in(&mut self, p: &mut UsbPacket) { + let mut buf = Vec::new(); + if p.ep_number == 1 { + if self.num == 0 { + trace::usb_no_data_in_usb_device(); + p.status = UsbPacketStatus::Nak; + return; + } + match self.kind { + HidType::Keyboard => { + buf = self.keyboard_poll(); + } + HidType::Tablet => { + buf = self.pointer_poll(); + } + _ => { + error!("Unsupported HID device"); + p.status = UsbPacketStatus::Stall; + } + } + let len = buf.len(); + p.transfer_packet(&mut buf, len); + } else { + error!("Unhandled endpoint {}", p.ep_number); + p.status = UsbPacketStatus::Stall; + } + } +} + +impl Display for Hid { + fn fmt(&self, f: &mut Formatter) -> FmtResult { + write!( + f, + "HID head {} num {} kind {:?} protocol {} idle {}", + self.head, self.num, self.kind, self.protocol, self.idle + ) + } +} diff --git a/devices/src/usb/keyboard.rs b/devices/src/usb/keyboard.rs new file mode 100644 index 0000000000000000000000000000000000000000..418d6043464a898b92105f1cd3c16c899053fadd --- /dev/null +++ b/devices/src/usb/keyboard.rs @@ -0,0 +1,264 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
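+
+//! Emulated USB keyboard device built on top of the HID layer: key events
+//! are received from the UI input layer and delivered to the guest through
+//! the attached xHCI controller.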
+ +use std::sync::{Arc, Mutex, Weak}; + +use anyhow::Result; +use clap::Parser; +use log::{debug, info, warn}; +use once_cell::sync::Lazy; + +use super::descriptor::{ + UsbConfigDescriptor, UsbDescConfig, UsbDescDevice, UsbDescEndpoint, UsbDescIface, UsbDescOther, + UsbDescriptorOps, UsbDeviceDescriptor, UsbEndpointDescriptor, UsbInterfaceDescriptor, +}; +use super::hid::{Hid, HidType, QUEUE_LENGTH, QUEUE_MASK}; +use super::xhci::xhci_controller::{endpoint_number_to_id, XhciDevice}; +use super::{config::*, USB_DEVICE_BUFFER_DEFAULT_LEN}; +use super::{ + notify_controller, UsbDevice, UsbDeviceBase, UsbDeviceRequest, UsbPacket, UsbPacketStatus, +}; +use machine_manager::config::valid_id; +use ui::input::{register_keyboard, unregister_keyboard, KeyboardOpts}; +use util::gen_base_func; + +/// Keyboard device descriptor +static DESC_DEVICE_KEYBOARD: Lazy> = Lazy::new(|| { + Arc::new(UsbDescDevice { + device_desc: UsbDeviceDescriptor { + bLength: USB_DT_DEVICE_SIZE, + bDescriptorType: USB_DT_DEVICE, + idVendor: 0x0627, + idProduct: USB_PRODUCT_ID_KEYBOARD, + bcdDevice: 0, + iManufacturer: STR_MANUFACTURER_INDEX, + iProduct: STR_PRODUCT_KEYBOARD_INDEX, + iSerialNumber: STR_SERIAL_KEYBOARD_INDEX, + bcdUSB: 0x0100, + bDeviceClass: 0, + bDeviceSubClass: 0, + bDeviceProtocol: 0, + bMaxPacketSize0: 8, + bNumConfigurations: 1, + }, + configs: vec![Arc::new(UsbDescConfig { + config_desc: UsbConfigDescriptor { + bLength: USB_DT_CONFIG_SIZE, + bDescriptorType: USB_DT_CONFIGURATION, + wTotalLength: 0, + bNumInterfaces: 1, + bConfigurationValue: 1, + iConfiguration: STR_CONFIG_KEYBOARD_INDEX, + bmAttributes: USB_CONFIGURATION_ATTR_ONE | USB_CONFIGURATION_ATTR_REMOTE_WAKEUP, + bMaxPower: 50, + }, + iad_desc: vec![], + interfaces: vec![DESC_IFACE_KEYBOARD.clone()], + })], + }) +}); +/// Keyboard interface descriptor +static DESC_IFACE_KEYBOARD: Lazy> = Lazy::new(|| { + Arc::new(UsbDescIface { + interface_desc: UsbInterfaceDescriptor { + bLength: USB_DT_INTERFACE_SIZE, + bDescriptorType: USB_DT_INTERFACE, + bInterfaceNumber: 0, + bAlternateSetting: 0, + bNumEndpoints: 1, + bInterfaceClass: USB_CLASS_HID, + bInterfaceSubClass: USB_SUBCLASS_BOOT, + bInterfaceProtocol: USB_IFACE_PROTOCOL_KEYBOARD, + iInterface: 0, + }, + other_desc: vec![Arc::new(UsbDescOther { + // HID descriptor + data: vec![0x09, 0x21, 0x11, 0x01, 0x00, 0x01, 0x22, 0x3f, 0], + })], + endpoints: vec![Arc::new(UsbDescEndpoint { + endpoint_desc: UsbEndpointDescriptor { + bLength: USB_DT_ENDPOINT_SIZE, + bDescriptorType: USB_DT_ENDPOINT, + bEndpointAddress: USB_DIRECTION_DEVICE_TO_HOST | 0x1, + bmAttributes: USB_ENDPOINT_ATTR_INT, + wMaxPacketSize: 8, + bInterval: 0xa, + }, + extra: Vec::new(), + })], + }) +}); + +/// String descriptor index +const STR_MANUFACTURER_INDEX: u8 = 1; +const STR_PRODUCT_KEYBOARD_INDEX: u8 = 2; +const STR_CONFIG_KEYBOARD_INDEX: u8 = 3; +const STR_SERIAL_KEYBOARD_INDEX: u8 = 4; + +// Up flag. +const SCANCODE_UP: u16 = 0x80; +// Grey keys. +const SCANCODE_GREY: u16 = 0x80; +// Used to expand Grey keys. +const SCANCODE_EMUL0: u16 = 0xe0; + +/// String descriptor +const DESC_STRINGS: [&str; 5] = [ + "", + "StratoVirt", + "StratoVirt USB Keyboard", + "HID Keyboard", + "1", +]; + +#[derive(Parser, Clone, Debug, Default)] +#[command(no_binary_name(true))] +pub struct UsbKeyboardConfig { + #[arg(long)] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + id: String, + #[arg(long)] + bus: Option, + #[arg(long)] + port: Option, +} + +/// USB keyboard device. 
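+///
+/// Key events reach the device through `UsbKeyboardAdapter::do_key_event`,
+/// which queues scan codes in the shared `Hid` state and then wakes the
+/// interrupt IN endpoint via `notify_controller` so the guest can fetch the
+/// HID report.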
+pub struct UsbKeyboard { + base: UsbDeviceBase, + hid: Hid, + /// USB controller used to notify controller to transfer data. + cntlr: Option>>, +} + +pub struct UsbKeyboardAdapter { + usb_kbd: Arc>, +} + +impl KeyboardOpts for UsbKeyboardAdapter { + fn do_key_event(&mut self, keycode: u16, down: bool) -> Result<()> { + trace::usb_keyboard_event(&keycode, &down); + + let mut scan_codes = Vec::new(); + let mut keycode = keycode; + if keycode & SCANCODE_GREY != 0 { + scan_codes.push(u32::from(SCANCODE_EMUL0)); + keycode &= !SCANCODE_GREY; + } + + if !down { + keycode |= SCANCODE_UP; + } + scan_codes.push(u32::from(keycode)); + + let mut locked_kbd = self.usb_kbd.lock().unwrap(); + if scan_codes.len() as u32 + locked_kbd.hid.num > QUEUE_LENGTH { + trace::usb_keyboard_queue_full(); + // Return ok to ignore the request. + return Ok(()); + } + for code in scan_codes { + let index = ((locked_kbd.hid.head + locked_kbd.hid.num) & QUEUE_MASK) as usize; + locked_kbd.hid.num += 1; + locked_kbd.hid.keyboard.keycodes[index] = code; + } + drop(locked_kbd); + let clone_kbd = self.usb_kbd.clone(); + // Wakeup endpoint. + let ep_id = endpoint_number_to_id(true, 1); + notify_controller(&(clone_kbd as Arc>), ep_id) + } +} + +impl UsbKeyboard { + pub fn new(config: UsbKeyboardConfig) -> Self { + Self { + base: UsbDeviceBase::new(config.id, USB_DEVICE_BUFFER_DEFAULT_LEN), + hid: Hid::new(HidType::Keyboard), + cntlr: None, + } + } +} + +impl UsbDevice for UsbKeyboard { + gen_base_func!(usb_device_base, usb_device_base_mut, UsbDeviceBase, base); + + fn realize(mut self) -> Result>> { + self.base.reset_usb_endpoint(); + self.base.speed = USB_SPEED_FULL; + let mut s: Vec = DESC_STRINGS.iter().map(|&s| s.to_string()).collect(); + let prefix = &s[STR_SERIAL_KEYBOARD_INDEX as usize]; + s[STR_SERIAL_KEYBOARD_INDEX as usize] = self.base.generate_serial_number(prefix); + self.base.init_descriptor(DESC_DEVICE_KEYBOARD.clone(), s)?; + let id = self.device_id().to_string(); + let kbd = Arc::new(Mutex::new(self)); + let kbd_adapter = Arc::new(Mutex::new(UsbKeyboardAdapter { + usb_kbd: kbd.clone(), + })); + register_keyboard(&id, kbd_adapter); + + Ok(kbd) + } + + fn unrealize(&mut self) -> Result<()> { + unregister_keyboard(self.device_id()); + Ok(()) + } + + fn cancel_packet(&mut self, _packet: &Arc>) {} + + fn reset(&mut self) { + info!("Keyboard device reset"); + self.base.remote_wakeup = 0; + self.base.addr = 0; + self.hid.reset(); + } + + fn handle_control(&mut self, packet: &Arc>, device_req: &UsbDeviceRequest) { + let mut locked_packet = packet.lock().unwrap(); + match self + .base + .handle_control_for_descriptor(&mut locked_packet, device_req) + { + Ok(handled) => { + if handled { + debug!("Keyboard control handled by descriptor, return directly."); + return; + } + } + Err(e) => { + warn!( + "Received incorrect USB Keyboard descriptor message: {:?}", + e + ); + locked_packet.status = UsbPacketStatus::Stall; + return; + } + } + self.hid + .handle_control_packet(&mut locked_packet, device_req, &mut self.base.data_buf); + } + + fn handle_data(&mut self, p: &Arc>) { + let mut locked_p = p.lock().unwrap(); + self.hid.handle_data_packet(&mut locked_p); + } + + fn set_controller(&mut self, cntlr: Weak>) { + self.cntlr = Some(cntlr); + } + + fn get_controller(&self) -> Option>> { + self.cntlr.clone() + } +} diff --git a/devices/src/usb/mod.rs b/devices/src/usb/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..110152b84ff12b189f9dae00aa5036e8b4f455cf --- /dev/null +++ b/devices/src/usb/mod.rs @@ -0,0 
+1,781 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +#[cfg(feature = "usb_camera")] +pub mod camera; +#[cfg(feature = "usb_camera")] +pub mod camera_media_type_guid; +pub mod config; +pub mod error; +pub mod hid; +pub mod keyboard; +pub mod storage; +pub mod tablet; +#[cfg(feature = "usb_uas")] +pub mod uas; +#[cfg(feature = "usb_host")] +pub mod usbhost; +pub mod xhci; + +mod descriptor; + +pub use error::UsbError; + +use std::cmp::min; +use std::sync::{Arc, Mutex, Weak}; + +use anyhow::{bail, Context, Result}; +use log::{debug, error}; + +use self::descriptor::USB_MAX_INTERFACES; +use crate::DeviceBase; +use config::*; +use descriptor::{UsbDescriptor, UsbDescriptorOps}; +use machine_manager::qmp::qmp_channel::send_device_deleted_msg; +use util::aio::{mem_from_buf, mem_to_buf, Iovec}; +use util::byte_code::ByteCode; +use xhci::xhci_controller::{UsbPort, XhciDevice}; + +const USB_MAX_ENDPOINTS: u32 = 15; +/// USB max address. +const USB_MAX_ADDRESS: u8 = 127; +/// USB device default buffer length. +pub const USB_DEVICE_BUFFER_DEFAULT_LEN: usize = 4096; + +/// USB packet return status. +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] +pub enum UsbPacketStatus { + Success, + #[default] + NoDev, + Nak, + Stall, + Babble, + IoError, +} + +/// USB request used to transfer to USB device. 
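+///
+/// The fields mirror the standard 8-byte SETUP packet: `request_type` carries
+/// the direction/type/recipient bits, `request` selects the operation and
+/// `value`/`index`/`length` are request-specific parameters. For control
+/// transfers the request arrives packed into the 64-bit `UsbPacket::parameter`
+/// value, which `do_parameter` unpacks as:
+///
+/// ```text
+/// request_type = parameter         as u8
+/// request      = (parameter >> 8)  as u8
+/// value        = (parameter >> 16) as u16
+/// index        = (parameter >> 32) as u16
+/// length       = (parameter >> 48) as u16
+/// ```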
+#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq, Default)] +pub struct UsbDeviceRequest { + pub request_type: u8, + pub request: u8, + pub value: u16, + pub index: u16, + pub length: u16, +} + +impl ByteCode for UsbDeviceRequest {} + +impl std::fmt::Debug for UsbDeviceRequest { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("UsbDeviceRequest") + .field("request_type", &parse_request_type(self.request_type)) + .field("request", &parse_request(self.request)) + .field("value", &self.value) + .field("index", &self.index) + .field("length", &self.length) + .finish() + } +} + +fn parse_request_type(request_type: u8) -> String { + let mut ret = "".to_string(); + + match request_type & USB_DIRECTION_DEVICE_TO_HOST { + USB_DIRECTION_DEVICE_TO_HOST => ret.push_str("IN"), + _ => ret.push_str("OUT"), + } + + ret.push(' '); + + match request_type & USB_TYPE_MASK { + USB_TYPE_STANDARD => ret.push_str("STD"), + USB_TYPE_CLASS => ret.push_str("CLASS"), + USB_TYPE_VENDOR => ret.push_str("VEND"), + _ => ret.push_str("RSVD"), + } + + ret.push(' '); + + match request_type & USB_RECIPIENT_MASK { + USB_RECIPIENT_DEVICE => ret.push_str("DEV"), + USB_RECIPIENT_INTERFACE => ret.push_str("IFACE"), + USB_RECIPIENT_ENDPOINT => ret.push_str("EP"), + _ => ret.push_str("OTHER"), + } + + ret +} + +fn parse_request(request: u8) -> String { + match request { + USB_REQUEST_GET_STATUS => "GET STAT".to_string(), + USB_REQUEST_CLEAR_FEATURE => "CLR FEAT".to_string(), + USB_REQUEST_SET_FEATURE => "SET FEAT".to_string(), + USB_REQUEST_SET_ADDRESS => "SET ADDR".to_string(), + USB_REQUEST_GET_DESCRIPTOR => "GET DESC".to_string(), + USB_REQUEST_SET_DESCRIPTOR => "SET DESC".to_string(), + USB_REQUEST_GET_CONFIGURATION => "GET CONF".to_string(), + USB_REQUEST_SET_CONFIGURATION => "SET CONF".to_string(), + USB_REQUEST_GET_INTERFACE => "GET IFACE".to_string(), + USB_REQUEST_SET_INTERFACE => "SET IFACE".to_string(), + USB_REQUEST_SYNCH_FRAME => "SYN FRAME".to_string(), + USB_REQUEST_SET_SEL => "SET SEL".to_string(), + USB_REQUEST_SET_ISOCH_DELAY => "SET ISO DEL".to_string(), + _ => format!("UNKNOWN {}", request), + } +} + +/// The data transmission channel. +#[derive(Default, Clone, Copy)] +pub struct UsbEndpoint { + pub ep_number: u8, + pub in_direction: bool, + pub ep_type: u8, + pub ifnum: u8, + pub halted: bool, + pub max_packet_size: u32, +} + +impl UsbEndpoint { + pub fn new(ep_number: u8, in_direction: bool, ep_type: u8) -> Self { + Self { + ep_number, + in_direction, + ep_type, + ..Default::default() + } + } + + pub fn set_max_packet_size(&mut self, raw: u16) { + let size = raw & 0x7ff; + let micro_frames: u32 = match (raw >> 11) & 3 { + 1 => 2, + 2 => 3, + _ => 1, + }; + + self.max_packet_size = u32::from(size) * micro_frames; + } +} + +/// USB device common structure. +pub struct UsbDeviceBase { + pub base: DeviceBase, + pub port: Option>>, + pub speed: u32, + pub addr: u8, + pub data_buf: Vec, + pub remote_wakeup: u32, + pub ep_ctl: UsbEndpoint, + pub ep_in: Vec, + pub ep_out: Vec, + /// USB descriptor + pub descriptor: UsbDescriptor, + /// Check whether the usb device is hot unplugged. + pub unplugged: bool, + /// The index of the interfaces. 
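+ /// One entry per interface: `altsetting[i]` records the alternate setting
+ /// selected for interface `i`.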
+ pub altsetting: [u32; USB_MAX_INTERFACES as usize], +} + +impl UsbDeviceBase { + pub fn new(id: String, data_buf_len: usize) -> Self { + let mut dev = UsbDeviceBase { + base: DeviceBase::new(id, false, None), + port: None, + speed: 0, + addr: 0, + ep_ctl: UsbEndpoint::new(0, false, USB_ENDPOINT_ATTR_CONTROL), + ep_in: Vec::new(), + ep_out: Vec::new(), + data_buf: vec![0_u8; data_buf_len], + remote_wakeup: 0, + descriptor: UsbDescriptor::new(), + unplugged: false, + altsetting: [0_u32; USB_MAX_INTERFACES as usize], + }; + + for i in 0..USB_MAX_ENDPOINTS as u8 { + dev.ep_in + .push(UsbEndpoint::new(i + 1, true, USB_ENDPOINT_ATTR_INVALID)); + dev.ep_out + .push(UsbEndpoint::new(i + 1, false, USB_ENDPOINT_ATTR_INVALID)); + } + dev + } + + pub fn get_endpoint(&self, in_direction: bool, ep: u8) -> &UsbEndpoint { + if ep == 0 { + return &self.ep_ctl; + } + if in_direction { + &self.ep_in[(ep - 1) as usize] + } else { + &self.ep_out[(ep - 1) as usize] + } + } + + pub fn get_mut_endpoint(&mut self, in_direction: bool, ep: u8) -> &mut UsbEndpoint { + if ep == 0 { + return &mut self.ep_ctl; + } + if in_direction { + &mut self.ep_in[(ep - 1) as usize] + } else { + &mut self.ep_out[(ep - 1) as usize] + } + } + + pub fn reset_usb_endpoint(&mut self) { + self.ep_ctl.ep_number = 0; + self.ep_ctl.ep_type = USB_ENDPOINT_ATTR_CONTROL; + for i in 0..USB_MAX_ENDPOINTS { + self.ep_in[i as usize].ep_number = (i + 1) as u8; + self.ep_in[i as usize].in_direction = true; + self.ep_in[i as usize].ep_type = USB_ENDPOINT_ATTR_INVALID; + self.ep_out[i as usize].ep_number = (i + 1) as u8; + self.ep_out[i as usize].in_direction = false; + self.ep_out[i as usize].ep_type = USB_ENDPOINT_ATTR_INVALID; + } + } + + pub fn generate_serial_number(&self, prefix: &str) -> String { + format!("{}-{}", prefix, self.base.id) + } + + /// Handle USB control request which is for descriptor. + /// + /// # Arguments + /// + /// * `packet` - USB packet. + /// * `device_req` - USB device request. + /// * `data` - USB control transfer data. + /// + /// # Returns + /// + /// Return true if request is handled, false is unhandled. + pub fn handle_control_for_descriptor( + &mut self, + packet: &mut UsbPacket, + device_req: &UsbDeviceRequest, + ) -> Result { + let value = u32::from(device_req.value); + let index = u32::from(device_req.index); + let length = u32::from(device_req.length); + match device_req.request_type { + USB_DEVICE_IN_REQUEST => match device_req.request { + USB_REQUEST_GET_DESCRIPTOR => { + let res = self.get_descriptor(value)?; + let len = std::cmp::min(res.len() as u32, length); + self.data_buf[..(len as usize)].clone_from_slice(&res[..(len as usize)]); + packet.actual_length = len; + } + USB_REQUEST_GET_CONFIGURATION => { + self.data_buf[0] = if let Some(conf) = &self.descriptor.configuration_selected { + conf.config_desc.bConfigurationValue + } else { + 0 + }; + packet.actual_length = 1; + } + USB_REQUEST_GET_STATUS => { + let conf = if let Some(conf) = &self.descriptor.configuration_selected { + conf.clone() + } else { + let desc = self + .descriptor + .device_desc + .as_ref() + .with_context(|| "Device descriptor not found")?; + desc.configs + .first() + .with_context(|| "Config descriptor not found")? 
+ .clone() + }; + self.data_buf[0] = 0; + if conf.config_desc.bmAttributes & USB_CONFIGURATION_ATTR_SELF_POWER + == USB_CONFIGURATION_ATTR_SELF_POWER + { + self.data_buf[0] |= 1 << USB_DEVICE_SELF_POWERED; + } + + if self.remote_wakeup & USB_DEVICE_REMOTE_WAKEUP == USB_DEVICE_REMOTE_WAKEUP { + self.data_buf[0] |= 1 << USB_DEVICE_REMOTE_WAKEUP; + } + self.data_buf[1] = 0x00; + packet.actual_length = 2; + } + _ => { + return Ok(false); + } + }, + USB_DEVICE_OUT_REQUEST => match device_req.request { + USB_REQUEST_SET_ADDRESS => { + if value as u8 > USB_MAX_ADDRESS { + packet.status = UsbPacketStatus::Stall; + bail!("The address is invalid {}", value); + } else { + self.addr = value as u8; + } + } + USB_REQUEST_SET_CONFIGURATION => { + self.set_config_descriptor(value as u8)?; + } + USB_REQUEST_CLEAR_FEATURE => { + if value == USB_DEVICE_REMOTE_WAKEUP { + self.remote_wakeup = 0; + } + } + USB_REQUEST_SET_FEATURE => { + if value == USB_DEVICE_REMOTE_WAKEUP { + self.remote_wakeup = 1; + } + } + USB_REQUEST_SET_SEL => { + if self.speed == USB_SPEED_SUPER { + return Ok(true); + } + } + USB_REQUEST_SET_ISOCH_DELAY => { + if self.speed == USB_SPEED_SUPER { + return Ok(true); + } + } + _ => { + return Ok(false); + } + }, + USB_INTERFACE_IN_REQUEST => match device_req.request { + USB_REQUEST_GET_INTERFACE => { + if index < self.descriptor.interface_number { + self.data_buf[0] = self.descriptor.altsetting[index as usize] as u8; + packet.actual_length = 1; + } + } + _ => { + return Ok(false); + } + }, + USB_INTERFACE_OUT_REQUEST => match device_req.request { + USB_REQUEST_SET_INTERFACE => { + self.set_interface_descriptor(index, value)?; + } + _ => { + return Ok(false); + } + }, + _ => { + return Ok(false); + } + } + Ok(true) + } +} + +impl Drop for UsbDeviceBase { + fn drop(&mut self) { + if self.unplugged { + send_device_deleted_msg(&self.base.id); + } + } +} + +/// UsbDevice is the interface for USB device. +/// Include device handle attach/detach and the transfer between controller and device. +pub trait UsbDevice: Send + Sync { + /// Get the UsbDeviceBase. + fn usb_device_base(&self) -> &UsbDeviceBase; + + /// Get the mut UsbDeviceBase. + fn usb_device_base_mut(&mut self) -> &mut UsbDeviceBase; + + /// Realize the USB device. + fn realize(self) -> Result>>; + + /// Unrealize the USB device. + fn unrealize(&mut self) -> Result<()> { + Ok(()) + } + + /// Cancel specified USB packet. + fn cancel_packet(&mut self, packet: &Arc>); + + /// Handle the attach ops when attach device to controller. + fn handle_attach(&mut self) -> Result<()> { + let usb_dev = self.usb_device_base_mut(); + usb_dev.set_config_descriptor(0)?; + Ok(()) + } + + /// Reset the USB device. + fn reset(&mut self); + + /// Set the controller which the USB device attached. + /// USB device need to kick controller in some cases. + fn set_controller(&mut self, cntlr: Weak>); + + /// Get the controller which the USB device attached. + fn get_controller(&self) -> Option>>; + + /// Set the attached USB port. + fn set_usb_port(&mut self, port: Option>>) { + let usb_dev = self.usb_device_base_mut(); + usb_dev.port = port; + } + + /// Handle usb packet, used for controller to deliver packet to device. 
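+ ///
+ /// Packets addressed to endpoint 0 are treated as control transfers and go
+ /// through `do_parameter`; packets for any other endpoint are passed to
+ /// `handle_data`.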
+ fn handle_packet(&mut self, packet: &Arc>) { + let mut locked_packet = packet.lock().unwrap(); + locked_packet.status = UsbPacketStatus::Success; + let ep_nr = locked_packet.ep_number; + drop(locked_packet); + debug!("handle packet endpoint number {}", ep_nr); + if ep_nr == 0 { + if let Err(e) = self.do_parameter(packet) { + error!("Failed to handle control packet {:?}", e); + } + } else { + self.handle_data(packet); + } + } + + /// Handle control packet. + fn handle_control(&mut self, packet: &Arc>, device_req: &UsbDeviceRequest); + + /// Handle data packet. + fn handle_data(&mut self, packet: &Arc>); + + /// Unique device id. + fn device_id(&self) -> &str { + &self.usb_device_base().base.id + } + + /// Get the device speed. + fn speed(&self) -> u32 { + let usb_dev = self.usb_device_base(); + usb_dev.speed + } + + fn do_parameter(&mut self, packet: &Arc>) -> Result<()> { + let usb_dev = self.usb_device_base_mut(); + let mut locked_p = packet.lock().unwrap(); + let device_req = UsbDeviceRequest { + request_type: locked_p.parameter as u8, + request: (locked_p.parameter >> 8) as u8, + value: (locked_p.parameter >> 16) as u16, + index: (locked_p.parameter >> 32) as u16, + length: (locked_p.parameter >> 48) as u16, + }; + if device_req.length as usize > usb_dev.data_buf.len() { + locked_p.status = UsbPacketStatus::Stall; + bail!("data buffer small len {}", device_req.length); + } + if locked_p.pid as u8 == USB_TOKEN_OUT { + locked_p.transfer_packet(&mut usb_dev.data_buf, device_req.length as usize); + } + drop(locked_p); + self.handle_control(packet, &device_req); + let mut locked_p = packet.lock().unwrap(); + let usb_dev = self.usb_device_base_mut(); + if locked_p.is_async { + return Ok(()); + } + let mut len = device_req.length; + if len > locked_p.actual_length as u16 { + len = locked_p.actual_length as u16; + } + if locked_p.pid as u8 == USB_TOKEN_IN { + locked_p.actual_length = 0; + locked_p.transfer_packet(&mut usb_dev.data_buf, len as usize); + } + Ok(()) + } +} + +/// Notify controller to process data request. +pub fn notify_controller(dev: &Arc>, ep_id: u8) -> Result<()> { + let locked_dev = dev.lock().unwrap(); + let xhci = if let Some(cntlr) = &locked_dev.get_controller() { + cntlr.upgrade().unwrap() + } else { + bail!("USB controller not found"); + }; + let usb_dev = locked_dev.usb_device_base(); + let usb_port = if let Some(port) = &usb_dev.port { + port.upgrade().unwrap() + } else { + bail!("No usb port found"); + }; + let slot_id = usb_dev.addr; + let wakeup = usb_dev.remote_wakeup & USB_DEVICE_REMOTE_WAKEUP == USB_DEVICE_REMOTE_WAKEUP; + // Drop the small lock. + drop(locked_dev); + let mut locked_xhci = xhci.lock().unwrap(); + if wakeup { + let mut locked_port = usb_port.lock().unwrap(); + let port_status = locked_port.get_port_link_state(); + if port_status == PLS_U3 { + locked_port.set_port_link_state(PLS_RESUME); + debug!( + "Update portsc when notify controller, port {} status {}", + locked_port.portsc, port_status + ); + drop(locked_port); + locked_xhci.port_notify(&usb_port, PORTSC_PLC)?; + } + } + if let Err(e) = locked_xhci.wakeup_endpoint(u32::from(slot_id), u32::from(ep_id), 0) { + error!("Failed to wakeup endpoint {:?}", e); + } + Ok(()) +} + +/// Transfer ops for submit callback. +pub trait TransferOps: Send + Sync { + fn submit_transfer(&mut self); +} + +/// Usb packet used for device transfer data. +#[derive(Default)] +pub struct UsbPacket { + /// Unique number for packet tracking. + pub packet_id: u32, + /// USB packet id (direction of the transfer). 
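+ /// Compared against `USB_TOKEN_IN`/`USB_TOKEN_OUT` to decide whether data
+ /// moves device-to-host or host-to-device.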
+ pub pid: u32, + pub is_async: bool, + pub iovecs: Vec, + /// control transfer parameter. + pub parameter: u64, + /// USB packet return status. + pub status: UsbPacketStatus, + /// Actually transfer length. + pub actual_length: u32, + /// Endpoint number. + pub ep_number: u8, + /// Stream id. + pub stream: u32, + /// Transfer for complete packet. + pub xfer_ops: Option>>, + /// Target USB device for this packet. + pub target_dev: Option>>, +} + +impl std::fmt::Display for UsbPacket { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "pid {} param {} status {:?} actual_length {}", + self.pid, self.parameter, self.status, self.actual_length + ) + } +} + +impl UsbPacket { + pub fn new( + packet_id: u32, + pid: u32, + ep_number: u8, + stream: u32, + iovecs: Vec, + xfer_ops: Option>>, + target_dev: Option>>, + ) -> Self { + Self { + packet_id, + pid, + is_async: false, + iovecs, + parameter: 0, + status: UsbPacketStatus::Success, + actual_length: 0, + ep_number, + stream, + xfer_ops, + target_dev, + } + } + + /// Transfer USB packet from host to device or from device to host. + /// + /// # Arguments + /// + /// * `vec` - Data buffer. + /// * `len` - Transfer length. + pub fn transfer_packet(&mut self, vec: &mut [u8], len: usize) { + let len = min(vec.len(), len); + let to_host = self.pid as u8 & USB_TOKEN_IN == USB_TOKEN_IN; + let mut copied = 0; + if to_host { + for iov in &self.iovecs { + if iov.iov_len == 0 { + continue; + } + if len == copied { + break; + } + let cnt = min(iov.iov_len as usize, len - copied); + let tmp = &vec[copied..(copied + cnt)]; + // SAFETY: iovecs is generated by address_space and len is not less than tmp's. + if let Err(e) = unsafe { mem_from_buf(tmp, iov.iov_base) } { + error!("Failed to write mem: {:?}", e); + } + copied += cnt; + } + } else { + for iov in &self.iovecs { + if iov.iov_len == 0 { + continue; + } + if len == copied { + break; + } + let cnt = min(iov.iov_len as usize, len - copied); + let tmp = &mut vec[copied..(copied + cnt)]; + // SAFETY: iovecs is generation by address_space and len is not less than tmp's. 
+ if let Err(e) = unsafe { mem_to_buf(tmp, iov.iov_base) } { + error!("Failed to read mem {:?}", e); + } + copied += cnt; + } + } + self.actual_length = copied as u32; + } + + pub fn get_iovecs_size(&self) -> u64 { + let mut size = 0; + for iov in &self.iovecs { + size += iov.iov_len; + } + + size + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_usb_packet_transfer_in() { + let buf = [0_u8; 10]; + let hva = buf.as_ptr() as u64; + let mut packet = UsbPacket::default(); + packet.pid = u32::from(USB_TOKEN_IN); + packet.iovecs.push(Iovec::new(hva, 4)); + packet.iovecs.push(Iovec::new(hva + 4, 2)); + let mut data: Vec = vec![1, 2, 3, 4, 5, 6]; + packet.transfer_packet(&mut data, 6); + assert_eq!(packet.actual_length, 6); + assert_eq!(buf, [1, 2, 3, 4, 5, 6, 0, 0, 0, 0]); + } + + #[test] + fn test_usb_packet_transfer_in_over() { + let buf = [0_u8; 10]; + let hva = buf.as_ptr() as u64; + let mut packet = UsbPacket::default(); + packet.pid = u32::from(USB_TOKEN_IN); + packet.iovecs.push(Iovec::new(hva, 4)); + + let mut data: Vec = vec![1, 2, 3, 4, 5, 6]; + packet.transfer_packet(&mut data, 6); + assert_eq!(packet.actual_length, 4); + assert_eq!(buf, [1, 2, 3, 4, 0, 0, 0, 0, 0, 0]); + } + + #[test] + fn test_usb_packet_transfer_in_under() { + let buf = [0_u8; 10]; + let hva = buf.as_ptr() as u64; + let mut packet = UsbPacket::default(); + packet.pid = u32::from(USB_TOKEN_IN); + packet.iovecs.push(Iovec::new(hva, 4)); + + let mut data: Vec = vec![1, 2, 3, 4, 5, 6]; + packet.transfer_packet(&mut data, 2); + assert_eq!(packet.actual_length, 2); + assert_eq!(buf, [1, 2, 0, 0, 0, 0, 0, 0, 0, 0]); + } + + #[test] + fn test_usb_packet_transfer_in_over_buffer() { + let buf = [0_u8; 10]; + let hva = buf.as_ptr() as u64; + let mut packet = UsbPacket::default(); + packet.pid = u32::from(USB_TOKEN_IN); + packet.iovecs.push(Iovec::new(hva, 10)); + + let mut data: Vec = vec![1, 2, 3, 4, 5, 6]; + packet.transfer_packet(&mut data, 10); + assert_eq!(packet.actual_length, 6); + assert_eq!(buf, [1, 2, 3, 4, 5, 6, 0, 0, 0, 0]); + } + + #[test] + fn test_usb_packet_transfer_out() { + let buf: [u8; 10] = [1, 2, 3, 4, 5, 6, 0, 0, 0, 0]; + let hva = buf.as_ptr() as u64; + let mut packet = UsbPacket::default(); + packet.pid = u32::from(USB_TOKEN_OUT); + packet.iovecs.push(Iovec::new(hva, 4)); + packet.iovecs.push(Iovec::new(hva + 4, 2)); + + let mut data = [0_u8; 10]; + packet.transfer_packet(&mut data, 6); + assert_eq!(packet.actual_length, 6); + assert_eq!(data, [1, 2, 3, 4, 5, 6, 0, 0, 0, 0]); + } + + #[test] + fn test_usb_packet_transfer_out_over() { + let buf: [u8; 10] = [1, 2, 3, 4, 5, 6, 0, 0, 0, 0]; + let hva = buf.as_ptr() as u64; + let mut packet = UsbPacket::default(); + packet.pid = u32::from(USB_TOKEN_OUT); + packet.iovecs.push(Iovec::new(hva, 4)); + packet.iovecs.push(Iovec::new(hva + 4, 2)); + + let mut data = [0_u8; 10]; + packet.transfer_packet(&mut data, 10); + assert_eq!(packet.actual_length, 6); + assert_eq!(data, [1, 2, 3, 4, 5, 6, 0, 0, 0, 0]); + } + + #[test] + fn test_usb_packet_transfer_out_under() { + let buf: [u8; 10] = [1, 2, 3, 4, 5, 6, 0, 0, 0, 0]; + let hva = buf.as_ptr() as u64; + let mut packet = UsbPacket::default(); + packet.pid = u32::from(USB_TOKEN_OUT); + packet.iovecs.push(Iovec::new(hva, 4)); + + let mut data = [0_u8; 10]; + packet.transfer_packet(&mut data, 2); + assert_eq!(packet.actual_length, 2); + assert_eq!(data, [1, 2, 0, 0, 0, 0, 0, 0, 0, 0]); + } + + #[test] + fn test_usb_packet_transfer_out_over_buffer() { + let buf: [u8; 10] = [1, 2, 3, 4, 5, 6, 0, 
0, 0, 0]; + let hva = buf.as_ptr() as u64; + let mut packet = UsbPacket::default(); + packet.pid = u32::from(USB_TOKEN_OUT); + packet.iovecs.push(Iovec::new(hva, 6)); + + let mut data = [0_u8; 2]; + packet.transfer_packet(&mut data, 6); + assert_eq!(packet.actual_length, 2); + assert_eq!(data, [1, 2]); + } +} diff --git a/devices/src/usb/storage.rs b/devices/src/usb/storage.rs new file mode 100644 index 0000000000000000000000000000000000000000..aaa04aa08d79e54fc77964fbdc0ed137eea20fec --- /dev/null +++ b/devices/src/usb/storage.rs @@ -0,0 +1,637 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + collections::HashMap, + sync::{Arc, Mutex, Weak}, +}; + +use anyhow::{anyhow, bail, Context, Result}; +use byteorder::{ByteOrder, LittleEndian}; +use clap::Parser; +use log::{error, info, warn}; +use once_cell::sync::Lazy; + +use super::descriptor::{ + UsbConfigDescriptor, UsbDescConfig, UsbDescDevice, UsbDescEndpoint, UsbDescIface, + UsbDescriptorOps, UsbDeviceDescriptor, UsbEndpointDescriptor, UsbInterfaceDescriptor, +}; +use super::xhci::xhci_controller::XhciDevice; +use super::{config::*, USB_DEVICE_BUFFER_DEFAULT_LEN}; +use super::{UsbDevice, UsbDeviceBase, UsbDeviceRequest, UsbPacket, UsbPacketStatus}; +use crate::ScsiBus::{ + get_scsi_key, ScsiBus, ScsiRequest, ScsiRequestOps, ScsiSense, ScsiXferMode, EMULATE_SCSI_OPS, + GOOD, SCSI_CMD_BUF_SIZE, +}; +use crate::ScsiDisk::{ScsiDevConfig, ScsiDevice}; +use crate::{Bus, Device}; +use machine_manager::config::{DriveConfig, DriveFile}; +use util::aio::AioEngine; +use util::gen_base_func; + +// Storage device descriptor +static DESC_DEVICE_STORAGE: Lazy> = Lazy::new(|| { + Arc::new(UsbDescDevice { + device_desc: UsbDeviceDescriptor { + bLength: USB_DT_DEVICE_SIZE, + bDescriptorType: USB_DT_DEVICE, + idVendor: USB_STORAGE_VENDOR_ID, + idProduct: USB_PRODUCT_ID_STORAGE, + bcdDevice: 0, + iManufacturer: STR_MANUFACTURER_INDEX, + iProduct: STR_PRODUCT_STORAGE_INDEX, + iSerialNumber: STR_SERIAL_STORAGE_INDEX, + bcdUSB: 0x0200, + bDeviceClass: 0, + bDeviceSubClass: 0, + bDeviceProtocol: 0, + bMaxPacketSize0: 64, + bNumConfigurations: 1, + }, + configs: vec![Arc::new(UsbDescConfig { + config_desc: UsbConfigDescriptor { + bLength: USB_DT_CONFIG_SIZE, + bDescriptorType: USB_DT_CONFIGURATION, + wTotalLength: 0, + bNumInterfaces: 1, + bConfigurationValue: 1, + iConfiguration: STR_CONFIG_STORAGE_HIGH_INDEX, + bmAttributes: USB_CONFIGURATION_ATTR_ONE | USB_CONFIGURATION_ATTR_SELF_POWER, + bMaxPower: 50, + }, + iad_desc: vec![], + interfaces: vec![DESC_IFACE_STORAGE.clone()], + })], + }) +}); + +// Storage interface descriptor +static DESC_IFACE_STORAGE: Lazy> = Lazy::new(|| { + Arc::new(UsbDescIface { + interface_desc: UsbInterfaceDescriptor { + bLength: USB_DT_INTERFACE_SIZE, + bDescriptorType: USB_DT_INTERFACE, + bInterfaceNumber: 0, + bAlternateSetting: 0, + bNumEndpoints: 2, + bInterfaceClass: USB_CLASS_MASS_STORAGE, + bInterfaceSubClass: USB_SUBCLASS_SCSI, + bInterfaceProtocol: USB_IFACE_PROTOCOL_BOT, + iInterface: 0, + }, + 
other_desc: vec![], + endpoints: vec![ + Arc::new(UsbDescEndpoint { + endpoint_desc: UsbEndpointDescriptor { + bLength: USB_DT_ENDPOINT_SIZE, + bDescriptorType: USB_DT_ENDPOINT, + bEndpointAddress: USB_DIRECTION_DEVICE_TO_HOST | 0x01, + bmAttributes: USB_ENDPOINT_ATTR_BULK, + wMaxPacketSize: 512, + bInterval: 0, + }, + extra: Vec::new(), + }), + Arc::new(UsbDescEndpoint { + endpoint_desc: UsbEndpointDescriptor { + bLength: USB_DT_ENDPOINT_SIZE, + bDescriptorType: USB_DT_ENDPOINT, + bEndpointAddress: USB_DIRECTION_HOST_TO_DEVICE | 0x02, + bmAttributes: USB_ENDPOINT_ATTR_BULK, + wMaxPacketSize: 512, + bInterval: 0, + }, + extra: Vec::new(), + }), + ], + }) +}); + +// CRC16 of "STRATOVIRT" +const USB_STORAGE_VENDOR_ID: u16 = 0xB74C; + +// String descriptor index +const STR_MANUFACTURER_INDEX: u8 = 1; +const STR_PRODUCT_STORAGE_INDEX: u8 = 2; +const STR_SERIAL_STORAGE_INDEX: u8 = 3; +const STR_CONFIG_STORAGE_HIGH_INDEX: u8 = 5; + +// String descriptor +const DESC_STRINGS: [&str; 7] = [ + "", + "StratoVirt", + "StratoVirt USB Storage", + "3", + "Full speed config (usb 1.1)", + "High speed config (usb 2.0)", + "Super speed config (usb 3.0)", +]; + +pub const GET_MAX_LUN: u8 = 0xfe; +pub const MASS_STORAGE_RESET: u8 = 0xff; + +pub const CBW_SIGNATURE: u32 = 0x43425355; +pub const CSW_SIGNATURE: u32 = 0x53425355; +pub const CBW_FLAG_IN: u8 = 1 << 7; +pub const CBW_FLAG_OUT: u8 = 0; +pub const CBW_SIZE: u8 = 31; +pub const CSW_SIZE: u8 = 13; + +// USB-storage has only target 0 and lun 0. +const USB_STORAGE_SCSI_LUN_ID: u8 = 0; + +struct UsbStorageState { + mode: UsbMsdMode, + cbw: UsbMsdCbw, + csw: UsbMsdCsw, + cdb: Option<[u8; SCSI_CMD_BUF_SIZE]>, + iovec_len: u32, +} + +impl ScsiRequestOps for UsbMsdCsw { + fn scsi_request_complete_cb(&mut self, status: u8, _: Option) -> Result<()> { + if status != GOOD { + self.status = UsbMsdCswStatus::Failed as u8; + } + Ok(()) + } +} + +impl UsbStorageState { + fn new() -> Self { + UsbStorageState { + mode: UsbMsdMode::Cbw, + cbw: UsbMsdCbw::default(), + csw: UsbMsdCsw::new(), + cdb: None, + iovec_len: 0, + } + } + + /// Check if there exists SCSI CDB. + /// + /// # Arguments + /// + /// `exist` - Expected existence status. + /// + /// Return Error if expected existence status is not equal to the actual situation. + fn check_cdb_exist(&self, exist: bool) -> Result<()> { + if exist { + self.cdb.with_context(|| "No scsi CDB can be executed")?; + } else if self.cdb.is_some() { + bail!( + "Another request has not been done! cdb {:x?}", + self.cdb.unwrap() + ); + } + + Ok(()) + } + + /// Check if Iovec is empty. + /// + /// # Arguments + /// + /// `empty` - Expected status. If true, expect empty iovec. + /// + /// Return Error if expected iovec status is not equal to the actual situation. + fn check_iovec_empty(&self, empty: bool) -> Result<()> { + if empty != (self.iovec_len == 0) { + match empty { + true => { + bail!( + "Another request has not been done! Data buffer length {}.", + self.iovec_len + ); + } + false => { + bail!("Missing data buffer!"); + } + }; + } + + Ok(()) + } +} + +#[derive(Parser, Clone, Debug)] +#[command(no_binary_name(true))] +pub struct UsbStorageConfig { + #[arg(long, value_parser = ["usb-storage"])] + pub classtype: String, + #[arg(long)] + pub id: String, + #[arg(long)] + pub drive: String, + #[arg(long)] + pub(super) bus: Option, + #[arg(long)] + pub(super) port: Option, +} + +/// USB storage device. 
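+///
+/// Implements the Bulk-Only Transport protocol over one bulk IN and one bulk
+/// OUT endpoint: a CBW received on the OUT endpoint is turned into a SCSI
+/// request against the attached SCSI device, data is moved in the optional
+/// Data-In/Data-Out stage, and the result is reported back with a CSW.
+/// The backing drive must be configured with `aio=off,direct=false`.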
+pub struct UsbStorage { + base: UsbDeviceBase, + state: UsbStorageState, + /// USB controller used to notify controller to transfer data. + cntlr: Option>>, + /// Configuration of the USB storage device. + pub dev_cfg: UsbStorageConfig, + /// Configuration of the USB storage device's drive. + pub drive_cfg: DriveConfig, + /// Scsi bus attached to this usb-storage device. + scsi_bus: Arc>, + /// Effective scsi backend. + // Note: scsi device should attach to scsi bus. Logically, scsi device should not be placed in + // UsbStorage. But scsi device is needed in processing scsi request. Because the three + // (usb-storage/scsi bus/scsi device) correspond one-to-one, add scsi device member here + // for the execution efficiency (No need to find a unique device from the hash table of the + // unique bus). + scsi_dev: Option>>, + /// Drive backend files. + drive_files: Arc>>, +} + +#[derive(Debug)] +enum UsbMsdMode { + Cbw, + DataOut, + DataIn, + Csw, +} + +pub enum UsbMsdCswStatus { + Passed, + Failed, + PhaseError, +} + +#[derive(Debug, Default)] +struct UsbMsdCbw { + sig: u32, + tag: u32, + data_len: u32, + flags: u8, + lun: u8, + cmd_len: u8, + cmd: [u8; 16], +} + +impl UsbMsdCbw { + fn convert(&mut self, data: &[u8]) { + self.sig = LittleEndian::read_u32(&data[0..4]); + self.tag = LittleEndian::read_u32(&data[4..8]); + self.data_len = LittleEndian::read_u32(&data[8..12]); + self.flags = data[12]; + self.lun = data[13]; + self.cmd_len = data[14]; + self.cmd.copy_from_slice(&data[15..31]); + } +} + +#[derive(Debug, Copy, Clone)] +struct UsbMsdCsw { + sig: u32, + tag: u32, + residue: u32, + status: u8, +} + +impl UsbMsdCsw { + fn new() -> Self { + UsbMsdCsw { + sig: CSW_SIGNATURE, + tag: 0, + residue: 0, + status: 0, + } + } + + fn convert(&mut self, data: &mut [u8]) { + LittleEndian::write_u32(&mut data[0..4], self.sig); + LittleEndian::write_u32(&mut data[4..8], self.tag); + LittleEndian::write_u32(&mut data[8..12], self.residue); + data[12] = self.status; + } +} + +impl UsbStorage { + pub fn new( + dev_cfg: UsbStorageConfig, + drive_cfg: DriveConfig, + drive_files: Arc>>, + ) -> Result { + if drive_cfg.aio != AioEngine::Off || drive_cfg.direct { + bail!("USB-storage: \"aio=off,direct=false\" must be configured."); + } + + Ok(Self { + base: UsbDeviceBase::new(dev_cfg.id.clone(), USB_DEVICE_BUFFER_DEFAULT_LEN), + state: UsbStorageState::new(), + cntlr: None, + dev_cfg, + drive_cfg, + scsi_bus: Arc::new(Mutex::new(ScsiBus::new("".to_string()))), + scsi_dev: None, + drive_files, + }) + } + + pub fn do_realize(&mut self) -> Result<()> { + self.base.reset_usb_endpoint(); + self.base.speed = USB_SPEED_HIGH; + let mut s: Vec = DESC_STRINGS.iter().map(|&s| s.to_string()).collect(); + let prefix = &s[STR_SERIAL_STORAGE_INDEX as usize]; + s[STR_SERIAL_STORAGE_INDEX as usize] = self.base.generate_serial_number(prefix); + self.base.init_descriptor(DESC_DEVICE_STORAGE.clone(), s)?; + + // NOTE: "aio=off,direct=false" must be configured and other aio/direct values are not + // supported. 
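+
+ // The backing drive's media type decides whether a scsi-hd or scsi-cd
+ // device is emulated behind the USB mass-storage interface.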
+ let scsidev_classtype = match self.drive_cfg.media.as_str() { + "disk" => "scsi-hd".to_string(), + _ => "scsi-cd".to_string(), + }; + let scsi_dev_cfg = ScsiDevConfig { + classtype: scsidev_classtype, + drive: self.dev_cfg.drive.clone(), + ..Default::default() + }; + let scsi_device = ScsiDevice::new( + scsi_dev_cfg, + self.drive_cfg.clone(), + self.drive_files.clone(), + None, + self.scsi_bus.clone(), + ); + let realized_scsi = scsi_device.realize()?; + self.scsi_dev = Some(realized_scsi.clone()); + + self.scsi_bus + .lock() + .unwrap() + .attach_child(get_scsi_key(0, 0), realized_scsi) + } + + pub fn handle_control_packet(&mut self, packet: &mut UsbPacket, device_req: &UsbDeviceRequest) { + match device_req.request_type { + USB_ENDPOINT_OUT_REQUEST => { + if device_req.request == USB_REQUEST_CLEAR_FEATURE { + return; + } + } + USB_INTERFACE_CLASS_OUT_REQUEST => { + if device_req.request == MASS_STORAGE_RESET { + self.state.mode = UsbMsdMode::Cbw; + return; + } + } + USB_INTERFACE_CLASS_IN_REQUEST => { + if device_req.request == GET_MAX_LUN { + // TODO: Now only supports 1 LUN. + let maxlun = USB_STORAGE_SCSI_LUN_ID; + self.base.data_buf[0] = maxlun; + packet.actual_length = 1; + return; + } + } + _ => {} + } + + error!("Unhandled USB Storage request {}", device_req.request); + packet.status = UsbPacketStatus::Stall; + } + + fn handle_token_out(&mut self, packet: &mut UsbPacket) -> Result<()> { + if packet.ep_number != 2 { + bail!("Error ep_number {}!", packet.ep_number); + } + + match self.state.mode { + UsbMsdMode::Cbw => { + if packet.get_iovecs_size() < u64::from(CBW_SIZE) { + bail!("Bad CBW size {}", packet.get_iovecs_size()); + } + self.state.check_cdb_exist(false)?; + + let mut cbw_buf = [0_u8; CBW_SIZE as usize]; + packet.transfer_packet(&mut cbw_buf, CBW_SIZE as usize); + self.state.cbw.convert(&cbw_buf); + trace::usb_storage_handle_token_out(&self.state.cbw); + + if self.state.cbw.sig != CBW_SIGNATURE { + bail!("Bad signature {:x}", self.state.cbw.sig); + } + if self.state.cbw.lun != USB_STORAGE_SCSI_LUN_ID { + bail!( + "Bad lun id {:x}. Usb-storage only supports lun id 0!", + self.state.cbw.lun + ); + } + + self.state.cdb = Some(self.state.cbw.cmd); + + if self.state.cbw.data_len == 0 { + self.handle_scsi_request(packet)?; + self.state.mode = UsbMsdMode::Csw; + } else if self.state.cbw.flags & CBW_FLAG_IN == CBW_FLAG_IN { + self.state.mode = UsbMsdMode::DataIn; + } else { + self.state.mode = UsbMsdMode::DataOut; + } + } + UsbMsdMode::DataOut => { + self.handle_data_inout_packet(packet, UsbMsdMode::DataOut)?; + } + _ => { + bail!( + "Unexpected token out. Expected mode {:?} packet.", + self.state.mode + ); + } + } + Ok(()) + } + + fn handle_token_in(&mut self, packet: &mut UsbPacket) -> Result<()> { + if packet.ep_number != 1 { + bail!("Error ep_number {}!", packet.ep_number); + } + + match self.state.mode { + UsbMsdMode::DataOut => { + bail!("Not supported usb packet(Token_in and data_out)."); + } + UsbMsdMode::Csw => { + if packet.get_iovecs_size() < u64::from(CSW_SIZE) { + bail!("Bad CSW size {}", packet.get_iovecs_size()); + } + self.state.check_cdb_exist(true)?; + self.state.check_iovec_empty(self.state.cbw.data_len == 0)?; + + let mut csw_buf = [0_u8; CSW_SIZE as usize]; + self.state.csw.tag = self.state.cbw.tag; + self.state.csw.convert(&mut csw_buf); + trace::usb_storage_handle_token_in(&self.state.csw); + packet.transfer_packet(&mut csw_buf, CSW_SIZE as usize); + + // Reset UsbStorageState. 
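+ // Returning the CSW completes the BOT transaction, so drop back to
+ // waiting for the next CBW.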
+ self.state = UsbStorageState::new(); + } + UsbMsdMode::DataIn => { + self.handle_data_inout_packet(packet, UsbMsdMode::DataIn)?; + } + _ => { + bail!( + "Unexpected token in. Expected mode {:?} packet.", + self.state.mode + ); + } + } + Ok(()) + } + + fn handle_data_inout_packet(&mut self, packet: &mut UsbPacket, mode: UsbMsdMode) -> Result<()> { + self.state.check_cdb_exist(true)?; + self.state.check_iovec_empty(true)?; + + // Safety: iovecs are set in `setup_usb_packet` and iovec_len is no more than TRB_TR_LEN_MASK. + let iovec_len = packet.get_iovecs_size() as u32; + if iovec_len < self.state.cbw.data_len { + bail!( + "Insufficient transmission buffer, transfer size {}, buffer size {}, MSD mode {:?}!", + self.state.cbw.data_len, + iovec_len, + mode, + ); + } + + self.state.iovec_len = iovec_len; + self.handle_scsi_request(packet)?; + packet.actual_length = iovec_len; + self.state.mode = UsbMsdMode::Csw; + trace::usb_storage_handle_data_inout_packet(iovec_len); + + Ok(()) + } + + // Handle scsi request and save result in self.csw for next CSW packet. + fn handle_scsi_request(&mut self, packet: &mut UsbPacket) -> Result<()> { + self.state + .cdb + .with_context(|| "No scsi CDB can be executed")?; + + let csw = Box::new(UsbMsdCsw::new()); + let sreq = ScsiRequest::new( + self.state.cdb.unwrap(), + 0, + packet.iovecs.clone(), + self.state.iovec_len, + self.scsi_dev.as_ref().unwrap().clone(), + csw, + ) + .with_context(|| "Error in creating scsirequest.")?; + + if sreq.cmd.xfer > u64::from(sreq.datalen) && sreq.cmd.mode != ScsiXferMode::ScsiXferNone { + // Wrong USB packet which doesn't provide enough datain/dataout buffer. + bail!( + "command {:x} requested data's length({}), provided buffer length({})", + sreq.cmd.op, + sreq.cmd.xfer, + sreq.datalen + ); + } + + let sreq_h = match sreq.opstype { + EMULATE_SCSI_OPS => sreq.emulate_execute(), + _ => sreq.execute(), + } + .with_context(|| "Error in executing scsi request.")?; + + let csw_h = &sreq_h.lock().unwrap().upper_req; + let csw = csw_h.as_ref().as_any().downcast_ref::().unwrap(); + self.state.csw = *csw; + trace::usb_storage_handle_scsi_request(csw); + + Ok(()) + } +} + +impl UsbDevice for UsbStorage { + gen_base_func!(usb_device_base, usb_device_base_mut, UsbDeviceBase, base); + + fn realize(mut self) -> Result>> { + self.do_realize()?; + let storage: Arc> = Arc::new(Mutex::new(self)); + Ok(storage) + } + + fn cancel_packet(&mut self, _packet: &Arc>) {} + + fn reset(&mut self) { + info!("Storage device reset"); + self.base.remote_wakeup = 0; + self.base.addr = 0; + self.state = UsbStorageState::new(); + } + + fn handle_control(&mut self, packet: &Arc>, device_req: &UsbDeviceRequest) { + let mut locked_packet = packet.lock().unwrap(); + match self + .base + .handle_control_for_descriptor(&mut locked_packet, device_req) + { + Ok(handled) => { + if handled { + trace::usb_storage_handle_control(); + return; + } + self.handle_control_packet(&mut locked_packet, device_req) + } + Err(e) => { + warn!("Received incorrect USB Storage descriptor message: {:?}", e); + locked_packet.status = UsbPacketStatus::Stall; + } + } + } + + fn handle_data(&mut self, packet: &Arc>) { + let mut locked_packet = packet.lock().unwrap(); + trace::usb_storage_handle_data( + locked_packet.ep_number, + locked_packet.pid, + &self.state.mode, + ); + + let result = match locked_packet.pid as u8 { + USB_TOKEN_OUT => self.handle_token_out(&mut locked_packet), + USB_TOKEN_IN => self.handle_token_in(&mut locked_packet), + _ => Err(anyhow!("Bad token!")), + }; + + if let 
Err(e) = result { + warn!( + "USB-storage {}: handle data error: {:?}", + self.device_id(), + e + ); + locked_packet.status = UsbPacketStatus::Stall; + } + } + + fn set_controller(&mut self, cntlr: Weak>) { + self.cntlr = Some(cntlr); + } + + fn get_controller(&self) -> Option>> { + self.cntlr.clone() + } +} diff --git a/devices/src/usb/tablet.rs b/devices/src/usb/tablet.rs new file mode 100644 index 0000000000000000000000000000000000000000..b9c54fc9735768223392c28ec83d2098a38c5128 --- /dev/null +++ b/devices/src/usb/tablet.rs @@ -0,0 +1,305 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::cmp::min; +use std::sync::{Arc, Mutex, Weak}; + +use anyhow::{bail, Result}; +use clap::Parser; +use log::{debug, info, warn}; +use once_cell::sync::Lazy; + +use super::descriptor::{ + UsbConfigDescriptor, UsbDescConfig, UsbDescDevice, UsbDescEndpoint, UsbDescIface, UsbDescOther, + UsbDescriptorOps, UsbDeviceDescriptor, UsbEndpointDescriptor, UsbInterfaceDescriptor, +}; +use super::hid::{Hid, HidType, QUEUE_LENGTH, QUEUE_MASK}; +use super::xhci::xhci_controller::{endpoint_number_to_id, XhciDevice}; +use super::{ + config::*, notify_controller, UsbDevice, UsbDeviceBase, UsbDeviceRequest, UsbPacket, + UsbPacketStatus, USB_DEVICE_BUFFER_DEFAULT_LEN, +}; +use machine_manager::config::valid_id; +use ui::input::{ + register_pointer, unregister_pointer, Axis, InputEvent, InputType, PointerOpts, + INPUT_BUTTON_MASK, INPUT_BUTTON_WHEEL_DOWN, INPUT_BUTTON_WHEEL_LEFT, INPUT_BUTTON_WHEEL_RIGHT, + INPUT_BUTTON_WHEEL_UP, +}; +use util::gen_base_func; + +const INPUT_COORDINATES_MAX: u32 = 0x7fff; + +/// Tablet device descriptor +static DESC_DEVICE_TABLET: Lazy> = Lazy::new(|| { + Arc::new(UsbDescDevice { + device_desc: UsbDeviceDescriptor { + bLength: USB_DT_DEVICE_SIZE, + bDescriptorType: USB_DT_DEVICE, + idVendor: 0x0627, + idProduct: USB_PRODUCT_ID_TABLET, + bcdDevice: 0, + iManufacturer: STR_MANUFACTURER_INDEX, + iProduct: STR_PRODUCT_TABLET_INDEX, + iSerialNumber: STR_SERIAL_TABLET_INDEX, + bcdUSB: 0x0100, + bDeviceClass: 0, + bDeviceSubClass: 0, + bDeviceProtocol: 0, + bMaxPacketSize0: 8, + bNumConfigurations: 1, + }, + configs: vec![Arc::new(UsbDescConfig { + config_desc: UsbConfigDescriptor { + bLength: USB_DT_CONFIG_SIZE, + bDescriptorType: USB_DT_CONFIGURATION, + wTotalLength: 0, + bNumInterfaces: 1, + bConfigurationValue: 1, + iConfiguration: STR_CONFIG_TABLET_INDEX, + bmAttributes: USB_CONFIGURATION_ATTR_ONE | USB_CONFIGURATION_ATTR_REMOTE_WAKEUP, + bMaxPower: 50, + }, + iad_desc: vec![], + interfaces: vec![DESC_IFACE_TABLET.clone()], + })], + }) +}); +/// Tablet interface descriptor +static DESC_IFACE_TABLET: Lazy> = Lazy::new(|| { + Arc::new(UsbDescIface { + interface_desc: UsbInterfaceDescriptor { + bLength: USB_DT_INTERFACE_SIZE, + bDescriptorType: USB_DT_INTERFACE, + bInterfaceNumber: 0, + bAlternateSetting: 0, + bNumEndpoints: 1, + bInterfaceClass: USB_CLASS_HID, + bInterfaceSubClass: 0, + bInterfaceProtocol: 0, + iInterface: 0, + }, + other_desc: vec![Arc::new(UsbDescOther { + // 
HID descriptor + data: vec![0x09, 0x21, 0x01, 0x0, 0x0, 0x01, 0x22, 89, 0x0], + })], + endpoints: vec![Arc::new(UsbDescEndpoint { + endpoint_desc: UsbEndpointDescriptor { + bLength: USB_DT_ENDPOINT_SIZE, + bDescriptorType: USB_DT_ENDPOINT, + bEndpointAddress: USB_DIRECTION_DEVICE_TO_HOST | 0x1, + bmAttributes: USB_ENDPOINT_ATTR_INT, + wMaxPacketSize: 8, + bInterval: 0xa, + }, + extra: Vec::new(), + })], + }) +}); + +/// String descriptor index +const STR_MANUFACTURER_INDEX: u8 = 1; +const STR_PRODUCT_TABLET_INDEX: u8 = 2; +const STR_CONFIG_TABLET_INDEX: u8 = 3; +const STR_SERIAL_TABLET_INDEX: u8 = 4; + +/// String descriptor +const DESC_STRINGS: [&str; 5] = ["", "StratoVirt", "StratoVirt USB Tablet", "HID Tablet", "2"]; + +#[derive(Parser, Clone, Debug, Default)] +#[command(no_binary_name(true))] +pub struct UsbTabletConfig { + #[arg(long)] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + id: String, + #[arg(long)] + bus: Option, + #[arg(long)] + port: Option, +} + +/// USB tablet device. +pub struct UsbTablet { + base: UsbDeviceBase, + hid: Hid, + /// USB controller used to notify controller to transfer data. + cntlr: Option>>, +} + +impl UsbTablet { + pub fn new(config: UsbTabletConfig) -> Self { + Self { + base: UsbDeviceBase::new(config.id, USB_DEVICE_BUFFER_DEFAULT_LEN), + hid: Hid::new(HidType::Tablet), + cntlr: None, + } + } +} + +pub struct UsbTabletAdapter { + tablet: Arc>, +} + +impl PointerOpts for UsbTabletAdapter { + fn update_point_state(&mut self, input_event: InputEvent) -> Result<()> { + trace::usb_tablet_update_point_state(&input_event.input_type); + + let mut locked_tablet = self.tablet.lock().unwrap(); + if locked_tablet.hid.num >= QUEUE_LENGTH { + trace::usb_tablet_queue_full(); + + // Return ok to ignore the request. + return Ok(()); + } + let index = ((locked_tablet.hid.head + locked_tablet.hid.num) & QUEUE_MASK) as usize; + let evt = &mut locked_tablet.hid.pointer.queue[index]; + + match input_event.input_type { + InputType::ButtonEvent => { + let button_event = &input_event.button_event; + if button_event.down { + if button_event.button & INPUT_BUTTON_WHEEL_LEFT != 0 { + evt.h_wheel = -1; + } else if button_event.button & INPUT_BUTTON_WHEEL_RIGHT != 0 { + evt.h_wheel = 1; + } else { + evt.h_wheel = 0; + } + + if button_event.button & INPUT_BUTTON_WHEEL_UP != 0 { + evt.v_wheel = 1; + } else if button_event.button & INPUT_BUTTON_WHEEL_DOWN != 0 { + evt.v_wheel = -1; + } else { + evt.v_wheel = 0; + } + + evt.button_state |= button_event.button & INPUT_BUTTON_MASK; + } else { + evt.button_state &= !(button_event.button & INPUT_BUTTON_MASK); + } + } + InputType::MoveEvent => { + let move_event = &input_event.move_event; + match move_event.axis { + Axis::X => evt.pos_x = min(move_event.data, INPUT_COORDINATES_MAX), + Axis::Y => evt.pos_y = min(move_event.data, INPUT_COORDINATES_MAX), + } + } + _ => bail!( + "Input type: {:?} is unsupported by usb tablet!", + input_event.input_type + ), + }; + + Ok(()) + } + + fn sync(&mut self) -> Result<()> { + trace::usb_tablet_point_sync(); + let mut locked_tablet = self.tablet.lock().unwrap(); + + // The last evt is used to save the latest button state, + // so the max number of events can be cached at one time is QUEUE_LENGTH - 1. 
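+        // Rough sketch of the index math (QUEUE_LENGTH and QUEUE_MASK come from
+        // hid.rs; the concrete values here are assumed for illustration): with
+        // QUEUE_LENGTH = 16 and QUEUE_MASK = 15, head = 14 and num = 2 give
+        // curr_index = (14 + 2) & 15 = 0 and next_index = 1, i.e. the ring wraps
+        // and slot 1 is pre-initialized below for the next incoming event.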
+ if locked_tablet.hid.num == QUEUE_LENGTH - 1 { + trace::usb_tablet_queue_full(); + return Ok(()); + } + let curr_index = ((locked_tablet.hid.head + locked_tablet.hid.num) & QUEUE_MASK) as usize; + let next_index = (curr_index + 1) & QUEUE_MASK as usize; + let curr_evt = locked_tablet.hid.pointer.queue[curr_index]; + let next_evt = &mut locked_tablet.hid.pointer.queue[next_index]; + + // Update the status of the next event in advance. + next_evt.v_wheel = 0; + next_evt.h_wheel = 0; + next_evt.button_state = curr_evt.button_state; + next_evt.pos_x = curr_evt.pos_x; + next_evt.pos_y = curr_evt.pos_y; + + locked_tablet.hid.num += 1; + drop(locked_tablet); + let clone_tablet = self.tablet.clone(); + // Wakeup endpoint. + let ep_id = endpoint_number_to_id(true, 1); + notify_controller(&(clone_tablet as Arc>), ep_id) + } +} + +impl UsbDevice for UsbTablet { + gen_base_func!(usb_device_base, usb_device_base_mut, UsbDeviceBase, base); + + fn realize(mut self) -> Result>> { + self.base.reset_usb_endpoint(); + self.base.speed = USB_SPEED_FULL; + let mut s: Vec = DESC_STRINGS.iter().map(|&s| s.to_string()).collect(); + let prefix = &s[STR_SERIAL_TABLET_INDEX as usize]; + s[STR_SERIAL_TABLET_INDEX as usize] = self.base.generate_serial_number(prefix); + self.base.init_descriptor(DESC_DEVICE_TABLET.clone(), s)?; + let id = self.device_id().to_string(); + let tablet = Arc::new(Mutex::new(self)); + let tablet_adapter = Arc::new(Mutex::new(UsbTabletAdapter { + tablet: tablet.clone(), + })); + register_pointer(&id, tablet_adapter); + Ok(tablet) + } + + fn unrealize(&mut self) -> Result<()> { + unregister_pointer(self.device_id()); + Ok(()) + } + + fn cancel_packet(&mut self, _packet: &Arc>) {} + + fn reset(&mut self) { + info!("Tablet device reset"); + self.base.remote_wakeup = 0; + self.base.addr = 0; + self.hid.reset(); + } + + fn handle_control(&mut self, packet: &Arc>, device_req: &UsbDeviceRequest) { + let mut locked_packet = packet.lock().unwrap(); + match self + .base + .handle_control_for_descriptor(&mut locked_packet, device_req) + { + Ok(handled) => { + if handled { + debug!("Tablet control handled by descriptor, return directly."); + return; + } + } + Err(e) => { + warn!("Received incorrect USB Tablet descriptor message: {:?}", e); + locked_packet.status = UsbPacketStatus::Stall; + return; + } + } + self.hid + .handle_control_packet(&mut locked_packet, device_req, &mut self.base.data_buf); + } + + fn handle_data(&mut self, p: &Arc>) { + let mut locked_p = p.lock().unwrap(); + self.hid.handle_data_packet(&mut locked_p); + } + + fn set_controller(&mut self, cntlr: Weak>) { + self.cntlr = Some(cntlr); + } + + fn get_controller(&self) -> Option>> { + self.cntlr.clone() + } +} diff --git a/devices/src/usb/uas.rs b/devices/src/usb/uas.rs new file mode 100644 index 0000000000000000000000000000000000000000..301dff080518d086d588f79c0607f7852be1c32d --- /dev/null +++ b/devices/src/usb/uas.rs @@ -0,0 +1,1111 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
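+
+//! USB Attached SCSI (UAS) device model.
+//!
+//! The guest exchanges Information Units (IUs) over four bulk pipes (command,
+//! status, data-in, data-out). A command IU carries a 16-byte CDB plus a stream
+//! tag; the matching status pipe packet is filled with a sense IU once the SCSI
+//! request completes, and data moves on the data pipes under the same tag. A
+//! BOT (Bulk-Only Transport) alternate setting is also exposed, backed by the
+//! embedded usb-storage model.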
+ +use std::array; +use std::cmp::min; +use std::collections::HashMap; +use std::mem::size_of; +use std::sync::{Arc, Mutex, Weak}; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::Parser; +use log::{debug, error, info, warn}; +use once_cell::sync::Lazy; +use strum::EnumCount; +use strum_macros::EnumCount; + +use super::config::*; +use super::descriptor::{ + UsbConfigDescriptor, UsbDescConfig, UsbDescDevice, UsbDescEndpoint, UsbDescIface, + UsbDescriptorOps, UsbDeviceDescriptor, UsbEndpointDescriptor, UsbInterfaceDescriptor, + UsbSuperSpeedCapDescriptor, UsbSuperSpeedEndpointCompDescriptor, +}; +use super::storage::{UsbStorage, UsbStorageConfig, GET_MAX_LUN, MASS_STORAGE_RESET}; +use super::xhci::xhci_controller::XhciDevice; +use super::{ + UsbDevice, UsbDeviceBase, UsbDeviceRequest, UsbPacket, UsbPacketStatus, + USB_DEVICE_BUFFER_DEFAULT_LEN, +}; +use crate::ScsiBus::{ + get_scsi_key, scsi_cdb_xfer, ScsiBus, ScsiRequest, ScsiRequestOps, ScsiSense, ScsiXferMode, + CHECK_CONDITION, EMULATE_SCSI_OPS, GOOD, SCSI_SENSE_INVALID_PARAM_VALUE, + SCSI_SENSE_INVALID_TAG, SCSI_SENSE_NO_SENSE, +}; +use crate::ScsiDisk::{ScsiDevConfig, ScsiDevice}; +use crate::{Bus, Device}; +use machine_manager::config::{DriveConfig, DriveFile}; +use util::gen_base_func; +use util::{aio::AioEngine, byte_code::ByteCode}; + +// Size of UasIUBody +const UAS_IU_BODY_SIZE: usize = 30; + +// Size of cdb in UAS Command IU +const UAS_COMMAND_CDB_SIZE: usize = 16; + +// UAS Pipe IDs +const UAS_PIPE_ID_COMMAND: u8 = 0x01; +const UAS_PIPE_ID_STATUS: u8 = 0x02; +const UAS_PIPE_ID_DATA_IN: u8 = 0x03; +const UAS_PIPE_ID_DATA_OUT: u8 = 0x04; + +// UAS Streams Attributes +const UAS_MAX_STREAMS_BM_ATTR: u8 = 4; +const UAS_MAX_STREAMS: usize = 1 << UAS_MAX_STREAMS_BM_ATTR; + +// UAS IU IDs +const UAS_IU_ID_COMMAND: u8 = 0x01; +const UAS_IU_ID_SENSE: u8 = 0x03; +const UAS_IU_ID_RESPONSE: u8 = 0x04; +const UAS_IU_ID_TASK_MGMT: u8 = 0x05; + +// UAS Response Codes +const UAS_RC_TMF_COMPLETE: u8 = 0x00; +const _UAS_RC_INVALID_IU: u8 = 0x02; +const UAS_RC_TMF_NOT_SUPPORTED: u8 = 0x04; +const _UAS_RC_TMF_FAILED: u8 = 0x05; +const _UAS_RC_TMF_SUCCEEDED: u8 = 0x08; +const _UAS_RC_INCORRECT_LUN: u8 = 0x09; +const _UAS_RC_OVERLAPPED_TAG: u8 = 0x0A; + +// UAS Task Management Functions +const UAS_TMF_ABORT_TASK: u8 = 0x01; +const _UAS_TMF_ABORT_TASK_SET: u8 = 0x02; +const _UAS_TMF_CLEAR_TASK_SET: u8 = 0x04; +const _UAS_TMF_LOGICAL_UNIT_RESET: u8 = 0x08; +const _UAS_TMF_I_T_NEXUS_RESET: u8 = 0x10; +const _UAS_TMF_CLEAR_ACA: u8 = 0x40; +const _UAS_TMF_QUERY_TASK: u8 = 0x80; +const _UAS_TMF_QUERY_TASK_SET: u8 = 0x81; +const _UAS_TMF_QUERY_ASYNC_EVENT: u8 = 0x82; + +// Interface alt settings +const UAS_ALT_SETTING_BOT: u8 = 0; +const UAS_ALT_SETTING_UAS: u8 = 1; + +#[derive(Parser, Clone, Debug)] +#[command(no_binary_name(true))] +pub struct UsbUasConfig { + #[arg(long, value_parser = ["usb-uas"])] + pub classtype: String, + #[arg(long)] + pub drive: String, + #[arg(long)] + pub id: String, + #[arg(long)] + bus: Option, + #[arg(long)] + port: Option, +} + +impl From for UsbStorageConfig { + fn from(uas_config: UsbUasConfig) -> Self { + Self { + classtype: uas_config.classtype, + id: String::new(), + drive: uas_config.drive, + bus: uas_config.bus, + port: uas_config.port, + } + } +} + +pub struct UsbUas { + base: UsbDeviceBase, + uas_config: UsbUasConfig, + scsi_bus: Arc>, + scsi_device: Option>>, + drive_cfg: DriveConfig, + drive_files: Arc>>, + commands: [Option; UAS_MAX_STREAMS + 1], + statuses: [Option>>; UAS_MAX_STREAMS + 1], + data: 
[Option>>; UAS_MAX_STREAMS + 1], + bot: UsbStorage, + is_bot: bool, +} + +#[derive(Debug, Default, EnumCount)] +enum UsbUasStringId { + #[default] + Invalid = 0, + Manufacturer = 1, + Product = 2, + SerialNumber = 3, + Configuration = 4, +} + +const UAS_DESC_STRINGS: [&str; UsbUasStringId::COUNT] = [ + "", + "StratoVirt", + "StratoVirt USB Uas", + "5", + "Super speed config (usb 3.0)", +]; + +struct UasRequest { + data: Option>>, + status: Arc>, + iu: UasIU, + completed: bool, +} + +impl ScsiRequestOps for UasRequest { + fn scsi_request_complete_cb( + &mut self, + scsi_status: u8, + scsi_sense: Option, + ) -> Result<()> { + let tag = u16::from_be(self.iu.header.tag); + let sense = scsi_sense.unwrap_or(SCSI_SENSE_NO_SENSE); + UsbUas::fill_sense(&mut self.status.lock().unwrap(), tag, sense, scsi_status); + self.complete(); + Ok(()) + } +} + +#[derive(Debug, PartialEq, Eq)] +enum UasPacketStatus { + Completed = 0, + Pending = 1, +} + +impl From for UasPacketStatus { + fn from(status: bool) -> Self { + match status { + true => Self::Completed, + false => Self::Pending, + } + } +} + +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +struct UsbPipeUsageDescriptor { + bLength: u8, + bDescriptorType: u8, + bPipeId: u8, + bReserved: u8, +} + +impl ByteCode for UsbPipeUsageDescriptor {} + +#[repr(C, packed)] +#[derive(Default, Clone, Copy)] +struct UasIUHeader { + id: u8, + reserved: u8, + tag: u16, +} + +#[repr(C, packed)] +#[derive(Default, Clone, Copy)] +struct UasIUCommand { + prio_task_attr: u8, // 6:3 priority, 2:0 task attribute + reserved_1: u8, + add_cdb_len: u8, + reserved_2: u8, + lun: u64, + cdb: [u8; UAS_COMMAND_CDB_SIZE], + add_cdb: [u8; 1], // not supported by stratovirt +} + +#[repr(C, packed)] +#[derive(Default, Clone, Copy)] +struct UasIUSense { + status_qualifier: u16, + status: u8, + reserved: [u8; 7], + sense_length: u16, + sense_data: [u8; 18], +} + +#[repr(C, packed)] +#[derive(Default, Clone, Copy)] +struct UasIUResponse { + add_response_info: [u8; 3], + response_code: u8, +} + +#[repr(C, packed)] +#[derive(Default, Clone, Copy)] +struct UasIUTaskManagement { + function: u8, + reserved: u8, + task_tag: u16, + lun: u64, +} + +#[repr(C, packed)] +#[derive(Clone, Copy)] +union UasIUBody { + command: UasIUCommand, + sense: UasIUSense, + response: UasIUResponse, + task_management: UasIUTaskManagement, + raw_data: [u8; UAS_IU_BODY_SIZE], +} + +impl Default for UasIUBody { + fn default() -> Self { + Self { + raw_data: [0; UAS_IU_BODY_SIZE], + } + } +} + +#[repr(C, packed)] +#[derive(Default, Clone, Copy)] +struct UasIU { + header: UasIUHeader, + body: UasIUBody, +} + +impl ByteCode for UasIU {} + +static DESC_DEVICE_UAS: Lazy> = Lazy::new(|| { + Arc::new(UsbDescDevice { + device_desc: UsbDeviceDescriptor { + bLength: USB_DT_DEVICE_SIZE, + bDescriptorType: USB_DT_DEVICE, + bcdUSB: 0x0300, + bDeviceClass: 0, + bDeviceSubClass: 0, + bDeviceProtocol: 0, + bMaxPacketSize0: 9, + idVendor: USB_VENDOR_ID_STRATOVIRT, + idProduct: USB_PRODUCT_ID_UAS, + bcdDevice: 0, + iManufacturer: UsbUasStringId::Manufacturer as u8, + iProduct: UsbUasStringId::Product as u8, + iSerialNumber: UsbUasStringId::SerialNumber as u8, + bNumConfigurations: 1, + }, + configs: vec![Arc::new(UsbDescConfig { + config_desc: UsbConfigDescriptor { + bLength: USB_DT_CONFIG_SIZE, + bDescriptorType: USB_DT_CONFIGURATION, + wTotalLength: 0, + bNumInterfaces: 1, + bConfigurationValue: 1, + iConfiguration: UsbUasStringId::Configuration as u8, + bmAttributes: USB_CONFIGURATION_ATTR_ONE | 
USB_CONFIGURATION_ATTR_SELF_POWER, + bMaxPower: 50, + }, + iad_desc: vec![], + interfaces: vec![DESC_IFACE_BOT.clone(), DESC_IFACE_UAS.clone()], + })], + }) +}); + +static DESC_IFACE_UAS: Lazy> = Lazy::new(|| { + Arc::new(UsbDescIface { + interface_desc: UsbInterfaceDescriptor { + bLength: USB_DT_INTERFACE_SIZE, + bDescriptorType: USB_DT_INTERFACE, + bInterfaceNumber: 0, + bAlternateSetting: UAS_ALT_SETTING_UAS, + bNumEndpoints: 4, + bInterfaceClass: USB_CLASS_MASS_STORAGE, + bInterfaceSubClass: USB_SUBCLASS_SCSI, + bInterfaceProtocol: USB_IFACE_PROTOCOL_UAS, + iInterface: 0, + }, + other_desc: vec![], + endpoints: vec![ + Arc::new(UsbDescEndpoint { + endpoint_desc: UsbEndpointDescriptor { + bLength: USB_DT_ENDPOINT_SIZE, + bDescriptorType: USB_DT_ENDPOINT, + bEndpointAddress: USB_DIRECTION_HOST_TO_DEVICE | UAS_PIPE_ID_COMMAND, + bmAttributes: USB_ENDPOINT_ATTR_BULK, + wMaxPacketSize: 1024, + bInterval: 0, + }, + extra: [ + UsbSuperSpeedEndpointCompDescriptor { + bLength: USB_DT_SS_EP_COMP_SIZE, + bDescriptorType: USB_DT_ENDPOINT_COMPANION, + bMaxBurst: 15, + bmAttributes: 0, + wBytesPerInterval: 0, + } + .as_bytes(), + UsbPipeUsageDescriptor { + bLength: USB_DT_PIPE_USAGE_SIZE, + bDescriptorType: USB_DT_PIPE_USAGE, + bPipeId: UAS_PIPE_ID_COMMAND, + bReserved: 0, + } + .as_bytes(), + ] + .concat() + .to_vec(), + }), + Arc::new(UsbDescEndpoint { + endpoint_desc: UsbEndpointDescriptor { + bLength: USB_DT_ENDPOINT_SIZE, + bDescriptorType: USB_DT_ENDPOINT, + bEndpointAddress: USB_DIRECTION_DEVICE_TO_HOST | UAS_PIPE_ID_STATUS, + bmAttributes: USB_ENDPOINT_ATTR_BULK, + wMaxPacketSize: 1024, + bInterval: 0, + }, + extra: [ + UsbSuperSpeedEndpointCompDescriptor { + bLength: USB_DT_SS_EP_COMP_SIZE, + bDescriptorType: USB_DT_ENDPOINT_COMPANION, + bMaxBurst: 15, + bmAttributes: UAS_MAX_STREAMS_BM_ATTR, + wBytesPerInterval: 0, + } + .as_bytes(), + UsbPipeUsageDescriptor { + bLength: USB_DT_PIPE_USAGE_SIZE, + bDescriptorType: USB_DT_PIPE_USAGE, + bPipeId: UAS_PIPE_ID_STATUS, + bReserved: 0, + } + .as_bytes(), + ] + .concat() + .to_vec(), + }), + Arc::new(UsbDescEndpoint { + endpoint_desc: UsbEndpointDescriptor { + bLength: USB_DT_ENDPOINT_SIZE, + bDescriptorType: USB_DT_ENDPOINT, + bEndpointAddress: USB_DIRECTION_DEVICE_TO_HOST | UAS_PIPE_ID_DATA_IN, + bmAttributes: USB_ENDPOINT_ATTR_BULK, + wMaxPacketSize: 1024, + bInterval: 0, + }, + extra: [ + UsbSuperSpeedEndpointCompDescriptor { + bLength: USB_DT_SS_EP_COMP_SIZE, + bDescriptorType: USB_DT_ENDPOINT_COMPANION, + bMaxBurst: 15, + bmAttributes: UAS_MAX_STREAMS_BM_ATTR, + wBytesPerInterval: 0, + } + .as_bytes(), + UsbPipeUsageDescriptor { + bLength: USB_DT_PIPE_USAGE_SIZE, + bDescriptorType: USB_DT_PIPE_USAGE, + bPipeId: UAS_PIPE_ID_DATA_IN, + bReserved: 0, + } + .as_bytes(), + ] + .concat() + .to_vec(), + }), + Arc::new(UsbDescEndpoint { + endpoint_desc: UsbEndpointDescriptor { + bLength: USB_DT_ENDPOINT_SIZE, + bDescriptorType: USB_DT_ENDPOINT, + bEndpointAddress: USB_DIRECTION_HOST_TO_DEVICE | UAS_PIPE_ID_DATA_OUT, + bmAttributes: USB_ENDPOINT_ATTR_BULK, + wMaxPacketSize: 1024, + bInterval: 0, + }, + extra: [ + UsbSuperSpeedEndpointCompDescriptor { + bLength: USB_DT_SS_EP_COMP_SIZE, + bDescriptorType: USB_DT_ENDPOINT_COMPANION, + bMaxBurst: 15, + bmAttributes: UAS_MAX_STREAMS_BM_ATTR, + wBytesPerInterval: 0, + } + .as_bytes(), + UsbPipeUsageDescriptor { + bLength: USB_DT_PIPE_USAGE_SIZE, + bDescriptorType: USB_DT_PIPE_USAGE, + bPipeId: UAS_PIPE_ID_DATA_OUT, + bReserved: 0, + } + .as_bytes(), + ] + .concat() + .to_vec(), + }), + ], + }) +}); + 
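+// Each UAS endpoint above packs two extra descriptors into its `extra` blob: a
+// SuperSpeed endpoint companion descriptor followed by a pipe usage descriptor.
+// As a byte-level sketch for the status pipe (descriptor type values taken from
+// the USB 3 / UAS specifications rather than from config.rs, so treat them as
+// assumptions):
+//   06 30 0f 04 00 00   companion: bLength 6, type 0x30, bMaxBurst 15,
+//                       bmAttributes 4 (2^4 = 16 streams), wBytesPerInterval 0
+//   04 24 02 00         pipe usage: bLength 4, type 0x24, bPipeId 2 (status)
+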
+static DESC_IFACE_BOT: Lazy> = Lazy::new(|| { + Arc::new(UsbDescIface { + interface_desc: UsbInterfaceDescriptor { + bLength: USB_DT_INTERFACE_SIZE, + bDescriptorType: USB_DT_INTERFACE, + bInterfaceNumber: 0, + bAlternateSetting: UAS_ALT_SETTING_BOT, + bNumEndpoints: 2, + bInterfaceClass: USB_CLASS_MASS_STORAGE, + bInterfaceSubClass: USB_SUBCLASS_SCSI, + bInterfaceProtocol: USB_IFACE_PROTOCOL_BOT, + iInterface: 0, + }, + other_desc: vec![], + endpoints: vec![ + Arc::new(UsbDescEndpoint { + endpoint_desc: UsbEndpointDescriptor { + bLength: USB_DT_ENDPOINT_SIZE, + bDescriptorType: USB_DT_ENDPOINT, + bEndpointAddress: USB_DIRECTION_DEVICE_TO_HOST | 0x01, + bmAttributes: USB_ENDPOINT_ATTR_BULK, + wMaxPacketSize: 1024, + bInterval: 0, + }, + extra: UsbSuperSpeedEndpointCompDescriptor { + bLength: USB_DT_SS_EP_COMP_SIZE, + bDescriptorType: USB_DT_ENDPOINT_COMPANION, + bMaxBurst: 15, + bmAttributes: 0, + wBytesPerInterval: 0, + } + .as_bytes() + .to_vec(), + }), + Arc::new(UsbDescEndpoint { + endpoint_desc: UsbEndpointDescriptor { + bLength: USB_DT_ENDPOINT_SIZE, + bDescriptorType: USB_DT_ENDPOINT, + bEndpointAddress: USB_DIRECTION_HOST_TO_DEVICE | 0x02, + bmAttributes: USB_ENDPOINT_ATTR_BULK, + wMaxPacketSize: 1024, + bInterval: 0, + }, + extra: UsbSuperSpeedEndpointCompDescriptor { + bLength: USB_DT_SS_EP_COMP_SIZE, + bDescriptorType: USB_DT_ENDPOINT_COMPANION, + bMaxBurst: 15, + bmAttributes: 0, + wBytesPerInterval: 0, + } + .as_bytes() + .to_vec(), + }), + ], + }) +}); + +static DESC_CAP_UAS: UsbSuperSpeedCapDescriptor = UsbSuperSpeedCapDescriptor { + bLength: USB_DT_SS_CAP_SIZE, + bDescriptorType: USB_DT_DEVICE_CAPABILITY, + bDevCapabilityType: USB_SS_DEVICE_CAP, + bmAttributes: 0, + wSpeedsSupported: USB_SS_DEVICE_SPEED_SUPPORTED_SUPER | USB_SS_DEVICE_SPEED_SUPPORTED_HIGH, + bFunctionalitySupport: USB_SS_DEVICE_FUNCTIONALITY_SUPPORT_HIGH, + bU1DevExitLat: 0xA, + wU2DevExitLat: 0x20, +}; + +fn complete_async_packet(packet: &Arc>) { + let locked_packet = packet.lock().unwrap(); + + if let Some(xfer_ops) = locked_packet.xfer_ops.as_ref() { + if let Some(xfer_ops) = xfer_ops.clone().upgrade() { + drop(locked_packet); + xfer_ops.lock().unwrap().submit_transfer(); + } + } +} + +impl UsbUas { + pub fn new( + uas_config: UsbUasConfig, + drive_cfg: DriveConfig, + drive_files: Arc>>, + ) -> Result { + if drive_cfg.aio != AioEngine::Off || drive_cfg.direct { + bail!("USB UAS: \"aio=off,direct=false\" must be configured."); + } + + Ok(Self { + base: UsbDeviceBase::new(uas_config.id.clone(), USB_DEVICE_BUFFER_DEFAULT_LEN), + uas_config: uas_config.clone(), + scsi_bus: Arc::new(Mutex::new(ScsiBus::new("".to_string()))), + scsi_device: None, + drive_cfg: drive_cfg.clone(), + drive_files: drive_files.clone(), + commands: array::from_fn(|_| None), + statuses: array::from_fn(|_| None), + data: array::from_fn(|_| None), + bot: UsbStorage::new(uas_config.into(), drive_cfg, drive_files)?, + is_bot: true, + }) + } + + fn cancel_io(&mut self) { + self.commands = array::from_fn(|_| None); + self.statuses = array::from_fn(|_| None); + self.data = array::from_fn(|_| None); + } + + /// Class (Mass Storage) specific requests. 
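+    ///
+    /// Only Bulk-Only Mass Storage Reset (forwarded to the embedded BOT model,
+    /// after which all in-flight UAS streams are dropped), Get Max LUN (a single
+    /// LUN is reported) and endpoint CLEAR_FEATURE (accepted as a no-op) are
+    /// handled; anything else stalls the packet.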
+ fn handle_control_for_device(&mut self, packet: &mut UsbPacket, device_req: &UsbDeviceRequest) { + match device_req.request_type { + USB_ENDPOINT_OUT_REQUEST => { + if device_req.request == USB_REQUEST_CLEAR_FEATURE { + return; + } + } + USB_INTERFACE_CLASS_OUT_REQUEST => { + // NOTE: See USB Mass Storage Class specification: 3.1 Bulk-Only Mass Storage Reset + if device_req.request == MASS_STORAGE_RESET { + // Set storage state mode. + self.bot.handle_control_packet(packet, device_req); + self.cancel_io(); + return; + } + } + USB_INTERFACE_CLASS_IN_REQUEST => { + // NOTE: See USB Mass Storage Class specification: 3.2 Get Max LUN + if device_req.request == GET_MAX_LUN { + // Now only supports 1 LUN. + self.base.data_buf[0] = 0; + packet.actual_length = 1; + return; + } + } + _ => (), + } + + error!( + "UAS {} device unhandled control request {:?}.", + self.device_id(), + device_req + ); + packet.status = UsbPacketStatus::Stall; + } + + fn handle_iu_command( + &mut self, + iu: &UasIU, + mut uas_request: UasRequest, + ) -> Result { + // SAFETY: IU is guaranteed to be of type command. + let add_cdb_len = unsafe { iu.body.command.add_cdb_len }; + let tag = u16::from_be(iu.header.tag); + + if add_cdb_len > 0 { + Self::fill_fake_sense( + &mut uas_request.status.lock().unwrap(), + tag, + SCSI_SENSE_INVALID_PARAM_VALUE, + ); + uas_request.complete(); + bail!("additional cdb length is not supported"); + } + + if tag > UAS_MAX_STREAMS as u16 { + Self::fill_fake_sense( + &mut uas_request.status.lock().unwrap(), + tag, + SCSI_SENSE_INVALID_TAG, + ); + uas_request.complete(); + bail!("invalid tag {}", tag); + } + + let (scsi_iovec, scsi_iovec_size) = match &uas_request.data { + Some(data) => { + let mut locked_data = data.lock().unwrap(); + let iov_size = locked_data.get_iovecs_size() as u32; + locked_data.actual_length = iov_size; + (locked_data.iovecs.clone(), iov_size) + } + None => (Vec::new(), 0), + }; + + // SAFETY: IU is guaranteed to of type command. + let cdb = unsafe { iu.body.command.cdb }; + // SAFETY: IU is guaranteed to of type command. + let lun = unsafe { iu.body.command.lun } as u16; + trace::usb_uas_handle_iu_command(self.device_id(), cdb[0]); + let uas_request = Box::new(uas_request); + let scsi_request = ScsiRequest::new( + cdb, + lun, + scsi_iovec, + scsi_iovec_size, + self.scsi_device.as_ref().unwrap().clone(), + uas_request, + ) + .with_context(|| "failed to create SCSI request")?; + + if scsi_request.cmd.xfer > u64::from(scsi_request.datalen) + && scsi_request.cmd.mode != ScsiXferMode::ScsiXferNone + { + bail!( + "insufficient buffer provided (requested length {}, provided length {})", + scsi_request.cmd.xfer, + scsi_request.datalen + ); + } + + let scsi_request = match scsi_request.opstype { + EMULATE_SCSI_OPS => scsi_request.emulate_execute(), + _ => scsi_request.execute(), + } + .with_context(|| "failed to execute SCSI request")?; + + let upper_request = &mut scsi_request.lock().unwrap().upper_req; + let uas_request = upper_request + .as_mut() + .as_any_mut() + .downcast_mut::() + .unwrap(); + + Ok(uas_request.completed.into()) + } + + fn handle_iu_task_management( + &mut self, + iu: &UasIU, + mut uas_request: UasRequest, + ) -> Result { + let tag = u16::from_be(iu.header.tag); + + if tag > UAS_MAX_STREAMS as u16 { + Self::fill_fake_sense( + &mut uas_request.status.lock().unwrap(), + tag, + SCSI_SENSE_INVALID_TAG, + ); + uas_request.complete(); + bail!("invalid tag {}", tag); + } + + // SAFETY: IU is guaranteed to be of type task management. 
+ let tmf = unsafe { iu.body.task_management.function }; + trace::usb_uas_handle_iu_task_management(self.device_id(), tmf, tag); + + match tmf { + UAS_TMF_ABORT_TASK => { + // SAFETY: IU is guaranteed to be of type task management. + let task_tag = unsafe { iu.body.task_management.task_tag } as usize; + self.commands[task_tag] = None; + self.statuses[task_tag] = None; + self.data[task_tag] = None; + trace::usb_uas_tmf_abort_task(self.device_id(), task_tag); + Self::fill_response( + &mut uas_request.status.lock().unwrap(), + tag, + UAS_RC_TMF_COMPLETE, + ); + } + _ => { + warn!("UAS {} device unsupported TMF {}.", self.device_id(), tmf); + Self::fill_response( + &mut uas_request.status.lock().unwrap(), + tag, + UAS_RC_TMF_NOT_SUPPORTED, + ); + } + }; + + uas_request.complete(); + Ok(UasPacketStatus::Completed) + } + + fn fill_response(packet: &mut UsbPacket, tag: u16, code: u8) { + let mut iu = UasIU::new(UAS_IU_ID_RESPONSE, tag); + iu.body.response.response_code = code; + let iu_len = size_of::() + size_of::(); + Self::fill_packet(packet, &mut iu, iu_len); + } + + fn fill_fake_sense(packet: &mut UsbPacket, tag: u16, sense: ScsiSense) { + let mut iu = UasIU::new(UAS_IU_ID_SENSE, tag); + // SAFETY: IU is guaranteed to be of type status. + let iu_sense = unsafe { &mut iu.body.sense }; + + iu_sense.status = CHECK_CONDITION; + iu_sense.status_qualifier = 0_u16.to_be(); + iu_sense.sense_length = 18_u16.to_be(); + iu_sense.sense_data[0] = 0x70; // Error code: current errors + iu_sense.sense_data[2] = sense.key; + iu_sense.sense_data[7] = 10; // Additional sense length: total length - 8 + iu_sense.sense_data[12] = sense.asc; + iu_sense.sense_data[13] = sense.ascq; + + let iu_len = size_of::() + size_of::(); + trace::usb_uas_fill_fake_sense(CHECK_CONDITION, iu_len, iu_sense.sense_length as usize); + Self::fill_packet(packet, &mut iu, iu_len); + } + + fn fill_sense(packet: &mut UsbPacket, tag: u16, sense: ScsiSense, status: u8) { + let mut iu = UasIU::new(UAS_IU_ID_SENSE, tag); + // SAFETY: IU is guaranteed to be of type status. + let iu_sense = unsafe { &mut iu.body.sense }; + + iu_sense.status = status; + iu_sense.status_qualifier = 0_u16.to_be(); + iu_sense.sense_length = 0_u16.to_be(); + + if status != GOOD { + iu_sense.sense_length = 18_u16.to_be(); + iu_sense.sense_data[0] = 0x71; // Error code: deferred errors + iu_sense.sense_data[2] = sense.key; + iu_sense.sense_data[7] = 10; // Additional sense length: total length - 8 + iu_sense.sense_data[12] = sense.asc; + iu_sense.sense_data[13] = sense.ascq; + } + + let sense_len = + size_of::() - iu_sense.sense_data.len() + iu_sense.sense_length as usize; + let iu_len = size_of::() + sense_len; + trace::usb_uas_fill_sense(status, iu_len, iu_sense.sense_length as usize); + Self::fill_packet(packet, &mut iu, iu_len); + } + + fn fill_packet(packet: &mut UsbPacket, iu: &mut UasIU, iu_len: usize) { + let iov_size = packet.get_iovecs_size() as usize; + let iu_len = min(iov_size, iu_len); + trace::usb_uas_fill_packet(iov_size); + packet.transfer_packet(iu.as_mut_bytes(), iu_len); + } + + fn try_start_next_transfer(&mut self, stream: usize) -> UasPacketStatus { + if self.commands[stream].is_none() { + debug!( + "UAS {} device no inflight command on stream {}.", + self.device_id(), + stream + ); + return UasPacketStatus::Pending; + } + + if self.statuses[stream].is_none() { + debug!( + "UAS {} device no inflight status on stream {}.", + self.device_id(), + stream + ); + return UasPacketStatus::Pending; + } + + // SAFETY: Command was checked to be Some. 
+ let command = self.commands[stream].as_ref().unwrap(); + // SAFETY: IU is guaranteed to be of type command. + let cdb = unsafe { &command.body.command.cdb }; + let xfer_len = scsi_cdb_xfer(cdb, self.scsi_device.as_ref().unwrap().clone()); + trace::usb_uas_try_start_next_transfer(self.device_id(), xfer_len); + + if xfer_len == 0 { + return self.start_next_transfer(stream); + } + + if self.data[stream].is_some() { + self.start_next_transfer(stream) + } else { + debug!( + "UAS {} device no inflight data on stream {}.", + self.device_id(), + stream + ); + UasPacketStatus::Pending + } + } + + fn start_next_transfer(&mut self, stream: usize) -> UasPacketStatus { + trace::usb_uas_start_next_transfer(self.device_id(), stream); + // SAFETY: Status and command must have been checked in try_start_next_transfer. + let status = self.statuses[stream].take().unwrap(); + let command = self.commands[stream].take().unwrap(); + let mut uas_request = UasRequest::new(&status, &command); + uas_request.data = self.data[stream].take(); + + let result = match command.header.id { + UAS_IU_ID_COMMAND => self.handle_iu_command(&command, uas_request), + UAS_IU_ID_TASK_MGMT => self.handle_iu_task_management(&command, uas_request), + _ => Err(anyhow!("impossible command IU {}", command.header.id)), + }; + + match result { + Ok(result) => result, + Err(err) => { + error!("UAS {} device error: {:?}.", self.device_id(), err); + UasPacketStatus::Completed + } + } + } +} + +impl UsbDevice for UsbUas { + gen_base_func!(usb_device_base, usb_device_base_mut, UsbDeviceBase, base); + + fn realize(mut self) -> Result>> { + info!("UAS {} device realize.", self.device_id()); + self.base.reset_usb_endpoint(); + self.base.speed = USB_SPEED_SUPER; + let mut s: Vec = UAS_DESC_STRINGS.iter().map(|&s| s.to_string()).collect(); + let prefix = &s[UsbUasStringId::SerialNumber as usize]; + s[UsbUasStringId::SerialNumber as usize] = self.base.generate_serial_number(prefix); + self.base.init_descriptor(DESC_DEVICE_UAS.clone(), s)?; + self.base.set_capability_descriptors(vec![DESC_CAP_UAS]); + + // NOTE: "aio=off,direct=false" must be configured and other aio/direct values are not + // supported. + let scsidev_classtype = match self.drive_cfg.media.as_str() { + "disk" => "scsi-hd".to_string(), + _ => "scsi-cd".to_string(), + }; + let scsi_dev_cfg = ScsiDevConfig { + classtype: scsidev_classtype, + drive: self.uas_config.drive.clone(), + ..Default::default() + }; + let scsi_device = ScsiDevice::new( + scsi_dev_cfg, + self.drive_cfg.clone(), + self.drive_files.clone(), + None, + self.scsi_bus.clone(), + ); + let realized_scsi = scsi_device.realize()?; + self.scsi_device = Some(realized_scsi.clone()); + self.scsi_bus + .lock() + .unwrap() + .attach_child(get_scsi_key(0, 0), realized_scsi)?; + + self.bot.do_realize()?; + let uas = Arc::new(Mutex::new(self)); + Ok(uas) + } + + fn cancel_packet(&mut self, _packet: &Arc>) { + self.cancel_io(); + } + + fn reset(&mut self) { + info!("UAS {} device reset.", self.device_id()); + self.base.remote_wakeup = 0; + self.base.addr = 0; + self.cancel_io(); + // Reset storage state. 
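+        // The embedded BOT fallback keeps its own CBW/CSW state machine, so it is
+        // reset along with the UAS stream bookkeeping cleared by cancel_io() above.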
+ self.bot.reset(); + } + + fn handle_control(&mut self, packet: &Arc>, device_req: &UsbDeviceRequest) { + let mut locked_packet = packet.lock().unwrap(); + trace::usb_uas_handle_control( + locked_packet.packet_id, + self.device_id(), + device_req.as_bytes(), + ); + + if device_req.request_type == USB_INTERFACE_OUT_REQUEST + && device_req.request == USB_REQUEST_SET_INTERFACE + { + self.is_bot = device_req.value != UAS_ALT_SETTING_UAS as u16; + } + + match self + .base + .handle_control_for_descriptor(&mut locked_packet, device_req) + { + Ok(handled) => { + if handled { + debug!( + "UAS {} device control handled by descriptor, return directly.", + self.device_id() + ); + return; + } + + self.handle_control_for_device(&mut locked_packet, device_req); + } + Err(err) => { + warn!( + "{} received incorrect UAS descriptor message: {:?}", + self.device_id(), + err + ); + locked_packet.status = UsbPacketStatus::Stall; + } + } + } + + fn handle_data(&mut self, packet: &Arc>) { + if self.is_bot { + return self.bot.handle_data(packet); + } + + let locked_packet = packet.lock().unwrap(); + let stream = locked_packet.stream as usize; + let ep_number = locked_packet.ep_number; + let packet_id = locked_packet.packet_id; + trace::usb_uas_handle_data(self.device_id(), ep_number, stream); + drop(locked_packet); + + if stream > UAS_MAX_STREAMS || ep_number != UAS_PIPE_ID_COMMAND && stream == 0 { + warn!("UAS {} device invalid stream {}.", self.device_id(), stream); + packet.lock().unwrap().status = UsbPacketStatus::Stall; + return; + } + + // NOTE: The architecture of this device is rather simple: it first waits for all of the + // required USB packets to arrive, and only then creates and sends an actual UAS request. + // The number of USB packets differs from 2 to 3 and depends on whether the command involves + // data transfers or not. Since the packets arrive in arbitrary order, some of them may be + // queued asynchronously. Note that the command packet is always completed right away. For + // all the other types of packets, their asynchronous status is determined by the return + // value of try_start_next_transfer(). All the asynchronously queued packets will be + // completed in scsi_request_complete_cb() callback. 
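+        // Illustrative ordering for a data-in command (READ is an assumed example
+        // here): the command IU on pipe 1 is recorded in self.commands and its
+        // packet completes immediately; the status packet (pipe 2) and the data-in
+        // packet (pipe 3) may arrive in either order, and whichever one cannot yet
+        // finish is flagged is_async and parked in self.statuses / self.data until
+        // the SCSI layer calls back.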
+ match ep_number { + UAS_PIPE_ID_COMMAND => { + let mut locked_packet = packet.lock().unwrap(); + let mut iu = UasIU::default(); + let iov_size = locked_packet.get_iovecs_size() as usize; + let iu_len = min(iov_size, size_of::()); + locked_packet.transfer_packet(iu.as_mut_bytes(), iu_len); + let stream = u16::from_be(iu.header.tag) as usize; + + if self.commands[stream].is_some() { + warn!( + "UAS {} device multiple command packets on stream {}.", + self.device_id(), + stream + ); + packet.lock().unwrap().status = UsbPacketStatus::Stall; + return; + } + + trace::usb_uas_command_received(packet_id, self.device_id()); + self.commands[stream] = Some(iu); + self.try_start_next_transfer(stream); + trace::usb_uas_command_completed(packet_id, self.device_id()); + } + UAS_PIPE_ID_STATUS => { + if self.statuses[stream].is_some() { + warn!( + "UAS {} device multiple status packets on stream {}.", + self.device_id(), + stream + ); + packet.lock().unwrap().status = UsbPacketStatus::Stall; + return; + } + + trace::usb_uas_status_received(packet_id, self.device_id()); + self.statuses[stream] = Some(Arc::clone(packet)); + let result = self.try_start_next_transfer(stream); + + match result { + UasPacketStatus::Completed => { + trace::usb_uas_status_completed(packet_id, self.device_id()) + } + UasPacketStatus::Pending => { + packet.lock().unwrap().is_async = true; + trace::usb_uas_status_queued_async(packet_id, self.device_id()); + } + } + } + UAS_PIPE_ID_DATA_OUT | UAS_PIPE_ID_DATA_IN => { + if self.data[stream].is_some() { + warn!( + "UAS {} device multiple data packets on stream {}.", + self.device_id(), + stream + ); + packet.lock().unwrap().status = UsbPacketStatus::Stall; + return; + } + + trace::usb_uas_data_received(packet_id, self.device_id()); + self.data[stream] = Some(Arc::clone(packet)); + let result = self.try_start_next_transfer(stream); + + match result { + UasPacketStatus::Completed => { + trace::usb_uas_data_completed(packet_id, self.device_id()) + } + UasPacketStatus::Pending => { + packet.lock().unwrap().is_async = true; + trace::usb_uas_data_queued_async(packet_id, self.device_id()); + } + } + } + _ => { + error!( + "UAS {} device bad endpoint number {}.", + self.device_id(), + ep_number + ); + } + } + } + + fn set_controller(&mut self, _cntlr: std::sync::Weak>) {} + + fn get_controller(&self) -> Option>> { + None + } +} + +impl UasRequest { + fn new(status: &Arc>, iu: &UasIU) -> Self { + Self { + data: None, + status: Arc::clone(status), + iu: *iu, + completed: false, + } + } + + fn complete(&mut self) { + let status = &self.status; + let status_async = status.lock().unwrap().is_async; + + // NOTE: Due to the specifics of this device, it waits for all of the required USB packets + // to arrive before starting an actual transfer. Therefore, some packets may arrive earlier + // than others, and they won't be completed right away (except for the command packets), but + // rather queued asynchronously. A certain packet may also be async if it was the last to + // arrive, but UasRequest didn't complete right away. 
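+        // E.g. (timing assumed for illustration): if the data packet arrived last
+        // and kicked off the SCSI request, the earlier status packet was already
+        // flagged is_async and is finished here through complete_async_packet().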
+ if status_async { + complete_async_packet(status); + } + + if let Some(data) = &self.data { + let data_async = data.lock().unwrap().is_async; + + if data_async { + complete_async_packet(data); + } + } + + self.completed = true; + } +} + +impl UasIUHeader { + fn new(id: u8, tag: u16) -> Self { + UasIUHeader { + id, + reserved: 0, + tag: tag.to_be(), + } + } +} + +impl UasIU { + fn new(id: u8, tag: u16) -> Self { + Self { + header: UasIUHeader::new(id, tag), + body: UasIUBody::default(), + } + } +} diff --git a/devices/src/usb/usbhost/host_usblib.rs b/devices/src/usb/usbhost/host_usblib.rs new file mode 100644 index 0000000000000000000000000000000000000000..8c5542afb26a708b42af12913d8af874b1e9353a --- /dev/null +++ b/devices/src/usb/usbhost/host_usblib.rs @@ -0,0 +1,475 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + iter::Iterator, + os::unix::io::{AsRawFd, RawFd}, + rc::Rc, + slice, + sync::{Arc, Mutex}, +}; + +use libc::{c_int, c_short, c_uint, c_void, EPOLLIN, EPOLLOUT}; +#[cfg(all(target_arch = "aarch64", target_env = "ohos"))] +use libusb1_sys::{constants::LIBUSB_SUCCESS, libusb_context, libusb_set_option}; +use libusb1_sys::{ + constants::{ + LIBUSB_ERROR_ACCESS, LIBUSB_ERROR_BUSY, LIBUSB_ERROR_INTERRUPTED, + LIBUSB_ERROR_INVALID_PARAM, LIBUSB_ERROR_IO, LIBUSB_ERROR_NOT_FOUND, + LIBUSB_ERROR_NOT_SUPPORTED, LIBUSB_ERROR_NO_DEVICE, LIBUSB_ERROR_NO_MEM, + LIBUSB_ERROR_OVERFLOW, LIBUSB_ERROR_PIPE, LIBUSB_ERROR_TIMEOUT, LIBUSB_TRANSFER_CANCELLED, + LIBUSB_TRANSFER_COMPLETED, LIBUSB_TRANSFER_ERROR, LIBUSB_TRANSFER_NO_DEVICE, + LIBUSB_TRANSFER_STALL, LIBUSB_TRANSFER_TIMED_OUT, LIBUSB_TRANSFER_TYPE_ISOCHRONOUS, + }, + libusb_free_pollfds, libusb_get_pollfds, libusb_iso_packet_descriptor, libusb_pollfd, + libusb_transfer, +}; +use log::error; +use rusb::{Context, DeviceHandle, Error, Result, TransferType, UsbContext}; +use vmm_sys_util::epoll::EventSet; + +use super::{IsoTransfer, UsbHost, UsbHostRequest}; +use crate::usb::{UsbPacket, UsbPacketStatus, USB_TOKEN_IN}; +use util::{ + link_list::Node, + loop_context::{EventNotifier, NotifierCallback, NotifierOperation}, +}; + +const CONTROL_TIMEOUT: u32 = 10000; // 10s +const BULK_TIMEOUT: u32 = 0; +const INTERRUPT_TIMEOUT: u32 = 0; + +fn from_libusb(err: i32) -> Error { + match err { + LIBUSB_ERROR_IO => Error::Io, + LIBUSB_ERROR_INVALID_PARAM => Error::InvalidParam, + LIBUSB_ERROR_ACCESS => Error::Access, + LIBUSB_ERROR_NO_DEVICE => Error::NoDevice, + LIBUSB_ERROR_NOT_FOUND => Error::NotFound, + LIBUSB_ERROR_BUSY => Error::Busy, + LIBUSB_ERROR_TIMEOUT => Error::Timeout, + LIBUSB_ERROR_OVERFLOW => Error::Overflow, + LIBUSB_ERROR_PIPE => Error::Pipe, + LIBUSB_ERROR_INTERRUPTED => Error::Interrupted, + LIBUSB_ERROR_NO_MEM => Error::NoMem, + LIBUSB_ERROR_NOT_SUPPORTED => Error::NotSupported, + _ => Error::Other, + } +} + +macro_rules! try_unsafe { + ($x:expr) => { + // SAFETY: expression is calling C library of libusb. 
+ match unsafe { $x } { + 0 => (), + err => return Err(from_libusb(err)), + } + }; +} + +pub fn get_node_from_transfer(transfer: *mut libusb_transfer) -> Box> { + // SAFETY: cast the raw pointer of transfer's user_data to the + // Box>. + unsafe { Box::from_raw((*transfer).user_data.cast::>()) } +} + +pub fn get_iso_transfer_from_transfer(transfer: *mut libusb_transfer) -> Arc> { + // SAFETY: cast the raw pointer of transfer's user_data to the + // Arc>. + unsafe { + let ptr = (*transfer).user_data.cast::>(); + Arc::increment_strong_count(ptr); + Arc::from_raw(ptr) + } +} + +pub fn get_buffer_from_transfer(transfer: *mut libusb_transfer, len: usize) -> &'static mut [u8] { + // SAFETY: cast the raw pointer of transfer's buffer which is transformed + // from a slice with actual_length to a mutable slice. + unsafe { std::slice::from_raw_parts_mut((*transfer).buffer, len) } +} + +pub fn get_length_from_transfer(transfer: *mut libusb_transfer) -> i32 { + // SAFETY: cast the raw pointer of transfer's actual_length to a integer. + unsafe { (*transfer).actual_length } +} + +pub fn get_status_from_transfer(transfer: *mut libusb_transfer) -> i32 { + // SAFETY: cast the raw pointer of transfer's status which is to a integer. + unsafe { (*transfer).status } +} + +pub fn map_packet_status(status: i32) -> UsbPacketStatus { + match status { + LIBUSB_TRANSFER_COMPLETED => UsbPacketStatus::Success, + LIBUSB_TRANSFER_ERROR => UsbPacketStatus::IoError, + LIBUSB_TRANSFER_TIMED_OUT => UsbPacketStatus::IoError, + LIBUSB_TRANSFER_CANCELLED => UsbPacketStatus::IoError, + LIBUSB_TRANSFER_STALL => UsbPacketStatus::Stall, + LIBUSB_TRANSFER_NO_DEVICE => UsbPacketStatus::NoDev, + _ => UsbPacketStatus::Babble, + } +} + +pub fn set_pollfd_notifiers( + pollfds: PollFds, + notifiers: &mut Vec, + handler: Rc, +) { + for pollfd in pollfds.iter() { + if i32::from(pollfd.events()) == EPOLLIN { + notifiers.push(EventNotifier::new( + NotifierOperation::AddShared, + pollfd.as_raw_fd(), + None, + EventSet::IN, + vec![handler.clone()], + )); + } else if i32::from(pollfd.events()) == EPOLLOUT { + notifiers.push(EventNotifier::new( + NotifierOperation::AddShared, + pollfd.as_raw_fd(), + None, + EventSet::OUT, + vec![handler.clone()], + )); + } + } +} + +pub fn get_iso_packet_nums(host_transfer: *mut libusb_transfer) -> u32 { + // SAFETY: host_transfer is guaranteed to be valid once created. + unsafe { (*host_transfer).num_iso_packets as u32 } +} + +pub fn set_iso_packet_length( + host_transfer: *mut libusb_transfer, + packet: u32, + max_packet_size: u32, +) { + let iso_packet_desc: *mut libusb_iso_packet_descriptor; + // SAFETY: host_transfer is guaranteed to be valid once created. + unsafe { iso_packet_desc = (*host_transfer).iso_packet_desc.as_mut_ptr() } + // SAFETY: iso_packet_desc is guaranteed to be valid once host_transfer is created + // and packet is guaranteed to be not out of boundary. + unsafe { (*iso_packet_desc.offset(packet as isize)).length = max_packet_size as c_uint } +} + +pub fn get_iso_packet_acl_length(host_transfer: *mut libusb_transfer, packet: u32) -> u32 { + let iso_packet_desc: *mut libusb_iso_packet_descriptor; + // SAFETY: host_transfer is guaranteed to be valid once created. + unsafe { iso_packet_desc = (*host_transfer).iso_packet_desc.as_mut_ptr() } + // SAFETY: iso_packet_desc is guaranteed to be valid once host_transfer is created + // and packet is guaranteed to be not out of boundary. 
+ unsafe { (*iso_packet_desc.offset(packet as isize)).actual_length } +} + +pub fn alloc_host_transfer(iso_packets: c_int) -> *mut libusb_transfer { + if iso_packets < 0 { + error!( + "The number of iso packets cannot be less than 0, it is {}", + iso_packets + ); + return std::ptr::null_mut(); + } + + // SAFETY: have checked the validity of iso_packets before call C + // library of libusb to get the pointer of transfer. + unsafe { libusb1_sys::libusb_alloc_transfer(iso_packets) } +} + +extern "system" fn req_complete(host_transfer: *mut libusb_transfer) { + // SAFETY: transfer is still valid because libusb just completed it + // but we haven't told anyone yet. user_data remains valid because + // it is dropped only when the request is completed and removed here. + let mut node = get_node_from_transfer(host_transfer); + let request = &mut node.value; + let requests = match request.requests.upgrade() { + Some(requests) => requests, + None => return, + }; + + // Before operating a node, lock requests to prevent multiple threads from operating + // the node at the same time. + let mut locked_requests = requests.lock().unwrap(); + let packet = request.packet.clone(); + let mut locked_packet = packet.lock().unwrap(); + + if !locked_packet.is_async { + request.free(); + locked_requests.unlink(&node); + return; + } + + let actual_length = get_length_from_transfer(host_transfer) as usize; + let transfer_status = get_status_from_transfer(host_transfer); + locked_packet.status = map_packet_status(transfer_status); + + if request.is_control { + request.ctrl_transfer_packet(&mut locked_packet, actual_length); + } else if locked_packet.pid as u8 == USB_TOKEN_IN && actual_length != 0 { + let data = get_buffer_from_transfer(host_transfer, actual_length); + locked_packet.transfer_packet(data, actual_length); + } + + trace::usb_host_req_complete( + request.hostbus, + request.hostaddr, + &*locked_packet as *const UsbPacket as u64, + &locked_packet.status, + actual_length, + ); + + if let Some(transfer) = locked_packet.xfer_ops.as_ref() { + if let Some(ops) = transfer.clone().upgrade() { + drop(locked_packet); + ops.lock().unwrap().submit_transfer(); + } + } + + request.free(); + locked_requests.unlink(&node); +} + +extern "system" fn req_complete_iso(host_transfer: *mut libusb_transfer) { + // SAFETY: the pointer has been verified. + if host_transfer.is_null() || unsafe { (*host_transfer).user_data.is_null() } { + free_host_transfer(host_transfer); + return; + } + + let iso_transfer = get_iso_transfer_from_transfer(host_transfer); + let locketd_iso_transfer = iso_transfer.lock().unwrap(); + + if let Some(iso_queue) = locketd_iso_transfer.iso_queue.clone().upgrade() { + drop(locketd_iso_transfer); + let mut locked_iso_queue = iso_queue.lock().unwrap(); + let iso_transfer = locked_iso_queue.inflight.pop_front().unwrap(); + if locked_iso_queue.inflight.is_empty() { + let queue = &locked_iso_queue; + trace::usb_host_iso_stop(queue.hostbus, queue.hostaddr, queue.ep_number); + } + locked_iso_queue.copy.push_back(iso_transfer); + } +} + +pub fn fill_transfer_by_type( + transfer: *mut libusb_transfer, + handle: Option<&mut DeviceHandle>, + ep_number: u8, + node: *mut Node, + transfer_type: TransferType, +) { + // SAFETY: node only deleted when request completed. + let packet = unsafe { (*node).value.packet.clone() }; + // SAFETY: the reason is same as above. 
+ let buffer_ptr = unsafe { (*node).value.buffer.as_mut_ptr() }; + let size = packet.lock().unwrap().get_iovecs_size(); + + if transfer.is_null() { + error!("Failed to fill bulk transfer, transfer is none"); + return; + } + + // SAFETY: have checked the validity of parameters of libusb_fill_*_transfer + // before call libusb_fill_*_transfer. + match transfer_type { + TransferType::Control => + // SAFETY: the reason is as shown above. + unsafe { + libusb1_sys::libusb_fill_control_transfer( + transfer, + handle.unwrap().as_raw(), + buffer_ptr, + req_complete, + node.cast::(), + CONTROL_TIMEOUT, + ); + }, + TransferType::Bulk => + // SAFETY: the reason is as shown above. + unsafe { + libusb1_sys::libusb_fill_bulk_transfer( + transfer, + handle.unwrap().as_raw(), + ep_number, + buffer_ptr, + size as i32, + req_complete, + node.cast::(), + BULK_TIMEOUT, + ); + }, + TransferType::Interrupt => + // SAFETY: the reason is as shown above. + unsafe { + libusb1_sys::libusb_fill_interrupt_transfer( + transfer, + handle.unwrap().as_raw(), + ep_number, + buffer_ptr, + size as i32, + req_complete, + node.cast::(), + INTERRUPT_TIMEOUT, + ); + }, + _ => error!("Unsupported transfer type: {:?}", transfer_type), + } +} + +pub fn fill_iso_transfer( + transfer: *mut libusb_transfer, + handle: &mut DeviceHandle, + ep_number: u8, + user_data: *mut c_void, + packets: u32, + length: u32, + buffer: &mut Vec, +) { + // SAFETY: have checked the validity of transfer before call fill_iso_transfer. + unsafe { + (*transfer).dev_handle = handle.as_raw(); + (*transfer).transfer_type = LIBUSB_TRANSFER_TYPE_ISOCHRONOUS; + (*transfer).endpoint = ep_number; + (*transfer).callback = req_complete_iso; + (*transfer).user_data = user_data; + (*transfer).num_iso_packets = packets as c_int; + (*transfer).length = length as c_int; + (*transfer).buffer = buffer.as_mut_ptr(); + } +} + +pub fn submit_host_transfer(transfer: *mut libusb_transfer) -> Result<()> { + if transfer.is_null() { + return Err(Error::NoMem); + } + try_unsafe!(libusb1_sys::libusb_submit_transfer(transfer)); + Ok(()) +} + +pub fn cancel_host_transfer(transfer: *mut libusb_transfer) -> Result<()> { + if transfer.is_null() { + return Ok(()); + } + try_unsafe!(libusb1_sys::libusb_cancel_transfer(transfer)); + Ok(()) +} + +pub fn free_host_transfer(transfer: *mut libusb_transfer) { + if transfer.is_null() { + return; + } + + // SAFETY: have checked the validity of transfer before call libusb_free_transfer. + unsafe { libusb1_sys::libusb_free_transfer(transfer) }; +} + +#[cfg(all(target_arch = "aarch64", target_env = "ohos"))] +pub fn set_option(opt: u32) -> Result<()> { + // SAFETY: This function will only configure a specific option within libusb, null for ctx is valid. 
+ let err = unsafe { + libusb_set_option( + std::ptr::null_mut() as *mut libusb_context, + opt, + std::ptr::null_mut() as *mut c_void, + ) + }; + if err != LIBUSB_SUCCESS { + return Err(from_libusb(err)); + } + + Ok(()) +} + +#[derive(Debug)] +pub struct PollFd { + fd: c_int, + events: c_short, +} + +impl PollFd { + unsafe fn from_raw(raw: *mut libusb_pollfd) -> Self { + Self { + fd: (*raw).fd, + events: (*raw).events, + } + } + + pub fn events(&self) -> c_short { + self.events + } +} + +impl AsRawFd for PollFd { + fn as_raw_fd(&self) -> RawFd { + self.fd + } +} + +pub struct PollFds { + poll_fds: *const *mut libusb_pollfd, +} + +impl PollFds { + pub unsafe fn new(usbhost: Arc>) -> Result { + let poll_fds = libusb_get_pollfds(usbhost.lock().unwrap().context.as_raw()); + if poll_fds.is_null() { + Err(Error::NotFound) + } else { + Ok(Self { poll_fds }) + } + } + + pub fn iter(&self) -> PollFdIter { + let mut len: usize = 0; + // SAFETY: self.poll_fds is acquired from libusb_get_pollfds which is guaranteed to be valid. + unsafe { + while !(*self.poll_fds.add(len)).is_null() { + len += 1; + } + PollFdIter { + fds: slice::from_raw_parts(self.poll_fds, len), + index: 0, + } + } + } +} + +impl Drop for PollFds { + fn drop(&mut self) { + // SAFETY: self.poll_fds is acquired from libusb_get_pollfds which is guaranteed to be valid. + unsafe { + libusb_free_pollfds(self.poll_fds); + } + } +} + +pub struct PollFdIter<'a> { + fds: &'a [*mut libusb_pollfd], + index: usize, +} + +impl<'a> Iterator for PollFdIter<'a> { + type Item = PollFd; + + fn next(&mut self) -> Option { + if self.index < self.fds.len() { + // SAFETY: self.fds is guaranteed to be valid. + let poll_fd = unsafe { PollFd::from_raw(self.fds[self.index]) }; + self.index += 1; + Some(poll_fd) + } else { + None + } + } +} diff --git a/devices/src/usb/usbhost/mod.rs b/devices/src/usb/usbhost/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..17a888a56e18050621297f577fd7aa9202fe9b73 --- /dev/null +++ b/devices/src/usb/usbhost/mod.rs @@ -0,0 +1,1334 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
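+
+//! Host USB device pass-through.
+//!
+//! Guest packets coming from the XHCI controller are turned into libusb
+//! transfers (control, bulk, interrupt and isochronous) via the rusb /
+//! libusb1-sys bindings, submitted asynchronously, and completed back to the
+//! guest when libusb's poll fds fire on StratoVirt's event loop.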
+ +mod host_usblib; +#[cfg(all(target_arch = "aarch64", target_env = "ohos"))] +mod ohusb; + +use std::{ + collections::LinkedList, + os::unix::io::RawFd, + rc::Rc, + sync::{Arc, Mutex, Weak}, + time::Duration, +}; + +#[cfg(not(all(target_arch = "aarch64", target_env = "ohos")))] +use anyhow::Context as anyhowContext; +use anyhow::{anyhow, Result}; +use clap::Parser; +use libc::c_int; +use libusb1_sys::{ + libusb_get_iso_packet_buffer_simple, libusb_set_iso_packet_lengths, libusb_transfer, +}; +use log::{error, info, warn}; +use rusb::{ + constants::LIBUSB_CLASS_HUB, Context, Device, DeviceDescriptor, DeviceHandle, Direction, Error, + TransferType, UsbContext, +}; + +use crate::usb::{ + config::{ + USB_DEVICE_OUT_REQUEST, USB_DIRECTION_DEVICE_TO_HOST, USB_ENDPOINT_ATTR_BULK, + USB_ENDPOINT_ATTR_INT, USB_ENDPOINT_ATTR_INVALID, USB_ENDPOINT_ATTR_ISOC, + USB_ENDPOINT_OUT_REQUEST, USB_INTERFACE_OUT_REQUEST, USB_REQUEST_CLEAR_FEATURE, + USB_REQUEST_SET_ADDRESS, USB_REQUEST_SET_CONFIGURATION, USB_REQUEST_SET_INTERFACE, + USB_TOKEN_IN, USB_TOKEN_OUT, + }, + descriptor::USB_MAX_INTERFACES, + xhci::xhci_controller::XhciDevice, + UsbDevice, UsbDeviceBase, UsbDeviceRequest, UsbEndpoint, UsbPacket, UsbPacketStatus, +}; +use host_usblib::*; +use machine_manager::{ + config::valid_id, + event_loop::{register_event_helper, unregister_event_helper}, + temp_cleaner::{ExitNotifier, TempCleaner}, +}; +#[cfg(all(target_arch = "aarch64", target_env = "ohos"))] +use ohusb::OhUsbDev; +use util::byte_code::ByteCode; +use util::gen_base_func; +use util::link_list::{List, Node}; +use util::loop_context::{EventNotifier, EventNotifierHelper, NotifierCallback}; +use util::num_ops::str_to_num; + +const NON_ISO_PACKETS_NUMS: c_int = 0; +const HANDLE_TIMEOUT_MS: u64 = 2; +const USB_HOST_BUFFER_LEN: usize = 12 * 1024; +const USBHOST_ADDR_MAX: i64 = 127; + +#[derive(Default, Copy, Clone)] +struct InterfaceStatus { + detached: bool, + claimed: bool, +} + +pub struct UsbHostRequest { + pub hostbus: u8, + pub hostaddr: u8, + pub requests: Weak>>, + pub packet: Arc>, + pub host_transfer: *mut libusb_transfer, + /// Async data buffer. 
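+    /// For control transfers the first 8 bytes hold the SETUP packet written by
+    /// setup_ctrl_buffer(); any data stage payload follows at offset 8.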
+ pub buffer: Vec, + pub is_control: bool, +} + +impl UsbHostRequest { + pub fn new( + hostbus: u8, + hostaddr: u8, + requests: Weak>>, + packet: Arc>, + host_transfer: *mut libusb_transfer, + is_control: bool, + ) -> Self { + Self { + hostbus, + hostaddr, + requests, + packet, + host_transfer, + buffer: Vec::new(), + is_control, + } + } + + pub fn setup_data_buffer(&mut self) { + let mut locked_packet = self.packet.lock().unwrap(); + let size = locked_packet.get_iovecs_size(); + self.buffer = vec![0; size as usize]; + if locked_packet.pid as u8 == USB_TOKEN_OUT { + locked_packet.transfer_packet(self.buffer.as_mut(), size as usize); + } + } + + pub fn setup_ctrl_buffer(&mut self, data_buf: &[u8], device_req: &UsbDeviceRequest) { + self.buffer = vec![0; (device_req.length + 8) as usize]; + self.buffer[..8].copy_from_slice(device_req.as_bytes()); + if self.packet.lock().unwrap().pid as u8 == USB_TOKEN_OUT { + self.buffer[8..].copy_from_slice(data_buf); + } + } + + pub fn free(&mut self) { + free_host_transfer(self.host_transfer); + self.buffer.clear(); + self.host_transfer = std::ptr::null_mut(); + } + + pub fn abort_req(&mut self) { + let mut locked_packet = self.packet.lock().unwrap(); + if locked_packet.is_async { + locked_packet.status = UsbPacketStatus::NoDev; + locked_packet.is_async = false; + trace::usb_host_req_complete( + self.hostbus, + self.hostaddr, + &*locked_packet as *const UsbPacket as u64, + &locked_packet.status, + locked_packet.actual_length as usize, + ); + cancel_host_transfer(self.host_transfer) + .unwrap_or_else(|e| warn!("usb-host cancel host transfer is error: {:?}", e)); + + if let Some(transfer) = locked_packet.xfer_ops.as_ref() { + if let Some(ops) = transfer.clone().upgrade() { + if self.is_control { + self.ctrl_transfer_packet(&mut locked_packet, 0); + } + drop(locked_packet); + ops.lock().unwrap().submit_transfer(); + } + } + } + } + + pub fn ctrl_transfer_packet(&self, packet: &mut UsbPacket, actual_length: usize) { + let setup_buf = get_buffer_from_transfer(self.host_transfer, 8); + let mut len = (setup_buf[7] as usize) << 8 | setup_buf[6] as usize; + if len > actual_length { + len = actual_length; + } + + if packet.pid as u8 == USB_TOKEN_IN && actual_length != 0 { + let data = get_buffer_from_transfer(self.host_transfer, len + 8); + packet.transfer_packet(&mut data[8..], len); + } + } +} + +// SAFETY: The UsbHostRequest is created in main thread and then be passed to the +// libUSB thread. Once this data is processed, it is cleaned up. So there will be +// no problem with data sharing or synchronization. +unsafe impl Sync for UsbHostRequest {} +// SAFETY: The reason is same as above. 
+unsafe impl Send for UsbHostRequest {}
+
+pub struct IsoTransfer {
+    host_transfer: *mut libusb_transfer,
+    copy_completed: bool,
+    packet: u32,
+    buffer: Vec<u8>,
+    iso_queue: Weak<Mutex<IsoQueue>>,
+}
+
+impl IsoTransfer {
+    pub fn new(packets: u32, iso_queue: Weak<Mutex<IsoQueue>>) -> Self {
+        let host_transfer = alloc_host_transfer(packets as i32);
+        Self {
+            host_transfer,
+            copy_completed: false,
+            packet: 0,
+            buffer: Vec::new(),
+            iso_queue,
+        }
+    }
+
+    pub fn realize(
+        &mut self,
+        handle: &mut DeviceHandle<Context>,
+        packets: u32,
+        pid: u8,
+        ep_number: u8,
+        ep_max_packet_size: u32,
+        user_data: *mut libc::c_void,
+    ) {
+        let mut ep = ep_number;
+        let length = ep_max_packet_size * packets;
+        if pid == USB_TOKEN_IN {
+            ep |= USB_DIRECTION_DEVICE_TO_HOST;
+        }
+        self.buffer = vec![0; length as usize];
+        fill_iso_transfer(
+            self.host_transfer,
+            handle,
+            ep,
+            user_data,
+            packets,
+            length,
+            &mut self.buffer,
+        );
+    }
+
+    pub fn reset(&mut self, max_packet_size: u32) {
+        // SAFETY: host_transfer is guaranteed to be valid once created.
+        unsafe { libusb_set_iso_packet_lengths(self.host_transfer, max_packet_size) };
+        self.packet = 0;
+        self.copy_completed = false;
+    }
+
+    pub fn clear(&mut self, inflight: bool) {
+        if inflight {
+            // SAFETY: host_transfer is guaranteed to be valid once created.
+            unsafe {
+                (*self.host_transfer).user_data = std::ptr::null_mut();
+            }
+        } else {
+            self.buffer.clear();
+            free_host_transfer(self.host_transfer);
+        }
+    }
+
+    pub fn copy_data(&mut self, packet: Arc<Mutex<UsbPacket>>, ep_max_packet_size: u32) -> bool {
+        let mut locked_packet = packet.lock().unwrap();
+        let mut size: usize;
+        if locked_packet.pid == u32::from(USB_TOKEN_OUT) {
+            size = locked_packet.get_iovecs_size() as usize;
+            if size > ep_max_packet_size as usize {
+                size = ep_max_packet_size as usize;
+            }
+            set_iso_packet_length(self.host_transfer, self.packet, size as u32);
+        } else {
+            size = get_iso_packet_acl_length(self.host_transfer, self.packet) as usize;
+            if size > locked_packet.get_iovecs_size() as usize {
+                size = locked_packet.get_iovecs_size() as usize;
+            }
+        }
+        let buffer =
+            // SAFETY: host_transfer is guaranteed to be valid once created
+            // and self.packet is guaranteed not to be out of bounds.
+            unsafe { libusb_get_iso_packet_buffer_simple(self.host_transfer, self.packet) };
+
+        locked_packet.transfer_packet(
+            // SAFETY: buffer is already allocated and size will not exceed
+            // the size of the buffer.
+            unsafe { std::slice::from_raw_parts_mut(buffer, size) },
+            size,
+        );
+
+        self.packet += 1;
+        self.copy_completed = self.packet == get_iso_packet_nums(self.host_transfer);
+        self.copy_completed
+    }
+}
+
+// SAFETY: The operation of libusb_transfer is protected by lock.
+unsafe impl Sync for IsoTransfer {}
+// SAFETY: The reason is same as above.
+unsafe impl Send for IsoTransfer {} + +pub struct IsoQueue { + hostbus: u8, + hostaddr: u8, + ep_number: u8, + unused: LinkedList>>, + inflight: LinkedList>>, + copy: LinkedList>>, +} + +impl IsoQueue { + pub fn new(hostbus: u8, hostaddr: u8, ep_number: u8) -> Self { + Self { + hostbus, + hostaddr, + ep_number, + unused: LinkedList::new(), + inflight: LinkedList::new(), + copy: LinkedList::new(), + } + } + + pub fn realize( + &mut self, + id: &str, + handle: &mut DeviceHandle, + iso_urb_count: u32, + iso_urb_frames: u32, + ep: &UsbEndpoint, + iso_queue: Arc>, + ) -> Result<()> { + let packets: u32 = iso_urb_frames; + let pid = if ep.in_direction { + USB_TOKEN_IN + } else { + USB_TOKEN_OUT + }; + let ep_number = ep.ep_number; + let max_packet_size = ep.max_packet_size; + + for i in 0..iso_urb_count { + let iso_xfer = Arc::new(Mutex::new(IsoTransfer::new( + packets, + Arc::downgrade(&iso_queue), + ))); + + if iso_xfer.lock().unwrap().host_transfer.is_null() { + return Err(anyhow!( + "Failed to allocate host transfer for {}th iso urb of device {} ep {}", + i, + id, + ep_number + )); + } + + let cloned_iso_xfer = iso_xfer.clone(); + iso_xfer.lock().unwrap().realize( + handle, + packets, + pid, + ep_number, + max_packet_size, + (Arc::into_raw(cloned_iso_xfer) as *mut Mutex).cast::(), + ); + self.unused.push_back(iso_xfer); + } + Ok(()) + } + + pub fn clear(&mut self) { + for xfer in self.unused.iter_mut() { + xfer.lock().unwrap().clear(false); + } + + for xfer in self.inflight.iter_mut() { + xfer.lock().unwrap().clear(true); + } + + for xfer in self.copy.iter_mut() { + xfer.lock().unwrap().clear(false); + } + } +} + +#[derive(Parser, Clone, Debug, Default)] +#[command(no_binary_name(true))] +pub struct UsbHostConfig { + #[arg(long)] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + id: String, + #[arg(long, default_value = "0")] + hostbus: u8, + #[arg(long, default_value = "0", value_parser = clap::value_parser!(u8).range(..=USBHOST_ADDR_MAX))] + hostaddr: u8, + #[arg(long)] + hostport: Option, + #[arg(long, default_value = "0", value_parser = str_to_num::)] + vendorid: u16, + #[arg(long, default_value = "0", value_parser = str_to_num::)] + productid: u16, + #[arg(long = "isobsize", default_value = "32")] + iso_urb_frames: u32, + #[arg(long = "isobufs", default_value = "4")] + iso_urb_count: u32, +} + +/// Abstract object of the host USB device. +pub struct UsbHost { + base: UsbDeviceBase, + config: UsbHostConfig, + /// Libusb context. + context: Context, + /// A reference to a USB device. + libdev: Option>, + /// A handle to an open USB device. + handle: Option>, + /// Describes a device. + ddesc: Option, + /// EventFd for libusb. + libevt: Vec, + /// Configuration interface number. + ifs_num: u8, + ifs: [InterfaceStatus; USB_MAX_INTERFACES as usize], + /// Callback for release dev to Host after the vm exited. + exit: Option>, + /// All pending asynchronous usb request. + requests: Arc>>, + /// ISO queues corresponding to all endpoints. + iso_queues: Arc>>>>, + iso_urb_frames: u32, + iso_urb_count: u32, + #[cfg(all(target_arch = "aarch64", target_env = "ohos"))] + oh_dev: OhUsbDev, +} + +// SAFETY: Send and Sync is not auto-implemented for util::link_list::List. +// Implementing them is safe because List add Mutex. +unsafe impl Sync for UsbHost {} +// SAFETY: The reason is same as above. 
+unsafe impl Send for UsbHost {} + +impl UsbHost { + pub fn new(config: UsbHostConfig) -> Result { + #[cfg(all(target_arch = "aarch64", target_env = "ohos"))] + let oh_dev = OhUsbDev::new(config.hostbus, config.hostaddr)?; + + let mut context = Context::new()?; + context.set_log_level(rusb::LogLevel::None); + let iso_urb_frames = config.iso_urb_frames; + let iso_urb_count = config.iso_urb_count; + let id = config.id.clone(); + Ok(Self { + config, + context, + libdev: None, + handle: None, + ddesc: None, + libevt: Vec::new(), + ifs_num: 0, + ifs: [InterfaceStatus::default(); USB_MAX_INTERFACES as usize], + base: UsbDeviceBase::new(id, USB_HOST_BUFFER_LEN), + exit: None, + requests: Arc::new(Mutex::new(List::new())), + iso_queues: Arc::new(Mutex::new(LinkedList::new())), + iso_urb_frames, + iso_urb_count, + #[cfg(all(target_arch = "aarch64", target_env = "ohos"))] + oh_dev, + }) + } + + #[cfg(not(all(target_arch = "aarch64", target_env = "ohos")))] + fn find_libdev(&self) -> Option> { + if self.config.vendorid != 0 && self.config.productid != 0 { + self.find_dev_by_vendor_product() + } else if self.config.hostport.is_some() { + self.find_dev_by_bus_port() + } else if self.config.hostbus != 0 && self.config.hostaddr != 0 { + self.find_dev_by_bus_addr() + } else { + None + } + } + + #[cfg(not(all(target_arch = "aarch64", target_env = "ohos")))] + fn find_dev_by_bus_addr(&self) -> Option> { + self.context + .devices() + .ok() + .map(|devices| { + devices.iter().find(|device| { + if check_device_valid(device) { + return device.bus_number() == self.config.hostbus + && device.address() == self.config.hostaddr; + } + false + }) + }) + .unwrap_or_else(|| None) + } + + #[cfg(not(all(target_arch = "aarch64", target_env = "ohos")))] + fn find_dev_by_vendor_product(&self) -> Option> { + self.context + .devices() + .ok() + .map(|devices| { + devices.iter().find(|device| { + if check_device_valid(device) { + let ddesc = device.device_descriptor().unwrap(); + return ddesc.vendor_id() == self.config.vendorid + && ddesc.product_id() == self.config.productid; + } + false + }) + }) + .unwrap_or_else(|| None) + } + + #[cfg(not(all(target_arch = "aarch64", target_env = "ohos")))] + fn find_dev_by_bus_port(&self) -> Option> { + let hostport: Vec<&str> = self.config.hostport.as_ref().unwrap().split('.').collect(); + let mut port: Vec = Vec::new(); + for elem in hostport { + let elem = elem.to_string().parse::(); + if elem.is_err() { + return None; + } + port.push(elem.unwrap()); + } + + if port.is_empty() { + return None; + } + + self.context + .devices() + .ok() + .map(|devices| { + devices.iter().find(|device| { + if check_device_valid(device) { + return device.bus_number() == self.config.hostbus + && port.eq(device.port_numbers().as_ref().unwrap()); + } + false + }) + }) + .unwrap_or_else(|| None) + } + + fn detach_kernel(&mut self) -> Result<()> { + let conf = self.libdev.as_ref().unwrap().active_config_descriptor()?; + + self.ifs_num = conf.num_interfaces(); + + for i in 0..self.ifs_num { + if !match self.handle.as_ref().unwrap().kernel_driver_active(i) { + Ok(rc) => { + if !rc { + self.ifs[i as usize].detached = true; + } + rc + } + Err(e) => { + error!("Failed to kernel driver active: {:?}", e); + false + } + } { + continue; + } + trace::usb_host_detach_kernel(self.config.hostbus, self.config.hostaddr, i); + self.handle + .as_mut() + .unwrap() + .detach_kernel_driver(i) + .unwrap_or_else(|e| error!("Failed to detach kernel driver: {:?}", e)); + self.ifs[i as usize].detached = true; + } + + Ok(()) + } + + fn 
attach_kernel(&mut self) { + if self + .libdev + .as_ref() + .unwrap() + .active_config_descriptor() + .is_err() + { + return; + } + for i in 0..self.ifs_num { + if !self.ifs[i as usize].detached { + continue; + } + trace::usb_host_attach_kernel(self.config.hostbus, self.config.hostaddr, i); + self.handle + .as_mut() + .unwrap() + .attach_kernel_driver(i) + .unwrap_or_else(|e| error!("Failed to attach kernel driver: {:?}", e)); + self.ifs[i as usize].detached = false; + } + } + + fn ep_update(&mut self) { + self.base.reset_usb_endpoint(); + let conf = match self.libdev.as_ref().unwrap().active_config_descriptor() { + Ok(conf) => conf, + Err(_) => return, + }; + + trace::usb_host_parse_config(self.config.hostbus, self.config.hostaddr, conf.number()); + for (i, intf) in conf.interfaces().enumerate() { + // The usb_deviec.altsetting indexes alternate settings by the interface number. + // Get the 0th alternate setting first so that we can grap the interface number, + // and then correct the alternate setting value if necessary. + let mut intf_desc = intf.descriptors().next(); + if intf_desc.is_none() { + continue; + } + let alt = self.base.altsetting[intf_desc.as_ref().unwrap().interface_number() as usize]; + if alt != 0 { + if alt >= intf.descriptors().count() as u32 { + error!( + "Interface index {} exceeds max counts {}", + alt, + intf.descriptors().count() + ); + return; + } + intf_desc = intf.descriptors().nth(alt as usize); + } + + trace::usb_host_parse_interface( + self.config.hostbus, + self.config.hostaddr, + intf_desc.as_ref().unwrap().interface_number(), + intf_desc.as_ref().unwrap().setting_number(), + ); + for ep in intf_desc.as_ref().unwrap().endpoint_descriptors() { + let pid = match ep.direction() { + Direction::In => USB_TOKEN_IN, + Direction::Out => USB_TOKEN_OUT, + }; + let ep_num = ep.number(); + let ep_type = ep.transfer_type() as u8; + if ep_num == 0 { + trace::usb_host_parse_error( + self.config.hostbus, + self.config.hostaddr, + "invalid endpoint address", + ); + return; + } + let in_direction = pid == USB_TOKEN_IN; + if self.base.get_endpoint(in_direction, ep_num).ep_type != USB_ENDPOINT_ATTR_INVALID + { + trace::usb_host_parse_error( + self.config.hostbus, + self.config.hostaddr, + "duplicate endpoint address", + ); + } + + trace::usb_host_parse_endpoint( + self.config.hostbus, + self.config.hostaddr, + ep_num, + &ep.direction(), + &ep.transfer_type(), + ); + let usb_ep = self.base.get_mut_endpoint(in_direction, ep_num); + usb_ep.set_max_packet_size(ep.max_packet_size()); + usb_ep.ep_type = ep_type; + usb_ep.ifnum = i as u8; + usb_ep.halted = false; + } + } + } + + fn init_usbdev(&mut self) -> Result<()> { + self.config.hostbus = self.libdev.as_ref().unwrap().bus_number(); + self.config.hostaddr = self.libdev.as_ref().unwrap().address(); + trace::usb_host_open_started(self.config.hostbus, self.config.hostaddr); + + self.detach_kernel()?; + + self.ddesc = self.libdev.as_ref().unwrap().device_descriptor().ok(); + + self.ep_update(); + + match self.libdev.as_ref().unwrap().speed() as u32 { + 0 => { + return Err(anyhow!( + "Failed to realize usb host device due to unknown device speed." 
+ )) + } + speed => self.base.speed = speed - 1, + }; + + trace::usb_host_open_success(self.config.hostbus, self.config.hostaddr); + + Ok(()) + } + + fn register_exit(&mut self) { + let exit = self as *const Self as u64; + let exit_notifier = Arc::new(move || { + let usb_host = + // SAFETY: This callback is deleted after the device hot-unplug, so it is called only + // when the vm exits abnormally. + &mut unsafe { std::slice::from_raw_parts_mut(exit as *mut UsbHost, 1) }[0]; + usb_host.release_dev_to_host(); + }) as Arc; + self.exit = Some(exit_notifier.clone()); + TempCleaner::add_exit_notifier(self.device_id().to_string(), exit_notifier); + } + + fn release_interfaces(&mut self) { + for i in 0..self.ifs_num { + if !self.ifs[i as usize].claimed { + continue; + } + trace::usb_host_release_interface(self.config.hostbus, self.config.hostaddr, i); + self.handle + .as_mut() + .unwrap() + .release_interface(i) + .unwrap_or_else(|e| error!("Failed to release interface: {:?}", e)); + self.ifs[i as usize].claimed = false; + } + } + + fn claim_interfaces(&mut self) -> UsbPacketStatus { + self.base.altsetting = [0; USB_MAX_INTERFACES as usize]; + if self.detach_kernel().is_err() { + return UsbPacketStatus::Stall; + } + + let conf = match self.libdev.as_ref().unwrap().active_config_descriptor() { + Ok(conf) => conf, + Err(e) => { + if e == Error::NotFound { + // Ignore address state + return UsbPacketStatus::Success; + } + return UsbPacketStatus::Stall; + } + }; + + let mut claimed = 0; + for i in 0..self.ifs_num { + trace::usb_host_claim_interface(self.config.hostbus, self.config.hostaddr, i); + if self.handle.as_mut().unwrap().claim_interface(i).is_ok() { + self.ifs[i as usize].claimed = true; + claimed += 1; + if claimed == conf.num_interfaces() { + break; + } + } + } + + if claimed != conf.num_interfaces() { + return UsbPacketStatus::Stall; + } + + UsbPacketStatus::Success + } + + fn set_config(&mut self, config: u8, packet: &mut UsbPacket) { + trace::usb_host_set_config(self.config.hostbus, self.config.hostaddr, config); + self.release_interfaces(); + + if self.ddesc.is_some() && self.ddesc.as_ref().unwrap().num_configurations() != 1 { + if let Err(e) = self + .handle + .as_mut() + .unwrap() + .set_active_configuration(config) + { + error!("Failed to set active configuration: {:?}", e); + if e == Error::NoDevice { + packet.status = UsbPacketStatus::NoDev + } else { + packet.status = UsbPacketStatus::Stall; + } + return; + } + } + + packet.status = self.claim_interfaces(); + if packet.status == UsbPacketStatus::Success { + self.ep_update(); + } + } + + fn set_interface(&mut self, iface: u16, alt: u16, packet: &mut UsbPacket) { + trace::usb_host_set_interface(self.config.hostbus, self.config.hostaddr, iface, alt); + self.clear_iso_queues(); + + if iface > USB_MAX_INTERFACES as u16 { + packet.status = UsbPacketStatus::Stall; + return; + } + match self + .handle + .as_mut() + .unwrap() + .set_alternate_setting(iface as u8, alt as u8) + { + Ok(_) => { + self.base.altsetting[iface as usize] = u32::from(alt); + self.ep_update(); + } + Err(e) => { + if e == Error::NoDevice { + packet.status = UsbPacketStatus::NoDev + } else { + packet.status = UsbPacketStatus::Stall; + } + } + } + } + + fn clear_halt(&mut self, pid: u8, index: u8) { + if self.handle.as_mut().unwrap().clear_halt(index).is_err() { + warn!("Failed to clear halt"); + } + self.base + .get_mut_endpoint(pid == USB_TOKEN_IN, index & 0x0f) + .halted = false; + } + + fn release_dev_to_host(&mut self) { + if self.handle.is_none() { + return; + } + 
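+        // Tear down in a fixed order: cancel any outstanding transfers, release the
+        // claimed interfaces, reset the device handle, and finally re-attach the
+        // kernel driver that was detached when the device was opened.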
+ trace::usb_host_close(self.config.hostbus, self.config.hostaddr); + + self.abort_host_transfers() + .unwrap_or_else(|e| error!("Failed to abort all libusb transfers: {:?}", e)); + self.release_interfaces(); + self.handle.as_mut().unwrap().reset().unwrap_or_else(|e| { + error!( + "Failed to reset the handle of UsbHost device {}: {:?}", + self.device_id(), + e + ) + }); + self.attach_kernel(); + } + + fn clear_iso_queues(&mut self) { + let mut locked_iso_queues = self.iso_queues.lock().unwrap(); + for queue in locked_iso_queues.iter() { + (*queue).lock().unwrap().clear(); + } + locked_iso_queues.clear(); + drop(locked_iso_queues); + } + + pub fn abort_host_transfers(&mut self) -> Result<()> { + for req in self.requests.lock().unwrap().iter_mut() { + req.abort_req(); + } + + // Max counts of uncompleted request to be handled. + let mut limit: i32 = 100; + loop { + if self.requests.lock().unwrap().is_empty() { + return Ok(()); + } + let timeout = Some(Duration::from_millis(HANDLE_TIMEOUT_MS)); + self.context.handle_events(timeout)?; + if limit == 0 { + self.requests = Arc::new(Mutex::new(List::new())); + return Ok(()); + } + limit -= 1; + } + } + + pub fn find_iso_queue(&self, ep_number: u8) -> Option>> { + for queue in self.iso_queues.lock().unwrap().iter() { + if (*queue).lock().unwrap().ep_number == ep_number { + return Some(queue.clone()); + } + } + None + } + + pub fn handle_iso_data_in(&mut self, packet: Arc>) { + let cloned_packet = packet.clone(); + let locked_packet = packet.lock().unwrap(); + let in_direction = locked_packet.pid == u32::from(USB_TOKEN_IN); + let iso_queue = if self.find_iso_queue(locked_packet.ep_number).is_some() { + self.find_iso_queue(locked_packet.ep_number).unwrap() + } else { + let iso_queue = Arc::new(Mutex::new(IsoQueue::new( + self.config.hostbus, + self.config.hostaddr, + locked_packet.ep_number, + ))); + let cloned_iso_queue = iso_queue.clone(); + let ep = self + .base + .get_endpoint(in_direction, locked_packet.ep_number); + let id = self.device_id().to_string(); + match iso_queue.lock().unwrap().realize( + &id, + self.handle.as_mut().unwrap(), + self.iso_urb_count, + self.iso_urb_frames, + ep, + cloned_iso_queue, + ) { + Ok(()) => { + self.iso_queues.lock().unwrap().push_back(iso_queue.clone()); + } + Err(_e) => { + return; + } + }; + iso_queue + }; + + let mut locked_iso_queue = iso_queue.lock().unwrap(); + + let in_direction = locked_packet.pid == u32::from(USB_TOKEN_IN); + let ep = self + .base + .get_endpoint(in_direction, locked_packet.ep_number); + drop(locked_packet); + + let iso_transfer = locked_iso_queue.copy.front_mut(); + if iso_transfer.is_some() + && iso_transfer + .unwrap() + .lock() + .unwrap() + .copy_data(cloned_packet, ep.max_packet_size) + { + let iso_transfer = locked_iso_queue.copy.pop_front().unwrap(); + locked_iso_queue.unused.push_back(iso_transfer); + } + drop(locked_iso_queue); + + loop { + let mut iso_transfer = iso_queue.lock().unwrap().unused.pop_front(); + if iso_transfer.is_none() { + break; + } + iso_transfer + .as_mut() + .unwrap() + .lock() + .unwrap() + .reset(ep.max_packet_size); + let host_transfer = iso_transfer.as_ref().unwrap().lock().unwrap().host_transfer; + let mut locked_iso_queue = iso_queue.lock().unwrap(); + match submit_host_transfer(host_transfer) { + Ok(()) => { + if locked_iso_queue.inflight.is_empty() { + trace::usb_host_iso_start( + self.config.hostbus, + self.config.hostaddr, + ep.ep_number, + ); + } + locked_iso_queue + .inflight + .push_back(iso_transfer.unwrap().clone()); + } + Err(e) => { + 
locked_iso_queue.unused.push_back(iso_transfer.unwrap()); + if e == Error::NoDevice || e == Error::Io { + // When the USB device reports the preceding error, XHCI notifies the guest + // of the error through packet status. The guest initiallizes the device + // again. + packet.lock().unwrap().status = UsbPacketStatus::Stall; + }; + break; + } + }; + } + } + + pub fn handle_iso_data_out(&mut self, _packet: Arc>) { + // TODO + error!("USBHost device Unsupported Isochronous Transfer from guest to device."); + } + + fn submit_host_transfer( + &mut self, + host_transfer: *mut libusb_transfer, + packet: &Arc>, + ) { + let mut locked_packet = packet.lock().unwrap(); + match submit_host_transfer(host_transfer) { + Ok(()) => {} + Err(Error::NoDevice) => { + locked_packet.status = UsbPacketStatus::NoDev; + trace::usb_host_req_complete( + self.config.hostbus, + self.config.hostaddr, + &*locked_packet as *const UsbPacket as u64, + &locked_packet.status, + locked_packet.actual_length as usize, + ); + return; + } + _ => { + locked_packet.status = UsbPacketStatus::Stall; + self.reset(); + return; + } + }; + + locked_packet.is_async = true; + } + + #[cfg(not(all(target_arch = "aarch64", target_env = "ohos")))] + fn open_usbdev(&mut self) -> Result<()> { + self.libdev = Some( + self.find_libdev() + .with_context(|| format!("Invalid USB host config: {:?}", self.config))?, + ); + self.handle = Some(self.libdev.as_ref().unwrap().open()?); + Ok(()) + } + + #[cfg(all(target_arch = "aarch64", target_env = "ohos"))] + fn open_usbdev(&mut self) -> Result<()> { + self.handle = Some( + self.oh_dev + .open(self.config.clone(), self.context.clone())?, + ); + self.libdev = Some(self.handle.as_ref().unwrap().device()); + Ok(()) + } +} + +impl Drop for UsbHost { + fn drop(&mut self) { + self.release_dev_to_host(); + } +} + +impl EventNotifierHelper for UsbHost { + fn internal_notifiers(usbhost: Arc>) -> Vec { + let cloned_usbhost = usbhost.clone(); + let mut notifiers = Vec::new(); + + let timeout = Some(Duration::new(0, 0)); + let handler: Rc = Rc::new(move |_, _fd: RawFd| { + cloned_usbhost + .lock() + .unwrap() + .context + .handle_events(timeout) + .unwrap_or_else(|e| error!("Failed to handle event: {:?}", e)); + None + }); + // SAFETY: The usbhost is guaranteed to be valid. + if let Ok(pollfds) = unsafe { PollFds::new(usbhost) } { + set_pollfd_notifiers(pollfds, &mut notifiers, handler); + } + + notifiers + } +} + +impl UsbDevice for UsbHost { + gen_base_func!(usb_device_base, usb_device_base_mut, UsbDeviceBase, base); + + fn realize(mut self) -> Result>> { + info!("Open and init usbhost device: {:?}", self.config); + self.open_usbdev()?; + self.init_usbdev()?; + + let usbhost = Arc::new(Mutex::new(self)); + let notifiers = EventNotifierHelper::internal_notifiers(usbhost.clone()); + register_event_helper(notifiers, None, &mut usbhost.lock().unwrap().libevt)?; + // UsbHost addr is changed after Arc::new, so so the registration must be here. 
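+        // register_exit() stores a raw pointer to this UsbHost in the exit notifier,
+        // so it may only run once the device sits at its final address inside the Arc.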
+ usbhost.lock().unwrap().register_exit(); + + Ok(usbhost) + } + + fn unrealize(&mut self) -> Result<()> { + TempCleaner::remove_exit_notifier(self.device_id()); + unregister_event_helper(None, &mut self.libevt)?; + info!("Usb Host device {} is unrealized", self.device_id()); + Ok(()) + } + + fn cancel_packet(&mut self, _packet: &Arc>) {} + + fn reset(&mut self) { + info!("Usb Host device {} reset", self.device_id()); + if self.handle.is_none() { + return; + } + + self.clear_iso_queues(); + + trace::usb_host_reset(self.config.hostbus, self.config.hostaddr); + + self.handle + .as_mut() + .unwrap() + .reset() + .unwrap_or_else(|e| error!("Failed to reset the usb host device {:?}", e)); + } + + fn set_controller(&mut self, _cntlr: std::sync::Weak>) {} + + fn get_controller(&self) -> Option>> { + None + } + + fn handle_control(&mut self, packet: &Arc>, device_req: &UsbDeviceRequest) { + trace::usb_host_req_control(self.config.hostbus, self.config.hostaddr, device_req); + let mut locked_packet = packet.lock().unwrap(); + if self.handle.is_none() { + locked_packet.status = UsbPacketStatus::NoDev; + trace::usb_host_req_emulated( + self.config.hostbus, + self.config.hostaddr, + &*locked_packet as *const UsbPacket as u64, + &locked_packet.status, + ); + return; + } + match device_req.request_type { + USB_DEVICE_OUT_REQUEST => { + if device_req.request == USB_REQUEST_SET_ADDRESS { + self.base.addr = device_req.value as u8; + trace::usb_host_set_address( + self.config.hostbus, + self.config.hostaddr, + self.base.addr, + ); + trace::usb_host_req_emulated( + self.config.hostbus, + self.config.hostaddr, + &*locked_packet as *const UsbPacket as u64, + &locked_packet.status, + ); + return; + } else if device_req.request == USB_REQUEST_SET_CONFIGURATION { + self.set_config(device_req.value as u8, &mut locked_packet); + trace::usb_host_req_emulated( + self.config.hostbus, + self.config.hostaddr, + &*locked_packet as *const UsbPacket as u64, + &locked_packet.status, + ); + return; + } + } + USB_INTERFACE_OUT_REQUEST => { + if device_req.request == USB_REQUEST_SET_INTERFACE { + self.set_interface(device_req.index, device_req.value, &mut locked_packet); + trace::usb_host_req_emulated( + self.config.hostbus, + self.config.hostaddr, + &*locked_packet as *const UsbPacket as u64, + &locked_packet.status, + ); + return; + } + } + USB_ENDPOINT_OUT_REQUEST => { + if device_req.request == USB_REQUEST_CLEAR_FEATURE && device_req.value == 0 { + self.clear_halt(locked_packet.pid as u8, device_req.index as u8); + trace::usb_host_req_emulated( + self.config.hostbus, + self.config.hostaddr, + &*locked_packet as *const UsbPacket as u64, + &locked_packet.status, + ); + return; + } + } + _ => {} + } + drop(locked_packet); + + let host_transfer = alloc_host_transfer(NON_ISO_PACKETS_NUMS); + let mut node = Box::new(Node::new(UsbHostRequest::new( + self.config.hostbus, + self.config.hostaddr, + Arc::downgrade(&self.requests), + packet.clone(), + host_transfer, + true, + ))); + node.value.setup_ctrl_buffer( + &self.base.data_buf[..device_req.length as usize], + device_req, + ); + + fill_transfer_by_type( + host_transfer, + self.handle.as_mut(), + 0, + &mut (*node) as *mut Node, + TransferType::Control, + ); + + self.requests.lock().unwrap().add_tail(node); + + self.submit_host_transfer(host_transfer, packet); + } + + fn handle_data(&mut self, packet: &Arc>) { + let cloned_packet = packet.clone(); + let mut locked_packet = packet.lock().unwrap(); + + trace::usb_host_req_data( + self.config.hostbus, + self.config.hostaddr, + 
&*locked_packet as *const UsbPacket as u64, + locked_packet.pid, + locked_packet.ep_number, + locked_packet.iovecs.len(), + ); + + if self.handle.is_none() { + locked_packet.status = UsbPacketStatus::NoDev; + trace::usb_host_req_emulated( + self.config.hostbus, + self.config.hostaddr, + &*locked_packet as *const UsbPacket as u64, + &locked_packet.status, + ); + return; + } + let in_direction = locked_packet.pid as u8 == USB_TOKEN_IN; + if self + .base + .get_endpoint(in_direction, locked_packet.ep_number) + .halted + { + locked_packet.status = UsbPacketStatus::Stall; + trace::usb_host_req_emulated( + self.config.hostbus, + self.config.hostaddr, + &*locked_packet as *const UsbPacket as u64, + &locked_packet.status, + ); + return; + } + + drop(locked_packet); + let mut ep_number = packet.lock().unwrap().ep_number; + let host_transfer: *mut libusb_transfer; + + match self.base.get_endpoint(in_direction, ep_number).ep_type { + USB_ENDPOINT_ATTR_BULK => { + host_transfer = alloc_host_transfer(NON_ISO_PACKETS_NUMS); + let mut node = Box::new(Node::new(UsbHostRequest::new( + self.config.hostbus, + self.config.hostaddr, + Arc::downgrade(&self.requests), + cloned_packet, + host_transfer, + false, + ))); + node.value.setup_data_buffer(); + + if packet.lock().unwrap().pid as u8 != USB_TOKEN_OUT { + ep_number |= USB_DIRECTION_DEVICE_TO_HOST; + } + fill_transfer_by_type( + host_transfer, + self.handle.as_mut(), + ep_number, + &mut (*node) as *mut Node, + TransferType::Bulk, + ); + self.requests.lock().unwrap().add_tail(node); + } + USB_ENDPOINT_ATTR_INT => { + host_transfer = alloc_host_transfer(NON_ISO_PACKETS_NUMS); + let mut node = Box::new(Node::new(UsbHostRequest::new( + self.config.hostbus, + self.config.hostaddr, + Arc::downgrade(&self.requests), + cloned_packet, + host_transfer, + false, + ))); + node.value.setup_data_buffer(); + + if packet.lock().unwrap().pid as u8 != USB_TOKEN_OUT { + ep_number |= USB_DIRECTION_DEVICE_TO_HOST; + } + fill_transfer_by_type( + host_transfer, + self.handle.as_mut(), + ep_number, + &mut (*node) as *mut Node, + TransferType::Interrupt, + ); + self.requests.lock().unwrap().add_tail(node); + } + USB_ENDPOINT_ATTR_ISOC => { + if packet.lock().unwrap().pid as u8 == USB_TOKEN_IN { + self.handle_iso_data_in(packet.clone()); + } else { + self.handle_iso_data_out(packet.clone()); + } + let locked_packet = packet.lock().unwrap(); + trace::usb_host_req_complete( + self.config.hostbus, + self.config.hostaddr, + &*locked_packet as *const UsbPacket as u64, + &locked_packet.status, + locked_packet.actual_length as usize, + ); + return; + } + _ => { + packet.lock().unwrap().status = UsbPacketStatus::Stall; + let locked_packet = packet.lock().unwrap(); + trace::usb_host_req_complete( + self.config.hostbus, + self.config.hostaddr, + &*locked_packet as *const UsbPacket as u64, + &locked_packet.status, + locked_packet.actual_length as usize, + ); + return; + } + }; + self.submit_host_transfer(host_transfer, packet); + } +} + +pub fn check_device_valid(device: &Device) -> bool { + let ddesc = match device.device_descriptor() { + Ok(ddesc) => ddesc, + Err(_) => return false, + }; + if ddesc.class_code() == LIBUSB_CLASS_HUB { + return false; + } + true +} diff --git a/devices/src/usb/usbhost/ohusb.rs b/devices/src/usb/usbhost/ohusb.rs new file mode 100644 index 0000000000000000000000000000000000000000..0e56acef5827138883699185b93251c39f5fb9ab --- /dev/null +++ b/devices/src/usb/usbhost/ohusb.rs @@ -0,0 +1,78 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. 
+// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::fs::File; +use std::os::unix::io::{AsRawFd, FromRawFd}; +use std::ptr; + +use anyhow::{bail, Context as anyhowContext, Result}; +use libusb1_sys::constants::LIBUSB_OPTION_NO_DEVICE_DISCOVERY; +use log::info; +use rusb::{Context, DeviceHandle, UsbContext}; + +use super::host_usblib::set_option; +use super::{check_device_valid, UsbHostConfig}; +use util::ohos_binding::usb::*; + +pub struct OhUsbDev { + #[allow(dead_code)] + lib: OhUsb, + dev_file: File, +} + +impl OhUsbDev { + pub fn new(bus_num: u8, dev_addr: u8) -> Result { + // In combination with libusb_wrap_sys_device(), in order to access a device directly without prior device scanning on ohos. + set_option(LIBUSB_OPTION_NO_DEVICE_DISCOVERY)?; + + let mut ohusb_dev = OhusbDevice { + busNum: bus_num, + devAddr: dev_addr, + fd: -1, + }; + + let lib = OhUsb::new()?; + match lib.open_device(ptr::addr_of_mut!(ohusb_dev))? { + 0 => { + if ohusb_dev.fd < 0 { + bail!( + "Failed to open usb device due to invalid fd {}", + ohusb_dev.fd + ); + } + } + _ => bail!("Failed to open usb device"), + } + info!("OH USB: open_device: returned fd is {}", ohusb_dev.fd); + + Ok(Self { + lib, + // SAFETY: fd is passed from OH USB framework and we have checked the function return value. + // Now let's save it to rust File struct. + dev_file: unsafe { File::from_raw_fd(ohusb_dev.fd) }, + }) + } + + pub fn open(&mut self, cfg: UsbHostConfig, ctx: Context) -> Result> { + // SAFETY: The validation of fd is guaranteed by new function. + let handle = unsafe { + ctx.open_device_with_fd(self.dev_file.as_raw_fd()) + .with_context(|| format!("os last error: {:?}", std::io::Error::last_os_error()))? + }; + + if !check_device_valid(&handle.device()) { + bail!("Invalid USB host config: {:?}", cfg); + } + + Ok(handle) + } +} diff --git a/devices/src/usb/xhci/mod.rs b/devices/src/usb/xhci/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..63e0dcf0058cf6d66c0035527864881350d3bf2a --- /dev/null +++ b/devices/src/usb/xhci/mod.rs @@ -0,0 +1,18 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod xhci_controller; +pub mod xhci_pci; +pub mod xhci_regs; +pub mod xhci_trb; + +mod xhci_ring; diff --git a/devices/src/usb/xhci/xhci_controller.rs b/devices/src/usb/xhci/xhci_controller.rs new file mode 100644 index 0000000000000000000000000000000000000000..14cd1351a9430217beaade589c2280181d8951c9 --- /dev/null +++ b/devices/src/usb/xhci/xhci_controller.rs @@ -0,0 +1,2528 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. 
+// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::collections::LinkedList; +use std::mem::size_of; +use std::slice::{from_raw_parts, from_raw_parts_mut}; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::{Arc, Mutex, Weak}; +use std::time::Duration; + +use anyhow::{anyhow, bail, Context, Result}; +use byteorder::{ByteOrder, LittleEndian}; +use log::{error, info, warn}; + +use super::xhci_pci::XhciConfig; +use super::xhci_regs::{XhciInterrupter, XhciOperReg}; +use super::xhci_ring::{XhciCommandRing, XhciEventRingSeg, XhciTRB, XhciTransferRing}; +use super::xhci_trb::{ + TRBCCode, TRBType, SETUP_TRB_TR_LEN, TRB_EV_ED, TRB_SIZE, TRB_TR_DIR, TRB_TR_FRAMEID_MASK, + TRB_TR_FRAMEID_SHIFT, TRB_TR_IDT, TRB_TR_IOC, TRB_TR_ISP, TRB_TR_LEN_MASK, TRB_TR_SIA, + TRB_TYPE_SHIFT, +}; +use crate::usb::{config::*, TransferOps}; +use crate::usb::{UsbDevice, UsbDeviceRequest, UsbError, UsbPacket, UsbPacketStatus}; +use address_space::{AddressAttr, AddressSpace, GuestAddress}; +use machine_manager::event_loop::EventLoop; + +const INVALID_SLOT_ID: u32 = 0; +pub const MAX_INTRS: u32 = 1; +pub const MAX_SLOTS: u32 = 64; +/// Endpoint state +pub const EP_STATE_MASK: u32 = 0x7; +pub const EP_DISABLED: u32 = 0; +pub const EP_RUNNING: u32 = 1; +pub const EP_HALTED: u32 = 2; +pub const EP_STOPPED: u32 = 3; +pub const EP_ERROR: u32 = 4; +/// Endpoint type +const EP_TYPE_SHIFT: u32 = 3; +const EP_TYPE_MASK: u32 = 0x7; +/// Slot state +const SLOT_STATE_MASK: u32 = 0x1f; +const SLOT_STATE_SHIFT: u32 = 27; +/// 6.2.3 Slot Context. Table 6-7. +/// The values of both enabled and disabled are 0. +pub const SLOT_DISABLED_ENABLED: u32 = 0; +pub const SLOT_DEFAULT: u32 = 1; +pub const SLOT_ADDRESSED: u32 = 2; +pub const SLOT_CONFIGURED: u32 = 3; +/// TRB flags +const TRB_CR_BSR: u32 = 1 << 9; +const TRB_CR_EPID_SHIFT: u32 = 16; +const TRB_CR_EPID_MASK: u32 = 0x1f; +const TRB_CR_STREAMID_SHIFT: u32 = 16; +const TRB_CR_STREAMID_MASK: u32 = 0xffff; +const TRB_CR_DC: u32 = 1 << 9; +const TRB_CR_SLOTID_SHIFT: u32 = 24; +const TRB_CR_SLOTID_MASK: u32 = 0xff; +const COMMAND_LIMIT: u32 = 256; +const EP_CTX_MAX_PSTREAMS_SHIFT: u32 = 10; +const EP_CTX_MAX_PSTREAMS_MASK: u32 = 0xf; +const EP_CTX_LSA_SHIFT: u32 = 15; +const EP_CTX_LSA_MASK: u32 = 0x01; +const EP_CTX_INTERVAL_SHIFT: u32 = 16; +const EP_CTX_INTERVAL_MASK: u32 = 0xff; +const EVENT_TRB_CCODE_SHIFT: u32 = 24; +const EVENT_TRB_SLOT_ID_SHIFT: u32 = 24; +const EVENT_TRB_EP_ID_SHIFT: u32 = 16; +const PORT_EVENT_ID_SHIFT: u32 = 24; +const SLOT_CTX_PORT_NUMBER_SHIFT: u32 = 16; +const ENDPOINT_ID_START: u32 = 1; +const MAX_ENDPOINTS: u32 = 31; +const TRANSFER_LEN_MASK: u32 = 0xffffff; +/// XHCI config +const XHCI_MAX_PORT2: u8 = 15; +const XHCI_MAX_PORT3: u8 = 15; +const XHCI_DEFAULT_PORT: u8 = 4; +/// Input Context. +const INPUT_CONTEXT_SIZE: u64 = 0x420; +/// Device Context. +const DEVICE_CONTEXT_SIZE: u64 = 0x400; +/// Slot Context. 
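+/// Offsets, masks and shifts for the Slot Context fields (see the spec 6.2.2 Slot Context).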
+const SLOT_INPUT_CTX_OFFSET: u64 = 0x20; +const SLOT_CONTEXT_MAX_EXIT_LATENCY_MASK: u32 = 0xffff; +const SLOT_CONTEXT_MAX_EXIT_LATENCY_SHIFT: u32 = 0; +const SLOT_CONTEXT_INTERRUPTER_TARGET_MASK: u32 = 0x3ff; +const SLOT_CONTEXT_INTERRUPTER_TARGET_SHIFT: u32 = 22; +const SLOT_CONTEXT_PORT_NUMBER_MASK: u32 = 0xff; +const SLOT_CONTEXT_PORT_NUMBER_SHIFT: u32 = 16; +const SLOT_CONTEXT_ENTRIES_MASK: u32 = 0x1f; +const SLOT_CONTEXT_ENTRIES_SHIFT: u32 = 27; +const SLOT_CONTEXT_DEVICE_ADDRESS_MASK: u32 = 0xff; +const SLOT_CONTEXT_DEVICE_ADDRESS_SHIFT: u32 = 0; +/// Endpoint Context. +const EP_INPUT_CTX_ENTRY_SIZE: u64 = 0x20; +const EP_INPUT_CTX_OFFSET: u64 = 0x40; +const EP_CTX_OFFSET: u64 = 0x20; +const EP_CTX_TR_DEQUEUE_POINTER_MASK: u64 = !0xf; +const EP_CTX_DCS: u64 = 1; +const EP_CONTEXT_MAX_PACKET_SIZE_MASK: u32 = 0xffff; +const EP_CONTEXT_MAX_PACKET_SIZE_SHIFT: u32 = 16; +const EP_CONTEXT_INTERVAL_MASK: u32 = 0xff; +const EP_CONTEXT_INTERVAL_SHIFT: u32 = 16; +const EP_CONTEXT_EP_STATE_MASK: u32 = 0x7; +const EP_CONTEXT_EP_STATE_SHIFT: u32 = 0; +const EP_CONTEXT_EP_TYPE_MASK: u32 = 0x7; +const EP_CONTEXT_EP_TYPE_SHIFT: u32 = 3; +const ISO_BASE_TIME_INTERVAL: u64 = 125000; +const MFINDEX_WRAP_NUM: u64 = 0x4000; +/// Stream Context. +const _STREAM_CTX_SCT_SHIFT: u32 = 1; +const _STREAM_CTX_SCT_MASK: u32 = 0x7; +const _STREAM_CTX_SCT_SECONDARY_TR: u32 = 0; +const _STREAM_CTX_SCT_PRIMARY_TR: u32 = 1; +const _STREAM_CTX_SCT_PRIMARY_SSA_8: u32 = 2; +const _STREAM_CTX_SCT_PRIMARY_SSA_16: u32 = 3; +const _STREAM_CTX_SCT_PRIMARY_SSA_32: u32 = 4; +const _STREAM_CTX_SCT_PRIMARY_SSA_64: u32 = 5; +const _STREAM_CTX_SCT_PRIMARY_SSA_128: u32 = 6; +const _STREAM_CTX_SCT_PRIMARY_SSA_256: u32 = 7; + +type DmaAddr = u64; + +/// Transfer data between controller and device. +pub struct XhciTransfer { + pub packet: Arc>, + status: TRBCCode, + td: Vec, + complete: bool, + slotid: u32, + epid: u32, + streamid: u32, + ep_context: XhciEpContext, + in_xfer: bool, + iso_xfer: bool, + timed_xfer: bool, + running_retry: bool, + running_async: bool, + interrupter: Arc>, + mfindex_kick: u64, +} + +impl XhciTransfer { + fn new( + ep_info: (u32, u32, u32), + ep_context: XhciEpContext, + in_xfer: bool, + td: Vec, + intr: &Arc>, + ) -> Self { + XhciTransfer { + packet: Arc::new(Mutex::new(UsbPacket::default())), + status: TRBCCode::Invalid, + td, + complete: false, + slotid: ep_info.0, + epid: ep_info.1, + streamid: ep_info.2, + ep_context, + in_xfer, + iso_xfer: false, + timed_xfer: false, + running_retry: false, + running_async: false, + interrupter: intr.clone(), + mfindex_kick: 0, + } + } + + pub fn complete_transfer(&mut self) -> Result<()> { + self.packet.lock().unwrap().is_async = false; + // NOTE: When entry this function, the transfer must be completed. 
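+        // Completed transfers are reaped from the endpoint's pending list by
+        // XhciEpContext::flush_transfer(), so mark completion before converting the
+        // packet status (which may itself fail).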
+ self.complete = true; + + self.status = usb_packet_status_to_trb_code(self.packet.lock().unwrap().status)?; + if self.status == TRBCCode::Success { + trace::usb_xhci_xfer_success(&self.packet.lock().unwrap().actual_length); + self.submit_transfer()?; + let ring = self.ep_context.get_ring(self.streamid).with_context(|| { + format!( + "Failed to find Transfer Ring with Endpoint ID {}, Slot ID {}, Stream ID {}.", + self.epid, self.slotid, self.streamid + ) + })?; + ring.refresh_dequeue_ptr(*self.ep_context.output_ctx_addr.lock().unwrap())?; + return Ok(()); + } + + trace::usb_xhci_xfer_error(&self.packet.lock().unwrap().status); + self.report_transfer_error()?; + let ep_type = self.ep_context.ep_type; + + if ep_type == EpType::IsoIn || ep_type == EpType::IsoOut { + return Ok(()); + } + // Set the endpoint state to halted if an error occurs in the packet. + self.ep_context.set_state(EP_HALTED, Some(self.streamid))?; + + Ok(()) + } + + /// Submit the succeed transfer TRBs. + pub fn submit_transfer(&mut self) -> Result<()> { + // Event Data Transfer Length Accumulator. + let mut edtla: u32 = 0; + let mut shortpkt = false; + let mut left = self.packet.lock().unwrap().actual_length; + for i in 0..self.td.len() { + let trb = &self.td[i]; + let trb_type = trb.get_type(); + let mut chunk = trb.status & TRB_TR_LEN_MASK; + match trb_type { + TRBType::TrSetup => {} + TRBType::TrData | TRBType::TrNormal | TRBType::TrIsoch => { + if chunk > left { + chunk = left; + if self.status == TRBCCode::Success { + shortpkt = true; + } + } + left -= chunk; + edtla = edtla.checked_add(chunk).with_context(|| + format!("Event Data Transfer Length Accumulator overflow, edtla {:x} offset {:x}", edtla, chunk) + )?; + } + TRBType::TrStatus => {} + _ => { + trace::usb_xhci_unimplemented(&format!( + "Ignore the TRB, unhandled trb type {:?}", + trb.get_type() + )); + } + } + if (trb.control & TRB_TR_IOC == TRB_TR_IOC) + || (shortpkt && (trb.control & TRB_TR_ISP == TRB_TR_ISP)) + || (self.status != TRBCCode::Success && left == 0) + { + self.send_transfer_event(trb, chunk, &mut edtla, shortpkt)?; + if self.status != TRBCCode::Success { + return Ok(()); + } + } + } + Ok(()) + } + + fn send_transfer_event( + &self, + trb: &XhciTRB, + transferred: u32, + edtla: &mut u32, + shortpkt: bool, + ) -> Result<()> { + let trb_type = trb.get_type(); + let mut evt = XhciEvent::new(TRBType::ErTransfer, TRBCCode::Success); + evt.slot_id = self.slotid as u8; + evt.ep_id = self.epid as u8; + evt.length = (trb.status & TRB_TR_LEN_MASK) - transferred; + evt.flags = 0; + evt.ptr = trb.addr; + evt.ccode = if self.status == TRBCCode::Success { + if shortpkt { + TRBCCode::ShortPacket + } else { + TRBCCode::Success + } + } else { + self.status + }; + if trb_type == TRBType::TrEvdata { + evt.ptr = trb.parameter; + evt.flags |= TRB_EV_ED; + evt.length = *edtla & TRANSFER_LEN_MASK; + *edtla = 0; + } + self.interrupter.lock().unwrap().send_event(&evt)?; + Ok(()) + } + + fn report_transfer_error(&mut self) -> Result<()> { + // An error occurs in the transfer. The transfer is set to the completed and will not be + // retried. 
+ self.complete = true; + let mut evt = XhciEvent::new(TRBType::ErTransfer, TRBCCode::TrbError); + evt.slot_id = self.slotid as u8; + evt.ep_id = self.epid as u8; + evt.ccode = self.status; + // According to 4.10.1 Transfer TRBs, the TRB pointer field in a Transfer TRB not + // only references the TRB that generated the event, but it also provides system software + // with the latest value of the xHC Dequeue Pointer for the Transfer Ring. + if let Some(trb) = self.td.last() { + evt.ptr = trb.addr; + } + self.interrupter.lock().unwrap().send_event(&evt)?; + Ok(()) + } +} + +impl TransferOps for XhciTransfer { + fn submit_transfer(&mut self) { + if let Err(e) = self.complete_transfer() { + error!("Failed to submit transfer, error {:?}", e); + } + } +} + +/// Endpoint context which use the ring to transfer data. +#[derive(Clone)] +pub struct XhciEpContext { + epid: u32, + enabled: bool, + ring: Option>, + ep_type: EpType, + output_ctx_addr: Arc>, + state: Arc, + interval: u32, + mfindex_last: u64, + transfers: LinkedList>>, + retry: Option>>, + mem: Arc, + max_pstreams: u32, + lsa: bool, + stream_array: Option, +} + +impl XhciEpContext { + pub fn new(mem: &Arc) -> Self { + Self { + epid: 0, + enabled: false, + ring: None, + ep_type: EpType::Invalid, + output_ctx_addr: Arc::new(Mutex::new(GuestAddress(0))), + state: Arc::new(AtomicU32::new(0)), + interval: 0, + mfindex_last: 0, + transfers: LinkedList::new(), + retry: None, + mem: Arc::clone(mem), + max_pstreams: 0, + lsa: false, + stream_array: None, + } + } + + /// Init the endpoint context used the context read from memory. + fn init_ctx(&mut self, output_ctx: DmaAddr, ctx: &XhciEpCtx) -> Result<()> { + let dequeue: DmaAddr = addr64_from_u32(ctx.deq_lo & !0xf, ctx.deq_hi); + self.ep_type = ((ctx.ep_info2 >> EP_TYPE_SHIFT) & EP_TYPE_MASK).into(); + *self.output_ctx_addr.lock().unwrap() = GuestAddress(output_ctx); + self.max_pstreams = (ctx.ep_info >> EP_CTX_MAX_PSTREAMS_SHIFT) & EP_CTX_MAX_PSTREAMS_MASK; + self.lsa = ((ctx.ep_info >> EP_CTX_LSA_SHIFT) & EP_CTX_LSA_MASK) != 0; + self.interval = 1 << ((ctx.ep_info >> EP_CTX_INTERVAL_SHIFT) & EP_CTX_INTERVAL_MASK); + + if self.max_pstreams == 0 { + let ring = XhciTransferRing::new(&self.mem); + ring.init(dequeue); + ring.set_cycle_bit((ctx.deq_lo & 1) == 1); + self.ring = Some(Arc::new(ring)); + } else { + let stream_array = XhciStreamArray::new(&self.mem, self.max_pstreams); + stream_array + .init(dequeue) + .with_context(|| "Failed to initialize Stream Array.")?; + self.stream_array = Some(stream_array); + } + + Ok(()) + } + + fn get_ep_state(&self) -> u32 { + self.state.load(Ordering::Acquire) + } + + fn set_ep_state(&self, state: u32) { + self.state.store(state, Ordering::Release); + } + + /// Update the endpoint state and write the state to memory. + fn set_state(&self, state: u32, stream_id: Option) -> Result<()> { + let mut ep_ctx = XhciEpCtx::default(); + let output_addr = self.output_ctx_addr.lock().unwrap(); + dma_read_u32(&self.mem, *output_addr, ep_ctx.as_mut_dwords())?; + ep_ctx.ep_info &= !EP_STATE_MASK; + ep_ctx.ep_info |= state; + dma_write_u32(&self.mem, *output_addr, ep_ctx.as_dwords())?; + drop(output_addr); + self.flush_dequeue_to_memory(stream_id)?; + self.set_ep_state(state); + trace::usb_xhci_set_state(self.epid, state); + Ok(()) + } + + /// Update the dequeue pointer in endpoint or stream context. + /// If dequeue is None, only flush the dequeue pointer to memory. 
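+    /// Bit 0 of the new pointer carries the Dequeue Cycle State (DCS); the low four
+    /// bits are masked off before the ring is re-initialized.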
+ fn update_dequeue(&self, dequeue: Option, stream_id: u32) -> Result<()> { + if let Some(dequeue) = dequeue { + let ring = self.get_ring(stream_id).with_context(|| { + format!( + "Failed to find Transfer Ring for Endpoint {}, Stream ID {}.", + self.epid, stream_id + ) + })?; + ring.init(dequeue & EP_CTX_TR_DEQUEUE_POINTER_MASK); + ring.set_cycle_bit((dequeue & EP_CTX_DCS) == EP_CTX_DCS); + trace::usb_xhci_update_dequeue(self.epid, dequeue, stream_id); + } + + self.flush_dequeue_to_memory(Some(stream_id))?; + Ok(()) + } + + /// Flush the dequeue pointer to the memory. + /// Stream Endpoints flush ring dequeue to both Endpoint and Stream context. + fn flush_dequeue_to_memory(&self, stream_id: Option) -> Result<()> { + let mut ep_ctx = XhciEpCtx::default(); + let output_addr = self.output_ctx_addr.lock().unwrap(); + dma_read_u32(&self.mem, *output_addr, ep_ctx.as_mut_dwords())?; + + if self.max_pstreams == 0 { + let ring = self.get_ring(0)?; + ring.update_dequeue_to_ctx(&mut ep_ctx.as_mut_dwords()[2..]); + } else if let Some(stream_id) = stream_id { + let mut stream_ctx = XhciStreamCtx::default(); + let stream = self.get_stream(stream_id)?; + let locked_stream = stream.lock().unwrap(); + let output_addr = locked_stream.dequeue; + let ring = locked_stream.ring.as_ref(); + dma_read_u32(&self.mem, output_addr, stream_ctx.as_mut_dwords())?; + ring.update_dequeue_to_ctx(stream_ctx.as_mut_dwords()); + ring.update_dequeue_to_ctx(&mut ep_ctx.as_mut_dwords()[2..]); + dma_write_u32(&self.mem, output_addr, stream_ctx.as_dwords())?; + } + + dma_write_u32(&self.mem, *output_addr, ep_ctx.as_dwords())?; + Ok(()) + } + + /// Flush the transfer list, remove the transfer which is completed. + fn flush_transfer(&mut self) { + let mut undo = LinkedList::new(); + while let Some(head) = self.transfers.pop_front() { + if !head.lock().unwrap().complete { + undo.push_back(head); + } + } + self.transfers = undo; + } + + /// Find and return a stream corresponding to the specified Stream ID. + /// Returns error if there is no stream support or LSA is not enabled. + fn get_stream(&self, stream_id: u32) -> Result>> { + let stream_arr = self + .stream_array + .as_ref() + .ok_or_else(|| anyhow!("Endpoint {} does not support streams.", self.epid))?; + + if !self.lsa { + bail!("Only Linear Streams Array (LSA) is supported."); + } + + let XhciStreamArray(pstreams) = &stream_arr; + let pstreams_num = pstreams.len() as u32; + + if stream_id >= pstreams_num || stream_id == 0 { + bail!( + "Stream ID {} is either invalid or reserved, max number of streams is {}.", + stream_id, + pstreams_num + ); + } + + let stream_context = &pstreams[stream_id as usize]; + let mut locked_context = stream_context.lock().unwrap(); + locked_context.try_refresh()?; + trace::usb_xhci_get_stream(stream_id, self.epid); + Ok(Arc::clone(stream_context)) + } + + /// Get a ring corresponding to the specified Stream ID if stream support is enabled, + /// return the standard Transfer Ring otherwise. 
+ fn get_ring(&self, stream_id: u32) -> Result> { + if self.max_pstreams == 0 { + Ok(Arc::clone(self.ring.as_ref().ok_or_else(|| { + anyhow!( + "Failed to get the Transfer Ring for Endpoint {} without streams.", + self.epid + ) + })?)) + } else { + let stream = self.get_stream(stream_id).with_context(|| { + format!( + "Failed to find Stream Context with Stream ID {}.", + stream_id + ) + })?; + let locked_stream = stream.lock().unwrap(); + trace::usb_xhci_get_ring(self.epid, stream_id); + Ok(Arc::clone(&locked_stream.ring)) + } + } + + /// Reset all streams on this Endpoint. + fn reset_streams(&self) -> Result<()> { + let stream_arr = self.stream_array.as_ref().ok_or_else(|| { + anyhow!( + "Endpoint {} does not support streams, reset aborted.", + self.epid + ) + })?; + stream_arr.reset(); + trace::usb_xhci_reset_streams(self.epid); + Ok(()) + } +} + +/// Endpoint type, including control, bulk, interrupt and isochronous. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum EpType { + Invalid = 0, + IsoOut, + BulkOut, + IntrOut, + Control, + IsoIn, + BulkIn, + IntrIn, +} + +impl From for EpType { + fn from(t: u32) -> EpType { + match t { + 0 => EpType::Invalid, + 1 => EpType::IsoOut, + 2 => EpType::BulkOut, + 3 => EpType::IntrOut, + 4 => EpType::Control, + 5 => EpType::IsoIn, + 6 => EpType::BulkIn, + 7 => EpType::IntrIn, + _ => EpType::Invalid, + } + } +} + +/// Device slot, mainly including some endpoint. +pub struct XhciSlot { + pub enabled: bool, + pub addressed: bool, + pub slot_ctx_addr: GuestAddress, + pub usb_port: Option>>, + pub endpoints: Vec, +} + +impl XhciSlot { + fn new(mem: &Arc) -> Self { + let mut eps = Vec::new(); + for _ in 0..MAX_ENDPOINTS { + eps.push(XhciEpContext::new(mem)); + } + + XhciSlot { + enabled: false, + addressed: false, + slot_ctx_addr: GuestAddress(0), + usb_port: None, + endpoints: eps, + } + } + + /// Get the slot context from the memory. + fn get_slot_ctx(&self, mem: &Arc) -> Result { + let mut slot_ctx = XhciSlotCtx::default(); + dma_read_u32(mem, self.slot_ctx_addr, slot_ctx.as_mut_dwords())?; + Ok(slot_ctx) + } + + /// Get the slot state in slot context. + fn get_slot_state_in_context(&self, mem: &Arc) -> Result { + // Table 4-1: Device Slot State Code Definitions. + if self.slot_ctx_addr == GuestAddress(0) { + return Ok(SLOT_DISABLED_ENABLED); + } + let slot_ctx = self.get_slot_ctx(mem)?; + let slot_state = (slot_ctx.dev_state >> SLOT_STATE_SHIFT) & SLOT_STATE_MASK; + Ok(slot_state) + } + + fn slot_state_is_valid(&self, mem: &Arc) -> Result { + let slot_state = self.get_slot_state_in_context(mem)?; + let valid = slot_state == SLOT_DEFAULT + || slot_state == SLOT_ADDRESSED + || slot_state == SLOT_CONFIGURED; + Ok(valid) + } +} + +/// USB port which can attached device. +pub struct UsbPort { + pub xhci: Weak>, + /// Port Status and Control + pub portsc: u32, + /// Port ID + pub port_id: u8, + pub speed_mask: u32, + pub dev: Option>>, + pub used: bool, + pub slot_id: u32, +} + +impl UsbPort { + pub fn new(xhci: &Weak>, i: u8) -> Self { + Self { + xhci: xhci.clone(), + portsc: 0, + port_id: i, + speed_mask: 0, + dev: None, + used: false, + slot_id: INVALID_SLOT_ID, + } + } + + /// Get port link state from port status and control register. + pub fn get_port_link_state(&self) -> u32 { + self.portsc >> PORTSC_PLS_SHIFT & PORTSC_PLS_MASK + } + + /// Set port link state in port status and control register. 
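+    /// Only the PLS field of PORTSC is modified; all other bits are preserved.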
+ pub fn set_port_link_state(&mut self, pls: u32) { + self.portsc &= !(PORTSC_PLS_MASK << PORTSC_PLS_SHIFT); + self.portsc |= (pls & PORTSC_PLS_MASK) << PORTSC_PLS_SHIFT; + } + + /// Check the speed is supported by the usb port. + pub fn speed_supported(&self, speed: u32) -> bool { + let speed_mask = 1 << speed; + self.speed_mask & speed_mask == speed_mask + } +} + +/// Event usually send to drivers. +#[derive(Debug)] +pub struct XhciEvent { + pub trb_type: TRBType, + pub ccode: TRBCCode, + pub ptr: u64, + pub length: u32, + flags: u32, + slot_id: u8, + ep_id: u8, +} + +impl XhciEvent { + pub fn new(trb_type: TRBType, ccode: TRBCCode) -> Self { + Self { + trb_type, + ccode, + ptr: 0, + length: 0, + slot_id: 0, + flags: 0, + ep_id: 0, + } + } + + /// Convert event to trb. + pub fn to_trb(&self) -> XhciTRB { + XhciTRB { + parameter: self.ptr, + status: self.length | (self.ccode as u32) << EVENT_TRB_CCODE_SHIFT, + control: u32::from(self.slot_id) << EVENT_TRB_SLOT_ID_SHIFT + | u32::from(self.ep_id) << EVENT_TRB_EP_ID_SHIFT + | self.flags + | (self.trb_type as u32) << TRB_TYPE_SHIFT, + addr: 0, + ccs: false, + } + } +} + +/// Input Control Context. See the spec 6.2.5 Input Control Context. +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct XhciInputCtrlCtx { + pub drop_flags: u32, + pub add_flags: u32, +} + +impl DwordOrder for XhciInputCtrlCtx {} + +/// Slot Context. See the spec 6.2.2 Slot Context. +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct XhciSlotCtx { + pub dev_info: u32, + pub dev_info2: u32, + pub tt_info: u32, + pub dev_state: u32, +} + +impl XhciSlotCtx { + pub fn set_slot_state(&mut self, state: u32) { + self.dev_state &= !(SLOT_STATE_MASK << SLOT_STATE_SHIFT); + self.dev_state |= (state & SLOT_STATE_MASK) << SLOT_STATE_SHIFT; + } + + pub fn get_slot_state(&self) -> u32 { + self.dev_state >> SLOT_STATE_SHIFT & SLOT_STATE_MASK + } + + pub fn set_context_entry(&mut self, num: u32) { + self.dev_info &= !(SLOT_CONTEXT_ENTRIES_MASK << SLOT_CONTEXT_ENTRIES_SHIFT); + self.dev_info |= (num & SLOT_CONTEXT_ENTRIES_MASK) << SLOT_CONTEXT_ENTRIES_SHIFT; + } + + pub fn set_port_number(&mut self, port_number: u32) { + self.dev_info &= !(SLOT_CONTEXT_PORT_NUMBER_MASK << SLOT_CONTEXT_PORT_NUMBER_SHIFT); + self.dev_info2 |= + (port_number & SLOT_CONTEXT_PORT_NUMBER_MASK) << SLOT_CONTEXT_PORT_NUMBER_SHIFT; + } + + pub fn get_max_exit_latency(&self) -> u32 { + self.dev_info2 >> SLOT_CONTEXT_MAX_EXIT_LATENCY_SHIFT & SLOT_CONTEXT_MAX_EXIT_LATENCY_MASK + } + + pub fn set_max_exit_latency(&mut self, state: u32) { + self.dev_info2 &= + !(SLOT_CONTEXT_MAX_EXIT_LATENCY_MASK << SLOT_CONTEXT_MAX_EXIT_LATENCY_SHIFT); + self.dev_info2 |= + (state & SLOT_CONTEXT_MAX_EXIT_LATENCY_MASK) << SLOT_CONTEXT_MAX_EXIT_LATENCY_SHIFT; + } + + pub fn get_interrupter_target(&self) -> u32 { + self.tt_info >> SLOT_CONTEXT_INTERRUPTER_TARGET_SHIFT & SLOT_CONTEXT_INTERRUPTER_TARGET_MASK + } + + pub fn set_interrupter_target(&mut self, state: u32) { + self.tt_info &= + !(SLOT_CONTEXT_INTERRUPTER_TARGET_MASK << SLOT_CONTEXT_INTERRUPTER_TARGET_SHIFT); + self.tt_info |= + (state & SLOT_CONTEXT_INTERRUPTER_TARGET_MASK) << SLOT_CONTEXT_INTERRUPTER_TARGET_SHIFT; + } + + pub fn get_usb_device_address(&self) -> u32 { + self.dev_state >> SLOT_CONTEXT_DEVICE_ADDRESS_SHIFT & SLOT_CONTEXT_DEVICE_ADDRESS_MASK + } + + pub fn set_usb_device_address(&mut self, state: u32) { + self.dev_state &= !(SLOT_CONTEXT_DEVICE_ADDRESS_MASK << SLOT_CONTEXT_DEVICE_ADDRESS_SHIFT); + self.dev_state |= + (state & 
SLOT_CONTEXT_DEVICE_ADDRESS_MASK) << SLOT_CONTEXT_DEVICE_ADDRESS_SHIFT; + } +} + +impl DwordOrder for XhciSlotCtx {} + +/// Endpoint Context. See the spec 6.2.3 Endpoint Context. +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct XhciEpCtx { + pub ep_info: u32, + pub ep_info2: u32, + pub deq_lo: u32, + pub deq_hi: u32, + pub tx_info: u32, +} + +impl XhciEpCtx { + pub fn set_tr_dequeue_pointer(&mut self, dequeue: u64) { + self.deq_lo = dequeue as u32; + self.deq_hi = (dequeue >> 32) as u32; + } + + pub fn get_max_packet_size(&self) -> u32 { + self.ep_info2 >> EP_CONTEXT_MAX_PACKET_SIZE_SHIFT & EP_CONTEXT_MAX_PACKET_SIZE_MASK + } + + pub fn set_max_packet_size(&mut self, size: u32) { + self.ep_info2 &= !(EP_CONTEXT_MAX_PACKET_SIZE_MASK << EP_CONTEXT_MAX_PACKET_SIZE_SHIFT); + self.ep_info2 |= + (size & EP_CONTEXT_MAX_PACKET_SIZE_MASK) << EP_CONTEXT_MAX_PACKET_SIZE_SHIFT; + } + + pub fn set_interval(&mut self, inter: u32) { + self.ep_info &= !(EP_CONTEXT_INTERVAL_MASK << EP_CONTEXT_INTERVAL_SHIFT); + self.ep_info |= (inter & EP_CONTEXT_INTERVAL_MASK) << EP_CONTEXT_INTERVAL_SHIFT; + } + + pub fn get_ep_state(&self) -> u32 { + self.ep_info >> EP_CONTEXT_EP_STATE_SHIFT & EP_CONTEXT_EP_STATE_MASK + } + + pub fn set_ep_state(&mut self, state: u32) { + self.ep_info &= !(EP_CONTEXT_EP_STATE_MASK << EP_CONTEXT_EP_STATE_SHIFT); + self.ep_info |= (state & EP_CONTEXT_EP_STATE_MASK) << EP_CONTEXT_EP_STATE_SHIFT; + } + + pub fn set_ep_type(&mut self, state: u32) { + self.ep_info2 &= !(EP_CONTEXT_EP_TYPE_MASK << EP_CONTEXT_EP_TYPE_SHIFT); + self.ep_info2 |= (state & EP_CONTEXT_EP_TYPE_MASK) << EP_CONTEXT_EP_TYPE_SHIFT; + } +} + +impl DwordOrder for XhciEpCtx {} + +pub trait DwordOrder: Default + Copy + Send + Sync { + fn as_dwords(&self) -> &[u32] { + // SAFETY: Tt can be guaranteed that self has been initialized. + unsafe { from_raw_parts(self as *const Self as *const u32, size_of::() / 4) } + } + + fn as_mut_dwords(&mut self) -> &mut [u32] { + // SAFETY: Tt can be guaranteed that self has been initialized. + unsafe { from_raw_parts_mut(self as *mut Self as *mut u32, size_of::() / 4) } + } +} + +#[repr(transparent)] +#[derive(Clone)] +pub struct XhciStreamArray(Vec>>); + +impl XhciStreamArray { + fn new(mem: &Arc, max_pstreams: u32) -> Self { + let pstreams_num = 1 << (max_pstreams + 1); + let pstreams = (0..pstreams_num) + .map(|_| Arc::new(Mutex::new(XhciStreamContext::new(mem)))) + .collect(); + XhciStreamArray(pstreams) + } + + fn init(&self, mut dequeue: u64) -> Result<()> { + for stream_context in self.0.iter() { + stream_context.lock().unwrap().init(dequeue)?; + dequeue += std::mem::size_of::() as u64; + } + + Ok(()) + } + + fn reset(&self) { + for stream_context in self.0.iter() { + stream_context.lock().unwrap().reset(); + } + } +} + +#[derive(Clone)] +pub struct XhciStreamContext { + /// Memory address space. + mem: Arc, + /// Dequeue pointer. + dequeue: GuestAddress, + /// Transfer Ring (no Secondary Streams for now). + ring: Arc, + /// Whether the context is up to date after reset. 
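The DwordOrder trait above lets any #[repr(C)] context built purely from u32 fields be viewed as a &[u32], which is what the dma_read_u32/dma_write_u32 helpers later in this file copy to and from guest memory. A simplified, self-contained sketch of the same idea; MyCtx and DwordView are made-up stand-ins for the xHCI context types and trait:

```rust
use std::mem::size_of;
use std::slice::{from_raw_parts, from_raw_parts_mut};

/// Stand-in for an xHCI context: #[repr(C)] and built only from u32 fields,
/// so its memory layout is exactly a contiguous run of dwords.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy)]
struct MyCtx {
    info: u32,
    state: u32,
}

trait DwordView: Default + Copy {
    fn as_dwords(&self) -> &[u32] {
        // SAFETY: Self is #[repr(C)] with only u32 fields, so the cast and the
        // length (size in bytes / 4) describe valid, initialized memory.
        unsafe { from_raw_parts(self as *const Self as *const u32, size_of::<Self>() / 4) }
    }

    fn as_mut_dwords(&mut self) -> &mut [u32] {
        // SAFETY: same layout argument as above; &mut self guarantees exclusive access.
        unsafe { from_raw_parts_mut(self as *mut Self as *mut u32, size_of::<Self>() / 4) }
    }
}

impl DwordView for MyCtx {}

fn main() {
    let mut ctx = MyCtx::default();
    // In the real code this slice is filled by a DMA read from guest memory.
    ctx.as_mut_dwords().copy_from_slice(&[0x1234, 0x5678]);
    assert_eq!(ctx.info, 0x1234);
    assert_eq!(ctx.state, 0x5678);
    assert_eq!(ctx.as_dwords(), &[0x1234, 0x5678][..]);
}
```

The size_of::<Self>() / 4 length is only sound while every field stays a u32 and the struct stays #[repr(C)]; a differently sized field would silently break the dword view.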
+ needs_refresh: bool, +} + +impl XhciStreamContext { + fn new(mem: &Arc) -> Self { + Self { + mem: Arc::clone(mem), + dequeue: GuestAddress(0), + ring: Arc::new(XhciTransferRing::new(mem)), + needs_refresh: true, + } + } + + fn init(&mut self, addr: u64) -> Result<()> { + self.dequeue = GuestAddress(addr); + self.refresh()?; + Ok(()) + } + + fn try_refresh(&mut self) -> Result<()> { + if self.needs_refresh { + self.refresh()?; + } + + Ok(()) + } + + fn refresh(&mut self) -> Result<()> { + let mut stream_ctx = XhciStreamCtx::default(); + dma_read_u32(&self.mem, self.dequeue, stream_ctx.as_mut_dwords())?; + let dequeue = addr64_from_u32(stream_ctx.deq_lo & !0xf, stream_ctx.deq_hi); + self.ring.init(dequeue); + self.needs_refresh = false; + Ok(()) + } + + fn reset(&mut self) { + self.needs_refresh = true; + } +} + +#[repr(C, packed)] +#[derive(Debug, Default, Clone, Copy)] +pub struct XhciStreamCtx { + pub deq_lo: u32, + pub deq_hi: u32, + pub stopped_edtla: u32, + pub reserved: u32, +} + +impl DwordOrder for XhciStreamCtx {} + +/// Xhci controller device. +pub struct XhciDevice { + pub numports_2: u8, + pub numports_3: u8, + pub oper: XhciOperReg, + pub usb_ports: Vec>>, + pub slots: Vec, + pub intrs: Vec>>, + pub cmd_ring: XhciCommandRing, + mem_space: Arc, + /// Runtime Register. + mfindex_start: Duration, + timer_id: Option, + packet_count: u32, +} + +impl XhciDevice { + pub fn new(mem_space: &Arc, config: &XhciConfig) -> Arc> { + let mut p2 = XHCI_DEFAULT_PORT; + let mut p3 = XHCI_DEFAULT_PORT; + if config.p2.is_some() { + p2 = config.p2.unwrap(); + if p2 > XHCI_MAX_PORT2 { + p2 = XHCI_MAX_PORT2 + } + } + if config.p3.is_some() { + p3 = config.p3.unwrap(); + if p3 > XHCI_MAX_PORT3 { + p3 = XHCI_MAX_PORT3; + } + } + let oper = XhciOperReg::default(); + + let mut intrs = Vec::new(); + for i in 0..MAX_INTRS { + intrs.push(Arc::new(Mutex::new(XhciInterrupter::new( + mem_space, + &oper.usb_cmd, + &oper.usb_status, + i, + )))); + } + + let mut slots = Vec::new(); + for _ in 0..MAX_SLOTS { + slots.push(XhciSlot::new(mem_space)); + } + + let xhci = XhciDevice { + packet_count: 0, + oper, + usb_ports: Vec::new(), + numports_3: p3, + numports_2: p2, + slots, + intrs, + cmd_ring: XhciCommandRing::new(mem_space), + mem_space: mem_space.clone(), + mfindex_start: EventLoop::get_ctx(None).unwrap().get_virtual_clock(), + timer_id: None, + }; + let xhci = Arc::new(Mutex::new(xhci)); + let clone_xhci = xhci.clone(); + let mut locked_xhci = clone_xhci.lock().unwrap(); + locked_xhci.oper.set_usb_status(USB_STS_HCH); + for i in 0..locked_xhci.numports_2 { + let usb_port = Arc::new(Mutex::new(UsbPort::new( + &Arc::downgrade(&clone_xhci), + i + 1, + ))); + locked_xhci.usb_ports.push(usb_port.clone()); + let mut locked_port = usb_port.lock().unwrap(); + locked_port.speed_mask = USB_SPEED_MASK_LOW | USB_SPEED_MASK_HIGH | USB_SPEED_MASK_FULL; + } + for i in 0..locked_xhci.numports_3 { + let idx = i + locked_xhci.numports_2 + 1; + let usb_port = Arc::new(Mutex::new(UsbPort::new(&Arc::downgrade(&clone_xhci), idx))); + locked_xhci.usb_ports.push(usb_port.clone()); + let mut locked_port = usb_port.lock().unwrap(); + locked_port.speed_mask = USB_SPEED_MASK_SUPER; + } + xhci + } + + pub fn set_interrupt_ops(&mut self, cb: Arc bool + Send + Sync>) { + for intr in &self.intrs { + intr.lock().unwrap().set_interrupter(cb.clone()); + } + } + + pub fn run(&mut self) { + trace::usb_xhci_run(); + self.oper.unset_usb_status_flag(USB_STS_HCH); + self.mfindex_start = EventLoop::get_ctx(None).unwrap().get_virtual_clock(); + } + + 
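XhciStreamContext::refresh above rebuilds the Transfer Ring dequeue pointer from the two pointer dwords of the Stream Context, masking off the low nibble, which carries flag bits (such as the cycle state) rather than address bits. A tiny sketch of that reconstruction using the same addr64_from_u32 helper defined near the end of this file; the concrete dword values are made up for illustration:

```rust
/// Join the split low/high dwords of a guest pointer, mirroring addr64_from_u32.
fn addr64_from_u32(low: u32, high: u32) -> u64 {
    (u64::from(high) << 32) | u64::from(low)
}

fn main() {
    // Stream and endpoint contexts store the TR Dequeue Pointer as two dwords, and
    // the low nibble of the first dword holds flag bits, so it is masked off before
    // the 64-bit address is rebuilt.
    let deq_lo: u32 = 0x1234_5671; // bit 0 set: flag bit, not part of the address
    let deq_hi: u32 = 0x0000_0002;
    let dequeue = addr64_from_u32(deq_lo & !0xf, deq_hi);
    assert_eq!(dequeue, 0x2_1234_5670);
}
```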
pub fn mfindex(&mut self) -> u64 { + let now = EventLoop::get_ctx(None).unwrap().get_virtual_clock(); + (now - self.mfindex_start).as_nanos() as u64 / ISO_BASE_TIME_INTERVAL + } + + pub fn mfwrap_update(&mut self) { + let bits = USB_CMD_RUN | USB_CMD_EWE; + if self.oper.get_usb_cmd() & bits == bits { + let mfindex = self.mfindex() & (MFINDEX_WRAP_NUM - 1); + let left = MFINDEX_WRAP_NUM - mfindex; + let weak_xhci = self.usb_ports[0].lock().unwrap().xhci.clone(); + + let xhci_mfwrap_timer = Box::new(move || { + let xhci = weak_xhci.upgrade().unwrap(); + let mut locked_xhci = xhci.lock().unwrap(); + + let evt = XhciEvent::new(TRBType::ErMfindexWrap, TRBCCode::Success); + if let Err(e) = locked_xhci.intrs[0].lock().unwrap().send_event(&evt) { + error!("Failed to send event: {:?}", e); + } + + locked_xhci.mfwrap_update(); + }); + self.timer_id = Some(EventLoop::get_ctx(None).unwrap().timer_add( + xhci_mfwrap_timer, + Duration::from_nanos(left * ISO_BASE_TIME_INTERVAL), + )); + } + } + + pub fn stop(&mut self) { + trace::usb_xhci_stop(); + self.oper.set_usb_status_flag(USB_STS_HCH); + self.oper.cmd_ring_ctrl &= !u64::from(CMD_RING_CTRL_CRR); + } + + pub fn running(&self) -> bool { + self.oper.get_usb_status() & USB_STS_HCH != USB_STS_HCH + } + + pub fn host_controller_error(&mut self) { + error!("Xhci host controller error!"); + self.oper.set_usb_status_flag(USB_STS_HCE) + } + + pub fn reset(&mut self) { + trace::usb_xhci_reset(); + self.oper.reset(); + for i in 0..self.slots.len() as u32 { + if let Err(e) = self.disable_slot(i + 1) { + error!("Failed to disable slot {:?}", e); + } + } + for i in 0..self.usb_ports.len() { + let port = self.usb_ports[i].clone(); + if let Err(e) = self.port_update(&port, false) { + error!("Failed to update port: {:?}", e); + } + } + for i in 0..self.intrs.len() { + self.intrs[i].lock().unwrap().reset(); + } + self.cmd_ring.init(0); + + self.mfindex_start = EventLoop::get_ctx(None).unwrap().get_virtual_clock(); + + self.mfwrap_update(); + } + + /// Reset xhci port. + pub fn reset_port(&mut self, xhci_port: &Arc>, warm_reset: bool) -> Result<()> { + let mut locked_port = xhci_port.lock().unwrap(); + trace::usb_xhci_port_reset(&locked_port.port_id, &warm_reset); + let usb_dev = locked_port.dev.as_ref(); + if usb_dev.is_none() { + // No device, no need to reset. + return Ok(()); + } + + let usb_dev = usb_dev.unwrap(); + usb_dev.lock().unwrap().reset(); + let speed = usb_dev.lock().unwrap().speed(); + if speed == USB_SPEED_SUPER && warm_reset { + locked_port.portsc |= PORTSC_WRC; + } + match speed { + USB_SPEED_LOW | USB_SPEED_FULL | USB_SPEED_HIGH | USB_SPEED_SUPER => { + locked_port.set_port_link_state(PLS_U0); + trace::usb_xhci_port_link(&locked_port.port_id, &PLS_U0); + locked_port.portsc |= PORTSC_PED; + } + _ => { + error!("Invalid speed {}", speed); + } + } + locked_port.portsc &= !PORTSC_PR; + drop(locked_port); + self.port_notify(xhci_port, PORTSC_PRC)?; + Ok(()) + } + + /// Send PortStatusChange event to notify drivers. 
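mfindex() above converts elapsed virtual-clock time into the xHCI microframe counter, and mfwrap_update() schedules a timer for the next wrap of that counter. A small sketch of the arithmetic, assuming one microframe is 125 us (ISO_BASE_TIME_INTERVAL = 125_000 ns) and a 14-bit counter (MFINDEX_WRAP_NUM = 0x4000); the crate's real constants are defined elsewhere in this module:

```rust
use std::time::Duration;

/// Assumed values: one microframe is 125 us and MFINDEX is a 14-bit counter.
const ISO_BASE_TIME_INTERVAL: u64 = 125_000; // ns per microframe
const MFINDEX_WRAP_NUM: u64 = 0x4000;

/// Microframe index for a given elapsed virtual-clock duration.
fn mfindex(elapsed: Duration) -> u64 {
    elapsed.as_nanos() as u64 / ISO_BASE_TIME_INTERVAL
}

/// How long until the 14-bit MFINDEX register next wraps.
fn time_to_wrap(elapsed: Duration) -> Duration {
    let idx = mfindex(elapsed) & (MFINDEX_WRAP_NUM - 1);
    Duration::from_nanos((MFINDEX_WRAP_NUM - idx) * ISO_BASE_TIME_INTERVAL)
}

fn main() {
    // 1 ms of guest time is 8 microframes.
    assert_eq!(mfindex(Duration::from_millis(1)), 8);
    // Right after the counter starts, the wrap is a full 16384 microframes (~2.048 s) away.
    assert_eq!(
        time_to_wrap(Duration::ZERO),
        Duration::from_nanos(MFINDEX_WRAP_NUM * ISO_BASE_TIME_INTERVAL)
    );
}
```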
+ pub fn port_notify(&mut self, port: &Arc>, flag: u32) -> Result<()> { + let mut locked_port = port.lock().unwrap(); + if locked_port.portsc & flag == flag { + return Ok(()); + } + trace::usb_xhci_port_notify(&locked_port.port_id, &flag); + locked_port.portsc |= flag; + if !self.running() { + return Ok(()); + } + let mut evt = XhciEvent::new(TRBType::ErPortStatusChange, TRBCCode::Success); + evt.ptr = u64::from(u32::from(locked_port.port_id) << PORT_EVENT_ID_SHIFT); + self.intrs[0].lock().unwrap().send_event(&evt)?; + Ok(()) + } + + /// Update the xhci port status and then notify the driver. + pub fn port_update(&mut self, port: &Arc>, detach: bool) -> Result<()> { + let mut locked_port = port.lock().unwrap(); + locked_port.portsc = PORTSC_PP; + let mut pls = PLS_RX_DETECT; + if !detach { + if let Some(dev) = &locked_port.dev { + let speed = dev.lock().unwrap().speed(); + locked_port.portsc |= PORTSC_CCS; + if speed == USB_SPEED_SUPER { + locked_port.portsc |= PORTSC_SPEED_SUPER; + locked_port.portsc |= PORTSC_PED; + pls = PLS_U0; + } else if speed == USB_SPEED_FULL { + locked_port.portsc |= PORTSC_SPEED_FULL; + pls = PLS_POLLING; + } else if speed == USB_SPEED_HIGH { + locked_port.portsc |= PORTSC_SPEED_HIGH; + pls = PLS_POLLING; + } else if speed == USB_SPEED_LOW { + locked_port.portsc |= PORTSC_SPEED_LOW; + pls = PLS_POLLING; + } + } + } + locked_port.set_port_link_state(pls); + trace::usb_xhci_port_link(&locked_port.port_id, &pls); + drop(locked_port); + self.oper.set_usb_status_flag(USB_STS_PCD); + self.port_notify(port, PORTSC_CSC)?; + Ok(()) + } + + fn generate_packet_id(&mut self) -> u32 { + self.packet_count = self.packet_count.wrapping_add(1); + self.packet_count + } + + fn get_slot_id(&self, evt: &mut XhciEvent, trb: &XhciTRB) -> u32 { + let slot_id = (trb.control >> TRB_CR_SLOTID_SHIFT) & TRB_CR_SLOTID_MASK; + if slot_id < 1 || slot_id > self.slots.len() as u32 { + error!("Failed to get slot id, slot {} out of range", slot_id); + evt.ccode = TRBCCode::TrbError; + return 0; + } else if !self.slots[(slot_id - 1) as usize].enabled { + error!("Failed to get slot id, slot {} is disabled", slot_id); + evt.ccode = TRBCCode::SlotNotEnabledError; + return 0; + } + slot_id + } + + fn lookup_usb_port(&mut self, slot_ctx: &XhciSlotCtx) -> Option>> { + let port = (slot_ctx.dev_info2 >> SLOT_CTX_PORT_NUMBER_SHIFT & 0xff) as u8; + if port < 1 || port > self.usb_ports.len() as u8 { + error!("Invalid port: {}", port); + return None; + } + let usb_port = &self.usb_ports[(port - 1) as usize]; + let locked_port = usb_port.lock().unwrap(); + if locked_port.used { + Some(usb_port.clone()) + } else { + None + } + } + + /// Control plane + pub fn handle_command(&mut self) -> Result<()> { + self.oper.start_cmd_ring(); + let mut slot_id: u32 = 0; + let mut event = XhciEvent::new(TRBType::ErCommandComplete, TRBCCode::Success); + for _ in 0..COMMAND_LIMIT { + match self.cmd_ring.fetch_trb()? 
{ + Some(trb) => { + let trb_type = trb.get_type(); + event.ptr = trb.addr; + info!("handle_command {:?} {:?}", trb_type, trb); + match trb_type { + TRBType::CrEnableSlot => { + let mut found = 0; + for i in 0..self.slots.len() as u32 { + if !self.slots[i as usize].enabled { + found = i + 1; + break; + } + } + if found == 0 { + event.ccode = TRBCCode::NoSlotsError; + } else { + slot_id = found; + event.ccode = self.enable_slot(slot_id); + } + } + TRBType::CrDisableSlot => { + slot_id = self.get_slot_id(&mut event, &trb); + if slot_id != 0 { + event.ccode = self.disable_slot(slot_id)?; + } + } + TRBType::CrAddressDevice => { + slot_id = self.get_slot_id(&mut event, &trb); + if slot_id != 0 { + event.ccode = self.address_device(slot_id, &trb)?; + } + } + TRBType::CrConfigureEndpoint => { + slot_id = self.get_slot_id(&mut event, &trb); + if slot_id != 0 { + event.ccode = self.configure_endpoint(slot_id, &trb)?; + } + } + TRBType::CrEvaluateContext => { + slot_id = self.get_slot_id(&mut event, &trb); + if slot_id != 0 { + event.ccode = self.evaluate_context(slot_id, &trb)?; + } + } + TRBType::CrStopEndpoint => { + slot_id = self.get_slot_id(&mut event, &trb); + if slot_id != 0 { + let ep_id = trb.control >> TRB_CR_EPID_SHIFT & TRB_CR_EPID_MASK; + event.ccode = self.stop_endpoint(slot_id, ep_id)?; + } + } + TRBType::CrResetEndpoint => { + slot_id = self.get_slot_id(&mut event, &trb); + if slot_id != 0 { + let ep_id = trb.control >> TRB_CR_EPID_SHIFT & TRB_CR_EPID_MASK; + event.ccode = self.reset_endpoint(slot_id, ep_id)?; + } + } + TRBType::CrSetTrDequeue => { + slot_id = self.get_slot_id(&mut event, &trb); + if slot_id != 0 { + let ep_id = trb.control >> TRB_CR_EPID_SHIFT & TRB_CR_EPID_MASK; + let stream_id = + trb.status >> TRB_CR_STREAMID_SHIFT & TRB_CR_STREAMID_MASK; + event.ccode = + self.set_tr_dequeue_pointer(slot_id, ep_id, stream_id, &trb)?; + } + } + TRBType::CrResetDevice => { + slot_id = self.get_slot_id(&mut event, &trb); + if slot_id != 0 { + event.ccode = self.reset_device(slot_id)?; + } + } + TRBType::CrNoop => { + event.ccode = TRBCCode::Success; + } + _ => { + error!("Invalid Command: type {:?}", trb_type); + event.ccode = TRBCCode::TrbError; + } + } + event.slot_id = slot_id as u8; + self.intrs[0].lock().unwrap().send_event(&event)?; + } + None => { + trace::usb_xhci_unimplemented(&"No TRB in the cmd ring.".to_string()); + break; + } + } + } + Ok(()) + } + + fn enable_slot(&mut self, slot_id: u32) -> TRBCCode { + trace::usb_xhci_enable_slot(&slot_id); + self.slots[(slot_id - 1) as usize].enabled = true; + TRBCCode::Success + } + + fn disable_slot(&mut self, slot_id: u32) -> Result { + trace::usb_xhci_disable_slot(&slot_id); + for i in 1..=self.slots[(slot_id - 1) as usize].endpoints.len() as u32 { + self.disable_endpoint(slot_id, i)?; + } + self.slots[(slot_id - 1) as usize].enabled = false; + self.slots[(slot_id - 1) as usize].addressed = false; + self.slots[(slot_id - 1) as usize].usb_port = None; + self.slots[(slot_id - 1) as usize].slot_ctx_addr = GuestAddress(0); + Ok(TRBCCode::Success) + } + + pub fn detach_slot(&mut self, slot_id: u32) -> Result<()> { + if slot_id < 1 || slot_id > self.slots.len() as u32 { + return Ok(()); + } + for i in 1..=self.slots[(slot_id - 1) as usize].endpoints.len() as u32 { + let epctx = &mut self.slots[(slot_id - 1) as usize].endpoints[(i - 1) as usize]; + if epctx.enabled { + self.cancel_all_ep_transfers(slot_id, i, TRBCCode::Invalid)?; + } + } + self.slots[(slot_id - 1) as usize].usb_port = None; + Ok(()) + } + + fn address_device(&mut 
self, slot_id: u32, trb: &XhciTRB) -> Result { + let ictx = trb.parameter; + ictx.checked_add(INPUT_CONTEXT_SIZE).with_context(|| { + format!( + "Input Context access overflow, addr {:x} size {:x}", + ictx, INPUT_CONTEXT_SIZE + ) + })?; + let ccode = self.check_input_ctx(ictx)?; + if ccode != TRBCCode::Success { + return Ok(ccode); + } + let mut slot_ctx = XhciSlotCtx::default(); + dma_read_u32( + &self.mem_space, + GuestAddress( + // It is safe to plus here because we previously verify the address. + ictx + SLOT_INPUT_CTX_OFFSET, + ), + slot_ctx.as_mut_dwords(), + )?; + let bsr = trb.control & TRB_CR_BSR == TRB_CR_BSR; + let ccode = self.check_slot_state(&slot_ctx, bsr)?; + if ccode != TRBCCode::Success { + return Ok(ccode); + } + let usb_port = if let Some(usb_port) = self.lookup_usb_port(&slot_ctx) { + usb_port + } else { + error!("Failed to found usb port"); + return Ok(TRBCCode::TrbError); + }; + trace::usb_xhci_address_device(&slot_id, &usb_port.lock().unwrap().port_id); + if usb_port.lock().unwrap().dev.is_none() { + error!("No device found in usb port."); + return Ok(TRBCCode::UsbTransactionError); + }; + let ctx_addr = self.get_device_context_addr(slot_id)?; + let mut octx = 0; + dma_read_u64(&self.mem_space, GuestAddress(ctx_addr), &mut octx)?; + octx.checked_add(DEVICE_CONTEXT_SIZE).with_context(|| { + format!( + "Device Context access overflow, addr {:x} size {:x}", + octx, DEVICE_CONTEXT_SIZE + ) + })?; + let mut locked_port = usb_port.lock().unwrap(); + locked_port.slot_id = slot_id; + self.slots[(slot_id - 1) as usize].usb_port = Some(usb_port.clone()); + self.slots[(slot_id - 1) as usize].slot_ctx_addr = GuestAddress(octx); + let dev = locked_port.dev.as_ref().unwrap(); + dev.lock().unwrap().reset(); + if bsr { + slot_ctx.dev_state = SLOT_DEFAULT << SLOT_STATE_SHIFT; + } else { + slot_ctx.dev_state = (SLOT_ADDRESSED << SLOT_STATE_SHIFT) | slot_id; + self.set_device_address(dev, slot_id); + } + // Enable control endpoint. + self.enable_endpoint(slot_id, 1, ictx, octx)?; + dma_write_u32(&self.mem_space, GuestAddress(octx), slot_ctx.as_dwords())?; + self.slots[(slot_id - 1) as usize].addressed = true; + Ok(TRBCCode::Success) + } + + fn check_input_ctx(&self, ictx: u64) -> Result { + let mut ictl_ctx = XhciInputCtrlCtx::default(); + dma_read_u32( + &self.mem_space, + GuestAddress(ictx), + ictl_ctx.as_mut_dwords(), + )?; + if ictl_ctx.add_flags & 0x3 != 0x3 { + // The Slot Context(Add Context flag0 (A0)) and Default Endpoint Control + // (Add Context flag1 (A1)) shall be valid. Others shall be ignored. + error!("Invalid input context: {:?}", ictl_ctx); + return Ok(TRBCCode::ParameterError); + } + Ok(TRBCCode::Success) + } + + fn check_slot_state(&self, slot_ctx: &XhciSlotCtx, bsr: bool) -> Result { + let slot_state = (slot_ctx.dev_state >> SLOT_STATE_SHIFT) & SLOT_STATE_MASK; + if !(slot_state == SLOT_DISABLED_ENABLED || !bsr && slot_state == SLOT_DEFAULT) { + error!("Invalid slot state: {:?}", slot_state); + return Ok(TRBCCode::ContextStateError); + } + Ok(TRBCCode::Success) + } + + /// Send SET_ADDRESS request to usb device. 
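check_input_ctx above enforces the Address Device rule on the Input Control Context: Add Context flags A0 (Slot Context) and A1 (default control endpoint) must both be set, while config_slot_ep later applies the complementary check for Configure Endpoint. A minimal sketch of just those flag checks; InputCtrl is a made-up stand-in and the error handling is reduced to plain booleans:

```rust
/// Input Control Context add/drop flags (the first two dwords of the Input Context).
#[derive(Debug, Default, Clone, Copy)]
struct InputCtrl {
    drop_flags: u32,
    add_flags: u32,
}

/// Address Device requires A0 (Slot Context) and A1 (EP0 context) to be added.
fn address_device_flags_ok(ctrl: &InputCtrl) -> bool {
    ctrl.add_flags & 0x3 == 0x3
}

/// Configure Endpoint must leave A0/A1 out of the drop flags and set only A0
/// in the low two bits of the add flags.
fn configure_endpoint_flags_ok(ctrl: &InputCtrl) -> bool {
    ctrl.drop_flags & 0x3 == 0x0 && ctrl.add_flags & 0x3 == 0x1
}

fn main() {
    let addr = InputCtrl { drop_flags: 0, add_flags: 0b11 };
    assert!(address_device_flags_ok(&addr));

    // Adds the Slot Context plus one regular endpoint context.
    let cfg = InputCtrl { drop_flags: 0, add_flags: 0b01 | (1 << 2) };
    assert!(configure_endpoint_flags_ok(&cfg));
    assert!(!address_device_flags_ok(&InputCtrl { drop_flags: 0, add_flags: 0b01 }));
}
```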
+ fn set_device_address(&mut self, dev: &Arc>, addr: u32) { + let mut locked_dev = dev.lock().unwrap(); + let device_req = UsbDeviceRequest { + request_type: USB_DEVICE_OUT_REQUEST, + request: USB_REQUEST_SET_ADDRESS, + value: addr as u16, + index: 0, + length: 0, + }; + let target_dev = Arc::downgrade(dev) as Weak>; + let packet_id = self.generate_packet_id(); + let p = Arc::new(Mutex::new(UsbPacket::new( + packet_id, + u32::from(USB_TOKEN_OUT), + 0, + 0, + Vec::new(), + None, + Some(target_dev), + ))); + trace::usb_handle_control(&locked_dev.usb_device_base().base.id, &device_req); + locked_dev.handle_control(&p, &device_req); + } + + fn get_device_context_addr(&self, slot_id: u32) -> Result { + self.oper + .dcbaap + .raw_value() + .checked_add(u64::from(8 * slot_id)) + .with_context(|| { + UsbError::MemoryAccessOverflow(self.oper.dcbaap.raw_value(), u64::from(8 * slot_id)) + }) + } + + fn configure_endpoint(&mut self, slot_id: u32, trb: &XhciTRB) -> Result { + trace::usb_xhci_configure_endpoint(&slot_id); + let slot_state = + self.slots[(slot_id - 1) as usize].get_slot_state_in_context(&self.mem_space)?; + if trb.control & TRB_CR_DC == TRB_CR_DC { + if slot_state != SLOT_CONFIGURED { + error!("Invalid slot state: {:?}", slot_state); + return Ok(TRBCCode::ContextStateError); + } + return self.deconfigure_endpoint(slot_id); + } + if slot_state != SLOT_CONFIGURED && slot_state != SLOT_ADDRESSED { + error!("Invalid slot state: {:?}", slot_state); + return Ok(TRBCCode::ContextStateError); + } + self.config_slot_ep(slot_id, trb.parameter) + } + + fn deconfigure_endpoint(&mut self, slot_id: u32) -> Result { + for i in 2..32 { + self.disable_endpoint(slot_id, i)?; + } + let mut slot_ctx = self.slots[(slot_id - 1) as usize].get_slot_ctx(&self.mem_space)?; + slot_ctx.set_slot_state(SLOT_ADDRESSED); + dma_write_u32( + &self.mem_space, + self.slots[(slot_id - 1) as usize].slot_ctx_addr, + slot_ctx.as_dwords(), + )?; + Ok(TRBCCode::Success) + } + + fn config_slot_ep(&mut self, slot_id: u32, ictx: u64) -> Result { + ictx.checked_add(INPUT_CONTEXT_SIZE).with_context(|| { + format!( + "Input Context access overflow, addr {:x} size {:x}", + ictx, INPUT_CONTEXT_SIZE + ) + })?; + let mut ictl_ctx = XhciInputCtrlCtx::default(); + dma_read_u32( + &self.mem_space, + GuestAddress(ictx), + ictl_ctx.as_mut_dwords(), + )?; + if ictl_ctx.drop_flags & 0x3 != 0x0 || ictl_ctx.add_flags & 0x3 != 0x1 { + error!("Invalid control context {:?}", ictl_ctx); + return Ok(TRBCCode::TrbError); + } + let octx = self.slots[(slot_id - 1) as usize].slot_ctx_addr; + for i in 2..32 { + if ictl_ctx.drop_flags & (1 << i) == 1 << i { + self.disable_endpoint(slot_id, i)?; + } + if ictl_ctx.add_flags & (1 << i) == 1 << i { + self.disable_endpoint(slot_id, i)?; + self.enable_endpoint(slot_id, i, ictx, octx.raw_value())?; + } + } + // From section 4.6.6 Configure Endpoint of the spec: + // If all Endpoints are Disabled: + // Set the Slot State in the Output Slot Context to Addressed. + // else (An Endpoint is Enabled): + // Set the Slot State in the Output Slot Context to Configured. + // Set the Context Entries field in the Output Slot Context to the index of + // the last valid Endpoint Context in its Output Device Context structure. 
+ let mut enabled_ep_idx = 0; + for i in (2..32).rev() { + if self.slots[(slot_id - 1) as usize].endpoints[(i - 1) as usize].enabled { + enabled_ep_idx = i; + break; + } + } + let mut slot_ctx = self.slots[(slot_id - 1) as usize].get_slot_ctx(&self.mem_space)?; + if enabled_ep_idx == 0 { + slot_ctx.set_slot_state(SLOT_ADDRESSED); + slot_ctx.set_context_entry(1); + } else { + slot_ctx.set_slot_state(SLOT_CONFIGURED); + slot_ctx.set_context_entry(enabled_ep_idx); + } + dma_write_u32(&self.mem_space, octx, slot_ctx.as_dwords())?; + Ok(TRBCCode::Success) + } + + fn evaluate_context(&mut self, slot_id: u32, trb: &XhciTRB) -> Result { + trace::usb_xhci_evaluate_context(&slot_id); + if !self.slots[(slot_id - 1) as usize].slot_state_is_valid(&self.mem_space)? { + error!("Invalid slot state, slot id {}", slot_id); + return Ok(TRBCCode::ContextStateError); + } + let ictx = trb.parameter; + ictx.checked_add(INPUT_CONTEXT_SIZE).with_context(|| { + format!( + "Input Context access overflow, addr {:x} size {:x}", + ictx, INPUT_CONTEXT_SIZE + ) + })?; + let octx = self.slots[(slot_id - 1) as usize].slot_ctx_addr; + let mut ictl_ctx = XhciInputCtrlCtx::default(); + dma_read_u32( + &self.mem_space, + GuestAddress(ictx), + ictl_ctx.as_mut_dwords(), + )?; + if ictl_ctx.drop_flags != 0x0 || ictl_ctx.add_flags & !0x3 == !0x3 { + error!("Invalid input control"); + return Ok(TRBCCode::TrbError); + } + if ictl_ctx.add_flags & 0x1 == 0x1 { + let mut islot_ctx = XhciSlotCtx::default(); + dma_read_u32( + &self.mem_space, + GuestAddress( + // It is safe to plus here because we previously verify the address. + ictx + SLOT_INPUT_CTX_OFFSET, + ), + islot_ctx.as_mut_dwords(), + )?; + let mut slot_ctx = XhciSlotCtx::default(); + dma_read_u32(&self.mem_space, octx, slot_ctx.as_mut_dwords())?; + slot_ctx.set_max_exit_latency(islot_ctx.get_max_exit_latency()); + slot_ctx.set_interrupter_target(islot_ctx.get_interrupter_target()); + dma_write_u32(&self.mem_space, octx, slot_ctx.as_dwords())?; + } + if ictl_ctx.add_flags & 0x2 == 0x2 { + // Default control endpoint context. + let mut iep_ctx = XhciEpCtx::default(); + dma_read_u32( + &self.mem_space, + GuestAddress( + // It is safe to use plus here because we previously verify the address. 
+ ictx + EP_INPUT_CTX_OFFSET, + ), + iep_ctx.as_mut_dwords(), + )?; + let mut ep_ctx = XhciEpCtx::default(); + let ep_ctx_addr = octx.checked_add(EP_CTX_OFFSET).with_context(|| { + format!( + "Endpoint Context access overflow, addr {:x} size {:x}", + octx.raw_value(), + EP_CTX_OFFSET + ) + })?; + dma_read_u32(&self.mem_space, ep_ctx_addr, ep_ctx.as_mut_dwords())?; + ep_ctx.set_max_packet_size(iep_ctx.get_max_packet_size()); + dma_write_u32(&self.mem_space, ep_ctx_addr, ep_ctx.as_dwords())?; + } + Ok(TRBCCode::Success) + } + + fn reset_device(&mut self, slot_id: u32) -> Result { + trace::usb_xhci_reset_device(&slot_id); + let mut slot_ctx = XhciSlotCtx::default(); + let octx = self.slots[(slot_id - 1) as usize].slot_ctx_addr; + dma_read_u32(&self.mem_space, octx, slot_ctx.as_mut_dwords())?; + let slot_state = (slot_ctx.dev_state >> SLOT_STATE_SHIFT) & SLOT_STATE_MASK; + if slot_state != SLOT_ADDRESSED + && slot_state != SLOT_CONFIGURED + && slot_state != SLOT_DEFAULT + { + error!("Invalid slot state: {:?}", slot_state); + return Ok(TRBCCode::ContextStateError); + } + for i in 2..32 { + self.disable_endpoint(slot_id, i)?; + } + slot_ctx.set_slot_state(SLOT_DEFAULT); + slot_ctx.set_context_entry(1); + slot_ctx.set_usb_device_address(0); + dma_write_u32(&self.mem_space, octx, slot_ctx.as_dwords())?; + Ok(TRBCCode::Success) + } + + fn enable_endpoint( + &mut self, + slot_id: u32, + ep_id: u32, + input_ctx: DmaAddr, + output_ctx: DmaAddr, + ) -> Result { + trace::usb_xhci_enable_endpoint(&slot_id, &ep_id); + let entry_offset = u64::from(ep_id - 1) * EP_INPUT_CTX_ENTRY_SIZE; + let mut ep_ctx = XhciEpCtx::default(); + dma_read_u32( + &self.mem_space, + // It is safe to use plus here because we previously verify the address on the outer + // layer. + GuestAddress(input_ctx + EP_INPUT_CTX_OFFSET + entry_offset), + ep_ctx.as_mut_dwords(), + )?; + self.disable_endpoint(slot_id, ep_id)?; + let epctx = &mut self.slots[(slot_id - 1) as usize].endpoints[(ep_id - 1) as usize]; + epctx.epid = ep_id; + epctx.enabled = true; + // It is safe to use plus here because we previously verify the address on the outer layer. + epctx.init_ctx(output_ctx + EP_CTX_OFFSET + entry_offset, &ep_ctx)?; + epctx.set_ep_state(EP_RUNNING); + ep_ctx.ep_info &= !EP_STATE_MASK; + ep_ctx.ep_info |= EP_RUNNING; + dma_write_u32( + &self.mem_space, + // It is safe to use plus here because we previously verify the address on the outer + // layer. 
+ GuestAddress(output_ctx + EP_CTX_OFFSET + entry_offset), + ep_ctx.as_dwords(), + )?; + + epctx.mfindex_last = 0; + + Ok(TRBCCode::Success) + } + + fn disable_endpoint(&mut self, slot_id: u32, ep_id: u32) -> Result { + trace::usb_xhci_disable_endpoint(&slot_id, &ep_id); + let epctx = &mut self.slots[(slot_id - 1) as usize].endpoints[(ep_id - 1) as usize]; + if !epctx.enabled { + trace::usb_xhci_unimplemented(&"Endpoint already disabled".to_string()); + return Ok(TRBCCode::Success); + } + self.cancel_all_ep_transfers(slot_id, ep_id, TRBCCode::Invalid)?; + let epctx = &mut self.slots[(slot_id - 1) as usize].endpoints[(ep_id - 1) as usize]; + if self.oper.dcbaap.raw_value() != 0 { + epctx.set_state(EP_DISABLED, None)?; + } + epctx.enabled = false; + Ok(TRBCCode::Success) + } + + fn stop_endpoint(&mut self, slot_id: u32, ep_id: u32) -> Result { + trace::usb_xhci_stop_endpoint(&slot_id, &ep_id); + if !(ENDPOINT_ID_START..=MAX_ENDPOINTS).contains(&ep_id) { + error!("Invalid endpoint id"); + return Ok(TRBCCode::TrbError); + } + if !self.slots[(slot_id - 1) as usize].slot_state_is_valid(&self.mem_space)? { + error!("Invalid slot state, slotid {}", slot_id); + return Ok(TRBCCode::ContextStateError); + } + let epctx = &mut self.slots[(slot_id - 1) as usize].endpoints[(ep_id - 1) as usize]; + if !epctx.enabled { + error!(" Endpoint is disabled, slotid {} epid {}", slot_id, ep_id); + return Ok(TRBCCode::EpNotEnabledError); + } + if epctx.get_ep_state() != EP_RUNNING { + error!( + "Endpoint invalid state, slotid {} epid {} state {}", + slot_id, + ep_id, + epctx.get_ep_state() + ); + return Ok(TRBCCode::ContextStateError); + } + if self.cancel_all_ep_transfers(slot_id, ep_id, TRBCCode::Stopped)? > 0 { + trace::usb_xhci_unimplemented(&format!( + "Endpoint stop when xfers running, slot_id {} epid {}", + slot_id, ep_id + )); + } + let epctx = &mut self.slots[(slot_id - 1) as usize].endpoints[(ep_id - 1) as usize]; + epctx.set_state(EP_STOPPED, None)?; + if epctx.max_pstreams != 0 { + epctx.reset_streams()?; + } + Ok(TRBCCode::Success) + } + + fn reset_endpoint(&mut self, slot_id: u32, ep_id: u32) -> Result { + trace::usb_xhci_reset_endpoint(&slot_id, &ep_id); + if !(ENDPOINT_ID_START..=MAX_ENDPOINTS).contains(&ep_id) { + error!("Invalid endpoint id {}", ep_id); + return Ok(TRBCCode::TrbError); + } + if !self.slots[(slot_id - 1) as usize].slot_state_is_valid(&self.mem_space)? { + error!("Invalid slot state, slotid {}", slot_id); + return Ok(TRBCCode::ContextStateError); + } + let slot = &mut self.slots[(slot_id - 1) as usize]; + let epctx = &mut slot.endpoints[(ep_id - 1) as usize]; + if !epctx.enabled { + error!("reset_endpoint ep is disabled"); + return Ok(TRBCCode::EpNotEnabledError); + } + if epctx.get_ep_state() != EP_HALTED { + error!("Endpoint is not halted"); + return Ok(TRBCCode::ContextStateError); + } + if self.cancel_all_ep_transfers(slot_id, ep_id, TRBCCode::Invalid)? 
> 0 { + warn!("endpoint reset when xfers running!"); + } + let slot = &mut self.slots[(slot_id - 1) as usize]; + let epctx = &mut slot.endpoints[(ep_id - 1) as usize]; + if let Some(port) = &slot.usb_port { + if port.lock().unwrap().dev.is_some() { + epctx.set_state(EP_STOPPED, None)?; + } else { + error!("Failed to found usb device"); + return Ok(TRBCCode::UsbTransactionError); + } + } else { + error!("Failed to found port"); + return Ok(TRBCCode::UsbTransactionError); + } + if epctx.max_pstreams != 0 { + epctx.reset_streams()?; + } + Ok(TRBCCode::Success) + } + + fn set_tr_dequeue_pointer( + &mut self, + slotid: u32, + epid: u32, + streamid: u32, + trb: &XhciTRB, + ) -> Result { + trace::usb_xhci_set_tr_dequeue(&slotid, &epid, &trb.parameter); + if !(ENDPOINT_ID_START..=MAX_ENDPOINTS).contains(&epid) { + error!("Invalid endpoint id {}", epid); + return Ok(TRBCCode::TrbError); + } + if !self.slots[(slotid - 1) as usize].slot_state_is_valid(&self.mem_space)? { + error!("Invalid slot state, slotid {}", slotid); + return Ok(TRBCCode::ContextStateError); + } + let epctx = &mut self.slots[(slotid - 1) as usize].endpoints[(epid - 1) as usize]; + if !epctx.enabled { + error!("Endpoint is disabled, slotid {} epid {}", slotid, epid); + return Ok(TRBCCode::EpNotEnabledError); + } + let ep_state = epctx.get_ep_state(); + if ep_state != EP_STOPPED && ep_state != EP_ERROR { + error!( + "Endpoint invalid state, slotid {} epid {} state {}", + slotid, epid, ep_state + ); + return Ok(TRBCCode::ContextStateError); + } + epctx.update_dequeue(Some(trb.parameter), streamid)?; + Ok(TRBCCode::Success) + } + + /// Data plane + pub(crate) fn kick_endpoint(&mut self, slot_id: u32, ep_id: u32, stream_id: u32) -> Result<()> { + let epctx = match self.get_endpoint_ctx(slot_id, ep_id) { + Ok(epctx) => epctx, + Err(e) => { + error!("Kick endpoint error: {:?}", e); + // No need to return the error, just ignore it. + return Ok(()); + } + }; + + let ring = epctx.get_ring(stream_id).with_context(|| { + format!( + "Failed to kick Endpoint {}, no Transfer ring found on Stream ID {}", + ep_id, stream_id + ) + })?; + + // If the device has been detached, but the guest has not been notified. + // In this case, the Transaction Error is reported when the TRB processed. + // Therefore, don't continue here. + if self.get_usb_dev(slot_id, ep_id).is_err() { + return Ok(()); + } + + trace::usb_xhci_ep_kick(&slot_id, &ep_id, &ring.get_dequeue_ptr()); + if self.slots[(slot_id - 1) as usize].endpoints[(ep_id - 1) as usize] + .retry + .is_some() + && !self.endpoint_retry_transfer(slot_id, ep_id, stream_id)? + { + // Return directly to retry again at the next kick. + return Ok(()); + } + + let epctx = &mut self.slots[(slot_id - 1) as usize].endpoints[(ep_id - 1) as usize]; + if epctx.get_ep_state() == EP_HALTED { + info!("xhci: endpoint halted"); + return Ok(()); + } + epctx.set_state(EP_RUNNING, Some(stream_id))?; + const KICK_LIMIT: u32 = 256; + let mut count = 0; + loop { + let epctx = &mut self.slots[(slot_id - 1) as usize].endpoints[(ep_id - 1) as usize]; + let td = match ring.fetch_td()? 
{ + Some(td) => { + trace::usb_xhci_unimplemented(&format!( + "fetch transfer trb {:?} ring dequeue {:?}", + td, + ring.get_dequeue_ptr(), + )); + td + } + None => { + if epctx.ep_type == EpType::IsoIn || epctx.ep_type == EpType::IsoOut { + let ccode = match epctx.ep_type { + EpType::IsoIn => TRBCCode::RingOverrun, + _ => TRBCCode::RingUnderrun, + }; + let mut evt = XhciEvent::new(TRBType::ErTransfer, ccode); + evt.slot_id = slot_id as u8; + evt.ep_id = ep_id as u8; + evt.ptr = ring.get_dequeue_ptr().raw_value(); + if let Err(e) = self.intrs[0].lock().unwrap().send_event(&evt) { + error!("Failed to send event: {:?}", e); + } + } + trace::usb_xhci_unimplemented(&"No TD in the transfer ring.".to_string()); + break; + } + }; + let in_xfer = transfer_in_direction(ep_id as u8, &td, epctx.ep_type); + let mut epctx = epctx.clone(); + // NOTE: It is necessary to clear the transfer list here because otherwise it would + // result in an infinite cycle of destructor calls, leading to a stack overflow. + epctx.transfers.clear(); + // NOTE: Only support primary interrupter now. + let xfer = Arc::new(Mutex::new(XhciTransfer::new( + (slot_id, ep_id, stream_id), + epctx, + in_xfer, + td, + &self.intrs[0], + ))); + let packet = match self.setup_usb_packet(&xfer) { + Ok(pkt) => pkt, + Err(e) => { + error!("Failed to setup packet {:?}", e); + let mut locked_xfer = xfer.lock().unwrap(); + locked_xfer.status = TRBCCode::TrbError; + return locked_xfer.report_transfer_error(); + } + }; + let mut locked_xfer = xfer.lock().unwrap(); + locked_xfer.packet = packet; + self.endpoint_do_transfer(&mut locked_xfer)?; + let epctx = &mut self.slots[(slot_id - 1) as usize].endpoints[(ep_id - 1) as usize]; + if locked_xfer.complete { + epctx.update_dequeue(None, stream_id)?; + } else { + epctx.transfers.push_back(xfer.clone()); + } + drop(locked_xfer); + epctx.flush_transfer(); + if epctx.get_ep_state() == EP_HALTED { + break; + } + // retry + let locked_xfer = xfer.lock().unwrap(); + if !locked_xfer.complete && locked_xfer.running_retry { + epctx.retry = Some(xfer.clone()); + break; + } + count += 1; + if count > KICK_LIMIT { + warn!("kick endpoint over limit"); + break; + } + } + Ok(()) + } + + fn check_slot_enabled(&self, slot_id: u32) -> Result<()> { + if slot_id == 0 || slot_id > self.slots.len() as u32 { + bail!("Invalid slot id {}", slot_id); + } + if !self.slots[(slot_id - 1) as usize].enabled { + bail!("Slot {} is disabled", slot_id); + } + Ok(()) + } + + fn get_endpoint_ctx(&self, slot_id: u32, ep_id: u32) -> Result<&XhciEpContext> { + self.check_slot_enabled(slot_id)?; + if !(ENDPOINT_ID_START..=MAX_ENDPOINTS).contains(&ep_id) { + bail!("Invalid endpoint id {}", ep_id); + } + let ep_ctx = &self.slots[(slot_id - 1) as usize].endpoints[(ep_id - 1) as usize]; + if !ep_ctx.enabled { + bail!("Endpoint is disabled, slot id {} ep id {}", slot_id, ep_id); + } + Ok(ep_ctx) + } + + /// Return Ok(true) if retry is done. + /// Return Ok(false) if packet is need to retry again. + /// Return Err() if retry failed. + fn endpoint_retry_transfer( + &mut self, + slot_id: u32, + ep_id: u32, + stream_id: u32, + ) -> Result { + let slot = &mut self.slots[(slot_id - 1) as usize]; + // Safe because the retry is checked in the outer function call. 
+ let xfer = slot.endpoints[(ep_id - 1) as usize] + .retry + .as_ref() + .unwrap() + .clone(); + let mut locked_xfer = xfer.lock().unwrap(); + if locked_xfer.timed_xfer { + let mfindex = self.mfindex(); + self.check_intr_iso_kick(&mut locked_xfer, mfindex); + if locked_xfer.running_retry { + return Ok(false); + } + locked_xfer.timed_xfer = false; + locked_xfer.running_retry = true; + } + + self.device_handle_packet(&mut locked_xfer); + if !locked_xfer.iso_xfer + && locked_xfer.packet.lock().unwrap().status == UsbPacketStatus::Nak + { + trace::usb_xhci_unimplemented(&"USB packet status is NAK".to_string()); + // NAK need to retry again. + return Ok(false); + } + self.complete_packet(&mut locked_xfer)?; + + let epctx = &mut self.slots[(slot_id - 1) as usize].endpoints[(ep_id - 1) as usize]; + if locked_xfer.complete { + drop(locked_xfer); + epctx.update_dequeue(None, stream_id)?; + epctx.flush_transfer(); + } + epctx.retry = None; + Ok(true) + } + + fn device_handle_packet(&mut self, xfer: &mut XhciTransfer) { + if let Ok(usb_dev) = self.get_usb_dev(xfer.slotid, xfer.epid) { + let mut locked_dev = usb_dev.lock().unwrap(); + locked_dev.handle_packet(&xfer.packet); + } else { + xfer.packet.lock().unwrap().status = UsbPacketStatus::NoDev; + error!("Failed to handle packet, No endpoint found"); + } + } + + fn endpoint_do_transfer(&mut self, xfer: &mut XhciTransfer) -> Result<()> { + if xfer.epid == 1 { + self.do_ctrl_transfer(xfer)?; + } else { + self.do_data_transfer(xfer)?; + } + Ok(()) + } + + /// Control Transfer, TRBs include Setup, Data(option), Status. + fn do_ctrl_transfer(&mut self, xfer: &mut XhciTransfer) -> Result<()> { + trace::usb_xhci_xfer_start(&xfer.slotid, &xfer.epid); + if let Err(e) = self.check_ctrl_transfer(xfer) { + error!("Failed to check control transfer {:?}", e); + xfer.status = TRBCCode::TrbError; + return xfer.report_transfer_error(); + } + let trb_setup = xfer.td[0]; + xfer.packet.lock().unwrap().parameter = trb_setup.parameter; + self.device_handle_packet(xfer); + self.complete_packet(xfer)?; + Ok(()) + } + + fn check_ctrl_transfer(&self, xfer: &XhciTransfer) -> Result<()> { + let trb_setup = xfer.td[0]; + let mut trb_status = xfer.td[xfer.td.len() - 1]; + let status_type = trb_status.get_type(); + + if status_type == TRBType::TrEvdata && xfer.td.len() > 2 { + trb_status = xfer.td[xfer.td.len() - 2]; + } + + let setup_type = trb_setup.get_type(); + if setup_type != TRBType::TrSetup { + bail!("The first TRB is not Setup"); + } + if trb_status.get_type() != TRBType::TrStatus { + bail!("The last TRB is not Status"); + } + if trb_setup.control & TRB_TR_IDT != TRB_TR_IDT { + bail!("no IDT bit"); + } + if trb_setup.status & TRB_TR_LEN_MASK != SETUP_TRB_TR_LEN { + bail!( + "Bad Setup TRB length {}", + trb_setup.status & TRB_TR_LEN_MASK + ); + } + Ok(()) + } + + fn calc_iso_kick(&mut self, xfer: &mut XhciTransfer, mfindex: u64) { + let epctx = &self.slots[(xfer.slotid - 1) as usize].endpoints[(xfer.epid - 1) as usize]; + + if xfer.td[0].control & TRB_TR_SIA != 0 { + let asap = u64::from((mfindex as u32 + epctx.interval - 1) & !(epctx.interval - 1)); + if asap >= epctx.mfindex_last + && asap <= epctx.mfindex_last + u64::from(epctx.interval) * 4 + { + xfer.mfindex_kick = epctx.mfindex_last + u64::from(epctx.interval); + } else { + xfer.mfindex_kick = asap; + } + } else { + xfer.mfindex_kick = + u64::from((xfer.td[0].control >> TRB_TR_FRAMEID_SHIFT) & TRB_TR_FRAMEID_MASK) << 3; + xfer.mfindex_kick |= mfindex & !(MFINDEX_WRAP_NUM - 1); + if xfer.mfindex_kick + 0x100 < mfindex 
{ + xfer.mfindex_kick += MFINDEX_WRAP_NUM; + } + } + } + + fn check_intr_iso_kick(&mut self, xfer: &mut XhciTransfer, mfindex: u64) { + let epctx = &mut self.slots[(xfer.slotid - 1) as usize].endpoints[(xfer.epid - 1) as usize]; + if xfer.mfindex_kick > mfindex { + let weak_xhci = self.usb_ports[0].lock().unwrap().xhci.clone(); + let slotid = xfer.slotid; + let epid = xfer.epid; + let xhci_ep_kick_timer = Box::new(move || { + let xhci = weak_xhci.upgrade().unwrap(); + let mut locked_xhci = xhci.lock().unwrap(); + let epctx = match locked_xhci.get_endpoint_ctx(slotid, epid) { + Ok(epctx) => epctx, + Err(e) => { + error!("Kick endpoint error: {:?}", e); + return; + } + }; + let ep_state = epctx.get_ep_state(); + if ep_state == EP_STOPPED && ep_state == EP_ERROR { + return; + } + if let Err(e) = locked_xhci.kick_endpoint(slotid, epid, 0) { + error!("Failed to kick endpoint: {:?}", e); + } + }); + let ctx = EventLoop::get_ctx(None).unwrap(); + if self.timer_id.is_some() { + ctx.timer_del(self.timer_id.unwrap()); + } + self.timer_id = Some(ctx.timer_add( + xhci_ep_kick_timer, + Duration::from_nanos((xfer.mfindex_kick - mfindex) * ISO_BASE_TIME_INTERVAL), + )); + xfer.running_retry = true; + } else { + epctx.mfindex_last = xfer.mfindex_kick; + if self.timer_id.is_some() { + EventLoop::get_ctx(None) + .unwrap() + .timer_del(self.timer_id.unwrap()); + self.timer_id = None; + } + xfer.running_retry = false; + } + } + + fn do_data_transfer(&mut self, xfer: &mut XhciTransfer) -> Result<()> { + trace::usb_xhci_xfer_start(&xfer.slotid, &xfer.epid); + let epctx = &self.slots[(xfer.slotid - 1) as usize].endpoints[(xfer.epid - 1) as usize]; + match epctx.ep_type { + EpType::IntrOut | EpType::IntrIn => { + xfer.iso_xfer = false; + xfer.timed_xfer = false; + if xfer.running_retry { + return Ok(()); + } + } + EpType::BulkOut | EpType::BulkIn => { + xfer.iso_xfer = false; + xfer.timed_xfer = false; + } + EpType::IsoOut | EpType::IsoIn => { + xfer.iso_xfer = true; + xfer.timed_xfer = true; + let mfindex = self.mfindex(); + self.calc_iso_kick(xfer, mfindex); + self.check_intr_iso_kick(xfer, mfindex); + if xfer.running_retry { + return Ok(()); + } + } + _ => { + bail!( + "endpoint type: {:?} is unsupported by data transfer", + epctx.ep_type + ); + } + } + self.device_handle_packet(xfer); + self.complete_packet(xfer)?; + Ok(()) + } + + // Setup USB packet, include mapping dma address to iovector. + fn setup_usb_packet( + &mut self, + xfer: &Arc>, + ) -> Result>> { + let locked_xfer = xfer.lock().unwrap(); + let dir = if locked_xfer.in_xfer { + USB_TOKEN_IN + } else { + USB_TOKEN_OUT + }; + + // Map dma address to iovec. 
+ let mut vec = Vec::new(); + for trb in &locked_xfer.td { + let trb_type = trb.get_type(); + if trb_type == TRBType::TrData && (trb.control & TRB_TR_DIR == 0) == locked_xfer.in_xfer + { + bail!("Direction of data transfer is mismatch"); + } + + if trb_type == TRBType::TrData + || trb_type == TRBType::TrNormal + || trb_type == TRBType::TrIsoch + { + let chunk = trb.status & TRB_TR_LEN_MASK; + let dma_addr = if trb.control & TRB_TR_IDT == TRB_TR_IDT { + if chunk > 8 && locked_xfer.in_xfer { + bail!("Invalid immediate data TRB"); + } + trb.addr + } else { + trb.parameter + }; + + self.mem_space.get_address_map( + &None, + GuestAddress(dma_addr), + u64::from(chunk), + &mut vec, + )?; + } + } + + let target_dev = + if let Ok(target_dev) = self.get_usb_dev(locked_xfer.slotid, locked_xfer.epid) { + Some(Arc::downgrade(&target_dev) as Weak>) + } else { + None + }; + + let packet_id = self.generate_packet_id(); + let (_, ep_number) = endpoint_id_to_number(locked_xfer.epid as u8); + let stream = locked_xfer.streamid; + let xfer_ops = Arc::downgrade(xfer) as Weak>; + let packet = UsbPacket::new( + packet_id, + u32::from(dir), + ep_number, + stream, + vec, + Some(xfer_ops), + target_dev, + ); + Ok(Arc::new(Mutex::new(packet))) + } + + fn get_usb_dev(&self, slotid: u32, epid: u32) -> Result>> { + let port = self.slots[(slotid - 1) as usize] + .usb_port + .as_ref() + .with_context(|| format!("USB port not found slotid {} epid {}", slotid, epid))?; + let locked_port = port.lock().unwrap(); + let dev = locked_port + .dev + .as_ref() + .with_context(|| "No device found in USB port.")?; + Ok(dev.clone()) + } + + /// Update packet status and then submit transfer. + fn complete_packet(&mut self, xfer: &mut XhciTransfer) -> Result<()> { + if xfer.packet.lock().unwrap().is_async { + trace::usb_xhci_xfer_async(); + xfer.running_retry = false; + xfer.running_async = true; + return Ok(()); + } + if xfer.packet.lock().unwrap().status == UsbPacketStatus::Nak { + trace::usb_xhci_xfer_nak(); + xfer.complete = false; + xfer.running_retry = true; + return Ok(()); + } else { + trace::usb_xhci_xfer_retry(); + xfer.complete = true; + xfer.running_retry = false; + } + + xfer.complete_transfer() + } + + /// Flush transfer in endpoint in some case such as stop endpoint. + fn cancel_all_ep_transfers(&mut self, slotid: u32, epid: u32, report: TRBCCode) -> Result { + trace::usb_xhci_cancel_all_ep_transfers(&slotid, &epid); + let mut cnt = 0; + let mut report = report; + while let Some(xfer) = self.slots[(slotid - 1) as usize].endpoints[(epid - 1) as usize] + .transfers + .pop_front() + { + let mut locked_xfer = xfer.lock().unwrap(); + if locked_xfer.complete { + continue; + } + cnt += self.cancel_one_ep_transfer(slotid, epid, &mut locked_xfer, report)?; + if cnt != 0 { + // Only report once. 
+ report = TRBCCode::Invalid; + } + } + self.slots[(slotid - 1) as usize].endpoints[(epid - 1) as usize] + .transfers + .clear(); + Ok(cnt) + } + + fn cancel_one_ep_transfer( + &mut self, + slotid: u32, + ep_id: u32, + xfer: &mut XhciTransfer, + report: TRBCCode, + ) -> Result { + let mut killed = 0; + + if xfer.running_async { + if report != TRBCCode::Invalid { + xfer.status = report; + xfer.submit_transfer()?; + let locked_packet = xfer.packet.lock().unwrap(); + + if let Some(usb_dev) = locked_packet.target_dev.as_ref() { + if let Some(usb_dev) = usb_dev.clone().upgrade() { + drop(locked_packet); + let mut locked_usb_dev = usb_dev.lock().unwrap(); + locked_usb_dev.cancel_packet(&xfer.packet); + } + } + } + xfer.running_async = false; + killed = 1; + } + + if xfer.running_retry { + if report != TRBCCode::Invalid { + xfer.status = report; + xfer.submit_transfer()?; + } + let epctx = &mut self.slots[(slotid - 1) as usize].endpoints[(ep_id - 1) as usize]; + epctx.retry = None; + xfer.running_retry = false; + killed = 1; + } + xfer.td.clear(); + Ok(killed) + } + + /// Used for device to wakeup endpoint + pub fn wakeup_endpoint(&mut self, slot_id: u32, ep_id: u32, stream_id: u32) -> Result<()> { + if let Err(e) = self.get_endpoint_ctx(slot_id, ep_id) { + trace::usb_xhci_unimplemented(&format!( + "Invalid slot id or ep id, maybe device not activated, {:?}", + e + )); + return Ok(()); + } + self.kick_endpoint(slot_id, ep_id, stream_id)?; + Ok(()) + } + + pub(crate) fn reset_event_ring(&mut self, idx: u32) -> Result<()> { + let mut locked_intr = self.intrs[idx as usize].lock().unwrap(); + if locked_intr.erstsz == 0 || locked_intr.erstba.raw_value() == 0 { + locked_intr.er_start = GuestAddress(0); + locked_intr.er_size = 0; + return Ok(()); + } + let mut seg = XhciEventRingSeg::new(&self.mem_space); + seg.fetch_event_ring_seg(locked_intr.erstba)?; + if seg.size < 16 || seg.size > 4096 { + bail!("Invalid segment size {}", seg.size); + } + + // GPAChecked: the event ring must locate in guest ram. + let base_addr = GuestAddress(addr64_from_u32(seg.addr_lo, seg.addr_hi)); + // SAFETY: seg size is a 16 bit register, will not overflow. + let er_len = seg.size * TRB_SIZE; + if !self + .mem_space + .address_in_memory(base_addr, u64::from(er_len)) + { + bail!("The event ring does not locate in guest ram"); + } + + locked_intr.er_start = base_addr; + locked_intr.er_size = seg.size; + locked_intr.er_ep_idx = 0; + locked_intr.er_pcs = true; + Ok(()) + } + + /// Assign USB port and attach the device. 
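Port assignment (the assign_usb_port helper that follows) is a first-fit search over the ports whose speed_mask accepts the device speed; the constructor earlier gives USB 2 root ports a low/full/high mask and USB 3 ports a super-speed-only mask. A standalone sketch of that matching logic with illustrative speed numbering (the real USB_SPEED_* constants come from the usb module):

```rust
/// Illustrative speed numbering; the real USB_SPEED_* constants live elsewhere.
const SPEED_LOW: u32 = 0;
const SPEED_FULL: u32 = 1;
const SPEED_HIGH: u32 = 2;
const SPEED_SUPER: u32 = 3;

struct Port {
    speed_mask: u32,
    used: bool,
}

impl Port {
    /// Same check as UsbPort::speed_supported: each speed is one bit in the mask.
    fn speed_supported(&self, speed: u32) -> bool {
        self.speed_mask & (1 << speed) == (1 << speed)
    }
}

/// First free port whose mask accepts the device speed, if any.
fn assign_port(ports: &mut [Port], speed: u32) -> Option<usize> {
    for (idx, port) in ports.iter_mut().enumerate() {
        if !port.used && port.speed_supported(speed) {
            port.used = true;
            return Some(idx);
        }
    }
    None
}

fn main() {
    let usb2_mask = (1 << SPEED_LOW) | (1 << SPEED_FULL) | (1 << SPEED_HIGH);
    let usb3_mask = 1 << SPEED_SUPER;
    let mut ports = vec![
        Port { speed_mask: usb2_mask, used: false },
        Port { speed_mask: usb3_mask, used: false },
    ];
    // A high-speed device lands on the USB 2 port, a super-speed one on the USB 3 port.
    assert_eq!(assign_port(&mut ports, SPEED_HIGH), Some(0));
    assert_eq!(assign_port(&mut ports, SPEED_SUPER), Some(1));
    // With both ports used, assignment fails.
    assert_eq!(assign_port(&mut ports, SPEED_FULL), None);
}
```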
+ pub fn assign_usb_port( + &mut self, + dev: &Arc>, + ) -> Option>> { + let speed = dev.lock().unwrap().speed(); + for port in &self.usb_ports { + let mut locked_port = port.lock().unwrap(); + if locked_port.speed_supported(speed) && !locked_port.used { + locked_port.used = true; + locked_port.dev = Some(dev.clone()); + let mut locked_dev = dev.lock().unwrap(); + locked_dev.set_usb_port(Some(Arc::downgrade(port))); + return Some(port.clone()); + } + } + None + } + + pub fn discharge_usb_port(&mut self, port: &mut UsbPort) { + if port.used { + port.used = false; + port.dev = None; + port.slot_id = INVALID_SLOT_ID; + } + } + + pub fn find_usb_port_by_id(&mut self, id: &str) -> Option>> { + for port in &self.usb_ports { + let locked_port = port.lock().unwrap(); + if !locked_port.used || locked_port.dev.is_none() { + continue; + } + let dev = locked_port.dev.as_ref().unwrap(); + if dev.lock().unwrap().device_id() == id { + return Some(port.clone()); + } + } + None + } +} + +fn usb_packet_status_to_trb_code(status: UsbPacketStatus) -> Result { + let code = match status { + UsbPacketStatus::Success => TRBCCode::Success, + UsbPacketStatus::NoDev | UsbPacketStatus::IoError => TRBCCode::UsbTransactionError, + UsbPacketStatus::Stall => TRBCCode::StallError, + UsbPacketStatus::Babble => TRBCCode::BabbleDetected, + _ => { + bail!("Unhandle packet status {:?}", status); + } + }; + Ok(code) +} + +// DMA read/write helpers. +pub fn dma_read_bytes( + addr_space: &Arc, + addr: GuestAddress, + mut buf: &mut [u8], +) -> Result<()> { + let len = buf.len() as u64; + addr_space + .read(&mut buf, addr, len, AddressAttr::Ram) + .with_context(|| { + format!( + "Failed to read dma memory at gpa=0x{:x} len=0x{:x}", + addr.0, len + ) + })?; + Ok(()) +} + +pub fn dma_write_bytes( + addr_space: &Arc, + addr: GuestAddress, + mut buf: &[u8], +) -> Result<()> { + let len = buf.len() as u64; + addr_space + .write(&mut buf, addr, len, AddressAttr::Ram) + .with_context(|| { + format!( + "Failed to write dma memory at gpa=0x{:x} len=0x{:x}", + addr.0, len + ) + })?; + Ok(()) +} + +fn dma_read_u64(addr_space: &Arc, addr: GuestAddress, data: &mut u64) -> Result<()> { + let mut tmp = [0_u8; 8]; + dma_read_bytes(addr_space, addr, &mut tmp)?; + *data = LittleEndian::read_u64(&tmp); + Ok(()) +} + +pub fn dma_read_u32( + addr_space: &Arc, + addr: GuestAddress, + buf: &mut [u32], +) -> Result<()> { + let vec_len = std::mem::size_of_val(buf); + let mut vec = vec![0_u8; vec_len]; + let tmp = vec.as_mut_slice(); + dma_read_bytes(addr_space, addr, tmp)?; + for i in 0..buf.len() { + buf[i] = LittleEndian::read_u32(&tmp[(size_of::() * i)..]); + } + Ok(()) +} + +pub fn dma_write_u32( + addr_space: &Arc, + addr: GuestAddress, + buf: &[u32], +) -> Result<()> { + let vec_len = std::mem::size_of_val(buf); + let mut vec = vec![0_u8; vec_len]; + let tmp = vec.as_mut_slice(); + for i in 0..buf.len() { + LittleEndian::write_u32(&mut tmp[(size_of::() * i)..], buf[i]); + } + dma_write_bytes(addr_space, addr, tmp)?; + Ok(()) +} + +fn addr64_from_u32(low: u32, high: u32) -> u64 { + (u64::from(high) << 32) | u64::from(low) +} + +// | ep id | < = > | ep direction | ep number | +// | 1 | | | 0 | +// | 2 | | OUT | 1 | +// | 3 | | IN | 1 | +fn endpoint_id_to_number(ep_id: u8) -> (bool, u8) { + (ep_id & 1 == 1, ep_id >> 1) +} + +pub fn endpoint_number_to_id(in_direction: bool, ep_number: u8) -> u8 { + if ep_number == 0 { + // Control endpoint. 
+ 1 + } else if in_direction { + ep_number * 2 + 1 + } else { + ep_number * 2 + } +} + +fn transfer_in_direction(ep_id: u8, td: &[XhciTRB], ep_type: EpType) -> bool { + if ep_id == 1 { + let trb_setup = td[0]; + let bm_request_type = trb_setup.parameter as u8; + bm_request_type & USB_DIRECTION_DEVICE_TO_HOST == USB_DIRECTION_DEVICE_TO_HOST + } else { + ep_type == EpType::IsoIn || ep_type == EpType::BulkIn || ep_type == EpType::IntrIn + } +} diff --git a/devices/src/usb/xhci/xhci_pci.rs b/devices/src/usb/xhci/xhci_pci.rs new file mode 100644 index 0000000000000000000000000000000000000000..f91461e3db297c0e2f8877092ddd232d8ef531d6 --- /dev/null +++ b/devices/src/usb/xhci/xhci_pci.rs @@ -0,0 +1,413 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::cmp::max; +use std::os::unix::io::AsRawFd; +use std::os::unix::prelude::RawFd; +use std::rc::Rc; +use std::sync::atomic::{AtomicU16, Ordering}; +use std::sync::{Arc, Mutex, Weak}; + +use anyhow::{bail, Context, Result}; +use clap::Parser; +use log::error; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; + +use super::xhci_controller::{XhciDevice, MAX_INTRS, MAX_SLOTS}; +use super::xhci_regs::{ + build_cap_ops, build_doorbell_ops, build_oper_ops, build_port_ops, build_runtime_ops, + XHCI_CAP_LENGTH, XHCI_OFF_DOORBELL, XHCI_OFF_RUNTIME, +}; +use crate::pci::config::{ + PciConfig, RegionType, DEVICE_ID, MINIMUM_BAR_SIZE_FOR_MMIO, PCI_CONFIG_SPACE_SIZE, + PCI_DEVICE_ID_REDHAT_XHCI, PCI_VENDOR_ID_REDHAT, REVISION_ID, SUB_CLASS_CODE, VENDOR_ID, +}; +use crate::pci::{init_intx, init_msix, le_write_u16, PciBus, PciDevBase, PciDevOps}; +use crate::usb::UsbDevice; +use crate::{convert_bus_ref, Bus, Device, DeviceBase, PCI_BUS}; +use address_space::{AddressRange, AddressSpace, Region, RegionIoEventFd}; +use machine_manager::config::{get_pci_df, valid_id}; +use machine_manager::event_loop::register_event_helper; +use util::gen_base_func; +use util::loop_context::{ + create_new_eventfd, read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, + NotifierOperation, +}; + +/// 5.2 PCI Configuration Registers(USB) +const PCI_CLASS_PI: u16 = 0x09; +const PCI_CACHE_LINE_SIZE: u16 = 0x0c; +const PCI_SERIAL_BUS_RELEASE_NUMBER: u8 = 0x60; +const PCI_FRAME_LENGTH_ADJUSTMENT: u8 = 0x61; +const PCI_SERIAL_BUS_RELEASE_VERSION_3_0: u8 = 0x30; +const PCI_CLASS_SERIAL_USB: u16 = 0x0c03; +const PCI_NO_FRAME_LENGTH_TIMING_CAP: u8 = 0x40; +/// PCI capability offset or size. 
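The endpoint_id_to_number and endpoint_number_to_id helpers just above in xhci_controller.rs translate between the xHCI endpoint ID (DCI) and the USB endpoint number plus direction: the control endpoint is always ID 1, OUT endpoints map to 2n and IN endpoints to 2n + 1. A quick round-trip sketch of that mapping, re-implemented locally for illustration:

```rust
/// Same mapping as endpoint_id_to_number: the low bit is the direction
/// (IN when set), the remaining bits are the USB endpoint number.
fn id_to_number(ep_id: u8) -> (bool, u8) {
    (ep_id & 1 == 1, ep_id >> 1)
}

/// Inverse mapping, mirroring endpoint_number_to_id.
fn number_to_id(in_direction: bool, ep_number: u8) -> u8 {
    if ep_number == 0 {
        1 // The control endpoint is always endpoint ID 1.
    } else if in_direction {
        ep_number * 2 + 1
    } else {
        ep_number * 2
    }
}

fn main() {
    assert_eq!(id_to_number(1), (true, 0)); // control
    assert_eq!(id_to_number(2), (false, 1)); // EP1 OUT
    assert_eq!(id_to_number(3), (true, 1)); // EP1 IN
    // The round trip holds for every non-control endpoint id.
    for ep_id in 2u8..32 {
        let (dir, num) = id_to_number(ep_id);
        assert_eq!(number_to_id(dir, num), ep_id);
    }
}
```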
+const XHCI_PCI_CONFIG_LENGTH: u32 = 0x4000; +const XHCI_PCI_CAP_OFFSET: u32 = 0x0; +const XHCI_PCI_CAP_LENGTH: u32 = XHCI_CAP_LENGTH; +const XHCI_PCI_OPER_OFFSET: u32 = XHCI_PCI_CAP_LENGTH; +const XHCI_PCI_OPER_LENGTH: u32 = 0x400; +const XHCI_PCI_RUNTIME_OFFSET: u32 = XHCI_OFF_RUNTIME; +const XHCI_PCI_RUNTIME_LENGTH: u32 = (MAX_INTRS + 1) * 0x20; +const XHCI_PCI_DOORBELL_OFFSET: u32 = XHCI_OFF_DOORBELL; +const XHCI_PCI_DOORBELL_LENGTH: u32 = (MAX_SLOTS + 1) * 0x20; +const XHCI_PCI_PORT_OFFSET: u32 = XHCI_PCI_OPER_OFFSET + XHCI_PCI_OPER_LENGTH; +const XHCI_PCI_PORT_LENGTH: u32 = 0x10; +const XHCI_MSIX_TABLE_OFFSET: u32 = 0x3000; +const XHCI_MSIX_PBA_OFFSET: u32 = 0x3800; + +/// XHCI controller configuration. +#[derive(Parser, Clone, Debug, Default)] +#[command(no_binary_name(true))] +pub struct XhciConfig { + #[arg(long)] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + id: Option, + #[arg(long)] + pub bus: String, + #[arg(long, value_parser = get_pci_df)] + pub addr: (u8, u8), + // number of usb2.0 ports. + #[arg(long, value_parser = clap::value_parser!(u8).range(1..u8::MAX as i64))] + pub p2: Option, + // number of usb3.0 ports. + #[arg(long, value_parser = clap::value_parser!(u8).range(1..u8::MAX as i64))] + pub p3: Option, + #[arg(long)] + pub iothread: Option, +} + +/// Registers offset. +/// 0x0 0x40 0x440 0x1000 0x2000 0x3000 0x4000 +/// | cap | oper | port | runtime | doorbell | MSIX | + +/// XHCI pci device which can be attached to PCI bus. +pub struct XhciPciDevice { + base: PciDevBase, + pub xhci: Arc>, + dev_id: Arc, + mem_region: Region, + doorbell_fd: Arc, + delete_evts: Vec, + iothread: Option, +} + +impl XhciPciDevice { + pub fn new( + config: &XhciConfig, + devfn: u8, + parent_bus: Weak>, + mem_space: &Arc, + ) -> Self { + Self { + base: PciDevBase { + base: DeviceBase::new(config.id.clone().unwrap(), true, Some(parent_bus)), + config: PciConfig::new(devfn, PCI_CONFIG_SPACE_SIZE, 1), + devfn, + }, + xhci: XhciDevice::new(mem_space, config), + dev_id: Arc::new(AtomicU16::new(0)), + mem_region: Region::init_container_region( + u64::from(XHCI_PCI_CONFIG_LENGTH), + "XhciPciContainer", + ), + doorbell_fd: Arc::new(create_new_eventfd().unwrap()), + delete_evts: Vec::new(), + iothread: config.iothread.clone(), + } + } + + fn mem_region_init(&mut self) -> Result<()> { + let cap_region = Region::init_io_region( + u64::from(XHCI_PCI_CAP_LENGTH), + build_cap_ops(&self.xhci), + "XhciPciCapRegion", + ); + self.mem_region + .add_subregion(cap_region, u64::from(XHCI_PCI_CAP_OFFSET)) + .with_context(|| "Failed to register cap region.")?; + + let mut oper_region = Region::init_io_region( + u64::from(XHCI_PCI_OPER_LENGTH), + build_oper_ops(&self.xhci), + "XhciPciOperRegion", + ); + oper_region.set_access_size(4); + self.mem_region + .add_subregion(oper_region, u64::from(XHCI_PCI_OPER_OFFSET)) + .with_context(|| "Failed to register oper region.")?; + + let port_num = self.xhci.lock().unwrap().usb_ports.len(); + for i in 0..port_num { + let port = &self.xhci.lock().unwrap().usb_ports[i]; + let port_region = Region::init_io_region( + u64::from(XHCI_PCI_PORT_LENGTH), + build_port_ops(port), + "XhciPciPortRegion", + ); + let offset = u64::from(XHCI_PCI_PORT_OFFSET + XHCI_PCI_PORT_LENGTH * i as u32); + self.mem_region + .add_subregion(port_region, offset) + .with_context(|| "Failed to register port region.")?; + } + + let mut runtime_region = Region::init_io_region( + u64::from(XHCI_PCI_RUNTIME_LENGTH), + build_runtime_ops(&self.xhci), + "XhciPciRuntimeRegion", + ); + 
runtime_region.set_access_size(4); + self.mem_region + .add_subregion(runtime_region, u64::from(XHCI_PCI_RUNTIME_OFFSET)) + .with_context(|| "Failed to register runtime region.")?; + + let doorbell_region = Region::init_io_region( + u64::from(XHCI_PCI_DOORBELL_LENGTH), + build_doorbell_ops(&self.xhci), + "XhciPciDoorbellRegion", + ); + doorbell_region.set_ioeventfds(&self.ioeventfds()); + + self.mem_region + .add_subregion(doorbell_region, u64::from(XHCI_PCI_DOORBELL_OFFSET)) + .with_context(|| "Failed to register doorbell region.")?; + Ok(()) + } + + fn ioeventfds(&self) -> Vec { + vec![RegionIoEventFd { + fd: self.doorbell_fd.clone(), + addr_range: AddressRange::from((0, 4u64)), + data_match: false, + data: 0, + }] + } + + pub fn attach_device(&self, dev: &Arc>) -> Result<()> { + let mut locked_xhci = self.xhci.lock().unwrap(); + let usb_port = locked_xhci + .assign_usb_port(dev) + .with_context(|| "No available USB port.")?; + locked_xhci.port_update(&usb_port, false)?; + trace::usb_xhci_attach_device( + &usb_port.lock().unwrap().port_id, + &dev.lock().unwrap().device_id(), + ); + let mut locked_dev = dev.lock().unwrap(); + locked_dev.handle_attach()?; + locked_dev.set_controller(Arc::downgrade(&self.xhci)); + Ok(()) + } + + pub fn detach_device(&self, id: String) -> Result<()> { + let mut locked_xhci = self.xhci.lock().unwrap(); + let usb_port = locked_xhci.find_usb_port_by_id(&id); + if usb_port.is_none() { + bail!("Failed to detach device: id {} not found", id); + } + let usb_port = usb_port.unwrap(); + let slot_id = usb_port.lock().unwrap().slot_id; + locked_xhci.detach_slot(slot_id)?; + locked_xhci.port_update(&usb_port, true)?; + + // Unrealize device and discharge usb port. + let mut locked_port = usb_port.lock().unwrap(); + let dev = locked_port.dev.as_ref().unwrap(); + let mut locked_dev = dev.lock().unwrap(); + trace::usb_xhci_detach_device(&locked_port.port_id, &locked_dev.device_id()); + locked_dev.usb_device_base_mut().unplugged = true; + locked_dev.unrealize()?; + drop(locked_dev); + locked_xhci.discharge_usb_port(&mut locked_port); + + Ok(()) + } +} + +impl Device for XhciPciDevice { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn reset(&mut self, _reset_child_device: bool) -> Result<()> { + self.xhci.lock().unwrap().reset(); + + self.base.config.reset()?; + + Ok(()) + } + + fn realize(mut self) -> Result>> { + self.init_write_mask(false)?; + self.init_write_clear_mask(false)?; + le_write_u16( + &mut self.base.config.config, + VENDOR_ID as usize, + PCI_VENDOR_ID_REDHAT, + )?; + le_write_u16( + &mut self.base.config.config, + DEVICE_ID as usize, + PCI_DEVICE_ID_REDHAT_XHCI, + )?; + le_write_u16(&mut self.base.config.config, REVISION_ID, 0x3_u16)?; + le_write_u16( + &mut self.base.config.config, + SUB_CLASS_CODE as usize, + PCI_CLASS_SERIAL_USB, + )?; + self.base.config.config[PCI_CLASS_PI as usize] = 0x30; + + #[cfg(target_arch = "aarch64")] + self.base.config.set_interrupt_pin(); + + self.base.config.config[PCI_CACHE_LINE_SIZE as usize] = 0x10; + self.base.config.config[PCI_SERIAL_BUS_RELEASE_NUMBER as usize] = + PCI_SERIAL_BUS_RELEASE_VERSION_3_0; + self.base.config.config[PCI_FRAME_LENGTH_ADJUSTMENT as usize] = + PCI_NO_FRAME_LENGTH_TIMING_CAP; + self.dev_id + .store(u16::from(self.base.devfn), Ordering::SeqCst); + self.mem_region_init()?; + + let handler = Arc::new(Mutex::new(DoorbellHandler::new( + self.xhci.clone(), + self.doorbell_fd.clone(), + ))); + + register_event_helper( + EventNotifierHelper::internal_notifiers(handler), + 
self.iothread.as_ref(), + &mut self.delete_evts, + )?; + + let intrs_num = self.xhci.lock().unwrap().intrs.len() as u32; + init_msix( + &mut self.base, + 0_usize, + intrs_num, + self.dev_id.clone(), + Some(&self.mem_region), + Some((XHCI_MSIX_TABLE_OFFSET, XHCI_MSIX_PBA_OFFSET)), + )?; + + let parent_bus = self.parent_bus().unwrap(); + init_intx( + self.name(), + &mut self.base.config, + parent_bus, + self.base.devfn, + )?; + + let mut mem_region_size = u64::from(XHCI_PCI_CONFIG_LENGTH).next_power_of_two(); + mem_region_size = max(mem_region_size, MINIMUM_BAR_SIZE_FOR_MMIO as u64); + self.base.config.register_bar( + 0_usize, + self.mem_region.clone(), + RegionType::Mem64Bit, + false, + mem_region_size, + )?; + + let devfn = u64::from(self.base.devfn); + // It is safe to unwrap, because it is initialized in init_msix. + let cloned_msix = self.base.config.msix.as_ref().unwrap().clone(); + let cloned_intx = self.base.config.intx.as_ref().unwrap().clone(); + let cloned_dev_id = self.dev_id.clone(); + // Registers the msix to the xhci device for interrupt notification. + self.xhci + .lock() + .unwrap() + .set_interrupt_ops(Arc::new(move |n: u32, level: u8| -> bool { + let mut locked_msix = cloned_msix.lock().unwrap(); + if locked_msix.enabled && level != 0 { + locked_msix.notify(n as u16, cloned_dev_id.load(Ordering::Acquire)); + return true; + } + if n == 0 && !locked_msix.enabled { + cloned_intx.lock().unwrap().notify(level); + } + + false + })); + let dev = Arc::new(Mutex::new(self)); + // Attach to the PCI bus. + let bus = dev.lock().unwrap().parent_bus().unwrap().upgrade().unwrap(); + bus.lock().unwrap().attach_child(devfn, dev.clone())?; + Ok(dev) + } + + fn unrealize(&mut self) -> Result<()> { + trace::usb_xhci_exit(); + Ok(()) + } +} + +impl PciDevOps for XhciPciDevice { + gen_base_func!(pci_base, pci_base_mut, PciDevBase, base); + + fn write_config(&mut self, offset: usize, data: &[u8]) { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + PCI_BUS!(parent_bus, locked_bus, pci_bus); + pci_bus.update_dev_id(self.base.devfn, &self.dev_id); + + self.base.config.write( + offset, + data, + self.dev_id.clone().load(Ordering::Acquire), + #[cfg(target_arch = "x86_64")] + Some(&pci_bus.io_region), + Some(&pci_bus.mem_region), + ); + } +} + +struct DoorbellHandler { + xhci: Arc>, + fd: Arc, +} + +impl DoorbellHandler { + fn new(xhci: Arc>, fd: Arc) -> Self { + DoorbellHandler { xhci, fd } + } +} + +impl EventNotifierHelper for DoorbellHandler { + fn internal_notifiers(io_handler: Arc>) -> Vec { + let cloned_io_handler = io_handler.clone(); + let handler: Rc = Rc::new(move |_event, fd: RawFd| { + read_fd(fd); + let locked_handler = cloned_io_handler.lock().unwrap(); + let mut locked_xhci = locked_handler.xhci.lock().unwrap(); + + if !locked_xhci.running() { + error!("Failed to write doorbell, XHCI is not running"); + return None; + } + if let Err(e) = locked_xhci.handle_command() { + error!("Failed to handle command: {:?}", e); + locked_xhci.host_controller_error(); + } + + None + }); + vec![EventNotifier::new( + NotifierOperation::AddShared, + io_handler.lock().unwrap().fd.as_raw_fd(), + None, + EventSet::IN, + vec![handler], + )] + } +} diff --git a/devices/src/usb/xhci/xhci_regs.rs b/devices/src/usb/xhci/xhci_regs.rs new file mode 100644 index 0000000000000000000000000000000000000000..abf3128d428fc3b4759d75439db1b653da5c3aea --- /dev/null +++ b/devices/src/usb/xhci/xhci_regs.rs @@ -0,0 +1,852 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. 
+// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::sync::atomic::{fence, AtomicU32, Ordering}; +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Result}; +use byteorder::{ByteOrder, LittleEndian}; +use log::{debug, error}; + +use super::xhci_controller::dma_write_bytes; +use super::xhci_controller::{UsbPort, XhciDevice, XhciEvent}; +use super::xhci_ring::XhciTRB; +use super::xhci_trb::{TRBCCode, TRBType, TRB_C, TRB_SIZE}; +use crate::usb::{config::*, UsbError}; +use address_space::{AddressSpace, GuestAddress, RegionOps}; +use util::num_ops::{read_data_u32, read_u32, write_data_u32, write_u64_high, write_u64_low}; + +/// Capability offset or size. +pub(crate) const XHCI_CAP_LENGTH: u32 = 0x40; +pub(crate) const XHCI_OFF_DOORBELL: u32 = 0x2000; +pub(crate) const XHCI_OFF_RUNTIME: u32 = 0x1000; +/// Capability Registers. +/// Capability Register Length. +const XHCI_CAP_REG_CAPLENGTH: u64 = 0x00; +/// Interface Version Number. +const XHCI_CAP_REG_HCIVERSION: u64 = 0x02; +/// Structural Parameters 1. +const XHCI_CAP_REG_HCSPARAMS1: u64 = 0x04; +/// Structural Parameters 2. +const XHCI_CAP_REG_HCSPARAMS2: u64 = 0x08; +/// Structural Parameters 3. +const XHCI_CAP_REG_HCSPARAMS3: u64 = 0x0c; +/// Capability Parameters 1. +const XHCI_CAP_REG_HCCPARAMS1: u64 = 0x10; +/// Doorbell Offset. +const XHCI_CAP_REG_DBOFF: u64 = 0x14; +/// Runtime Register Space Offset. +const XHCI_CAP_REG_RTSOFF: u64 = 0x18; +/// Capability Parameters 2. +const XHCI_CAP_REG_HCCPARAMS2: u64 = 0x1c; +const XHCI_VERSION: u32 = 0x100; +/// Number of Device Slots(MaxSlots). +const CAP_HCSP_NDS_SHIFT: u32 = 0; +/// Number of Interrupters(MaxIntrs). +const CAP_HCSP_NI_SHIFT: u32 = 8; +/// Number of Ports(MaxPorts). +const CAP_HCSP_NP_SHIFT: u32 = 24; +/// 64-bit Addressing Capability. +const CAP_HCCP_AC64: u32 = 0x1; +/// xHCI Extended Capabilities Pointer. +const CAP_HCCP_EXCP_SHIFT: u32 = 16; +/// Maximum Primary Stream Array Size. +const CAP_HCCP_MPSAS_SHIFT: u32 = 12; +/// Extended Capability Code (Supported Protocol). +const CAP_EXT_CAP_ID_SUPPORT_PROTOCOL: u8 = 2; +/// xHCI Supported Protocol Capability (Name String). +const CAP_EXT_USB_NAME_STRING: u32 = 0x20425355; +/// Supported Protocol Capability (Major Revision and Minor Revision). +const CAP_EXT_REVISION_SHIFT: u32 = 16; +/// Next xHCI Extended Capability Pointer. +const CAP_EXT_NEXT_CAP_POINTER_SHIFT: u32 = 8; +/// USB 2.0. +const CAP_EXT_USB_REVISION_2_0: u32 = 0x0200; +/// USB 3.0. +const CAP_EXT_USB_REVISION_3_0: u32 = 0x0300; +/// Operational Registers. +pub const XHCI_OPER_REG_USBCMD: u64 = 0x00; +pub const XHCI_OPER_REG_USBSTS: u64 = 0x04; +pub const XHCI_OPER_REG_PAGESIZE: u64 = 0x08; +pub const XHCI_OPER_REG_DNCTRL: u64 = 0x14; +const XHCI_OPER_REG_CMD_RING_CTRL_LO: u64 = 0x18; +const XHCI_OPER_REG_CMD_RING_CTRL_HI: u64 = 0x1c; +const XHCI_OPER_REG_DCBAAP_LO: u64 = 0x30; +const XHCI_OPER_REG_DCBAAP_HI: u64 = 0x34; +pub const XHCI_OPER_REG_CONFIG: u64 = 0x38; +const XHCI_OPER_PAGESIZE: u32 = 1; +/// Command Ring Control Register RCS/CS/CA mask. 
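+/// Guest writes keep RCS/CS/CA and the low pointer bits; CRR (bit 3) and the reserved bits 4..=5 are masked off.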
+const XHCI_CRCR_CTRL_LO_MASK: u32 = 0xffffffc7; +/// Command Ring Pointer Mask. +const XHCI_CRCR_CRP_MASK: u64 = !0x3f; +/// Notification Enable. +pub const XHCI_OPER_NE_MASK: u32 = 0xffff; +/// Interrupter Registers. +pub const XHCI_INTR_REG_IMAN: u64 = 0x00; +pub const XHCI_INTR_REG_IMOD: u64 = 0x04; +pub const XHCI_INTR_REG_ERSTSZ: u64 = 0x08; +pub const XHCI_INTR_REG_ERSTBA_LO: u64 = 0x10; +pub const XHCI_INTR_REG_ERSTBA_HI: u64 = 0x14; +pub const XHCI_INTR_REG_ERDP_LO: u64 = 0x18; +pub const XHCI_INTR_REG_ERDP_HI: u64 = 0x1c; +pub const XHCI_INTR_REG_SIZE: u64 = 0x20; +const XHCI_INTR_REG_SHIFT: u64 = 5; +/// Doorbell Register Bit Field. +/// DB Target. +const DB_TARGET_MASK: u32 = 0xff; +/// DB Stream. +const DB_STREAM_ID_SHIFT: u32 = 16; +const DB_STREAM_ID_MASK: u32 = 0xffff; +/// Port Registers. +const XHCI_PORTSC: u64 = 0x0; +const XHCI_PORTPMSC: u64 = 0x4; +const XHCI_PORTLI: u64 = 0x8; +const XHCI_PORTHLPMC: u64 = 0xc; + +/// XHCI Operation Registers +#[derive(Default)] +pub struct XhciOperReg { + /// USB Command + pub usb_cmd: Arc, + /// USB Status + pub usb_status: Arc, + /// Device Notify Control + pub dev_notify_ctrl: u32, + /// Command Ring Control + pub cmd_ring_ctrl: u64, + /// Device Context Base Address Array Pointer + pub dcbaap: GuestAddress, + /// Configure + pub config: u32, +} + +impl XhciOperReg { + pub fn reset(&mut self) { + self.set_usb_cmd(0); + self.set_usb_status(USB_STS_HCH); + self.dev_notify_ctrl = 0; + self.cmd_ring_ctrl = 0; + self.dcbaap = GuestAddress(0); + self.config = 0; + } + + /// Run the command ring. + pub fn start_cmd_ring(&mut self) { + self.cmd_ring_ctrl |= u64::from(CMD_RING_CTRL_CRR); + } + + pub fn set_usb_cmd(&mut self, value: u32) { + self.usb_cmd.store(value, Ordering::SeqCst) + } + + pub fn get_usb_cmd(&self) -> u32 { + self.usb_cmd.load(Ordering::Acquire) + } + + pub fn set_usb_status(&mut self, value: u32) { + self.usb_status.store(value, Ordering::SeqCst) + } + + pub fn get_usb_status(&self) -> u32 { + self.usb_status.load(Ordering::Acquire) + } + + pub fn set_usb_status_flag(&mut self, value: u32) { + self.usb_status.fetch_or(value, Ordering::SeqCst); + } + + pub fn unset_usb_status_flag(&mut self, value: u32) { + self.usb_status.fetch_and(!value, Ordering::SeqCst); + } +} + +/// XHCI Interrupter +pub struct XhciInterrupter { + mem: Arc, + oper_usb_cmd: Arc, + oper_usb_status: Arc, + id: u32, + interrupt_cb: Option bool + Send + Sync>>, + /// Interrupter Management + pub iman: u32, + /// Interrupter Morderation + pub imod: u32, + /// Event Ring Segment Table Size + pub erstsz: u32, + /// Event Ring Segment Table Base Address + pub erstba: GuestAddress, + /// Event Ring Dequeue Pointer + pub erdp: GuestAddress, + /// Event Ring Producer Cycle State + pub er_pcs: bool, + pub er_start: GuestAddress, + pub er_size: u32, + pub er_ep_idx: u32, +} + +impl XhciInterrupter { + pub fn new( + mem: &Arc, + oper_usb_cmd: &Arc, + oper_usb_status: &Arc, + id: u32, + ) -> Self { + Self { + mem: mem.clone(), + oper_usb_cmd: oper_usb_cmd.clone(), + oper_usb_status: oper_usb_status.clone(), + id, + interrupt_cb: None, + iman: 0, + imod: 0, + erstsz: 0, + erstba: GuestAddress(0), + erdp: GuestAddress(0), + er_pcs: true, + er_start: GuestAddress(0), + er_size: 0, + er_ep_idx: 0, + } + } + + pub fn set_interrupter(&mut self, cb: Arc bool + Send + Sync>) { + self.interrupt_cb = Some(cb); + } + + pub fn oper_intr_enabled(&self) -> bool { + self.oper_usb_cmd.load(Ordering::Acquire) & USB_CMD_INTE == USB_CMD_INTE + } + + pub fn enable_intr(&mut self) { + 
self.oper_usb_status + .fetch_or(USB_STS_EINT, Ordering::SeqCst); + } + + pub fn reset(&mut self) { + self.iman = 0; + self.imod = 0; + self.erstsz = 0; + self.erstba = GuestAddress(0); + self.erdp = GuestAddress(0); + self.er_pcs = true; + self.er_start = GuestAddress(0); + self.er_size = 0; + self.er_ep_idx = 0; + } + + /// Send event TRB to driver, first write TRB and then send interrupt. + pub fn send_event(&mut self, evt: &XhciEvent) -> Result<()> { + let er_end = self + .er_start + .checked_add(u64::from(TRB_SIZE * self.er_size)) + .ok_or_else(|| { + UsbError::MemoryAccessOverflow( + self.er_start.raw_value(), + u64::from(TRB_SIZE * self.er_size), + ) + })?; + if self.erdp < self.er_start || self.erdp >= er_end { + bail!( + "DMA out of range, erdp {:x} er_start {:x} er_size {}", + self.erdp.raw_value(), + self.er_start.raw_value(), + self.er_size + ); + } + let dp_idx = (self.erdp.raw_value() - self.er_start.raw_value()) / u64::from(TRB_SIZE); + if u64::from((self.er_ep_idx + 2) % self.er_size) == dp_idx { + debug!("Event ring full error, idx {}", dp_idx); + let event = XhciEvent::new(TRBType::ErHostController, TRBCCode::EventRingFullError); + self.write_event(&event)?; + } else if u64::from((self.er_ep_idx + 1) % self.er_size) == dp_idx { + debug!("Event Ring full, drop Event."); + } else { + self.write_event(evt)?; + } + self.send_intr(); + Ok(()) + } + + fn send_intr(&mut self) { + let erdp = self.erdp.raw_value(); + let pending = read_u32(erdp, 0) & ERDP_EHB == ERDP_EHB; + let mut erdp_low = read_u32(erdp, 0); + erdp_low |= ERDP_EHB; + self.erdp = GuestAddress(write_u64_low(erdp, erdp_low)); + self.iman |= IMAN_IP; + self.enable_intr(); + if pending { + return; + } + if self.iman & IMAN_IE != IMAN_IE { + return; + } + if !self.oper_intr_enabled() { + return; + } + + if let Some(intr_ops) = self.interrupt_cb.as_ref() { + if intr_ops(self.id, 1) { + self.iman &= !IMAN_IP; + } + } + } + + fn update_intr(&mut self) { + if self.id == 0 { + let mut level = 0; + if self.iman & IMAN_IP == IMAN_IP + && self.iman & IMAN_IE == IMAN_IE + && self.oper_intr_enabled() + { + level = 1; + } + if let Some(intr_ops) = &self.interrupt_cb { + if intr_ops(0, level) { + self.iman &= !IMAN_IP; + } + } + } + } + + /// Write event to the ring and update index. + pub fn write_event(&mut self, evt: &XhciEvent) -> Result<()> { + let mut ev_trb = evt.to_trb(); + if self.er_pcs { + ev_trb.control |= TRB_C; + } + self.write_trb(&ev_trb)?; + // Update index + self.er_ep_idx += 1; + if self.er_ep_idx >= self.er_size { + self.er_ep_idx = 0; + self.er_pcs = !self.er_pcs; + } + Ok(()) + } + + fn write_trb(&mut self, trb: &XhciTRB) -> Result<()> { + let addr = self + .er_start + .checked_add(u64::from(TRB_SIZE * self.er_ep_idx)) + .ok_or_else(|| { + UsbError::MemoryAccessOverflow( + self.er_start.raw_value(), + u64::from(TRB_SIZE * self.er_ep_idx), + ) + })?; + let cycle = trb.control as u8; + // Toggle the cycle bit to avoid driver read it. + let control = if trb.control & TRB_C == TRB_C { + trb.control & !TRB_C + } else { + trb.control | TRB_C + }; + let mut buf = [0_u8; TRB_SIZE as usize]; + LittleEndian::write_u64(&mut buf, trb.parameter); + LittleEndian::write_u32(&mut buf[8..], trb.status); + LittleEndian::write_u32(&mut buf[12..], control); + dma_write_bytes(&self.mem, addr, &buf)?; + // Write the cycle bit at last. + fence(Ordering::SeqCst); + dma_write_bytes(&self.mem, addr.unchecked_add(12), &[cycle])?; + Ok(()) + } +} + +/// Build capability region ops. 
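+/// Reads decode the capability and extended-capability (supported protocol) registers; writes are logged and ignored.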
+pub fn build_cap_ops(xhci_dev: &Arc>) -> RegionOps { + let xhci_dev = xhci_dev.clone(); + let cap_read = move |data: &mut [u8], addr: GuestAddress, offset: u64| -> bool { + let locked_dev = xhci_dev.lock().unwrap(); + let max_ports = locked_dev.numports_2 + locked_dev.numports_3; + let max_intrs = locked_dev.intrs.len() as u32; + let value = match offset { + XHCI_CAP_REG_CAPLENGTH => { + let hci_version_offset = XHCI_CAP_REG_HCIVERSION * 8; + XHCI_VERSION << hci_version_offset | XHCI_CAP_LENGTH + } + XHCI_CAP_REG_HCSPARAMS1 => { + u32::from(max_ports) << CAP_HCSP_NP_SHIFT + | max_intrs << CAP_HCSP_NI_SHIFT + | (locked_dev.slots.len() as u32) << CAP_HCSP_NDS_SHIFT + } + XHCI_CAP_REG_HCSPARAMS2 => { + // IST + 0xf + } + XHCI_CAP_REG_HCSPARAMS3 => 0x0, + XHCI_CAP_REG_HCCPARAMS1 => { + // The offset of the first extended capability is (base) + (0x8 << 2) + // The primary stream array size is 1 << (0x7 + 1) + 0x8 << CAP_HCCP_EXCP_SHIFT | (0x7 << CAP_HCCP_MPSAS_SHIFT) | CAP_HCCP_AC64 + } + XHCI_CAP_REG_DBOFF => XHCI_OFF_DOORBELL, + XHCI_CAP_REG_RTSOFF => XHCI_OFF_RUNTIME, + XHCI_CAP_REG_HCCPARAMS2 => 0, + // Extended capabilities (USB 2.0) + 0x20 => { + CAP_EXT_USB_REVISION_2_0 << CAP_EXT_REVISION_SHIFT + | 0x4 << CAP_EXT_NEXT_CAP_POINTER_SHIFT + | u32::from(CAP_EXT_CAP_ID_SUPPORT_PROTOCOL) + } + 0x24 => CAP_EXT_USB_NAME_STRING, + 0x28 => (u32::from(locked_dev.numports_2) << 8) | 1, + 0x2c => 0x0, + // Extended capabilities (USB 3.0) + 0x30 => { + CAP_EXT_USB_REVISION_3_0 << CAP_EXT_REVISION_SHIFT + | u32::from(CAP_EXT_CAP_ID_SUPPORT_PROTOCOL) + } + 0x34 => CAP_EXT_USB_NAME_STRING, + 0x38 => (u32::from(locked_dev.numports_3) << 8) | u32::from(locked_dev.numports_2 + 1), + 0x3c => 0x0, + _ => { + error!("Failed to read xhci cap: not implemented"); + 0 + } + }; + trace::usb_xhci_cap_read(&addr.0, &offset, &value); + write_data_u32(data, value) + }; + + let cap_write = move |_data: &[u8], _addr: GuestAddress, offset: u64| -> bool { + error!( + "Failed to write cap register: addr {:?} offset {}", + _addr, offset + ); + true + }; + + RegionOps { + read: Arc::new(cap_read), + write: Arc::new(cap_write), + } +} + +/// Build operational region ops. +pub fn build_oper_ops(xhci_dev: &Arc>) -> RegionOps { + let xhci = xhci_dev.clone(); + let oper_read = move |data: &mut [u8], addr: GuestAddress, offset: u64| -> bool { + let locked_xhci = xhci.lock().unwrap(); + let value = match offset { + XHCI_OPER_REG_USBCMD => locked_xhci.oper.get_usb_cmd(), + XHCI_OPER_REG_USBSTS => locked_xhci.oper.get_usb_status(), + XHCI_OPER_REG_PAGESIZE => XHCI_OPER_PAGESIZE, + XHCI_OPER_REG_DNCTRL => locked_xhci.oper.dev_notify_ctrl, + XHCI_OPER_REG_CMD_RING_CTRL_LO => { + // 5.4.5 Command Ring Control Register + // Table 5-24 shows read RCS CS CA always returns 0. + read_u32(locked_xhci.oper.cmd_ring_ctrl, 0) & CMD_RING_CTRL_CRR + } + XHCI_OPER_REG_CMD_RING_CTRL_HI => { + // 5.4.5 Command Ring Control Register + // Table 5-24 shows read CRP always returns 0. 
+ 0 + } + XHCI_OPER_REG_DCBAAP_LO => read_u32(locked_xhci.oper.dcbaap.raw_value(), 0), + XHCI_OPER_REG_DCBAAP_HI => read_u32(locked_xhci.oper.dcbaap.raw_value(), 1), + XHCI_OPER_REG_CONFIG => locked_xhci.oper.config, + _ => { + error!( + "Invalid offset {:x} for reading operational registers.", + offset + ); + 0 + } + }; + trace::usb_xhci_oper_read(&addr.0, &offset, &value); + write_data_u32(data, value) + }; + + let xhci = xhci_dev.clone(); + let oper_write = move |data: &[u8], addr: GuestAddress, offset: u64| -> bool { + let mut value = 0; + if !read_data_u32(data, &mut value) { + return false; + } + let mut locked_xhci = xhci.lock().unwrap(); + match offset { + XHCI_OPER_REG_USBCMD => { + if (value & USB_CMD_RUN) == USB_CMD_RUN + && (locked_xhci.oper.get_usb_cmd() & USB_CMD_RUN) != USB_CMD_RUN + { + locked_xhci.run(); + } else if (value & USB_CMD_RUN) != USB_CMD_RUN + && (locked_xhci.oper.get_usb_cmd() & USB_CMD_RUN) == USB_CMD_RUN + { + locked_xhci.stop(); + } + if value & USB_CMD_CSS == USB_CMD_CSS { + locked_xhci.oper.unset_usb_status_flag(USB_STS_SRE); + } + // When the restore command is issued, an error is reported and then + // guest OS performs a complete initialization. + if value & USB_CMD_CRS == USB_CMD_CRS { + locked_xhci.oper.set_usb_status_flag(USB_STS_SRE); + } + locked_xhci.oper.set_usb_cmd(value & 0xc0f); + locked_xhci.mfwrap_update(); + if value & USB_CMD_HCRST == USB_CMD_HCRST { + locked_xhci.reset(); + } + locked_xhci.intrs[0].lock().unwrap().update_intr(); + } + XHCI_OPER_REG_USBSTS => { + // Write 1 to clear. + locked_xhci.oper.unset_usb_status_flag( + value & (USB_STS_HSE | USB_STS_EINT | USB_STS_PCD | USB_STS_SRE), + ); + locked_xhci.intrs[0].lock().unwrap().update_intr(); + } + XHCI_OPER_REG_DNCTRL => locked_xhci.oper.dev_notify_ctrl = value & XHCI_OPER_NE_MASK, + XHCI_OPER_REG_CMD_RING_CTRL_LO => { + let mut crc_lo = read_u32(locked_xhci.oper.cmd_ring_ctrl, 0); + crc_lo = (value & XHCI_CRCR_CTRL_LO_MASK) | (crc_lo & CMD_RING_CTRL_CRR); + locked_xhci.oper.cmd_ring_ctrl = + write_u64_low(locked_xhci.oper.cmd_ring_ctrl, crc_lo); + } + XHCI_OPER_REG_CMD_RING_CTRL_HI => { + let crc_hi = u64::from(value) << 32; + let mut crc_lo = read_u32(locked_xhci.oper.cmd_ring_ctrl, 0); + if crc_lo & (CMD_RING_CTRL_CA | CMD_RING_CTRL_CS) != 0 + && (crc_lo & CMD_RING_CTRL_CRR) == CMD_RING_CTRL_CRR + { + let event = + XhciEvent::new(TRBType::ErCommandComplete, TRBCCode::CommandRingStopped); + crc_lo &= !CMD_RING_CTRL_CRR; + if let Err(e) = locked_xhci.intrs[0].lock().unwrap().send_event(&event) { + error!("Failed to send event: {:?}", e); + } + } else { + let addr = (crc_hi | u64::from(crc_lo)) & XHCI_CRCR_CRP_MASK; + locked_xhci.cmd_ring.init(addr); + } + crc_lo &= !(CMD_RING_CTRL_CA | CMD_RING_CTRL_CS); + locked_xhci.oper.cmd_ring_ctrl = write_u64_low(crc_hi, crc_lo); + } + XHCI_OPER_REG_DCBAAP_LO => { + let dcbaap = write_u64_low(locked_xhci.oper.dcbaap.raw_value(), value & 0xffffffc0); + locked_xhci.oper.dcbaap = GuestAddress(dcbaap); + } + XHCI_OPER_REG_DCBAAP_HI => { + let dcbaap = write_u64_high(locked_xhci.oper.dcbaap.raw_value(), value); + locked_xhci.oper.dcbaap = GuestAddress(dcbaap); + } + XHCI_OPER_REG_CONFIG => locked_xhci.oper.config = value & 0xff, + _ => { + error!( + "Invalid offset {:x} for writing operational registers.", + offset + ); + return false; + } + }; + trace::usb_xhci_oper_write(&addr.0, &offset, &value); + true + }; + + RegionOps { + read: Arc::new(oper_read), + write: Arc::new(oper_write), + } +} + +/// Build runtime region ops. 
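+/// Offsets below 0x20 map to MFINDEX; higher offsets select a per-interrupter register set of 0x20 bytes.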
+pub fn build_runtime_ops(xhci_dev: &Arc>) -> RegionOps { + let xhci = xhci_dev.clone(); + let runtime_read = move |data: &mut [u8], addr: GuestAddress, offset: u64| -> bool { + let mut value = 0; + if offset < 0x20 { + if offset == 0x0 { + value = (xhci.lock().unwrap().mfindex() & 0x3fff) as u32; + } else { + error!("Failed to read runtime registers, offset is {:x}", offset); + } + } else { + let idx = ((offset - XHCI_INTR_REG_SIZE) >> XHCI_INTR_REG_SHIFT) as usize; + let xhci = xhci.lock().unwrap(); + if idx >= xhci.intrs.len() { + error!("Invalid interrupter index: {} idx {}", offset, idx); + return false; + } + let locked_intr = xhci.intrs[idx].lock().unwrap(); + value = match offset & 0x1f { + XHCI_INTR_REG_IMAN => locked_intr.iman, + XHCI_INTR_REG_IMOD => locked_intr.imod, + XHCI_INTR_REG_ERSTSZ => locked_intr.erstsz, + XHCI_INTR_REG_ERSTBA_LO => read_u32(locked_intr.erstba.raw_value(), 0), + XHCI_INTR_REG_ERSTBA_HI => read_u32(locked_intr.erstba.raw_value(), 1), + XHCI_INTR_REG_ERDP_LO => read_u32(locked_intr.erdp.raw_value(), 0), + XHCI_INTR_REG_ERDP_HI => read_u32(locked_intr.erdp.raw_value(), 1), + _ => { + error!( + "Invalid offset {:x} for reading interrupter registers.", + offset + ); + return false; + } + }; + } + trace::usb_xhci_runtime_read(&addr.0, &offset, &value); + write_data_u32(data, value) + }; + + let xhci = xhci_dev.clone(); + let runtime_write = move |data: &[u8], addr: GuestAddress, offset: u64| -> bool { + let mut value = 0; + if !read_data_u32(data, &mut value) { + return false; + } + if offset < 0x20 { + error!("Runtime write not implemented: offset {}", offset); + return false; + } + let mut xhci = xhci.lock().unwrap(); + let idx = ((offset - XHCI_INTR_REG_SIZE) >> XHCI_INTR_REG_SHIFT) as u32; + if idx >= xhci.intrs.len() as u32 { + error!("Invalid interrupter index: {} idx {}", offset, idx); + return false; + } + let mut locked_intr = xhci.intrs[idx as usize].lock().unwrap(); + match offset & 0x1f { + XHCI_INTR_REG_IMAN => { + if value & IMAN_IP == IMAN_IP { + locked_intr.iman &= !IMAN_IP; + } + locked_intr.iman &= !IMAN_IE; + locked_intr.iman |= value & IMAN_IE; + locked_intr.update_intr(); + } + XHCI_INTR_REG_IMOD => locked_intr.imod = value, + XHCI_INTR_REG_ERSTSZ => locked_intr.erstsz = value & 0xffff, + XHCI_INTR_REG_ERSTBA_LO => { + let erstba = write_u64_low(locked_intr.erstba.raw_value(), value & 0xffffffc0); + locked_intr.erstba = GuestAddress(erstba); + } + XHCI_INTR_REG_ERSTBA_HI => { + let erstba = GuestAddress(write_u64_high(locked_intr.erstba.raw_value(), value)); + locked_intr.erstba = erstba; + drop(locked_intr); + if let Err(e) = xhci.reset_event_ring(idx) { + error!("Failed to reset event ring: {:?}", e); + } + } + XHCI_INTR_REG_ERDP_LO => { + // ERDP_EHB is write 1 clear. 
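+                // Keep the previously latched EHB bit unless the guest writes 1 to clear it.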
+ let mut erdp_lo = value & !ERDP_EHB; + if value & ERDP_EHB != ERDP_EHB { + let erdp_old = read_u32(locked_intr.erdp.raw_value(), 0); + erdp_lo |= erdp_old & ERDP_EHB; + } + let erdp = write_u64_low(locked_intr.erdp.raw_value(), erdp_lo); + locked_intr.erdp = GuestAddress(erdp); + if value & ERDP_EHB == ERDP_EHB { + let erdp = locked_intr.erdp; + let er_end = if let Some(addr) = locked_intr + .er_start + .checked_add(u64::from(TRB_SIZE * locked_intr.er_size)) + { + addr + } else { + error!( + "Memory access overflow, addr {:x} offset {:x}", + locked_intr.er_start.raw_value(), + u64::from(TRB_SIZE * locked_intr.er_size) + ); + return false; + }; + if erdp >= locked_intr.er_start + && erdp < er_end + && (erdp.raw_value() - locked_intr.er_start.raw_value()) + / u64::from(TRB_SIZE) + != u64::from(locked_intr.er_ep_idx) + { + drop(locked_intr); + xhci.intrs[idx as usize].lock().unwrap().send_intr(); + } + } + } + XHCI_INTR_REG_ERDP_HI => { + let erdp = write_u64_high(locked_intr.erdp.raw_value(), value); + locked_intr.erdp = GuestAddress(erdp); + } + _ => { + error!( + "Invalid offset {:x} for writing interrupter registers.", + offset + ); + } + }; + trace::usb_xhci_runtime_write(&addr.0, &offset, &value); + true + }; + + RegionOps { + read: Arc::new(runtime_read), + write: Arc::new(runtime_write), + } +} + +/// Build doorbell region ops. +pub fn build_doorbell_ops(xhci_dev: &Arc>) -> RegionOps { + let doorbell_read = move |data: &mut [u8], addr: GuestAddress, offset: u64| -> bool { + trace::usb_xhci_doorbell_read(&addr.0, &offset, &0); + write_data_u32(data, 0) + }; + let xhci = xhci_dev.clone(); + let doorbell_write = move |data: &[u8], addr: GuestAddress, offset: u64| -> bool { + let mut value = 0; + if !read_data_u32(data, &mut value) { + return false; + } + if !xhci.lock().unwrap().running() { + error!("Failed to write doorbell, XHCI is not running"); + return false; + } + let mut xhci = xhci.lock().unwrap(); + let slot_id = (offset >> 2) as u32; + if slot_id == 0 { + error!("Invalid slot id 0 !"); + return false; + } else { + let ep_id = value & DB_TARGET_MASK; + let stream_id = (value >> DB_STREAM_ID_SHIFT) & DB_STREAM_ID_MASK; + if let Err(e) = xhci.kick_endpoint(slot_id, ep_id, stream_id) { + error!("Failed to kick endpoint: {:?}", e); + xhci.host_controller_error(); + return false; + } + } + trace::usb_xhci_doorbell_write(&addr.0, &offset, &value); + true + }; + + RegionOps { + read: Arc::new(doorbell_read), + write: Arc::new(doorbell_write), + } +} + +/// Build port region ops. 
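+/// Only PORTSC has side effects; PORTPMSC, PORTLI and PORTHLPMC read as zero and writes to them are ignored.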
+pub fn build_port_ops(xhci_port: &Arc>) -> RegionOps { + let port = xhci_port.clone(); + let port_read = move |data: &mut [u8], addr: GuestAddress, offset: u64| -> bool { + let locked_port = port.lock().unwrap(); + let value = match offset { + XHCI_PORTSC => locked_port.portsc, + XHCI_PORTPMSC => 0, + XHCI_PORTLI => 0, + XHCI_PORTHLPMC => 0, + _ => { + error!("Failed to read port register: offset {:x}", offset); + return false; + } + }; + trace::usb_xhci_port_read(&addr.0, &offset, &value); + write_data_u32(data, value) + }; + + let port = xhci_port.clone(); + let port_write = move |data: &[u8], addr: GuestAddress, offset: u64| -> bool { + let mut value = 0; + if !read_data_u32(data, &mut value) { + return false; + } + match offset { + XHCI_PORTSC => { + if let Err(e) = xhci_portsc_write(&port, value) { + error!("Failed to write portsc register, {:?}", e); + return false; + } + } + XHCI_PORTPMSC => (), + XHCI_PORTLI => (), + XHCI_PORTHLPMC => (), + _ => { + error!("Invalid port link state offset {}", offset); + return false; + } + } + trace::usb_xhci_port_write(&addr.0, &offset, &value); + true + }; + + RegionOps { + read: Arc::new(port_read), + write: Arc::new(port_write), + } +} + +fn xhci_portsc_write(port: &Arc>, value: u32) -> Result<()> { + let locked_port = port.lock().unwrap(); + let xhci = locked_port.xhci.upgrade().unwrap(); + drop(locked_port); + // Lock controller first. + let mut locked_xhci = xhci.lock().unwrap(); + if value & PORTSC_WPR == PORTSC_WPR { + return locked_xhci.reset_port(port, true); + } + if value & PORTSC_PR == PORTSC_PR { + return locked_xhci.reset_port(port, false); + } + let mut locked_port = port.lock().unwrap(); + let old_portsc = locked_port.portsc; + let mut notify = 0; + // Write 1 to clear. + locked_port.portsc &= !(value + & (PORTSC_CSC + | PORTSC_PEC + | PORTSC_WRC + | PORTSC_OCC + | PORTSC_PRC + | PORTSC_PLC + | PORTSC_CEC)); + if value & PORTSC_LWS == PORTSC_LWS { + let old_pls = (old_portsc >> PORTSC_PLS_SHIFT) & PORTSC_PLS_MASK; + let new_pls = (value >> PORTSC_PLS_SHIFT) & PORTSC_PLS_MASK; + notify = xhci_portsc_ls_write(&mut locked_port, old_pls, new_pls); + } + locked_port.portsc &= !(PORTSC_PP | PORTSC_WCE | PORTSC_WDE | PORTSC_WOE); + locked_port.portsc |= value & (PORTSC_PP | PORTSC_WCE | PORTSC_WDE | PORTSC_WOE); + drop(locked_port); + if notify != 0 { + locked_xhci.port_notify(port, notify)?; + } + Ok(()) +} + +fn xhci_portsc_ls_write(port: &mut UsbPort, old_pls: u32, new_pls: u32) -> u32 { + match new_pls { + PLS_U0 => { + if old_pls != PLS_U0 { + port.set_port_link_state(new_pls); + trace::usb_xhci_port_link(&port.port_id, &new_pls); + return PORTSC_PLC; + } + } + PLS_U3 => { + if old_pls < PLS_U3 { + port.set_port_link_state(new_pls); + trace::usb_xhci_port_link(&port.port_id, &new_pls); + } + } + PLS_RESUME => {} + _ => { + error!( + "Unhandled port link state, ignore the write. old {:x} new {:x}", + old_pls, new_pls + ); + } + } + 0 +} diff --git a/devices/src/usb/xhci/xhci_ring.rs b/devices/src/usb/xhci/xhci_ring.rs new file mode 100644 index 0000000000000000000000000000000000000000..35cda571b5bf6b29f415ec3f3d4e8d54eed67c91 --- /dev/null +++ b/devices/src/usb/xhci/xhci_ring.rs @@ -0,0 +1,276 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::sync::atomic::{fence, AtomicBool, Ordering}; +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Result}; +use byteorder::{ByteOrder, LittleEndian}; +use log::debug; + +use super::super::UsbError; +use super::xhci_controller::{dma_read_u32, dma_write_u32, DwordOrder, XhciEpCtx}; +use super::xhci_trb::{ + TRBType, TRB_C, TRB_LK_TC, TRB_SIZE, TRB_TR_CH, TRB_TYPE_MASK, TRB_TYPE_SHIFT, +}; +use crate::usb::xhci::xhci_controller::dma_read_bytes; +use address_space::{AddressSpace, GuestAddress}; + +const TRB_LINK_LIMIT: u32 = 32; +/// The max size of a ring segment in bytes is 64k. +const RING_SEGMENT_LIMIT: u32 = 0x1_0000; +/// The max size of ring. +const RING_LEN_LIMIT: u32 = TRB_LINK_LIMIT * RING_SEGMENT_LIMIT / TRB_SIZE; + +type DmaAddr = u64; + +/// XHCI Transfer Request Block +#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)] +pub struct XhciTRB { + pub parameter: u64, + pub status: u32, + pub control: u32, + pub addr: DmaAddr, + pub ccs: bool, +} + +impl XhciTRB { + /// Get TRB type + pub fn get_type(&self) -> TRBType { + ((self.control >> TRB_TYPE_SHIFT) & TRB_TYPE_MASK).into() + } +} + +/// XHCI Command Ring +#[derive(Clone)] +pub struct XhciCommandRing { + mem: Arc, + pub dequeue: GuestAddress, + /// Consumer Cycle State + pub ccs: bool, +} + +impl XhciCommandRing { + pub fn new(mem: &Arc) -> Self { + Self { + mem: mem.clone(), + dequeue: GuestAddress(0), + ccs: true, + } + } + + pub fn init(&mut self, addr: u64) { + self.dequeue = GuestAddress(addr); + self.ccs = true; + } + + pub fn set_cycle_bit(&mut self, v: bool) { + self.ccs = v; + } + + /// Fetch TRB from the ring. + pub fn fetch_trb(&mut self) -> Result> { + let mut link_cnt = 0; + loop { + if read_cycle_bit(&self.mem, self.dequeue)? 
!= self.ccs { + debug!("TRB cycle bit not matched"); + return Ok(None); + } + fence(Ordering::Acquire); + let mut trb = read_trb(&self.mem, self.dequeue)?; + trb.addr = self.dequeue.raw_value(); + trb.ccs = self.ccs; + let trb_type = trb.get_type(); + debug!("Fetch TRB: type {:?} trb {:?}", trb_type, trb); + if trb_type == TRBType::TrLink { + link_cnt += 1; + if link_cnt > TRB_LINK_LIMIT { + bail!("TRB reach link limit"); + } + self.dequeue = GuestAddress(trb.parameter); + if trb.control & TRB_LK_TC == TRB_LK_TC { + self.ccs = !self.ccs; + } + } else { + self.dequeue = self + .dequeue + .checked_add(u64::from(TRB_SIZE)) + .ok_or_else(|| { + UsbError::MemoryAccessOverflow( + self.dequeue.raw_value(), + u64::from(TRB_SIZE), + ) + })?; + return Ok(Some(trb)); + } + } + } +} + +/// XHCI Transfer Ring +pub struct XhciTransferRing { + pub mem: Arc, + pub dequeue: Mutex, + /// Consumer Cycle State + pub ccs: AtomicBool, +} + +impl XhciTransferRing { + pub fn new(mem: &Arc) -> Self { + Self { + mem: mem.clone(), + dequeue: Mutex::new(GuestAddress(0)), + ccs: AtomicBool::new(true), + } + } + + pub fn init(&self, addr: u64) { + self.set_dequeue_ptr(GuestAddress(addr)); + self.set_cycle_bit(true); + } + + pub fn get_dequeue_ptr(&self) -> GuestAddress { + *self.dequeue.lock().unwrap() + } + + pub fn set_dequeue_ptr(&self, addr: GuestAddress) { + *self.dequeue.lock().unwrap() = addr + } + + pub fn get_cycle_bit(&self) -> bool { + self.ccs.load(Ordering::Acquire) + } + + pub fn set_cycle_bit(&self, v: bool) { + self.ccs.store(v, Ordering::SeqCst); + } + + /// Get the transfer descriptor which includes one or more TRBs. + /// Return None if the td is not ready. + /// Return Vec if the td is ok. + /// Return Error if read trb failed. + pub fn fetch_td(&self) -> Result>> { + let mut dequeue = self.get_dequeue_ptr(); + let mut ccs = self.get_cycle_bit(); + let mut ctrl_td = false; + let mut link_cnt = 0; + let mut td = Vec::new(); + for _ in 0..RING_LEN_LIMIT { + if read_cycle_bit(&self.mem, dequeue)? != ccs { + debug!("TRB cycle bit not matched"); + return Ok(None); + } + fence(Ordering::Acquire); + let mut trb = read_trb(&self.mem, dequeue)?; + trb.addr = dequeue.raw_value(); + trb.ccs = ccs; + trace::usb_xhci_fetch_trb(&dequeue, &trb.parameter, &trb.status, &trb.control); + let trb_type = trb.get_type(); + if trb_type == TRBType::TrLink { + link_cnt += 1; + if link_cnt > TRB_LINK_LIMIT { + bail!("TRB link over limit"); + } + dequeue = GuestAddress(trb.parameter); + if trb.control & TRB_LK_TC == TRB_LK_TC { + ccs = !ccs; + } + } else { + td.push(trb); + dequeue = dequeue.checked_add(u64::from(TRB_SIZE)).ok_or_else(|| { + UsbError::MemoryAccessOverflow(dequeue.raw_value(), u64::from(TRB_SIZE)) + })?; + if trb_type == TRBType::TrSetup { + ctrl_td = true; + } else if trb_type == TRBType::TrStatus { + ctrl_td = false; + } + if !ctrl_td && (trb.control & TRB_TR_CH != TRB_TR_CH) { + // Update the dequeue pointer and ccs flag. + self.set_dequeue_ptr(dequeue); + self.set_cycle_bit(ccs); + return Ok(Some(td)); + } + } + } + bail!("Transfer TRB length over limit"); + } + + /// Refresh dequeue pointer to output context. 
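+    /// Reads the endpoint context at `output_ctx_addr`, rewrites dwords 2-3 with the current dequeue pointer and cycle state, and writes the context back.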
+ pub fn refresh_dequeue_ptr(&self, output_ctx_addr: GuestAddress) -> Result<()> { + let mut ep_ctx = XhciEpCtx::default(); + dma_read_u32(&self.mem, output_ctx_addr, ep_ctx.as_mut_dwords())?; + self.update_dequeue_to_ctx(&mut ep_ctx.as_mut_dwords()[2..]); + dma_write_u32(&self.mem, output_ctx_addr, ep_ctx.as_dwords())?; + Ok(()) + } + + pub fn update_dequeue_to_ctx(&self, ctx: &mut [u32]) { + let dequeue = self.get_dequeue_ptr().raw_value(); + ctx[0] = dequeue as u32 | u32::from(self.get_cycle_bit()); + ctx[1] = (dequeue >> 32) as u32; + } +} + +fn read_trb(mem: &Arc, addr: GuestAddress) -> Result { + let mut buf = [0; TRB_SIZE as usize]; + dma_read_bytes(mem, addr, &mut buf)?; + let trb = XhciTRB { + parameter: LittleEndian::read_u64(&buf), + status: LittleEndian::read_u32(&buf[8..]), + control: LittleEndian::read_u32(&buf[12..]), + addr: 0, + ccs: true, + }; + Ok(trb) +} + +fn read_cycle_bit(mem: &Arc, addr: GuestAddress) -> Result { + let addr = addr + .checked_add(12) + .ok_or_else(|| UsbError::MemoryAccessOverflow(addr.raw_value(), 12))?; + let mut buf = [0]; + dma_read_u32(mem, addr, &mut buf)?; + Ok(buf[0] & TRB_C == TRB_C) +} + +/// Event Ring Segment Table Entry. See in the specs 6.5 Event Ring Segment Table. +pub struct XhciEventRingSeg { + mem: Arc, + pub addr_lo: u32, + pub addr_hi: u32, + pub size: u32, + pub rsvd: u32, +} + +impl XhciEventRingSeg { + pub fn new(mem: &Arc) -> Self { + Self { + mem: mem.clone(), + addr_lo: 0, + addr_hi: 0, + size: 0, + rsvd: 0, + } + } + + /// Fetch the event ring segment. + pub fn fetch_event_ring_seg(&mut self, addr: GuestAddress) -> Result<()> { + let mut buf = [0_u8; TRB_SIZE as usize]; + dma_read_bytes(&self.mem, addr, &mut buf)?; + self.addr_lo = LittleEndian::read_u32(&buf); + self.addr_hi = LittleEndian::read_u32(&buf[4..]); + self.size = LittleEndian::read_u32(&buf[8..]); + self.rsvd = LittleEndian::read_u32(&buf[12..]); + Ok(()) + } +} diff --git a/devices/src/usb/xhci/xhci_trb.rs b/devices/src/usb/xhci/xhci_trb.rs new file mode 100644 index 0000000000000000000000000000000000000000..f77336e914200e6ce79f9bf3154d0d2c8cbc2ddf --- /dev/null +++ b/devices/src/usb/xhci/xhci_trb.rs @@ -0,0 +1,160 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +/// Transfer Request Block +pub const TRB_SIZE: u32 = 16; +pub const TRB_TYPE_SHIFT: u32 = 10; +pub const TRB_TYPE_MASK: u32 = 0x3f; +/// Cycle bit +pub const TRB_C: u32 = 1; +/// Event Data +pub const TRB_EV_ED: u32 = 1 << 2; +/// Toggle Cycle +pub const TRB_LK_TC: u32 = 1 << 1; +/// Interrupt-on Short Packet +pub const TRB_TR_ISP: u32 = 1 << 2; +/// Chain bit +pub const TRB_TR_CH: u32 = 1 << 4; +/// Interrupt On Completion +pub const TRB_TR_IOC: u32 = 1 << 5; +/// Immediate Data. +pub const TRB_TR_IDT: u32 = 1 << 6; +/// Direction of the data transfer. +pub const TRB_TR_DIR: u32 = 1 << 16; +/// Frame ID shift. +pub const TRB_TR_FRAMEID_SHIFT: u32 = 20; +/// Frame ID mask. +pub const TRB_TR_FRAMEID_MASK: u32 = 0x7ff; +/// Start Isoch ASAP. 
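+/// When set, the Frame ID field is ignored and the isoch transfer starts as soon as possible.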
+pub const TRB_TR_SIA: u32 = 1 << 31; +/// TRB Transfer Length Mask +pub const TRB_TR_LEN_MASK: u32 = 0x1ffff; +/// Setup Stage TRB Length always 8 +pub const SETUP_TRB_TR_LEN: u32 = 8; + +/// TRB Type Definitions. See the spec 6.4.6 TRB types. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum TRBType { + TrbReserved = 0, + TrNormal, + TrSetup, + TrData, + TrStatus, + TrIsoch, + TrLink, + TrEvdata, + TrNoop, + CrEnableSlot, + CrDisableSlot, + CrAddressDevice, + CrConfigureEndpoint, + CrEvaluateContext, + CrResetEndpoint, + CrStopEndpoint, + CrSetTrDequeue, + CrResetDevice, + CrForceEvent, + CrNegotiateBw, + CrSetLatencyTolerance, + CrGetPortBandwidth, + CrForceHeader, + CrNoop, + ErTransfer = 32, + ErCommandComplete, + ErPortStatusChange, + ErBandwidthRequest, + ErDoorbell, + ErHostController, + ErDeviceNotification, + ErMfindexWrap, + Unknown, +} + +impl From for TRBType { + fn from(t: u32) -> TRBType { + match t { + 0 => TRBType::TrbReserved, + 1 => TRBType::TrNormal, + 2 => TRBType::TrSetup, + 3 => TRBType::TrData, + 4 => TRBType::TrStatus, + 5 => TRBType::TrIsoch, + 6 => TRBType::TrLink, + 7 => TRBType::TrEvdata, + 8 => TRBType::TrNoop, + 9 => TRBType::CrEnableSlot, + 10 => TRBType::CrDisableSlot, + 11 => TRBType::CrAddressDevice, + 12 => TRBType::CrConfigureEndpoint, + 13 => TRBType::CrEvaluateContext, + 14 => TRBType::CrResetEndpoint, + 15 => TRBType::CrStopEndpoint, + 16 => TRBType::CrSetTrDequeue, + 17 => TRBType::CrResetDevice, + 18 => TRBType::CrForceEvent, + 19 => TRBType::CrNegotiateBw, + 20 => TRBType::CrSetLatencyTolerance, + 21 => TRBType::CrGetPortBandwidth, + 22 => TRBType::CrForceHeader, + 23 => TRBType::CrNoop, + 32 => TRBType::ErTransfer, + 33 => TRBType::ErCommandComplete, + 34 => TRBType::ErPortStatusChange, + 35 => TRBType::ErBandwidthRequest, + 36 => TRBType::ErDoorbell, + 37 => TRBType::ErHostController, + 38 => TRBType::ErDeviceNotification, + 39 => TRBType::ErMfindexWrap, + _ => TRBType::Unknown, + } + } +} + +/// TRB Completion Code. See the spec 6.4.5 TRB Completion Codes. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum TRBCCode { + Invalid = 0, + Success, + DataBufferError, + BabbleDetected, + UsbTransactionError, + TrbError, + StallError, + ResourceError, + BandwidthError, + NoSlotsError, + InvalidStreamTypeError, + SlotNotEnabledError, + EpNotEnabledError, + ShortPacket, + RingUnderrun, + RingOverrun, + VfErFull, + ParameterError, + BandwidthOverrun, + ContextStateError, + NoPingResponseError, + EventRingFullError, + IncompatibleDeviceError, + MissedServiceError, + CommandRingStopped, + CommandAborted, + Stopped, + StoppedLengthInvalid, + MaxExitLatencyTooLargeError = 29, + IsochBufferOverrun = 31, + EventLostError, + UndefinedError, + InvalidStreamIdError, + SecondaryBandwidthError, + SplitTransactionError, +} diff --git a/docs/boot.ch.md b/docs/boot.ch.md index 774e4a81cefc7e130d7bc5bfd97d38e2c0851aa3..09d459b60c85e71169a280b9259734e4099e832d 100644 --- a/docs/boot.ch.md +++ b/docs/boot.ch.md @@ -2,6 +2,21 @@ StratoVirt提供了轻量虚拟机和标准虚拟机两种机型。两种机型的启动过程如下。 +## 前置参数设置 + +```shell +arch=`uname -m` +if [ ${arch} = "x86_64" ]; then + con=ttyS0 + machine="q35" +elif [ ${arch} = "aarch64" ]; then + con=ttyAMA0 + machine="virt" +else + echo "${arch} architecture not supported." + exit 1 +fi + ## 轻量虚拟机启动过程 ### 1. 
构建内核镜像 @@ -54,7 +69,7 @@ Rootfs镜像是一种文件系统镜像。在StratoVirt启动时可以挂载带 -kernel /path/to/kernel \ -smp 1 \ -m 1024m \ - -append "console=ttyS0 pci=off reboot=k quiet panic=1 root=/dev/vda" \ + -append "console=${con} pci=off reboot=k quiet panic=1 root=/dev/vda" \ -drive file=/path/to/rootfs,id=rootfs,readonly=off,direct=off \ -device virtio-blk-device,drive=rootfs,id=rootfs \ -qmp unix:/path/to/socket,server,nowait \ @@ -197,7 +212,29 @@ $ qemu-img convert -f qcow2 -O raw openEuler-21.03-x86_64.qcow2 openEuler-21.03- 至此就获得了可以使用的 raw 格式镜像。 -### 4. 启动命令行样例 +### 4. 以 direct kernel boot 方式启动标准虚拟机 + +为virt虚机主板提供直接从kernel启动的模式。在该模式下,不需要UEFI和APCI表, +虚拟机将跳过UEFI启动阶段,直接从kernel启动,从而加快启动速度。 + +启动命令行如下: + +```shell +/usr/bin/stratovirt \ + -machine virt \ + -kernel /path/to/kernel \ + -smp 1 \ + -m 2G \ + -append "console=${con} reboot=k panic=1 root=/dev/vda rw" \ + -drive file=/path/to/rootfs,id=rootfs,readonly=off,direct=off \ + -device virtio-blk-pci,drive=rootfs,id=blk1,bus=pcie.0,addr=0x2 \ + -qmp unix:/path/to/socket,server,nowait \ + -serial stdio +``` + +说明:当前只支持ARM架构下virt虚机主板快速启动标准虚拟机。 + +### 5. 启动命令行样例 请注意,标准虚拟机需要两个PFlash设备,它们将使用来自与EDK2二进制的两个固件文件。 如果你不需要保持启动信息,单元序列为1的数据存储文件可以被省略。但是单元序号为0的 @@ -206,18 +243,6 @@ $ qemu-img convert -f qcow2 -O raw openEuler-21.03-x86_64.qcow2 openEuler-21.03- 首先给出 kernel + rootfs 的启动命令,具体如下: ```shell -arch=`uname -m` -if [ ${arch} = "x86_64" ]; then - con=ttyS0 - machine="q35" -elif [ ${arch} = "aarch64" ]; then - con=ttyAMA0 - machine="virt" -else - echo "${arch} architecture not supported." - exit 1 -fi - /usr/bin/stratovirt \ -machine ${machine} \ -kernel /path/to/kernel \ diff --git a/docs/boot.md b/docs/boot.md index 66541fa0607c25d4bc1763850e278749335bbdb1..72fe1de0643966b45466aa249e32e083722969c1 100644 --- a/docs/boot.md +++ b/docs/boot.md @@ -3,6 +3,22 @@ StratoVirt provides two kinds of machine, which are microvm and standard VM. The boot process of these two machines are as follows. +## pre-parameter setting + +```shell +arch=`uname -m` +if [ ${arch} = "x86_64" ]; then + con=ttyS0 + machine="q35" +elif [ ${arch} = "aarch64" ]; then + con=ttyAMA0 + machine="virt" +else + echo "${arch} architecture not supported." + exit 1 +fi +``` + ## microvm boot process ### 1. Build kernel @@ -45,7 +61,7 @@ and copy it to `kernel` path as `.config`. You can also modify config options by ### 2. Build rootfs -Rootfs image is a file system image. An EXT4-format image with `/sbin/init` can +Rootfs image is a file system image. An EXT4-format image with `/sbin/init` can be mounted at boot time in StratoVirt. You can check [Appendix](#2Appendix). ### 3. Boot command line sample @@ -56,7 +72,7 @@ be mounted at boot time in StratoVirt. You can check [Appendix](#2Appendix). -kernel /path/to/kernel \ -smp 1 \ -m 1024m \ - -append "console=ttyS0 pci=off reboot=k quiet panic=1 root=/dev/vda" \ + -append "console=${con} pci=off reboot=k quiet panic=1 root=/dev/vda" \ -drive file=/path/to/rootfs,id=rootfs,readonly=off,direct=off \ -device virtio-blk-device,drive=rootfs,id=rootfs \ -qmp unix:/path/to/socket,server,nowait \ @@ -176,8 +192,8 @@ Kernel image can be built with: # on x86_64 platform, get bzImage format kernel image. 
$ make -j$(nproc) bzImage ``` -In addition to manually building the kernel image, you can also download the -[kernel image](https://repo.openeuler.org/openEuler-21.09/stratovirt_img/x86_64/std-vmlinuxz) +In addition to manually building the kernel image, you can also download the +[kernel image](https://repo.openeuler.org/openEuler-21.09/stratovirt_img/x86_64/std-vmlinuxz) from the openEuler official website. #### 2.2 Build rootfs @@ -202,7 +218,28 @@ $ qemu-img convert -f qcow2 -O raw openEuler-21.03-x86_64.qcow2 openEuler-21.03- Now the available raw image is obtained. -### 4. Boot command line sample +### 4. Boot with kernel directly + +It can directly boot from kernel. In this mode, UEFI and ACPI will not be used. And VM will skip the UEFI, directly start the kernel to reduce boot up time. + +Run the following commands to direct boot VM from kernel: + +```shell +/usr/bin/stratovirt \ + -machine virt \ + -kernel /path/to/kernel \ + -smp 1 \ + -m 2G \ + -append "console=${con} reboot=k panic=1 root=/dev/vda rw" \ + -drive file=/path/to/rootfs,id=rootfs,readonly=off,direct=off \ + -device virtio-blk-pci,drive=rootfs,id=blk1,bus=pcie.0,addr=0x2 \ + -qmp unix:/path/to/socket,server,nowait \ + -serial stdio +``` + +Note: This mode currently only supports arm architecture. + +### 5. Boot command line sample Note that standard need two PFlash devices which will use two firmware files from EDK II binary. If you don't need to store boot information, data storage file can @@ -211,18 +248,6 @@ be omitted whose unit is 1. But code storage file with unit 0 is necessary. Run the following commands to boot with the kernel and rootfs: ```shell -arch=`uname -m` -if [ ${arch} = "x86_64" ]; then - con=ttyS0 - machine="q35" -elif [ ${arch} = "aarch64" ]; then - con=ttyAMA0 - machine="virt" -else - echo "${arch} architecture not supported." - exit 1 -fi - /usr/bin/stratovirt \ -machine ${machine} \ -kernel /path/to/kernel \ @@ -303,7 +328,7 @@ Below is a simple way to make a EXT4 rootfs image: **Notice: alpine is an example. You can use any open rootfs filesystem with init/systemd as rootfs image.** -5. Unmount rootfs image: +5. Unmount rootfs image: ```shell $ cd ~ && umount /mnt/rootfs @@ -311,6 +336,6 @@ Below is a simple way to make a EXT4 rootfs image: ## Links -- [EDK II wiki](https://github.com/tianocore/tianocore.github.io/wiki/EDK-II) +- [EDK II wiki](https://github.com/tianocore/tianocore.github.io/wiki/EDK-II) - [OVMF wiki](https://github.com/tianocore/tianocore.github.io/wiki/OVMF) diff --git a/docs/build_guide.ch.md b/docs/build_guide.ch.md index ac9d4668c99e49734bdf3539ed94889314a637a9..b4f506a83276b1de1e72cf61087a181fc30b0024 100644 --- a/docs/build_guide.ch.md +++ b/docs/build_guide.ch.md @@ -4,11 +4,11 @@ ## 1. 检查Rust构建环境 为了构建StratoVirt,需保证已经安装了Rust语言环境和Cargo软件。 -rustc的推荐版本为1.51.0及其之后的版本, 否则编译可能失败。 +rustc的推荐版本为1.64.0及其之后的版本, 否则编译可能失败。 ```shell $ rustc --version -rustc 1.51.0 +rustc 1.64.0 ``` 如果你想部署rust环境,下面的链接可以帮助你: @@ -25,7 +25,7 @@ $ arch=`uname -m` $ rustup target add ${arch}-unknown-linux-gnu # 构建StratoVirt -$ cargo build --release --target ${arch}-unknown-linux-gnu +$ cargo build --workspace --bins --release --target ${arch}-unknown-linux-gnu ``` 现在你可找到StratoVirt二进制的路径在 `target/${arch}-unknown-linux-gnu/release/stratovirt`. 
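For context on the command change above: `--workspace --bins` builds every binary target in the workspace instead of only the default package. One way to list those targets beforehand is `cargo metadata`; the `jq` filter below is only an illustration and assumes `jq` is installed.

```shell
# List the binary targets that `cargo build --workspace --bins` will build.
$ cargo metadata --no-deps --format-version 1 \
    | jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
```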
@@ -40,7 +40,66 @@ $ arch=`uname -m` $ rustup target add ${arch}-unknown-linux-musl # 构建StratoVirt -$ cargo build --release --target ${arch}-unknown-linux-musl +$ cargo build --workspace --bins --release --target ${arch}-unknown-linux-musl ``` 现在你可找到StratoVirt静态链接二进制的路径在 `target/${arch}-unknown-linux-musl/release/stratovirt`. + +## 4. 特性编译选项 + +对于不同的场景,StratoVirt提供基于cargo `feature`的特性条件编译选项。 + +可选特性清单如下: + +- scream_alsa:使能虚拟声卡,使用`ALSA`后端 +- scream_pulseaudio:使能虚拟声卡,使用`PulseAudio`后端 +- usb_host:使能USB Host设备 +- usb_camera_v4l2:使能USB摄像头,使用`v4l2`后端 +- gtk:使能GTK显示 +- vnc:使能VNC显示 +- ramfb:使能ramfb显示设备 +- virtio_gpu:使能virtio-gpu虚拟显卡 + +```shell +$ cargo build --release --features "scream_alsa" +``` + +## 5. OpenHarmony OS版本的编译 + +StratoVirt支持在Openharmony OS(OHOS)的运行。该版本的编译需要一台x64机器,并使用OHOS提供的RUST交叉编译工具链、以及SDK。 + +编译之前,需要把OHOS SDK的路径指定到环境变量OHOS_SDK中。另外,StratoVirt依赖的crate有部分不支持OHOS的编译,需要对其源码做相关修改。 + +编译命令示意如下: + +``` +RUSTFLAGS="-C link-arg=--target=aarch64-linux-ohos -C linker={OHOS_SDK}/llvm/bin/clang" cargo build --target aarch64-linux-ohos --features {FEATURES}" +``` + +# 通过容器构建StratoVirt静态链接二进制 + +## 1. 检查docker环境 + +为了通过容器构建StratoVirt,需保证已经安装了docker软件。可通过下面的命令检查: + +```shell +$ docker -v +Docker version 18.09.0 +``` + +如果你想部署docker环境,下面的链接可以帮助你: + + + +## 2. 使用tools下提供的构建工具 + +运行tools/build_stratovirt_static下的脚本,自动拉起docker容器构建静态链接的StratoVirt。 + +```shell +$ cd tools/build_stratovirt_static +# 自定义一个镜像名称,构建StratoVirt静态链接二进制 +$ sh build_stratovirt_from_docker.sh custom_image_name +``` + +构建完成后,可找到StratoVirt构建静态链接二进制的路径在 `target/${arch}-unknown-linux-musl/release/stratovirt`. + diff --git a/docs/build_guide.md b/docs/build_guide.md index 3d7320e437d44958883f4320e043f6a70a828be0..b6afe232191aa3aee113ae2c7f86283d6d2d6925 100644 --- a/docs/build_guide.md +++ b/docs/build_guide.md @@ -4,11 +4,11 @@ ## 1. Check Rust environment To build StratoVirt, make sure that Rust language environment and Cargo have already been installed. -The recommended version of rustc is 1.51.0 or later, otherwise compilation may be failed. +The recommended version of rustc is 1.64.0 or later, otherwise compilation may be failed. ```shell $ rustc --version -rustc 1.51.0 +rustc 1.64.0 ``` If you want to deploy rust environment, the following link will help you: @@ -25,7 +25,7 @@ $ arch=`uname -m` $ rustup target add ${arch}-unknown-linux-gnu # Build StratoVirt -$ cargo build --release --target ${arch}-unknown-linux-gnu +$ cargo build --workspace --bins --release --target ${arch}-unknown-linux-gnu ``` Now you can find StratoVirt binary file in `target/${arch}-unknown-linux-gnu/release/stratovirt`. @@ -41,7 +41,67 @@ $ arch=`uname -m` $ rustup target add ${arch}-unknown-linux-musl # Build StratoVirt -$ cargo build --release --target ${arch}-unknown-linux-musl +$ cargo build --workspace --bins --release --target ${arch}-unknown-linux-musl ``` Now you can find StratoVirt static binary file in `target/${arch}-unknown-linux-musl/release/stratovirt`. + +## 4. Build with features + +For different scenarios, StratoVirt provides feature conditional compilation options based on the cargo `feature`. 
+ +List of optional features: + +- scream_alsa: enable virtual sound card with `ALSA` interface +- scream_pulseaudio: enable virtual sound card with `PulseAudio` interface +- usb_host: enable USB Host device +- usb_camera_v4l2: enable USB camera with `v4l2` backend +- gtk: enable GTK display +- vnc: enable VNC display +- ramfb: enable ramfb display device +- virtio_gpu: enable virtio-gpu virtualized graphics card +- pvpanic: enable virtualized pvpanic pci device + +```shell +$ cargo build --workspace --bins --release --features "scream_alsa" +``` + +## 5. Compiling of OpenHarmony OS version + +Stratovirt now can run on OpenHarmony OS(OHOS). Stratovirt, OHOS version, is compiled on x64, and relies on RUST cross compilation toolchain and SDK offered by OHOS. + +Before compiling, specify OHOS SDK path in environment variable OHOS_SDK. Some crates needed by StratoVirt now are not support OHOS platform, adapting is essential. + +Here is a command demo: + +``` +RUSTFLAGS="-C link-arg=--target=aarch64-linux-ohos -C linker={OHOS_SDK}/llvm/bin/clang" cargo build --target aarch64-linux-ohos --features {FEATURES}" +``` + +# Build static StratoVirt in containers + +## 1. Check docker environment + +In order to build StratoVirt in containers, ensure that the docker software is installed. This can be checked with the following command: + +```shell +$ docker -v +Docker version 18.09.0 +``` + +If you want to deploy a docker environment, the following link can help you: + + + +## 2. Run the build script + +Run the script under tools/build_stratovirt_static directory to automatically run a docker container to build a statically linked StratoVirt. + +```shell +$ cd tools/build_stratovirt_static +# Build StratoVirt with your custom_image_name +$ sh build_stratovirt_from_docker.sh custom_image_name +``` + +After the build is complete, you can find the statically linked binary StratoVirt in the path: `target/${arch}-unknown-linux-musl/release/stratovirt`. + diff --git a/docs/config_guidebook.md b/docs/config_guidebook.md index 34f192fd81be89a60cb8403e60ba460d291d3dde..29b0e14b320938808cbeae334f2014e20aa640ed 100644 --- a/docs/config_guidebook.md +++ b/docs/config_guidebook.md @@ -7,10 +7,10 @@ StratoVirt can only be launched via cmdline arguments. ### 1.1 Machine Config General configuration of machine, including -* type: The type of machine, three types of machine are available: "none", "microvm", +* type: The type of machine, three types of machine are available: "none", "microvm", "q35"(x86_64 platform) and "virt" (aarch64 platform). * dump-guest-core: Including guest memory in coredump file or not, default value is true. -* mem-share: Guest memory is sharable with other processes or not. +* mem-share: Guest memory is sharable with other processes or not. By default this option is turned off. * accel: accelerate module, supported value `kvm`. (optional). If not set, default is KVM. * usb: whether use usb. supported value `off`. (optional). If not set, default is off. @@ -18,10 +18,12 @@ NB: machine type "none" is used to get the capabilities of stratovirt. ```shell # cmdline --machine [type=]name[,dump-guest-core=on|off,mem-share=on|off] +-machine [type=]name[,dump-guest-core={on|off}][,mem-share={on|off}] ``` -### 1.2 Cpu Number +### 1.2 CPU Config + +#### 1.2.1 CPU Number StratoVirt supports to set the number of VCPUs(**nr_vcpus**). @@ -30,18 +32,36 @@ This allows you to set the maximum number of VCPUs that VM will support. The max By default, after booted, VM will online all CPUs you set. 
Four properties are supported for `smp`. * cpus: the number of VCPUs. -* sockets: the number of socket. (optional). If not set, default is the value of `cpus`. -* cores: the number of core. (optional). If not set, default is one. -* threads: the number of thread. (optional). If not set, default is one. -NB: the arguments of cpu topology is used to interconnect with libvirt, but the cpu topology of StratoVirt -is not supported yet. Therefore, it is better to ignore these three arguments (sockets, cores, threads). -If it is configured, the sockets number should equals to the number of cpu, `cores` should be `1` -and `threads` should be `1`. +* maxcpus: the number of max VCPUs. +* sockets: the number of socket. (optional). If not set, its value depends on the value of `maxcpus`. On the arm machine, if you start a microvm, the value of socket must be one so far. +* dies: the number of dies. (optional). If not set, default is one. +* clusters: the number of clusters. (optional). If not set, default is one. +* cores: the number of core. (optional). If not set, its value depends on the value of `maxcpus`. +* threads: the number of thread. (optional). If not set, its value depends on the value of `maxcpus`. + +NB: the arguments of cpu topology is used to interconnect with libvirt. + +If it is configured, sockets * dies * clusters * cores * threads must be equal to maxcpus, and maxcpus should be larger than or equal to cpus. + + +```shell +# cmdline +-smp [cpus=]n[,maxcpus=][,sockets=][,dies=][,clusters=][,cores=][,threads=] +``` +#### 1.2.2 CPU Features + +StratoVirt allows the configuration of CPU features. + +Currently, these options are supported. + +* CPU Family: Set the CPU family for VM, default to `host`, and this is the only supported variant currently. +* pmu: This enables armv8 PMU for VM. Should be `off` or `on`, default to `off`. (Currently only supported on aarch64) +* sve: This enables SVE feature for VM. Should be `off` or `on`, default to `off`. (Currently only supported on aarch64) ```shell # cmdline --smp [cpus=]n[,sockets=n,cores=1,threads=1] +-cpu host[,pmu={on|off}][,sve={on|off}] ``` ### 1.3 Memory @@ -51,21 +71,26 @@ and `threads` should be `1`. StratoVirt supports to set the size of VM's memory in cmdline. This allows you to set the size of memory that VM will support. -You can choose `G` as unit (default unit is `M`). +You can choose `G` as unit (default unit is `M`). And the memory size needs to be an integer. -Default VM memory size is 256M. The supported VM memory size is among [256M, 512G]. +Default VM memory size is 256M. The supported VM memory size is among [128M, 512G]. ```shell # cmdline --m [size=]megs +-m [size=][m|M|g|G] + -m 256m -m 256 -m 1G ``` #### 1.3.2 Memory Prealloc -Memory prealloc is supported by StratoVirt, users can use the following cmdline to configure -memory prealloc. +Memory Prealloc feature is used to preallocate VM physical memory in advance and create its page tables. +Using this feature, the number of page faults will decrease, and the memory access performance of the VM will improve. + +Note: This option will take effect the VM startup time. + +You can use the following cmdline to configure memory prealloc. ```shell -mem-prealloc @@ -80,13 +105,12 @@ The path has to be absolute path. ```shell # cmdline --mem-path /path/to/file --mem-path /path/to/dir +-mem-path ``` -### 1.4.1 hugepages +#### 1.4.1 hugepages -Memory backend file can be used to let guest use hugetlbfs on host. 
+Memory backend file can be used to let guest use hugetlbfs on host. It supports 2M or 1G hugepages memory. The following steps show how to use hugepages: ```shell @@ -100,10 +124,62 @@ $ sysctl vm.nr_hugepages=1024 $ cat /proc/meminfo # run StratoVirt with backend-file -... -mem-path /path/to/hugepages ... +... -mem-path +``` + +### 1.5 NUMA node +The optional NUMA node element gives the opportunity to create a virtual machine with non-uniform memory accesses. +The application of NUMA node is that one region of memory can be set as fast memory, another can be set as slow memory. +The configuration items(mem-path, mem-prealloc) here will cause the global configuration to be invalidated + +Each NUMA node is given a list of command lines option, there will be described in detail below. +1. -object memory-backend-ram,size=,id=[,policy=][,host-nodes=<0>][,mem-prealloc=][,dump-guest-core=][,share=] + -object memory-backend-file,size=,id=[,host-nodes=<0-1>][,policy=bind][,mem-path=][,dump-guest-core=][,mem-prealloc=][,share=] + -object memory-backend-memfd,size=,id=[,host-nodes=0-1][,policy=bind][,mem-prealloc=][,dump-guest-core=][,share=] + It describes the size and id of each memory zone, the policy of binding to host memory node. + you should choose `G` or `M` as unit for each memory zone. The host-nodes id must exist on host OS. + The optional policies are default, preferred, bind and interleave. If it is not configured, `default` is used. +2. -numa node,cpus=0-1,memdev=mem0 + It describes id and cpu set of the NUMA node, and the id belongs to which memory zone. +3. -numa dist,src=0,dst=0,val=10 + It describes the distance between source and destination. The default of source to source is 10, + source to destination is 20. And if you choose not to set these parameters, the VM will set the default values. + +Note: The maximum number of numa nodes is not more than 8. + +The following command shows how to set NUMA node: + +```shell +# The number of cpu must be set to be the same as numa node cpu. +-smp 8 + +# The memory size must be set to be the same as numa node mem. +-m 4G + +-object memory-backend-ram,size=2G,id=mem0,host-nodes=0-1,policy=bind +-object memory-backend-ram,size=2G,id=mem1,host-nodes=0-1,policy=bind +or +-object memory-backend-file,size=2G,id=mem0,host-nodes=0-1,policy=bind,mem-path=/path/to/file +-object memory-backend-memfd,size=2G,id=mem1,host-nodes=0-1,policy=bind,mem-prealloc=true + +-numa node,nodeid=0,cpus=0-1:4-5,memdev=mem0 +-numa node,nodeid=1,cpus=2-3:6-7,memdev=mem1 +[-numa dist,src=0,dst=0,val=10] +[-numa dist,src=0,dst=1,val=20] +[-numa dist,src=1,dst=0,val=20] +[-numa dist,src=1,dst=1,val=10] +``` + +Detailed configuration instructions: +``` +-object memory-backend-ram,size=,id=,policy={bind|default|preferred|interleave},host-nodes= +-object memory-backend-file,size=,id=,policy={bind|default|preferred|interleave},host-nodes=,mem-path=[,dump-guest-core=] +-object memory-backend-memfd,size=,id=[,host-nodes=0-1][,policy=bind][,mem-prealloc=true][,dump-guest-core=false] +-numa node[,nodeid=][,cpus=[-][:[-]]][,memdev=] +-numa dist,src=,dst=,val= ``` -### 1.5 Kernel and Kernel Parameters +### 1.6 Kernel and Kernel Parameters StratoVirt supports to launch PE or bzImage (only x86_64) format linux kernel 4.19 and can also set kernel parameters for VM. @@ -114,11 +190,14 @@ And the given kernel parameters will be actually analyzed by boot loader. 
``` shell # cmdline --kernel /path/to/kernel \ +-kernel \ +-append + +for example: -append "console=ttyS0 rebook=k panic=1 pci=off tsc=reliable ipv6.disable=1" ``` -### 1.6 Initrd Configuration +### 1.7 Initrd Configuration StratoVirt supports to launch VM by a initrd (boot loader initialized RAM disk) as well. @@ -128,10 +207,10 @@ If you want to use initrd as rootfs, `root=/dev/ram` and `rdinit=/bin/sh` must b ```shell # cmdline --initrd /path/to/initrd +-initrd ``` -### 1.7 Global config +### 1.8 Global config Users can set the global configuration using the -global parameter. @@ -140,10 +219,10 @@ One property can be set: * pcie-root-port.fast-unplug: the fast unplug feature switch, only Kata is supported. ```shell --global pcie-root-port.fast-unplug=1 +-global pcie-root-port.fast-unplug={0|1} ``` -### 1.8 Logging +### 1.9 Logging StratoVirt supports to output log to stderr and log file. @@ -153,13 +232,14 @@ You can enable StratoVirt's logging by: # Output log to stderr -D # Output log to log file --D /path/to/log/file +-D ``` StratoVirt's log-level depends on env `STRATOVIRT_LOG_LEVEL`. StratoVirt supports five log-levels: `trace`, `debug`, `info`, `warn`, `error`. The default level is `error`. +If "-D" parameter is not set, logs are output to stderr by default. -### 1.9 Daemonize +### 1.10 Daemonize StratoVirt supports to run as a daemon. @@ -174,7 +254,43 @@ And you can also restore StratoVirt's **pid number** to a file by: ```shell # cmdline --pidfile /path/to/pidfile +-pidfile +``` + +### 1.11 Smbios +The SMBIOS specification defines the data structures and information that will enter the data structures associated with the system. Having these fields populate the data associated with each system enables system administrators to identify and manage these systems remotely. + +```shell +# cmdline +# type 0: BIOS information, support version and release date string. +-smbios type=0[,vendor=str][,version=str][,date=str] +# type 1: System information, the information in this structure defines attributes of +# the overall system and is intended to be associated with the Component ID group of the system’s MIF. +-smbios type=1[,manufacturer=str][,version=str][,product=str][,serial=str][,uuid=str][,sku=str][,family=str] +# type 2: Baseboard information, the information in this structure defines attributes of a system baseboard +# (for example, a motherboard, planar, server blade, or other standard system module). +-smbios type=2[,manufacturer=str][,product=str][,version=str][,serial=str][,asset=str][,location=str] +# type 3: System Enclosure information, defines attributes of the system’s mechanical enclosure(s). +# For example, if a system included a separate enclosure for its peripheral devices, +# two structures would be returned: one for the main system enclosure and the second for the peripheral device enclosure. +-smbios type=3[,manufacturer=str][,version=str][,serial=str][,asset=str][,sku=str] +# type 4: Processor information, defines the attributes of a single processor; +# a separate structure instance is provided for each system processor socket/slot. 
+# For example, a system with an IntelDX2 processor would have a single structure instance +# while a system with an IntelSX2 processor would have a structure to describe the main CPU +# and a second structure to describe the 80487 co-processor +-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,max-speed=%d][,current-speed=%d] +# type 17: Memory Device,this structure describes a single memory device. +-smbios type=17[,loc_pfx=str][,bank=str][,manufacturer=str][,serial=str][,asset=str][,part=str][,speed=%d] + +``` + +### 1.12 Hardware Signature +This option is used for configuring ACPI Hardware Signature, which is used for VM S4 state. It's an 32 bit integer. For more information, please refer to https://uefi.org/htmlspecs/ACPI_Spec_6_4_html/05_ACPI_Software_Programming_Model/ACPI_Software_Programming_Model.html#firmware-acpi-control-structure-facs-table. + +```shell +# cmdline +-hardware-signature 1 ``` ## 2. Device Configuration @@ -188,8 +304,9 @@ root bus named pcie.0. As a result, a total of 32 pci devices can be configured. ### 2.1 iothread -Iothread is used by devices to improve io performance. StratoVirt will spawn some extra threads due to `iothread` configuration, -and these threads can be used by devices exclusively improving performance. +Iothread is used by devices to improve io performance. StratoVirt will spawn some extra threads due to `iothread` configuration, and these threads can be used by devices exclusively improving performance. + +Note: iothread is strongly recommended if a specific device supports it, otherwise the main thread has the risk of getting stuck. There is only one argument for iothread: @@ -197,42 +314,118 @@ There is only one argument for iothread: ```shell # cmdline --object iothread,id=iothread1 -object iothread,id=iothread2 +-object iothread,id= ``` ### 2.2 Virtio-blk Virtio block device is a virtual block device, which process read and write requests in virtio queue from guest. -Nine properties are supported for virtio block device. +fourteen properties are supported for virtio block device. -* drive_id: unique device-id in StratoVirt. -* path_on_host: the path of block device in host. -* serial_num: serial number of virtio block. (optional) -* read_only: whether virtio block device is read-only. If not set, default is false. -* direct: open block device with `O_DIRECT` mode. If not set, default is true. -* iothread: indicate which iothread will be used, if not specified the main thread will be used. (optional) +* id: unique device-id in StratoVirt. +* file: the path of backend file on host. +* serial: serial number of virtio block. (optional) +* readonly: whether virtio block device is read-only. (optional) If not set, default is false. +* direct: open block device with `O_DIRECT` mode. (optional) If not set, default is true. +* iothread: indicate which iothread will be used. (optional) if not set, the main thread will be used. * throttling.iops-total: used to limit IO operations for block device. (optional) -* if: drive type, for block drive, it should be `none`. If not set, default is `none` (optional) -* format: the format of block image, default value `raw`. NB: currently only `raw` is supported. (optional) -If not set, default is raw. - -For virtio-blk-pci, two more properties are required. +* discard: free up unused disk space. (optional) `unmap/ignore` means `on/off`. If not set, default is `ignore`. +* detect-zeroes: optimize writing zeroes to disk space. 
(optional) `unmap` means it can free up disk space when discard is `unmap`. If discard is `ignore`, `unmap` of detect-zeroes is same as `on`. If not set, default is `off`. +* if: drive type, for block drive, it should be `none`. (optional) If not set, default is `none`. +* format: the format of block image. (optional) Possible values are `raw` or `qcow2`. If not set, default is `raw`. NB: currently only `raw` is supported for microvm. +* num-queues: the optional num-queues attribute controls the number of queues to be used for block device. (optional) The max queues number supported is 32. If not set, the default block queue number is the smaller one of vCPU count and the max queues number (e.g, min(vcpu_count, 32)). +* bootindex: the boot order of block device. (optional) If not set, the priority is lowest. +The number ranges from 0 to 255, the smaller the number, the higher the priority. +It determines the order of bootable devices which firmware will use for booting the guest OS. +* aio: the aio type of block device (optional). Possible values are `native`, `io_uring`, or `off`. If not set, default is `native` if `direct` is true, otherwise default is `off`. + +For virtio-blk-pci, four more properties are required. * bus: name of bus which to attach. * addr: including slot number and function number. The first number represents slot number of device and the second one represents function number of it. +* multifunction: whether to open multi-function for device. (optional) If not set, default is false. +* queue-size: the optional virtqueue size for all the queues. (optional) Configuration range is (2, 1024] and queue size must be power of 2. Default queue size is 256. If you want to boot VM with a virtio block device as rootfs, you should add `root=DEVICE_NAME_IN_GUESTOS` in Kernel Parameters. `DEVICE_NAME_IN_GUESTOS` will from `vda` to `vdz` in order. ```shell # virtio mmio block device. --drive id=drive_id,file=path_on_host[,readonly=off,direct=off,throttling.iops-total=200] --device virtio-blk-device,drive=drive_id,id=blkid[,iothread=iothread1,serial=serial_num] +-drive id=,file=[,readonly={on|off}][,direct={on|off}][,throttling.iops-total=][,discard={unmap|ignore}][,detect-zeroes={unmap|on|off}] +-device virtio-blk-device,drive=,id=[,iothread=][,serial=] # virtio pci block device. --drive id=drive_id,file=path_on_host[,readonly=off,direct=off,throttling.iops-total=200] --device virtio-blk-pci,drive=drive_id,bus=pcie.0,addr=0x3.0x0,id=blk-0[,multifunction=on,iothread=iothread1,serial=serial_num] +-drive id=,file=[,readonly={on|off}][,direct={on|off}][,throttling.iops-total=][,discard={unmap|ignore}][,detect-zeroes={unmap|on|off}] +-device virtio-blk-pci,id=,drive=,bus=,addr=<0x3>[,multifunction={on|off}][,iothread=][,serial=][,num-queues=][,bootindex=][,queue-size=] + +``` + +StratoVirt also supports vhost-user-blk to get a higher performance in storage. + +You can use it by adding a new device, one more property is supported by vhost-user-blk device than virtio-blk. + +* chardev: id for char device, that means you need to add a chardev first, and use its id to find the character device. +```shell +# vhost user blk mmio device +-chardev socket,id=,path= +-device vhost-user-blk-device,id=,chardev=[,num-queues=][,queue-size=] +# vhost user blk pci device +-chardev socket,id=,path= +-device vhost-user-blk-pci,id=,chardev=,bus=,addr=<0x3>[,num-queues=][,bootindex=][,queue-size=] +``` + +Note: More features to be supported. 
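+
+As one possible concrete instance of the pci form above (the id, socket path and slot number simply mirror the full boot template shown later in this section):
+
+```shell
+# values match the SPDK-backed example below
+-chardev socket,id=spdk_vhost_blk0,path=/var/tmp/spdk.sock
+-device vhost-user-blk-pci,id=blk1,chardev=spdk_vhost_blk0,bus=pcie.0,addr=0x3
+```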
+ +It should open sharing memory('-mem-share=on') and hugepages('-mem-path ...' ) when using vhost-user-blk. + +Vhost-user-blk use spdk as vhost-backend, so you need to start spdk before starting stratovirt. + +*How to start and configure spdk?* + +``` shell +# Get code and compile spdk +$ git clone https://github.com/spdk/spdk.git +$ cd spdk +$ git submodule update --init +$ ./scripts/pkgdep.sh +$ ./configure +$ make + +# Test spdk environment +$ ./test/unit/unittest.sh + +# Setup spdk +$ HUGEMEM=2048 ./scripts/setup.sh +# Mount huge pages, you need to add -mem-path=/dev/hugepages in stratovirt config +$ mount -t hugetlbfs hugetlbfs /dev/hugepages +# Assign the number of the hugepage +$ sysctl vm.nr_hugepages=1024 +# Start vhost, alloc 1024MB memory, default socket path is /var/tmp/spdk.sock, 0x3 means we use cpu cores 0 and 1 (cpumask 0x3) +$ build/bin/vhost --logflag vhost_blk -S /var/tmp -s 1024 -m 0x3 & +# Create a malloc bdev which size is 128MB, block size is 512B +$ ./scripts/rpc.py bdev_malloc_create 128 512 -b Malloc0 +# Create a vhost-blk device exposing Malloc0 bdev, the I/O polling will be pinned to the CPU 0 (cpumask 0x1). +$ ./scripts/rpc.py vhost_create_blk_controller --cpumask 0x1 spdk.sock Malloc0 +``` +A config template to start stratovirt with vhost-user-blk-pci as below: + +``` shell +stratovirt \ + -machine q35,mem-share=on \ + -smp 1 \ + -kernel /path-to/std-vmlinuxz \ + -mem-path /dev/hugepages \ + -m 1G \ + -append "console=ttyS0 reboot=k panic=1 root=/dev/vda rw" \ + -drive file=/path-to/OVMF_CODE.fd,if=pflash,unit=0,readonly=true \ + -drive file=/path-to/OVMF_VARS.fd,if=pflash,unit=1 \ + -drive file=/path-to/openEuler.img,id=rootfs,readonly=off,direct=off \ + -device virtio-blk-pci,drive=rootfs,id=blk0,bus=pcie.0,addr=0x2,bootindex=0 \ + -chardev socket,id=spdk_vhost_blk0,path=/var/tmp/spdk.sock \ + -device vhost-user-blk-pci,id=blk1,chardev=spdk_vhost_blk0,bus=pcie.0,addr=0x3\ + -qmp unix:/path-to/stratovirt.socket,server,nowait \ + -serial stdio ``` ### 2.3 Virtio-net @@ -240,54 +433,78 @@ If you want to boot VM with a virtio block device as rootfs, you should add `roo Virtio-net is a virtual Ethernet card in VM. It can enable the network capability of VM. Six properties are supported for netdev. -* tap: the type of net device. NB: currently only tap is supported. +* tap/vhost-user: the type of net device. NB: currently only tap and vhost-user is supported. + * id: unique netdev id. + * ifname: name of tap device in host. -* fd: the file descriptor of opened tap device. + +* fd: the file descriptor of opened tap device. + * fds: file descriptors of opened tap device. -* queues: the optional queues attribute controls the number of queues to be used for either multiple queue virtio-net or vhost-net device. -NB: to configure a tap device, use either `fd` or `ifname`, if both of them are given, -the tap device would be created according to `ifname`. + +* queues: the optional queues attribute controls the number of queues to be used for either multiple queue virtio-net or + vhost-net device. The max queues number supported is no more than 16. + +NB: to configure a tap device, use either `fd` or `ifname`, if both of them are given, the tap device would be created according to `ifname`. Eight properties are supported for virtio-net-device or virtio-net-pci. * id: unique net device id. * iothread: indicate which iothread will be used, if not specified the main thread will be used. It has no effect when vhost is set. 
+* rx-iothread: set the receiving task in this iothread, if not specified the former parameter iothread will be used. +* tx-iothread: set the sending task in this iothread, if not specified the former parameter iothread will be used. * netdev: netdev of net device. * vhost: whether to run as a vhost-net device. * vhostfd: the file descriptor of opened tap device. * vhostfds: file descriptors of opened tap device. -* mac: set mac address in VM (optional). +* mac: set mac address in VM (optional). A default mac address will be created when it is not assigned by user. So, it may + cause the same mac address between two virtio-net devices when one device has mac and the other hasn't. * mq: the optional mq attribute enable device multiple queue feature. -Two more properties are supported for virtio pci net device. +Three more properties are supported for virtio pci net device. * bus: name of bus which to attach. * addr: including slot number and function number. The first number represents slot number -of device and the second one represents function number of it. For virtio pci net device, it +of device and the second one represents function number of it. For virtio pci net device, it is a single function device, the function number should be set to zero. +* queue-size: the optional virtqueue size for all the queues. (optional) Configuration range is [256, 4096] and queue size must be power of 2. Default queue size is 256. ```shell # virtio mmio net device --netdev tap,id=netdevid,ifname=host_dev_name --device virtio-net-device,netdev=netdevid,id=netid[,iothread=iothread1,mac=12:34:56:78:9A:BC] +-netdev tap,id=,ifname= +-device virtio-net-device,id=,netdev=[,iothread=][,rx-iothread=][,tx-iothread=][,mac=] # virtio pci net device --netdev tap,id=netdevid,ifname=host_dev_name[,queues=N] --device virtio-net-pci,netdev=netdevid,id=netid,bus=pcie.0,addr=0x2.0x0[,multifunction=on,iothread=iothread1,mac=12:34:56:78:9A:BC,mq=on] +-netdev tap,id=,ifname=[,queues=] +-device virtio-net-pci,id=,netdev=,bus=,addr=<0x2>[,multifunction={on|off}][,iothread=][,rx-iothread=][,tx-iothread=][,mac=][,mq={on|off}][,queue-size=] ``` -StratoVirt also supports vhost-net to get a higher performance in network. It can be set by +StratoVirt also supports vhost-net to get a higher performance in network. It can be set by giving `vhost` property, and one more property is supported for vhost-net device. -* vhostfd: fd for vhost-net device, it could be configured when `vhost=on`. If this argument is not +* vhostfd: fd for vhost-net device, it could be configured when `vhost=on`. If this argument is not given when `vhost=on`, StratoVirt gets it by opening "/dev/vhost-net" automatically. ```shell # virtio mmio net device --netdev tap,id=netdevid,ifname=host_dev_name,vhost=on[,vhostfd=2] --device virtio-net-device,netdev=netdevid,id=netid[,iothread=iothread1,mac=12:34:56:78:9A:BC] +-netdev tap,id=,ifname=[,vhost=on[,vhostfd=]] +-device virtio-net-device,id=,netdev=[,iothread=][,mac=] # virtio pci net device --netdev tap,id=netdevid,ifname=host_dev_name,vhost=on[,vhostfd=2,queues=N] --device virtio-net-pci,netdev=netdevid,id=netid,bus=pcie.0,addr=0x2.0x0[,multifunction=on,iothread=iothread1,mac=12:34:56:78:9A:BC,mq=on] +-netdev tap,id=,ifname=[,vhost=on[,vhostfd=,queues=]] +-device virtio-net-pci,id=,netdev=,bus=,addr=<0x2>[,multifunction={on|off}][,iothread=][,mac=][,mq={on|off}] +``` + +StratoVirt also supports vhost-user net to get a higher performance by ovs-dpdk. 
+It should open sharing memory('-mem-share=on') and hugepages('-mem-path ...' ) when using vhost-user net. + +```shell +# virtio mmio net device +-chardev socket,id=chardevid,path=socket_path +-netdev vhost-user,id=,chardev= +-device virtio-net-device,id=,netdev=[,iothread=][,mac=] +# virtio pci net device +-chardev socket,id=chardevid,path=socket_path +-netdev vhost-user,id=,chardev=[,queues=] +-device virtio-net-pci,id=,netdev=,bus=,addr=<0x2>[,multifunction={on|off}][,iothread=][,mac=][,mq={on|off}] ``` *How to set a tap device?* @@ -297,8 +514,9 @@ given when `vhost=on`, StratoVirt gets it by opening "/dev/vhost-net" automatica $ brctl addbr qbr0 $ ip tuntap add tap0 mode tap $ brctl addif qbr0 tap0 -$ ifconfig qbr0 up; ifconfig tap0 up -$ ifconfig qbr0 1.1.1.1 +$ ip link set qbr0 up +$ ip link set tap0 up +$ ip address add 1.1.1.1/24 dev qbr0 # Run StratoVirt ... -netdev tap,id=netdevid,ifname=tap0 ... @@ -317,38 +535,79 @@ note: If you want to use multiple queues, create a tap device as follows: $ brctl addbr qbr0 $ ip tuntap add tap1 mode tap multi_queue $ brctl addif qbr0 tap1 -$ ifconfig qbr0 up; ifconfig tap1 up -$ ifconfig qbr0 1.1.1.1 +$ ip link set qbr0 up +$ ip link set tap1 up +$ ip address add 1.1.1.1/24 dev qbr0 +``` + +*How to create port by ovs-dpdk?* + +```shell +# Start open vSwitch daemons +$ ovs-ctl start +# Initialize database +$ ovs-vsctl init +# Dpdk init +$ ovs-vsctl set Open_vSwitch . other_config:dpdk-init=true +# Set up dpdk lcore mask +$ ovs-vsctl set Open_vSwitch . other_config:dpdk-lcore-mask=0xf +# Set up hugepages for dpdk-socket-mem (2G) +$ ovs-vsctl set Open_vSwitch . other_config:dpdk-socket-mem=1024 +# Set up PMD(Pull Mode Driver) cpu mask +$ ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf +# Add bridge +$ ovs-vsctl add-br ovs_br -- set bridge ovs_br datapath_type=netdev +# Add port +$ ovs-vsctl add-port ovs_br port1 -- set Interface port1 type=dpdkvhostuser +$ ovs-vsctl add-port ovs_br port2 -- set Interface port2 type=dpdkvhostuser +# Set num of rxq/txq +$ ovs-vsctl set Interface port1 options:n_rxq=num,n_txq=num +$ ovs-vsctl set Interface port2 options:n_rxq=num,n_txq=num ``` ### 2.4 Virtio-console -Virtio console is a general-purpose serial device for data transfer between the guest and host. -Character devices at /dev/hvc0 to /dev/hvc7 in guest will be created once setting it. -To set the virtio console, chardev for redirection will be required. See [section 2.12 Chardev](#212-chardev) for details. +Virtio console device is a simple device for data transfer between the guest and host. A console device may have +one or more ports. These ports could be generic ports or console ports. Character devices /dev/vport\*p\* in linux +guest will be created once setting a port (Whether it is a console port or not). Character devices at /dev/hvc0 to +/dev/hvc7 in linux guest will be created once setting console port. To set the virtio console, chardev for +redirection will be required. See [section 2.12 Chardev](#212-chardev) for details. -Two properties can be set for virtconsole. +Three properties can be set for virtconsole(console port) and virtserialport(generic port). * id: unique device-id. -* chardev: char device of virtio console device. +* chardev: char device of this console/generic port. +* nr: unique port number for this port. (optional) If set, all virtserialports and virtconsoles should set. nr = 0 is only allowed for virtconsole. The default nr for generic port starts from 1 and starts from 0 for console port. 
If not set, nr = 0 will be assigned to the first console port in the command line. And nr = 0 will be reserved if there is no console port in the command line. -For virtio-serial-pci, two more properties are required. +For virtio-serial-pci, Four more properties are required. * bus: bus number of virtio console. -* addr: including slot number and function number. The first number represents slot number -of device and the second one represents function number of it. +* addr: including slot number and function number. The first number represents slot number of device and the second one represents function number of it. +* multifunction: whether to open multi-function for device. (optional) If not set, default is false. +* max_ports: max number of ports we can have for a virtio-serial device. Configuration range is [1, 31]. (optional) If not set, default is 31. + +For virtio-serial-device, Two more properties are required. +* bus: bus number of virtio console. +* addr: including slot number and function number. The first number represents slot number of device and the second one represents function number of it. ```shell -# virtio mmio device --device virtio-serial-device[,id=virtio-serial0] --chardev socket,path=socket_path,id=virtioconsole1,server,nowait --device virtconsole,chardev=virtioconsole1,id=console_id +# virtio mmio device using console port +-device virtio-serial-device[,id=] +-chardev socket,path=,id=,server,nowait +-device virtconsole,id=,chardev=,nr=0 + +# virtio mmio device using generic port +-device virtio-serial-device[,id=] +-chardev socket,path=,id=,server,nowait +-device virtserialport,id=,chardev=,nr=0 # virtio pci device --device virtio-serial-pci,bus=pcie.0,addr=0x1.0x0,id=virtio-serial0[,multifunction=on] --chardev socket,path=socket_path,id=virtioconsole1,server,nowait --device virtconsole,chardev=virtioconsole1,id=console_id +-device virtio-serial-pci,id=,bus=,addr=<0x3>[,multifunction={on|off},max_ports=] +-chardev socket,path=,id=,server,nowait +-device virtconsole,id=,chardev=,nr=0 +-chardev socket,path=,id=,server,nowait +-device virtserialport,id=,chardev=,nr=1 ``` NB: -Currently, only one virtio console device is supported in standard machine. +Currently, only one virtio console device is supported. Only one port is supported in microvm. ### 2.5 Virtio-vsock @@ -374,10 +633,10 @@ of device and the second one represents function number of it. ```shell # virtio mmio device. --device vhost-vsock-device,id=vsock_id,guest-cid=3 +-device vhost-vsock-device,id=,guest-cid= # virtio pci device. --device vhost-vsock-pci,id=vsock_id,guest-cid=3,bus=pcie.0,addr=0x1.0x0[,multifunction=on] +-device vhost-vsock-pci,id=,guest-cid=,bus=,addr=<0x3>[,multifunction={on|off}] ``` *You can only set one virtio vsock device for one VM.* @@ -404,25 +663,26 @@ NB: We can only set *one* serial. To use the first method, chardev for redirection will be required. See [section 2.12 Chardev](#212-chardev) for details. ```shell # add a chardev and redirect the serial port to chardev --chardev backend,id=chardev_id[,path=path,server,nowait] +-chardev backend,id=[,path=,server,nowait] -serial chardev:chardev_id ``` Or you can simply use `-serial dev` to bind serial with character device. 
```shell -# simplifed redirect methods +# simplified redirect methods -serial stdio -serial pty --serial socket,path=socket_path,server,nowait --serial file,path=file_path +-serial socket,path=,server,nowait +-serial socket,port=[,host=],server,nowait +-serial file,path= ``` ### 2.7 Virtio-balloon Balloon is a virtio device, it offers a flex memory mechanism for VM. -Only one property is supported for virtio-balloon. -* deflate_on_oom: whether to deflate balloon when there is no enough memory in guest. -This feature can prevent OOM occur in guest. +Two properties are supported for virtio-balloon. +* deflate_on_oom: Deflate balloon on guest out of memory condition. If deflate_on_oom has not been negotiated, the driver MUST NOT use pages from the balloon when num_pages is less than or equal to the actual number of pages in the balloon. If deflate_on_oom has been negotiated, the driver MAY use pages from the balloon when num_pages is less than or equal to the actual number of pages in the balloon if this is required for system stability (e.g. if memory is required by applications running within the guest). This feature may prevent OOM occur in guest. +* free_page_reporting: whether to release free guest pages. This feature can be used to reuse memory. For virtio-balloon-pci, two more properties are required. * bus: name of bus which to attach. @@ -431,11 +691,14 @@ of device and the second one represents function number of it. ```shell # virtio mmio balloon device --device virtio-balloon-device,deflate-on-oom=true +-device virtio-balloon-device[,deflate-on-oom={true|false}][,free-page-reporting={true|false}] # virtio pci balloon device --device virtio-balloon-pci,bus=pcie.0,addr=0x4.0x0,deflate-on-oom=true,id=balloon-0[,multifunction=on] +-device virtio-balloon-pci,id=,bus=,addr=<0x4>[,deflate-on-oom={true|false}][,free-page-reporting={true|false}][,multifunction={on|off}] ``` +Note: avoid using balloon devices and vfio devices together, balloon device is invalid when memory is hugepages. +The balloon memory size must be an integer multiple of guest page size. + ### 2.8 Virtio-rng Virtio rng is a paravirtualized random number generator device, it provides a hardware rng device to the guest. @@ -457,16 +720,16 @@ single function device, the function number should be set to zero. NB: * The limited rate will be transformed to bytes/sec. For instance: if period=100, max-bytes=100; the final result is that the max number of bytes generated by rng device is 1000. - * Limited rate should between 64(include) and 1000000000(not include). In other words: - 64 <= max-bytes/period < 1000000000. + * The limited rate should be between 64(included) and 1000000000(included), that is: + 64 <= max-bytes/period\*1000 <= 1000000000. ```shell # virtio mmio rng device --object rng-random,id=objrng0,filename=/path/to/random_file --device virtio-rng-device,rng=objrng0,max-bytes=1234,period=1000 +-object rng-random,id=,filename= +-device virtio-rng-device,rng=,max-bytes=<1234>,period=<1000> # virtio pci rng device --object rng-random,id=objrng0,filename=/path/to/random_file --device virtio-rng-pci,rng=objrng0,max-bytes=1234,period=1000,bus=pcie.0,addr=0x1.0x0,id=rng-id[,multifunction=on] +-object rng-random,id=,filename= +-device virtio-rng-pci,id=,rng=[,max-bytes=<1234>][,period=<1000>],bus=,addr=<0x1>[,multifunction={on|off}] ``` ### 2.9 PCIe root port @@ -479,11 +742,11 @@ Four parameters are supported for pcie root port. * addr: including slot number and function number. * id: the name of secondary bus. 
* chassis: the number of chassis. Interconnect with libvirt only.(optional). -* multifunction: whether to open multi function for pcie root port.(optional). +* multifunction: whether to open multi function for pcie root port.(optional). If not set, default value is false. ```shell --device pcie-root-port,port=0x1,addr=0x1,bus=pcie.0,id=pcie.1[,multifunction=on] +-device pcie-root-port,id=,port=<0x1>,bus=,addr=<0x1>[,multifunction={on|off}] ``` **The slot number of the device attached to the root port must be 0** @@ -502,8 +765,7 @@ Four properties can be set for PFlash device. ```shell # cmdline --drive file=/path/to/code_storage_file,if=pflash,unit=0[,readonly=true] --drive file=/path/to/data_storage_file,if=pflash,unit=1, +-drive file=,if=pflash,unit={0|1}[,readonly={true|false}] ``` ### 2.11 VFIO @@ -518,7 +780,7 @@ Four properties are supported for VFIO device * addr: including slot number and function number. ```shell --device vfio-pci,host=0000:1a:00.3,id=net,bus=pcie.0,addr=0x03.0x0[,multifunction=on] +-device vfio-pci,id=,host=<0000:1a:00.3>,bus=,addr=<0x03>[,multifunction={on|off}] ``` Note: the kernel must contain physical device drivers, otherwise it cannot be loaded normally. @@ -528,51 +790,438 @@ See [VFIO](./vfio.md) for more details. ### 2.12 Chardev The type of chardev backend could be: stdio, pty, socket and file(output only). -Five properties can be set for chardev. +One property can be set for chardev of stdio or pty type. +* id: unique chardev-id. +Four properties can be set for chardev of unix-socket type. * id: unique chardev-id. -* backend: the type of redirect method. -* path: the path of backend in the host. This argument is only required for socket-type chardev and file-type chardev. -* server: run as a server. This argument is only required for socket-type chardev. -* nowait: do not wait for connection. This argument is only required for socket-type chardev. +* path: path to the unix-socket file on the host. +* server: run as a server. +* nowait: do not wait for connection. + +Five properties can be set for chardev of tcp-socket type. +* id: unique chardev-id. +* host: host for binding on (in case of server mode) or connecting to (in case of non-server mode). Default value for binding is '0.0.0.0'. +* port: port for binding on (in case of server mode) or connecting to (in case of non-server mode). +* server: run as a server. +* nowait: do not wait for connection. + +Two properties can be set for chardev of file type. +* id: unique chardev-id. +* path: path to the input data file on the host. ```shell # redirect methods --chardev stdio,id=chardev_id --chardev pty,id=chardev_id --chardev socket,id=chardev_id,path=socket_path,server,nowait --chardev file,id=chardev_id,path=file_path +-chardev stdio,id= +-chardev pty,id= +-chardev socket,id=,path=[,server,nowait] +-chardev socket,id=,port=[,host=][,server,nowait] +-chardev file,id=,path= +``` + +### 2.13 USB +StratoVirt supports XHCI USB controller, you can attach USB devices under XHCI USB controller. + +#### 2.13.1 USB controller +USB controller is a pci device which can be attached USB device. + +Three properties can be set for USB controller. + +* id: unique device id. +* bus: bus number of the device. +* addr: including slot number and function number. +* iothread: indicate which iothread will be used, if not specified the main thread will be used. 
(optional) + +```shell +-device nec-usb-xhci,id=,bus=,addr=<0xa>[,iothread=] +``` + +Note: Only one USB controller can be configured, USB controller can only support USB keyboard and USB tablet. + +#### 2.13.2 USB Keyboard +The USB keyboard is a keyboard that uses the USB protocol. It should be attached to USB controller. Keypad and led are not supported yet. + +One property can be set for USB Keyboard. + +* id: unique device id. + +```shell +-device usb-kbd,id= +``` + +Note: Only one keyboard can be configured. + +#### 2.13.3 USB Tablet +Pointer Device which uses alsolute coordinates. It should be attached to USB controller. + +One property can be set for USB Tablet. + +* id: unique device id. + +```shell +-device usb-tablet,id= ``` +Note: Only one tablet can be configured. + +#### 2.13.4 USB Camera +Video Camera Device that based on USB video class protocol. It should be attached to USB controller. + +3 properties can be set for USB Camera. + +* id: unique device id. +* backend: backend device type, either `v4l2` or `demo`. +* path: the file path used to connect to the backend, required for `v4l2`, but not for `demo`. eg. `/dev/video0`. + +```shell +-device usb-camera,id=,backend="v4l2",path="/dev/video0" +-device usb-camera,id=,backend="demo" +``` + +Note: Only one camera can be configured. + +Please see the [4. Build with features](docs/build_guide.md) if you want to enable usb-camera. + +#### 2.13.5 USB Storage +USB storage device that base on classic bulk-only transport protocol. It should be attached to USB controller. + +Three properties can be set for USB Storage. + +* id: unique device id. +* file: the path of backend image file. +* media: the media type of storage. Possible values are `disk` or `cdrom`. If not set, default is `disk`. + +```shell +-device usb-storage,drive=,id= +-drive id=,file=[,media={disk|cdrom}],aio=off,direct=false +``` + +Note: "aio=off,direct=false" must be configured and other aio/direct values are not supported. + +#### 2.13.6 USB Host +USB Host Device that based on USB protocol. It should be attached to USB controller. + +Six properties can be set for USB Host. + +* id: unique device id. +* hostbus: the bus number of the usb host device. +* hostaddr: the addr number of the usb host device. +* hostport: the physical number of the usb host device. +* vendorid: the vendor ID of the usb host device. +* productid: the product ID of the usb host device. +* isobufs: the number of Isochronous Transfers buffer. If not set, default is 4. +* isobsize: the size of Isochronous Transfers buffer. If not set, default is 32. + +Pass through the host device identified by bus and addr: + +```shell +-device usb-host,id=,hostbus=,hostaddr=[,isobufs=][,isobsize=] + +``` + +Pass through the host device identified by bus and physical port: + +```shell +-device usb-host,id=,hostbus=,hostport=[,isobufs=][,isobsize=] + +``` + +Pass through the host device identified by the vendor and product ID: + +```shell +-device usb-host,id=,vendorid=,productid=[,isobufs=][,isobsize=] + +``` + +Note: +1. The combination of vendor and product ID takes precedence over the combination of bus number and physical port number. +2. The combination of bus and physical port takes precedence over the combination of bus number and addr number. + +Please see the [4. Build with features](docs/build_guide.md) if you want to enable usb-host. + +#### 2.13.7 USB Uas +USB Mass Storage Device that is based on the USB Attached Scsi (UAS) protocol. It should be attached to USB controller. 
+ +Three properties can be set for USB Uas. + +* id: unique device id. +* file: the path of backend image file. +* media: the media type of storage. Possible values are `disk` or `cdrom`. If not set, default is `disk`. + +```shell +-device usb-uas,drive=,id= +-drive id=,file=[,media={disk|cdrom}],aio=off,direct=false +``` + +Note: "aio=off,direct=false" must be configured and other aio/direct values are not supported. + +### 2.14 Virtio Scsi Controller +Virtio Scsi controller is a pci device which can be attached scsi device. + +Six properties can be set for Virtio-Scsi controller. + +* id: unique device id. +* bus: bus number of the device. +* addr: including slot number and function number. +* iothread: indicate which iothread will be used, if not specified the main thread will be used. (optional) +* num-queues: the optional num-queues attribute controls the number of request queues to be used for the scsi controller. If not set, the default queue number is the smaller one of vCPU count and the max queues number (e.g, min(vcpu_count, 32)). The max queues number supported is no more than 32. (optional) +* queue-size: the optional virtqueue size for all the queues. Configuration range is (2, 1024] and queue size must be power of 2. Default queue size is 256. +```shell +-device virtio-scsi-pci,id=,bus=,addr=<0x3>[,multifunction={on|off}][,iothread=][,num-queues=][,queue-size=] +``` +### 2.15 Virtio Scsi HardDisk +Virtio Scsi HardDisk is a virtual block device, which process read and write requests in virtio queue from guest. + +Ten properties can be set for virtio-scsi hd. + +* file: the path of backend image file. +* id: unique device id. +* bus: scsi bus name, only support $scsi_controller_name + ".0" +* scsi-id: id number (target) of scsi four level hierarchical address (host, channel, target, lun). Configuration range is [0, 255]. Boot scsi disk configuration range is [0, 31]. +* lun: lun number (lun) of scsi four level hierarchical address (host, channel, target, lun). Configuration rage is [0, 255]. Boot scsi disk configuration range is [0, 7]. +* serial: serial number of virtio scsi device. (optional) +* readonly: whether scsi device is read-only or not. Default option is false. (optional) +* direct: open block device with `O_DIRECT` mode. (optional) If not set, default is true. +* aio: the aio type of block device (optional). Possible values are `native`, `io_uring`, or `off`. If not set, default is `native` if `direct` is true, otherwise default is `off`. +* bootindex: the boot order of the scsi device. (optional) If not set, the priority is lowest. +The number ranges from 0 to 255, the smaller the number, the higher the priority. +It determines the order of bootable devices which firmware will use for booting the guest OS. + +```shell +-device virtio-scsi-pci,bus=pcie.1,addr=0x0,id=scsi0[,multifunction=on,iothread=iothread1,num-queues=4] +-drive file=path_on_host,id=drive-scsi0-0-0-0[,readonly=true,aio=native,direct=true] +-device scsi-hd,bus=scsi0.0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0[,serial=123456,bootindex=1] +``` +### 2.16 Display + +Multiple display methods are supported by stratovirt, including `GTK` and `VNC`, which allows users to interact with virtual machine. + +Display on OpenHarmony OS(OHOS) is also supported, while a client program need to be implemented. + +#### 2.16.1 GTK + +Graphical interface drawn by gtk toolkits. Visit [GTK](https://www.gtk.org) for more details. + +Two properties can be set for GTK. 
+ +* appname: string of the program name, which will be drawn on the titlebar of the window. +* full-screen: if configured on, the initial window will cover the entire screen. + +Sample Configuration: + +```shell +-display gtk[,appname=,full-screen={on|off}] +``` + +Note: It should be ensured that gtk toolkits have been installed before using gtk. + +Please see the [4. Build with features](docs/build_guide.md) if you want to enable GTK. + +#### 2.16.2 VNC +VNC can provide the users with way to login virtual machines remotely. + +In order to use VNC, the ip and port value must be configured. The IP address can be set to a specified value or `0.0.0.0`, which means that all IP addresses on the host network card are monitored + +```shell +-vnc 0.0.0.0:0 +-vnc +``` + +Tls encryption is an optional configuration.Three properties can be set for encrypted transmission: + +* certificate type. +* id: unique object id. +* dir: certificate directory. You should place a legal institutional certificate, a server certificate, and a private key for certificate encryption in this directory. + +```shell +-object tls-creds-x509,id=,dir= +``` + +Authentication is an optional configuration, it depends on the saslauth service . To use this function, you must ensure that the saslauthd service is running normally, and configure the supported authentication mechanism in `/etc/sasl2/stratovirt. conf` + +Sample configuration for file `/etc/sasl2/stratovirt.conf` +```shell +# Using the saslauthd service +pwcheck_method: saslauthd +# Authentication mechanism +mech_list: plain +``` + +Three properties can be set for Authentication: + +- authz-simple +- id: unique object id. +- identity: specify the username that can log in. + +```shell +-object authz-simple,id=authz0,identity=username +``` + +Sample Configuration: + +```shell +-object authz-simple,id=authz0,identity=username +-object tls-creds-x509,id=vnc-tls-creds0,dir=/etc/pki/vnc +-vnc 0.0.0.0:0,tls-creds=vnc-tls-creds0,sasl,sasl-authz=authz0 +``` + +Note: 1. Only one client can be connected at the same time. Follow-up clients connections will result in failure. 2. TLS encrypted transmission can be configured separately, but authentication must be used together with encryption. + +Please see the [4. Build with features](docs/build_guide.md) if you want to enable VNC. + +#### 2.16.2 OHUI server + +OHUI server support display on OHOS. It relies on UDS for communication with OHUI client. Basically speaking, it works like VNC. +Client gets keyboard and mouse's action and sends it to server, and also draws VM's image on screen. +Server processes keyboard and mouse's action, and transfer VM's image. + +Sample Configuration: + +```shell +[-object iothread,id=] +-display ohui[,iothread=,socks-path=] +``` + +Note: "socks-path" specifies where UDS file is. It's "/tmp" by default. + +### 2.17 Virtio-fs +Virtio-fs is a shared file system that lets virtual machines access a directory tree on the host. Unlike existing approaches, it is designed to offer local file system semantics and performance. + +#### 2.17.1 virtio fs device +Three properties can be set for virtio fs device. 
+* chardevid: id for char device +* device_id: the unique id for device +* mount_tag: the mount tag of the shared directory which can be mounted in the guest + +```shell +# vhost user fs mmio device +-chardev socket,id=,path= +-device vhost-user-fs-device,id=,chardev=,tag= +# vhost user fs pci device +-chardev socket,id=,path= +-device vhost-user-fs-pci,id=,chardev=,tag= +``` + +#### 2.17.2 vhost_user_fs + +Note: The vhost_user_fs binary of StratoVirt has been removed. As there is a new Rust implementation of virtiofsd at "https://gitlab.com/virtio-fs/virtiofsd", it's marked as stable and existing project should consider to use it instead. + +*How to setup file sharing based on StratoVirt and virtiofsd?* + +```shell +host# Setup virtiofsd server, refer to "https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md" + +host# stratovirt \ + -machine type=q35,dump-guest-core=off,mem-share=on \ + -smp 1 \ + -m 1024 \ + -kernel \ + -append root=/dev/vda console=ttyS0 reboot=k panic=1 random.trust_cpu=on rw \ + -drive file=,if=pflash,unit=0 \ + -qmp unix:/tmp/qmp2.socket,server,nowait \ + -drive id=drive_id,file=,direct=on \ + -device virtio-blk-pci,drive=drive_id,bug=pcie.0,addr=1,id=blk -serial stdio -disable-seccomp \ + -chardev socket,id=virtio_fs,path=/path/to/virtiofsd.sock,server,nowait \ + -device vhost-user-fs-pci,id=device_id,chardev=virtio_fs,tag=myfs,bus=pcie.0,addr=0x7 + +guest# mount -t virtiofs myfs /mnt +``` + +### 2.18 virtio-gpu +virtio-gpu is an virtualized graphics card that lets virtual machines can display with it. +Usually used in conjunction with VNC, the final images is rendered to the VNC client. + +Sample Configuration: +```shell +-device virtio-gpu-pci,id=,bus=pcie.0,addr=0x2.0x0[,max_outputs=][,edid=true|false][,xres=][,yres= ][,max_hostmem=][,enable_bar0=true|false] +``` + +In addition to the required slot information, six optional properties are supported for virtio-gpu. +* max_outputs: Number of screens supported by the current graphics card. The maximum value is 16. (can switch by using ctrl + alt + , for details, see vnc Client switchover) +* edid: Edid feature, the virtual machine's kernel may checks this feature for HiDPi. You are advised to set to true. +* xres/yres: The size of the login windows. +* max_hostmem: The maximum memory that a graphics card can occupy on the host is expressed in byte. You are advised to set not less than 256MiB, otherwise the final supported resolutions is affected. +* enable_bar0: Enable a 64M bar0 in virtio-gpu. + +Note: +1. Only virtio-gpu 2D supported. +2. Live migration is not supported. + +Please see the [4. Build with features](docs/build_guide.md) if you want to enable virtio-gpu. + +### 2.19 ivshmem-scream + +ivshmem-scream is a virtual sound card that relies on Intel-VM shared memory to transmit audio data. + +Nine properties are supported for ivshmem-scream device. +* id: unique device id. +* memdev: configuration of the back-end memory device used by the ivshmem. +* interface: configuring audio playback and recording interfaces, currently can be set to `ALSA`, `PulseAudio` or `Demo`. +* playback: Path for storing audio. When interface is set to Demo, playback is mandatory. +* record: Path for obtaining audio. When interface is set to Demo, record is mandatory. +* bus: bus number of the device. +* addr: including slot number and function number. +* share: the shared memory must be set to `on`. +* size: size of th shared memory, 2M is suggested. 
+ +Sample Configuration: + +```shell +-device ivshmem-scream,id=,memdev=,interface=[,playback=][,record=],bus=pcie.0,addr=0x2.0x0 +-object memory-backend-ram,id=,share=on,size=2M +``` + +Please see the [4. Build with features](docs/build_guide.md) if you want to enable scream. + +### 2.20 ramfb +Ramfb is a simple display device. It is used in the Windows system on aarch64. + +Two properties are supported for ramfb device. +* id: unique device id. +* install: when install the Windows system, setting true will automatically press enter key to skip the stage which needs to manually press any key boot from cd or dvd. + +Sample Configuration: +```shell +-device ramfb,id=[,install=true|false] +``` + +Note: Only supported on aarch64. + +Please see the [4. Build with features](docs/build_guide.md) if you want to enable ramfb. + +### 2.21 pvpanic +pvpanic is a virtual pci device. It is used to give the virtual machine the ability to sense guest os crashes or failures. + +Four properties are supported for pvpanic device. +* id: unique device id. +* bus: bus number of the device. +* addr: slot number. +* supported-features: supported features, 0-3 refers to `None`, `panicked`, `crashload` and `panicked and crashload` respectively. 3 is suggested. + +Sample Configuration: +```shell +-device pvpanic,id=,bus=,addr=<0x7>[,supported-features=<0|1|2|3>] +``` + +Please see the [4. Build with features](docs/build_guide.md) if you want to enable pvpanic. + ## 3. Trace -Users can specify the configuration file which lists events to trace. +Users can specify a configuration file which lists the traces that needs to be enabled, or specify the trace type that needs to be enabled. Setting both file and type is also allowed, so that traces with the specified type and traces listed in the file will all be enabled. One property can be set: -* events: file lists events to trace. +* file: specify the file containing the traces that needs to be enabled. +* type: specify the traces type that needs to be enabled. ```shell --trace events= +-trace file=|type= ``` ## 4. Seccomp StratoVirt use [seccomp(2)](https://man7.org/linux/man-pages/man2/seccomp.2.html) to limit the syscalls in StratoVirt process by default. It will make a slight influence on performance to StratoVirt. -* x86_64 - -| Number of Syscalls | GNU Toolchain | MUSL Toolchain | -| :----------------: | :-----------: | :------------: | -| microvm | 46 | 46 | -| q35 | 49 | 51 | - -* aarch64 - -| Number of Syscalls | GNU Toolchain | MUSL Toolchain | -| :----------------: | :-----------: | :------------: | -| microvm | 44 | 45 | -| virt | 48 | 47 | If you want to disable seccomp, you can run StratoVirt with `-disable-seccomp`. ```shell @@ -602,9 +1251,9 @@ $ ./stratovirt \ * incoming: the path of the template. See [Snapshot and Restore](./snapshot.md) for details. - + ## 6. Ozone -Ozone is a lightweight secure sandbox for StratoVirt, it provides secure environment for StratoVirt +Ozone is a lightweight secure sandbox for StratoVirt, it provides secure environment for StratoVirt by limiting resources of StratoVirt using 'namespace'. Please run ozone with root permission. ### 6.1 Usage @@ -636,12 +1285,12 @@ About the arguments: * `clean-resource` : a flag to clean resource. * `numa` : numa node, this argument must be configured if `cpuset.cpus` is set. * `cgroup` : set cgroup controller value. supported controller: `cpuset.cpus` and `memory.limit_in_bytes`. -* `--` : these two dashes are used to splite args, the args followed are used to launched StratoVirt. 
+* `--` : these two dashes are used to split args, the args followed are used to launched StratoVirt. ### 6.2 Example As ozone uses a directory to mount as a root directory, after ozone is launched, the directory "/srv/zozne/{exec_file}/{name}" will be created. (Where, `exec_file` is the executable binary file, usually it is `stratovirt`, while `name` is the name of ozone, it is given by users, but the length of it should be no more than 255 bytes.) In order to run ozone normally, please make sure that the directory "/srv/zozne/{exec_file}/{name}" does not exists before launching ozone. -On top of that, the path-related arguments are different. They are all in the current(`./`) directory. +On top of that, the path-related arguments are different. They are all in the current(`./`) directory. For net name space, it can be created by the following command with name "mynet": ```shell @@ -693,3 +1342,10 @@ white list. However, these cmdlines never function. Apart from the above commands, some arguments are playing the same roles. Like 'format' and 'bootindex' for virtio-blk; 'chassis' for pcie-root-port; 'sockets', 'cores' and 'threads' for smp; 'accel' and 'usb' for machine; "format" for pflash device. + +## 8. Debug boot time +Currently, measurement of guest boot up time is supported. The guest kernel writes different +values to specific IO/MMIO regions, and it will trap to `stratovirt`, we can record the timestamp +of kernel start or kernel boot complete. + +See [Debug_Boot_Time](https://gitee.com/openeuler/stratovirt/wikis/%E6%B5%8B%E8%AF%95%E6%96%87%E6%A1%A3/%E6%80%A7%E8%83%BD%E6%B5%8B%E8%AF%95-%E5%86%B7%E5%90%AF%E5%8A%A8%E6%97%B6%E9%97%B4) for more details. diff --git a/docs/cpu_hotplug.ch.md b/docs/cpu_hotplug.ch.md new file mode 100644 index 0000000000000000000000000000000000000000..7fc1b386404afd9c47f81a3108a66eafeb2d84ce --- /dev/null +++ b/docs/cpu_hotplug.ch.md @@ -0,0 +1,60 @@ +# CPU热插拔 + +StratoVirt支持对一个运行中的虚机进行CPU的热插入和热拔出。该功能可以动态调整虚机的CPU资源。目前,该功能只支持x86_64的标准虚机,并且不包含NUMA架构。 + +## 创建虚机 + +首先,创建一台虚机。 + +```shell +$ ./stratovirt \ + -machine q35 \ + -smp [cpus=],maxcpus= \ + -m 512 \ + -kernel path/to/kernel \ + -append "console=ttyS0 root=/dev/vda reboot=k panic=1" \ + -drive file=path/to/OVMF_CODE.fd,if=pflash,unit=0,readonly=true \ + -device pcie-root-port,port=0x0,addr=0x1.0x0,bus=pcie.0,id=pcie.1 \ + -drive file=path/to/rootfs,id=rootfs,readonly=true \ + -device virtio-blk-pci,drive=rootfs,bus=pcie.1,addr=0x0.0x0,id=blk-0 \ + -qmp unix:path/to/api/socket,server,nowait \ + -serial stdio +``` + +- `cpus`:设置虚机的启动CPU数量为'n'(默认: 1)。 `cpus`参数所设置的CPU会在虚机启动后全部上线运行,并且这些CPU不支持热拔出。 +- `maxcpus`:设置虚机的总CPU数量, 包含了在线和离线的CPU, 离线CPU的数量也就是支持热插拔的CPU, `maxcpus`的数量不能小于`cpus`。 + +## 热插入CPU + +虚机启动后,通过QMP热插入CPU + +```shell +$ ncat -U /path/to/api/socket +{"QMP":{"version":{"qemu":{"micro":1,"minor":0,"major":5},"package":"StratoVirt-2.4.0"},"capabilities":[]}} +-> {"execute": "device_add","arguments": { "id": "device-id", "driver": "generic-x86-cpu", "cpu-id": cpuid }} +<- {"return":{}} +<- {"event":"CPU_RESIZE","data":{},"timestamp":{"seconds":seconds, "microseconds":microseconds}} +``` + +- `id`: CPU设备的ID, 该ID应该为全局唯一的字符串。 +- `cpu-id`: CPU的编号,编号的取值范围是[`cpus`, `maxcpus`)内的整数。 + +## 热拔出CPU + +通过QMP热拔出CPU: + +```shell +$ ncat -U /path/to/api/socket +{"QMP":{"version":{"qemu":{"micro":1,"minor":0,"major":5},"package":"StratoVirt-2.4.0"},"capabilities":[]}} +-> {"execute": "device_del", "arguments": { "id": "device-id"}} +<- {"return":{}} +<- {"event":"CPU_RESIZE","data":{},"timestamp":{"seconds":seconds, 
"microseconds":microseconds}} +``` + +## 限制 + +CPU热插拔支持的虚机类型: +- `q35` (on x86_64 platform) + +CPU热插拔不支持的设备和特性: +- `numa` diff --git a/docs/cpu_hotplug.md b/docs/cpu_hotplug.md new file mode 100644 index 0000000000000000000000000000000000000000..1a5f2046de0bb579b269ef155229f4407be15e9a --- /dev/null +++ b/docs/cpu_hotplug.md @@ -0,0 +1,60 @@ +# CPU hotplug and hotunplug + +StratoVirt support to hot(un)plug CPU to a running VM. This feature supports dynamic adjustment of CPU resources of VM. Currently, only standard VM with x86_64 architecture is supported, and NUMA architecture is not supported. + +## Create VM + +First, we create a StratoVirt VM. + +```shell +$ ./stratovirt \ + -machine q35 \ + -smp [cpus=],maxcpus= \ + -m 512 \ + -kernel path/to/kernel \ + -append "console=ttyS0 root=/dev/vda reboot=k panic=1" \ + -drive file=path/to/OVMF_CODE.fd,if=pflash,unit=0,readonly=true \ + -device pcie-root-port,port=0x0,addr=0x1.0x0,bus=pcie.0,id=pcie.1 \ + -drive file=path/to/rootfs,id=rootfs,readonly=true \ + -device virtio-blk-pci,drive=rootfs,bus=pcie.1,addr=0x0.0x0,id=blk-0 \ + -qmp unix:path/to/api/socket,server,nowait \ + -serial stdio +``` + +- `cpus`: Set the number of CPUs to 'n' (default: 1). The number of `cpus` will all online after VM booted, and can't be hotunplugged. +- `maxcpus`: Set the number of total CPUs, including online and offline CPUs. The number of offline CPUs is also the number of CPUs that support hotplug. The number of `maxcpus` should not less than `cpus`. + +## Hotplug CPU + +After the VM boot up, hotplug CPU with QMP: + +```shell +$ ncat -U path/to/api/socket +{"QMP":{"version":{"qemu":{"micro":1,"minor":0,"major":5},"package":"StratoVirt-2.4.0"},"capabilities":[]}} +-> {"execute": "device_add","arguments": { "id": "device-id", "driver": "generic-x86-cpu", "cpu-id": cpuid }} +<- {"return":{}} +<- {"event":"CPU_RESIZE","data":{},"timestamp":{"seconds":seconds, "microseconds":microseconds}} +``` + +- `id`: The ID of the CPU device, which should be a globally unique string. 
+- `cpu-id`: The number of the CPU, which can be an integer in the range of [`cpus`, `maxcpus`) + +## Hotunplug CPU + +hotunplug CPU with QMP: + +```shell +$ ncat -U path/to/api/socket +{"QMP":{"version":{"qemu":{"micro":1,"minor":0,"major":5},"package":"StratoVirt-2.4.0"},"capabilities":[]}} +-> {"execute": "device_del", "arguments": { "id": "device-id"}} +<- {"return":{}} +<- {"event":"CPU_RESIZE","data":{},"timestamp":{"seconds":seconds, "microseconds":microseconds}} +``` + +## Limitations + +CPU hot(un)plug support machine type: +- `q35` (on x86_64 platform) + +Some devices and feature don't support to be CPU hotplug yet: +- `numa` diff --git a/docs/design.ch.md b/docs/design.ch.md index 8dd774bf53b3a41c00ad421f0a2860fe75cb5cc1..0c96eb48fd8508d3bc43f8f1abee694bf99dd655 100644 --- a/docs/design.ch.md +++ b/docs/design.ch.md @@ -25,7 +25,7 @@ StratoVirt的核心架构如下图所示,从上到下分为三层: - OCI兼容性:StratoVirt与isula和kata容器配合使用,可以完美融入Kubernetes生态系统; - 多平台支持:全面支持Intel和ARM平台; - 可扩展性:StratoVirt保留接口和设计,用于导入更多特性,甚至扩展到标准虚拟化支持; -- 安全性:运行时系统调用数小于47; +- 安全性:运行时系统调用数小于55; ## 实现 diff --git a/docs/design.md b/docs/design.md index e66249977096082d15e37cc77ed1cee1dcb962dc..391f82af1da9305c5ba68b9aad00aee32929e222 100644 --- a/docs/design.md +++ b/docs/design.md @@ -2,8 +2,8 @@ ## Overview -StratoVirt is an open-source lightweight virtualization technology based on -Linux Kernel-based Virtual Machine(KVM), which reduces memory resource +StratoVirt is an open-source lightweight virtualization technology based on +Linux Kernel-based Virtual Machine(KVM), which reduces memory resource consumption and improves VM startup speed while retains isolation capability and security capability of traditional virtualization. StratoVirt can be applied to microservices or serverless scenarios such as function computing, and reserves @@ -22,7 +22,7 @@ in lightweight scenarios, and provide UEFI boot support for standard VM. - Emulated mainboard: - microvm: To improve performance as well as reduce the attack surface, StratoVirt minimizes the simulation of user-mode devices. KVM simulation - devices and paravirtualization devices, such as GIC, serial, RTC and + devices and paravirtualization devices, such as GIC, serial, RTC and virtio-mmio devices are implemented; - standard VM: realize UEFI boot with constructed ACPI tables. 
Virtio-pci and VFIO devices can be attached to greatly improve the I/O performance; @@ -32,7 +32,7 @@ VFIO devices can be attached to greatly improve the I/O performance; ## Features - High isolation ability based on hardware; -- Fast cold boot: Benefit from the minimalist design, microvm can be started +- Fast cold boot: Benefit from the minimalist design, microvm can be started within 50ms; - Low memory overhead: StratoVirt works with a memory footprint at 4MB; - IO enhancement: StratoVirt offers normal IO ability with minimalist IO device @@ -42,7 +42,7 @@ integrated in Kubernetes ecosystem perfectly; - Multi-platform support: Fully support for Intel and Arm platform; - Expansibility: StratoVirt reserves interface and design for importing more features, even expand to standard virtualization support; -- Security: less than 52 syscalls while running; +- Security: less than 55 syscalls while running; ## Implementation diff --git a/docs/hisysevent.md b/docs/hisysevent.md new file mode 100644 index 0000000000000000000000000000000000000000..b92ce4fe4d2c41328110ad5a5cb9018d50ebc078 --- /dev/null +++ b/docs/hisysevent.md @@ -0,0 +1,40 @@ +# HiSysEvent + +HiSysEvent(https://gitee.com/openharmony/hiviewdfx_hisysevent) is a tool in open- +harmonyOS to recode important information of key processes during system running, +helping locate faults and do some data analytics. + +This document describes the way to how to use hisysevent in StratoVirt. + +## Add Event + +### Modify configuration file + +First, you need to modify or creat toml file in the event/event_info directory +to add a new event in order to generate the event function. For example: + +```toml +[[events]] +name = "example" +event_type = "Behavior" +args = "example_bool: bool, example_str: String, example_integer: u64, example_array: &[u8]" +enable = true +``` + +In the above configuration, "name" is used to represent the only event, and +duplication is not allowed; "event_type" is one of four event type defined +by openharmonyOS: Fault, Statistic, Security and Behavior; "args" will be +formatted as arguments passed to hisysevent service in open-harmonyOS; +"enabled" indicates whether it is enabled during compilation. + +### Call event function + +Just call the event function where needed. +```rust +fn init_machine_ram(&self, sys_mem: &Arc, mem_size: u64) -> Result<()> { + hisysevent::example("true", "init_ram".to_string(), mem_size, &[0,1]); + let vm_ram = self.get_vm_ram(); + let layout_size = MEM_LAYOUT[LayoutEntryType::Mem as usize].1; + ...... +} +``` diff --git a/docs/interconnect_with_libvirt.ch.md b/docs/interconnect_with_libvirt.ch.md index d0323e2d08fde7d7f4e37c70583e9bcc1edc42eb..2da7eeacbb6f0aecbd685d9d57e40882703695ad 100644 --- a/docs/interconnect_with_libvirt.ch.md +++ b/docs/interconnect_with_libvirt.ch.md @@ -1,5 +1,5 @@ # libvirt -Libvirt是StratoVirt的管理软件,它是通过创建命令行来运行StratoVirt和发送QMP命令来管理StratoVirt。目前,支持五个virsh命令来管理StratoVirt: +Libvirt是StratoVirt的管理软件,它是通过创建命令行来运行StratoVirt和发送QMP命令来管理StratoVirt。目前,支持五个virsh命令来管理StratoVirt: `virsh create`, `virsh destroy`, `virsh suspend`, `virsh resume` 和 `virsh console`. diff --git a/docs/interconnect_with_libvirt.md b/docs/interconnect_with_libvirt.md index 88ec77f396803c95122d5637e2f18699f1c50162..494831900bc7634b91aa98b8f3263ee7cb271d73 100644 --- a/docs/interconnect_with_libvirt.md +++ b/docs/interconnect_with_libvirt.md @@ -1,6 +1,6 @@ # libvirt -Libvirt is one of manager for StratoVirt, it manages StratoVirt by creating cmdlines to launch StratoVirt -and giving commands via QMP. 
Currently, five virsh commands are supported to manage StratoVirt: +Libvirt is one of manager for StratoVirt, it manages StratoVirt by creating cmdlines to launch StratoVirt +and giving commands via QMP. Currently, five virsh commands are supported to manage StratoVirt: `virsh create`, `virsh destroy`, `virsh suspend`, `virsh resume` and `virsh console`. @@ -102,7 +102,7 @@ Pflash can be added by the following config. ``` -- net +- net ``` @@ -117,7 +117,7 @@ Pflash can be added by the following config. - console -To use `virsh console` command, the virtio console with redirect `pty` should be configured. +To use `virsh console` command, the virtio console with redirect `pty` should be configured. ``` diff --git a/docs/kernel_config/micro_vm/kernel_config_4.19_aarch64 b/docs/kernel_config/micro_vm/kernel_config_4.19_aarch64 index 0736417835c3be6db26598b9c8d9ba7d64e749e4..804b86c1f0d11190c5e9163f650b761497cc44d7 100644 --- a/docs/kernel_config/micro_vm/kernel_config_4.19_aarch64 +++ b/docs/kernel_config/micro_vm/kernel_config_4.19_aarch64 @@ -1761,7 +1761,7 @@ CONFIG_PARTITION_PERCPU=y # # CONFIG_ARM_CCI_PMU is not set # CONFIG_ARM_CCN is not set -# CONFIG_ARM_PMU is not set +CONFIG_ARM_PMU=y # CONFIG_ARM_DSU_PMU is not set # CONFIG_HISI_PMU is not set # CONFIG_ARM_SPE_PMU is not set diff --git a/docs/kernel_config/micro_vm/kernel_config_5.10_aarch64 b/docs/kernel_config/micro_vm/kernel_config_5.10_aarch64 index 5ef0f9d6c9ecbdb6a34c3483972050bb43368a22..1ef0b502455dc028fdfff16ff480adc448b3f44b 100644 --- a/docs/kernel_config/micro_vm/kernel_config_5.10_aarch64 +++ b/docs/kernel_config/micro_vm/kernel_config_5.10_aarch64 @@ -128,9 +128,11 @@ CONFIG_PAGE_COUNTER=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_MEMCG_KMEM=y +# CONFIG_MEMCG_MEMFS_INFO is not set CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y CONFIG_CGROUP_SCHED=y +# CONFIG_QOS_SCHED is not set CONFIG_FAIR_GROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y # CONFIG_RT_GROUP_SCHED is not set @@ -152,6 +154,7 @@ CONFIG_IPC_NS=y CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y +# CONFIG_SCHED_STEAL is not set # CONFIG_CHECKPOINT_RESTORE is not set CONFIG_SCHED_AUTOGROUP=y # CONFIG_SYSFS_DEPRECATED is not set @@ -220,17 +223,19 @@ CONFIG_SLAB_MERGE_DEFAULT=y # CONFIG_SLAB_FREELIST_HARDENED is not set # CONFIG_SHUFFLE_PAGE_ALLOCATOR is not set # CONFIG_PROFILING is not set +CONFIG_KABI_RESERVE=y +CONFIG_KABI_SIZE_ALIGN_CHECKS=y # end of General setup CONFIG_ARM64=y CONFIG_64BIT=y CONFIG_MMU=y -CONFIG_ARM64_PAGE_SHIFT=16 -CONFIG_ARM64_CONT_PTE_SHIFT=5 -CONFIG_ARM64_CONT_PMD_SHIFT=5 -CONFIG_ARCH_MMAP_RND_BITS_MIN=14 -CONFIG_ARCH_MMAP_RND_BITS_MAX=29 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=7 +CONFIG_ARM64_PAGE_SHIFT=12 +CONFIG_ARM64_CONT_PTE_SHIFT=4 +CONFIG_ARM64_CONT_PMD_SHIFT=4 +CONFIG_ARCH_MMAP_RND_BITS_MIN=18 +CONFIG_ARCH_MMAP_RND_BITS_MAX=33 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=11 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 CONFIG_NO_IOPORT_MAP=y CONFIG_STACKTRACE_SUPPORT=y @@ -249,7 +254,7 @@ CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y CONFIG_SMP=y CONFIG_KERNEL_MODE_NEON=y CONFIG_FIX_EARLYCON_MEM=y -CONFIG_PGTABLE_LEVELS=3 +CONFIG_PGTABLE_LEVELS=4 CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_ARCH_PROC_KCORE_TEXT=y CONFIG_ARCH_HAS_CPU_RELAX=y @@ -277,6 +282,7 @@ CONFIG_ARCH_HAS_CPU_RELAX=y # CONFIG_ARCH_MESON is not set # CONFIG_ARCH_MVEBU is not set # CONFIG_ARCH_MXC is not set +# CONFIG_ARCH_PHYTIUM is not set # CONFIG_ARCH_QCOM is not set # CONFIG_ARCH_REALTEK is not set # CONFIG_ARCH_RENESAS is not set @@ -297,6 +303,13 @@ CONFIG_ARCH_HAS_CPU_RELAX=y # 
CONFIG_ARCH_ZYNQMP is not set # end of Platform selection +CONFIG_HAVE_LIVEPATCH_WO_FTRACE=y + +# +# Enable Livepatch +# +# end of Enable Livepatch + # # Kernel Features # @@ -327,31 +340,33 @@ CONFIG_CAVIUM_ERRATUM_30115=y # CONFIG_CAVIUM_TX2_ERRATUM_219 is not set # CONFIG_FUJITSU_ERRATUM_010001 is not set CONFIG_HISILICON_ERRATUM_161600802=y +# CONFIG_HISILICON_ERRATUM_1980005 is not set CONFIG_QCOM_FALKOR_ERRATUM_1003=y CONFIG_QCOM_FALKOR_ERRATUM_1009=y CONFIG_QCOM_QDF2400_ERRATUM_0065=y CONFIG_QCOM_FALKOR_ERRATUM_E1041=y CONFIG_SOCIONEXT_SYNQUACER_PREITS=y +CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH=y +# CONFIG_HISILICON_HIP08_RU_PREFETCH_DEFAULT_OFF is not set # end of ARM errata workarounds via the alternatives framework -# CONFIG_ARM64_4K_PAGES is not set +CONFIG_ARM64_4K_PAGES=y # CONFIG_ARM64_16K_PAGES is not set -CONFIG_ARM64_64K_PAGES=y -# CONFIG_ARM64_VA_BITS_42 is not set +# CONFIG_ARM64_64K_PAGES is not set +# CONFIG_ARM64_VA_BITS_39 is not set CONFIG_ARM64_VA_BITS_48=y -# CONFIG_ARM64_VA_BITS_52 is not set CONFIG_ARM64_VA_BITS=48 CONFIG_ARM64_PA_BITS_48=y -# CONFIG_ARM64_PA_BITS_52 is not set CONFIG_ARM64_PA_BITS=48 # CONFIG_CPU_BIG_ENDIAN is not set CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_SCHED_MC=y +# CONFIG_SCHED_CLUSTER is not set CONFIG_SCHED_SMT=y CONFIG_NR_CPUS=255 CONFIG_HOTPLUG_CPU=y +# CONFIG_ARM64_BOOTPARAM_HOTPLUG_CPU0 is not set # CONFIG_NUMA is not set -CONFIG_HOLES_IN_ZONE=y # CONFIG_HZ_100 is not set # CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set @@ -365,7 +380,10 @@ CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_FLATMEM_ENABLE=y CONFIG_HAVE_ARCH_PFN_VALID=y CONFIG_SYS_SUPPORTS_HUGETLBFS=y +CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y +CONFIG_ARCH_LLC_128_LINE_SIZE=y +CONFIG_ARCH_HAS_FILTER_PGPROT=y CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y CONFIG_PARAVIRT=y # CONFIG_PARAVIRT_SPINLOCKS is not set @@ -375,10 +393,12 @@ CONFIG_PARAVIRT=y # CONFIG_XEN is not set CONFIG_FORCE_MAX_ZONEORDER=11 CONFIG_UNMAP_KERNEL_AT_EL0=y +CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY=y # CONFIG_RODATA_FULL_DEFAULT_ENABLED is not set -# CONFIG_ARM64_PMEM_RESERVE is not set +# CONFIG_ARM64_PMEM_LEGACY is not set # CONFIG_ARM64_SW_TTBR0_PAN is not set # CONFIG_ARM64_TAGGED_ADDR_ABI is not set +# CONFIG_AARCH32_EL0 is not set # # ARMv8.1 architectural features @@ -392,7 +412,6 @@ CONFIG_ARM64_VHE=y # # ARMv8.2 architectural features # -CONFIG_ARM64_UAO=y # CONFIG_ARM64_PMEM is not set CONFIG_ARM64_RAS_EXTN=y CONFIG_ARM64_CNP=y @@ -426,11 +445,24 @@ CONFIG_ARCH_RANDOM=y CONFIG_ARM64_AS_HAS_MTE=y # end of ARMv8.5 architectural features +# +# ARMv8.6 architectural features +# +CONFIG_ARM64_TWED=y +# end of ARMv8.6 architectural features + +# +# ARMv8.7 architectural features +# +CONFIG_ARM64_EPAN=y +# end of ARMv8.7 architectural features + CONFIG_ARM64_SVE=y # CONFIG_ARM64_PSEUDO_NMI is not set CONFIG_RELOCATABLE=y # CONFIG_RANDOMIZE_BASE is not set CONFIG_CC_HAVE_STACKPROTECTOR_SYSREG=y +# CONFIG_ASCEND_FEATURES is not set # end of Kernel Features # @@ -470,6 +502,7 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y # # Firmware Drivers # +# CONFIG_ARM_SCMI_PROTOCOL is not set # CONFIG_ARM_SDE_INTERFACE is not set # CONFIG_GOOGLE_FIRMWARE is not set @@ -477,6 +510,7 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y # EFI (Extensible Firmware Interface) Support # CONFIG_EFI_ESRT=y +# CONFIG_EFI_FAKE_MEMMAP is not set CONFIG_EFI_PARAMS_FROM_FDT=y CONFIG_EFI_RUNTIME_WRAPPERS=y CONFIG_EFI_GENERIC_STUB=y @@ -509,7 +543,6 @@ CONFIG_ARCH_SUPPORTS_ACPI=y # # General architecture-dependent options # 
-CONFIG_SET_FS=y # CONFIG_JUMP_LABEL is not set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y CONFIG_HAVE_KPROBES=y @@ -543,19 +576,22 @@ CONFIG_HAVE_ARCH_SECCOMP=y CONFIG_HAVE_ARCH_SECCOMP_FILTER=y CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y +# CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_HAVE_ARCH_STACKLEAK=y CONFIG_HAVE_STACKPROTECTOR=y # CONFIG_STACKPROTECTOR is not set CONFIG_HAVE_CONTEXT_TRACKING=y CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y +CONFIG_HAVE_MOVE_PUD=y CONFIG_HAVE_MOVE_PMD=y CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y CONFIG_HAVE_ARCH_HUGE_VMAP=y +CONFIG_HAVE_ARCH_HUGE_VMALLOC=y CONFIG_MODULES_USE_ELF_RELA=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y CONFIG_HAVE_ARCH_MMAP_RND_BITS=y -CONFIG_ARCH_MMAP_RND_BITS=14 +CONFIG_ARCH_MMAP_RND_BITS=18 CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT=y CONFIG_CLONE_BACKWARDS=y # CONFIG_COMPAT_32BIT_TIME is not set @@ -704,6 +740,7 @@ CONFIG_SPARSEMEM_EXTREME=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_HAVE_FAST_GUP=y +CONFIG_HOLES_IN_ZONE=y CONFIG_ARCH_KEEP_MEMBLOCK=y CONFIG_MEMORY_ISOLATION=y CONFIG_MEMORY_HOTPLUG=y @@ -714,6 +751,7 @@ CONFIG_MEMORY_BALLOON=y # CONFIG_COMPACTION is not set CONFIG_PAGE_REPORTING=y # CONFIG_MIGRATION is not set +# CONFIG_HUGE_VMALLOC_DEFAULT_ENABLED is not set CONFIG_PHYS_ADDR_T_64BIT=y # CONFIG_BOUNCE is not set # CONFIG_KSM is not set @@ -723,6 +761,10 @@ CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y # CONFIG_TRANSPARENT_HUGEPAGE is not set # CONFIG_CLEANCACHE is not set # CONFIG_FRONTSWAP is not set +# CONFIG_SHRINK_PAGECACHE is not set +CONFIG_MEMCG_QOS=y +# CONFIG_ETMEM_SCAN is not set +# CONFIG_ETMEM_SWAP is not set # CONFIG_CMA is not set # CONFIG_ZPOOL is not set # CONFIG_ZBUD is not set @@ -735,6 +777,12 @@ CONFIG_ARCH_HAS_PTE_DEVMAP=y # CONFIG_GUP_BENCHMARK is not set CONFIG_ARCH_HAS_PTE_SPECIAL=y # CONFIG_PIN_MEMORY is not set + +# +# Data Access Monitoring +# +# CONFIG_DAMON is not set +# end of Data Access Monitoring # end of Memory Management options CONFIG_NET=y @@ -1279,7 +1327,6 @@ CONFIG_OF_IRQ=y CONFIG_OF_NET=y CONFIG_OF_RESERVED_MEM=y # CONFIG_OF_OVERLAY is not set -# CONFIG_OF_CONFIGFS is not set # CONFIG_PARPORT is not set CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_NULL_BLK is not set @@ -1296,6 +1343,7 @@ CONFIG_VIRTIO_BLK=y # NVME Support # # CONFIG_NVME_FC is not set +# CONFIG_NVME_TCP is not set # CONFIG_NVME_TARGET is not set # end of NVME Support @@ -1418,7 +1466,6 @@ CONFIG_VIRTIO_NET=y # CONFIG_WAN is not set CONFIG_NET_FAILOVER=y # CONFIG_ISDN is not set -# CONFIG_NVM is not set # # Input device support @@ -1459,9 +1506,6 @@ CONFIG_INPUT=y # # Character devices # -# CONFIG_BRCM_CHAR_DRIVERS is not set -CONFIG_BCM2835_DEVGPIOMEM=y -# CONFIG_RPIVID_MEM is not set CONFIG_TTY=y CONFIG_VT=y CONFIG_CONSOLE_TRANSLATIONS=y @@ -1749,7 +1793,6 @@ CONFIG_DMA_OF=y # DMABUF options # # CONFIG_SYNC_FILE is not set -# CONFIG_DMABUF_MOVE_NOTIFY is not set # CONFIG_DMABUF_HEAPS is not set # end of DMABUF options @@ -1783,7 +1826,6 @@ CONFIG_COMMON_CLK=y # CONFIG_CLK_QORIQ is not set # CONFIG_COMMON_CLK_XGENE is not set # CONFIG_COMMON_CLK_FIXED_MMIO is not set -# CONFIG_MCHP_CLK_PFSOC is not set # CONFIG_HWSPINLOCK is not set # @@ -1833,7 +1875,6 @@ CONFIG_ARM_ARCH_TIMER=y # # Broadcom SoC drivers # -# CONFIG_OPENEULER_RASPBERRYPI is not set # CONFIG_SOC_BRCMSTB is not set # end of Broadcom SoC drivers @@ -1907,7 +1948,7 @@ CONFIG_PARTITION_PERCPU=y # CONFIG_ARM_CCI_PMU is not set # CONFIG_ARM_CCN is not set # CONFIG_ARM_CMN is not set -# CONFIG_ARM_PMU is not set 
+CONFIG_ARM_PMU=y # CONFIG_ARM_DSU_PMU is not set # CONFIG_ARM_SPE_PMU is not set # end of Performance monitor support @@ -1920,12 +1961,15 @@ CONFIG_PARTITION_PERCPU=y # CONFIG_ANDROID is not set # end of Android +# +# Vendor Hooks +# +# end of Vendor Hooks + # CONFIG_LIBNVDIMM is not set -# CONFIG_PMEM_LEGACY is not set # CONFIG_DAX is not set CONFIG_NVMEM=y # CONFIG_NVMEM_SYSFS is not set -# CONFIG_NVMEM_RMEM is not set # # HW tracing support @@ -2008,6 +2052,7 @@ CONFIG_VIRTIO_FS=y # CONFIG_VFAT_FS is not set # CONFIG_EXFAT_FS is not set # CONFIG_NTFS_FS is not set +# CONFIG_NTFS3_FS is not set # end of DOS/FAT/EXFAT/NT Filesystems # @@ -2026,6 +2071,8 @@ CONFIG_TMPFS_XATTR=y # CONFIG_TMPFS_INODE64 is not set CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y +CONFIG_HUGETLB_PAGE_FREE_VMEMMAP=y +# CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON is not set CONFIG_MEMFD_CREATE=y CONFIG_ARCH_HAS_GIGANTIC_PAGE=y CONFIG_CONFIGFS_FS=y @@ -2124,6 +2171,7 @@ CONFIG_CRYPTO_ACOMP2=y # CONFIG_CRYPTO_RSA is not set # CONFIG_CRYPTO_DH is not set # CONFIG_CRYPTO_ECDH is not set +# CONFIG_CRYPTO_ECDSA is not set # CONFIG_CRYPTO_ECRDSA is not set # CONFIG_CRYPTO_SM2 is not set # CONFIG_CRYPTO_CURVE25519 is not set @@ -2319,8 +2367,6 @@ CONFIG_FONT_8x16=y CONFIG_FONT_AUTOSELECT=y CONFIG_ARCH_STACKWALK=y CONFIG_SBITMAP=y -# CONFIG_ETMEM_SCAN is not set -# CONFIG_ETMEM_SWAP is not set # CONFIG_STRING_SELFTEST is not set # end of Library routines @@ -2349,6 +2395,7 @@ CONFIG_DEBUG_BUGVERBOSE=y CONFIG_FRAME_WARN=2048 # CONFIG_STRIP_ASM_SYMS is not set # CONFIG_HEADERS_INSTALL is not set +# CONFIG_OPTIMIZE_INLINING is not set # CONFIG_DEBUG_SECTION_MISMATCH is not set # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_ARCH_WANT_FRAME_POINTERS=y @@ -2386,6 +2433,8 @@ CONFIG_HAVE_ARCH_KASAN_SW_TAGS=y CONFIG_CC_HAS_KASAN_GENERIC=y CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y # CONFIG_KASAN is not set +CONFIG_HAVE_ARCH_KFENCE=y +# CONFIG_KFENCE is not set # end of Memory Debugging # diff --git a/docs/kernel_config/standard_vm/kernel_config_4.19_aarch64 b/docs/kernel_config/standard_vm/kernel_config_4.19_aarch64 index c2159dd40f1ff5b04f1d78190010a861507dd1ce..85b78de4bfb3552b1393094f338f1dec522553dc 100644 --- a/docs/kernel_config/standard_vm/kernel_config_4.19_aarch64 +++ b/docs/kernel_config/standard_vm/kernel_config_4.19_aarch64 @@ -1409,15 +1409,15 @@ CONFIG_NET_VENDOR_INTEL=y # CONFIG_E100 is not set # CONFIG_E1000 is not set # CONFIG_E1000E is not set -# CONFIG_IGB is not set -# CONFIG_IGBVF is not set +CONFIG_IGB=y +CONFIG_IGBVF=y # CONFIG_IXGB is not set CONFIG_IXGBE=y # CONFIG_IXGBE_DCB is not set -# CONFIG_IXGBEVF is not set +CONFIG_IXGBEVF=y CONFIG_I40E=y # CONFIG_I40E_DCB is not set -# CONFIG_I40EVF is not set +CONFIG_I40EVF=y # CONFIG_ICE is not set # CONFIG_FM10K is not set CONFIG_NET_VENDOR_NETSWIFT=y diff --git a/docs/kernel_config/standard_vm/kernel_config_4.19_x86_64 b/docs/kernel_config/standard_vm/kernel_config_4.19_x86_64 index b817bc9b04bafade44639f229209c79ffbf7c20b..6ed9be72fd53bb7a77a58a143026a632e6febdcc 100644 --- a/docs/kernel_config/standard_vm/kernel_config_4.19_x86_64 +++ b/docs/kernel_config/standard_vm/kernel_config_4.19_x86_64 @@ -1368,15 +1368,15 @@ CONFIG_NET_VENDOR_INTEL=y # CONFIG_E100 is not set # CONFIG_E1000 is not set # CONFIG_E1000E is not set -# CONFIG_IGB is not set -# CONFIG_IGBVF is not set +CONFIG_IGB=y +CONFIG_IGBVF=y # CONFIG_IXGB is not set CONFIG_IXGBE=y # CONFIG_IXGBE_DCB is not set -# CONFIG_IXGBEVF is not set +CONFIG_IXGBEVF=y CONFIG_I40E=y # CONFIG_I40E_DCB is not set 
-# CONFIG_I40EVF is not set +CONFIG_I40EVF=y # CONFIG_ICE is not set # CONFIG_FM10K is not set CONFIG_NET_VENDOR_NETSWIFT=y diff --git a/docs/kernel_config/standard_vm/kernel_config_5.10_aarch64 b/docs/kernel_config/standard_vm/kernel_config_5.10_aarch64 index 3d89d8a965ab60a665fc22f28b2b7ff243eee42d..4ec28e45d75330d1f02408f222afdbfc878adba8 100644 --- a/docs/kernel_config/standard_vm/kernel_config_5.10_aarch64 +++ b/docs/kernel_config/standard_vm/kernel_config_5.10_aarch64 @@ -1463,15 +1463,15 @@ CONFIG_NET_VENDOR_INTEL=y # CONFIG_E100 is not set # CONFIG_E1000 is not set # CONFIG_E1000E is not set -# CONFIG_IGB is not set -# CONFIG_IGBVF is not set +CONFIG_IGB=y +CONFIG_IGBVF=y # CONFIG_IXGB is not set CONFIG_IXGBE=y # CONFIG_IXGBE_DCB is not set -# CONFIG_IXGBEVF is not set +CONFIG_IXGBEVF=y CONFIG_I40E=y # CONFIG_I40E_DCB is not set -# CONFIG_I40EVF is not set +CONFIG_I40EVF=y # CONFIG_ICE is not set # CONFIG_FM10K is not set # CONFIG_IGC is not set diff --git a/docs/kernel_config/standard_vm/kernel_config_5.10_x86_64 b/docs/kernel_config/standard_vm/kernel_config_5.10_x86_64 index bcc32e78323c62dc24de385b4456f9edf6d1493c..d52a12d6abbcd06b3baf94a5bae2e745cfcfa4ea 100644 --- a/docs/kernel_config/standard_vm/kernel_config_5.10_x86_64 +++ b/docs/kernel_config/standard_vm/kernel_config_5.10_x86_64 @@ -1399,15 +1399,15 @@ CONFIG_NET_VENDOR_INTEL=y # CONFIG_E100 is not set # CONFIG_E1000 is not set # CONFIG_E1000E is not set -# CONFIG_IGB is not set -# CONFIG_IGBVF is not set +CONFIG_IGB=y +CONFIG_IGBVF=y # CONFIG_IXGB is not set CONFIG_IXGBE=y # CONFIG_IXGBE_DCB is not set -# CONFIG_IXGBEVF is not set +CONFIG_IXGBEVF=y CONFIG_I40E=y # CONFIG_I40E_DCB is not set -# CONFIG_I40EVF is not set +CONFIG_I40EVF=y # CONFIG_ICE is not set # CONFIG_FM10K is not set # CONFIG_IGC is not set diff --git a/docs/migration.md b/docs/migration.md new file mode 100644 index 0000000000000000000000000000000000000000..75c6c6651e2ea8dafc823a9bfd180e75986a0fb7 --- /dev/null +++ b/docs/migration.md @@ -0,0 +1,134 @@ +# Live migration + +## Introduction + +Virtual machine live migration is the key feature provided by StratoVirt. It needs to execute virtual machine migration +when any of the following happens: +- Server overload: when a source server is overloaded, a set of the VMs from this server is migrated to an underloaded + server using VM migration technique. +- Server maintenance: if there is a need for server maintenance, VMs from the source server are migrated to another server. +- Server fault: whenever there is server fault, VMs are migrated from the faulty server to the target server. + +## Transports + +The migration stream can be passed over any transport as following: +- TCP mode migration: using tcp sockets to do the migration. +- UNIX mode migration: using unix sockets to do the migration. + +Note: UNIX mode only supports migrate two VMs on the same host OS. TCP mode supports migrate both on the same or + different host OS. 
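+
+Whichever transport is chosen, the same URI appears in two places: the destination VM's
+`-incoming` option and the `migrate` command sent to the source VM's QMP socket. The
+sketch below is illustrative only; it assumes `path/to/socket1` is the source VM's QMP
+socket (as in the launch example in the Migration section below), that `ncat` is
+available, and that in practice only one of the two transports is used.
+
+```shell
+# Minimal sketch: send the migrate command to the source VM's QMP socket, using
+# the URI matching the transport the destination VM was started with. The short
+# sleep keeps the connection open long enough to read the reply.
+
+# TCP transport -- destination started with: -incoming tcp:192.168.0.1:4446
+( echo '{"execute":"migrate", "arguments":{"uri":"tcp:192.168.0.1:4446"}}'; sleep 1 ) | ncat -U path/to/socket1
+
+# UNIX transport -- destination started with: -incoming unix:/tmp/stratovirt-migrate.socket
+( echo '{"execute":"migrate", "arguments":{"uri":"unix:/tmp/stratovirt-migrate.socket"}}'; sleep 1 ) | ncat -U path/to/socket1
+```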
+
+## Migration
+
+Launch the source VM:
+```shell
+./stratovirt \
+    -machine q35 \
+    -kernel ./vmlinux.bin \
+    -append "console=ttyS0 pci=off reboot=k quiet panic=1 root=/dev/vda" \
+    -drive file=path/to/rootfs,id=rootfs,readonly=off,direct=off \
+    -device virtio-blk-pci,drive=rootfs,id=rootfs,bus=pcie.0,addr=0 \
+    -qmp unix:path/to/socket1,server,nowait \
+    -serial stdio \
+```
+
+Launch the destination VM:
+```shell
+./stratovirt \
+    -machine q35 \
+    -kernel ./vmlinux.bin \
+    -append "console=ttyS0 pci=off reboot=k quiet panic=1 root=/dev/vda" \
+    -drive file=path/to/rootfs,id=rootfs,readonly=off,direct=off \
+    -device virtio-blk-pci,drive=rootfs,id=rootfs,bus=pcie.0,addr=0 \
+    -qmp unix:path/to/socket2,server,nowait \
+    -serial stdio \
+    -incoming tcp:192.168.0.1:4446 \
+```
+
+Note:
+- The destination VM's command line parameters need to be consistent with the source VM's.
+- To switch the data transmission from the tcp network protocol to a unix socket, replace the parameter
+  `-incoming tcp:192.168.0.1:4446` with `-incoming unix:/tmp/stratovirt-migrate.socket`.
+- The unix socket protocol only supports migrating between two VMs on the same host OS.
+
+Start the migration from the source VM:
+```shell
+$ ncat -U path/to/socket1
+<- {"QMP":{"version":{"StratoVirt":{"micro":1,"minor":0,"major":0},"package":""},"capabilities":[]}}
+-> {"execute":"migrate", "arguments":{"uri":"tcp:192.168.0.1:4446"}}
+<- {"return":{}}
+```
+
+Note:
+- If using the unix socket protocol to migrate a VM, change `"uri":"tcp:192.168.0.1:4446"` in the QMP command to
+  `"uri":"unix:/tmp/stratovirt-migrate.socket"`.
+
+After the command is executed, the live migration starts. In a moment, the source VM should be successfully
+migrated to the destination VM.
+
+## Cancel Migration
+
+To cancel the live migration, execute the following command:
+```shell
+$ ncat -U path/to/socket1
+<- {"QMP":{"version":{"StratoVirt":{"micro":1,"minor":0,"major":0},"package":""},"capabilities":[]}}
+-> {"execute":"migrate_cancel"}
+<- {"return":{}}
+```
+
+## Query migration state
+
+Use the QMP command `query-migrate` to check the migration state:
+```shell
+$ ncat -U path/to/socket
+<- {"QMP":{"version":{"StratoVirt":{"micro":1,"minor":0,"major":0},"package":""},"capabilities":[]}}
+-> {"execute":"query-migrate"}
+<- {"return":{"status":"completed"}}
+```
+
+There are 6 possible states during migration:
+- `None`: Resources are not prepared yet.
+- `Setup`: Resources are set up, ready for migration.
+- `Active`: Migration is in progress.
+- `Completed`: Migration completed.
+- `Failed`: Migration failed.
+- `Canceled`: Migration canceled.
+
+## Limitations
+
+Migration supports the following machine types:
+- `q35` (on x86_64 platform)
+- `virt` (on aarch64 platform)
+
+Some devices and features don't support migration yet:
+- `vhost-net`
+- `vhost-user-net`
+- `vfio` devices
+- `balloon`
+- `mem-shared`,`backend file of memory`
+- `pmu`
+- `sve`
+- `gic-version=2`
+
+Some device attributes can't be changed:
+- `virtio-net`: mac
+- `virtio-blk`: file(only ordinary file or copy file), serial_num
+- `device`: bus, addr
+- `smp`
+- `m`
+
+If a device was hot-plugged into the source VM before migration, the command that adds this device should also be added to the destination VM's command line.
+
+Before live migration:
+- the source and destination host CPUs need to be the same architecture.
+- the VM image needs to be shared by the source and destination.
+- live migration may fail if the VM is performing lifecycle operations, such as reboot or shutdown.
+- the command to startup the VM needs to be consistent on source and destination host. + +During live migration: +- source and destination networks cannot be disconnected. +- it is banned to operate VM lifecycle, includes using the QMP command and executing in the VM. +- live migration time is affected by network performance, total memory of VM and applications. + +After live migration: +- it needs to wait for the source VM to release resources before fetching back the live migration operation. diff --git a/docs/mk_initrd.ch.md b/docs/mk_initrd.ch.md index d11802f706f9153358c57d82e31a6c0678ef2574..c57390ac0c6c8001f94cb8102b86a9ff499bcbbf 100644 --- a/docs/mk_initrd.ch.md +++ b/docs/mk_initrd.ch.md @@ -3,8 +3,8 @@ ## 1. 下载Busybox源码,然后解压 ``` shell -wget https://busybox.net/downloads/busybox-1.31.1.tar.bz2 -tar -xjf busybox-1.31.1.tar.bz2 +wget https://busybox.net/downloads/busybox-1.36.1.tar.bz2 +tar -xjf busybox-1.36.1.tar.bz2 ``` ## 2. 编译BusyBox diff --git a/docs/mk_initrd.md b/docs/mk_initrd.md index 79d880212502cc5e89c466887925392406bd7c7d..2bcca4aa75bb63f031411ecb49b2a10edbc7a9ff 100644 --- a/docs/mk_initrd.md +++ b/docs/mk_initrd.md @@ -3,8 +3,8 @@ ## 1. Download BusyBox, then decompression ``` shell -wget https://busybox.net/downloads/busybox-1.31.1.tar.bz2 -tar -xjf busybox-1.31.1.tar.bz2 +wget https://busybox.net/downloads/busybox-1.36.1.tar.bz2 +tar -xjf busybox-1.36.1.tar.bz2 ``` ## 2. Compile BusyBox diff --git a/docs/qmp.md b/docs/qmp.md index ceee6fd81d754fe3ad0c466d84fb42f8e2ffdd2e..5e5e25e9f6e1cb09f6933f9b32022acd36b0b36b 100644 --- a/docs/qmp.md +++ b/docs/qmp.md @@ -9,12 +9,18 @@ StratoVirt controls VM's lifecycle and external api interface with [QMP](https:/ When running StratoVirt, you must create QMP in cmdline arguments as a management interface. -StratoVirt supports UnixSocket-type QMP, you can set it by: +StratoVirt supports UnixSocket-type QMP and TcpSocket-type QMP, you can set it by: ```shell # cmdline +# Start with UnixSocket -qmp unix:/path/to/api/socket,server,nowait ``` +```shell +# cmdline +# Start with TcpSocket +-qmp tcp:ip:port,server,nowait +``` Where, the information about 'server' and 'nowait' can be found in [section 2.12 Chardev](#212-chardev) On top of that, monitor can be used to create QMP connection as well. @@ -29,9 +35,16 @@ Three properties can be set for monitor. ```shell # cmdline +# Start with UnixSocket -chardev socket,path=/path/to/monitor/sock,id=chardev_id,server,nowait -mon chardev=chardev_id,id=monitor_id,mode=control ``` +```shell +# cmdline +# Start with TcpSocket +-chardev socket,host=ip,port=port,id=chardev_id,server,nowait +-mon chardev=chardev_id,id=monitor_id,mode=control +``` ## QMP Connection @@ -43,11 +56,14 @@ Several steps to connect QMP are showed as following: # Start with UnixSocket $ ncat -U /path/to/api/socket ``` - +```shell +# Start with TcpSocket +$ ncat ip port +``` Once connection is built, you will receive a `greeting` message from StratoVirt. ```json -{"QMP":{"version":{"StratoVirt":{"micro":1,"minor":0,"major":0},"package":""},"capabilities":[]}} +{ "QMP": { "version": { "StratoVirt": { "micro":1, "minor":0, "major":0 }, "package":"" }, "capabilities":[] } } ``` Now you can input QMP command to control StratoVirt. @@ -62,8 +78,11 @@ Add a block backend. * `node-name` : the name of the block driver node, must be unique. * `file` : the backend file information. +* `media` : indicate media type of the backend file. Possible values are `disk` or `cdrom`. If not set, default is `disk`. * `cache` : if use direct io. 
* `read-only` : if readonly. +* `driver` : the block image format. Possible values are `raw` or `qcow2`. If not set, default is `raw`. +* `aio` : the aio type of block device. #### Notes @@ -74,11 +93,13 @@ Add a block backend. * For `addr`, it start at `0x0` mapping in guest with `vda` on x86_64 platform, and start at `0x1` mapping in guest with `vdb` on aarch64 platform. + * For `driver`, only `raw` is supported. + #### Example ```json -<- {"execute": "blockdev-add", "arguments": {"node-name": "drive-0", "file": {"driver": "file", "filename": "/path/to/block"}, "cache": {"direct": true}, "read-only": false}} --> {"return": {}} +-> { "execute": "blockdev-add", "arguments": { "node-name": "drive-0", "file": { "driver": "file", "filename": "/path/to/block", "aio": "native" }, "cache": { "direct": true }, "read-only": false } } +<- { "return": {} } ``` ### blockdev-del @@ -92,8 +113,8 @@ Remove a block backend. #### Example ```json -<- {"execute": "blockdev-del", "arguments": {"node-name": "drive-0"}} --> {"return": {}} +-> { "execute": "blockdev-del", "arguments": { "node-name": "drive-0" } } +<- { "return": {} } ``` ## Net device backend management @@ -106,7 +127,13 @@ Add a network backend. * `id` : the device's ID, must be unique. * `ifname` : the backend tap dev name. -* `fds` : the file fd opened by upper level. +* `fd` : the opened tap fd. +* `fds` : the opened tap fds. +* `queues` : the num of queues for multi-queue. +* `vhost` : whether to run as a vhost-net device. +* `vhostfd` : the vhost-net device fd. +* `vhostfds` : the vhost-net device fds. +* `chardev` : the chardev name for vhost-user net. #### Notes @@ -116,11 +143,13 @@ Add a network backend. * For `addr`, it start at `0x0` mapping in guest with `eth0`. +* It does not support multi-queue. + #### Example ```json -<- {"execute":"netdev_add", "arguments":{"id":"net-0", "ifname":"tap0"}} --> {"return": {}} +-> { "execute": "netdev_add", "arguments": { "id": "net-0", "ifname": "tap0" } } +<- { "return": {} } ``` ### netdev_del @@ -134,13 +163,96 @@ Remove a network backend. #### Example ```json -<- {"execute": "netdev_del", "arguments": {"id": "net-0"}} --> {"return": {}} +-> { "execute": "netdev_del", "arguments": { "id": "net-0" } } +<- { "return": {} } +``` + +## Camera device backend management + +### cameradev_add + +Add a camera backend. + +#### Arguments + +* `id` : the device's ID, must be unique. +* `driver` : the backend camera type, eg. v4l2 or demo. +* `path` : the backend camera file's path, eg. /dev/video0 + +#### Notes + +* MicroVM is not supported. + +#### Example + +```json +-> { "execute": "cameradev_add", "arguments": { "id": "cam-0", "driver": "v4l2", "path": "/dev/video0" } } +<- { "return": {} } +``` + +### cameradev_del + +Remove a camera backend. + +#### Arguments + +* `id` : the device's ID. + +#### Notes + +* MicroVM is not supported. + +#### Example + +```json +-> { "execute": "cameradev_del", "arguments": { "id": "cam-0" } } +<- { "return": {} } +``` + +## Character device backend management + +Currently, It only supports Standard VM. + +### chardev-add + +Add a character device backend. + +#### Arguments + +* `id` : the character device's ID, must be unique. +* `backend` : the chardev backend info. + +#### Notes + +*Standard VM* + +* `id` in `chardev-add` should be same as `id` in `netdev_add`. 
+ +#### Example + +```json +-> { "execute": "chardev-add", "arguments": { "id": "chardev_id", "backend": { "type": "socket", "data": { "addr": { "type": "unix", "data": { "path": "/path/to/socket" } }, "server": false } } } } +<- { "return": {} } +``` + +### chardev-remove + +Remove a character device backend. + +#### Arguments + +* `id` : the character device's ID. + +#### Example + +```json +-> { "execute": "chardev-remove", "arguments": { "id": "chardev_id" } } +<- { "return": {} } ``` ## Hot plug management -StratoVirt supports hot-plug virtio-blk and virtio-net devices with QMP. Standard VM supports hot-plug vfio devices. +StratoVirt supports hot-plug virtio-blk and virtio-net devices with QMP. Standard VM supports hot-plug vfio and vhost-user net devices. ### device_add @@ -171,8 +283,8 @@ Add a device. #### Example ```json -<- {"execute":"device_add", "arguments":{"id":"net-0", "driver":"virtio-net-mmio", "addr":"0x0"}} --> {"return": {}} +-> { "execute": "device_add", "arguments": { "id": "net-0", "driver": "virtio-net-mmio", "addr": "0x0" } } +<- { "return": {} } ``` ### device_del @@ -190,9 +302,9 @@ Remove a device from a guest. #### Example ```json -<- {"execute": "device_del", "arguments": {"id": "net-0"}} --> {"event":"DEVICE_DELETED","data":{"device":"net-0","path":"net-0"},"timestamp":{"seconds":1614310541,"microseconds":554250}} --> {"return": {}} +-> { "execute": "device_del", "arguments": { "id": "net-0" } } +<- { "event": "DEVICE_DELETED", "data": { "device": "net-0", "path": "net-0" }, "timestamp": { "seconds": 1614310541, "microseconds": 554250 } } +<- { "return": {} } ``` ## Lifecycle Management @@ -207,9 +319,9 @@ Stop all guest VCPUs execution. #### Example ```json -<- {"execute":"stop"} --> {"event":"STOP","data":{},"timestamp":{"seconds":1583908726,"microseconds":162739}} --> {"return":{}} +-> { "execute": "stop" } +<- { "event": "STOP", "data": {}, "timestamp": { "seconds": 1583908726, "microseconds": 162739 } } +<- { "return": {} } ``` ### cont @@ -219,46 +331,59 @@ Resume all guest VCPUs execution. #### Example ```json -<- {"execute":"cont"} --> {"event":"RESUME","data":{},"timestamp":{"seconds":1583908853,"microseconds":411394}} --> {"return":{}} +-> { "execute": "cont" } +<- { "event": "RESUME", "data": {}, "timestamp": { "seconds": 1583908853, "microseconds": 411394 } } +<- { "return": {} } ``` -### quit +### system_reset -This command will cause StratoVirt process to exit gracefully. +Reset all guest VCPUs execution. #### Example ```json -<- {"execute":"quit"} --> {"return":{}} --> {"event":"SHUTDOWN","data":{"guest":false,"reason":"host-qmp-quit"},"timestamp":{"ds":1590563776,"microseconds":519808}} +-> { "execute": "system_reset" } +<- { "return": {} } +<- { "event": "RESET", "data": { "guest": true }, "timestamp": { "seconds": 1677381086, "microseconds": 432033 } } ``` -### query-status +### system_powerdown -Query the running status of all VCPUs. +Requests that a guest perform a powerdown operation. #### Example ```json -<- { "execute": "query-status" } --> { "return": { "running": true,"singlestep": false,"status": "running" } } +-> { "execute": "system_powerdown" } +<- { "return": {} } +<- { "event": "POWERDOWN", "data": {}, "timestamp": { "seconds": 1677850193, "microseconds": 617907 } } ``` -### getfd +### quit -Receive a file descriptor via SCM rights and assign it a name. +This command will cause StratoVirt process to exit gracefully. 
#### Example ```json -<- { "execute": "getfd", "arguments": { "fdname": "fd1" } } --> { "return": {} } +-> { "execute": "quit" } +<- { "return": {} } +<- { "event": "SHUTDOWN", "data": { "guest": false, "reason": "host-qmp-quit" }, "timestamp": { "ds": 1590563776, "microseconds": 519808 } } ``` -## balloon +### query-status + +Query the running status of all VCPUs. + +#### Example + +```json +-> {"execute": "query-status"} +<- {"return": { "running": true,"singlestep": false,"status": "running"}} +``` + +## Balloon device backend management With QMP command you can set target memory size of guest and get memory size of guest. @@ -273,8 +398,8 @@ Set target memory size of guest. #### Example ```json -<- { "execute": "balloon", "arguments": { "value": 2147483648 } } --> {"return":{}} +-> { "execute": "balloon", "arguments": { "value": 2147483648 } } +<- { "return": {} } ``` ### query-balloon @@ -284,8 +409,8 @@ Get memory size of guest. #### Example ```json -<- { "execute": "query-balloon" } --> {"return":{"actual":2147483648}} +-> { "execute": "query-balloon" } +<- { "return": { "actual": 2147483648 } } ``` ## Migration @@ -301,8 +426,8 @@ Take a snapshot of the VM into the specified directory. #### Example ```json -<- {"execute":"migrate", "arguments":{"uri":"file:path/to/template"}} --> {"return":{}} +-> { "execute": "migrate", "arguments": { "uri": "file:path/to/template" } } +<- { "return": {} } ``` ### query-migrate @@ -322,15 +447,176 @@ Now there are 5 states during snapshot: #### Example ```json -<- {"execute":"query-migrate"} --> {"return":{"status":"completed"}} +-> { "execute": "query-migrate" } +<- { "return": { "status": "completed" } } ``` +## Snapshot + +### blockdev-snapshot-internal-sync + +Create disk internal snapshot. + +#### Arguments + +* `device` - the valid block device. +* `name` - the snapshot name. + +#### Example + +```json +-> { "execute": "blockdev-snapshot-internal-sync", "arguments": { "device": "disk0", "name": "snapshot1" } } +<- { "return": {} } +``` + +### blockdev-snapshot-delete-internal-sync + +Delete disk internal snapshot. + +#### Arguments + +* `device` - the valid block device. +* `name` - the snapshot name. + +#### Example + +```json +-> { "execute": "blockdev-snapshot-delete-internal-sync", "arguments": { "device": "disk0", "name": "snapshot1" } } +<- { "return": { "id": "1", "name": "snapshot0", "vm-state-size": 0, "date-sec": 1000012, "date-nsec": 10, "vm-clock-sec": 100, vm-clock-nsec": 20, "icount": 220414 } } +``` + +## Debug + +### query-vcpu-reg + +Query vcpu register value. + +#### Arguments + +* `addr` : the register address. +* `vcpu` : vcpu id. + +#### Notes + +- The VM will pause during the query and then resume. +- Only aarch64 is supported now. + +#### Example + +```json +-> {"execute": "query-vcpu-reg", "arguments": {"addr": "603000000013df1a", "vcpu": 0}} +<- {"return": "348531C5"} +``` + +### query-mem-gpa + +Query the value of the guest physical address. + +#### Arguments + +* `gpa` : the guest physical address. + +#### Example + +```json +-> {"execute": "query-mem-gpa", "arguments": {"gpa": "13c4d1d00" }} +<- {"return": "B9000001"} +``` + +### query-display-image + +Query the display image of virtiogpu. Currently only stdvm and gtk supports. + +#### Example + +```json +-> { "execute": "query-display-image" } +<- { "return": { "fileDir": "/tmp/stratovirt-images", "isSuccess": true } } +``` + +### query-workloads + +Query the workloads of the vm. 
+ +#### Example + +```json +-> {"execute": "query-workloads", "arguments": {}} +<- {"return":[{"module":"scream-play","state":"Off"},{"module":"tap-0","state":"upload: 0 download: 0"}]} +``` + +### trace-get-state + +Query whether the trace state is enabled. + +#### Arguments + +* `name` : Pattern used to match trace name. + +#### Example + +```json +-> { "execute": "trace-get-state", "arguments": { "name": "trace_name" } } +<- { "return": [ { "name": "trace_name", "state": "disabled" } ] } +``` + +### trace-set-state + +Set the state of trace. + +#### Arguments + +* `name` : Pattern used to match trace name. +* `enable` : Whether to enable trace state. + +#### Example + +```json +-> { "execute": "trace-set-state", "arguments": { "name": "trace_name","enable": true } } +<- { "return": {} } +``` + +## Others + +### getfd + +Receive a file descriptor via SCM rights and assign it a name. + +#### Example + +```json +-> { "execute": "getfd", "arguments": { "fdname": "fd1" } } +<- { "return": {} } + ``` + +### switch-audio-record + +Control if the scream device can use host's microphone record. + +#### Arguments + +* `authorized` : "on" means scream can use host's microphone record, "off" opposites in meaning. + +#### Example + +```json +-> { "execute": "switch-audio-record", "arguments": { "authorized": "on" } } +<- { "return": {} } + ``` + ## Event Notification When some events happen, connected client will receive QMP events. -Now StratoVirt supports four events: `SHUTDOWN`, `STOP`, `RESUME`, `DEVICE_DELETED`. +Now StratoVirt supports these events: + +- `SHUTDOWN`: Emitted when the virtual machine has shut down, indicating that StratoVirt is about to exit. +- `RESET`: Emitted when the virtual machine is reset. +- `STOP`: Emitted when the virtual machine is stopped. +- `RESUME`: Emitted when the virtual machine resumes execution. +- `POWERDOWN`: Emitted when the virtual machine powerdown execution. +- `DEVICE_DELETED`: Emitted whenever the device removal completion is acknowledged by the guest. +- `BALLOON_CHANGED`: Emitted when the virtual machine changes the actual BALLOON level. ## Flow control diff --git a/docs/quickstart.ch.md b/docs/quickstart.ch.md index 29e1496a25dec4a6a2474934252d002ccb81cdd1..7f8892c5a4829d37a80e602ebd1ff80f0ef3687e 100644 --- a/docs/quickstart.ch.md +++ b/docs/quickstart.ch.md @@ -3,9 +3,9 @@ ## 1. 准备工作 * 主机操作系统 - + StratoVirt可以运行在x86_64和aarch64平台。 - + 最重要的是StratoVirt是基于Linux内核的虚拟机(KVM)构建的,因此在运行的平台上需要保证有KVM内核模块的存在。 * 权限 @@ -26,7 +26,7 @@ $ sudo yum install stratovirt 安装完成后可以找到StratoVirt二进制的路径: `/usr/bin/stratovirt`. -如果需要自己构建StratoVirt二进制, 可以参考[构建指导](./build_guide_ch.md). +如果需要自己构建StratoVirt二进制, 可以参考[构建指导](./build_guide.ch.md). ## 3. 运行StratoVirt @@ -34,10 +34,10 @@ $ sudo yum install stratovirt StratoVirt当前提供了两种虚拟机:微虚拟机和标准虚拟机(x86_64平台q35主板和aarch平台的virt主板)。 作为快速入门,以下展示启动微虚拟机。 首先,需要PE格式的Linux内核二进制和ext4文件系统镜像(作为rootfs)。 -* `x86_64` 启动资源: [内核二进制](https://repo.openeuler.org/openEuler-21.03/stratovirt_img/x86_64/vmlinux.bin) -and [rootfs镜像](https://repo.openeuler.org/openEuler-21.03/stratovirt_img/x86_64/openEuler-21.03-stratovirt-x86_64.img.xz). -* `aarch64` 启动资源: [内核二进制](https://repo.openeuler.org/openEuler-21.03/stratovirt_img/aarch64/vmlinux.bin) -and [rootfs镜像](https://repo.openeuler.org/openEuler-21.03/stratovirt_img/aarch64/openEuler-21.03-stratovirt-aarch64.img.xz). 
+* `x86_64` 启动资源: [内核二进制](https://repo.openeuler.org/openEuler-22.03-LTS/stratovirt_img/x86_64/vmlinux.bin) +and [rootfs镜像](https://repo.openeuler.org/openEuler-22.03-LTS/stratovirt_img/x86_64/openEuler-22.03-LTS-stratovirt-x86_64.img.xz). +* `aarch64` 启动资源: [内核二进制](https://repo.openeuler.org/openEuler-22.03-LTS/stratovirt_img/aarch64/vmlinux.bin) +and [rootfs镜像](https://repo.openeuler.org/openEuler-22.03-LTS/stratovirt_img/aarch64/openEuler-22.03-LTS-stratovirt-aarch64.img.xz). 也可以通过以下的shell脚本获取内核二进制和rootfs镜像: @@ -45,11 +45,11 @@ and [rootfs镜像](https://repo.openeuler.org/openEuler-21.03/stratovirt_img/aar arch=`uname -m` dest_kernel="vmlinux.bin" dest_rootfs="rootfs.ext4" -image_bucket_url="https://repo.openeuler.org/openEuler-21.03/stratovirt_img" +image_bucket_url="https://repo.openeuler.org/openEuler-22.03-LTS/stratovirt_img" if [ ${arch} = "x86_64" ] || [ ${arch} = "aarch64" ]; then kernel="${image_bucket_url}/${arch}/vmlinux.bin" - rootfs="${image_bucket_url}/${arch}/openEuler-21.03-stratovirt-${arch}.img.xz" + rootfs="${image_bucket_url}/${arch}/openEuler-22.03-LTS-stratovirt-${arch}.img.xz" else echo "Cannot run StratoVirt on ${arch} architecture!" exit 1 @@ -77,7 +77,7 @@ rm -f ${socket_path} # 通过StratoVirt启动轻量化机型的Linux客户机。 /usr/bin/stratovirt \ - -machine microvm + -machine microvm \ -kernel ${kernel_path} \ -smp 1 \ -m 1024 \ @@ -88,7 +88,7 @@ rm -f ${socket_path} -serial stdio ``` -在标准输入输出串口上提示登入客户机。 如果使用我们提供的`openEuler-21.03-stratovirt-aarch64.img`镜像, +在标准输入输出串口上提示登入客户机。 如果使用我们提供的`openEuler-22.03-LTS-stratovirt.img`镜像, 可以使用用户名`root`和密码`openEuler12#$`进行登入。 如果想要停止客户机,可以通过在客户机内部输入`reboot`命令来实际关闭StratoVirt。 diff --git a/docs/quickstart.md b/docs/quickstart.md index add534cb246d68b555e96485cb8221cab7964111..80275cef5dc2320b7dd2acada395678f22877a1b 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -37,10 +37,10 @@ are microvm and standard_vm("q35" on x86_64 platform and "virt" on aarch64 platf As a quick start, we show how to start a VM with microvm. First, you will need an PE format Linux kernel binary, and an ext4 file system image (as rootfs). -* `x86_64` boot source: [kernel](https://repo.openeuler.org/openEuler-21.03/stratovirt_img/x86_64/vmlinux.bin) -and [rootfs](https://repo.openeuler.org/openEuler-21.03/stratovirt_img/x86_64/openEuler-21.03-stratovirt-x86_64.img.xz). -* `aarch64` boot source: [kernel](https://repo.openeuler.org/openEuler-21.03/stratovirt_img/aarch64/vmlinux.bin) -and [rootfs](https://repo.openeuler.org/openEuler-21.03/stratovirt_img/aarch64/openEuler-21.03-stratovirt-aarch64.img.xz). +* `x86_64` boot source: [kernel](https://repo.openeuler.org/openEuler-22.03-LTS/stratovirt_img/x86_64/vmlinux.bin) +and [rootfs](https://repo.openeuler.org/openEuler-22.03-LTS/stratovirt_img/x86_64/openEuler-22.03-LTS-stratovirt-x86_64.img.xz). +* `aarch64` boot source: [kernel](https://repo.openeuler.org/openEuler-22.03-LTS/stratovirt_img/aarch64/vmlinux.bin) +and [rootfs](https://repo.openeuler.org/openEuler-22.03-LTS/stratovirt_img/aarch64/openEuler-22.03-LTS-stratovirt-aarch64.img.xz). 
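+
+The rootfs images above are xz-compressed. As an illustrative step only (the file name
+depends on the architecture you downloaded), unpack the image before passing it to
+StratoVirt:
+
+```shell
+# Illustrative only: decompress the downloaded rootfs image; the result is a
+# plain ext4 image (e.g. openEuler-22.03-LTS-stratovirt-x86_64.img).
+xz -d openEuler-22.03-LTS-stratovirt-x86_64.img.xz
+```
+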
Or get the kernel and rootfs with shell: @@ -48,11 +48,11 @@ Or get the kernel and rootfs with shell: arch=`uname -m` dest_kernel="vmlinux.bin" dest_rootfs="rootfs.ext4" -image_bucket_url="https://repo.openeuler.org/openEuler-21.03/stratovirt_img" +image_bucket_url="https://repo.openeuler.org/openEuler-22.03-LTS/stratovirt_img" if [ ${arch} = "x86_64" ] || [ ${arch} = "aarch64" ]; then kernel="${image_bucket_url}/${arch}/vmlinux.bin" - rootfs="${image_bucket_url}/${arch}/openEuler-21.03-stratovirt-${arch}.img.xz" + rootfs="${image_bucket_url}/${arch}/openEuler-22.03-LTS-stratovirt-${arch}.img.xz" else echo "Cannot run StratoVirt on ${arch} architecture!" exit 1 @@ -80,7 +80,7 @@ rm -f ${socket_path} # Start linux VM with machine type "microvm" by StratoVirt. /usr/bin/stratovirt \ - -machine microvm + -machine microvm \ -kernel ${kernel_path} \ -smp 1 \ -m 1024 \ @@ -92,7 +92,7 @@ rm -f ${socket_path} ``` You should now see a serial in stdio prompting you to log into the guest machine. -If you used our `openEuler-21.03-stratovirt-aarch64.img` image, you can login as +If you used our `openEuler-22.03-LTS-stratovirt.img` image, you can login as `root`, using the password `openEuler12#$`. If you want to quit the guest machine, using a `reboot` command inside the guest diff --git a/docs/snapshot.md b/docs/snapshot.md index a8ebffd2cda25cd85d09473f2c8121f79ada5b0f..5e9c118b2577b0276249d699664ef2fba2732fc6 100644 --- a/docs/snapshot.md +++ b/docs/snapshot.md @@ -25,7 +25,7 @@ $ ncat -U path/to/socket {"return":{}} ``` -When VM is in paused state, is's safe to take a snapshot of the VM into the specified directory with QMP. +When VM is in paused state, it's safe to take a snapshot of the VM into the specified directory with QMP. ```shell $ ncat -U path/to/socket {"QMP":{"version":{"StratoVirt":{"micro":1,"minor":0,"major":0},"package":""},"capabilities":[]}} @@ -86,6 +86,9 @@ Some devices and feature don't support to be snapshot yet: - `vfio` devices - `balloon` - `hugepage`,`mem-shared`,`backend file of memory` +- `pmu` +- `sve` +- `gic-version=2` Some device attributes can't be changed: - `virtio-net`: mac @@ -94,4 +97,4 @@ Some device attributes can't be changed: - `smp` - `m` -For machine type `microvm`, if use `hot-replace` before snapshot, add newly replaced device to restore command. \ No newline at end of file +For machine type `microvm`, if use `hot-replace` before snapshot, add newly replaced device to restore command. diff --git a/docs/stratovirt-img.md b/docs/stratovirt-img.md new file mode 100644 index 0000000000000000000000000000000000000000..cca517a99665e6534c0c9cd39fed455f061bf931 --- /dev/null +++ b/docs/stratovirt-img.md @@ -0,0 +1,113 @@ +# stratovirt-img + +stratovirt-img is an offline tool for virtual disks. + +Usage: + +```shell +stratovirt-img command [command options] +``` + +Command parameters: + +- img_path: the path for image. +- fmt: disk format. +- img_size: size for image, the unit can be K, M, G or none for bytes. +- options is a comma separated list of format specific options in a name=value format. + +Following commands are supported now: + +## Create + +Create virtual disk with different format. +Command syntax: + +```shell +create [-f fmt] [-o options] img_path img_size +``` + +Sample Configuration: + +```shell +stratovirt-img create -f raw img_path img_size +stratovirt-img create -f qcow2 -o cluster-size=65536 img_path img_size +``` + +Note: 1. The cluster size can be only be set for `qcow2` or default to 65536. 2. Disk format is default to raw. 
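+
+As a concrete illustration of the size units mentioned above (the paths and sizes below
+are arbitrary placeholders, not required values):
+
+```shell
+# Illustrative sketch: img_size accepts K/M/G suffixes or plain bytes, and
+# -o cluster-size only applies to the qcow2 format.
+stratovirt-img create -f raw /tmp/test-raw.img 8G
+stratovirt-img create -f qcow2 -o cluster-size=65536 /tmp/test.qcow2 16G
+```
+
+The `info` command described next can then be used to inspect the created images.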
+ +## Info + +Query the information of virtual disk. + +Sample Configuration: + +```shell +stratovirt-img info img_path +``` + +## Check + +Check if there are some mistakes on the image and choose to fix. +Command syntax: + +```shell +check [-r {leaks|all}] [-no_print_error] [-f fmt] img_path +``` + +- -r: `leaks` means only the leaked cluster will be fixed, `all` means all repairable mistake will be fixed. +- -no_print_error: do not print detailed error messages. + +Sample Configuration: + +```shell +stratovirt-img check img_path +``` + +Note: The command of check is not supported by raw format. + +## Resize + +Change the virtual size of the disk. +- `+size`means increase from old size, while `size` means resize to new size. + +Command syntax: + +```shell +resize [-f fmt] img_path [+]size +``` + +Sample Configuration: + +```shell +stratovirt-img resize -f qcow2 img_path +size +stratovirt-img resize -f raw img_path +size +``` + +Note: Shrink operation is not supported now. + +## Snapshot + +Operating internal snapshot for disk, it is only supported by qcow2. +Command syntax: + +```shell +snapshot [-l | -a snapshot_name | -c snapshot_name | -d snapshot_name | -r old_snapshot_name new_snapshot_name] img_path +``` + +- -a snapshot_name: applies a snapshot (revert disk to saved state). +- -c snapshot_name: creates a snapshot. +- -d snapshot_name: deletes a snapshot. +- -l: lists all snapshots in the given image. +- -r old_snapshot_name new_snapshot_name: change the name from 'old_Snapshot_name' to 'new_Snapshot_name'. + +Sample Configuration: + +```shell +stratovirt-img snapshot -c snapshot_name img_path +stratovirt-img snapshot -a snapshot_name img_path +stratovirt-img snapshot -d snapshot_name img_path +stratovirt-img snapshot -l img_path +stratovirt-img snapshot -r old_snapshot_name new_snapshot_name img_path +``` + +Note: The internal snapshot is not supported by raw. diff --git a/docs/stratovirt_aarch64.xml b/docs/stratovirt_aarch64.xml index fa37d2baaa8a0e496b01cd9c014f98104340158f..b0dbfc5f5825778432ffa7998c6808d42acf3d50 100644 --- a/docs/stratovirt_aarch64.xml +++ b/docs/stratovirt_aarch64.xml @@ -61,6 +61,13 @@ /path/to/random_file
+ + + + + diff --git a/docs/stratovirt_x86.xml b/docs/stratovirt_x86.xml index 68069c02c799117b3938cd977a95f31b55618d13..8732a9a4331f34caded127bed3dbe286dacd54b7 100644 --- a/docs/stratovirt_x86.xml +++ b/docs/stratovirt_x86.xml @@ -69,6 +69,13 @@ /path/to/random_file
+ + + + + diff --git a/docs/test.md b/docs/test.md new file mode 100644 index 0000000000000000000000000000000000000000..c12e17b0c9c499cb85ebb104881c3dd323f0b0b2 --- /dev/null +++ b/docs/test.md @@ -0,0 +1,44 @@ +# StratoVirt test + +StratoVirt supports two test modes: unit test and mod test. It should be noted that mod test is not fully supported on the x86_64 architecture now. + +## Unit test + +Unit tests are Rust functions that verify that the non-test code is functioning in the expected manner. We recommend performing unit test execution separately, run StratoVirt unit test as follows: + +```shell +$ cargo test --workspace --exclude mod_test -- --nocapture --test-threads=1 +``` + +## Mod test + +StratoVirt mod test is an integrated testing method. During the test, the StratoVirt process will be started as the server and communicate through socket and QMP to test the StratoVirt module function. + +Before running mod test, we need to compile `stratovirt` and `virtiofsd` first, and then export the environment variables `STRATOVIRT_BINARY` and `VIRTIOFSD_BINARY`. + +Build StratoVirt: + +```shell +$ cargo build --workspace --bins --release --target=aarch64-unknown-linux-gnu --all-features +``` + +Build virtiofsd: + +```shell +$ git clone https://gitlab.com/virtio-fs/virtiofsd.git +$ cd virtiofsd +$ cargo build --release +``` + +Export the environment variables `STRATOVIRT_BINARY` and `VIRTIOFSD_BINARY`: + +```shell +$ export STRATOVIRT_BINARY="/path/to/stratovirt" +$ export VIRTIOFSD_BINARY="/path/to/virtiofsd" +``` + +Run StratoVirt mod test as follows: + +```shell +$ cargo test --all-features -p mod_test -- --nocapture --test-threads=1 +``` diff --git a/docs/trace.md b/docs/trace.md index fb737f55daad9e58c4f13155b3b9713b6b8faea3..d4623fa6a97b10904eb501c399d9a159e60d75f0 100644 --- a/docs/trace.md +++ b/docs/trace.md @@ -3,32 +3,89 @@ This document describes the way for debugging and profiling in StratoVirt and how to use it. -## Ftrace +## Add trace -Ftrace is a tracer provided by Linux kernel, which can help linux developers to -debug or analyze issues. As ftrace can avoid performance penalty, it's especially -suited for performance issues. +### Modify configuration file -StratoVirt use ftrace by writting trace data to ftrace marker, and developers can -read trace records from *trace* file under mounted ftrace director, -e.g. /sys/kernel/debug/tracing/trace. +First, you need to modify or crate toml file in the trace/trace_info directory to +add a new event or scope in order to generate the trace function. For example: + +```toml +[[events]] +name = "virtio_receive_request" +args = "device: String, behaviour: String" +message = "{}: Request received from guest {}, ready to start processing." +enabled = true + +[[scopes]] +name = "update_cursor" +args = "" +message = "" +enabled = true +``` -## How to use +In the above configuration, "name" is used to represent the only trace, and +duplication is not allowed; "message" and "args" will be formatted as information +output by trace; "enabled" indicates whether it is enabled during compilation. -Trace events are put in StratoVirt by the macro *ftrace!*. The first parameter the -macro receives is name of the trace event. Remaining parameters the macro receives -are the same as *println!* or *format!*, i.e. the first parameter is a format string, -and additional parameters passed replace the {}s within the format string. +### Call trace function +Just call the trace function where needed. 
```rust -#[macro_use] -extern crate util; +fn process_queue(&mut self) -> Result<()> { + trace::virtio_receive_request("Rng".to_string(), "to IO".to_string()); + let mut queue_lock = self.queue.lock().unwrap(); + let mut need_interrupt = false; + ...... +} -fn trace_example() { - ftrace!(trace_example, "Test for tracer."); +fn update_cursor(&mut self, info_cursor: &VirtioGpuUpdateCursor, hdr_type: u32) -> Result<()> { + // Trace starts from here, and ends when it leaves this scope + trace::trace_scope_start!(update_cursor); + ...... } ``` -Trace events in StratoVirt are disabled by default. Users can pass the file listing -enabled events by launching StratoVirt with "-trace events=". The file should -contains one event name per line. +## Trace control interface + +Trace state in StratoVirt is disabled by default. Users can control whether +the trace state is enabled through the command line or the QMP command. + +### Command line +Before starting, you can prepare the list of traces that need to be enabled +and pass it to StratoVirt through [-trace](config_guidebook.md#3-trace). + +### QMP +While StratoVirt is running, you can send the [trace-set-state](qmp.md#trace-set-state) +command through the QMP socket to enable or disable the trace state. Similarly, +the [trace-get-state](qmp.md#trace-get-state) command can be used to check +whether the setting took effect. + +## Choose trace backends + +By setting different features during compilation, trace can generate the +corresponding code to support different trace tools. StratoVirt currently supports +the following settings. + +### log + +StratoVirt supports outputting trace to the log file at trace level. Turn on +the **trace_to_logger** feature to use it. + +### Ftrace + +Ftrace is a tracer provided by the Linux kernel, which can help Linux developers to +debug or analyze issues. As ftrace incurs little performance penalty, it is especially +suited for performance issues. + +It can be enabled by turning on the **trace_to_ftrace** feature during compilation. +StratoVirt uses ftrace by writing trace data to the ftrace marker, and developers can +read trace records from the trace file under the mounted ftrace directory, +e.g. /sys/kernel/debug/tracing/trace. + +### HiTraceMeter + +HiTraceMeter (https://gitee.com/openharmony/hiviewdfx_hitrace) is a tool used by developers +to trace processes and measure performance. Based on ftrace, it provides the ability +to measure the execution time of user-mode application code. After turning on the +**trace_to_hitrace** feature, it can be used on HarmonyOS. diff --git a/docs/vfio.md b/docs/vfio.md index bd0aeead6f527933a81b90ca0272e6f698000de9..0316e6ab78fef88078b45201079777f13f9fa3b5 100644 --- a/docs/vfio.md +++ b/docs/vfio.md @@ -2,13 +2,13 @@ ## Introduction -The VFIO driver is an IOMMU/device agnostic framework for exposing direct access to userspace, in a secure, -IOMMU protected environment. Virtual machine often makes use of direct device access when configured for the highest +The VFIO driver is an IOMMU/device agnostic framework for exposing direct access to userspace, in a secure, +IOMMU protected environment. Virtual machines often make use of direct device access when configured for the highest possible I/O performance. ## Preparation -In order to successfully use VFIO device, it is mandatory that hardware supports virtualization and IOMMU groups. +In order to successfully use a VFIO device, it is mandatory that the hardware supports virtualization and IOMMU groups.
Execute the following command on your host OS to check whether the IOMMU has been turned on. ```shell # dmesg | grep iommu @@ -49,6 +49,7 @@ Four properties are supported for VFIO device -device vfio-pci,host=0000:1a:00.3,id=net,bus=pcie.0,addr=0x03.0x0[,multifunction=on] ``` Note: the kernel must contain physical device drivers, otherwise it cannot be loaded normally. +Note: avoid using balloon devices and vfio devices together. ## Hot plug management @@ -59,14 +60,14 @@ Refer to qmp.md for specific command line parameters. hot plug VFIO device: ```json -<- {"execute":"device_add", "arguments":{"id":"vfio-0", "driver":"vfio-pci", "bus": "pcie.1", "addr":"0x0", "host": "0000:1a:00.3"}} --> {"return": {}} +-> {"execute":"device_add", "arguments":{"id":"vfio-0", "driver":"vfio-pci", "bus": "pcie.1", "addr":"0x0", "host": "0000:1a:00.3"}} +<- {"return": {}} ``` hot unplug VFIO device: ```json -<- {"execute": "device_del", "arguments": {"id": "vfio-0"}} --> {"event":"DEVICE_DELETED","data":{"device":"vfio-0","path":"vfio-0"},"timestamp":{"seconds":1614310541,"microseconds":554250}} --> {"return": {}} +-> {"execute": "device_del", "arguments": {"id": "vfio-0"}} +<- {"event":"DEVICE_DELETED","data":{"device":"vfio-0","path":"vfio-0"},"timestamp":{"seconds":1614310541,"microseconds":554250}} +<- {"return": {}} ``` ## Unbind VFIO device diff --git a/hisysevent/Cargo.toml b/hisysevent/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..11b3b69a0f368a15b2a7281cd385279be2445f36 --- /dev/null +++ b/hisysevent/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "hisysevent" +version = "2.4.0" +authors = ["Huawei StratoVirt Team"] +edition = "2021" +license = "Mulan PSL v2" +description = "Provide hisysevent infrastructure of hmos for StratoVirt" + +[dependencies] +log = "0.4" +lazy_static = "1.4.0" +anyhow = "1.0" +libloading = "0.7.4" +code_generator = { path = "code_generator" } + +[features] +hisysevent = [] diff --git a/hisysevent/build.rs b/hisysevent/build.rs new file mode 100644 index 0000000000000000000000000000000000000000..89563d8528aec20cdfe28ce0643cbce0c243b192 --- /dev/null +++ b/hisysevent/build.rs @@ -0,0 +1,18 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
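+// This build script only emits a `cargo:rerun-if-changed` directive so that Cargo
+// re-runs the build when the watched path under the crate's manifest directory
+// changes; it performs no code generation itself.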
+ +fn main() { + println!( + "cargo:rerun-if-changed={}/hisysevent", + std::env::var("CARGO_MANIFEST_DIR").unwrap() + ); +} diff --git a/hisysevent/code_generator/Cargo.toml b/hisysevent/code_generator/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..9c1fc3c178e422264cfbb83f5388f9c8b8138e6d --- /dev/null +++ b/hisysevent/code_generator/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "code_generator" +version = "2.4.0" +authors = ["Huawei StratoVirt Team"] +edition = "2021" +license = "Mulan PSL v2" + +[lib] +name = "code_generator" +proc-macro = true + +[dependencies] +proc-macro2 = "1.0" +quote = "1.0" +regex = "1" +serde = { version = "1.0", features = ["derive"] } +syn = "2.0.18" +toml = "0.7" diff --git a/hisysevent/code_generator/src/lib.rs b/hisysevent/code_generator/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..8d7c8d068b1658706895cf2ec0463feb66c71ab5 --- /dev/null +++ b/hisysevent/code_generator/src/lib.rs @@ -0,0 +1,202 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{fs, io::Read}; + +use proc_macro::TokenStream; +use quote::quote; +use regex::Regex; +use serde::Deserialize; +use syn::{parse_str, Expr, Ident, Type}; + +const EVENT_DIR_NAME: &str = "event_info"; + +#[derive(Debug, Deserialize)] +struct EventDesc { + name: String, + event_type: String, + args: String, + enabled: bool, +} + +#[derive(Debug, Deserialize)] +struct HiSysEventConf { + events: Option>, +} + +fn get_event_desc() -> HiSysEventConf { + let event_dir_path = format!( + "{}/{}", + std::env::var("CARGO_MANIFEST_DIR").unwrap(), + EVENT_DIR_NAME + ); + let paths = fs::read_dir(event_dir_path).unwrap(); + let mut desc = String::new(); + + for path in paths { + let file_path = path.unwrap().path(); + let file_name = file_path.to_str().unwrap(); + if file_name.ends_with(".toml") { + let mut file = fs::File::open(file_path).unwrap(); + file.read_to_string(&mut desc).unwrap(); + } + } + match toml::from_str::(&desc) { + Ok(ret) => ret, + Err(e) => panic!("Failed to parse event info : {}", e), + } +} + +fn is_slice(arg_type: &str) -> bool { + let regex = Regex::new(r"\[([^\[\]]*)\]").unwrap(); + let match_texts = regex + .captures_iter(arg_type) + .map(|mat| mat.get(1).map_or("", |m| m.as_str())); + match match_texts.count() { + 0 => false, + 1 => true, + _ => panic!("The format of parameter type: {} is wrong!", arg_type), + } +} + +fn capitalize(s: &str) -> String { + if s.is_empty() { + return String::new(); + } + + let mut chars = s.chars().collect::>(); + if chars[0].is_alphabetic() { + chars[0] = chars[0] + .to_uppercase() + .collect::() + .chars() + .next() + .unwrap(); + } + chars.iter().collect() +} + +fn parse_param_type(arg_type: &str) -> String { + if is_slice(arg_type) { + let regex = Regex::new(r"\[([^\[\]]*)\]").unwrap(); + let match_texts: Vec<&str> = regex + .captures_iter(arg_type) + .map(|mat| mat.get(1).map_or("", |m| m.as_str())) + .collect(); + format!("Array{}", capitalize(match_texts[0])) + } else { + 
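+        // Scalar case: e.g. "u32" becomes "TypeU32", which gen_hisysevent_func! later
+        // prefixes to "EventParamType::_TypeU32" to select the matching enum variant.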
format!("Type{}", capitalize(arg_type)) + } +} + +fn generate_param_value(arg_type: &str, arg_value: &str) -> (Ident, Expr) { + let param_type: Ident; + let param_value: Expr; + if is_slice(arg_type) { + let trans_token = ".as_ptr() as *const std::ffi::c_int as *const ()"; + param_type = parse_str::("void_ptr_value").unwrap(); + param_value = parse_str::(format!("{}{}", arg_value, trans_token).as_str()).unwrap(); + } else if arg_type.contains("String") { + let cstr_arg = format!("std::ffi::CString::new({}).unwrap()", arg_value); + let trans_token = ".into_raw() as *const std::ffi::c_char"; + param_type = parse_str::("char_ptr_value").unwrap(); + param_value = parse_str::(format!("{}{}", cstr_arg, trans_token).as_str()).unwrap(); + } else { + param_type = parse_str::(format!("{}_value", arg_type).as_str()).unwrap(); + param_value = parse_str::(format!("{} as {}", arg_value, arg_type).as_str()).unwrap(); + } + (param_type, param_value) +} + +#[proc_macro] +pub fn gen_hisysevent_func(_input: TokenStream) -> TokenStream { + let events = match get_event_desc().events { + Some(events) => events, + None => return TokenStream::from(quote!()), + }; + let hisysevent_func = events.iter().map(|desc| { + if desc.name.trim().is_empty() { + panic!("Empty event name is unsupported!"); + } + let desc_name = desc.name.trim(); + let func_name = parse_str::(desc_name).unwrap(); + let event_name = desc_name; + let event_type = + parse_str::(format!("HiSysEventType::_{}", desc.event_type.trim()).as_str()) + .unwrap(); + + let func_args = match desc.args.is_empty() { + true => quote!(), + false => { + let split_args: Vec<&str> = desc.args.split(',').collect(); + let _args = split_args.iter().map(|arg| { + let (v, t) = arg.split_once(':').unwrap(); + let arg_name = parse_str::(v.trim()).unwrap(); + let arg_type = parse_str::(t.trim()).unwrap(); + quote!( + #arg_name: #arg_type, + ) + }); + quote! { #( #_args )* } + } + }; + + let param_body = { + let split_args: Vec<&str> = desc.args.split(',').collect(); + let _args = split_args.iter().map(|arg| { + let (v, t) = arg.split_once(':').unwrap(); + let param_name = v.trim(); + let param_type_str: String = parse_param_type(t.trim()); + let param_type_token = format!("EventParamType::_{}", param_type_str); + let param_type = parse_str::(param_type_token.as_str()).unwrap(); + let (elem_type, elem_value) = generate_param_value(t.trim(), v.trim()); + let param_size = if param_type_str.contains("Array") { + parse_str::(format!("{}.len()", v.trim()).as_str()).unwrap() + } else { + parse_str::("0").unwrap() + }; + + quote!( + EventParam { + param_name: #param_name, + param_type: #param_type, + param_value: EventParamValue{#elem_type: #elem_value}, + array_size: #param_size}, + ) + }); + quote! { #( #_args )* } + }; + + let func_body = match desc.enabled { + true => { + quote!( + #[cfg(all(target_env = "ohos", feature = "hisysevent"))] + { + let func = function!(); + let params: &[EventParam] = &[#param_body]; + write_to_hisysevent(func, #event_name, #event_type as std::ffi::c_int, params); + } + ) + } + false => quote!(), + }; + + quote!( + #[inline(always)] + pub fn #func_name(#func_args) { + #func_body + } + ) + }); + + TokenStream::from(quote! 
{ #( #hisysevent_func )* }) +} diff --git a/hisysevent/event_info/example.toml b/hisysevent/event_info/example.toml new file mode 100644 index 0000000000000000000000000000000000000000..6a5e97ad68abfa1fd3649b66da30132f74fbd21f --- /dev/null +++ b/hisysevent/event_info/example.toml @@ -0,0 +1,5 @@ +[[events]] +name = "example" +event_type = "Behavior" +args = "example_bool: bool, example_str: String, example_integer: u32, example_array: &[u8]" +enabled = true diff --git a/hisysevent/event_info/misc.toml b/hisysevent/event_info/misc.toml new file mode 100644 index 0000000000000000000000000000000000000000..eeb4be8d0efab816a03baf719b14cccf23a1c911 --- /dev/null +++ b/hisysevent/event_info/misc.toml @@ -0,0 +1,5 @@ +[[events]] +name = "STRATOVIRT_PVPANIC" +event_type = "Fault" +args = "event: String" +enabled = true diff --git a/hisysevent/src/interface.rs b/hisysevent/src/interface.rs new file mode 100644 index 0000000000000000000000000000000000000000..5f6ecf64bed8d10fea6e6343dbf25ee87ee582e8 --- /dev/null +++ b/hisysevent/src/interface.rs @@ -0,0 +1,189 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::ffi::{c_char, c_int, c_uint, c_ulonglong, CString, OsStr}; + +use anyhow::{Context, Result}; +use lazy_static::lazy_static; +use libloading::os::unix::Symbol; +use libloading::Library; +use log::error; + +const MAX_PARAM_NAME_LENGTH: usize = 49; + +#[derive(Copy, Clone, Debug)] +pub enum HiSysEventType { + _Fault = 1, + _Statistic, + _Security, + _Behavior, +} + +#[derive(Copy, Clone, Debug)] +pub enum EventParamType { + // Invalid type. + _Invalid = 0, + _TypeBool, + _TypeI8, + _TypeU8, + _TypeI16, + _TypeU16, + _TypeI32, + _TypeU32, + _TypeI64, + _TypeU64, + _TypeF32, + _TypeF64, + _TypeString, + _ArrayBool, + _ArrayI8, + _ArrayU8, + _ArrayI16, + _ArrayU16, + _ArrayI32, + _ArrayU32, + _ArrayI64, + _ArrayU64, + _ArrayF32, + _ArrayF64, +} + +#[repr(C)] +#[derive(Copy, Clone)] +pub union EventParamValue { + pub bool_value: bool, + pub i8_value: i8, + pub u8_value: u8, + pub i16_value: i16, + pub u16_value: u16, + pub i32_value: i32, + pub u32_value: u32, + pub i64_value: i64, + pub u64_value: u64, + pub f32_value: f32, + pub f64_value: f64, + // String. + pub char_ptr_value: *const c_char, + // Array. + pub void_ptr_value: *const (), +} + +pub struct EventParam<'a> { + pub param_name: &'a str, + pub param_type: EventParamType, + pub param_value: EventParamValue, + pub array_size: usize, +} + +#[repr(C)] +#[derive(Copy, Clone)] +struct EventParamWrapper { + pub param_name: [u8; MAX_PARAM_NAME_LENGTH], + pub param_type: c_int, + pub param_value: EventParamValue, + pub array_size: c_uint, +} + +lazy_static! { + static ref HISYSEVENT_FUNC_TABLE: HiSysEventFuncTable = + // SAFETY: The dynamic library should be always existing. + unsafe { + HiSysEventFuncTable::new(OsStr::new("libhisysevent.z.so")) + .map_err(|e| { + error!("failed to init HiSysEventFuncTable with error: {:?}", e); + e + }) + .unwrap() + }; +} + +macro_rules! 
get_libfn { + ( $lib: ident, $tname: ident, $fname: ident ) => { + $lib.get::<$tname>(stringify!($fname).as_bytes()) + .with_context(|| format!("failed to get function {}", stringify!($fname)))? + .into_raw() + }; +} + +type HiSysEventWriteWrapperFn = unsafe extern "C" fn( + func: *const c_char, + line: c_ulonglong, + domain: *const c_char, + name: *const c_char, + event_type: c_int, + params: *const EventParamWrapper, + size: c_uint, +) -> c_int; + +struct HiSysEventFuncTable { + pub hisysevent_write: Symbol, +} + +impl HiSysEventFuncTable { + unsafe fn new(library_name: &OsStr) -> Result { + let library = + Library::new(library_name).with_context(|| "failed to load hisysevent library")?; + + Ok(Self { + hisysevent_write: get_libfn!(library, HiSysEventWriteWrapperFn, HiSysEvent_Write), + }) + } +} + +fn format_param_array(event_params: &[EventParam]) -> Vec { + let mut params_wrapper: Vec = vec![]; + + for param in event_params { + let mut param_name = [0_u8; MAX_PARAM_NAME_LENGTH]; + let name = param.param_name.as_bytes(); + let end = std::cmp::min(name.len(), param_name.len()); + param_name[..end].copy_from_slice(&name[..end]); + params_wrapper.push(EventParamWrapper { + param_name, + param_type: param.param_type as i32, + param_value: param.param_value, + array_size: param.array_size as u32, + }); + } + + params_wrapper +} + +// Write system event. +pub(crate) fn write_to_hisysevent( + func_name: &str, + event_name: &str, + event_type: c_int, + event_params: &[EventParam], +) { + let func = CString::new(func_name).unwrap(); + let domain = CString::new("VM_ENGINE").unwrap(); + let event = CString::new(event_name).unwrap(); + + let params_wrapper = format_param_array(event_params); + + // SAFETY: Call hisysevent function, all parameters are just read. + let ret = unsafe { + (HISYSEVENT_FUNC_TABLE.hisysevent_write)( + func.as_ptr() as *const c_char, + line!() as c_ulonglong, + domain.as_ptr() as *const c_char, + event.as_ptr() as *const c_char, + event_type, + params_wrapper.as_ptr() as *const EventParamWrapper, + params_wrapper.len() as u32, + ) + }; + if ret != 0 { + error!("Failed to write event {} to hisysevent.", event_name); + } +} diff --git a/hisysevent/src/lib.rs b/hisysevent/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..6ab5515a20088d39cb26285e629bc3ed94d32ba8 --- /dev/null +++ b/hisysevent/src/lib.rs @@ -0,0 +1,34 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +#[cfg(all(target_env = "ohos", feature = "hisysevent"))] +mod interface; + +use code_generator::gen_hisysevent_func; + +#[cfg(all(target_env = "ohos", feature = "hisysevent"))] +use crate::interface::*; + +#[macro_export] +macro_rules! function { + () => {{ + fn hook() {} + fn type_name_of(_: T) -> &'static str { + std::any::type_name::() + } + let name = type_name_of(hook); + let off_set: usize = 6; // ::hook + &name[..name.len() - off_set] + }}; +} + +gen_hisysevent_func! 
{} diff --git a/hypervisor/Cargo.toml b/hypervisor/Cargo.toml index 65fe2ae9adc2e68d99f027720ecde49a4e40ac01..44308ef49c3b1fc0c1ec30d75019599c7aae43a7 100644 --- a/hypervisor/Cargo.toml +++ b/hypervisor/Cargo.toml @@ -1,18 +1,28 @@ [package] name = "hypervisor" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" license = "Mulan PSL v2" [dependencies] -arc-swap = "0.4.8" -error-chain = "0.12.4" -kvm-bindings = ">=0.3.0" -kvm-ioctls = "0.6.0" -log = "0.4.8" +anyhow = "1.0" +thiserror = "1.0" +kvm-bindings = { version = "0.7.0", features = ["fam-wrappers"] } +kvm-ioctls = "0.16.0" +libc = "0.2" +log = "0.4" +vmm-sys-util = "0.12.1" +address_space = { path = "../address_space" } +cpu = { path = "../cpu" } +devices = { path = "../devices" } +machine_manager = { path = "../machine_manager" } migration = { path = "../migration" } -migration_derive = { path = "../migration_derive" } +migration_derive = { path = "../migration/migration_derive" } util = { path = "../util" } -vmm-sys-util = ">=0.7.0" -once_cell = "1.9.0" +trace = { path = "../trace" } + +[features] +default = [] +vfio_device = [] +boot_time = [] diff --git a/hypervisor/src/error.rs b/hypervisor/src/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..c5742611eec4f1cf949d70992847c45c16173002 --- /dev/null +++ b/hypervisor/src/error.rs @@ -0,0 +1,38 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use thiserror::Error; + +#[allow(clippy::upper_case_acronyms)] +#[derive(Error, Debug)] +pub enum HypervisorError { + #[cfg(target_arch = "x86_64")] + #[error("Failed to set identity map address.")] + SetIdentityMapAddr, + #[cfg(target_arch = "x86_64")] + #[error("Failed to set tss address.")] + SetTssErr, + #[cfg(target_arch = "x86_64")] + #[error("Failed to create PIT.")] + CrtPitErr, + #[error("Failed to create irq chip.")] + #[cfg(target_arch = "x86_64")] + CrtIrqchipErr, + #[error("Failed to create KVM device: {0:#?}.")] + CreateKvmDevice(kvm_ioctls::Error), + #[error("No available kvm_mem_slot, total count is {0}")] + NoAvailKvmSlot(usize), + #[error("Failed to find matched kvm_mem_slot, addr 0x{0:X}, size 0x{1:X}")] + NoMatchedKvmSlot(u64, u64), + #[error("Added KVM mem range (0x{:X}, 0x{:X}) overlaps with exist one (0x{:X}, 0x{:X})", add.0, add.1, exist.0, exist.1)] + KvmSlotOverlap { add: (u64, u64), exist: (u64, u64) }, +} diff --git a/hypervisor/src/kvm/aarch64/core_regs.rs b/hypervisor/src/kvm/aarch64/core_regs.rs new file mode 100644 index 0000000000000000000000000000000000000000..6d49713f1d492edafddcb60abe55233c4e0c11f8 --- /dev/null +++ b/hypervisor/src/kvm/aarch64/core_regs.rs @@ -0,0 +1,121 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::mem::size_of; + +use kvm_bindings::{ + kvm_regs, user_fpsimd_state, user_pt_regs, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM_CORE, + KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, +}; + +use util::offset_of; + +/// AArch64 cpu core register. +/// See: https://elixir.bootlin.com/linux/v5.6/source/arch/arm64/include/uapi/asm/kvm.h#L50 +/// User structures for general purpose, floating point and debug registers. +/// See: https://elixir.bootlin.com/linux/v5.6/source/arch/arm64/include/uapi/asm/ptrace.h#L75 +pub enum Arm64CoreRegs { + KvmSpEl1, + KvmElrEl1, + KvmSpsr(usize), + UserPTRegRegs(usize), + UserPTRegSp, + UserPTRegPc, + UserPTRegPState, + UserFPSIMDStateVregs(usize), + UserFPSIMDStateFpsr, + UserFPSIMDStateFpcr, +} + +impl From for u64 { + fn from(elem: Arm64CoreRegs) -> Self { + let (register_size, reg_offset) = match elem { + Arm64CoreRegs::KvmSpEl1 => (KVM_REG_SIZE_U64, offset_of!(kvm_regs, sp_el1)), + Arm64CoreRegs::KvmElrEl1 => (KVM_REG_SIZE_U64, offset_of!(kvm_regs, elr_el1)), + Arm64CoreRegs::KvmSpsr(idx) if idx < KVM_NR_SPSR as usize => { + (KVM_REG_SIZE_U64, offset_of!(kvm_regs, spsr) + idx * 8) + } + Arm64CoreRegs::UserPTRegRegs(idx) if idx < 31 => ( + KVM_REG_SIZE_U64, + offset_of!(kvm_regs, regs, user_pt_regs, regs) + idx * 8, + ), + Arm64CoreRegs::UserPTRegSp => ( + KVM_REG_SIZE_U64, + offset_of!(kvm_regs, regs, user_pt_regs, sp), + ), + Arm64CoreRegs::UserPTRegPc => ( + KVM_REG_SIZE_U64, + offset_of!(kvm_regs, regs, user_pt_regs, pc), + ), + Arm64CoreRegs::UserPTRegPState => ( + KVM_REG_SIZE_U64, + offset_of!(kvm_regs, regs, user_pt_regs, pstate), + ), + Arm64CoreRegs::UserFPSIMDStateVregs(idx) if idx < 32 => ( + KVM_REG_SIZE_U128, + offset_of!(kvm_regs, fp_regs, user_fpsimd_state, vregs) + idx * 16, + ), + Arm64CoreRegs::UserFPSIMDStateFpsr => ( + KVM_REG_SIZE_U32, + offset_of!(kvm_regs, fp_regs, user_fpsimd_state, fpsr), + ), + Arm64CoreRegs::UserFPSIMDStateFpcr => ( + KVM_REG_SIZE_U32, + offset_of!(kvm_regs, fp_regs, user_fpsimd_state, fpcr), + ), + _ => panic!("No such Register"), + }; + + // The core registers of an arm64 machine are represented + // in kernel by the `kvm_regs` structure. This structure is a + // mix of 32, 64 and 128 bit fields, we index it as if it + // was a 32bit array. + // struct kvm_regs { + // struct user_pt_regs regs; + // __u64 sp_el1; + // __u64 elr_el1; + // __u64 spsr[KVM_NR_SPSR]; + // struct user_fpsimd_state fp_regs; + // }; + // struct user_pt_regs { + // __u64 regs[31]; + // __u64 sp; + // __u64 pc; + // __u64 pstate; + // }; + + // struct user_fpsimd_state { + // __uint128_t vregs[32]; + // __u32 fpsr; + // __u32 fpcr; + // __u32 __reserved[2]; + // }; + + // #define KVM_REG_ARM64 0x6000000000000000ULL + // #define KVM_REG_SIZE_U32 0x0020000000000000ULL + // #define KVM_REG_SIZE_U64 0x0030000000000000ULL + // #define KVM_REG_SIZE_U128 0x0040000000000000ULL + // #define KVM_REG_ARM_CORE 0x00100000ULL + + // The id of the register is encoded as specified for `KVM_GET_ONE_REG` in the kernel + // documentation. 
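+        // As a concrete (illustrative) check of the formula below: `regs.pc` sits at
+        // byte offset 256 inside `kvm_regs`, so its 64-bit core register id works out
+        // to KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | (256 / 4)
+        // = 0x6030_0000_0010_0040.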
+ // reg_id = KVM_REG_ARM64 | KVM_REG_SIZE_* | KVM_REG_ARM_CORE | reg_offset_index + // reg_offset_index = reg_offset / sizeof(u32) + // KVM_REG_SIZE_* => KVM_REG_SIZE_U32/KVM_REG_SIZE_U64/KVM_REG_SIZE_U128 + + // calculate reg_id + KVM_REG_ARM64 + | register_size + | u64::from(KVM_REG_ARM_CORE) + | (reg_offset / size_of::()) as u64 + } +} diff --git a/hypervisor/src/kvm/aarch64/cpu_caps.rs b/hypervisor/src/kvm/aarch64/cpu_caps.rs new file mode 100644 index 0000000000000000000000000000000000000000..9d60ae2211d62fb3c404347a1dd4ce65c8db4ab8 --- /dev/null +++ b/hypervisor/src/kvm/aarch64/cpu_caps.rs @@ -0,0 +1,46 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use kvm_ioctls::Cap; +use kvm_ioctls::Kvm; + +// Capabilities for ARM cpu. +#[derive(Debug, Clone)] +pub struct ArmCPUCaps { + pub irq_chip: bool, + pub ioevent_fd: bool, + pub irq_fd: bool, + pub user_mem: bool, + pub psci02: bool, + pub mp_state: bool, + pub vcpu_events: bool, + pub pmuv3: bool, + pub sve: bool, +} + +impl ArmCPUCaps { + /// Initialize ArmCPUCaps instance. + pub fn init_capabilities() -> Self { + let kvm = Kvm::new().unwrap(); + ArmCPUCaps { + irq_chip: kvm.check_extension(Cap::Irqchip), + ioevent_fd: kvm.check_extension(Cap::Ioeventfd), + irq_fd: kvm.check_extension(Cap::Irqfd), + user_mem: kvm.check_extension(Cap::UserMemory), + psci02: kvm.check_extension(Cap::ArmPsci02), + mp_state: kvm.check_extension(Cap::MpState), + vcpu_events: kvm.check_extension(Cap::VcpuEvents), + pmuv3: kvm.check_extension(Cap::ArmPmuV3), + sve: kvm.check_extension(Cap::ArmSve), + } + } +} diff --git a/hypervisor/src/kvm/aarch64/gicv2.rs b/hypervisor/src/kvm/aarch64/gicv2.rs new file mode 100644 index 0000000000000000000000000000000000000000..58758142d337087567978a937e837d277f08cea3 --- /dev/null +++ b/hypervisor/src/kvm/aarch64/gicv2.rs @@ -0,0 +1,164 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
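+
+// KvmGICv2 wraps the file descriptor of the in-kernel KVM GICv2 device and
+// implements the `GICv2Access` trait from the `devices` crate. An illustrative
+// construction sequence (mirroring the test at the bottom of this file) is:
+//     let hypervisor_gic = KvmGICv2::new(vm_fd.clone())?;
+//     let gic = GICv2::new(Arc::new(hypervisor_gic), &gic_conf)?;
+//     gic.realize()?;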
+ +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use kvm_ioctls::{DeviceFd, VmFd}; + +use super::KvmDevice; +use crate::error::HypervisorError; +use devices::GICv2Access; + +pub struct KvmGICv2 { + fd: DeviceFd, +} + +impl KvmGICv2 { + pub fn new(vm_fd: Arc) -> Result { + let mut gic_device = kvm_bindings::kvm_create_device { + type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2, + fd: 0, + flags: 0, + }; + + let gic_fd = match vm_fd.create_device(&mut gic_device) { + Ok(fd) => fd, + Err(e) => return Err(anyhow!(HypervisorError::CreateKvmDevice(e))), + }; + + Ok(Self { fd: gic_fd }) + } +} + +impl GICv2Access for KvmGICv2 { + fn init_gic(&self, nr_irqs: u32, dist_base: u64, cpu_if_base: u64) -> Result<()> { + KvmDevice::kvm_device_check(&self.fd, kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0)?; + + // Init the interrupt number support by the GIC. + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + 0, + &nr_irqs as *const u32 as u64, + true, + ) + .with_context(|| "Failed to set GICv2 attribute: irqs")?; + + // Finalize the GIC. + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, + u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_CTRL_INIT), + 0, + true, + ) + .with_context(|| "KVM failed to initialize GICv2")?; + + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_V2_ADDR_TYPE_DIST), + &dist_base as *const u64 as u64, + true, + ) + .with_context(|| "Failed to set GICv2 attribute: distributor address")?; + + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_V2_ADDR_TYPE_CPU), + &cpu_if_base as *const u64 as u64, + true, + ) + .with_context(|| "Failed to set GICv2 attribute: cpu address") + } + + fn vcpu_gicr_attr(&self, offset: u64, cpu: usize) -> u64 { + (((cpu as u64) << u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_CPUID_SHIFT)) + & kvm_bindings::KVM_DEV_ARM_VGIC_CPUID_MASK) + | ((offset << u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_OFFSET_SHIFT)) + & u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_OFFSET_MASK)) + } + + fn access_gic_distributor(&self, offset: u64, gicd_value: &mut u32, write: bool) -> Result<()> { + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + offset, + gicd_value as *mut u32 as u64, + write, + ) + .with_context(|| format!("Failed to access gic distributor for offset 0x{:x}", offset)) + } + + fn access_gic_cpu( + &self, + offset: u64, + cpu: usize, + gicc_value: &mut u64, + write: bool, + ) -> Result<()> { + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + self.vcpu_gicr_attr(offset, cpu), + gicc_value as *mut u64 as u64, + write, + ) + .with_context(|| format!("Failed to access gic cpu for offset 0x{:x}", offset)) + } + + fn pause(&self) -> Result<()> { + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, + u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES), + 0, + true, + ) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use devices::{GICDevice, GICVersion, GICv2, ICGICConfig, ICGICv2Config, GIC_IRQ_MAX}; + + use crate::kvm::aarch64::gicv2::KvmGICv2; + use crate::kvm::KvmHypervisor; + + #[test] + fn test_create_kvm_gicv2() { + let kvm_hyp = KvmHypervisor::new().unwrap_or(KvmHypervisor::default()); + if kvm_hyp.vm_fd.is_none() { + return; + } + + let gic_conf = ICGICConfig { + version: 
Some(GICVersion::GICv2), + vcpu_count: 4, + max_irq: GIC_IRQ_MAX, + v2: Some(ICGICv2Config { + dist_range: (0x0800_0000, 0x0001_0000), + cpu_range: (0x080A_0000, 0x00F6_0000), + v2m_range: None, + sys_mem: None, + }), + v3: None, + }; + let hypervisor_gic = KvmGICv2::new(kvm_hyp.vm_fd.clone().unwrap()).unwrap(); + let gic = GICv2::new(Arc::new(hypervisor_gic), &gic_conf).unwrap(); + assert!(gic.realize().is_ok()); + } +} diff --git a/hypervisor/src/kvm/aarch64/gicv3.rs b/hypervisor/src/kvm/aarch64/gicv3.rs new file mode 100644 index 0000000000000000000000000000000000000000..23d86c0dfe42184017d5e1f7859f5c383a4d1090 --- /dev/null +++ b/hypervisor/src/kvm/aarch64/gicv3.rs @@ -0,0 +1,381 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use kvm_ioctls::{DeviceFd, VmFd}; +use log::info; + +use super::KvmDevice; +use crate::error::HypervisorError; +use devices::{GICv3Access, GICv3ItsAccess, GicRedistRegion}; + +pub struct KvmGICv3 { + fd: DeviceFd, + vcpu_count: u64, +} + +impl KvmGICv3 { + pub fn new(vm_fd: Arc, vcpu_count: u64) -> Result { + let mut gic_device = kvm_bindings::kvm_create_device { + type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, + fd: 0, + flags: 0, + }; + + let gic_fd = match vm_fd.create_device(&mut gic_device) { + Ok(fd) => fd, + Err(e) => return Err(anyhow!(HypervisorError::CreateKvmDevice(e))), + }; + + Ok(Self { + fd: gic_fd, + vcpu_count, + }) + } +} + +impl GICv3Access for KvmGICv3 { + fn init_gic( + &self, + nr_irqs: u32, + redist_regions: Vec, + dist_base: u64, + ) -> Result<()> { + if redist_regions.len() > 1 { + KvmDevice::kvm_device_check( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION), + ) + .with_context(|| { + "Multiple redistributors are acquired while KVM does not provide support." + })?; + } + + if redist_regions.len() == 1 { + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_REDIST), + &redist_regions.get(0).unwrap().base as *const u64 as u64, + true, + ) + .with_context(|| "Failed to set GICv3 attribute: redistributor address")?; + } else { + for redist in &redist_regions { + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION), + &redist.base_attr as *const u64 as u64, + true, + ) + .with_context(|| "Failed to set GICv3 attribute: redistributor region address")?; + } + } + + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_DIST), + &dist_base as *const u64 as u64, + true, + ) + .with_context(|| "Failed to set GICv3 attribute: distributor address")?; + + KvmDevice::kvm_device_check(&self.fd, kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0)?; + + // Init the interrupt number support by the GIC. 
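+        // KVM_DEV_ARM_VGIC_GRP_NR_IRQS takes the address of a u32 holding the total
+        // interrupt count, and it has to be written before the KVM_DEV_ARM_VGIC_CTRL_INIT
+        // call below initializes the GIC.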
+ KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + 0, + &nr_irqs as *const u32 as u64, + true, + ) + .with_context(|| "Failed to set GICv3 attribute: irqs")?; + + // Finalize the GIC. + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, + u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_CTRL_INIT), + 0, + true, + ) + .with_context(|| "KVM failed to initialize GICv3") + } + + fn vcpu_gicr_attr(&self, cpu: usize) -> u64 { + let clustersz = 16usize; + + let aff1 = (cpu / clustersz) as u64; + let aff0 = (cpu % clustersz) as u64; + + let affid = (aff1 << 8) | aff0; + let cpu_affid: u64 = ((affid & 0xFF_0000_0000) >> 8) | (affid & 0xFF_FFFF); + + let last = u64::from((self.vcpu_count - 1) == cpu as u64); + + // Allow conversion of variables from i64 to u64. + ((cpu_affid << 32) | (1 << 24) | (1 << 8) | (last << 4)) + & kvm_bindings::KVM_DEV_ARM_VGIC_V3_MPIDR_MASK as u64 + } + + fn access_gic_distributor(&self, offset: u64, gicd_value: &mut u32, write: bool) -> Result<()> { + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + offset, + gicd_value as *mut u32 as u64, + write, + ) + .with_context(|| format!("Failed to access gic distributor for offset 0x{:x}", offset)) + } + + fn access_gic_redistributor( + &self, + offset: u64, + cpu: usize, + gicr_value: &mut u32, + write: bool, + ) -> Result<()> { + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, + self.vcpu_gicr_attr(cpu) | offset, + gicr_value as *mut u32 as u64, + write, + ) + .with_context(|| { + format!( + "Failed to access gic redistributor for offset 0x{:x}", + offset + ) + }) + } + + fn access_gic_cpu( + &self, + offset: u64, + cpu: usize, + gicc_value: &mut u64, + write: bool, + ) -> Result<()> { + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + self.vcpu_gicr_attr(cpu) | offset, + gicc_value as *mut u64 as u64, + write, + ) + .with_context(|| format!("Failed to access gic cpu for offset 0x{:x}", offset)) + } + + fn access_gic_line_level(&self, offset: u64, gicll_value: &mut u32, write: bool) -> Result<()> { + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, + self.vcpu_gicr_attr(0) | offset, + gicll_value as *mut u32 as u64, + write, + ) + } + + fn pause(&self) -> Result<()> { + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, + u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES), + 0, + true, + ) + } +} + +pub struct KvmGICv3Its { + fd: DeviceFd, +} + +impl KvmGICv3Its { + pub fn new(vm_fd: Arc) -> Result { + let mut its_device = kvm_bindings::kvm_create_device { + type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_ITS, + fd: 0, + flags: 0, + }; + + let its_fd = match vm_fd.create_device(&mut its_device) { + Ok(fd) => fd, + Err(e) => return Err(anyhow!(HypervisorError::CreateKvmDevice(e))), + }; + + Ok(Self { fd: its_fd }) + } +} + +impl GICv3ItsAccess for KvmGICv3Its { + fn init_gic_its(&self, msi_base: u64) -> Result<()> { + KvmDevice::kvm_device_check( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_ITS_ADDR_TYPE), + ) + .with_context(|| "ITS address attribute is not supported for KVM")?; + + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, + u64::from(kvm_bindings::KVM_VGIC_ITS_ADDR_TYPE), + &msi_base as *const u64 as u64, + true, + ) + .with_context(|| "Failed to 
set ITS attribute: ITS address")?; + + // Finalize the GIC Its. + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, + u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_CTRL_INIT), + &msi_base as *const u64 as u64, + true, + ) + .with_context(|| "KVM failed to initialize ITS")?; + + Ok(()) + } + + fn access_gic_its(&self, attr: u32, its_value: &mut u64, write: bool) -> Result<()> { + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ITS_REGS, + u64::from(attr), + its_value as *const u64 as u64, + write, + ) + } + + fn access_gic_its_tables(&self, save: bool) -> Result<()> { + let attr = if save { + u64::from(kvm_bindings::KVM_DEV_ARM_ITS_SAVE_TABLES) + } else { + u64::from(kvm_bindings::KVM_DEV_ARM_ITS_RESTORE_TABLES) + }; + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, + attr, + std::ptr::null::() as u64, + true, + ) + } + + fn reset(&self) -> Result<()> { + info!("Reset gicv3 its"); + KvmDevice::kvm_device_access( + &self.fd, + kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, + u64::from(kvm_bindings::KVM_DEV_ARM_ITS_CTRL_RESET), + std::ptr::null::() as u64, + true, + ) + .with_context(|| "Failed to reset ITS") + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use crate::kvm::aarch64::gicv3::{KvmGICv3, KvmGICv3Its}; + use crate::kvm::KvmHypervisor; + use devices::{GICDevice, GICVersion, GICv3, ICGICConfig, ICGICv3Config, GIC_IRQ_MAX}; + + #[test] + fn test_create_kvm_gicv3() { + let kvm_hyp = KvmHypervisor::new().unwrap_or(KvmHypervisor::default()); + if kvm_hyp.vm_fd.is_none() { + return; + } + + assert!(KvmGICv3::new(kvm_hyp.vm_fd.clone().unwrap(), 4).is_ok()); + } + + #[test] + fn test_create_kvm_gicv3its() { + let kvm_hyp = KvmHypervisor::new().unwrap_or(KvmHypervisor::default()); + if kvm_hyp.vm_fd.is_none() { + return; + } + + assert!(KvmGICv3Its::new(kvm_hyp.vm_fd.clone().unwrap()).is_ok()); + } + + #[test] + fn test_realize_gic_device_without_its() { + let kvm_hyp = KvmHypervisor::new().unwrap_or(KvmHypervisor::default()); + if kvm_hyp.vm_fd.is_none() { + return; + } + + let gic_config = ICGICConfig { + version: Some(GICVersion::GICv3), + vcpu_count: 4_u64, + max_irq: GIC_IRQ_MAX, + v2: None, + v3: Some(ICGICv3Config { + msi: false, + dist_range: (0x0800_0000, 0x0001_0000), + redist_region_ranges: vec![(0x080A_0000, 0x00F6_0000)], + its_range: None, + }), + }; + + let hypervisor_gic = + KvmGICv3::new(kvm_hyp.vm_fd.clone().unwrap(), gic_config.vcpu_count).unwrap(); + let its_handler = KvmGICv3Its::new(kvm_hyp.vm_fd.clone().unwrap()).unwrap(); + let gic = GICv3::new(Arc::new(hypervisor_gic), Arc::new(its_handler), &gic_config).unwrap(); + assert!(gic.realize().is_ok()); + assert!(gic.its_dev.is_none()); + } + + #[test] + fn test_gic_redist_regions() { + let kvm_hyp = KvmHypervisor::new().unwrap_or(KvmHypervisor::default()); + if kvm_hyp.vm_fd.is_none() { + return; + } + + let gic_config = ICGICConfig { + version: Some(GICVersion::GICv3), + vcpu_count: 210_u64, + max_irq: GIC_IRQ_MAX, + v2: None, + v3: Some(ICGICv3Config { + msi: true, + dist_range: (0x0800_0000, 0x0001_0000), + redist_region_ranges: vec![(0x080A_0000, 0x00F6_0000), (256 << 30, 0x200_0000)], + its_range: Some((0x0808_0000, 0x0002_0000)), + }), + }; + + let hypervisor_gic = + KvmGICv3::new(kvm_hyp.vm_fd.clone().unwrap(), gic_config.vcpu_count).unwrap(); + let its_handler = KvmGICv3Its::new(kvm_hyp.vm_fd.clone().unwrap()).unwrap(); + let gic = GICv3::new(Arc::new(hypervisor_gic), Arc::new(its_handler), 
&gic_config).unwrap(); + assert!(gic.realize().is_ok()); + assert!(gic.its_dev.is_some()); + assert_eq!(gic.redist_regions.len(), 2); + } +} diff --git a/hypervisor/src/kvm/aarch64/mod.rs b/hypervisor/src/kvm/aarch64/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..7e42f61a64e9283fc6528e59c814928d887a582c --- /dev/null +++ b/hypervisor/src/kvm/aarch64/mod.rs @@ -0,0 +1,466 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod cpu_caps; +pub mod gicv2; +pub mod gicv3; + +mod core_regs; +mod sys_regs; + +use std::mem::forget; +use std::os::unix::io::{AsRawFd, FromRawFd}; +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Context, Result}; +use kvm_bindings::*; +use kvm_ioctls::DeviceFd; +use vmm_sys_util::{ioctl_ioc_nr, ioctl_iow_nr, ioctl_iowr_nr}; + +use self::core_regs::Arm64CoreRegs; +use self::sys_regs::{KVM_REG_ARM_MPIDR_EL1, KVM_REG_ARM_TIMER_CNT}; +use crate::kvm::{KvmCpu, KvmHypervisor}; +use cpu::{ + ArchCPU, CPUBootConfig, CPUFeatures, CpregListEntry, RegsIndex, CPU, PMU_INTR, PPI_BASE, +}; + +pub const KVM_MAX_CPREG_ENTRIES: usize = 500; +const KVM_NR_REGS: u64 = 31; +const KVM_NR_FP_REGS: u64 = 32; + +ioctl_iow_nr!(KVM_GET_DEVICE_ATTR, KVMIO, 0xe2, kvm_device_attr); +ioctl_iow_nr!(KVM_GET_ONE_REG, KVMIO, 0xab, kvm_one_reg); +ioctl_iow_nr!(KVM_SET_ONE_REG, KVMIO, 0xac, kvm_one_reg); +ioctl_iowr_nr!(KVM_GET_REG_LIST, KVMIO, 0xb0, kvm_reg_list); +ioctl_iow_nr!(KVM_ARM_VCPU_INIT, KVMIO, 0xae, kvm_vcpu_init); + +/// A wrapper for kvm_based device check and access. +pub struct KvmDevice; +impl KvmDevice { + fn kvm_device_check(fd: &DeviceFd, group: u32, attr: u64) -> Result<()> { + let attr = kvm_bindings::kvm_device_attr { + group, + attr, + addr: 0, + flags: 0, + }; + fd.has_device_attr(&attr) + .with_context(|| "Failed to check device attributes.")?; + Ok(()) + } + + fn kvm_device_access( + fd: &DeviceFd, + group: u32, + attr: u64, + addr: u64, + write: bool, + ) -> Result<()> { + let attr = kvm_bindings::kvm_device_attr { + group, + attr, + addr, + flags: 0, + }; + + if write { + fd.set_device_attr(&attr) + .with_context(|| "Failed to set device attributes.")?; + } else { + let mut attr = attr; + fd.get_device_attr(&mut attr) + .with_context(|| "Failed to get device attributes.")?; + }; + + Ok(()) + } +} + +impl KvmHypervisor { + pub fn arch_init(&self) -> Result<()> { + Ok(()) + } +} + +impl KvmCpu { + pub fn arch_init_pmu(&self) -> Result<()> { + let pmu_attr = kvm_device_attr { + group: KVM_ARM_VCPU_PMU_V3_CTRL, + attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT), + addr: 0, + flags: 0, + }; + // SAFETY: The fd can be guaranteed to be legal during creation. + let vcpu_device = unsafe { DeviceFd::from_raw_fd(self.fd.as_raw_fd()) }; + vcpu_device + .has_device_attr(&pmu_attr) + .with_context(|| "Kernel does not support PMU for vCPU")?; + // Set IRQ 23, PPI 7 for PMU. 
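+        // PPI interrupt IDs start at 16 in the GIC numbering, so PPI 7 corresponds to
+        // INTID 23 (PMU_INTR + PPI_BASE below).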
+ let irq = PMU_INTR + PPI_BASE; + let pmu_irq_attr = kvm_device_attr { + group: KVM_ARM_VCPU_PMU_V3_CTRL, + attr: u64::from(KVM_ARM_VCPU_PMU_V3_IRQ), + addr: &irq as *const u32 as u64, + flags: 0, + }; + + vcpu_device + .set_device_attr(&pmu_irq_attr) + .with_context(|| "Failed to set IRQ for PMU")?; + // Init PMU after setting IRQ. + vcpu_device + .set_device_attr(&pmu_attr) + .with_context(|| "Failed to enable PMU for vCPU")?; + // forget `vcpu_device` to avoid fd close on exit, as DeviceFd is backed by File. + forget(vcpu_device); + + Ok(()) + } + + pub fn arch_vcpu_init(&self) -> Result<()> { + self.fd + .vcpu_init(&self.kvi.lock().unwrap()) + .with_context(|| "Failed to init kvm vcpu") + } + + pub fn arch_set_boot_config( + &self, + arch_cpu: Arc>, + boot_config: &CPUBootConfig, + vcpu_config: &CPUFeatures, + ) -> Result<()> { + let mut kvi = self.kvi.lock().unwrap(); + self.vm_fd + .as_ref() + .unwrap() + .get_preferred_target(&mut kvi) + .with_context(|| "Failed to get kvm vcpu preferred target")?; + + // support PSCI 0.2 + // We already checked that the capability is supported. + kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2; + // Non-boot cpus are powered off initially. + if arch_cpu.lock().unwrap().apic_id != 0 { + kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF; + } + + // Enable PMU from config. + if vcpu_config.pmu { + if !self.caps.pmuv3 { + bail!("PMUv3 is not supported by KVM"); + } + kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PMU_V3; + } + // Enable SVE from config. + if vcpu_config.sve { + if !self.caps.sve { + bail!("SVE is not supported by KVM"); + } + kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_SVE; + } + drop(kvi); + + arch_cpu.lock().unwrap().set_core_reg(boot_config); + + self.arch_vcpu_init()?; + + if vcpu_config.sve { + self.fd + .vcpu_finalize(&(i32::try_from(kvm_bindings::KVM_ARM_VCPU_SVE)?))?; + } + + arch_cpu.lock().unwrap().mpidr = u64::try_from( + self.get_one_reg(KVM_REG_ARM_MPIDR_EL1) + .with_context(|| "Failed to get mpidr")?, + )?; + + arch_cpu.lock().unwrap().features = *vcpu_config; + + Ok(()) + } + + fn get_one_reg(&self, reg_id: u64) -> Result { + let mut val = [0_u8; 16]; + self.fd.get_one_reg(reg_id, &mut val)?; + let data = u128::from_le_bytes(val); + trace::kvm_get_one_reg(self.id, reg_id, data); + Ok(data) + } + + fn set_one_reg(&self, reg_id: u64, val: u128) -> Result<()> { + trace::kvm_set_one_reg(self.id, reg_id, val); + self.fd.set_one_reg(reg_id, &val.to_le_bytes())?; + Ok(()) + } + + pub fn arch_get_one_reg(&self, reg_id: u64) -> Result { + self.get_one_reg(reg_id) + } + + pub fn arch_get_regs( + &self, + arch_cpu: Arc>, + regs_index: RegsIndex, + ) -> Result<()> { + let mut locked_arch_cpu = arch_cpu.lock().unwrap(); + + match regs_index { + RegsIndex::CoreRegs => { + locked_arch_cpu.core_regs = self.get_core_regs()?; + } + RegsIndex::MpState => { + if self.caps.mp_state { + let mut mp_state = self.fd.get_mp_state()?; + if mp_state.mp_state != KVM_MP_STATE_STOPPED { + mp_state.mp_state = KVM_MP_STATE_RUNNABLE; + } + trace::kvm_get_mp_state(self.id, &mp_state); + locked_arch_cpu.mp_state = mp_state; + } + } + RegsIndex::VcpuEvents => { + if self.caps.vcpu_events { + locked_arch_cpu.cpu_events = self.fd.get_vcpu_events()?; + trace::kvm_get_vcpu_events(self.id, &locked_arch_cpu.cpu_events); + } + } + RegsIndex::CpregList => { + let mut cpreg_list = RegList::new(KVM_MAX_CPREG_ENTRIES)?; + self.fd.get_reg_list(&mut cpreg_list)?; + locked_arch_cpu.cpreg_len = 0; + for cpreg in cpreg_list.as_slice() { + 
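+                    // Read back the current value of every register the kernel reported so
+                    // it can be cached in `cpreg_list`; `cpreg_len` counts the valid entries.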
let mut cpreg_entry = CpregListEntry { + reg_id: *cpreg, + value: 0, + }; + if !self.get_cpreg(&mut cpreg_entry)? { + // We sync these cpreg by hand, such as core regs. + continue; + } + let index = locked_arch_cpu.cpreg_len; + locked_arch_cpu.cpreg_list[index] = cpreg_entry; + locked_arch_cpu.cpreg_len += 1; + } + trace::kvm_get_reg_list( + self.id, + &&locked_arch_cpu.cpreg_list[0..locked_arch_cpu.cpreg_len], + ); + } + RegsIndex::VtimerCount => { + locked_arch_cpu.vtimer_cnt = u64::try_from( + self.get_one_reg(KVM_REG_ARM_TIMER_CNT) + .with_context(|| "Failed to get virtual timer count")?, + )?; + locked_arch_cpu.vtimer_cnt_valid = true; + } + } + + Ok(()) + } + + pub fn arch_set_regs( + &self, + arch_cpu: Arc>, + regs_index: RegsIndex, + ) -> Result<()> { + let mut locked_arch_cpu = arch_cpu.lock().unwrap(); + let apic_id = locked_arch_cpu.apic_id; + match regs_index { + RegsIndex::CoreRegs => { + self.set_core_regs(locked_arch_cpu.core_regs) + .with_context(|| format!("Failed to set core register for CPU {}", apic_id))?; + } + RegsIndex::MpState => { + if self.caps.mp_state { + trace::kvm_set_mp_state(self.id, &locked_arch_cpu.mp_state); + self.fd + .set_mp_state(locked_arch_cpu.mp_state) + .with_context(|| format!("Failed to set mpstate for CPU {}", apic_id))?; + } + } + RegsIndex::VcpuEvents => { + if self.caps.vcpu_events { + trace::kvm_set_vcpu_events(self.id, &locked_arch_cpu.cpu_events); + self.fd + .set_vcpu_events(&locked_arch_cpu.cpu_events) + .with_context(|| format!("Failed to set vcpu event for CPU {}", apic_id))?; + } + } + RegsIndex::CpregList => { + for cpreg in locked_arch_cpu.cpreg_list[0..locked_arch_cpu.cpreg_len].iter() { + self.set_cpreg(cpreg) + .with_context(|| format!("Failed to set cpreg for CPU {}", apic_id))?; + } + } + RegsIndex::VtimerCount => { + if locked_arch_cpu.vtimer_cnt_valid { + self.set_one_reg( + KVM_REG_ARM_TIMER_CNT, + u128::from(locked_arch_cpu.vtimer_cnt), + ) + .with_context(|| "Failed to set virtual timer count")?; + locked_arch_cpu.vtimer_cnt_valid = false; + } + } + } + + Ok(()) + } + + /// Returns the vcpu's current `core_register`. + /// + /// The register state is gotten from `KVM_GET_ONE_REG` api in KVM. + /// + /// # Arguments + /// + /// * `vcpu_fd` - the VcpuFd in KVM mod. + fn get_core_regs(&self) -> Result { + let mut core_regs = kvm_regs::default(); + + core_regs.regs.sp = u64::try_from(self.get_one_reg(Arm64CoreRegs::UserPTRegSp.into())?)?; + core_regs.sp_el1 = u64::try_from(self.get_one_reg(Arm64CoreRegs::KvmSpEl1.into())?)?; + core_regs.regs.pstate = + u64::try_from(self.get_one_reg(Arm64CoreRegs::UserPTRegPState.into())?)?; + core_regs.regs.pc = u64::try_from(self.get_one_reg(Arm64CoreRegs::UserPTRegPc.into())?)?; + core_regs.elr_el1 = u64::try_from(self.get_one_reg(Arm64CoreRegs::KvmElrEl1.into())?)?; + + for i in 0..usize::try_from(KVM_NR_REGS)? { + core_regs.regs.regs[i] = + u64::try_from(self.get_one_reg(Arm64CoreRegs::UserPTRegRegs(i).into())?)?; + } + + for i in 0..usize::try_from(KVM_NR_SPSR)? { + core_regs.spsr[i] = u64::try_from(self.get_one_reg(Arm64CoreRegs::KvmSpsr(i).into())?)?; + } + + // State save and restore is not supported for SVE for now, so we just skip it. + if self.kvi.lock().unwrap().features[0] & (1 << kvm_bindings::KVM_ARM_VCPU_SVE) == 0 { + for i in 0..usize::try_from(KVM_NR_FP_REGS)? 
{ + core_regs.fp_regs.vregs[i] = + self.get_one_reg(Arm64CoreRegs::UserFPSIMDStateVregs(i).into())?; + } + } + + core_regs.fp_regs.fpsr = + u32::try_from(self.get_one_reg(Arm64CoreRegs::UserFPSIMDStateFpsr.into())?)?; + core_regs.fp_regs.fpcr = + u32::try_from(self.get_one_reg(Arm64CoreRegs::UserFPSIMDStateFpcr.into())?)?; + + Ok(core_regs) + } + + /// Sets the vcpu's current "core_register" + /// + /// The register state is gotten from `KVM_SET_ONE_REG` api in KVM. + /// + /// # Arguments + /// + /// * `vcpu_fd` - the VcpuFd in KVM mod. + /// * `core_regs` - kvm_regs state to be written. + fn set_core_regs(&self, core_regs: kvm_regs) -> Result<()> { + self.set_one_reg( + Arm64CoreRegs::UserPTRegSp.into(), + u128::from(core_regs.regs.sp), + )?; + self.set_one_reg(Arm64CoreRegs::KvmSpEl1.into(), u128::from(core_regs.sp_el1))?; + self.set_one_reg( + Arm64CoreRegs::UserPTRegPState.into(), + u128::from(core_regs.regs.pstate), + )?; + self.set_one_reg( + Arm64CoreRegs::UserPTRegPc.into(), + u128::from(core_regs.regs.pc), + )?; + self.set_one_reg( + Arm64CoreRegs::KvmElrEl1.into(), + u128::from(core_regs.elr_el1), + )?; + + for i in 0..usize::try_from(KVM_NR_REGS)? { + self.set_one_reg( + Arm64CoreRegs::UserPTRegRegs(i).into(), + u128::from(core_regs.regs.regs[i]), + )?; + } + + for i in 0..usize::try_from(KVM_NR_SPSR)? { + self.set_one_reg( + Arm64CoreRegs::KvmSpsr(i).into(), + u128::from(core_regs.spsr[i]), + )?; + } + + // State save and restore is not supported for SVE for now, so we just skip it. + if self.kvi.lock().unwrap().features[0] & (1 << kvm_bindings::KVM_ARM_VCPU_SVE) == 0 { + for i in 0..usize::try_from(KVM_NR_FP_REGS)? { + self.set_one_reg( + Arm64CoreRegs::UserFPSIMDStateVregs(i).into(), + core_regs.fp_regs.vregs[i], + )?; + } + } + + self.set_one_reg( + Arm64CoreRegs::UserFPSIMDStateFpsr.into(), + u128::from(core_regs.fp_regs.fpsr), + )?; + self.set_one_reg( + Arm64CoreRegs::UserFPSIMDStateFpcr.into(), + u128::from(core_regs.fp_regs.fpcr), + )?; + + Ok(()) + } + + fn reg_sync_by_cpreg_list(reg_id: u64) -> Result { + let coproc = u32::try_from(reg_id)? & KVM_REG_ARM_COPROC_MASK; + if coproc == KVM_REG_ARM_CORE { + return Ok(false); + } + + let size = reg_id & KVM_REG_SIZE_MASK; + if size == KVM_REG_SIZE_U32 || size == KVM_REG_SIZE_U64 { + Ok(true) + } else { + bail!("Can't handle size of register in cpreg list"); + } + } + + fn get_cpreg(&self, cpreg: &mut CpregListEntry) -> Result { + if !Self::reg_sync_by_cpreg_list(cpreg.reg_id)? { + return Ok(false); + } + cpreg.value = self.get_one_reg(cpreg.reg_id)?; + Ok(true) + } + + fn set_cpreg(&self, cpreg: &CpregListEntry) -> Result { + if !Self::reg_sync_by_cpreg_list(cpreg.reg_id)? 
{ + return Ok(false); + } + self.set_one_reg(cpreg.reg_id, cpreg.value)?; + Ok(true) + } + + pub fn arch_put_register(&self, cpu: Arc) -> Result<()> { + let arch_cpu = &cpu.arch_cpu; + self.arch_set_regs(arch_cpu.clone(), RegsIndex::CoreRegs)?; + self.arch_set_regs(arch_cpu.clone(), RegsIndex::MpState)?; + self.arch_set_regs(arch_cpu.clone(), RegsIndex::CpregList)?; + self.arch_set_regs(arch_cpu.clone(), RegsIndex::VcpuEvents) + } + + pub fn arch_reset_vcpu(&self, cpu: Arc) -> Result<()> { + cpu.arch_cpu.lock().unwrap().set(&cpu.boot_state()); + self.arch_vcpu_init() + } +} diff --git a/hypervisor/src/kvm/aarch64/sys_regs.rs b/hypervisor/src/kvm/aarch64/sys_regs.rs new file mode 100644 index 0000000000000000000000000000000000000000..5a63290b05d330e9b17a4fe26810ef9920867150 --- /dev/null +++ b/hypervisor/src/kvm/aarch64/sys_regs.rs @@ -0,0 +1,48 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use kvm_bindings::*; + +// Arm Architecture Reference Manual defines the encoding of AArch64 system registers: +// (Ref: ARMv8 ARM, Section: "System instruction class encoding overview") +// While KVM defines another ID for each AArch64 system register, which is used in calling +// `KVM_G/SET_ONE_REG` to access a system register of a guest. A mapping exists between the +// Arm standard encoding and the KVM ID. +// See: https://elixir.bootlin.com/linux/v5.6/source/arch/arm64/include/uapi/asm/kvm.h#L216 +#[macro_export] +macro_rules! arm64_sys_reg { + ($op0: tt, $op1: tt, $crn: tt, $crm: tt, $op2: tt) => { + KVM_REG_SIZE_U64 + | KVM_REG_ARM64 + | KVM_REG_ARM64_SYSREG as u64 + | (((($op0 as u32) << KVM_REG_ARM64_SYSREG_OP0_SHIFT) & KVM_REG_ARM64_SYSREG_OP0_MASK) + as u64) + | (((($op1 as u32) << KVM_REG_ARM64_SYSREG_OP1_SHIFT) & KVM_REG_ARM64_SYSREG_OP1_MASK) + as u64) + | (((($crn as u32) << KVM_REG_ARM64_SYSREG_CRN_SHIFT) & KVM_REG_ARM64_SYSREG_CRN_MASK) + as u64) + | (((($crm as u32) << KVM_REG_ARM64_SYSREG_CRM_SHIFT) & KVM_REG_ARM64_SYSREG_CRM_MASK) + as u64) + | (((($op2 as u32) << KVM_REG_ARM64_SYSREG_OP2_SHIFT) & KVM_REG_ARM64_SYSREG_OP2_MASK) + as u64) + }; +} + +// The following system register codes can be found at this website: +// https://elixir.bootlin.com/linux/v5.6/source/arch/arm64/include/asm/sysreg.h + +// MPIDR - Multiprocessor Affinity Register(SYS_MPIDR_EL1). +pub const KVM_REG_ARM_MPIDR_EL1: u64 = arm64_sys_reg!(3, 0, 0, 0, 5); + +// Counter-timer Virtual Count register: Due to the API interface problem, the encode of +// this register is SYS_CNTV_CVAL_EL0. +pub const KVM_REG_ARM_TIMER_CNT: u64 = arm64_sys_reg!(3, 3, 14, 3, 2); diff --git a/hypervisor/src/kvm/interrupt.rs b/hypervisor/src/kvm/interrupt.rs index 8c27cc0a9e5e6321738b6fed2ef34e268e208755..b067b17c3d598abe3b0a427c6a94854448e2bbdb 100644 --- a/hypervisor/src/kvm/interrupt.rs +++ b/hypervisor/src/kvm/interrupt.rs @@ -11,14 +11,15 @@ // See the Mulan PSL v2 for more details. 
use std::mem::{align_of, size_of}; -use std::os::raw::c_ulong; +use std::sync::Arc; +use anyhow::{bail, Context, Result}; use kvm_bindings::{KVMIO, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI}; -use kvm_ioctls::{Cap, Kvm}; -use util::bitmap::Bitmap; -use vmm_sys_util::ioctl::ioctl_with_val; +use kvm_ioctls::{Cap, Kvm, VmFd}; +use vmm_sys_util::{ioctl_io_nr, ioctl_ioc_nr}; -use crate::errors::{Result, ResultExt}; +use devices::pci::MsiVector; +use util::bitmap::Bitmap; pub(crate) type IrqRoute = kvm_bindings::kvm_irq_routing; pub(crate) type IrqRouteEntry = kvm_bindings::kvm_irq_routing_entry; @@ -38,23 +39,14 @@ const IOCHIP_NUM_PINS: u32 = 192; #[cfg(target_arch = "aarch64")] const KVM_IRQCHIP: u32 = 0; -/// Wrapper over `KVM_CHECK_EXTENSION`. -/// -/// Returns 0 if the capability is not available and a positive integer otherwise. -fn check_extension_int(kvmfd: &Kvm, c: Cap) -> i32 { - // Safe because we know that our file is a KVM fd and that the extension is one of the ones - // defined by kernel. - unsafe { ioctl_with_val(kvmfd, KVM_CHECK_EXTENSION(), c as c_ulong) } -} - /// Return the max number kvm supports. fn get_maximum_gsi_cnt(kvmfd: &Kvm) -> u32 { - let mut gsi_count = check_extension_int(kvmfd, Cap::IrqRouting); + let mut gsi_count = kvmfd.check_extension_int(Cap::IrqRouting); if gsi_count < 0 { gsi_count = 0; } - gsi_count as u32 + u32::try_from(gsi_count).unwrap_or_default() } /// Return `IrqRouteEntry` according to gsi, irqchip kind and pin. @@ -176,7 +168,7 @@ impl IrqRouteTable { pub fn update_msi_route(&mut self, gsi: u32, msi_vector: MsiVector) -> Result<()> { self.remove_irq_route(gsi); self.add_msi_route(gsi, msi_vector) - .chain_err(|| "Failed to add msi route")?; + .with_context(|| "Failed to add msi route")?; Ok(()) } @@ -186,9 +178,9 @@ impl IrqRouteTable { let free_gsi = self .gsi_bitmap .find_next_zero(0) - .chain_err(|| "Failed to get new free gsi")?; + .with_context(|| "Failed to get new free gsi")?; self.gsi_bitmap.set(free_gsi)?; - Ok(free_gsi as u32) + Ok(u32::try_from(free_gsi)?) } /// Release gsi number to free. @@ -199,68 +191,72 @@ impl IrqRouteTable { pub fn release_gsi(&mut self, gsi: u32) -> Result<()> { self.gsi_bitmap .clear(gsi as usize) - .chain_err(|| "Failed to release gsi")?; + .with_context(|| "Failed to release gsi")?; self.remove_irq_route(gsi); Ok(()) } - /// Get `IrqRouteEntry` by given gsi number. - /// A gsi number may have several entries. If no gsi number in table, is will - /// return an empty vector. - pub fn get_irq_route_entry(&self, gsi: u32) -> Vec { - let mut entries = Vec::new(); - for entry in self.irq_routes.iter() { - if gsi == entry.gsi { - entries.push(*entry); + /// Sets the gsi routing table entries. It will overwrite previously set entries. + pub fn commit_irq_routing(&self, vm_fd: &Arc) -> Result<()> { + let routes = self.irq_routes.clone(); + + let layout = std::alloc::Layout::from_size_align( + size_of::() + routes.len() * size_of::(), + std::cmp::max(align_of::(), align_of::()), + )?; + + trace::kvm_commit_irq_routing(); + // SAFETY: data in `routes` is reliable. + unsafe { + // layout is aligned, so casting of ptr is allowed. + let irq_routing = std::alloc::alloc(layout) as *mut IrqRoute; + if irq_routing.is_null() { + bail!("Failed to alloc irq routing"); } - } - - entries - } -} - -/// Basic data for msi vector. 
-#[derive(Copy, Clone, Default)] -pub struct MsiVector { - pub msg_addr_lo: u32, - pub msg_addr_hi: u32, - pub msg_data: u32, - pub masked: bool, - #[cfg(target_arch = "aarch64")] - pub dev_id: u32, -} + (*irq_routing).nr = u32::try_from(routes.len())?; + (*irq_routing).flags = 0; + let entries: &mut [IrqRouteEntry] = (*irq_routing).entries.as_mut_slice(routes.len()); + entries.copy_from_slice(&routes); -pub(crate) unsafe fn refact_vec_with_field(count: usize) -> *mut T { - let layout = std::alloc::Layout::from_size_align( - size_of::() + count * size_of::(), - std::cmp::max(align_of::(), align_of::()), - ) - .unwrap(); + let ret = vm_fd + .set_gsi_routing(&*irq_routing) + .with_context(|| "Failed to set gsi routing"); - std::alloc::alloc(layout) as *mut T + std::alloc::dealloc(irq_routing as *mut u8, layout); + ret + } + } } #[cfg(test)] mod tests { - use super::super::KVMFds; - use super::get_maximum_gsi_cnt; + use std::sync::{Arc, Mutex}; + + use super::{get_maximum_gsi_cnt, IrqRouteTable}; + use crate::kvm::{KVMInterruptManager, KvmHypervisor}; #[test] fn test_get_maximum_gsi_cnt() { - let kvm_fds = KVMFds::new(); - if kvm_fds.vm_fd.is_none() { + let kvm_hyp = KvmHypervisor::new().unwrap_or_default(); + if kvm_hyp.vm_fd.is_none() { return; } - assert!(get_maximum_gsi_cnt(kvm_fds.fd.as_ref().unwrap()) > 0); + assert!(get_maximum_gsi_cnt(kvm_hyp.fd.as_ref().unwrap()) > 0); } #[test] fn test_alloc_and_release_gsi() { - let kvm_fds = KVMFds::new(); - if kvm_fds.vm_fd.is_none() { + let kvm_hyp = KvmHypervisor::new().unwrap_or_default(); + if kvm_hyp.vm_fd.is_none() { return; } - let mut irq_route_table = kvm_fds.irq_route_table.lock().unwrap(); + let irq_route_table = Mutex::new(IrqRouteTable::new(kvm_hyp.fd.as_ref().unwrap())); + let irq_manager = Arc::new(KVMInterruptManager::new( + true, + kvm_hyp.vm_fd.unwrap(), + irq_route_table, + )); + let mut irq_route_table = irq_manager.irq_route_table.lock().unwrap(); irq_route_table.init_irq_route_table(); #[cfg(target_arch = "x86_64")] diff --git a/hypervisor/src/kvm/listener.rs b/hypervisor/src/kvm/listener.rs new file mode 100644 index 0000000000000000000000000000000000000000..1a6943ffb2e802e8bed7855d3cdf760d8f63af9d --- /dev/null +++ b/hypervisor/src/kvm/listener.rs @@ -0,0 +1,837 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::collections::HashMap; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::{Arc, Mutex}; + +use anyhow::{anyhow, bail, Context, Result}; +use kvm_bindings::*; +use kvm_bindings::{kvm_userspace_memory_region as KvmMemSlot, KVM_MEM_READONLY}; +use kvm_ioctls::{IoEventAddress, NoDatamatch, VmFd}; +use log::{debug, warn}; + +use crate::HypervisorError; +use address_space::{ + AddressAttr, AddressRange, AddressSpaceError, FlatRange, Listener, ListenerReqType, MemSlot, + RegionIoEventFd, RegionType, +}; +use util::{num_ops::round_down, unix::host_page_size}; + +#[derive(Clone)] +pub struct KvmMemoryListener { + vm_fd: Option>, + /// Id of AddressSpace. + as_id: Arc, + /// Record all MemSlots. 
+ slots: Arc>>, + /// Memory slot registered in kvm. + kvm_memslots: Arc>>, + /// Whether enabled as a memory listener. + enabled: bool, +} + +impl KvmMemoryListener { + /// Create a new KvmMemoryListener for a VM. + /// + /// # Arguments + /// + /// * `nr_slots` - Number of slots. + pub fn new( + nr_slots: u32, + vm_fd: Option>, + kvm_memslots: Arc>>, + ) -> KvmMemoryListener { + KvmMemoryListener { + vm_fd, + as_id: Arc::new(AtomicU32::new(0)), + slots: Arc::new(Mutex::new(vec![MemSlot::default(); nr_slots as usize])), + kvm_memslots, + enabled: false, + } + } + + /// Find a free slot and fills it with given arguments. + /// + /// # Arguments + /// + /// * `guest_addr` - Guest address. + /// * `size` - Size of slot. + /// * `host_addr` - Host address. + /// + /// # Errors + /// + /// Return Error if + /// * No available Kvm slot. + /// * Given memory slot overlap with existed one. + fn get_free_slot(&self, guest_addr: u64, size: u64, host_addr: u64) -> Result { + let mut slots = self.slots.lock().unwrap(); + + // check if the given address range overlaps with exist ones + let range = AddressRange::from((guest_addr, size)); + slots.iter().try_for_each::<_, Result<()>>(|s| { + if AddressRange::from((s.guest_addr, s.size)) + .find_intersection(range) + .is_some() + { + return Err(anyhow!(HypervisorError::KvmSlotOverlap { + add: (guest_addr, size), + exist: (s.guest_addr, s.size) + })); + } + Ok(()) + })?; + + for (index, slot) in slots.iter_mut().enumerate() { + if slot.size == 0 { + slot.index = u32::try_from(index)?; + slot.guest_addr = guest_addr; + slot.size = size; + slot.host_addr = host_addr; + return Ok(slot.index); + } + } + + Err(anyhow!(HypervisorError::NoAvailKvmSlot(slots.len()))) + } + + /// Delete a slot after finding it according to the given arguments. + /// Return the deleted one if succeed. + /// + /// # Arguments + /// + /// * `addr` - Guest address of slot. + /// * `size` - Size of slots. + /// + /// # Errors + /// + /// Return Error if no Kem slot matched. + fn delete_slot(&self, addr: u64, size: u64) -> Result { + let mut slots = self.slots.lock().unwrap(); + for slot in slots.iter_mut() { + if slot.guest_addr == addr && slot.size == size { + // set slot size to zero, so it can be reused later + slot.size = 0; + return Ok(*slot); + } + } + Err(anyhow!(HypervisorError::NoMatchedKvmSlot(addr, size))) + } + + /// Align a piece of memory segment according to `alignment`, + /// return AddressRange after aligned. + /// + /// # Arguments + /// + /// * `range` - One piece of memory segment. + /// * `alignment` - Alignment base. + /// + /// # Errors + /// + /// Return Error if Memslot size is zero after aligned. + fn align_mem_slot(range: AddressRange, alignment: u64) -> Result { + let aligned_addr = range + .base + .align_up(alignment) + .with_context(|| AddressSpaceError::AddrAlignUp(range.base.raw_value(), alignment))?; + + let aligned_size = range + .size + .checked_sub(aligned_addr.offset_from(range.base)) + .and_then(|sz| round_down(sz, alignment)) + .filter(|&sz| sz > 0_u64) + .with_context(|| + format!("Mem slot size is zero after aligned, addr 0x{:X}, size 0x{:X}, alignment 0x{:X}", + range.base.raw_value(), range.size, alignment) + )?; + + Ok(AddressRange::new(aligned_addr, aligned_size)) + } + + /// Callback function for adding Region, which only care about Ram-type Region yet. + /// + /// # Arguments + /// + /// * `flat_range` - Corresponding FlatRange of new-added region. + /// + /// # Errors + /// + /// Return Error if fail to delete kvm_mem_slot. 
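// Illustrative sketch, not part of the patch: the arithmetic behind align_mem_slot()
// above. The listener only registers page-aligned slots with KVM, so the base is
// aligned up to the host page size, the size is reduced by the bytes skipped at the
// front, and the remainder is rounded down to a page multiple. A power-of-two page
// size (4 KiB in the example) is assumed here; the real code uses host_page_size()
// and the address_space helpers instead of raw u64 arithmetic.
fn align_range(base: u64, size: u64, page: u64) -> Option<(u64, u64)> {
    let aligned_base = (base + page - 1) & !(page - 1); // align base up
    let skipped = aligned_base - base;                  // bytes lost at the front
    let aligned_size = size.checked_sub(skipped)? & !(page - 1); // round size down
    if aligned_size == 0 {
        return None; // mirrors the "size is zero after aligned" error above
    }
    Some((aligned_base, aligned_size))
}

#[test]
fn align_matches_listener_behaviour() {
    let page = 4096_u64;
    // A two-page range starting 1000 bytes past a boundary shrinks to one whole page.
    assert_eq!(align_range(0x1000 + 1000, 2 * page, page), Some((0x2000, page)));
    // A single page starting mid-page leaves no whole page after alignment.
    assert_eq!(align_range(0x1000 + 1000, page, page), None);
}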
+ fn add_region(&self, flat_range: &FlatRange) -> Result<()> { + if flat_range.owner.region_type() == RegionType::RomDevice + && !flat_range.owner.get_rom_device_romd().unwrap() + { + if let Err(ref e) = self.delete_region(flat_range) { + warn!( + "Rom-device Region changes to IO mode, Failed to delete region: {:?}", + e + ); + } + return Ok(()); + } + + let attr = match flat_range.owner.region_type() { + address_space::RegionType::Ram => AddressAttr::Ram, + address_space::RegionType::RamDevice => AddressAttr::RamDevice, + address_space::RegionType::RomDevice => AddressAttr::RomDevice, + _ => return Ok(()), + }; + + let (aligned_addr, aligned_size) = + Self::align_mem_slot(flat_range.addr_range, host_page_size()) + .map(|r| (r.base, r.size)) + .with_context(|| "Failed to align mem slot")?; + let align_adjust = aligned_addr.raw_value() - flat_range.addr_range.base.raw_value(); + + // `unwrap()` won't fail because Ram-type Region definitely has hva + // SAFETY: size has been checked. + let aligned_hva = unsafe { flat_range.owner.get_host_address(attr).unwrap() } + + flat_range.offset_in_region + + align_adjust; + + let slot_idx = self + .get_free_slot(aligned_addr.raw_value(), aligned_size, aligned_hva) + .with_context(|| "Failed to get available KVM mem slot")?; + + let mut flags = 0_u32; + if flat_range.owner.get_rom_device_romd().unwrap_or(false) { + flags |= KVM_MEM_READONLY; + } + let kvm_region = KvmMemSlot { + slot: slot_idx | (self.as_id.load(Ordering::SeqCst) << 16), + guest_phys_addr: aligned_addr.raw_value(), + memory_size: aligned_size, + userspace_addr: aligned_hva, + flags, + }; + + if kvm_region.flags & KVM_MEM_READONLY == 0 { + let mut locked_slots = self.kvm_memslots.as_ref().lock().unwrap(); + locked_slots.insert(kvm_region.slot, kvm_region); + } + + // SAFETY: All parameters in the struct of kvm_region are valid, + // it can be guaranteed that calling the ioctl_with_ref in the function + // of set_user_memory_region is safe. + unsafe { + self.vm_fd + .as_ref() + .unwrap() + .set_user_memory_region(kvm_region) + .or_else(|e| { + self.delete_slot(aligned_addr.raw_value(), aligned_size) + .with_context(|| "Failed to delete Kvm mem slot")?; + Err(e).with_context(|| { + format!( + "KVM register memory region failed: addr 0x{:X}, size 0x{:X}", + aligned_addr.raw_value(), + aligned_size + ) + }) + }) + } + } + + /// Callback function for deleting Region, which only care about Ram-type Region yet. + /// + /// # Arguments + /// + /// * `flat_range` - Corresponding FlatRange of new-deleted region. 
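// Illustrative sketch, not part of the patch: how add_region() above packs the KVM
// slot id. The slot index lives in the low 16 bits of
// kvm_userspace_memory_region.slot and the address-space id in the high bits, which
// is what the `slot_idx | (as_id << 16)` expression encodes (the exact split is taken
// from the KVM API documentation and should be double-checked against the kernel in
// use).
fn pack_slot(slot_idx: u32, as_id: u32) -> u32 {
    assert!(slot_idx <= u32::from(u16::MAX) && as_id <= u32::from(u16::MAX));
    slot_idx | (as_id << 16)
}

fn unpack_slot(packed: u32) -> (u32, u32) {
    (packed & 0xffff, packed >> 16)
}

#[test]
fn slot_packing_round_trips() {
    let packed = pack_slot(3, 1);
    assert_eq!(packed, 0x0001_0003);
    assert_eq!(unpack_slot(packed), (3, 1));
}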
+ fn delete_region(&self, flat_range: &FlatRange) -> Result<()> { + if flat_range.owner.region_type() != RegionType::Ram + && flat_range.owner.region_type() != RegionType::RomDevice + && flat_range.owner.region_type() != RegionType::RamDevice + { + return Ok(()); + } + + let (aligned_addr, aligned_size) = + Self::align_mem_slot(flat_range.addr_range, host_page_size()) + .map(|r| (r.base, r.size)) + .with_context(|| "Failed to align mem slot")?; + + let mem_slot = match self.delete_slot(aligned_addr.raw_value(), aligned_size) { + Ok(m) => m, + Err(_) => { + debug!("no match mem slot registered to KVM, just return"); + return Ok(()); + } + }; + + let kvm_region = kvm_userspace_memory_region { + slot: mem_slot.index | (self.as_id.load(Ordering::SeqCst) << 16), + guest_phys_addr: mem_slot.guest_addr, + memory_size: 0_u64, + userspace_addr: mem_slot.host_addr, + flags: 0, + }; + + let mut locked_slots = self.kvm_memslots.lock().unwrap(); + locked_slots.remove(&kvm_region.slot); + + // SAFETY: All parameters in the struct of kvm_region are valid, + // it can be guaranteed that calling the ioctl_with_ref in the function + // of set_user_memory_region is safe. + unsafe { + self.vm_fd + .as_ref() + .unwrap() + .set_user_memory_region(kvm_region) + .with_context(|| { + format!( + "KVM unregister memory region failed: addr 0x{:X}", + aligned_addr.raw_value(), + ) + }) + } + } + + /// Register a IoEvent to `/dev/kvm`. + /// + /// # Arguments + /// + /// * `ioevtfd` - IoEvent would be added. + /// + /// # Errors + /// + /// Return Error if the length of ioeventfd data is unexpected or syscall failed. + fn add_ioeventfd(&self, ioevtfd: &RegionIoEventFd) -> Result<()> { + let vm_fd = self.vm_fd.as_ref().unwrap(); + let io_addr = IoEventAddress::Mmio(ioevtfd.addr_range.base.raw_value()); + trace::kvm_add_ioeventfd( + &ioevtfd.fd, + &io_addr, + ioevtfd.data_match, + ioevtfd.addr_range.size, + ioevtfd.data, + ); + let ioctl_ret = if ioevtfd.data_match { + let length = ioevtfd.addr_range.size; + match length { + 2 => vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, u16::try_from(ioevtfd.data)?), + 4 => vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, u32::try_from(ioevtfd.data)?), + 8 => vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data), + _ => bail!("Unexpected ioeventfd data length {}", length), + } + } else { + vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, NoDatamatch) + }; + + ioctl_ret.with_context(|| { + format!( + "KVM register ioeventfd failed, mmio addr 0x{:X}, size 0x{:X}, data_match {}", + ioevtfd.addr_range.base.raw_value(), + ioevtfd.addr_range.size, + if ioevtfd.data_match { + ioevtfd.data + } else { + u64::MAX + } + ) + }) + } + + /// Deletes `ioevtfd` from `/dev/kvm` + /// + /// # Arguments + /// + /// * `ioevtfd` - IoEvent would be deleted. 
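// Illustrative sketch, not part of the patch: the datamatch width rule enforced by
// add_ioeventfd()/delete_ioeventfd() above. KVM only accepts 2-, 4- or 8-byte
// datamatch values, so the listener narrows the u64 payload to the access size and
// rejects any other length; when data_match is false the eventfd fires on any write.
// The enum and helper below are hypothetical names used only for this sketch.
enum Datamatch {
    None,
    U16(u16),
    U32(u32),
    U64(u64),
}

fn to_datamatch(data_match: bool, size: u64, data: u64) -> Result<Datamatch, String> {
    if !data_match {
        return Ok(Datamatch::None);
    }
    match size {
        2 => u16::try_from(data).map(Datamatch::U16).map_err(|e| e.to_string()),
        4 => u32::try_from(data).map(Datamatch::U32).map_err(|e| e.to_string()),
        8 => Ok(Datamatch::U64(data)),
        n => Err(format!("Unexpected ioeventfd data length {}", n)),
    }
}

#[test]
fn datamatch_width_must_be_2_4_or_8() {
    // Mirrors test_ioeventfd_with_data_match: a 3-byte match is rejected.
    assert!(to_datamatch(true, 3, 64).is_err());
    assert!(matches!(to_datamatch(true, 4, 64), Ok(Datamatch::U32(64))));
    assert!(matches!(to_datamatch(false, 1, 0), Ok(Datamatch::None)));
}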
+ fn delete_ioeventfd(&self, ioevtfd: &RegionIoEventFd) -> Result<()> { + let vm_fd = self.vm_fd.as_ref().unwrap(); + let io_addr = IoEventAddress::Mmio(ioevtfd.addr_range.base.raw_value()); + trace::kvm_delete_ioeventfd( + &ioevtfd.fd, + &io_addr, + ioevtfd.data_match, + ioevtfd.addr_range.size, + ioevtfd.data, + ); + let ioctl_ret = if ioevtfd.data_match { + let length = ioevtfd.addr_range.size; + match length { + 2 => vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, u16::try_from(ioevtfd.data)?), + 4 => vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, u32::try_from(ioevtfd.data)?), + 8 => vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data), + _ => bail!("Unexpected ioeventfd data length {}", length), + } + } else { + vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, NoDatamatch) + }; + + ioctl_ret.with_context(|| { + format!( + "KVM unregister ioeventfd failed: mmio addr 0x{:X}, size 0x{:X}, data_match {}", + ioevtfd.addr_range.base.raw_value(), + ioevtfd.addr_range.size, + if ioevtfd.data_match { + ioevtfd.data + } else { + u64::MAX + } + ) + }) + } +} + +impl Listener for KvmMemoryListener { + /// Get default priority. + fn priority(&self) -> i32 { + 10_i32 + } + + /// Is this listener enabled to call. + fn enabled(&self) -> bool { + self.enabled + } + + /// Enable listener for address space. + fn enable(&mut self) { + self.enabled = true; + } + + /// Disable listener for address space. + fn disable(&mut self) { + self.enabled = false; + } + + /// Deal with the request. + /// + /// # Arguments + /// + /// * `flat_range` - FlatRange would be used to find the region. + /// * `evtfd` - IoEvent of Region. + /// * `req_type` - Request type. + /// + /// # Errors + /// + /// Returns Error if + /// * Both `flat_range` and `evtfd' are not provided. + fn handle_request( + &self, + flat_range: Option<&FlatRange>, + evtfd: Option<&RegionIoEventFd>, + req_type: ListenerReqType, + ) -> Result<()> { + let req_ret = + match req_type { + ListenerReqType::AddRegion => self + .add_region(flat_range.with_context(|| "No FlatRange for AddRegion request")?), + ListenerReqType::DeleteRegion => self.delete_region( + flat_range.with_context(|| "No FlatRange for DeleteRegion request")?, + ), + ListenerReqType::AddIoeventfd => self + .add_ioeventfd(evtfd.with_context(|| "No IoEventFd for AddIoeventfd request")?), + ListenerReqType::DeleteIoeventfd => self.delete_ioeventfd( + evtfd.with_context(|| "No IoEventFd for DeleteIoeventfd request")?, + ), + }; + + req_ret.with_context(|| AddressSpaceError::ListenerRequest(req_type)) + } +} + +#[cfg(target_arch = "x86_64")] +#[derive(Default)] +pub struct KvmIoListener { + vm_fd: Option>, + /// Whether enabled as a IO listener. + enabled: bool, +} + +#[cfg(target_arch = "x86_64")] +impl KvmIoListener { + pub fn new(vm_fd: Option>) -> KvmIoListener { + KvmIoListener { + vm_fd, + enabled: false, + } + } + /// Register a IoEvent to `/dev/kvm`. + /// + /// # Arguments + /// + /// * `ioevtfd` - IoEvent of Region. + /// + /// # Errors + /// + /// Return Error if the length of ioeventfd data is unexpected or syscall failed. 
+ fn add_ioeventfd(&self, ioevtfd: &RegionIoEventFd) -> Result<()> { + let vm_fd = self.vm_fd.as_ref().unwrap(); + let io_addr = IoEventAddress::Pio(ioevtfd.addr_range.base.raw_value()); + trace::kvm_add_ioeventfd( + &ioevtfd.fd, + &io_addr, + ioevtfd.data_match, + ioevtfd.addr_range.size, + ioevtfd.data, + ); + let ioctl_ret = if ioevtfd.data_match { + let length = ioevtfd.addr_range.size; + match length { + 2 => vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u16), + 4 => vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u32), + 8 => vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data), + _ => bail!("unexpected ioeventfd data length {}", length), + } + } else { + vm_fd.register_ioevent(&ioevtfd.fd, &io_addr, NoDatamatch) + }; + + ioctl_ret.with_context(|| { + format!( + "KVM register ioeventfd failed: io addr 0x{:X}, size 0x{:X}, data_match {}", + ioevtfd.addr_range.base.raw_value(), + ioevtfd.addr_range.size, + if ioevtfd.data_match { + ioevtfd.data + } else { + u64::MAX + } + ) + }) + } + + /// Delete an IoEvent from `/dev/kvm`. + /// + /// # Arguments + /// + /// * `ioevtfd` - IoEvent of Region. + fn delete_ioeventfd(&self, ioevtfd: &RegionIoEventFd) -> Result<()> { + let vm_fd = self.vm_fd.as_ref().unwrap(); + let io_addr = IoEventAddress::Pio(ioevtfd.addr_range.base.raw_value()); + trace::kvm_delete_ioeventfd( + &ioevtfd.fd, + &io_addr, + ioevtfd.data_match, + ioevtfd.addr_range.size, + ioevtfd.data, + ); + let ioctl_ret = if ioevtfd.data_match { + let length = ioevtfd.addr_range.size; + match length { + 2 => vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u16), + 4 => vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data as u32), + 8 => vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, ioevtfd.data), + _ => bail!("Unexpected ioeventfd data length {}", length), + } + } else { + vm_fd.unregister_ioevent(&ioevtfd.fd, &io_addr, NoDatamatch) + }; + + ioctl_ret.with_context(|| { + format!( + "KVM unregister ioeventfd failed: io addr 0x{:X}, size 0x{:X}, data_match {}", + ioevtfd.addr_range.base.raw_value(), + ioevtfd.addr_range.size, + if ioevtfd.data_match { + ioevtfd.data + } else { + u64::MAX + } + ) + }) + } +} + +/// Kvm io listener. +#[cfg(target_arch = "x86_64")] +impl Listener for KvmIoListener { + /// Get the default priority. + fn priority(&self) -> i32 { + 10_i32 + } + + /// Is this listener enabled to call. + fn enabled(&self) -> bool { + self.enabled + } + + /// Enable listener for address space. + fn enable(&mut self) { + self.enabled = true; + } + + /// Disable listener for address space. + fn disable(&mut self) { + self.enabled = false; + } + + /// Deal with the request. + /// + /// # Arguments + /// + /// * `_range` - Corresponding FlatRange of new-added/deleted region. + /// * `evtfd` - IoEvent of Region. + /// * `req_type` - Request type. + fn handle_request( + &self, + _range: Option<&FlatRange>, + evtfd: Option<&RegionIoEventFd>, + req_type: ListenerReqType, + ) -> Result<()> { + let handle_ret = match req_type { + ListenerReqType::AddIoeventfd => { + self.add_ioeventfd(evtfd.with_context(|| "No IoEventFd for AddIoeventfd request")?) 
+ } + ListenerReqType::DeleteIoeventfd => self.delete_ioeventfd( + evtfd.with_context(|| "No IoEventFd for DeleteIoeventfd request")?, + ), + _ => return Ok(()), + }; + + handle_ret.with_context(|| AddressSpaceError::ListenerRequest(req_type)) + } +} + +#[cfg(test)] +mod test { + use libc::EFD_NONBLOCK; + use vmm_sys_util::eventfd::EventFd; + + use crate::kvm::KvmHypervisor; + + use super::*; + use address_space::{GuestAddress, HostMemMapping, Region, RegionIoEventFd}; + + fn generate_region_ioeventfd>(addr: u64, datamatch: T) -> RegionIoEventFd { + let data = datamatch.into(); + RegionIoEventFd { + fd: Arc::new(EventFd::new(EFD_NONBLOCK).unwrap()), + addr_range: AddressRange::from((addr, std::mem::size_of::() as u64)), + data_match: data != 0, + data, + } + } + + fn create_ram_range(addr: u64, size: u64, offset_in_region: u64) -> FlatRange { + let mem_mapping = Arc::new( + HostMemMapping::new(GuestAddress(addr), None, size, None, false, false, false).unwrap(), + ); + FlatRange { + addr_range: AddressRange::new( + mem_mapping.start_address().unchecked_add(offset_in_region), + mem_mapping.size() - offset_in_region, + ), + owner: Region::init_ram_region(mem_mapping.clone(), "ram"), + offset_in_region, + rom_dev_romd: None, + } + } + + #[test] + fn test_alloc_slot() { + let kvm_hyp = KvmHypervisor::new().unwrap_or_default(); + if kvm_hyp.vm_fd.is_none() { + return; + } + + let kml = KvmMemoryListener::new(4, kvm_hyp.vm_fd.clone(), kvm_hyp.mem_slots); + let host_addr = 0u64; + + assert_eq!(kml.get_free_slot(0, 100, host_addr).unwrap(), 0); + assert_eq!(kml.get_free_slot(200, 100, host_addr).unwrap(), 1); + assert_eq!(kml.get_free_slot(300, 100, host_addr).unwrap(), 2); + assert_eq!(kml.get_free_slot(500, 100, host_addr).unwrap(), 3); + assert!(kml.get_free_slot(200, 100, host_addr).is_err()); + // no available KVM mem slot + assert!(kml.get_free_slot(600, 100, host_addr).is_err()); + + kml.delete_slot(200, 100).unwrap(); + assert!(kml.delete_slot(150, 100).is_err()); + assert!(kml.delete_slot(700, 100).is_err()); + assert_eq!(kml.get_free_slot(200, 100, host_addr).unwrap(), 1); + } + + #[test] + fn test_add_del_ram_region() { + let kvm_hyp = KvmHypervisor::new().unwrap_or_default(); + if kvm_hyp.vm_fd.is_none() { + return; + } + + let kml = KvmMemoryListener::new(34, kvm_hyp.vm_fd.clone(), kvm_hyp.mem_slots); + let ram_size = host_page_size(); + let ram_fr1 = create_ram_range(0, ram_size, 0); + + kml.handle_request(Some(&ram_fr1), None, ListenerReqType::AddRegion) + .unwrap(); + // flat-range already added, adding again should make an error + assert!(kml + .handle_request(Some(&ram_fr1), None, ListenerReqType::AddRegion) + .is_err()); + assert!(kml + .handle_request(Some(&ram_fr1), None, ListenerReqType::DeleteRegion) + .is_ok()); + // flat-range already deleted, deleting again should make an error + assert!(kml + .handle_request(Some(&ram_fr1), None, ListenerReqType::DeleteRegion) + .is_ok()); + } + + #[test] + fn test_add_region_align() { + let kvm_hyp = KvmHypervisor::new().unwrap_or_default(); + if kvm_hyp.vm_fd.is_none() { + return; + } + + let kml = KvmMemoryListener::new(34, kvm_hyp.vm_fd.clone(), kvm_hyp.mem_slots); + // flat-range not aligned + let page_size = host_page_size(); + let ram_fr2 = create_ram_range(page_size, 2 * page_size, 1000); + assert!(kml + .handle_request(Some(&ram_fr2), None, ListenerReqType::AddRegion) + .is_ok()); + + // flat-range size is zero after aligned, this step should make an error + let ram_fr3 = create_ram_range(page_size, page_size, 1000); + 
assert!(kml + .handle_request(Some(&ram_fr3), None, ListenerReqType::AddRegion) + .is_err()); + } + + #[test] + fn test_add_del_ioeventfd() { + let kvm_hyp = KvmHypervisor::new().unwrap_or_default(); + if kvm_hyp.vm_fd.is_none() { + return; + } + + let kml = KvmMemoryListener::new(34, kvm_hyp.vm_fd.clone(), kvm_hyp.mem_slots); + let evtfd = generate_region_ioeventfd(4, NoDatamatch); + assert!(kml + .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) + .is_ok()); + // The evtfd already added, adding again should make an error. + assert!(kml + .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) + .is_err()); + assert!(kml + .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) + .is_ok()); + // The evtfd already deleted, deleting again should cause an error. + assert!(kml + .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) + .is_err()); + + // Register an ioeventfd with data-match. + let evtfd = generate_region_ioeventfd(64, 4_u64); + assert!(kml + .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) + .is_ok()); + + // Register an ioeventfd which has same address with previously registered ones will cause + // an error. + let same_addred_evtfd = generate_region_ioeventfd(64, 4_u64); + assert!(kml + .handle_request( + None, + Some(&same_addred_evtfd), + ListenerReqType::AddIoeventfd + ) + .is_err()); + + assert!(kml + .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) + .is_ok()); + } + + #[test] + fn test_ioeventfd_with_data_match() { + let kvm_hyp = KvmHypervisor::new().unwrap_or_default(); + if kvm_hyp.vm_fd.is_none() { + return; + } + + let kml = KvmMemoryListener::new(34, kvm_hyp.vm_fd.clone(), kvm_hyp.mem_slots); + let evtfd_addr = 0x1000_u64; + let mut evtfd = generate_region_ioeventfd(evtfd_addr, 64_u32); + evtfd.addr_range.size = 3_u64; + // Matched data's length must be 2, 4 or 8. + assert!(kml + .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) + .is_err()); + + let evtfd = generate_region_ioeventfd(evtfd_addr, 64_u32); + assert!(kml + .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) + .is_ok()); + + // Delete ioeventfd with wrong address will cause an error. + let mut evtfd_to_del = evtfd.clone(); + evtfd_to_del.addr_range.base.0 -= 2; + assert!(kml + .handle_request(None, Some(&evtfd_to_del), ListenerReqType::DeleteIoeventfd) + .is_err()); + + // Delete ioeventfd with inconsistent data-match will cause error. + let mut evtfd_to_del = evtfd.clone(); + evtfd_to_del.data_match = false; + assert!(kml + .handle_request(None, Some(&evtfd_to_del), ListenerReqType::DeleteIoeventfd) + .is_err()); + + // Delete ioeventfd with inconsistent matched data will cause an error. + let mut evtfd_to_del = evtfd.clone(); + evtfd_to_del.data = 128_u64; + assert!(kml + .handle_request(None, Some(&evtfd_to_del), ListenerReqType::DeleteIoeventfd) + .is_err()); + + // Delete it successfully. + assert!(kml + .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) + .is_ok()); + + // Delete a not-exist ioeventfd will cause an error. 
+ assert!(kml + .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) + .is_err()); + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_kvm_io_listener() { + let kvm_hyp = KvmHypervisor::new().unwrap_or_default(); + if kvm_hyp.vm_fd.is_none() { + return; + } + + let iol = KvmIoListener::new(kvm_hyp.vm_fd); + let evtfd = generate_region_ioeventfd(4, NoDatamatch); + assert!(iol + .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) + .is_ok()); + // evtfd already added, adding again should make an error. + assert!(iol + .handle_request(None, Some(&evtfd), ListenerReqType::AddIoeventfd) + .is_err()); + assert!(iol + .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) + .is_ok()); + // evtfd already deleted, deleting again should make an error. + assert!(iol + .handle_request(None, Some(&evtfd), ListenerReqType::DeleteIoeventfd) + .is_err()); + + // Matched data's length must be 2, 4 or 8. + let mut evtfd_match = generate_region_ioeventfd(4, 64_u32); + evtfd_match.addr_range.size = 3; + assert!(iol + .handle_request(None, Some(&evtfd_match), ListenerReqType::AddIoeventfd) + .is_err()); + evtfd_match.addr_range.size = 4; + assert!(iol + .handle_request(None, Some(&evtfd_match), ListenerReqType::AddIoeventfd) + .is_ok()); + } +} diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 80012bbfdb3cae7f65627d4d49e543032d27a6f4..16e7a479a23b9247d86db2291cc27a7f95d764b6 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. // // StratoVirt is licensed under Mulan PSL v2. // You can use this software according to the terms and conditions of the Mulan @@ -10,25 +10,71 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
+#[cfg(target_arch = "aarch64")] +pub mod aarch64; +#[cfg(target_arch = "x86_64")] +pub mod x86_64; + mod interrupt; +mod listener; + #[cfg(target_arch = "x86_64")] -mod state; +pub mod vm_state; -pub use interrupt::MsiVector; +#[cfg(target_arch = "aarch64")] +pub use aarch64::gicv2::KvmGICv2; +#[cfg(target_arch = "aarch64")] +pub use aarch64::gicv3::{KvmGICv3, KvmGICv3Its}; -use std::sync::{Arc, Mutex}; +use std::collections::HashMap; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Barrier, Condvar, Mutex}; +use std::thread; +use std::time::Duration; -use arc_swap::ArcSwap; -use interrupt::{refact_vec_with_field, IrqRoute, IrqRouteEntry, IrqRouteTable}; +use anyhow::anyhow; +use anyhow::{bail, Context, Result}; +use kvm_bindings::kvm_userspace_memory_region as KvmMemSlot; use kvm_bindings::*; -use kvm_ioctls::{Kvm, VmFd}; -use once_cell::sync::Lazy; +#[cfg(feature = "vfio_device")] +use kvm_ioctls::DeviceFd; +#[cfg(not(test))] +use kvm_ioctls::VcpuExit; +use kvm_ioctls::{Cap, Kvm, VcpuFd, VmFd}; +use libc::{c_int, c_void, siginfo_t}; +use log::{error, info, warn}; +use vmm_sys_util::{ + eventfd::EventFd, + ioctl_io_nr, ioctl_ioc_nr, ioctl_ior_nr, ioctl_iow_nr, ioctl_iowr_nr, + signal::{register_signal_handler, Killable}, +}; -use vmm_sys_util::eventfd::EventFd; - -use crate::errors::{Result, ResultExt}; +use self::listener::KvmMemoryListener; +use super::HypervisorOps; #[cfg(target_arch = "x86_64")] -use migration::{MigrationManager, MigrationRestoreOrder}; +use crate::HypervisorError; +#[cfg(target_arch = "aarch64")] +use aarch64::cpu_caps::ArmCPUCaps as CPUCaps; +use address_space::{AddressSpace, Listener}; +#[cfg(target_arch = "aarch64")] +use cpu::CPUFeatures; +use cpu::{ + ArchCPU, CPUBootConfig, CPUHypervisorOps, CPUInterface, CPUThreadWorker, CpuError, + CpuLifecycleState, RegsIndex, CPU, VCPU_TASK_SIGNAL, +}; +use devices::{pci::MsiVector, IrqManager, LineIrqManager, MsiIrqManager, TriggerMode}; +#[cfg(target_arch = "aarch64")] +use devices::{ + GICVersion, GICv2, GICv3, GICv3ItsState, GICv3State, ICGICConfig, InterruptController, + GIC_IRQ_INTERNAL, +}; +use interrupt::IrqRouteTable; +use machine_manager::machine::HypervisorType; +#[cfg(target_arch = "aarch64")] +use migration::snapshot::{GICV3_ITS_SNAPSHOT_ID, GICV3_SNAPSHOT_ID}; +use migration::{MigrateMemSlot, MigrateOps, MigrationManager}; +#[cfg(target_arch = "x86_64")] +use x86_64::cpu_caps::X86CPUCaps as CPUCaps; // See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/asm-generic/kvm.h pub const KVM_SET_DEVICE_ATTR: u32 = 0x4018_aee1; @@ -37,139 +83,1114 @@ pub const KVM_IOEVENTFD: u32 = 0x4040_ae79; pub const KVM_SIGNAL_MSI: u32 = 0x4020_aea5; // See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/linux/kvm.h -ioctl_iow_nr!(KVM_SET_GSI_ROUTING, KVMIO, 0x6a, kvm_irq_routing); -ioctl_iow_nr!(KVM_IRQFD, KVMIO, 0x76, kvm_irqfd); +ioctl_iow_nr!(KVM_GET_DIRTY_LOG, KVMIO, 0x42, kvm_dirty_log); +ioctl_iowr_nr!(KVM_CREATE_DEVICE, KVMIO, 0xe0, kvm_create_device); ioctl_io_nr!(KVM_GET_API_VERSION, KVMIO, 0x00); ioctl_ior_nr!(KVM_GET_MP_STATE, KVMIO, 0x98, kvm_mp_state); ioctl_ior_nr!(KVM_GET_VCPU_EVENTS, KVMIO, 0x9f, kvm_vcpu_events); -#[cfg(target_arch = "x86_64")] -ioctl_iowr_nr!(KVM_GET_SUPPORTED_CPUID, KVMIO, 0x05, kvm_cpuid2); -#[cfg(target_arch = "x86_64")] -ioctl_iow_nr!(KVM_SET_CPUID2, KVMIO, 0x90, kvm_cpuid2); -#[cfg(target_arch = "x86_64")] -ioctl_iow_nr!(KVM_SET_MP_STATE, KVMIO, 0x99, kvm_mp_state); -#[cfg(target_arch = "x86_64")] -ioctl_iow_nr!(KVM_SET_SREGS, KVMIO, 
0x84, kvm_sregs); -#[cfg(target_arch = "x86_64")] -ioctl_iow_nr!(KVM_SET_REGS, KVMIO, 0x82, kvm_regs); -#[cfg(target_arch = "x86_64")] -ioctl_iow_nr!(KVM_SET_XSAVE, KVMIO, 0xa5, kvm_xsave); -#[cfg(target_arch = "x86_64")] -ioctl_iow_nr!(KVM_SET_XCRS, KVMIO, 0xa7, kvm_xcrs); -#[cfg(target_arch = "x86_64")] -ioctl_iow_nr!(KVM_SET_DEBUGREGS, KVMIO, 0xa2, kvm_debugregs); -#[cfg(target_arch = "x86_64")] -ioctl_iow_nr!(KVM_SET_LAPIC, KVMIO, 0x8f, kvm_lapic_state); -#[cfg(target_arch = "x86_64")] -ioctl_iow_nr!(KVM_SET_MSRS, KVMIO, 0x89, kvm_msrs); -#[cfg(target_arch = "x86_64")] -ioctl_iow_nr!(KVM_SET_VCPU_EVENTS, KVMIO, 0xa0, kvm_vcpu_events); -#[cfg(target_arch = "x86_64")] -ioctl_ior_nr!(KVM_GET_PIT2, KVMIO, 0x9f, kvm_pit_state2); ioctl_ior_nr!(KVM_GET_CLOCK, KVMIO, 0x7c, kvm_clock_data); -ioctl_iowr_nr!(KVM_GET_IRQCHIP, KVMIO, 0x62, kvm_irqchip); ioctl_ior_nr!(KVM_GET_REGS, KVMIO, 0x81, kvm_regs); ioctl_ior_nr!(KVM_GET_SREGS, KVMIO, 0x83, kvm_sregs); -#[cfg(target_arch = "x86_64")] -ioctl_ior_nr!(KVM_GET_XSAVE, KVMIO, 0xa4, kvm_xsave); ioctl_ior_nr!(KVM_GET_FPU, KVMIO, 0x8c, kvm_fpu); -#[cfg(target_arch = "x86_64")] -ioctl_ior_nr!(KVM_GET_XCRS, KVMIO, 0xa6, kvm_xcrs); -#[cfg(target_arch = "x86_64")] -ioctl_ior_nr!(KVM_GET_DEBUGREGS, KVMIO, 0xa1, kvm_debugregs); -#[cfg(target_arch = "x86_64")] -ioctl_ior_nr!(KVM_GET_LAPIC, KVMIO, 0x8e, kvm_lapic_state); -#[cfg(target_arch = "x86_64")] -ioctl_iowr_nr!(KVM_GET_MSRS, KVMIO, 0x88, kvm_msrs); -ioctl_iowr_nr!(KVM_CREATE_DEVICE, KVMIO, 0xe0, kvm_create_device); -#[cfg(target_arch = "aarch64")] -ioctl_iow_nr!(KVM_GET_ONE_REG, KVMIO, 0xab, kvm_one_reg); -#[cfg(target_arch = "aarch64")] -ioctl_iow_nr!(KVM_SET_ONE_REG, KVMIO, 0xac, kvm_one_reg); -#[cfg(target_arch = "aarch64")] -ioctl_iow_nr!(KVM_GET_DEVICE_ATTR, KVMIO, 0xe2, kvm_device_attr); -#[cfg(target_arch = "aarch64")] -ioctl_iowr_nr!(KVM_GET_REG_LIST, KVMIO, 0xb0, kvm_reg_list); -#[cfg(target_arch = "aarch64")] -ioctl_iow_nr!(KVM_ARM_VCPU_INIT, KVMIO, 0xae, kvm_vcpu_init); +ioctl_iow_nr!(KVM_SET_GSI_ROUTING, KVMIO, 0x6a, kvm_irq_routing); +ioctl_iow_nr!(KVM_IRQFD, KVMIO, 0x76, kvm_irqfd); +ioctl_iowr_nr!(KVM_GET_IRQCHIP, KVMIO, 0x62, kvm_irqchip); +ioctl_iow_nr!(KVM_IRQ_LINE, KVMIO, 0x61, kvm_irq_level); +ioctl_iow_nr!(KVM_SET_MP_STATE, KVMIO, 0x99, kvm_mp_state); +ioctl_iow_nr!(KVM_SET_VCPU_EVENTS, KVMIO, 0xa0, kvm_vcpu_events); #[allow(clippy::upper_case_acronyms)] #[derive(Default)] -pub struct KVMFds { +pub struct KvmHypervisor { pub fd: Option, - pub vm_fd: Option, - pub irq_route_table: Mutex, + pub vm_fd: Option>, + pub mem_slots: Arc>>, + #[cfg(target_arch = "aarch64")] + pub irq_chip: Option>, } -impl KVMFds { - pub fn new() -> Self { - let kvm_fds = match Kvm::new() { - Ok(fd) => { - let vm_fd = match fd.create_vm() { - Ok(vm_fd) => vm_fd, +impl KvmHypervisor { + pub fn new() -> Result { + match Kvm::new() { + Ok(kvm_fd) => { + let vm_fd: Option> = Some(Arc::new(match kvm_fd.create_vm() { + Ok(fd) => fd, Err(e) => { - error!("Failed to create VM in KVM: {}", e); - return KVMFds::default(); + bail!("Failed to create VM in KVM: {:?}", e); } - }; - let irq_route_table = Mutex::new(IrqRouteTable::new(&fd)); - KVMFds { - fd: Some(fd), - vm_fd: Some(vm_fd), - irq_route_table, - } + })); + + Ok(KvmHypervisor { + fd: Some(kvm_fd), + vm_fd, + mem_slots: Arc::new(Mutex::new(HashMap::new())), + #[cfg(target_arch = "aarch64")] + irq_chip: None, + }) } Err(e) => { - error!("Failed to open /dev/kvm: {}", e); - KVMFds::default() + bail!("Failed to open /dev/kvm: {:?}", e) } - }; + } + } + + fn 
create_memory_listener(&self) -> Arc> { + // Memslot will not exceed u32::MAX, so use as translate data type. + Arc::new(Mutex::new(KvmMemoryListener::new( + self.fd.as_ref().unwrap().get_nr_memslots() as u32, + self.vm_fd.clone(), + self.mem_slots.clone(), + ))) + } +} +impl HypervisorOps for KvmHypervisor { + fn init_machine( + &self, + #[cfg(target_arch = "x86_64")] sys_io: &Arc, + sys_mem: &Arc, + ) -> Result<()> { + self.arch_init()?; + + sys_mem.set_ioevtfd_enabled(true); + + sys_mem + .register_listener(self.create_memory_listener()) + .with_context(|| "Failed to register hypervisor listener for memory space.")?; #[cfg(target_arch = "x86_64")] - MigrationManager::register_device_instance( - state::KvmDeviceState::descriptor(), - Arc::new(state::KvmDevice {}), - MigrationRestoreOrder::Default, - ); + sys_io + .register_listener(self.create_io_listener()) + .with_context(|| "Failed to register hypervisor listener for I/O address space.")?; - kvm_fds + Ok(()) } - /// Sets the gsi routing table entries. It will overwrite previously set entries. - pub fn commit_irq_routing(&self) -> Result<()> { - let routes = self.irq_route_table.lock().unwrap().irq_routes.clone(); + #[cfg(target_arch = "aarch64")] + fn create_interrupt_controller( + &mut self, + gic_conf: &ICGICConfig, + ) -> Result> { + gic_conf.check_sanity()?; - // Safe because data in `routes` is reliable. - unsafe { - let mut irq_routing = refact_vec_with_field::(routes.len()); - (*irq_routing).nr = routes.len() as u32; - (*irq_routing).flags = 0; - let entries: &mut [IrqRouteEntry] = (*irq_routing).entries.as_mut_slice(routes.len()); - entries.copy_from_slice(&routes); + let create_gicv3 = || { + let hypervisor_gic = KvmGICv3::new(self.vm_fd.clone().unwrap(), gic_conf.vcpu_count)?; + let its_handler = KvmGICv3Its::new(self.vm_fd.clone().unwrap())?; + let gicv3 = Arc::new(GICv3::new( + Arc::new(hypervisor_gic), + Arc::new(its_handler), + gic_conf, + )?); + if let Some(its_dev) = gicv3.its_dev.clone() { + MigrationManager::register_gic_instance( + GICv3ItsState::descriptor(), + its_dev, + GICV3_ITS_SNAPSHOT_ID, + ); + } - self.vm_fd - .as_ref() - .unwrap() - .set_gsi_routing(&*irq_routing) - .chain_err(|| "Failed to set gsi routing") + MigrationManager::register_gic_instance( + GICv3State::descriptor(), + gicv3.clone(), + GICV3_SNAPSHOT_ID, + ); + + Ok(Arc::new(InterruptController::new(gicv3))) + }; + + let create_gicv2 = || { + let hypervisor_gic = KvmGICv2::new(self.vm_fd.clone().unwrap())?; + let gicv2 = Arc::new(GICv2::new(Arc::new(hypervisor_gic), gic_conf)?); + Ok(Arc::new(InterruptController::new(gicv2))) + }; + + match &gic_conf.version { + Some(GICVersion::GICv3) => create_gicv3(), + Some(GICVersion::GICv2) => create_gicv2(), + // Try v3 by default if no version specified. 
+ None => create_gicv3().or_else(|_| create_gicv2()), } } - pub fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> Result<()> { + #[cfg(target_arch = "x86_64")] + fn create_interrupt_controller(&mut self) -> Result<()> { self.vm_fd .as_ref() .unwrap() - .register_irqfd(fd, gsi) - .chain_err(|| format!("Failed to register irqfd: gsi {}.", gsi)) + .create_irq_chip() + .with_context(|| HypervisorError::CrtIrqchipErr)?; + + Ok(()) + } + + fn create_hypervisor_cpu( + &self, + vcpu_id: u8, + ) -> Result> { + let vcpu_fd = self + .vm_fd + .as_ref() + .unwrap() + .create_vcpu(u64::from(vcpu_id)) + .with_context(|| "Create vcpu failed")?; + Ok(Arc::new(KvmCpu::new( + vcpu_id, + #[cfg(target_arch = "aarch64")] + self.vm_fd.clone(), + vcpu_fd, + ))) } - pub fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> Result<()> { + fn create_irq_manager(&mut self) -> Result { + let kvm = Kvm::new().unwrap(); + let irqfd_enable = kvm.check_extension(Cap::Irqfd); + let irq_route_table = Mutex::new(IrqRouteTable::new(self.fd.as_ref().unwrap())); + let irq_manager = Arc::new(KVMInterruptManager::new( + irqfd_enable, + self.vm_fd.clone().unwrap(), + irq_route_table, + )); + let mut locked_irq_route_table = irq_manager.irq_route_table.lock().unwrap(); + locked_irq_route_table.init_irq_route_table(); + locked_irq_route_table.commit_irq_routing(self.vm_fd.as_ref().unwrap())?; + drop(locked_irq_route_table); + + Ok(IrqManager { + line_irq_manager: Some(irq_manager.clone()), + msi_irq_manager: Some(irq_manager), + }) + } + + #[cfg(feature = "vfio_device")] + fn create_vfio_device(&self) -> Option { + let mut device = kvm_create_device { + type_: kvm_device_type_KVM_DEV_TYPE_VFIO, + fd: 0, + flags: 0, + }; + let vfio_device_fd = match self.vm_fd.as_ref().unwrap().create_device(&mut device) { + Ok(fd) => Some(fd), + Err(_) => { + error!("Failed to create VFIO device."); + None + } + }; + + vfio_device_fd + } +} + +impl MigrateOps for KvmHypervisor { + /// Get ram memory region from `KvmHypervisor` structure. + fn get_mem_slots(&self) -> Arc>> { + let mut mgt_mem_slots = HashMap::new(); + for (_, slot) in self.mem_slots.lock().unwrap().iter() { + let mem_slot = MigrateMemSlot { + slot: slot.slot, + guest_phys_addr: slot.guest_phys_addr, + userspace_addr: slot.userspace_addr, + memory_size: slot.memory_size, + }; + mgt_mem_slots.insert(slot.slot, mem_slot); + } + Arc::new(Mutex::new(mgt_mem_slots)) + } + + fn get_dirty_log(&self, slot: u32, mem_size: u64) -> Result> { self.vm_fd .as_ref() .unwrap() - .unregister_irqfd(fd, gsi) - .chain_err(|| format!("Failed to unregister irqfd: gsi {}.", gsi)) + .get_dirty_log(slot, usize::try_from(mem_size)?) + .with_context(|| { + format!( + "Failed to get dirty log, error is {}", + std::io::Error::last_os_error() + ) + }) + } + + /// Start dirty page tracking in kvm. + fn start_dirty_log(&self) -> Result<()> { + for (_, region) in self.mem_slots.lock().unwrap().iter_mut() { + region.flags = KVM_MEM_LOG_DIRTY_PAGES; + // SAFETY: region from `KvmHypervisor` is reliable. + unsafe { + self.vm_fd + .as_ref() + .unwrap() + .set_user_memory_region(*region) + .with_context(|| { + format!( + "Failed to start dirty log, error is {}", + std::io::Error::last_os_error() + ) + })?; + } + } + + Ok(()) + } + + /// Stop dirty page tracking in kvm. + fn stop_dirty_log(&self) -> Result<()> { + for (_, region) in self.mem_slots.lock().unwrap().iter_mut() { + region.flags = 0; + // SAFETY: region from `KvmHypervisor` is reliable. 
+ unsafe { + self.vm_fd + .as_ref() + .unwrap() + .set_user_memory_region(*region) + .with_context(|| { + format!( + "Failed to stop dirty log, error is {}", + std::io::Error::last_os_error() + ) + })?; + } + } + + Ok(()) + } + + fn register_instance(&self) -> Result<()> { + #[cfg(target_arch = "x86_64")] + MigrationManager::register_kvm_instance( + vm_state::KvmDeviceState::descriptor(), + Arc::new(vm_state::KvmDevice::new(self.vm_fd.clone().unwrap())), + ); + + Ok(()) + } +} + +pub struct KvmCpu { + id: u8, + #[cfg(target_arch = "aarch64")] + vm_fd: Option>, + fd: Arc, + /// The capability of VCPU. + caps: CPUCaps, + #[cfg(target_arch = "aarch64")] + /// Used to pass vcpu target and supported features to kvm. + pub kvi: Mutex, +} + +impl KvmCpu { + pub fn new( + id: u8, + #[cfg(target_arch = "aarch64")] vm_fd: Option>, + vcpu_fd: VcpuFd, + ) -> Self { + Self { + id, + #[cfg(target_arch = "aarch64")] + vm_fd, + fd: Arc::new(vcpu_fd), + caps: CPUCaps::init_capabilities(), + #[cfg(target_arch = "aarch64")] + kvi: Mutex::new(kvm_vcpu_init::default()), + } + } + + /// Init signal for `CPU` event. + fn init_signals(&self) -> Result<()> { + extern "C" fn handle_signal(signum: c_int, _: *mut siginfo_t, _: *mut c_void) { + if signum == VCPU_TASK_SIGNAL { + let _ = CPUThreadWorker::run_on_local_thread_vcpu(|vcpu| { + vcpu.hypervisor_cpu().set_hypervisor_exit().unwrap() + }); + } + } + + register_signal_handler(VCPU_TASK_SIGNAL, handle_signal) + .with_context(|| "Failed to register VCPU_TASK_SIGNAL signal.")?; + + Ok(()) + } + + #[cfg(not(test))] + fn kvm_vcpu_exec(&self, cpu: Arc) -> Result { + let vm = cpu + .vm() + .upgrade() + .with_context(|| CpuError::NoMachineInterface)?; + + match self.fd.run() { + Ok(run) => { + trace::kvm_vcpu_run_exit(cpu.id, &run); + match run { + #[cfg(target_arch = "x86_64")] + VcpuExit::IoIn(addr, data) => { + vm.lock().unwrap().pio_in(u64::from(addr), data); + } + #[cfg(target_arch = "x86_64")] + VcpuExit::IoOut(addr, data) => { + #[cfg(feature = "boot_time")] + cpu::capture_boot_signal(u64::from(addr), data); + + vm.lock().unwrap().pio_out(u64::from(addr), data); + } + VcpuExit::MmioRead(addr, data) => { + vm.lock().unwrap().mmio_read(addr, data); + } + VcpuExit::MmioWrite(addr, data) => { + #[cfg(all(target_arch = "aarch64", feature = "boot_time"))] + cpu::capture_boot_signal(addr, data); + + vm.lock().unwrap().mmio_write(addr, data); + } + #[cfg(target_arch = "x86_64")] + VcpuExit::Hlt => { + info!("Vcpu{} received KVM_EXIT_HLT signal", cpu.id); + return Err(anyhow!(CpuError::VcpuHltEvent(cpu.id))); + } + #[cfg(target_arch = "x86_64")] + VcpuExit::Shutdown => { + info!("Vcpu{} received an KVM_EXIT_SHUTDOWN signal", cpu.id); + cpu.guest_shutdown()?; + + return Ok(false); + } + #[cfg(target_arch = "aarch64")] + VcpuExit::SystemEvent(event, flags) => { + if event == kvm_bindings::KVM_SYSTEM_EVENT_SHUTDOWN { + info!( + "Vcpu{} received an KVM_SYSTEM_EVENT_SHUTDOWN signal", + cpu.id() + ); + cpu.guest_shutdown() + .with_context(|| "Some error occurred in guest shutdown")?; + return Ok(true); + } else if event == kvm_bindings::KVM_SYSTEM_EVENT_RESET { + info!("Vcpu{} received an KVM_SYSTEM_EVENT_RESET signal", cpu.id()); + cpu.guest_reset() + .with_context(|| "Some error occurred in guest reset")?; + return Ok(true); + } else { + error!( + "Vcpu{} received unexpected system event with type 0x{:x}, flags {:#x?}", + cpu.id(), + event, + flags + ); + } + return Ok(false); + } + VcpuExit::FailEntry(reason, cpuid) => { + info!( + "Vcpu{} received KVM_EXIT_FAIL_ENTRY signal. 
the vcpu could not be run due to unknown reasons({})", + cpuid, reason + ); + return Ok(false); + } + VcpuExit::InternalError => { + info!("Vcpu{} received KVM_EXIT_INTERNAL_ERROR signal", cpu.id()); + return Ok(false); + } + r => { + return Err(anyhow!(CpuError::VcpuExitReason( + cpu.id(), + format!("{:?}", r) + ))); + } + } + } + Err(ref e) => { + match e.errno() { + libc::EAGAIN => {} + libc::EINTR => { + self.fd.set_kvm_immediate_exit(0); + } + _ => { + return Err(anyhow!(CpuError::UnhandledHypervisorExit( + cpu.id(), + e.errno() + ))); + } + }; + } + } + Ok(true) + } + + fn kick_vcpu_thread(&self, task: Arc>>>) -> Result<()> { + let task = task.lock().unwrap(); + match task.as_ref() { + Some(thread) => thread + .kill(VCPU_TASK_SIGNAL) + .with_context(|| CpuError::KickVcpu("Fail to kick vcpu".to_string())), + None => { + warn!("VCPU thread not started, no need to kick"); + Ok(()) + } + } } } -pub static KVM_FDS: Lazy> = Lazy::new(|| ArcSwap::from(Arc::new(KVMFds::new()))); +impl CPUHypervisorOps for KvmCpu { + fn get_hypervisor_type(&self) -> HypervisorType { + HypervisorType::Kvm + } + + fn init_pmu(&self) -> Result<()> { + self.arch_init_pmu() + } + + fn vcpu_init(&self) -> Result<()> { + self.arch_vcpu_init() + } + + fn set_boot_config( + &self, + arch_cpu: Arc>, + boot_config: &CPUBootConfig, + #[cfg(target_arch = "aarch64")] vcpu_config: &CPUFeatures, + ) -> Result<()> { + #[cfg(target_arch = "aarch64")] + return self.arch_set_boot_config(arch_cpu, boot_config, vcpu_config); + #[cfg(target_arch = "x86_64")] + return self.arch_set_boot_config(arch_cpu, boot_config); + } + + fn get_one_reg(&self, reg_id: u64) -> Result { + self.arch_get_one_reg(reg_id) + } + + fn get_regs(&self, arch_cpu: Arc>, regs_index: RegsIndex) -> Result<()> { + self.arch_get_regs(arch_cpu, regs_index) + } + + fn set_regs(&self, arch_cpu: Arc>, regs_index: RegsIndex) -> Result<()> { + self.arch_set_regs(arch_cpu, regs_index) + } + + fn put_register(&self, cpu: Arc) -> Result<()> { + self.arch_put_register(cpu)?; + + Ok(()) + } + + fn reset_vcpu(&self, cpu: Arc) -> Result<()> { + trace::kvm_reset_vcpu(self.id); + self.arch_reset_vcpu(cpu)?; + + Ok(()) + } + + fn vcpu_exec( + &self, + cpu_thread_worker: CPUThreadWorker, + thread_barrier: Arc, + ) -> Result<()> { + cpu_thread_worker.init_local_thread_vcpu(); + if let Err(e) = self.init_signals() { + error!( + "Failed to init cpu{} signal:{:?}", + cpu_thread_worker.thread_cpu.id, e + ); + } + + cpu_thread_worker.thread_cpu.set_tid(None); + + #[cfg(not(test))] + self.put_register(cpu_thread_worker.thread_cpu.clone())?; + + // Wait for all vcpu to complete the running + // environment initialization. + thread_barrier.wait(); + + info!("vcpu{} start running", cpu_thread_worker.thread_cpu.id); + while let Ok(true) = cpu_thread_worker.ready_for_running() { + #[cfg(not(test))] + { + if !self + .kvm_vcpu_exec(cpu_thread_worker.thread_cpu.clone()) + .with_context(|| { + format!( + "VCPU {}/KVM emulate error!", + cpu_thread_worker.thread_cpu.id() + ) + })? + { + break; + } + } + #[cfg(test)] + { + thread::sleep(Duration::from_millis(5)); + } + } + + // The vcpu thread is about to exit, marking the state + // of the CPU state as Stopped. 
+ let (cpu_state, cvar) = &*cpu_thread_worker.thread_cpu.state; + *cpu_state.lock().unwrap() = CpuLifecycleState::Stopped; + cvar.notify_one(); + + Ok(()) + } + + fn set_hypervisor_exit(&self) -> Result<()> { + self.fd.set_kvm_immediate_exit(1); + Ok(()) + } + + fn pause( + &self, + task: Arc>>>, + state: Arc<(Mutex, Condvar)>, + pause_signal: Arc, + ) -> Result<()> { + let task = task.lock().unwrap(); + let (cpu_state, cvar) = &*state; + + if *cpu_state.lock().unwrap() == CpuLifecycleState::Running { + *cpu_state.lock().unwrap() = CpuLifecycleState::Paused; + cvar.notify_one() + } else if *cpu_state.lock().unwrap() == CpuLifecycleState::Paused + && pause_signal.load(Ordering::SeqCst) + { + return Ok(()); + } + + match task.as_ref() { + Some(thread) => { + if let Err(e) = thread.kill(VCPU_TASK_SIGNAL) { + return Err(anyhow!(CpuError::StopVcpu(format!("{:?}", e)))); + } + } + None => { + warn!("vCPU thread not started, no need to stop"); + return Ok(()); + } + } + + // It shall wait for the vCPU pause state from hypervisor exits. + let mut sleep_times = 0u32; + while !pause_signal.load(Ordering::SeqCst) { + if sleep_times >= 5 { + bail!(CpuError::StopVcpu("timeout".to_string())); + } + thread::sleep(Duration::from_millis(5)); + sleep_times += 1; + } + + Ok(()) + } + + fn resume( + &self, + state: Arc<(Mutex, Condvar)>, + pause_signal: Arc, + ) -> Result<()> { + let (cpu_state_locked, cvar) = &*state; + let mut cpu_state = cpu_state_locked.lock().unwrap(); + if *cpu_state == CpuLifecycleState::Running { + warn!("vcpu{} in running state, no need to resume", self.id); + return Ok(()); + } + + *cpu_state = CpuLifecycleState::Running; + pause_signal.store(false, Ordering::SeqCst); + drop(cpu_state); + cvar.notify_one(); + Ok(()) + } + + fn destroy( + &self, + task: Arc>>>, + state: Arc<(Mutex, Condvar)>, + ) -> Result<()> { + let (cpu_state, cvar) = &*state; + let mut locked_cpu_state = cpu_state.lock().unwrap(); + if *locked_cpu_state == CpuLifecycleState::Running { + *locked_cpu_state = CpuLifecycleState::Stopping; + } else if *locked_cpu_state == CpuLifecycleState::Stopped + || *locked_cpu_state == CpuLifecycleState::Paused + { + return Ok(()); + } + drop(locked_cpu_state); + + self.kick_vcpu_thread(task)?; + let mut locked_cpu_state = cpu_state.lock().unwrap(); + locked_cpu_state = cvar + .wait_timeout(locked_cpu_state, Duration::from_millis(32)) + .unwrap() + .0; + + if *locked_cpu_state == CpuLifecycleState::Stopped { + Ok(()) + } else { + Err(anyhow!(CpuError::DestroyVcpu(format!( + "VCPU still in {:?} state", + *locked_cpu_state + )))) + } + } +} + +struct KVMInterruptManager { + pub irqfd_cap: bool, + pub vm_fd: Arc, + pub irq_route_table: Mutex, +} + +impl KVMInterruptManager { + pub fn new(irqfd_cap: bool, vm_fd: Arc, irq_route_table: Mutex) -> Self { + KVMInterruptManager { + irqfd_cap, + vm_fd, + irq_route_table, + } + } + + #[cfg(target_arch = "x86_64")] + pub fn arch_map_irq(&self, gsi: u32) -> u32 { + gsi + } + + #[cfg(target_arch = "aarch64")] + pub fn arch_map_irq(&self, gsi: u32) -> u32 { + let irq = gsi + GIC_IRQ_INTERNAL; + let irqtype = KVM_ARM_IRQ_TYPE_SPI; + irqtype << KVM_ARM_IRQ_TYPE_SHIFT | irq + } +} + +impl LineIrqManager for KVMInterruptManager { + fn irqfd_enable(&self) -> bool { + self.irqfd_cap + } + + fn register_irqfd( + &self, + irq_fd: Arc, + irq: u32, + trigger_mode: TriggerMode, + ) -> Result<()> { + if !self.irqfd_cap { + bail!("Hypervisor doesn't support irqfd feature!") + } + + match trigger_mode { + TriggerMode::Edge => { + 
self.vm_fd.register_irqfd(&irq_fd, irq).map_err(|e| { + error!("Failed to register irq, error is {:?}", e); + e + })?; + } + _ => { + bail!("Unsupported registering irq fd for interrupt of level mode."); + } + } + + Ok(()) + } + + fn unregister_irqfd(&self, irq_fd: Arc, irq: u32) -> Result<()> { + self.vm_fd.unregister_irqfd(&irq_fd, irq).map_err(|e| { + error!("Failed to unregister irq, error is {:?}", e); + e + })?; + + Ok(()) + } + + fn set_level_irq(&self, gsi: u32, level: bool) -> Result<()> { + let kvm_irq = self.arch_map_irq(gsi); + self.vm_fd + .set_irq_line(kvm_irq, level) + .with_context(|| format!("Failed to set irq {} level {:?}.", kvm_irq, level)) + } + + fn set_edge_irq(&self, gsi: u32) -> Result<()> { + let kvm_irq = self.arch_map_irq(gsi); + self.vm_fd + .set_irq_line(kvm_irq, true) + .with_context(|| format!("Failed to set irq {} level {:?}.", kvm_irq, true))?; + self.vm_fd + .set_irq_line(kvm_irq, false) + .with_context(|| format!("Failed to set irq {} level {:?}.", kvm_irq, false)) + } + + fn write_irqfd(&self, irq_fd: Arc) -> Result<()> { + irq_fd.write(1)?; + + Ok(()) + } +} + +impl MsiIrqManager for KVMInterruptManager { + fn irqfd_enable(&self) -> bool { + self.irqfd_cap + } + + fn allocate_irq(&self, vector: MsiVector) -> Result { + let mut locked_irq_route_table = self.irq_route_table.lock().unwrap(); + let gsi = locked_irq_route_table.allocate_gsi().map_err(|e| { + error!("Failed to allocate gsi, error is {:?}", e); + e + })?; + + locked_irq_route_table + .add_msi_route(gsi, vector) + .map_err(|e| { + error!("Failed to add MSI-X route, error is {:?}", e); + e + })?; + + locked_irq_route_table + .commit_irq_routing(&self.vm_fd.clone()) + .map_err(|e| { + error!("Failed to commit irq routing, error is {:?}", e); + e + })?; + + Ok(gsi) + } + + fn release_irq(&self, irq: u32) -> Result<()> { + let mut locked_irq_route_table = self.irq_route_table.lock().unwrap(); + + trace::kvm_release_irq(irq); + locked_irq_route_table.release_gsi(irq).map_err(|e| { + error!("Failed to release gsi, error is {:?}", e); + e + }) + } + + fn register_irqfd(&self, irq_fd: Arc, irq: u32) -> Result<()> { + trace::kvm_register_irqfd(&irq_fd, irq); + self.vm_fd.register_irqfd(&irq_fd, irq).map_err(|e| { + error!("Failed to register irq, error is {:?}", e); + e + })?; + + Ok(()) + } + + fn unregister_irqfd(&self, irq_fd: Arc, irq: u32) -> Result<()> { + trace::kvm_unregister_irqfd(&irq_fd, irq); + self.vm_fd.unregister_irqfd(&irq_fd, irq).map_err(|e| { + error!("Failed to unregister irq, error is {:?}", e); + e + })?; + + Ok(()) + } + + fn trigger(&self, irq_fd: Option>, vector: MsiVector, dev_id: u32) -> Result<()> { + if irq_fd.is_some() { + trace::kvm_trigger_irqfd(irq_fd.as_ref().unwrap()); + irq_fd.unwrap().write(1)?; + } else { + #[cfg(target_arch = "aarch64")] + let flags: u32 = kvm_bindings::KVM_MSI_VALID_DEVID; + #[cfg(target_arch = "x86_64")] + let flags: u32 = 0; + + let kvm_msi = kvm_bindings::kvm_msi { + address_lo: vector.msg_addr_lo, + address_hi: vector.msg_addr_hi, + data: vector.msg_data, + flags, + devid: dev_id, + pad: [0; 12], + }; + + trace::kvm_signal_msi(&kvm_msi); + self.vm_fd.signal_msi(kvm_msi)?; + } + + Ok(()) + } + + fn update_route_table(&self, gsi: u32, vector: MsiVector) -> Result<()> { + let mut locked_irq_route_table = self.irq_route_table.lock().unwrap(); + locked_irq_route_table + .update_msi_route(gsi, vector) + .map_err(|e| { + error!("Failed to update MSI-X route, error is {:?}", e); + e + })?; + locked_irq_route_table + 
.commit_irq_routing(&self.vm_fd.clone()) + .map_err(|e| { + error!("Failed to commit irq routing, error is {:?}", e); + e + }) + } +} + +#[cfg(test)] +mod test { + use std::sync::{Arc, Mutex}; + use std::time::Duration; + + #[cfg(target_arch = "x86_64")] + use kvm_bindings::kvm_segment; + + #[cfg(target_arch = "x86_64")] + use cpu::{ArchCPU, CPUBootConfig}; + use machine_manager::machine::{ + MachineAddressInterface, MachineInterface, MachineLifecycle, VmState, + }; + + use super::*; + + struct TestVm { + #[cfg(target_arch = "x86_64")] + pio_in: Arc)>>>, + #[cfg(target_arch = "x86_64")] + pio_out: Arc)>>>, + mmio_read: Arc)>>>, + mmio_write: Arc)>>>, + } + + impl TestVm { + fn new() -> Self { + TestVm { + #[cfg(target_arch = "x86_64")] + pio_in: Arc::new(Mutex::new(Vec::new())), + #[cfg(target_arch = "x86_64")] + pio_out: Arc::new(Mutex::new(Vec::new())), + mmio_read: Arc::new(Mutex::new(Vec::new())), + mmio_write: Arc::new(Mutex::new(Vec::new())), + } + } + } + + impl MachineLifecycle for TestVm { + fn notify_lifecycle(&self, _old: VmState, _new: VmState) -> bool { + true + } + } + + impl MachineAddressInterface for TestVm { + #[cfg(target_arch = "x86_64")] + fn pio_in(&self, addr: u64, data: &mut [u8]) -> bool { + self.pio_in.lock().unwrap().push((addr, data.to_vec())); + true + } + + #[cfg(target_arch = "x86_64")] + fn pio_out(&self, addr: u64, data: &[u8]) -> bool { + self.pio_out.lock().unwrap().push((addr, data.to_vec())); + true + } + + fn mmio_read(&self, addr: u64, data: &mut [u8]) -> bool { + #[cfg(target_arch = "aarch64")] + { + data[3] = 0x0; + data[2] = 0x0; + data[1] = 0x5; + data[0] = 0x6; + } + self.mmio_read.lock().unwrap().push((addr, data.to_vec())); + true + } + + fn mmio_write(&self, addr: u64, data: &[u8]) -> bool { + self.mmio_write.lock().unwrap().push((addr, data.to_vec())); + true + } + } + + impl MachineInterface for TestVm {} + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_x86_64_kvm_cpu() { + let kvm_hyp = KvmHypervisor::new().unwrap_or_default(); + if kvm_hyp.vm_fd.is_none() { + return; + } + + let vm = Arc::new(Mutex::new(TestVm::new())); + + let code_seg = kvm_segment { + base: 0, + limit: 1048575, + selector: 16, + type_: 11, + present: 1, + dpl: 0, + db: 0, + s: 1, + l: 1, + g: 1, + avl: 0, + unusable: 0, + padding: 0, + }; + let data_seg = kvm_segment { + base: 0, + limit: 1048575, + selector: 24, + type_: 3, + present: 1, + dpl: 0, + db: 1, + s: 1, + l: 0, + g: 1, + avl: 0, + unusable: 0, + padding: 0, + }; + let cpu_config = CPUBootConfig { + prot64_mode: true, + boot_ip: 0, + boot_sp: 0, + boot_selector: 0, + zero_page: 0x0000_7000, + code_segment: code_seg, + data_segment: data_seg, + gdt_base: 0x500u64, + gdt_size: 16, + idt_base: 0x520u64, + idt_size: 8, + pml4_start: 0x0000_9000, + }; + + // For `get_lapic` in realize function to work, + // you need to create a irq_chip for VM before creating the VCPU. 
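+ // In KVM terms: KVM_CREATE_IRQCHIP sets up the in-kernel interrupt
+ // controller, and only vCPUs created after it get an in-kernel local APIC,
+ // so the ordering below matters.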
+ let vm_fd = kvm_hyp.vm_fd.as_ref().unwrap(); + vm_fd.create_irq_chip().unwrap(); + let vcpu_fd = kvm_hyp.vm_fd.as_ref().unwrap().create_vcpu(0).unwrap(); + let hypervisor_cpu = Arc::new(KvmCpu::new( + 0, + #[cfg(target_arch = "aarch64")] + kvm_hyp.vm_fd.clone(), + vcpu_fd, + )); + let x86_cpu = Arc::new(Mutex::new(ArchCPU::new(0, 1))); + let cpu = CPU::new(hypervisor_cpu.clone(), 0, x86_cpu, vm); + // test `set_boot_config` function + assert!(hypervisor_cpu + .set_boot_config(cpu.arch().clone(), &cpu_config) + .is_ok()); + + // test setup special registers + let cpu_caps = CPUCaps::init_capabilities(); + assert!(hypervisor_cpu.put_register(Arc::new(cpu)).is_ok()); + let x86_sregs = hypervisor_cpu.fd.get_sregs().unwrap(); + assert_eq!(x86_sregs.cs, code_seg); + assert_eq!(x86_sregs.ds, data_seg); + assert_eq!(x86_sregs.es, data_seg); + assert_eq!(x86_sregs.fs, data_seg); + assert_eq!(x86_sregs.gs, data_seg); + assert_eq!(x86_sregs.ss, data_seg); + assert_eq!(x86_sregs.gdt.base, cpu_config.gdt_base); + assert_eq!(x86_sregs.gdt.limit, cpu_config.gdt_size); + assert_eq!(x86_sregs.idt.base, cpu_config.idt_base); + assert_eq!(x86_sregs.idt.limit, cpu_config.idt_size); + assert_eq!(x86_sregs.cr0 & 0x1, 1); + assert_eq!((x86_sregs.cr0 & 0x8000_0000) >> 31, 1); + assert_eq!(x86_sregs.cr3, cpu_config.pml4_start); + assert_eq!((x86_sregs.cr4 & 0x20) >> 5, 1); + assert_eq!((x86_sregs.efer & 0x700) >> 8, 5); + + // test setup_regs function + let x86_regs = hypervisor_cpu.fd.get_regs().unwrap(); + assert_eq!(x86_regs.rflags, 0x0002); + assert_eq!(x86_regs.rip, 0); + assert_eq!(x86_regs.rsp, 0); + assert_eq!(x86_regs.rbp, 0); + assert_eq!(x86_regs.rsi, 0x0000_7000); + + // test setup_fpu function + if !cpu_caps.has_xsave { + let x86_fpu = hypervisor_cpu.fd.get_fpu().unwrap(); + assert_eq!(x86_fpu.fcw, 0x37f); + } + } + + #[test] + #[allow(unused)] + fn test_cpu_lifecycle_with_kvm() { + let kvm_hyp = KvmHypervisor::new().unwrap_or_default(); + if kvm_hyp.vm_fd.is_none() { + return; + } + + let vcpu_fd = kvm_hyp.vm_fd.as_ref().unwrap().create_vcpu(0).unwrap(); + let hypervisor_cpu = Arc::new(KvmCpu::new( + 0, + #[cfg(target_arch = "aarch64")] + kvm_hyp.vm_fd.clone(), + vcpu_fd, + )); + + let vm = Arc::new(Mutex::new(TestVm::new())); + let cpu = CPU::new( + hypervisor_cpu.clone(), + 0, + Arc::new(Mutex::new(ArchCPU::default())), + vm, + ); + let (cpu_state, _) = &*cpu.state; + assert_eq!(*cpu_state.lock().unwrap(), CpuLifecycleState::Created); + drop(cpu_state); + + let cpus_thread_barrier = Arc::new(Barrier::new(2)); + let cpu_thread_barrier = cpus_thread_barrier.clone(); + + #[cfg(target_arch = "aarch64")] + { + let mut kvi = kvm_bindings::kvm_vcpu_init::default(); + kvm_hyp + .vm_fd + .as_ref() + .unwrap() + .get_preferred_target(&mut kvi) + .unwrap(); + kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2; + *hypervisor_cpu.kvi.lock().unwrap() = kvi; + hypervisor_cpu.vcpu_init().unwrap(); + } + + // Test cpu life cycle as: + // Created -> Paused -> Running -> Paused -> Running -> Destroy + let cpu_arc = Arc::new(cpu); + CPU::start(cpu_arc.clone(), cpu_thread_barrier, true).unwrap(); + + // Wait for CPU thread init signal hook + std::thread::sleep(Duration::from_millis(50)); + cpus_thread_barrier.wait(); + let (cpu_state, _) = &*cpu_arc.state; + assert_eq!(*cpu_state.lock().unwrap(), CpuLifecycleState::Paused); + drop(cpu_state); + + assert!(cpu_arc.resume().is_ok()); + + // Wait for CPU finish state change. 
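+ // (50 ms is a best-effort delay for the vCPU thread to observe the resume;
+ // the assertion below then expects the Running state.)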
+ std::thread::sleep(Duration::from_millis(50)); + let (cpu_state, _) = &*cpu_arc.state; + assert_eq!(*cpu_state.lock().unwrap(), CpuLifecycleState::Running); + drop(cpu_state); + + assert!(cpu_arc.pause().is_ok()); + + // Wait for CPU finish state change. + std::thread::sleep(Duration::from_millis(50)); + let (cpu_state, _) = &*cpu_arc.state; + assert_eq!(*cpu_state.lock().unwrap(), CpuLifecycleState::Paused); + drop(cpu_state); + + assert!(cpu_arc.resume().is_ok()); + // Wait for CPU finish state change. + std::thread::sleep(Duration::from_millis(50)); + + assert!(cpu_arc.destroy().is_ok()); + + // Wait for CPU finish state change. + std::thread::sleep(Duration::from_millis(50)); + let (cpu_state, _) = &*cpu_arc.state; + assert_eq!(*cpu_state.lock().unwrap(), CpuLifecycleState::Stopped); + drop(cpu_state); + } +} diff --git a/hypervisor/src/kvm/state.rs b/hypervisor/src/kvm/vm_state.rs similarity index 67% rename from hypervisor/src/kvm/state.rs rename to hypervisor/src/kvm/vm_state.rs index f38ad064ba5962831a978d80d5a754f7c0fdba7f..103a350cafb7036eff315ae8cb22d78780e69326 100644 --- a/hypervisor/src/kvm/state.rs +++ b/hypervisor/src/kvm/vm_state.rs @@ -10,16 +10,28 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use kvm_bindings::{ - kvm_clock_data, kvm_irqchip, kvm_pit_state2, KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, -}; +use std::sync::Arc; + +use anyhow::{Context, Result}; +use kvm_bindings::{kvm_clock_data, kvm_irqchip, kvm_pit_state2, KVM_IRQCHIP_IOAPIC}; +use kvm_ioctls::VmFd; -use super::KVM_FDS; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; +use migration::{ + DeviceStateDesc, FieldDesc, MigrationError, MigrationHook, MigrationManager, StateTransfer, +}; +use migration_derive::{ByteCode, Desc}; use util::byte_code::ByteCode; /// Structure to wrapper kvm_device related function. -pub struct KvmDevice {} +pub struct KvmDevice { + vm_fd: Arc, +} + +impl KvmDevice { + pub fn new(vm_fd: Arc) -> Self { + Self { vm_fd } + } +} /// Status of kvm device. /// Kvm device include pit, kvm_clock, irq on x86_64 platform. @@ -33,16 +45,16 @@ pub struct KvmDeviceState { } impl StateTransfer for KvmDevice { - fn get_state_vec(&self) -> migration::errors::Result> { - let kvm_fds = KVM_FDS.load(); - let vm_fd = kvm_fds.vm_fd.as_ref().unwrap(); + fn get_state_vec(&self) -> Result> { + let vm_fd = self.vm_fd.clone(); // save pit let pit_state = vm_fd.get_pit2()?; // save kvm_clock let mut kvm_clock = vm_fd.get_clock()?; - kvm_clock.flags &= !KVM_CLOCK_TSC_STABLE; + // Reset kvm clock flag. 
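+ // (Previously only KVM_CLOCK_TSC_STABLE was masked out; clearing every flag
+ // is the more conservative choice, presumably so the restore side does not
+ // assume a stable TSC or other clock properties.)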
+ kvm_clock.flags = 0; // save ioapic let mut ioapic = kvm_irqchip { @@ -60,12 +72,11 @@ impl StateTransfer for KvmDevice { .to_vec()) } - fn set_state(&self, state: &[u8]) -> migration::errors::Result<()> { - let kvm_fds = KVM_FDS.load(); - let vm_fd = kvm_fds.vm_fd.as_ref().unwrap(); + fn set_state(&self, state: &[u8]) -> Result<()> { + let vm_fd = self.vm_fd.clone(); let kvm_state = KvmDeviceState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("KVM_DEVICE"))?; + .with_context(|| MigrationError::FromBytesError("KVM_DEVICE"))?; vm_fd.set_pit2(&kvm_state.pit_state)?; vm_fd.set_clock(&kvm_state.kvm_clock)?; @@ -75,11 +86,7 @@ impl StateTransfer for KvmDevice { } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&KvmDeviceState::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&KvmDeviceState::descriptor().name).unwrap_or(!0) } } diff --git a/cpu/src/x86_64/caps.rs b/hypervisor/src/kvm/x86_64/cpu_caps.rs similarity index 92% rename from cpu/src/x86_64/caps.rs rename to hypervisor/src/kvm/x86_64/cpu_caps.rs index 4bf75fb035d251ee5597399cb4f2ac992996ede2..e5760de8e2a4eb3e6fdccc9fe833ce07def73a93 100644 --- a/cpu/src/x86_64/caps.rs +++ b/hypervisor/src/kvm/x86_64/cpu_caps.rs @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. // // StratoVirt is licensed under Mulan PSL v2. // You can use this software according to the terms and conditions of the Mulan @@ -11,7 +11,9 @@ // See the Mulan PSL v2 for more details. use kvm_bindings::{kvm_msr_entry, Msrs}; -use kvm_ioctls::{Cap, Kvm}; +use kvm_ioctls::Cap; +use kvm_ioctls::Kvm; +use vmm_sys_util::fam::Error; /// See: https://elixir.bootlin.com/linux/v4.19.123/source/arch/x86/include/asm/msr-index.h#L558 const MSR_IA32_MISC_ENABLE: ::std::os::raw::c_uint = 0x1a0; @@ -36,7 +38,6 @@ impl X86CPUCaps { /// Initialize X86CPUCaps instance. pub fn init_capabilities() -> Self { let kvm = Kvm::new().unwrap(); - X86CPUCaps { has_xsave: kvm.check_extension(Cap::Xsave), has_xcrs: kvm.check_extension(Cap::Xcrs), @@ -45,7 +46,7 @@ impl X86CPUCaps { } /// Create `Msrs` (a list of `kvm_msr_entry`) from capabilities supported_msrs. - pub fn create_msr_entries(&self) -> Msrs { + pub fn create_msr_entries(&self) -> Result { let entry_vec: Vec = self .supported_msrs .iter() diff --git a/hypervisor/src/kvm/x86_64/mod.rs b/hypervisor/src/kvm/x86_64/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..7d7e7b581a0f388b9585085d903cfedfcc13a1e9 --- /dev/null +++ b/hypervisor/src/kvm/x86_64/mod.rs @@ -0,0 +1,341 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
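+
+//! x86_64-specific KVM support: machine initialization (identity map, TSS
+//! address and in-kernel PIT) plus the per-vCPU register get/set paths used
+//! by `KvmCpu`.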
+ +pub mod cpu_caps; + +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Context, Result}; +use kvm_bindings::*; +use kvm_ioctls::Kvm; +use vmm_sys_util::{ioctl_ioc_nr, ioctl_ior_nr, ioctl_iow_nr, ioctl_iowr_nr}; + +use crate::kvm::listener::KvmIoListener; +use crate::kvm::{KvmCpu, KvmHypervisor}; +use crate::HypervisorError; +use address_space::Listener; +use cpu::{ArchCPU, CPUBootConfig, RegsIndex, CPU}; + +// See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/linux/kvm.h +ioctl_iowr_nr!(KVM_GET_SUPPORTED_CPUID, KVMIO, 0x05, kvm_cpuid2); +ioctl_iow_nr!(KVM_SET_CPUID2, KVMIO, 0x90, kvm_cpuid2); +ioctl_iow_nr!(KVM_SET_SREGS, KVMIO, 0x84, kvm_sregs); +ioctl_iow_nr!(KVM_SET_REGS, KVMIO, 0x82, kvm_regs); +ioctl_iow_nr!(KVM_SET_XSAVE, KVMIO, 0xa5, kvm_xsave); +ioctl_iow_nr!(KVM_SET_XCRS, KVMIO, 0xa7, kvm_xcrs); +ioctl_iow_nr!(KVM_SET_FPU, KVMIO, 0x8d, kvm_fpu); +ioctl_iow_nr!(KVM_SET_DEBUGREGS, KVMIO, 0xa2, kvm_debugregs); +ioctl_iow_nr!(KVM_SET_LAPIC, KVMIO, 0x8f, kvm_lapic_state); +ioctl_iow_nr!(KVM_SET_MSRS, KVMIO, 0x89, kvm_msrs); +ioctl_ior_nr!(KVM_GET_PIT2, KVMIO, 0x9f, kvm_pit_state2); +ioctl_ior_nr!(KVM_GET_XSAVE, KVMIO, 0xa4, kvm_xsave); +ioctl_ior_nr!(KVM_GET_XCRS, KVMIO, 0xa6, kvm_xcrs); +ioctl_ior_nr!(KVM_GET_DEBUGREGS, KVMIO, 0xa1, kvm_debugregs); +ioctl_ior_nr!(KVM_GET_LAPIC, KVMIO, 0x8e, kvm_lapic_state); +ioctl_iowr_nr!(KVM_GET_MSRS, KVMIO, 0x88, kvm_msrs); + +impl KvmHypervisor { + pub fn arch_init(&self) -> Result<()> { + // The identity_addr is set in the memory layout of x86 machine. + let identity_addr: u64 = 0xFEF0_C000; + let vm_fd = self.vm_fd.as_ref().unwrap(); + + vm_fd + .set_identity_map_address(identity_addr) + .with_context(|| HypervisorError::SetIdentityMapAddr)?; + + // Page table takes 1 page, TSS takes the following 3 pages. 
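+ // (KVM_SET_TSS_ADDR expects the base of a 3-page region in guest physical
+ // memory, so the whole reserved area is 4 contiguous pages starting at
+ // identity_addr.)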
+ vm_fd + .set_tss_address((identity_addr + 0x1000) as usize) + .with_context(|| HypervisorError::SetTssErr)?; + + let pit_config = kvm_pit_config { + flags: KVM_PIT_SPEAKER_DUMMY, + pad: Default::default(), + }; + vm_fd + .create_pit2(pit_config) + .with_context(|| HypervisorError::CrtPitErr) + } + + pub fn create_io_listener(&self) -> Arc> { + Arc::new(Mutex::new(KvmIoListener::new(self.vm_fd.clone()))) + } +} + +impl KvmCpu { + pub fn arch_init_pmu(&self) -> Result<()> { + Ok(()) + } + + pub fn arch_vcpu_init(&self) -> Result<()> { + Ok(()) + } + + pub fn arch_set_boot_config( + &self, + arch_cpu: Arc>, + boot_config: &CPUBootConfig, + ) -> Result<()> { + let mut locked_arch_cpu = arch_cpu.lock().unwrap(); + let apic_id = locked_arch_cpu.apic_id; + let lapic = self + .fd + .get_lapic() + .with_context(|| format!("Failed to get lapic for CPU {}/KVM", apic_id))?; + locked_arch_cpu.setup_lapic(lapic)?; + locked_arch_cpu.setup_regs(boot_config); + let sregs = self + .fd + .get_sregs() + .with_context(|| format!("Failed to get sregs for CPU {}/KVM", apic_id))?; + locked_arch_cpu.setup_sregs(sregs, boot_config)?; + locked_arch_cpu.setup_fpu(); + locked_arch_cpu.setup_msrs(); + + Ok(()) + } + + pub fn arch_get_one_reg(&self, _reg_id: u64) -> Result { + Ok(0) + } + + pub fn arch_get_regs( + &self, + arch_cpu: Arc>, + regs_index: RegsIndex, + ) -> Result<()> { + let mut msr_entries = self.caps.create_msr_entries()?; + let mut locked_arch_cpu = arch_cpu.lock().unwrap(); + match regs_index { + RegsIndex::Regs => { + locked_arch_cpu.regs = self.fd.get_regs()?; + trace::kvm_get_regs(self.id, &locked_arch_cpu.regs); + } + RegsIndex::Sregs => { + locked_arch_cpu.sregs = self.fd.get_sregs()?; + trace::kvm_get_sregs(self.id, &locked_arch_cpu.sregs); + } + RegsIndex::Fpu => { + if !self.caps.has_xsave { + locked_arch_cpu.fpu = self.fd.get_fpu()?; + trace::kvm_get_fpu(self.id, &locked_arch_cpu.fpu); + } + } + RegsIndex::MpState => { + locked_arch_cpu.mp_state = self.fd.get_mp_state()?; + trace::kvm_get_mp_state(self.id, &locked_arch_cpu.mp_state); + } + RegsIndex::LapicState => { + locked_arch_cpu.lapic = self.fd.get_lapic()?; + trace::kvm_get_lapic(self.id, &locked_arch_cpu.lapic); + } + RegsIndex::MsrEntry => { + locked_arch_cpu.msr_len = self.fd.get_msrs(&mut msr_entries)?; + for (i, entry) in msr_entries.as_slice().iter().enumerate() { + locked_arch_cpu.msr_list[i] = *entry; + } + trace::kvm_get_msrs( + self.id, + &&locked_arch_cpu.msr_list[0..locked_arch_cpu.msr_len], + ); + } + RegsIndex::VcpuEvents => { + locked_arch_cpu.cpu_events = self.fd.get_vcpu_events()?; + trace::kvm_get_vcpu_events(self.id, &locked_arch_cpu.cpu_events); + } + RegsIndex::Xsave => { + if self.caps.has_xsave { + locked_arch_cpu.xsave = self.fd.get_xsave()?; + trace::kvm_get_xsave(self.id, &locked_arch_cpu.xsave); + } + } + RegsIndex::Xcrs => { + if self.caps.has_xcrs { + locked_arch_cpu.xcrs = self.fd.get_xcrs()?; + trace::kvm_get_xcrs(self.id, &locked_arch_cpu.xcrs); + } + } + RegsIndex::DebugRegs => { + locked_arch_cpu.debugregs = self.fd.get_debug_regs()?; + trace::kvm_get_debug_regs(self.id, &locked_arch_cpu.debugregs); + } + } + + Ok(()) + } + + pub fn arch_set_regs( + &self, + arch_cpu: Arc>, + regs_index: RegsIndex, + ) -> Result<()> { + let locked_arch_cpu = arch_cpu.lock().unwrap(); + let apic_id = locked_arch_cpu.apic_id; + match regs_index { + RegsIndex::Regs => { + trace::kvm_set_regs(self.id, &locked_arch_cpu.regs); + self.fd + .set_regs(&locked_arch_cpu.regs) + .with_context(|| format!("Failed to set regs for CPU 
{}", apic_id))?; + } + RegsIndex::Sregs => { + trace::kvm_set_sregs(self.id, &locked_arch_cpu.sregs); + self.fd + .set_sregs(&locked_arch_cpu.sregs) + .with_context(|| format!("Failed to set sregs for CPU {}", apic_id))?; + } + RegsIndex::Fpu => { + trace::kvm_set_fpu(self.id, &locked_arch_cpu.fpu); + self.fd + .set_fpu(&locked_arch_cpu.fpu) + .with_context(|| format!("Failed to set fpu for CPU {}", apic_id))?; + } + RegsIndex::MpState => { + trace::kvm_set_mp_state(self.id, &locked_arch_cpu.mp_state); + self.fd + .set_mp_state(locked_arch_cpu.mp_state) + .with_context(|| format!("Failed to set mpstate for CPU {}", apic_id))?; + } + RegsIndex::LapicState => { + trace::kvm_set_lapic(self.id, &locked_arch_cpu.lapic); + self.fd + .set_lapic(&locked_arch_cpu.lapic) + .with_context(|| format!("Failed to set lapic for CPU {}", apic_id))?; + } + RegsIndex::MsrEntry => { + trace::kvm_set_msrs( + self.id, + &&locked_arch_cpu.msr_list[0..locked_arch_cpu.msr_len], + ); + self.fd + .set_msrs(&Msrs::from_entries( + &locked_arch_cpu.msr_list[0..locked_arch_cpu.msr_len], + )?) + .with_context(|| format!("Failed to set msrs for CPU {}", apic_id))?; + } + RegsIndex::VcpuEvents => { + trace::kvm_set_vcpu_events(self.id, &locked_arch_cpu.cpu_events); + self.fd + .set_vcpu_events(&locked_arch_cpu.cpu_events) + .with_context(|| format!("Failed to set vcpu events for CPU {}", apic_id))?; + } + RegsIndex::Xsave => { + trace::kvm_set_xsave(self.id, &locked_arch_cpu.xsave); + self.fd + .set_xsave(&locked_arch_cpu.xsave) + .with_context(|| format!("Failed to set xsave for CPU {}", apic_id))?; + } + RegsIndex::Xcrs => { + trace::kvm_set_xcrs(self.id, &locked_arch_cpu.xcrs); + self.fd + .set_xcrs(&locked_arch_cpu.xcrs) + .with_context(|| format!("Failed to set xcrs for CPU {}", apic_id))?; + } + RegsIndex::DebugRegs => { + trace::kvm_set_debug_regs(self.id, &locked_arch_cpu.debugregs); + self.fd + .set_debug_regs(&locked_arch_cpu.debugregs) + .with_context(|| format!("Failed to set debug register for CPU {}", apic_id))?; + } + } + + Ok(()) + } + + pub fn arch_put_register(&self, cpu: Arc) -> Result<()> { + let locked_arch_cpu = cpu.arch_cpu.lock().unwrap(); + let apic_id = locked_arch_cpu.apic_id; + + let sys_fd = match Kvm::new() { + Ok(fd) => fd, + _ => bail!("setup_cpuid: Open /dev/kvm failed"), + }; + let mut cpuid = sys_fd + .get_supported_cpuid(KVM_MAX_CPUID_ENTRIES) + .with_context(|| format!("Failed to get supported cpuid for CPU {}/KVM", apic_id))?; + + locked_arch_cpu + .setup_cpuid(&mut cpuid) + .with_context(|| format!("Failed to set cpuid for CPU {}", apic_id))?; + trace::kvm_setup_cpuid(self.id, &cpuid); + + self.fd + .set_cpuid2(&cpuid) + .with_context(|| format!("Failed to set cpuid for CPU {}/KVM", apic_id))?; + trace::kvm_set_cpuid2(self.id, &cpuid); + + self.fd + .set_mp_state(locked_arch_cpu.mp_state) + .with_context(|| format!("Failed to set mpstate for CPU {}", apic_id))?; + trace::kvm_set_mp_state(self.id, &locked_arch_cpu.mp_state); + + self.fd + .set_sregs(&locked_arch_cpu.sregs) + .with_context(|| format!("Failed to set sregs for CPU {}", apic_id))?; + trace::kvm_set_sregs(self.id, &locked_arch_cpu.sregs); + + self.fd + .set_regs(&locked_arch_cpu.regs) + .with_context(|| format!("Failed to set regs for CPU {}", apic_id))?; + trace::kvm_set_regs(self.id, &locked_arch_cpu.regs); + + if self.caps.has_xsave { + self.fd + .set_xsave(&locked_arch_cpu.xsave) + .with_context(|| format!("Failed to set xsave for CPU {}", apic_id))?; + trace::kvm_set_xsave(self.id, &locked_arch_cpu.xsave); + } else { + 
self.fd + .set_fpu(&locked_arch_cpu.fpu) + .with_context(|| format!("Failed to set fpu for CPU {}", apic_id))?; + trace::kvm_set_fpu(self.id, &locked_arch_cpu.fpu); + } + if self.caps.has_xcrs { + self.fd + .set_xcrs(&locked_arch_cpu.xcrs) + .with_context(|| format!("Failed to set xcrs for CPU {}", apic_id))?; + trace::kvm_set_xcrs(self.id, &locked_arch_cpu.xcrs); + } + self.fd + .set_debug_regs(&locked_arch_cpu.debugregs) + .with_context(|| format!("Failed to set debug register for CPU {}", apic_id))?; + trace::kvm_set_debug_regs(self.id, &locked_arch_cpu.debugregs); + + self.fd + .set_lapic(&locked_arch_cpu.lapic) + .with_context(|| format!("Failed to set lapic for CPU {}", apic_id))?; + trace::kvm_set_lapic(self.id, &locked_arch_cpu.lapic); + + self.fd + .set_msrs(&Msrs::from_entries( + &locked_arch_cpu.msr_list[0..locked_arch_cpu.msr_len], + )?) + .with_context(|| format!("Failed to set msrs for CPU {}", apic_id))?; + trace::kvm_set_msrs( + self.id, + &&locked_arch_cpu.msr_list[0..locked_arch_cpu.msr_len], + ); + + self.fd + .set_vcpu_events(&locked_arch_cpu.cpu_events) + .with_context(|| format!("Failed to set vcpu events for CPU {}", apic_id))?; + trace::kvm_set_vcpu_events(self.id, &locked_arch_cpu.cpu_events); + + Ok(()) + } + + pub fn arch_reset_vcpu(&self, cpu: Arc) -> Result<()> { + cpu.arch_cpu.lock().unwrap().set(&cpu.boot_state()); + self.arch_put_register(cpu) + } +} diff --git a/hypervisor/src/lib.rs b/hypervisor/src/lib.rs index 9fb446d61b67f8116995cd28ca3ebace38ed2db7..a156d9af65dc93274c7c0b30f1bd020ca5c818b9 100644 --- a/hypervisor/src/lib.rs +++ b/hypervisor/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. // // StratoVirt is licensed under Mulan PSL v2. // You can use this software according to the terms and conditions of the Mulan @@ -12,26 +12,51 @@ //! This crate offers interfaces for different kinds of hypervisors, such as KVM. -#[macro_use] -extern crate error_chain; -#[macro_use] -extern crate log; -#[macro_use] -extern crate vmm_sys_util; -#[cfg(target_arch = "x86_64")] -#[macro_use] -extern crate migration_derive; - -#[allow(clippy::upper_case_acronyms)] -pub mod errors { - error_chain! 
{ - links { - Util(util::errors::Error, util::errors::ErrorKind); - } - foreign_links { - KVMIoctl(kvm_ioctls::Error); - } +pub mod error; +pub mod kvm; +pub mod test; + +pub use error::HypervisorError; + +use std::any::Any; +use std::sync::Arc; + +use anyhow::Result; +#[cfg(feature = "vfio_device")] +use kvm_ioctls::DeviceFd; + +use address_space::AddressSpace; +use cpu::CPUHypervisorOps; +use devices::IrqManager; +#[cfg(target_arch = "aarch64")] +use devices::{ICGICConfig, InterruptController}; +use machine_manager::machine::HypervisorType; + +pub trait HypervisorOps: Send + Sync + Any { + fn get_hypervisor_type(&self) -> HypervisorType { + HypervisorType::Kvm } -} -pub mod kvm; + fn init_machine( + &self, + #[cfg(target_arch = "x86_64")] sys_io: &Arc, + sys_mem: &Arc, + ) -> Result<()>; + + #[cfg(target_arch = "aarch64")] + fn create_interrupt_controller( + &mut self, + gic_conf: &ICGICConfig, + ) -> Result>; + + #[cfg(target_arch = "x86_64")] + fn create_interrupt_controller(&mut self) -> Result<()>; + + fn create_hypervisor_cpu(&self, vcpu_id: u8) + -> Result>; + + fn create_irq_manager(&mut self) -> Result; + + #[cfg(feature = "vfio_device")] + fn create_vfio_device(&self) -> Option; +} diff --git a/hypervisor/src/test/aarch64/mod.rs b/hypervisor/src/test/aarch64/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..0b5317afb1c7b21bbe1b853927dff0e765a8e108 --- /dev/null +++ b/hypervisor/src/test/aarch64/mod.rs @@ -0,0 +1,23 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use devices::{GICv3Access, GICv3ItsAccess}; + +#[derive(Default)] +pub struct TestGicv3 {} + +impl GICv3Access for TestGicv3 {} + +#[derive(Default)] +pub struct TestGicv3Its {} + +impl GICv3ItsAccess for TestGicv3Its {} diff --git a/hypervisor/src/test/listener.rs b/hypervisor/src/test/listener.rs new file mode 100644 index 0000000000000000000000000000000000000000..15a65dd03936abfcebd3063ba19d398e765e4ed3 --- /dev/null +++ b/hypervisor/src/test/listener.rs @@ -0,0 +1,40 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use address_space::Listener; + +#[derive(Default, Clone)] +pub struct TestMemoryListener { + enabled: bool, +} + +impl Listener for TestMemoryListener { + /// Get default priority. + fn priority(&self) -> i32 { + 10_i32 + } + + /// Is this listener enabled to call. + fn enabled(&self) -> bool { + self.enabled + } + + /// Enable listener for address space. + fn enable(&mut self) { + self.enabled = true; + } + + /// Disable listener for address space. 
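+ /// (Like `enable`, this only flips the internal flag; with just the
+ /// `enabled` field, the test listener cannot map or track real memory
+ /// regions.)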
+ fn disable(&mut self) { + self.enabled = false; + } +} diff --git a/hypervisor/src/test/mod.rs b/hypervisor/src/test/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..99abb06ed1cb403f80adf2c01da64a9b880fa6a0 --- /dev/null +++ b/hypervisor/src/test/mod.rs @@ -0,0 +1,406 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +#[cfg(target_arch = "aarch64")] +mod aarch64; +mod listener; + +use std::collections::HashMap; +use std::sync::atomic::AtomicBool; +use std::sync::{Arc, Barrier, Condvar, Mutex}; +use std::thread; +use std::time::Duration; + +use anyhow::{anyhow, Context, Result}; +#[cfg(feature = "vfio_device")] +use kvm_ioctls::DeviceFd; +use log::info; +use vmm_sys_util::eventfd::EventFd; + +#[cfg(target_arch = "aarch64")] +use self::aarch64::{TestGicv3, TestGicv3Its}; +use self::listener::TestMemoryListener; +use super::HypervisorOps; +use address_space::{AddressSpace, Listener}; +#[cfg(target_arch = "aarch64")] +use cpu::CPUFeatures; +use cpu::{ + ArchCPU, CPUBootConfig, CPUHypervisorOps, CPUThreadWorker, CpuError, CpuLifecycleState, + RegsIndex, CPU, +}; +use devices::{pci::MsiVector, IrqManager, LineIrqManager, MsiIrqManager, TriggerMode}; +#[cfg(target_arch = "aarch64")] +use devices::{GICVersion, GICv3, ICGICConfig, InterruptController, GIC_IRQ_INTERNAL}; +use machine_manager::machine::HypervisorType; +use migration::{MigrateMemSlot, MigrateOps}; +use util::test_helper::{add_msix_msg, IntxInfo, TEST_INTX_LIST}; + +pub struct TestHypervisor {} + +impl TestHypervisor { + pub fn new() -> Result { + Ok(TestHypervisor {}) + } + + fn create_memory_listener(&self) -> Arc> { + Arc::new(Mutex::new(TestMemoryListener::default())) + } +} + +impl HypervisorOps for TestHypervisor { + fn get_hypervisor_type(&self) -> HypervisorType { + HypervisorType::Test + } + + fn init_machine( + &self, + #[cfg(target_arch = "x86_64")] _sys_io: &Arc, + sys_mem: &Arc, + ) -> Result<()> { + sys_mem + .register_listener(self.create_memory_listener()) + .with_context(|| "Failed to register hypervisor listener for memory space.") + } + + #[cfg(target_arch = "aarch64")] + fn create_interrupt_controller( + &mut self, + gic_conf: &ICGICConfig, + ) -> Result> { + gic_conf.check_sanity()?; + + let create_gicv3 = || { + let gicv3 = Arc::new(GICv3::new( + Arc::new(TestGicv3::default()), + Arc::new(TestGicv3Its::default()), + gic_conf, + )?); + + Ok(Arc::new(InterruptController::new(gicv3))) + }; + + match &gic_conf.version { + Some(GICVersion::GICv3) => create_gicv3(), + Some(GICVersion::GICv2) => Err(anyhow!("MST doesn't support Gicv2.")), + // Try v3 by default if no version specified. 
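+ // (The controller built here is backed by the stub TestGicv3/TestGicv3Its
+ // accessors, i.e. empty trait impls, so the test hypervisor never programs
+ // real GIC state.)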
+ None => create_gicv3(), + } + } + + #[cfg(target_arch = "x86_64")] + fn create_interrupt_controller(&mut self) -> Result<()> { + Ok(()) + } + + fn create_hypervisor_cpu( + &self, + vcpu_id: u8, + ) -> Result> { + Ok(Arc::new(TestCpu::new(vcpu_id))) + } + + fn create_irq_manager(&mut self) -> Result { + let test_irq_manager = Arc::new(TestInterruptManager {}); + Ok(IrqManager { + line_irq_manager: Some(test_irq_manager.clone()), + msi_irq_manager: Some(test_irq_manager), + }) + } + + #[cfg(feature = "vfio_device")] + fn create_vfio_device(&self) -> Option { + None + } +} + +pub struct TestCpu { + #[allow(unused)] + id: u8, +} + +impl TestCpu { + pub fn new(vcpu_id: u8) -> Self { + Self { id: vcpu_id } + } +} + +impl CPUHypervisorOps for TestCpu { + fn get_hypervisor_type(&self) -> HypervisorType { + HypervisorType::Test + } + + fn init_pmu(&self) -> Result<()> { + Ok(()) + } + + fn vcpu_init(&self) -> Result<()> { + Ok(()) + } + + #[allow(unused)] + fn set_boot_config( + &self, + arch_cpu: Arc>, + boot_config: &CPUBootConfig, + #[cfg(target_arch = "aarch64")] _vcpu_config: &CPUFeatures, + ) -> Result<()> { + #[cfg(target_arch = "aarch64")] + { + arch_cpu.lock().unwrap().mpidr = u64::from(self.id); + arch_cpu.lock().unwrap().set_core_reg(boot_config); + } + Ok(()) + } + + fn get_one_reg(&self, _reg_id: u64) -> Result { + Err(anyhow!("MST does not support getting one reg.")) + } + + fn get_regs(&self, _arch_cpu: Arc>, _regs_index: RegsIndex) -> Result<()> { + Ok(()) + } + + fn set_regs(&self, _arch_cpu: Arc>, _regs_index: RegsIndex) -> Result<()> { + Ok(()) + } + + fn put_register(&self, _cpu: Arc) -> Result<()> { + Err(anyhow!("Test does not support putting register.")) + } + + fn reset_vcpu(&self, cpu: Arc) -> Result<()> { + cpu.arch_cpu.lock().unwrap().set(&cpu.boot_state()); + Ok(()) + } + + fn vcpu_exec( + &self, + cpu_thread_worker: CPUThreadWorker, + thread_barrier: Arc, + ) -> Result<()> { + cpu_thread_worker.init_local_thread_vcpu(); + cpu_thread_worker.thread_cpu.set_tid(None); + + // Wait for all vcpu to complete the running + // environment initialization. + thread_barrier.wait(); + + info!("Test vcpu{} start running", cpu_thread_worker.thread_cpu.id); + while let Ok(true) = cpu_thread_worker.ready_for_running() { + thread::sleep(Duration::from_millis(5)); + continue; + } + + // The vcpu thread is about to exit, marking the state + // of the CPU state as Stopped. 
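+ // The notification below lets `destroy()`, which waits on the same condvar
+ // with a short timeout, observe the Stopped state.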
+ let (cpu_state, cvar) = &*cpu_thread_worker.thread_cpu.state; + *cpu_state.lock().unwrap() = CpuLifecycleState::Stopped; + cvar.notify_one(); + + Ok(()) + } + + fn set_hypervisor_exit(&self) -> Result<()> { + Ok(()) + } + + fn pause( + &self, + _task: Arc>>>, + _state: Arc<(Mutex, Condvar)>, + _pause_signal: Arc, + ) -> Result<()> { + Ok(()) + } + + fn resume( + &self, + _state: Arc<(Mutex, Condvar)>, + _pause_signal: Arc, + ) -> Result<()> { + Ok(()) + } + + fn destroy( + &self, + _task: Arc>>>, + state: Arc<(Mutex, Condvar)>, + ) -> Result<()> { + let (cpu_state, cvar) = &*state; + let mut locked_cpu_state = cpu_state.lock().unwrap(); + if *locked_cpu_state == CpuLifecycleState::Running { + *locked_cpu_state = CpuLifecycleState::Stopping; + } else if *locked_cpu_state == CpuLifecycleState::Stopped + || *locked_cpu_state == CpuLifecycleState::Paused + { + return Ok(()); + } + drop(locked_cpu_state); + + let mut locked_cpu_state = cpu_state.lock().unwrap(); + locked_cpu_state = cvar + .wait_timeout(locked_cpu_state, Duration::from_millis(10)) + .unwrap() + .0; + + if *locked_cpu_state == CpuLifecycleState::Stopped { + Ok(()) + } else { + Err(anyhow!(CpuError::DestroyVcpu(format!( + "VCPU still in {:?} state", + *locked_cpu_state + )))) + } + } +} + +impl MigrateOps for TestHypervisor { + fn get_mem_slots(&self) -> Arc>> { + Arc::new(Mutex::new(HashMap::new())) + } + + fn get_dirty_log(&self, _slot: u32, _mem_size: u64) -> Result> { + Err(anyhow!( + "Failed to get dirty log, mst doesn't support migration feature." + )) + } + + fn start_dirty_log(&self) -> Result<()> { + Err(anyhow!( + "Failed to start dirty log, mst doesn't support migration feature." + )) + } + + fn stop_dirty_log(&self) -> Result<()> { + Err(anyhow!( + "Failed to stop dirty log, mst doesn't support migration feature." + )) + } + + fn register_instance(&self) -> Result<()> { + Ok(()) + } +} + +struct TestInterruptManager {} + +impl TestInterruptManager { + #[cfg(target_arch = "x86_64")] + pub fn arch_map_irq(&self, gsi: u32) -> u32 { + gsi + } + + #[cfg(target_arch = "aarch64")] + pub fn arch_map_irq(&self, gsi: u32) -> u32 { + gsi + GIC_IRQ_INTERNAL + } +} + +impl LineIrqManager for TestInterruptManager { + fn irqfd_enable(&self) -> bool { + false + } + + fn register_irqfd( + &self, + _irq_fd: Arc, + _irq: u32, + _trigger_mode: TriggerMode, + ) -> Result<()> { + Err(anyhow!( + "Failed to register irqfd, mst doesn't support irqfd feature." + )) + } + + fn unregister_irqfd(&self, _irq_fd: Arc, _irq: u32) -> Result<()> { + Err(anyhow!( + "Failed to unregister irqfd, mst doesn't support irqfd feature." + )) + } + + fn set_level_irq(&self, gsi: u32, level: bool) -> Result<()> { + let physical_irq = self.arch_map_irq(gsi); + let level: i8 = if level { 1 } else { 0 }; + + let mut intx_list_lock = TEST_INTX_LIST.lock().unwrap(); + + for intx in intx_list_lock.iter_mut() { + if intx.irq == physical_irq { + intx.level = level; + return Ok(()); + } + } + + let new_intx = IntxInfo::new(physical_irq, level); + intx_list_lock.push(new_intx); + Ok(()) + } + + fn set_edge_irq(&self, _gsi: u32) -> Result<()> { + Ok(()) + } + + fn write_irqfd(&self, _irq_fd: Arc) -> Result<()> { + Err(anyhow!( + "Failed to write irqfd, mst doesn't support irqfd feature." + )) + } +} + +impl MsiIrqManager for TestInterruptManager { + fn irqfd_enable(&self) -> bool { + false + } + + fn allocate_irq(&self, _vector: MsiVector) -> Result { + Err(anyhow!( + "Failed to allocate irq, mst doesn't support irq routing feature." 
+ )) + } + + fn release_irq(&self, _irq: u32) -> Result<()> { + Err(anyhow!( + "Failed to release irq, mst doesn't support irq routing feature." + )) + } + + fn register_irqfd(&self, _irq_fd: Arc, _irq: u32) -> Result<()> { + Err(anyhow!( + "Failed to register msi irqfd, mst doesn't support irqfd feature." + )) + } + + fn unregister_irqfd(&self, _irq_fd: Arc, _irq: u32) -> Result<()> { + Err(anyhow!( + "Failed to unregister msi irqfd, mst doesn't support irqfd feature." + )) + } + + fn trigger( + &self, + _irq_fd: Option>, + vector: MsiVector, + _dev_id: u32, + ) -> Result<()> { + let data = vector.msg_data; + let mut addr: u64 = u64::from(vector.msg_addr_hi); + addr = (addr << 32) + u64::from(vector.msg_addr_lo); + add_msix_msg(addr, data); + Ok(()) + } + + fn update_route_table(&self, _gsi: u32, _vector: MsiVector) -> Result<()> { + Err(anyhow!( + "Failed to update route table, mst doesn't support irq routing feature." + )) + } +} diff --git a/image/Cargo.toml b/image/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..fc40a2ae8846d5383d3784badcf5cdd3c9870415 --- /dev/null +++ b/image/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "stratovirt-img" +version = "2.4.0" +authors = ["Huawei StratoVirt Team"] +edition = "2021" +license = "Mulan PSL v2" +description = "Binary tools for offline disk operations" + +[dependencies] +anyhow = "1.0" +log = "0.4" +libc = "0.2" +util = { path = "../util" } +machine_manager = { path = "../machine_manager" } +block_backend = { path = "../block_backend"} diff --git a/image/src/cmdline.rs b/image/src/cmdline.rs new file mode 100644 index 0000000000000000000000000000000000000000..c991366c65333f9a382f435461e944413b452111 --- /dev/null +++ b/image/src/cmdline.rs @@ -0,0 +1,184 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::collections::HashMap; + +use anyhow::{bail, Result}; + +enum ArgsType { + Flag, + Opt, + OptMulti, +} + +struct Arg { + args_type: ArgsType, + value: Option, + values: Vec, + // Whether this parameter was configured. 
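+ // (`value` holds the argument of an ArgsType::Opt, `values` collects the
+ // arguments of an ArgsType::OptMulti; a plain Flag only sets this field.)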
+ presented: bool, +} + +impl Arg { + fn new(args_type: ArgsType) -> Self { + Self { + args_type, + value: None, + values: vec![], + presented: false, + } + } +} + +pub struct ArgsParse { + args: HashMap, + pub free: Vec, +} + +impl ArgsParse { + pub fn create(opt_flag: Vec<&str>, opt_short: Vec<&str>, opt_multi: Vec<&str>) -> Self { + let mut args: HashMap = HashMap::new(); + for arg_name in opt_flag { + args.insert(arg_name.to_string(), Arg::new(ArgsType::Flag)); + } + + for arg_name in opt_short { + args.insert(arg_name.to_string(), Arg::new(ArgsType::Opt)); + } + + for arg_name in opt_multi { + args.insert(arg_name.to_string(), Arg::new(ArgsType::OptMulti)); + } + + Self { + args, + free: Vec::new(), + } + } + + pub fn parse(&mut self, args: Vec) -> Result<()> { + let len = args.len(); + let mut pre_opt = (0, "".to_string()); + + for idx in 0..len { + let str = args[idx].clone(); + if str.starts_with('-') && str.len() > 1 { + if !pre_opt.1.is_empty() { + bail!("missing argument for option '{}'", pre_opt.1); + } + + let name = if str.starts_with("--") && str.len() > 2 { + str[2..].to_string() + } else if str.starts_with('-') && str.len() > 1 { + str[1..].to_string() + } else { + bail!("unrecognized option '{}'", str); + }; + + if let Some(args) = self.args.get_mut(&name) { + match args.args_type { + ArgsType::Flag => { + args.presented = true; + } + _ => { + pre_opt = (idx, name); + } + }; + } else { + bail!("unrecognized option '{}'", name); + } + + continue; + } + + if pre_opt.0 + 1 == idx && !pre_opt.1.is_empty() { + let name = pre_opt.1.to_string(); + let value = str.to_string(); + if let Some(arg) = self.args.get_mut(&name) { + match arg.args_type { + ArgsType::Opt => { + arg.presented = true; + arg.value = Some(value); + } + ArgsType::OptMulti => { + arg.presented = true; + arg.values.push(value); + } + _ => bail!("unrecognized option '{}'", name), + } + } + pre_opt = (0, "".to_string()); + } else if pre_opt.1.is_empty() { + self.free.push(str.to_string()); + } else { + bail!("unrecognized option '{}'", pre_opt.1); + } + } + + if pre_opt.0 == 0 && !pre_opt.1.is_empty() { + bail!("unrecognized option '{}'", pre_opt.1); + } + + Ok(()) + } + + pub fn opt_present(&mut self, name: &str) -> bool { + if let Some(arg) = self.args.get(name) { + return arg.presented; + } + false + } + + pub fn opt_str(&mut self, name: &str) -> Option { + if let Some(arg) = self.args.get(name) { + return arg.value.clone(); + } + None + } + + pub fn opt_strs(&mut self, name: &str) -> Vec { + let mut values: Vec = vec![]; + if let Some(arg) = self.args.get(name) { + values = arg.values.clone(); + } + values + } +} + +#[cfg(test)] +mod test { + use super::ArgsParse; + + #[test] + fn test_arg_parse() { + let mut arg_parser = ArgsParse::create(vec!["q", "h", "help"], vec!["f"], vec!["o"]); + let cmd_line = "-f qcow2 -q -h --help -o cluster_size=512 -o refcount_bits=16 img_path +1G"; + let cmd_args: Vec = cmd_line.split(' ').map(|str| str.to_string()).collect(); + + let ret = arg_parser.parse(cmd_args); + println!("{:?}", ret); + assert!(ret.is_ok()); + + assert!(arg_parser.opt_present("f")); + assert!(arg_parser.opt_present("q")); + assert!(arg_parser.opt_present("h")); + assert!(arg_parser.opt_present("help")); + + let values = arg_parser.opt_strs("o"); + assert!(values.contains(&"cluster_size=512".to_string())); + assert!(values.contains(&"refcount_bits=16".to_string())); + + let free = arg_parser.free.clone(); + assert_eq!(free[0], "img_path".to_string()); + assert_eq!(free[1], "+1G".to_string()); + } +} diff 
--git a/image/src/img.rs b/image/src/img.rs new file mode 100644 index 0000000000000000000000000000000000000000..1289889e1d07507acf8238588d008dd574044c27 --- /dev/null +++ b/image/src/img.rs @@ -0,0 +1,2107 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + fs::File, + os::unix::prelude::{FileExt, OpenOptionsExt}, + str::FromStr, + sync::Arc, +}; + +use anyhow::{anyhow, bail, Context, Result}; + +use crate::{cmdline::ArgsParse, BINARY_NAME}; +use block_backend::{ + qcow2::{header::QcowHeader, InternalSnapshotOps, Qcow2Driver, SyncAioInfo}, + raw::RawDriver, + BlockDriverOps, BlockProperty, CheckResult, CreateOptions, ImageInfo, FIX_ERRORS, FIX_LEAKS, + NO_FIX, SECTOR_SIZE, +}; +use machine_manager::config::{memory_unit_conversion, DiskFormat}; +use util::{ + aio::{Aio, AioEngine, WriteZeroesState}, + file::{lock_file, open_file, unlock_file}, +}; + +enum SnapshotOperation { + Create, + Delete, + Apply, + List, + Rename, +} + +pub struct ImageFile { + file: Arc, + path: String, +} + +impl ImageFile { + fn create(path: &str, read_only: bool) -> Result { + let file = open_file(path, read_only, false)?; + + // Add write lock for image file. + lock_file(&file, path, read_only).with_context(|| { + format!( + "Could not open '{0}': Failed to get \"write\" lock\n\ + Is another process using the image {0}", + path + ) + })?; + + Ok(Self { + file: Arc::new(file), + path: path.to_string(), + }) + } + + /// If the image format is not specified by user, active detection is required + /// For qcow2: will check its version in header. + /// If the image does not belong to any supported format, it defaults to raw. 
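+ /// Detection reads the first sector and only recognizes a version-3 qcow2
+ /// header, so any other image (including qcow2 v2) is treated as raw here.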
+ fn detect_img_format(&self) -> Result { + let mut buf = vec![0_u8; SECTOR_SIZE as usize]; + self.file.read_at(&mut buf, 0)?; + + let mut disk_format = DiskFormat::Raw; + if let Ok(header) = QcowHeader::from_vec(&buf) { + if header.version == 3 { + disk_format = DiskFormat::Qcow2; + } + } + + Ok(disk_format) + } + + fn check_img_format( + &self, + input_fmt: Option, + detect_fmt: DiskFormat, + ) -> Result { + let real_fmt = match input_fmt { + Some(DiskFormat::Raw) => DiskFormat::Raw, + Some(fmt) => { + if fmt != detect_fmt { + bail!( + "Could not open '{}': Image is not in {} fmt", + self.path, + fmt.to_string() + ); + } + fmt + } + _ => detect_fmt, + }; + + Ok(real_fmt) + } +} + +impl Drop for ImageFile { + fn drop(&mut self) { + if let Err(e) = unlock_file(&self.file, &self.path) { + println!("{:?}", e); + } + } +} + +pub(crate) fn image_create(args: Vec) -> Result<()> { + let mut create_options = CreateOptions::default(); + let mut arg_parser = ArgsParse::create(vec!["h", "help"], vec!["f"], vec!["o"]); + arg_parser.parse(args)?; + + if arg_parser.opt_present("h") || arg_parser.opt_present("help") { + print_help(); + return Ok(()); + } + + let mut disk_fmt = DiskFormat::Raw; + if let Some(fmt) = arg_parser.opt_str("f") { + disk_fmt = DiskFormat::from_str(&fmt)?; + }; + + let extra_options = arg_parser.opt_strs("o"); + for option in extra_options { + if option.starts_with("cluster_size=") { + let vec: Vec = option.split('=').map(|str| str.to_string()).collect(); + if vec.len() == 2 && vec[0] == *"cluster_size" { + let str = vec[1].clone(); + create_options.cluster_size = Some(memory_unit_conversion(&str, 1)?); + continue; + } + } + if option.starts_with("refcount_bits=") { + let vec: Vec = option.split('=').map(|str| str.to_string()).collect(); + if vec.len() == 2 && vec[0] == *"refcount_bits" { + let value = vec[1].clone(); + create_options.refcount_bits = Some(value.parse::()?); + continue; + } + } + + bail!("Invalid parameter '{}'", option); + } + + let len = arg_parser.free.len(); + match len { + 0 => bail!("Image creation requires path and size parameters"), + 1 => bail!("Image creation requires size parameters"), + 2 => { + create_options.path = arg_parser.free[0].clone(); + let img_size_str = arg_parser.free[1].clone(); + create_options.img_size = memory_unit_conversion(&img_size_str, 1)?; + } + _ => { + let param = arg_parser.free[2].clone(); + bail!("Unexpected argument: {}", param); + } + } + + let path = create_options.path.clone(); + let file = Arc::new( + std::fs::OpenOptions::new() + .read(true) + .write(true) + .custom_flags(libc::O_CREAT | libc::O_TRUNC) + .mode(0o660) + .open(path)?, + ); + + let aio = Aio::new(Arc::new(SyncAioInfo::complete_func), AioEngine::Off, None)?; + let image_info = match disk_fmt { + DiskFormat::Raw => { + create_options.conf.format = DiskFormat::Raw; + let mut raw_driver = RawDriver::new(file, aio, create_options.conf.clone()); + raw_driver.create_image(&create_options)? + } + DiskFormat::Qcow2 => { + create_options.conf.format = DiskFormat::Qcow2; + let mut qcow2_driver = Qcow2Driver::new(file, aio, create_options.conf.clone())?; + qcow2_driver.create_image(&create_options)? 
+ } + }; + println!("Stratovirt-img: {}", image_info); + + Ok(()) +} + +pub(crate) fn image_info(args: Vec) -> Result<()> { + if args.is_empty() { + bail!("Not enough arguments"); + } + let mut arg_parser = ArgsParse::create(vec!["h", "help"], vec![], vec![]); + arg_parser.parse(args)?; + + if arg_parser.opt_present("h") || arg_parser.opt_present("help") { + print_help(); + return Ok(()); + } + + // Parse the image path. + let len = arg_parser.free.len(); + let img_path = match len { + 0 => bail!("Image path is needed"), + 1 => arg_parser.free[0].clone(), + _ => { + let param = arg_parser.free[1].clone(); + bail!("Unexpected argument: {}", param); + } + }; + + let aio = Aio::new(Arc::new(SyncAioInfo::complete_func), AioEngine::Off, None)?; + let image_file = ImageFile::create(&img_path, false)?; + let detect_fmt = image_file.detect_img_format()?; + let conf = BlockProperty { + format: detect_fmt, + ..Default::default() + }; + let mut driver: Box> = match detect_fmt { + DiskFormat::Raw => Box::new(RawDriver::new(image_file.file.clone(), aio, conf)), + DiskFormat::Qcow2 => { + let mut qocw2_driver = Qcow2Driver::new(image_file.file.clone(), aio, conf.clone())?; + qocw2_driver.load_metadata(conf)?; + Box::new(qocw2_driver) + } + }; + + let mut image_info = ImageInfo { + path: img_path, + ..Default::default() + }; + driver.query_image(&mut image_info)?; + print!("{}", image_info); + Ok(()) +} + +pub(crate) fn image_check(args: Vec) -> Result<()> { + let mut arg_parser = + ArgsParse::create(vec!["no_print_error", "h", "help"], vec!["f", "r"], vec![]); + arg_parser.parse(args)?; + + if arg_parser.opt_present("h") || arg_parser.opt_present("help") { + print_help(); + return Ok(()); + } + + let mut quite = false; + let mut disk_fmt: Option = None; + let mut fix = NO_FIX; + + if arg_parser.opt_present("no_print_error") { + quite = true; + } + if let Some(fmt) = arg_parser.opt_str("f") { + disk_fmt = Some(DiskFormat::from_str(&fmt)?); + } + + if let Some(kind) = arg_parser.opt_str("r") { + if kind == *"leaks" { + fix |= FIX_LEAKS; + } else if kind == *"all" { + fix |= FIX_LEAKS; + fix |= FIX_ERRORS; + } else { + bail!( + "Unknown option value for -r {:?}(expects 'leaks' or 'all')", + kind + ); + } + } + + // Parse image path. 
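+ // With `-r`, `fix != NO_FIX` and the image is opened writable below so
+ // repairs can be written back; a plain check opens it read-only.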
+ let len = arg_parser.free.len(); + let path = match len { + 0 => bail!("Image check requires path"), + 1 => arg_parser.free[0].clone(), + _ => { + let param = arg_parser.free[1].clone(); + bail!("Unexpected argument: {}", param); + } + }; + + let read_only = fix == NO_FIX; + let image_file = ImageFile::create(&path, read_only)?; + let detect_fmt = image_file.detect_img_format()?; + let real_fmt = image_file.check_img_format(disk_fmt, detect_fmt)?; + + let mut check_res = CheckResult::default(); + let file = image_file.file.clone(); + match real_fmt { + DiskFormat::Raw => { + bail!("stratovirt-img: This image format does not support checks"); + } + DiskFormat::Qcow2 => { + let conf = BlockProperty { + format: DiskFormat::Qcow2, + ..Default::default() + }; + let mut qcow2_driver = create_qcow2_driver_for_check(file, conf)?; + let ret = qcow2_driver.check_image(&mut check_res, quite, fix); + let check_message = check_res.collect_check_message(); + print!("{}", check_message); + ret + } + } +} + +pub(crate) fn image_resize(mut args: Vec) -> Result<()> { + if args.len() < 2 { + bail!("Not enough arguments"); + } + let size_str = args.pop().unwrap(); + let mut arg_parser = ArgsParse::create(vec!["h", "help"], vec!["f"], vec![]); + arg_parser.parse(args)?; + + if arg_parser.opt_present("h") || arg_parser.opt_present("help") { + print_help(); + return Ok(()); + } + + // Parse the image path. + let len = arg_parser.free.len(); + let img_path = match len { + 0 => bail!("Expecting image file name and size"), + 1 => arg_parser.free[0].clone(), + _ => bail!("Unexpected argument: {}", arg_parser.free[1]), + }; + + // If the disk format is specified by user, it will be used firstly, + // or it will be detected. + let mut disk_fmt = None; + if let Some(fmt) = arg_parser.opt_str("f") { + disk_fmt = Some(DiskFormat::from_str(&fmt)?); + }; + + let aio = Aio::new(Arc::new(SyncAioInfo::complete_func), AioEngine::Off, None)?; + let image_file = ImageFile::create(&img_path, false)?; + let detect_fmt = image_file.detect_img_format()?; + let real_fmt = image_file.check_img_format(disk_fmt, detect_fmt)?; + let conf = BlockProperty { + format: real_fmt, + ..Default::default() + }; + let mut driver: Box> = match real_fmt { + DiskFormat::Raw => Box::new(RawDriver::new(image_file.file.clone(), aio, conf)), + DiskFormat::Qcow2 => { + let mut qocw2_driver = Qcow2Driver::new(image_file.file.clone(), aio, conf.clone())?; + qocw2_driver.load_metadata(conf)?; + Box::new(qocw2_driver) + } + }; + + let old_size = driver.disk_size()?; + // Only expansion is supported currently. + let new_size = if size_str.starts_with('+') { + let size = memory_unit_conversion(&size_str, 1)?; + old_size + .checked_add(size) + .ok_or_else(|| anyhow!("Disk size is too large for chosen offset"))? 
+ } else if size_str.starts_with('-') { + bail!("The shrink operation is not supported"); + } else { + let new_size = memory_unit_conversion(&size_str, 1)?; + if new_size < old_size { + bail!("The shrink operation is not supported"); + } + new_size + }; + + driver.resize(new_size)?; + println!("Image resized."); + + Ok(()) +} + +pub(crate) fn image_snapshot(args: Vec) -> Result<()> { + let mut arg_parser = ArgsParse::create( + vec!["l", "h", "help", "r"], + vec!["f", "c", "d", "a"], + vec![], + ); + arg_parser.parse(args)?; + + if arg_parser.opt_present("h") || arg_parser.opt_present("help") { + print_help(); + return Ok(()); + } + + let mut snapshot_name: String = String::from(""); + let mut snapshot_operation: Option = None; + let mut disk_fmt: Option = None; + let err_msg = "Cannot mix '-l', '-a', '-c', '-d'".to_string(); + + if let Some(fmt) = arg_parser.opt_str("f") { + disk_fmt = Some(DiskFormat::from_str(&fmt)?); + } + + if arg_parser.opt_present("l") { + snapshot_operation = Some(SnapshotOperation::List); + } + + if let Some(name) = arg_parser.opt_str("c") { + if snapshot_operation.is_some() { + bail!("{}", err_msg); + } + snapshot_operation = Some(SnapshotOperation::Create); + snapshot_name = name; + } + + if let Some(name) = arg_parser.opt_str("d") { + if snapshot_operation.is_some() { + bail!("{}", err_msg); + } + snapshot_operation = Some(SnapshotOperation::Delete); + snapshot_name = name; + } + + if let Some(name) = arg_parser.opt_str("a") { + if snapshot_operation.is_some() { + bail!("{}", err_msg); + } + snapshot_operation = Some(SnapshotOperation::Apply); + snapshot_name = name; + } + + // Rename snapshot name. + let mut old_snapshot_name = String::from(""); + let mut new_snapshot_name = String::from(""); + if arg_parser.opt_present("r") { + snapshot_operation = Some(SnapshotOperation::Rename); + old_snapshot_name = arg_parser.free[0].clone(); + new_snapshot_name = arg_parser.free[1].clone(); + } + + // Parse image path. + let len = arg_parser.free.len(); + let path = match len { + 0 => bail!("Image snapshot requires path"), + 1 => arg_parser.free[0].clone(), + 3 => arg_parser.free[2].clone(), + _ => { + let param = arg_parser.free[1].clone(); + bail!("Unexpected argument: {}", param); + } + }; + + // Detect the image fmt. + let image_file = ImageFile::create(&path, false)?; + let detect_fmt = image_file.detect_img_format()?; + let real_fmt = image_file.check_img_format(disk_fmt, detect_fmt)?; + if real_fmt != DiskFormat::Qcow2 { + bail!( + "Could not create snapshot '{}'(Operation not supported)", + snapshot_name + ); + } + + // Create qcow2 driver. 
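+ // (discard and write-zeroes/unmap are enabled here, presumably so clusters
+ // freed by deleting a snapshot can be released back to the host file.)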
+ let qcow2_conf = BlockProperty { + format: DiskFormat::Qcow2, + discard: true, + write_zeroes: WriteZeroesState::Unmap, + ..Default::default() + }; + + let aio = Aio::new(Arc::new(SyncAioInfo::complete_func), AioEngine::Off, None).unwrap(); + let mut qcow2_driver = Qcow2Driver::new(image_file.file.clone(), aio, qcow2_conf.clone())?; + qcow2_driver.load_metadata(qcow2_conf)?; + + match snapshot_operation { + Some(SnapshotOperation::Create) => { + qcow2_driver.create_snapshot(snapshot_name, 0)?; + } + Some(SnapshotOperation::List) => { + let info = qcow2_driver.list_snapshots(); + println!("Snapshot list:"); + print!("{}", info); + } + Some(SnapshotOperation::Delete) => { + qcow2_driver.delete_snapshot(snapshot_name)?; + } + Some(SnapshotOperation::Apply) => { + qcow2_driver.apply_snapshot(snapshot_name)?; + } + Some(SnapshotOperation::Rename) => { + qcow2_driver.rename_snapshot(old_snapshot_name, new_snapshot_name)?; + } + None => return Ok(()), + }; + + Ok(()) +} + +pub(crate) fn create_qcow2_driver_for_check( + file: Arc, + conf: BlockProperty, +) -> Result> { + let aio = Aio::new(Arc::new(SyncAioInfo::complete_func), AioEngine::Off, None).unwrap(); + let mut qcow2_driver = Qcow2Driver::new(file, aio, conf.clone()) + .with_context(|| "Failed to create qcow2 driver")?; + + qcow2_driver + .load_header() + .with_context(|| "Failed to load header")?; + qcow2_driver + .table + .init_table_info(&qcow2_driver.header, &conf) + .with_context(|| "Failed to create qcow2 table")?; + qcow2_driver + .refcount + .init_refcount_info(&qcow2_driver.header, &conf); + qcow2_driver + .load_refcount_table() + .with_context(|| "Failed to load refcount table")?; + qcow2_driver + .snapshot + .set_cluster_size(qcow2_driver.header.cluster_size()); + Ok(qcow2_driver) +} + +pub(crate) fn print_version() { + println!( + "{} version {}\ + Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved.", + BINARY_NAME, + util::VERSION, + ) +} + +pub fn print_help() { + print!( + r#"Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +Usage: stratovirt-img [standard options] command [command options] +Stratovirt disk image utility + +'-h', '--help' display this help and exit +'-v', '--version' output version information and exit + +Command syntax: +create [-f fmt] [-o options] filename [size] +info filename +check [-r [leaks | all]] [-no_print_error] [-f fmt] filename +resize [-f fmt] filename [+]size +snapshot [-l | -a snapshot | -c snapshot | -d snapshot | -r old_snapshot_name new_snapshot_name] filename + +Command parameters: +'filename' is a disk image filename +'fmt' is the disk image format +'size' is the disk image size in bytes +'options' is a comma separated list of format specific options in a +name=value format. + +Parameters to check subcommand: + '-no_print_error' don't print error detail. + '-r' tries to repair any inconsistencies that are found during the check. + '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all + kinds of errors. 
+
+Parameters to snapshot subcommand:
+'snapshot' is the name of the snapshot to create, apply or delete
+ '-a' applies a snapshot (revert disk to saved state)
+ '-c' creates a snapshot
+ '-d' deletes a snapshot
+ '-l' lists all snapshots in the given image
+ '-r' changes the name of a snapshot
+"#,
+    );
+}
+
+#[cfg(test)]
+mod test {
+    use std::{
+        fs::remove_file,
+        io::{Seek, SeekFrom},
+    };
+
+    use super::*;
+    use block_backend::qcow2::{
+        refcount::Qcow2DiscardType, HostRange, ENTRY_SIZE, L2_TABLE_OFFSET_MASK,
+        QCOW2_OFFSET_COPIED,
+    };
+    use util::aio::Iovec;
+
+    const M: u64 = 1024 * 1024;
+    const G: u64 = 1024 * 1024 * 1024;
+
+    pub struct TestQcow2Image {
+        pub header: QcowHeader,
+        pub cluster_bits: u64,
+        pub path: String,
+        pub file: Arc<File>,
+    }
+
+    impl TestQcow2Image {
+        pub fn create(cluster_bits: u64, refcount_bits: u64, path: &str, img_size: &str) -> Self {
+            let cluster_size = 1 << cluster_bits;
+            // Create image.
+            let create_str = format!(
+                "-f qcow2 -o cluster_size={} -o refcount_bits={} {} {}",
+                cluster_size, refcount_bits, path, img_size,
+            );
+            let create_args: Vec<String> =
+                create_str.split(' ').map(|str| str.to_string()).collect();
+            assert!(image_create(create_args).is_ok());
+
+            // Read header.
+            let file = open_file(path, false, false).unwrap();
+            let mut buf = vec![0; QcowHeader::len()];
+            assert!(file.read_at(&mut buf, 0).is_ok());
+            let header = QcowHeader::from_vec(&buf).unwrap();
+            assert_eq!(u64::from(header.cluster_bits), cluster_bits);
+
+            Self {
+                header,
+                cluster_bits,
+                path: path.to_string(),
+                file: Arc::new(file),
+            }
+        }
+
+        fn create_driver(&self) -> Qcow2Driver<()> {
+            let conf = BlockProperty {
+                format: DiskFormat::Qcow2,
+                ..Default::default()
+            };
+            let aio = Aio::new(Arc::new(SyncAioInfo::complete_func), AioEngine::Off, None).unwrap();
+            let mut qcow2_driver = Qcow2Driver::new(self.file.clone(), aio, conf.clone()).unwrap();
+            qcow2_driver.load_metadata(conf).unwrap();
+            qcow2_driver
+        }
+
+        fn create_driver_for_check(&self) -> Qcow2Driver<()> {
+            let file = self.file.clone();
+            let conf = BlockProperty {
+                format: DiskFormat::Qcow2,
+                ..Default::default()
+            };
+
+            create_qcow2_driver_for_check(file, conf).unwrap()
+        }
+
+        fn read_data(&self, guest_offset: u64, buf: &Vec<u8>) -> Result<()> {
+            let mut qcow2_driver = self.create_driver();
+            qcow2_driver.read_vectored(
+                vec![Iovec {
+                    iov_base: buf.as_ptr() as u64,
+                    iov_len: buf.len() as u64,
+                }],
+                guest_offset as usize,
+                (),
+            )
+        }
+
+        fn write_data(&self, guest_offset: u64, buf: &Vec<u8>) -> Result<()> {
+            let mut qcow2_driver = self.create_driver();
+            qcow2_driver.write_vectored(
+                vec![Iovec {
+                    iov_base: buf.as_ptr() as u64,
+                    iov_len: buf.len() as u64,
+                }],
+                guest_offset as usize,
+                (),
+            )
+        }
+
+        fn check_image(&self, quite: bool, fix: u64) -> bool {
+            let mut res = CheckResult::default();
+            let mut qcow2_driver = self.create_driver_for_check();
+            assert!(qcow2_driver.check_image(&mut res, quite, fix).is_ok());
+
+            res.err_num == 0
+                && res.leaks == 0
+                && res.leaks_fixed == 0
+                && res.corruptions == 0
+                && res.corruptions_fixed == 0
+        }
+
+        fn file_len(&mut self) -> u64 {
+            let file_len = self.file.as_ref().seek(SeekFrom::End(0)).unwrap();
+            file_len
+        }
+
+        fn clear_reftable(&mut self) {
+            self.header.refcount_table_clusters = 0;
+            self.header.refcount_table_offset = 0;
+
+            let mut buf = self.header.to_vec();
+            assert!(self.file.write_at(&mut buf, 0).is_ok());
+        }
+    }
+
+    impl Drop for TestQcow2Image {
+        fn drop(&mut self) {
+            assert!(remove_file(self.path.clone()).is_ok());
+        }
+    }
+
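+    // Illustrative sketch of how the TestQcow2Image helper above is used by the
+    // tests below (assuming a writable /tmp path):
+    //
+    //     let image = TestQcow2Image::create(16, 16, "/tmp/example.qcow2", "+1G");
+    //     let buf = vec![1_u8; 512];
+    //     assert!(image.write_data(0, &buf).is_ok());
+    //     assert!(image.check_image(false, 0));
+    //     // The backing file is removed automatically when `image` is dropped.
+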
+    struct TestRawImage {
+        path: String,
+    }
+
+    impl TestRawImage {
+        fn create(path: String, img_size: String) -> Self {
+            let create_str = format!("-f raw {} {}", path, img_size);
+            let create_args: Vec<String> =
+                create_str.split(' ').map(|str| str.to_string()).collect();
+            assert!(image_create(create_args).is_ok());
+
+            Self { path }
+        }
+
+        fn create_driver(&mut self) -> RawDriver<()> {
+            let conf = BlockProperty::default();
+
+            let aio = Aio::new(Arc::new(SyncAioInfo::complete_func), AioEngine::Off, None).unwrap();
+
+            let file = open_file(&self.path, false, false).unwrap();
+
+            RawDriver::new(Arc::new(file), aio, conf)
+        }
+    }
+
+    impl Drop for TestRawImage {
+        fn drop(&mut self) {
+            assert!(remove_file(self.path.clone()).is_ok());
+        }
+    }
+
+    fn vec_is_fill_with(vec: &Vec<u8>, num: u8) -> bool {
+        for elem in vec {
+            if elem != &num {
+                return false;
+            }
+        }
+        true
+    }
+
+    fn image_write(
+        driver: &mut dyn BlockDriverOps<()>,
+        offset: usize,
+        buf: &Vec<u8>,
+    ) -> Result<()> {
+        driver.write_vectored(
+            vec![Iovec {
+                iov_base: buf.as_ptr() as u64,
+                iov_len: buf.len() as u64,
+            }],
+            offset,
+            (),
+        )
+    }
+
+    fn image_read(driver: &mut dyn BlockDriverOps<()>, offset: usize, buf: &Vec<u8>) -> Result<()> {
+        driver.read_vectored(
+            vec![Iovec {
+                iov_base: buf.as_ptr() as u64,
+                iov_len: buf.len() as u64,
+            }],
+            offset,
+            (),
+        )
+    }
+
+    /// Test the function of creating image.
+    /// TestStep:
+    /// 1. Create image with different args.
+    /// Expect:
+    /// 1. If the format of args is invalid, creation fails.
+    #[test]
+    fn test_args_parse_of_image_create() {
+        let path = "/tmp/test_args_parse_of_image_create.qcow2";
+        let test_case = vec![
+            (
+                "-f qcow2 -o cluster_size=65536 -o refcount_bits=16 img_path +1G",
+                true,
+            ),
+            (
+                "-f qcow2 -o cluster_size=65536 refcount_bits=16 img_path +1G",
+                false,
+            ),
+            ("-h", true),
+            ("-f raw img_path +1G", true),
+            ("-f raw img_path", false),
+            ("-f raw -o refcount_bits=16 img_path +1G", false),
+            ("-f raw -o cluster_size=65536 img_path +1G", false),
+            ("-f invalid_fmt img_path", false),
+            ("img_path +1G", true),
+            ("img_path 1G", true),
+            ("-f qcow2 -o cluster_size=256 img_path +1G", false),
+            ("-f qcow2 img_path +1G", true),
+            ("-f qcow2 img_path +0G", false),
+            ("-f qcow2 -b backing_file img_path +1G", false),
+            ("-f qcow2 img_path", false),
+            ("-f qcow2 +1G", false),
+            ("-f qcow2 img_path +1G extra_params", false),
+            ("-f qcow2 -o cluster_size=65536 img_path +1G", true),
+            ("-f qcow2 -o refcount_bits=16 img_path +1G", true),
+            ("-f qcow2 -o refcount_bits=128 img_path +1G", false),
+            ("-f qcow2 -o refcount_bits=63 img_path +1G", false),
+            ("-f qcow2 -o cluster_size img_path +1G", false),
+            ("-f qcow2 -o cluster_size=65536 img_path", false),
+            ("-f qcow2 -o invalid_param img_path", false),
+            ("-f qcow2 -f raw img_path +1G", true),
+        ];
+
+        for case in test_case {
+            let create_str = case.0.replace("img_path", path);
+            println!("Create options: {}", create_str);
+            let create_args: Vec<String> =
+                create_str.split(' ').map(|str| str.to_string()).collect();
+
+            if case.1 {
+                assert!(image_create(create_args).is_ok());
+            } else {
+                assert!(image_create(create_args).is_err());
+            }
+        }
+
+        assert!(remove_file(path).is_ok());
+    }
+
+    /// Test the function of querying image info.
+    /// TestStep:
+    /// 1. Query image info with different types.
+    /// Expect:
+    /// 1. The invalid args will result in failure.
+ #[test] + fn test_args_parse_of_image_info() { + let path = "/tmp/test_args_parse_of_image_info.qcow2"; + let test_case = vec![ + ("img_path", true), + ("-f qcow2", false), + ("invalid_args", false), + ("img_path +1G", false), + ("-h", true), + ("--help", true), + ]; + + for case in test_case { + let cmd_str = case.0.replace("img_path", path); + let args: Vec = cmd_str.split(' ').map(|str| str.to_string()).collect(); + + // Query image info with type of qcow2. + assert!(image_create(vec![ + "-f".to_string(), + "qcow2".to_string(), + path.to_string(), + "+10M".to_string() + ]) + .is_ok()); + assert_eq!(image_info(args.clone()).is_ok(), case.1); + + // Query image info with type of raw. + assert!(image_create(vec![ + "-f".to_string(), + "raw".to_string(), + path.to_string(), + "+10M".to_string() + ]) + .is_ok()); + assert_eq!(image_info(args).is_ok(), case.1); + } + + assert!(remove_file(path).is_ok()); + } + + /// Test the function of creating image. + /// TestStep: + /// 1. Create image with different cluster bits, image size and refcount bits. + /// Expect: + /// 1. The header of new image meets expectations. + /// 2. No errors were found during the image check. + #[test] + fn test_create_qcow2_img() { + let path = "/tmp/test_create_qcow2_img.qcow2"; + // (cluster bits, image size in str, image size in number) + let test_case = [ + (9, "+1G", G), + (9, "+128G", 128 * G), + (10, "+20M", 20 * M), + (16, "+50M", 50 * M), + (16, "1024M", G), + (16, "+128G", 128 * G), + ]; + // Only refcount bit=16 is supported currently. + let refcount_bits = 16; + + for case in test_case { + let cluster_bits = case.0; + let cluster_size = 1 << cluster_bits; + let image_size = case.2; + let mut test_image = TestQcow2Image::create(cluster_bits, refcount_bits, path, case.1); + + // Check header. + let file_len = test_image.file_len(); + let l1_size = test_image.header.l1_size; + let reftable_clusters = test_image.header.refcount_table_clusters; + let reftable_size = u64::from(reftable_clusters) * cluster_size / ENTRY_SIZE; + let refblock_size = cluster_size / (refcount_bits / 8); + + assert_ne!(l1_size, 0); + assert_ne!(reftable_clusters, 0); + assert!(u64::from(l1_size) * cluster_size * cluster_size / ENTRY_SIZE >= image_size); + assert!(reftable_size * refblock_size * cluster_size >= file_len); + assert_eq!(u64::from(test_image.header.cluster_bits), cluster_bits); + assert_eq!(test_image.header.size, image_size); + + // Check refcount. + assert!(test_image.check_image(false, 0)); + } + } + + /// Test the function of detect image format. + /// TestStep: + /// 1. Create image with different disk format. + /// 2. Detect the format of disk. + /// 3. Apply image check, and specify the format of raw. + /// Expect: + /// 1. The detected disk format is correct. + /// 2. Image check returned error, as raw format is not supported to check. 
+ #[test] + fn test_detect_image_format() { + let path = "/tmp/test_detect_image_format.qcow2"; + let test_case = [ + ("-f raw path +1G", DiskFormat::Raw), + ("-f qcow2 path +1G", DiskFormat::Qcow2), + ]; + let check_str = format!("-f raw {}", path); + let check_args: Vec = check_str.split(' ').map(|str| str.to_string()).collect(); + + for case in test_case { + let create_str = case.0.replace("path", path); + println!("stratovirt-img {}", create_str); + let create_args: Vec = + create_str.split(' ').map(|str| str.to_string()).collect(); + assert!(image_create(create_args).is_ok()); + + let image_file = ImageFile::create(path, false).unwrap(); + assert_eq!(image_file.detect_img_format().unwrap(), case.1); + + assert!(image_check(check_args.clone()).is_err()); + } + + assert!(remove_file(path).is_ok()); + } + + /// Test the function of check image. + /// TestStep: + /// 1. Check image with different args. + /// Expect: + /// 1. If the args is invalid, check operation failed. + #[test] + fn test_args_parse_of_image_check() { + let path = "/tmp/test_args_parse_of_image_check"; + let create_str = "-f disk_fmt img_path +1G".replace("img_path", path); + let test_case = [ + ("qcow2", "-f qcow2 img_path", true), + ("qcow2", "-f qcow2", false), + ("qcow2", "-r leaks -f qcow2 img_path", true), + ("qcow2", "-r all -f qcow2 img_path", true), + ("qcow2", "-r invalid_param -f qcow2 img_path", false), + ("qcow2", "-r -f qcow2 img_path", false), + ("qcow2", "-f raw img_path", false), + ("qcow2", "img_path", true), + ("qcow2", "-f qcow2 img_path extra_params", false), + ("qcow2", "-f raw -f qcow2 img_path", true), + ("qcow2", "-f qcow2 -f raw img_path", false), + ("raw", "-f qcow2 img_path", false), + ]; + + for case in test_case { + let create_string = create_str.replace("disk_fmt", case.0); + let create_args: Vec = create_string + .split(' ') + .map(|str| str.to_string()) + .collect(); + println!("Create args: {}", create_string); + assert!(image_create(create_args.clone()).is_ok()); + + let check_str = case.1.replace("img_path", path); + let check_args: Vec = check_str.split(' ').map(|str| str.to_string()).collect(); + println!("Check args: {}", check_str); + + if case.2 { + assert!(image_check(check_args).is_ok()); + } else { + assert!(image_check(check_args).is_err()); + } + + assert!(remove_file(path).is_ok()); + } + } + + /// Test the function of image check. + /// + /// TestStep: + /// 1. Create image with different image size. + /// 2. Alloc a a new cluster, and create a snapshot, so the real refcount of this cluster is 2. + /// 3. Decrease the refcount of this cluster in refcount block + /// Expect: + /// 1. The corruptions cluster can be found and fixed during image check. + #[test] + fn test_check_refcount_corruptions() { + let path = "/tmp/test_check_refcount_corruptions.qcow2"; + // (cluster bits, image size in str, image size in number) + let test_case = [ + (16, "+50M", 50 * M), + (16, "1024M", G), + (16, "+128G", 128 * G), + ]; + // Only refcount bit=16 is supported currently. + let refcount_bits = 16; + + for case in test_case { + let cluster_bits = case.0; + let cluster_size = 1 << cluster_bits; + let test_image = TestQcow2Image::create(cluster_bits, refcount_bits, path, case.1); + let mut qcow2_driver = test_image.create_driver(); + + // 1. Alloc a cluster. + // 2. Create a snapshot on the image, so the refcount of this cluster is 2. + // 3. Decrease the refcount of this cluster. 
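+            // Put differently: after step 3 the refcount block records a count
+            // of 1 for a cluster that is actually referenced twice (by the
+            // active L2 table and by the snapshot), so `check_image` run with
+            // FIX_ERRORS is expected to report and repair exactly one corruption.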
+ let buf = vec![1_u8; cluster_size as usize]; + assert!(qcow2_driver + .write_vectored( + vec![Iovec { + iov_base: buf.as_ptr() as u64, + iov_len: buf.len() as u64, + }], + 0, + (), + ) + .is_ok()); + // Get hostoffset of 0 + let mut offset = 0; + match qcow2_driver.host_offset_for_read(0, cluster_size).unwrap() { + HostRange::DataNotInit(_) => assert!(false), + HostRange::DataAddress(addr, bytes) => { + assert!(bytes >= cluster_size); + offset = addr; + } + }; + assert_ne!(offset, 0); + assert!(qcow2_driver + .create_snapshot("test_refcount".to_string(), 0) + .is_ok()); + qcow2_driver + .refcount + .update_refcount(offset, 1, -1, true, &Qcow2DiscardType::Never) + .unwrap(); + drop(qcow2_driver); + + // Check refcount. + let mut res = CheckResult::default(); + let mut qcow2_driver = test_image.create_driver_for_check(); + assert!(qcow2_driver + .check_image(&mut res, false, FIX_ERRORS) + .is_ok()); + assert_eq!(res.corruptions_fixed, 1); + assert_eq!(res.corruptions, 0); + } + } + + /// Test the function of image check. + /// + /// TestStep: + /// 1. Create image with different image size. + /// 2. Alloc a a new cluster, the real reference of this cluster is 1. + /// 3. Update the reference of this cluster to 10. + /// Expect: + /// 1. The leaks cluster can be found and fixed by image check. + #[test] + fn test_check_refcount_leaks() { + let path = "/tmp/test_check_refcount_leaks.qcow2"; + // (cluster bits, image size in str, image size in number, number clusters) + let test_case = [ + (16, "+50M", 50 * M, 1), + (16, "1024M", G, 1), + (16, "1024M", G, 11), + (16, "+128G", 128 * G, 1), + (16, "+128G", 128 * G, 37), + ]; + // Only refcount bit=16 is supported currently. + let refcount_bits = 16; + + for case in test_case { + let cluster_bits = case.0; + let test_image = TestQcow2Image::create(cluster_bits, refcount_bits, path, case.1); + let mut qcow2_driver = test_image.create_driver(); + + // Alloc cluster, and update the refcount to 10. + let nb_clusters = case.3 as u64; + let offset = qcow2_driver.alloc_cluster(nb_clusters, true).unwrap(); + qcow2_driver + .refcount + .update_refcount(offset, nb_clusters, 9, true, &Qcow2DiscardType::Never) + .unwrap(); + drop(qcow2_driver); + + // Check refcount. + let mut res = CheckResult::default(); + let mut qcow2_driver = test_image.create_driver_for_check(); + assert!(qcow2_driver.check_image(&mut res, false, FIX_LEAKS).is_ok()); + assert_eq!(res.leaks, 0); + assert_eq!(res.leaks_fixed, nb_clusters as i32); + } + } + + /// Test the function of image check. + /// + /// TestStep: + /// 1. Create a new image. + /// 2. Alloc a new cluster, the real reference of this cluster is 1, + /// the l1 entry of this cluster identification with oflag of copied. + /// 3. Clean the oflag of copied in l1 entry of this cluster, and apply image check. + /// Expect: + /// 1. The wrong of oflag in l1 entry can be found and fixed. 
+ #[test] + fn test_check_remove_oflag_copied_in_l1_entry() { + let path = "/tmp/test_check_remove_oflag_copied_in_l1_entry.qcow2"; + let cluster_bits = 16; + let cluster_size = 1 << cluster_bits; + let refcount_bits = 16; + let image_size = 1 << 30; + let image_size_str = "+1G"; + + let test_case = vec![ + // (fix, guest_offset, corruptions, corruptions_fixed) + (0, 0, 1, 0), + (FIX_LEAKS, 0, 0, 1), + (FIX_ERRORS, 0, 0, 1), + (FIX_LEAKS | FIX_ERRORS, 0, 0, 1), + (FIX_LEAKS | FIX_ERRORS, cluster_size * 10, 0, 1), + (FIX_LEAKS, cluster_size * 10, 0, 1), + (FIX_ERRORS, cluster_size * 10, 0, 1), + ( + FIX_LEAKS | FIX_ERRORS, + cluster_size + cluster_size / 2, + 0, + 1, + ), + (FIX_LEAKS, image_size - cluster_size, 0, 1), + (FIX_LEAKS | FIX_ERRORS, image_size - cluster_size, 0, 1), + ]; + + // Test different guest offset + for case in test_case { + let quite = false; + let fix = case.0; + let guest_offset = case.1; + let expect_corruptions = case.2; + let expect_corruptions_fixed = case.3; + let test_image = + TestQcow2Image::create(cluster_bits, refcount_bits, path, image_size_str); + // Write data to guest offset. + let buf = vec![1_u8; cluster_size as usize]; + assert!(test_image.write_data(guest_offset, &buf).is_ok()); + + // Modify the oflag of l1 entry of cluster 0. + let mut qcow2_driver = test_image.create_driver(); + let l1_idx = qcow2_driver.table.get_l1_table_index(guest_offset); + qcow2_driver.table.l1_table[l1_idx as usize] &= !QCOW2_OFFSET_COPIED; + assert!(qcow2_driver.table.save_l1_table().is_ok()); + drop(qcow2_driver); + + // Check and fix error copied flag in l1 table. + let mut res = CheckResult::default(); + let mut qcow2_driver = test_image.create_driver_for_check(); + assert!(qcow2_driver.check_image(&mut res, quite, fix).is_ok()); + assert_eq!(res.corruptions, expect_corruptions); + assert_eq!(res.corruptions_fixed, expect_corruptions_fixed); + let message = res.collect_check_message(); + println!("{}", message); + drop(qcow2_driver); + + // The oflag error in l1 table has been fixed. + if expect_corruptions_fixed != 0 { + let qcow2_driver = test_image.create_driver(); + assert_ne!( + qcow2_driver.table.l1_table[l1_idx as usize] & QCOW2_OFFSET_COPIED, + 0 + ); + drop(qcow2_driver); + } + } + } + + /// Test the function of image check. + /// + /// TestStep: + /// 1. Create a new image. + /// 2. Alloc a a new cluster, the real reference of this cluster is 1, + /// the l2 entry of this cluster identification with oflag of copied. + /// 3. Clean the oflag of copied in l2 entry of this cluster, and apply image check. + /// Expect: + /// 2. The wrong of oflag in l2 entry can be found and fixed. 
+ #[test] + fn test_check_remove_oflag_copied_in_l2_entry() { + let path = "/tmp/test_check_remove_oflag_copied_in_l2_entry.qcow2"; + let cluster_bits = 16; + let cluster_size = 1 << cluster_bits; + let refcount_bits = 16; + let image_size = 1 << 30; + let image_size_str = "+1G"; + + let test_case = vec![ + // (fix, guest_offset, corruptions, corruptions_fixed) + (0, 0, 1, 0), + (FIX_LEAKS, 0, 0, 1), + (FIX_ERRORS, 0, 0, 1), + (FIX_LEAKS | FIX_ERRORS, 0, 0, 1), + (FIX_LEAKS | FIX_ERRORS, cluster_size * 10, 0, 1), + (FIX_LEAKS, cluster_size * 10, 0, 1), + (FIX_ERRORS, cluster_size * 10, 0, 1), + ( + FIX_LEAKS | FIX_ERRORS, + cluster_size + cluster_size / 2, + 0, + 1, + ), + (FIX_LEAKS, image_size - cluster_size, 0, 1), + (FIX_LEAKS | FIX_ERRORS, image_size - cluster_size, 0, 1), + ]; + + // Test different guest offset + for case in test_case { + let quite = false; + let fix = case.0; + let guest_offset = case.1; + let expect_corruptions = case.2; + let expect_corruptions_fixed = case.3; + let test_image = + TestQcow2Image::create(cluster_bits, refcount_bits, path, image_size_str); + // Write data to guest offset. + let buf = vec![1_u8; cluster_size as usize]; + assert!(test_image.write_data(guest_offset, &buf).is_ok()); + + // Modify the oflag of l2 entry. + let mut qcow2_driver = test_image.create_driver(); + let l2_idx = qcow2_driver.table.get_l2_table_index(guest_offset) as usize; + let cache_entry = qcow2_driver.get_table_cluster(guest_offset).unwrap(); + let mut l2_entry = cache_entry.borrow_mut().get_entry_map(l2_idx).unwrap(); + l2_entry &= !QCOW2_OFFSET_COPIED; + assert!(cache_entry + .borrow_mut() + .set_entry_map(l2_idx, l2_entry) + .is_ok()); + qcow2_driver + .table + .l2_table_cache + .add_dirty_table(cache_entry); + drop(qcow2_driver); + + // Check and fix copied flag in l2 table. + let mut res = CheckResult::default(); + let mut qcow2_driver = test_image.create_driver_for_check(); + assert!(qcow2_driver.check_image(&mut res, quite, fix).is_ok()); + assert_eq!(res.corruptions, expect_corruptions); + assert_eq!(res.corruptions_fixed, expect_corruptions_fixed); + let message = res.collect_check_message(); + println!("{}", message); + drop(qcow2_driver); + + // The oflag error in l2 table has been fixed. + if expect_corruptions_fixed != 0 { + let mut qcow2_driver = test_image.create_driver(); + let cache_entry = qcow2_driver.get_table_cluster(guest_offset).unwrap(); + let l2_entry = cache_entry.borrow_mut().get_entry_map(l2_idx).unwrap(); + assert_ne!(l2_entry & QCOW2_OFFSET_COPIED, 0); + drop(qcow2_driver); + } + } + } + + /// Test the function of image check. + /// + /// TestStep: + /// 1. Create a new image. + /// 2. Alloc a a new cluster, and create a new snapshot. So the oflag of copied in l1 entry will be cleared, + /// which means writing to this cluster will result in copy on write. + /// 3. Set the oflag of copied in l1 entry of this cluster, and apply image check. + /// Expect: + /// 1. The wrong of oflag in l1 entry can be found and fixed. 
+ #[test] + fn test_check_add_oflag_copied_in_l1_entry() { + let path = "/tmp/test_check_add_oflag_copied_in_l1_entry.qcow2"; + let cluster_bits = 16; + let cluster_size = 1 << cluster_bits; + let refcount_bits = 16; + let image_size = 1 << 30; + let image_size_str = "+1G"; + let test_case = vec![ + // (fix, guest_offset, corruptions, corruptions_fixed) + (0, 0, 1, 0), + (FIX_LEAKS, 0, 0, 1), + (FIX_ERRORS, 0, 0, 1), + (FIX_LEAKS | FIX_ERRORS, 0, 0, 1), + (FIX_LEAKS | FIX_ERRORS, cluster_size * 10, 0, 1), + (FIX_LEAKS, cluster_size * 10, 0, 1), + (FIX_ERRORS, cluster_size * 10, 0, 1), + ( + FIX_LEAKS | FIX_ERRORS, + cluster_size + cluster_size / 2, + 0, + 1, + ), + (FIX_LEAKS, image_size - cluster_size, 0, 1), + (FIX_LEAKS | FIX_ERRORS, image_size - cluster_size, 0, 1), + ]; + + // Test different guest offset + for case in test_case { + let quite = false; + let fix = case.0; + let guest_offset = case.1; + let expect_corruptions = case.2; + let expect_corruptions_fixed = case.3; + + // Write data to guest offset. + let test_image = + TestQcow2Image::create(cluster_bits, refcount_bits, path, image_size_str); + let buf = vec![1_u8; cluster_size as usize]; + assert!(test_image.write_data(guest_offset, &buf).is_ok()); + + // Create a new snapshot, ensure that the refcount of data cluster is 2. + assert!(image_snapshot(vec![ + "-c".to_string(), + "test_snapshot0".to_string(), + path.to_string() + ]) + .is_ok()); + + // Add the oflag copied for l1 entry. + let mut qcow2_driver = test_image.create_driver(); + let l1_idx = qcow2_driver.table.get_l1_table_index(guest_offset); + qcow2_driver.table.l1_table[l1_idx as usize] |= QCOW2_OFFSET_COPIED; + assert!(qcow2_driver.table.save_l1_table().is_ok()); + drop(qcow2_driver); + + // Check and fix error copied flag in l1 table. + let mut res = CheckResult::default(); + let mut qcow2_driver = test_image.create_driver_for_check(); + assert!(qcow2_driver.check_image(&mut res, quite, fix).is_ok()); + assert_eq!(res.corruptions, expect_corruptions); + assert_eq!(res.corruptions_fixed, expect_corruptions_fixed); + let message = res.collect_check_message(); + println!("{}", message); + drop(qcow2_driver); + + // The oflag error in l1 table has been fixed. + if expect_corruptions_fixed != 0 { + let qcow2_driver = test_image.create_driver(); + assert_eq!( + qcow2_driver.table.l1_table[l1_idx as usize] & QCOW2_OFFSET_COPIED, + 0 + ); + drop(qcow2_driver); + } + } + } + + /// Test the function of image check. + /// + /// TestStep: + /// 1. Create a new image. + /// 2. Alloc a a new cluster, and create a new snapshot. So the oflag of l2 entry will be cleared, + /// which means writing to this cluster will result in copy on write. + /// 3. Set the oflag of copied in l2 entry of this cluster, and apply image check. + /// Expect: + /// 2. The wrong of oflag in l2 entry can be found and fixed. 
+ #[test] + fn test_check_add_oflag_copied_in_l2_entry() { + let path = "/tmp/test_check_add_oflag_copied_in_l2_entry.qcow2"; + let cluster_bits = 16; + let cluster_size = 1 << cluster_bits; + let refcount_bits = 16; + let image_size = 1 << 30; + let image_size_str = "+1G"; + let test_case = vec![ + // (fix, guest_offset, corruptions, corruptions_fixed) + (0, 0, 1, 0), + (FIX_LEAKS, 0, 0, 1), + (FIX_ERRORS, 0, 0, 1), + (FIX_LEAKS | FIX_ERRORS, 0, 0, 1), + (FIX_LEAKS | FIX_ERRORS, cluster_size * 10, 0, 1), + (FIX_LEAKS, cluster_size * 10, 0, 1), + (FIX_ERRORS, cluster_size * 10, 0, 1), + ( + FIX_LEAKS | FIX_ERRORS, + cluster_size + cluster_size / 2, + 0, + 1, + ), + (FIX_LEAKS, image_size - cluster_size, 0, 1), + (FIX_LEAKS | FIX_ERRORS, image_size - cluster_size, 0, 1), + ]; + + // Test different guest offset + for case in test_case { + let fix = case.0; + let quite = false; + let guest_offset = case.1; + let expect_corruptions = case.2; + let expect_corruptions_fixed = case.3; + + // Write data to guest offset. + let test_image = + TestQcow2Image::create(cluster_bits, refcount_bits, path, image_size_str); + let buf = vec![1_u8; cluster_size as usize]; + assert!(test_image.write_data(guest_offset, &buf).is_ok()); + + // Create a new snapshot, ensure that the refcount of data cluster is 2. + assert!(image_snapshot(vec![ + "-c".to_string(), + "test_snapshot0".to_string(), + path.to_string() + ]) + .is_ok()); + + // Add the oflag copide for l2 entry. + let mut qcow2_driver = test_image.create_driver(); + let l2_idx = qcow2_driver.table.get_l2_table_index(guest_offset) as usize; + let cache_entry = qcow2_driver.get_table_cluster(guest_offset).unwrap(); + let mut l2_entry = cache_entry.borrow_mut().get_entry_map(l2_idx).unwrap(); + l2_entry |= QCOW2_OFFSET_COPIED; + assert!(cache_entry + .borrow_mut() + .set_entry_map(l2_idx, l2_entry) + .is_ok()); + qcow2_driver + .table + .l2_table_cache + .add_dirty_table(cache_entry); + drop(qcow2_driver); + + // Check and fix error copied flag in l1 table. + let mut res = CheckResult::default(); + let mut qcow2_driver = test_image.create_driver_for_check(); + assert!(qcow2_driver.check_image(&mut res, quite, fix).is_ok()); + assert_eq!(res.corruptions, expect_corruptions); + assert_eq!(res.corruptions_fixed, expect_corruptions_fixed); + let message = res.collect_check_message(); + println!("{}", message); + drop(qcow2_driver); + + // The oflag error in l2 table has been fixed. + if expect_corruptions_fixed != 0 { + let mut qcow2_driver = test_image.create_driver(); + let cache_entry = qcow2_driver.get_table_cluster(guest_offset).unwrap(); + let l2_entry = cache_entry.borrow_mut().get_entry_map(l2_idx).unwrap(); + assert_eq!(l2_entry & QCOW2_OFFSET_COPIED, 0); + drop(qcow2_driver); + } + } + } + + /// Test the function of image check. + /// + /// TestStep: + /// 1. Create a new image with different cluster bits and image size. + /// 2. Set the refcount table offset and refcount table clusters to 0 in header, + /// so it is unable to find data of refcount table by header. + /// 3. Apply image check. + /// Expect: + /// 1. The refcount table and refcount block of this image can be rebuild. + #[test] + fn test_rebuild_refcount() { + let path = "/tmp/test_rebuild_refcount.qcow2"; + // (cluster bits, image size in str, image size in number) + let test_case = [ + (9, "+1G", G), + (10, "+20M", 20 * M), + (16, "+50M", 50 * M), + (16, "1024M", G), + (16, "+128G", 128 * G), + ]; + // Only refcount bit=16 is supported currently. 
+ let refcount_bits = 16; + + for case in test_case { + let cluster_bits = case.0; + let mut test_image = TestQcow2Image::create(cluster_bits, refcount_bits, path, case.1); + test_image.clear_reftable(); + + // Try to rebuild refcount table. + let fix = FIX_ERRORS | FIX_LEAKS; + let quite = false; + + let mut res = CheckResult::default(); + let mut qcow2_driver = test_image.create_driver_for_check(); + assert!(qcow2_driver.check_image(&mut res, quite, fix).is_ok()); + assert_eq!(res.corruptions, 0); + assert!(res.corruptions_fixed != 0); + drop(qcow2_driver); + } + } + + /// Test the function of image check. + /// 1. If the l2 offset is not align to cluster size, it will be set to zero during checking. + /// 2. The value of reserved area of l2 entry is expected to 0(Seen L2_STD_RESERVED_MASK). If not , + /// and error message will be recorded, but the repair action will not be tooken, as this error has no other impact. + /// + /// TestStep: + /// 1. Create a new image. + /// 2. Alloc a new cluster, change the offset in l2 entry of this cluster. + /// 3. Apply image check. + /// Expect: + /// 1. The refcount table and refcount block of this image can be rebuild. + #[test] + fn test_check_fix_l2_entry() { + let path = "/tmp/test_check_fix_l2_entry.qcow2"; + let cluster_bits = 16; + let cluster_size = 1 << cluster_bits; + let refcount_bits = 16; + let image_size_str = "+1G"; + let fix = FIX_LEAKS | FIX_ERRORS; + let quite = false; + + let guest_offset = 0; + let test_case: Vec<(u64, bool)> = vec![ + (1 << 1, false), + (1 << 8, false), + (1 << 9, true), + (1 << (cluster_bits - 1), true), + (1 << 57, false), + ]; + + for case in test_case { + // Write data to guest offset. + let offset = case.0; + let need_fixed = case.1; + println!("offset: {}, need_fixed: {}", offset, need_fixed); + let test_image = + TestQcow2Image::create(cluster_bits, refcount_bits, path, image_size_str); + let buf = vec![1_u8; cluster_size as usize]; + assert!(test_image.write_data(guest_offset, &buf).is_ok()); + + // Modify the l2 offset, make it not align to cluster size. + let mut qcow2_driver = test_image.create_driver(); + let l2_idx = qcow2_driver.table.get_l2_table_index(guest_offset) as usize; + let cache_entry = qcow2_driver.get_table_cluster(guest_offset).unwrap(); + let mut l2_entry = cache_entry.borrow_mut().get_entry_map(l2_idx).unwrap(); + let l2_offset = l2_entry & L2_TABLE_OFFSET_MASK; + let l2_entry_flag = l2_entry & !L2_TABLE_OFFSET_MASK; + l2_entry = (l2_offset + offset) | l2_entry_flag; + assert!(cache_entry + .borrow_mut() + .set_entry_map(l2_idx, l2_entry) + .is_ok()); + qcow2_driver + .table + .l2_table_cache + .add_dirty_table(cache_entry); + drop(qcow2_driver); + + // Check and fix error copied flag in l1 table. + let mut res = CheckResult::default(); + let mut qcow2_driver = test_image.create_driver_for_check(); + assert!(qcow2_driver.check_image(&mut res, quite, fix).is_ok()); + if need_fixed { + assert_eq!(res.corruptions, 0); + assert_eq!(res.corruptions_fixed, 1); + } else { + assert_eq!(res.corruptions, 1); + assert_eq!(res.corruptions_fixed, 0); + } + drop(qcow2_driver); + } + } + + /// Test the function of snapshot operation. + /// TestStep: + /// 1. Operate snapshot with different args. + /// Expect: + /// 1. If the args is invalid, operation failed. 
+ #[test] + fn test_args_parse_of_image_snapshot() { + let path = "/tmp/test_args_parse_of_image_snapshot"; + let create_str = "-f disk_fmt img_path +1G".replace("img_path", path); + let test_case = [ + ("qcow2", "-c snapshot0 img_path", true), + ("qcow2", "-f qcow2 -l img_path", true), + ("qcow2", "-r old_snapshot_name img_path", false), + ("qcow2", "-d snapshot0 img_path", false), + ("qcow2", "-a snapshot0 img_path", false), + ("qcow2", "-c snapshot0 -l img_path", false), + ( + "raw", + "-r old_snapshot_name new_snapshot_name img_path", + false, + ), + ("raw", "-f qcow2 -l img_path", false), + ("raw", "-l img_path", false), + ]; + + for case in test_case { + let create_string = create_str.replace("disk_fmt", case.0); + let create_args: Vec = create_string + .split(' ') + .map(|str| str.to_string()) + .collect(); + println!("Create args: {}", create_string); + assert!(image_create(create_args).is_ok()); + + let snapshot_str = case.1.replace("img_path", path); + let snapshot_args: Vec = + snapshot_str.split(' ').map(|str| str.to_string()).collect(); + let ret = image_snapshot(snapshot_args); + if case.2 { + assert!(ret.is_ok()); + } else { + assert!(ret.is_err()); + } + + assert!(remove_file(path).is_ok()); + } + } + + /// Test the function of apply snapshot. + /// + /// TestStep: + /// 1. Create a new image. alloc a new cluster and write 1. + /// 2. Create snapshot named snapshot0, write 2 to the cluster. + /// 3. Create snapshot named snapshot1, write 3 to the cluster. + /// 4. Apply snapshot named snapshot0, and read the data by qcow2 driver. + /// Expect: + /// 1. No errors were found during the image check. + /// 2. The data read after snapshot apply is 2. + #[test] + fn test_check_snapshot_apply_basic() { + let path = "/tmp/test_check_snapshot_apply_basic.qcow2"; + let cluster_bits = 16; + let cluster_size = 1 << cluster_bits; + let refcount_bits = 16; + let test_image = TestQcow2Image::create(cluster_bits, refcount_bits, path, "+1G"); + let quite = false; + let fix = FIX_ERRORS | FIX_LEAKS; + + assert!(test_image.check_image(quite, fix)); + let buf = vec![1_u8; cluster_size as usize]; + assert!(test_image.write_data(0, &buf).is_ok()); + assert!(test_image.check_image(quite, fix)); + + // Create a snapshot named test_snapshot0 + assert!(image_snapshot(vec![ + "-c".to_string(), + "test_snapshot0".to_string(), + path.to_string() + ]) + .is_ok()); + + assert!(test_image.check_image(quite, fix)); + let buf = vec![2_u8; cluster_size as usize]; + assert!(test_image.write_data(0, &buf).is_ok()); + assert!(test_image.check_image(quite, fix)); + + // Create as snapshot named test_snapshot1. + assert!(image_snapshot(vec![ + "-c".to_string(), + "test_snapshot1".to_string(), + path.to_string() + ]) + .is_ok()); + + assert!(test_image.check_image(quite, fix)); + let buf = vec![3_u8; cluster_size as usize]; + assert!(test_image.write_data(0, &buf).is_ok()); + assert!(test_image.check_image(quite, fix)); + + // Apply snapshot named test_snapshot0. 
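+        // test_snapshot0 was taken while the cluster still held the 1_u8 pattern,
+        // so reading it back after the apply is expected to return 1 (checked below).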
+ assert!(image_snapshot(vec![ + "-a".to_string(), + "test_snapshot0".to_string(), + path.to_string() + ]) + .is_ok()); + + assert!(test_image.check_image(quite, fix)); + let buf = vec![0_u8; cluster_size as usize]; + assert!(test_image.read_data(0, &buf).is_ok()); + for elem in buf { + assert_eq!(elem, 1); + } + let buf = vec![4_u8; cluster_size as usize]; + assert!(test_image.write_data(0, &buf).is_ok()); + assert!(test_image.check_image(quite, fix)); + + // Apply snapshot named test_snapshot1 + assert!(image_snapshot(vec![ + "-a".to_string(), + "test_snapshot1".to_string(), + path.to_string() + ]) + .is_ok()); + assert!(test_image.check_image(quite, fix)); + let buf = vec![0_u8; cluster_size as usize]; + assert!(test_image.read_data(0, &buf).is_ok()); + for elem in buf { + assert_eq!(elem, 2); + } + } + + /// Test the function of snapshot rename. + /// + /// TestStep: + /// 1. Create a new image. alloc a new cluster and write 1. + /// 2. Create snapshot named test_snapshot0, write 2 to the cluster. + /// 3. Create snapshot named test_snapshot1, write 3 to the cluster. + /// 4. Rename test_snapshot0 to test_snapshot0-new. + /// 5. Apply snapshot named test_snapshot0. + /// 6. Apply snapshot named test_snapshot0-new. + /// Expect: + /// 1. step 5 is failure and step 1/2/3/4/6 is success. + /// 2. The data read after snapshot apply is 2. + #[test] + fn test_snapshot_rename_basic() { + let path = "/tmp/test_snapshot_rename_basic.qcow2"; + let cluster_bits = 16; + let cluster_size = 1 << cluster_bits; + let refcount_bits = 16; + + // Create a new image. alloc a new cluster and write 1. + let test_image = TestQcow2Image::create(cluster_bits, refcount_bits, path, "+1G"); + let buf = vec![1_u8; cluster_size as usize]; + assert!(test_image.write_data(0, &buf).is_ok()); + + // Create snapshot named test_snapshot0, write 2 to the cluster. + assert!(image_snapshot(vec![ + "-c".to_string(), + "test_snapshot0".to_string(), + path.to_string() + ]) + .is_ok()); + let buf = vec![2_u8; cluster_size as usize]; + assert!(test_image.write_data(0, &buf).is_ok()); + + // Create snapshot named test_snapshot1, write 3 to the cluster. + assert!(image_snapshot(vec![ + "-c".to_string(), + "test_snapshot1".to_string(), + path.to_string() + ]) + .is_ok()); + let buf = vec![3_u8; cluster_size as usize]; + assert!(test_image.write_data(0, &buf).is_ok()); + + // Rename test_snapshot0 to test_snapshot0-new. + assert!(image_snapshot(vec![ + "-r".to_string(), + "test_snapshot0".to_string(), + "test_snapshot0-new".to_string(), + path.to_string() + ]) + .is_ok()); + + // Apply snapshot named test_snapshot0. + assert!(image_snapshot(vec![ + "-a".to_string(), + "test_snapshot0".to_string(), + path.to_string() + ]) + .is_err()); + + // Apply snapshot named test_snapshot-new. + assert!(image_snapshot(vec![ + "-a".to_string(), + "test_snapshot0-new".to_string(), + path.to_string() + ]) + .is_ok()); + + // The data read after snapshot apply is 2. + let buf = vec![0_u8; cluster_size as usize]; + assert!(test_image.read_data(0, &buf).is_ok()); + for elem in buf { + assert_eq!(elem, 1); + } + + // Rename non-existed snapshot name. + assert!(image_snapshot(vec![ + "-r".to_string(), + "test_snapshot11111".to_string(), + "test_snapshot11111-new".to_string(), + path.to_string() + ]) + .is_err()); + + let buf = vec![4_u8; cluster_size as usize]; + assert!(test_image.write_data(0, &buf).is_ok()); + + // Rename test_snapshot1 to test_snapshot123. 
+ assert!(image_snapshot(vec![ + "-r".to_string(), + "test_snapshot1".to_string(), + "test_snapshot123".to_string(), + path.to_string() + ]) + .is_ok()); + + // Apply snapshot named test_snapshot123 + assert!(image_snapshot(vec![ + "-a".to_string(), + "test_snapshot123".to_string(), + path.to_string() + ]) + .is_ok()); + let buf = vec![0_u8; cluster_size as usize]; + assert!(test_image.read_data(0, &buf).is_ok()); + for elem in buf { + assert_eq!(elem, 2); + } + } + + /// Test the function of resize image. + /// + /// TestStep: + /// 1. Resize the image with different args. + /// Expect: + /// 1. If the format of args is invalid, the operation return error. + #[test] + fn test_arg_parse_of_image_resize() { + let path = "/tmp/test_arg_parse_of_image_resize.img"; + let test_case = vec![ + ("qcow2", "-f qcow2 img_path +1M", true), + ("qcow2", "-f raw img_path +1M", true), + ("qcow2", "img_path +0M", true), + ("qcow2", "img_path +1M", true), + ("qcow2", "img_path 10M", true), + ("qcow2", "img_path 11M", true), + ("qcow2", "img_path 9M", false), + ("qcow2", "img_path 1M", false), + ("qcow2", "img_path ++1M", false), + ("qcow2", "img_path +511", false), + ("qcow2", "img_path -1M", false), + ("qcow2", "img_path +1M extra_args", false), + ("qcow2", "+1M", false), + ("qcow2", "", false), + ("raw", "img_path +1M", true), + ("raw", "img_path 18446744073709551615", false), + ("raw", "img_path +511", false), + ("raw", "-f raw img_path +1M", true), + ("raw", "-f qcow2 img_path +1M", false), + ]; + + for (img_type, cmd, res) in test_case { + assert!(image_create(vec![ + "-f".to_string(), + img_type.to_string(), + path.to_string(), + "+10M".to_string() + ]) + .is_ok()); + + // Apply resize operation. + let cmd = cmd.replace("img_path", path); + let args: Vec = cmd.split(' ').map(|str| str.to_string()).collect(); + assert_eq!(image_resize(args).is_ok(), res); + + assert!(remove_file(path).is_ok()); + } + } + + /// Test the function of resize for raw image. + /// + /// TestStep: + /// 1. Create a raw image with size of 10M. + /// 2. Write data 1 in the range of [0, 10M], with expect 1. + /// 3. Resize the image with +10M. + /// 4. Write data 2 in the range of [10M, 20M], with expect 1. + /// 5. Read the data from the image, with expect 2. + /// + /// Expect: + /// 1. Data successfully written to disk. + /// 2. The data read from disk meets expectation. + #[test] + fn test_resize_raw() { + // Step 1: Create a raw image with size of 10M. + let path = "/tmp/test_resize_raw.raw"; + let mut test_image = TestRawImage::create(path.to_string(), "10M".to_string()); + + // Step 2: Write data 1 in the range of [0, 10M]. + let mut offset: usize = 0; + let buf = vec![1; 10240]; + let mut driver = test_image.create_driver(); + while offset < 10 * M as usize { + assert!(image_write(&mut driver, offset, &buf).is_ok()); + offset += 10240; + } + drop(driver); + + // Step 3: Resize the image with 10M. + assert!(image_resize(vec![ + "-f".to_string(), + "raw".to_string(), + path.to_string(), + "+10M".to_string(), + ]) + .is_ok()); + + // Step 4: Write data 2 in the range of [10M, 20M] + let buf = vec![2; 10240]; + let mut driver = test_image.create_driver(); + while offset < (10 + 10) * M as usize { + assert!(image_write(&mut driver, offset, &buf).is_ok()); + offset += 10240; + } + + // Step 5: Read the data from the image, the data read from disk meets expectation. 
+ let mut offset = 0; + let buf = vec![0; 10240]; + while offset < 10 * M as usize { + assert!(image_read(&mut driver, offset, &buf).is_ok()); + assert!(vec_is_fill_with(&buf, 1)); + offset += 10240; + } + + while offset < (10 + 10) * M as usize { + assert!(image_read(&mut driver, offset, &buf).is_ok()); + assert!(vec_is_fill_with(&buf, 2)); + offset += 10240; + } + } + + /// Test the function of resize for qcow2 image. + /// + /// TestStep: + /// 1. Create a qcow2 image with size of 10M. + /// 2. Write data 1 in the range of [0, 10M], with expect 1. + /// 3. Resize the image with +10M. + /// 4. Write data 2 in the range of [10M, 20M], with expect 1. + /// 5. Read the data from the image, with expect 2. + /// + /// Expect: + /// 1. Data successfully written to disk. + /// 2. The data read from disk meets expectation. + #[test] + fn test_resize_qcow2() { + let path = "/tmp/test_resize_qcow2.qcow2"; + // Step 1: Create a qcow2 image with size of 10M. + let test_image = TestQcow2Image::create(16, 16, path, "+10M"); + + // Step 2: Write data of 1 to the disk in the range of [0, 10M] + let mut offset = 0; + let buf = vec![1; 10240]; + let mut qcow2_driver = test_image.create_driver(); + while offset < 10 * M { + assert!(image_write(&mut qcow2_driver, offset as usize, &buf).is_ok()); + offset += 10240; + } + // If the offset exceed the virtual size, it is expect to be failed. + assert!(image_write(&mut qcow2_driver, offset as usize, &buf).is_err()); + drop(qcow2_driver); + + // Step 3: Resize the image with 10M. + assert!(image_resize(vec![ + "-f".to_string(), + "qcow2".to_string(), + path.to_string(), + "+10M".to_string(), + ]) + .is_ok()); + + // Step4: Write data 2 in the range of [10M, 20M] + let buf = vec![2; 10240]; + let mut qcow2_driver = test_image.create_driver(); + while offset < (10 + 10) * M { + assert!(image_write(&mut qcow2_driver, offset as usize, &buf).is_ok()); + offset += 10240; + } + assert!(image_write(&mut qcow2_driver, offset as usize, &buf).is_err()); + + // Step 5: Read the data from the image. + let mut offset = 0; + let buf = vec![0; 10240]; + while offset < 10 * M { + assert!(image_read(&mut qcow2_driver, offset as usize, &buf).is_ok()); + assert!(vec_is_fill_with(&buf, 1)); + offset += 10240; + } + + while offset < (10 + 10) * M { + assert!(image_read(&mut qcow2_driver, offset as usize, &buf).is_ok()); + assert!(vec_is_fill_with(&buf, 2)); + offset += 10240; + } + assert!(image_read(&mut qcow2_driver, offset as usize, &buf).is_err()); + } + + /// Test resize image with snapshot operation. + /// + /// TestStep: + /// 1. Create a qcow2 image with size of 1G. + /// 2. Perform the operations of snapshot and image resize. 
+ #[test] + fn test_image_resize_with_snapshot() { + let path = "/tmp/test_image_resize_with_snapshot.qcow2"; + let test_image = TestQcow2Image::create(16, 16, path, "1G"); + let mut driver = test_image.create_driver(); + let buf = vec![1; 1024 * 1024]; + assert!(image_write(&mut driver, 0, &buf).is_ok()); + assert_eq!(driver.header.size, G); + drop(driver); + let quite = false; + let fix = FIX_ERRORS | FIX_LEAKS; + + assert!(image_snapshot(vec![ + "-c".to_string(), + "test_snapshot_1G".to_string(), + path.to_string() + ]) + .is_ok()); + assert!(test_image.check_image(quite, fix)); + let mut driver = test_image.create_driver(); + let buf = vec![2; 1024 * 1024]; + assert!(image_write(&mut driver, 0, &buf).is_ok()); + assert_eq!(driver.header.size, G); + drop(driver); + assert!(test_image.check_image(quite, fix)); + + assert!(image_resize(vec![ + "-f".to_string(), + "qcow2".to_string(), + path.to_string(), + "+20G".to_string(), + ]) + .is_ok()); + assert!(test_image.check_image(quite, fix)); + let mut driver = test_image.create_driver(); + let buf = vec![3; 1024 * 1024]; + assert!(image_write(&mut driver, 20 * G as usize, &buf).is_ok()); + + assert!(image_read(&mut driver, 0, &buf).is_ok()); + assert!(vec_is_fill_with(&buf, 2)); + assert_eq!(driver.header.size, 21 * G); + drop(driver); + assert!(test_image.check_image(quite, fix)); + + assert!(image_snapshot(vec![ + "-c".to_string(), + "test_snapshot_21G".to_string(), + path.to_string() + ]) + .is_ok()); + assert!(test_image.check_image(quite, fix)); + let mut driver = test_image.create_driver(); + let buf = vec![4; 1024 * 1024]; + assert!(image_write(&mut driver, 20 * G as usize, &buf).is_ok()); + assert_eq!(driver.header.size, 21 * G); + drop(driver); + assert!(test_image.check_image(quite, fix)); + + assert!(image_resize(vec![ + "-f".to_string(), + "qcow2".to_string(), + path.to_string(), + "+10G".to_string(), + ]) + .is_ok()); + assert!(test_image.check_image(quite, fix)); + let mut driver = test_image.create_driver(); + let buf = vec![5; 1024 * 1024]; + assert!(image_write(&mut driver, 30 * G as usize, &buf).is_ok()); + + assert!(image_read(&mut driver, 20 * G as usize, &buf).is_ok()); + assert!(vec_is_fill_with(&buf, 4)); + assert_eq!(driver.header.size, 31 * G); + drop(driver); + assert!(test_image.check_image(quite, fix)); + + assert!(image_snapshot(vec![ + "-a".to_string(), + "test_snapshot_1G".to_string(), + path.to_string() + ]) + .is_ok()); + assert!(test_image.check_image(quite, fix)); + let mut driver = test_image.create_driver(); + let buf = vec![0; 1024 * 1024]; + assert!(image_read(&mut driver, 0, &buf).is_ok()); + assert!(vec_is_fill_with(&buf, 1)); + assert_eq!(driver.header.size, G); + drop(driver); + assert!(test_image.check_image(quite, fix)); + + assert!(image_snapshot(vec![ + "-a".to_string(), + "test_snapshot_21G".to_string(), + path.to_string() + ]) + .is_ok()); + assert!(test_image.check_image(quite, fix)); + let mut driver = test_image.create_driver(); + let buf = vec![0; 1024 * 1024]; + assert!(image_read(&mut driver, 20 * G as usize, &buf).is_ok()); + assert!(vec_is_fill_with(&buf, 3)); + assert_eq!(driver.header.size, 21 * G); + drop(driver); + assert!(test_image.check_image(quite, fix)); + } +} diff --git a/image/src/main.rs b/image/src/main.rs new file mode 100644 index 0000000000000000000000000000000000000000..b9c7aef5e57bb8dfc2c5c210b2fd4d142c55638e --- /dev/null +++ b/image/src/main.rs @@ -0,0 +1,95 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. 
+// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +mod cmdline; +mod img; + +use std::{ + env, + process::{ExitCode, Termination}, +}; + +use anyhow::{bail, Result}; + +use crate::img::{ + image_check, image_create, image_info, image_resize, image_snapshot, print_help, print_version, +}; + +const BINARY_NAME: &str = "stratovirt-img"; + +macro_rules! image_operation_matches { + ( $cmd:expr; + $(($($opt_0:tt)|+, $function_0:tt, $arg:expr)),*; + $(($($opt_1:tt)|+, $function_1:tt)),* + ) => { + match $cmd { + $( + $($opt_0)|+ => { + if let Err(e) = $function_0($arg) { + bail!("{}: {:?}", BINARY_NAME, e); + } + }, + )* + $( + $($opt_1)|+ => { + $function_1() + }, + )* + _ => { + bail!( + "{}: Command not found: {}\n\ + Try 'stratovirt-img --help' for more information.", + BINARY_NAME, + $cmd + ); + } + } + } +} + +fn main() -> ExitCode { + let args: Vec = env::args().collect(); + + match run(args) { + Ok(ret) => ret.report(), + Err(e) => { + println!("{:?}", e); + ExitCode::FAILURE + } + } +} + +fn run(args: Vec) -> Result<()> { + if args.len() < 2 { + bail!( + "{0}: Not enough arguments\n\ + Try '{0} --help' for more information", + BINARY_NAME + ); + } + + let opt = args[1].clone(); + let cmd_args = args[2..].to_vec(); + + image_operation_matches!( + opt.as_str(); + ("create", image_create, cmd_args), + ("info", image_info, cmd_args), + ("check", image_check, cmd_args), + ("resize", image_resize, cmd_args), + ("snapshot", image_snapshot, cmd_args); + ("-v" | "--version", print_version), + ("-h" | "--help", print_help) + ); + + Ok(()) +} diff --git a/license/LICENSE b/license/LICENSE index 9e32cdef1625daed25cf365c865f01050877cff3..a589e86a28c0a587f8647e4c416fbbaaf55be347 100644 --- a/license/LICENSE +++ b/license/LICENSE @@ -1,6 +1,6 @@ 木兰宽松许可证, 第2版 - 木兰宽松许可证, 第2版 + 木兰宽松许可证, 第2版 2020年1月 http://license.coscl.org.cn/MulanPSL2 @@ -36,15 +36,15 @@ 5. 免责声明与责任限制 - “软件”及其中的“贡献”在提供时不带任何明示或默示的担保。在任何情况下,“贡献者”或版权所有者不对任何人因使用“软件”或其中的“贡献”而引发的任何直接或间接损失承担责任,不论因何种原因导致或者基于何种法律理论,即使其曾被建议有此种损失的可能性。 + “软件”及其中的“贡献”在提供时不带任何明示或默示的担保。在任何情况下,“贡献者”或版权所有者不对任何人因使用“软件”或其中的“贡献”而引发的任何直接或间接损失承担责任,不论因何种原因导致或者基于何种法律理论,即使其曾被建议有此种损失的可能性。 6. 语言 “本许可证”以中英文双语表述,中英文版本具有同等法律效力。如果中英文版本存在任何冲突不一致,以中文版为准。 - 条款结束 + 条款结束 如何将木兰宽松许可证,第2版,应用到您的软件 - + 如果您希望将木兰宽松许可证,第2版,应用到您的新软件,为了方便接收者查阅,建议您完成如下三步: 1, 请您补充如下声明中的空白,包括软件名、软件的首次发表年份以及您作为版权人的名字; @@ -55,11 +55,11 @@ Copyright (c) [Year] [name of copyright holder] [Software Name] is licensed under Mulan PSL v2. - You can use this software according to the terms and conditions of the Mulan PSL v2. + You can use this software according to the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at: - http://license.coscl.org.cn/MulanPSL2 - THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. - See the Mulan PSL v2 for more details. 
+ http://license.coscl.org.cn/MulanPSL2 + THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + See the Mulan PSL v2 for more details. Mulan Permissive Software License,Version 2 @@ -67,25 +67,25 @@ Mulan Permissive Software License,Version 2 (Mulan PSL v2) January 2020 http://license.coscl.org.cn/MulanPSL2 - Your reproduction, use, modification and distribution of the Software shall be subject to Mulan PSL v2 (this License) with the following terms and conditions: - + Your reproduction, use, modification and distribution of the Software shall be subject to Mulan PSL v2 (this License) with the following terms and conditions: + 0. Definition - - Software means the program and related documents which are licensed under this License and comprise all Contribution(s). - + + Software means the program and related documents which are licensed under this License and comprise all Contribution(s). + Contribution means the copyrightable work licensed by a particular Contributor under this License. - + Contributor means the Individual or Legal Entity who licenses its copyrightable work under this License. - + Legal Entity means the entity making a Contribution and all its Affiliates. - + Affiliates means entities that control, are controlled by, or are under common control with the acting entity under this License, ‘control’ means direct or indirect ownership of at least fifty percent (50%) of the voting power, capital or other securities of controlled or commonly controlled entity. 1. Grant of Copyright License Subject to the terms and conditions of this License, each Contributor hereby grants to you a perpetual, worldwide, royalty-free, non-exclusive, irrevocable copyright license to reproduce, use, modify, or distribute its Contribution, with modification or not. - 2. Grant of Patent License + 2. Grant of Patent License Subject to the terms and conditions of this License, each Contributor hereby grants to you a perpetual, worldwide, royalty-free, non-exclusive, irrevocable (except for revocation under this Section) patent license to make, have made, use, offer for sale, sell, import or otherwise transfer its Contribution, where such patent license is only limited to the patent claims owned or controlled by such Contributor now or in future which will be necessarily infringed by its Contribution alone, or by combination of the Contribution with the Software to which the Contribution was contributed. The patent license shall not apply to any modification of the Contribution, and any other combination which includes the Contribution. If you or your Affiliates directly or indirectly institute patent litigation (including a cross claim or counterclaim in a litigation) or other patent enforcement activities against any individual or entity by alleging that the Software or any Contribution in it infringes patents, then any patent license granted to you under this License for the Software shall terminate as of the date such litigation or activity is filed or taken. 
@@ -111,7 +111,7 @@ To apply the Mulan PSL v2 to your work, for easy identification by recipients, you are suggested to complete following three steps: - i Fill in the blanks in following statement, including insert your software name, the year of the first publication of your software, and your name identified as the copyright owner; + i Fill in the blanks in following statement, including insert your software name, the year of the first publication of your software, and your name identified as the copyright owner; ii Create a file named “LICENSE” which contains the whole context of this License in the first directory of your software package; @@ -120,8 +120,8 @@ Copyright (c) [Year] [name of copyright holder] [Software Name] is licensed under Mulan PSL v2. - You can use this software according to the terms and conditions of the Mulan PSL v2. + You can use this software according to the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at: - http://license.coscl.org.cn/MulanPSL2 - THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. - See the Mulan PSL v2 for more details. + http://license.coscl.org.cn/MulanPSL2 + THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + See the Mulan PSL v2 for more details. diff --git a/license/Third_Party_Open_Source_Software_Notice.md b/license/Third_Party_Open_Source_Software_Notice.md index 4712cf5356d96dd766bb8df094e10b9fd7b668ab..721534eafa4b0ec554f308658a69a1bd67bcbe52 100644 --- a/license/Third_Party_Open_Source_Software_Notice.md +++ b/license/Third_Party_Open_Source_Software_Notice.md @@ -5,7 +5,7 @@ Warranty Disclaimer THE OPEN SOURCE SOFTWARE IN THIS SOFTWARE IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE APPLICABLE LICENSES FOR MORE DETAILS. Copyright Notice and License Texts -Software: libc 0.2.94 +Software: libc 0.2.146 Copyright notice: Copyright (c) 2014-2020 The Rust Project Developers License: MIT or Apache License Version 2.0 @@ -241,7 +241,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -Software: log 0.4.14 +Software: log 0.4.18 Copyright notice: Copyright (c) 2014 The Rust Project Developers Copyright 2014-2015 The Rust Project Developers @@ -282,27 +282,33 @@ OTHER DEALINGS IN THE SOFTWARE. For more information, please refer to -Software: serde 1.0.125 +Software: serde 1.0.163 Copyright notice: Copyright (c) David Tolnay Copyright (c) Erick Tryzelaar License: MIT or Apache License Version 2.0 Please see above. -Software: serde_json 1.0.64 +Software: serde_json 1.0.96 Copyright notice: Copyright (c) David Tolnay Copyright (c) Erick Tryzelaar License: MIT or Apache License Version 2.0 Please see above. -Software: error-chain 0.12.4 +Software: anyhow 1.0.71 Copyright notice: -Copyright (c) 2017 The Error-Chain Project Developers +Copyright (c) David Tolnay +License: MIT or Apache License Version 2.0 +Please see above. + +Software: thiserror 1.0 +Copyright notice: +Copyright (c) David Tolnay License: MIT or Apache License Version 2.0 Please see above. 
-Software: vmm-sys-util 0.7.0 +Software: vmm-sys-util 0.12.1 Copyright notice: Copyright 2019 Intel Corporation. All Rights Reserved. Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. @@ -312,7 +318,13 @@ Copyright (C) 2019 Alibaba Cloud Computing. All rights reserved. Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. Copyright 2018 The Chromium OS Authors. All rights reserved. Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. -License: Apache License Version 2.0 or BSD 3-Clause +License: BSD 3-Clause +Please see above. + +Software: libusb1-sys 0.6.4 +Copyright notice: +Copyright (c) 2015 David Cuddeback +License: MIT Please see above. ---------------------------------------------------------------- @@ -344,7 +356,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -Software: kvm-ioctls 0.6.1 +Software: kvm-ioctls 0.16.0 Copyright notice: Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. Portions Copyright 2017 The Chromium OS Authors. All rights reserved. @@ -353,63 +365,69 @@ Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. License: MIT or Apache License Version 2.0 Please see above. -Software: kvm-bindings 0.3.1 +Software: kvm-bindings 0.7.0 Copyright notice: Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. License: The APACHE 2.0 License Please see above. -Software: arc-swap 0.4.8 +Software: arc-swap 1.6.0 Copyright notice: Copyright (c) 2017 arc-swap developers License: MIT or Apache License Version 2.0 Please see above. -Software: serial_test 0.5.1 -Copyright notice: -Copyright (c) 2018 Tom Parker-Shemilt -License: MIT -Please see above. - -Software: syn 1.0.72 +Software: syn 2.0.18 Copyright notice: Copyright (c) David Tolnay License: MIT or Apache License Version 2.0 Please see above. -Software: quote 1.0.7 +Software: quote 1.0.28 Copyright notice: Copyright (c) David Tolnay License: MIT or Apache License Version 2.0 Please see above. -Software: proc-macro2 1.0 +Software: proc-macro2 1.0.59 Copyright notice: Copyright (c) David Tolnay Copyright (c) Alex Crichton License: MIT or Apache License Version 2.0 Please see above. -Software: strum 0.20 +Software: strum 0.24.1 Copyright notice: Copyright (c) 2019 Peter Glotfelty License: MIT Please see above. -Software: strum_macros 0.20 +Software: strum_macros 0.24.3 Copyright notice: Copyright (c) 2019 Peter Glotfelty License: MIT Please see above. -Software: vfio-bindings 0.2.0 +Software: vfio-bindings 0.3.1 Copyright notice: Copyright (c) 2019 Intel Corporation. All Rights Reserved. License: Apache License Version 2.0 or BSD 3-Clause License Please see above. -Software: once_cell 1.9.0 +Software: once_cell 1.18.0 Copyright notice: Copyright (c) Aleksey Kladov License: MIT OR Apache-2.0 Please see above. + +Software: io-uring 0.6.0 +Copyright notice: +Copyright (c) tokio-rs +License: MIT OR Apache-2.0 +Please see above. + +Software: capng 0.2.3 +Copyright notice: +Copyright (C) 2020 Red Hat, Inc. All rights reserved. +License: Apache License Version 2.0 or BSD 3-Clause License +Please see above. 
diff --git a/machine/Cargo.toml b/machine/Cargo.toml index b2674ba83ca97313122fac93c7a2a4703a271d31..ecead47e132177d1497ce55cbaa995176b783779 100644 --- a/machine/Cargo.toml +++ b/machine/Cargo.toml @@ -1,35 +1,63 @@ [package] name = "machine" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" license = "Mulan PSL v2" description = "Emulation machines" [dependencies] -error-chain = "0.12.4" -kvm-bindings = ">=0.3.0" -kvm-ioctls = "0.6.0" -log = "0.4.8" -libc = ">=0.2.71" -serde = { version = ">=1.0.114", features = ["derive"] } -serde_json = "1.0.55" -vmm-sys-util = ">=0.7.0" -vfio-bindings = "0.2.0" +log = "0.4" +libc = "0.2" +serde_json = "1.0" +vmm-sys-util = "0.12.1" +thiserror = "1.0" +anyhow = "1.0" acpi = { path = "../acpi" } address_space = { path = "../address_space" } boot_loader = { path = "../boot_loader" } cpu = { path = "../cpu" } devices = { path = "../devices" } -hypervisor = { path = "../hypervisor" } +hypervisor = { path = "../hypervisor"} machine_manager = { path = "../machine_manager" } migration = { path = "../migration" } -pci = { path = "../pci" } -sysbus = { path = "../sysbus" } +migration_derive = { path = "../migration/migration_derive" } util = { path = "../util" } virtio = { path = "../virtio" } -vfio = { path = "../vfio" } +vfio = { path = "../vfio" , optional = true } +block_backend = { path = "../block_backend" } +ui = { path = "../ui" } +trace = { path = "../trace" } +clap = { version = "=4.1.4", default-features = false, features = ["std", "derive"] } [features] -default = ["qmp"] -qmp = [] +default = [] +boot_time = ["cpu/boot_time", "hypervisor/boot_time"] +scream = ["devices/scream", "machine_manager/scream"] +scream_alsa = ["scream", "devices/scream_alsa", "machine_manager/scream_alsa"] +scream_pulseaudio = ["scream", "devices/scream_pulseaudio","machine_manager/scream_pulseaudio"] +scream_ohaudio = ["scream", "devices/scream_ohaudio", "machine_manager/scream_ohaudio"] +pvpanic = ["devices/pvpanic"] +demo_device = ["devices/demo_device", "machine_manager/demo_device"] +usb_host = ["devices/usb_host", "machine_manager/usb_host"] +usb_camera = ["devices/usb_camera", "machine_manager/usb_camera"] +usb_camera_v4l2 = ["usb_camera", "devices/usb_camera_v4l2", "machine_manager/usb_camera_v4l2", "util/usb_camera_v4l2"] +usb_camera_oh = ["usb_camera", "devices/usb_camera_oh", "machine_manager/usb_camera_oh"] +windows_emu_pid = ["ui/console", "machine_manager/windows_emu_pid"] +gtk = ["windows_emu_pid", "ui/gtk", "machine_manager/gtk"] +vnc = ["ui/vnc", "machine_manager/vnc"] +vnc_auth = ["vnc"] +ohui_srv = ["windows_emu_pid", "ui/ohui_srv", "machine_manager/ohui_srv", "virtio/ohui_srv"] +ramfb = ["devices/ramfb", "machine_manager/ramfb"] +virtio_gpu = ["virtio/virtio_gpu", "machine_manager/virtio_gpu"] +vfio_device = ["vfio", "hypervisor/vfio_device"] +usb_uas = ["devices/usb_uas"] +virtio_rng = ["virtio/virtio_rng"] +virtio_scsi = ["virtio/virtio_scsi"] +vhost_vsock = ["virtio/vhost_vsock"] +vhostuser_block = ["virtio/vhostuser_block"] +vhostuser_net = ["virtio/vhostuser_net"] +vhost_net = ["virtio/vhost_net"] +trace_to_logger = ["devices/trace_to_logger"] +trace_to_ftrace = ["devices/trace_to_ftrace"] +trace_to_hitrace = ["devices/trace_to_hitrace"] diff --git a/machine/src/aarch64/fdt.rs b/machine/src/aarch64/fdt.rs new file mode 100644 index 0000000000000000000000000000000000000000..f015ad1ff8280a45ccce23119fc2c9e1183d869a --- /dev/null +++ b/machine/src/aarch64/fdt.rs @@ -0,0 +1,339 @@ +// 
Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use anyhow::Result; + +use crate::MachineBase; +use cpu::PMU_INTR; +use devices::sysbus::{to_sysbusdevops, SysBusDevType, SysRes}; +use devices::{Bus, SYS_BUS_DEVICE}; +use util::device_tree::{self, FdtBuilder}; + +/// Function that helps to generate arm pmu in device-tree. +/// +/// # Arguments +/// +/// * `fdt` - Flatted device-tree blob where node will be filled into. +fn generate_pmu_node(fdt: &mut FdtBuilder) -> Result<()> { + let node = "pmu"; + let pmu_node_dep = fdt.begin_node(node)?; + fdt.set_property_string("compatible", "arm,armv8-pmuv3")?; + fdt.set_property_u32("interrupt-parent", device_tree::GIC_PHANDLE)?; + fdt.set_property_array_u32( + "interrupts", + &[ + device_tree::GIC_FDT_IRQ_TYPE_PPI, + PMU_INTR, + device_tree::IRQ_TYPE_LEVEL_HIGH, + ], + )?; + fdt.end_node(pmu_node_dep) +} + +/// Function that helps to generate serial node in device-tree. +/// +/// # Arguments +/// +/// * `dev_info` - Device resource info of serial device. +/// * `fdt` - Flatted device-tree blob where serial node will be filled into. +fn generate_serial_device_node(fdt: &mut FdtBuilder, res: &SysRes) -> Result<()> { + let node = format!("pl011@{:x}", res.region_base); + let serial_node_dep = fdt.begin_node(&node)?; + fdt.set_property_string("compatible", "arm,pl011\0arm,primecell")?; + fdt.set_property_string("clock-names", "uartclk\0apb_pclk")?; + fdt.set_property_array_u32( + "clocks", + &[device_tree::CLK_PHANDLE, device_tree::CLK_PHANDLE], + )?; + fdt.set_property_array_u64("reg", &[res.region_base, res.region_size])?; + fdt.set_property_array_u32( + "interrupts", + &[ + device_tree::GIC_FDT_IRQ_TYPE_SPI, + res.irq as u32, + device_tree::IRQ_TYPE_EDGE_RISING, + ], + )?; + fdt.end_node(serial_node_dep) +} + +/// Function that helps to generate RTC node in device-tree. +/// +/// # Arguments +/// +/// * `dev_info` - Device resource info of RTC device. +/// * `fdt` - Flatted device-tree blob where RTC node will be filled into. +fn generate_rtc_device_node(fdt: &mut FdtBuilder, res: &SysRes) -> Result<()> { + let node = format!("pl031@{:x}", res.region_base); + let rtc_node_dep = fdt.begin_node(&node)?; + fdt.set_property_string("compatible", "arm,pl031\0arm,primecell\0")?; + fdt.set_property_string("clock-names", "apb_pclk")?; + fdt.set_property_u32("clocks", device_tree::CLK_PHANDLE)?; + fdt.set_property_array_u64("reg", &[res.region_base, res.region_size])?; + fdt.set_property_array_u32( + "interrupts", + &[ + device_tree::GIC_FDT_IRQ_TYPE_SPI, + res.irq as u32, + device_tree::IRQ_TYPE_LEVEL_HIGH, + ], + )?; + fdt.end_node(rtc_node_dep) +} + +/// Function that helps to generate Virtio-Mmio device's node in device-tree. +/// +/// # Arguments +/// +/// * `dev_info` - Device resource info of Virtio-Mmio device. +/// * `fdt` - Flatted device-tree blob where node will be filled into. 
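+/// +/// The generated node advertises a "virtio,mmio" compatible string; the guest kernel matches on that string and uses the "reg" and "interrupts" properties filled in below to probe the virtio-mmio transport.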
+fn generate_virtio_devices_node(fdt: &mut FdtBuilder, res: &SysRes) -> Result<()> { + let node = format!("virtio_mmio@{:x}", res.region_base); + let virtio_node_dep = fdt.begin_node(&node)?; + fdt.set_property_string("compatible", "virtio,mmio")?; + fdt.set_property_u32("interrupt-parent", device_tree::GIC_PHANDLE)?; + fdt.set_property_array_u64("reg", &[res.region_base, res.region_size])?; + fdt.set_property_array_u32( + "interrupts", + &[ + device_tree::GIC_FDT_IRQ_TYPE_SPI, + res.irq as u32, + device_tree::IRQ_TYPE_EDGE_RISING, + ], + )?; + fdt.end_node(virtio_node_dep) +} + +/// Function that helps to generate fw-cfg node in device-tree. +/// +/// # Arguments +/// +/// * `dev_info` - Device resource info of fw-cfg device. +/// * `fdt` - Flatted device-tree blob where fw-cfg node will be filled into. +fn generate_fwcfg_device_node(fdt: &mut FdtBuilder, res: &SysRes) -> Result<()> { + let node = format!("fw-cfg@{:x}", res.region_base); + let fwcfg_node_dep = fdt.begin_node(&node)?; + fdt.set_property_string("compatible", "qemu,fw-cfg-mmio")?; + fdt.set_property_array_u64("reg", &[res.region_base, res.region_size])?; + fdt.end_node(fwcfg_node_dep) +} + +/// Function that helps to generate flash node in device-tree. +/// +/// # Arguments +/// +/// * `dev_info` - Device resource info of fw-cfg device. +/// * `flash` - Flatted device-tree blob where fw-cfg node will be filled into. +fn generate_flash_device_node(fdt: &mut FdtBuilder, res: &SysRes) -> Result<()> { + let flash_base = res.region_base; + let flash_size = res.region_size; + let node = format!("flash@{:x}", flash_base); + let flash_node_dep = fdt.begin_node(&node)?; + fdt.set_property_string("compatible", "cfi-flash")?; + fdt.set_property_array_u64("reg", &[flash_base, flash_size])?; + fdt.set_property_u32("bank-width", 4)?; + fdt.end_node(flash_node_dep) +} + +/// Trait that helps to generate all nodes in device-tree. +#[allow(clippy::upper_case_acronyms)] +trait CompileFDTHelper { + /// Function that helps to generate cpu nodes. + fn generate_cpu_nodes(&self, fdt: &mut FdtBuilder) -> Result<()>; + /// Function that helps to generate Virtio-mmio devices' nodes. + fn generate_devices_node(&self, fdt: &mut FdtBuilder) -> Result<()>; + /// Function that helps to generate numa node distances. 
+ fn generate_distance_node(&self, fdt: &mut FdtBuilder) -> Result<()>; +} + +impl CompileFDTHelper for MachineBase { + fn generate_cpu_nodes(&self, fdt: &mut FdtBuilder) -> Result<()> { + let node = "cpus"; + + let cpus_node_dep = fdt.begin_node(node)?; + fdt.set_property_u32("#address-cells", 0x02)?; + fdt.set_property_u32("#size-cells", 0x0)?; + + // Generate CPU topology + let cpu_map_node_dep = fdt.begin_node("cpu-map")?; + for socket in 0..self.cpu_topo.sockets { + let sock_name = format!("cluster{}", socket); + let sock_node_dep = fdt.begin_node(&sock_name)?; + for cluster in 0..self.cpu_topo.clusters { + let clster = format!("cluster{}", cluster); + let cluster_node_dep = fdt.begin_node(&clster)?; + + for core in 0..self.cpu_topo.cores { + let core_name = format!("core{}", core); + let core_node_dep = fdt.begin_node(&core_name)?; + + for thread in 0..self.cpu_topo.threads { + let thread_name = format!("thread{}", thread); + let thread_node_dep = fdt.begin_node(&thread_name)?; + let vcpuid = self.cpu_topo.threads * self.cpu_topo.cores * cluster + + self.cpu_topo.threads * core + + thread; + fdt.set_property_u32( + "cpu", + u32::from(vcpuid) + device_tree::CPU_PHANDLE_START, + )?; + fdt.end_node(thread_node_dep)?; + } + fdt.end_node(core_node_dep)?; + } + fdt.end_node(cluster_node_dep)?; + } + fdt.end_node(sock_node_dep)?; + } + fdt.end_node(cpu_map_node_dep)?; + + for cpu_index in 0..self.cpu_topo.nrcpus { + let mpidr = self.cpus[cpu_index as usize].arch().lock().unwrap().mpidr(); + + let node = format!("cpu@{:x}", mpidr); + let mpidr_node_dep = fdt.begin_node(&node)?; + fdt.set_property_u32( + "phandle", + u32::from(cpu_index) + device_tree::CPU_PHANDLE_START, + )?; + fdt.set_property_string("device_type", "cpu")?; + fdt.set_property_string("compatible", "arm,arm-v8")?; + if self.cpu_topo.max_cpus > 1 { + fdt.set_property_string("enable-method", "psci")?; + } + fdt.set_property_u64("reg", mpidr & 0x007F_FFFF)?; + fdt.set_property_u32("phandle", device_tree::FIRST_VCPU_PHANDLE)?; + + if let Some(numa_nodes) = &self.numa_nodes { + for numa_index in 0..numa_nodes.len() { + let numa_node = numa_nodes.get(&(numa_index as u32)); + if numa_node.unwrap().cpus.contains(&(cpu_index)) { + fdt.set_property_u32("numa-node-id", numa_index as u32)?; + } + } + } + + fdt.end_node(mpidr_node_dep)?; + } + + fdt.end_node(cpus_node_dep)?; + + if self.cpus[0].arch().lock().unwrap().get_features().pmu { + generate_pmu_node(fdt)?; + } + + Ok(()) + } + + fn generate_devices_node(&self, fdt: &mut FdtBuilder) -> Result<()> { + // timer + let mut cells: Vec = Vec::new(); + for &irq in [13, 14, 11, 10].iter() { + cells.push(device_tree::GIC_FDT_IRQ_TYPE_PPI); + cells.push(irq); + cells.push(device_tree::IRQ_TYPE_LEVEL_HIGH); + } + let node = "timer"; + let timer_node_dep = fdt.begin_node(node)?; + fdt.set_property_string("compatible", "arm,armv8-timer")?; + fdt.set_property("always-on", &Vec::new())?; + fdt.set_property_array_u32("interrupts", &cells)?; + fdt.end_node(timer_node_dep)?; + + // clock + let node = "apb-pclk"; + let clock_node_dep = fdt.begin_node(node)?; + fdt.set_property_string("compatible", "fixed-clock")?; + fdt.set_property_string("clock-output-names", "clk24mhz")?; + fdt.set_property_u32("#clock-cells", 0x0)?; + fdt.set_property_u32("clock-frequency", 24_000_000)?; + fdt.set_property_u32("phandle", device_tree::CLK_PHANDLE)?; + fdt.end_node(clock_node_dep)?; + + // psci + let node = "psci"; + let psci_node_dep = fdt.begin_node(node)?; + fdt.set_property_string("compatible", 
"arm,psci-0.2")?; + fdt.set_property_string("method", "hvc")?; + fdt.end_node(psci_node_dep)?; + + let devices = self.sysbus.lock().unwrap().child_devices(); + for dev in devices.values() { + SYS_BUS_DEVICE!(dev, locked_dev, sysbusdev); + match sysbusdev.sysbusdev_base().dev_type { + SysBusDevType::PL011 => { + generate_serial_device_node(fdt, &sysbusdev.sysbusdev_base().res)? + } + SysBusDevType::Rtc => { + generate_rtc_device_node(fdt, &sysbusdev.sysbusdev_base().res)? + } + SysBusDevType::VirtioMmio => { + generate_virtio_devices_node(fdt, &sysbusdev.sysbusdev_base().res)? + } + SysBusDevType::FwCfg => { + generate_fwcfg_device_node(fdt, &sysbusdev.sysbusdev_base().res)?; + } + SysBusDevType::Flash => { + generate_flash_device_node(fdt, &sysbusdev.sysbusdev_base().res)?; + } + _ => (), + } + } + + Ok(()) + } + + fn generate_distance_node(&self, fdt: &mut FdtBuilder) -> Result<()> { + if self.numa_nodes.is_none() { + return Ok(()); + } + + let distance_node_dep = fdt.begin_node("distance-map")?; + fdt.set_property_string("compatible", "numa-distance-map-v1")?; + + let mut matrix = Vec::new(); + let numa_nodes = self.numa_nodes.as_ref().unwrap(); + let existing_nodes: Vec = numa_nodes.keys().cloned().collect(); + for (id, node) in numa_nodes.iter().enumerate() { + let distances = &node.1.distances; + for i in existing_nodes.iter() { + matrix.push(id as u32); + matrix.push(*i); + let dist: u32 = if id as u32 == *i { + 10 + } else if let Some(distance) = distances.get(i) { + u32::from(*distance) + } else { + 20 + }; + matrix.push(dist); + } + } + + fdt.set_property_array_u32("distance-matrix", matrix.as_ref())?; + fdt.end_node(distance_node_dep) + } +} + +impl device_tree::CompileFDT for MachineBase { + fn generate_fdt_node(&self, fdt: &mut FdtBuilder) -> Result<()> { + fdt.set_property_string("compatible", "linux,dummy-virt")?; + fdt.set_property_u32("#address-cells", 0x2)?; + fdt.set_property_u32("#size-cells", 0x2)?; + fdt.set_property_u32("interrupt-parent", device_tree::GIC_PHANDLE)?; + + self.generate_cpu_nodes(fdt)?; + self.generate_devices_node(fdt)?; + self.irq_chip.as_ref().unwrap().generate_fdt_node(fdt)?; + self.generate_distance_node(fdt) + } +} diff --git a/machine/src/aarch64/micro.rs b/machine/src/aarch64/micro.rs new file mode 100644 index 0000000000000000000000000000000000000000..4cf525047b90adf2607e21ef1d52d8fa69238710 --- /dev/null +++ b/machine/src/aarch64/micro.rs @@ -0,0 +1,322 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Context, Result}; + +use crate::{micro_common::syscall::syscall_whitelist, MachineBase, MachineError}; +use crate::{register_shutdown_event, LightMachine, MachineOps}; +use address_space::{AddressAttr, AddressSpace, GuestAddress, Region}; +use cpu::CPUTopology; +use devices::legacy::{PL011, PL031}; +use devices::{Device, ICGICConfig, ICGICv2Config, ICGICv3Config, GIC_IRQ_MAX}; +use hypervisor::kvm::aarch64::*; +use machine_manager::config::{Param, SerialConfig, VmConfig}; +use migration::{MigrationManager, MigrationStatus}; +use util::device_tree::{self, CompileFDT, FdtBuilder}; +use util::gen_base_func; +use util::seccomp::{BpfRule, SeccompCmpOpt}; +use virtio::{VirtioDevice, VirtioMmioDevice}; + +#[repr(usize)] +pub enum LayoutEntryType { + GicDist, + GicCpu, + GicIts, + GicRedist, + Uart, + Rtc, + Mmio, + Mem, + HighGicRedist, +} + +pub const MEM_LAYOUT: &[(u64, u64)] = &[ + (0x0800_0000, 0x0001_0000), // GicDist + (0x0801_0000, 0x0001_0000), // GicCpu + (0x0808_0000, 0x0002_0000), // GicIts + (0x080A_0000, 0x00F6_0000), // GicRedist (max 123 redistributors) + (0x0900_0000, 0x0000_1000), // Uart + (0x0901_0000, 0x0000_1000), // Rtc + (0x0A00_0000, 0x0000_0200), // Mmio + (0x4000_0000, 0x80_0000_0000), // Mem + (256 << 30, 0x200_0000), // HighGicRedist, (where remaining redistributors locates) +]; + +impl MachineOps for LightMachine { + gen_base_func!(machine_base, machine_base_mut, MachineBase, base); + + fn init_machine_ram(&self, sys_mem: &Arc, mem_size: u64) -> Result<()> { + let vm_ram = self.get_vm_ram(); + let layout_size = MEM_LAYOUT[LayoutEntryType::Mem as usize].1; + let ram = Region::init_alias_region( + vm_ram.clone(), + 0, + std::cmp::min(layout_size, mem_size), + "pc_ram", + ); + sys_mem + .root() + .add_subregion(ram, MEM_LAYOUT[LayoutEntryType::Mem as usize].0) + } + + fn init_interrupt_controller(&mut self, vcpu_count: u64) -> Result<()> { + let v3 = ICGICv3Config { + msi: true, + dist_range: MEM_LAYOUT[LayoutEntryType::GicDist as usize], + redist_region_ranges: vec![ + MEM_LAYOUT[LayoutEntryType::GicRedist as usize], + MEM_LAYOUT[LayoutEntryType::HighGicRedist as usize], + ], + its_range: Some(MEM_LAYOUT[LayoutEntryType::GicIts as usize]), + }; + let v2 = ICGICv2Config { + dist_range: MEM_LAYOUT[LayoutEntryType::GicDist as usize], + cpu_range: MEM_LAYOUT[LayoutEntryType::GicCpu as usize], + v2m_range: None, + sys_mem: None, + }; + // Passing both v2 and v3, leave GIC self to decide which one to use. 
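+        // With `version` set to `None` below, the hypervisor settles on whichever GIC model it can actually create on this host.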
+ let intc_conf = ICGICConfig { + version: None, + vcpu_count, + max_irq: GIC_IRQ_MAX, + v3: Some(v3), + v2: Some(v2), + }; + + let hypervisor = self.get_hypervisor(); + let mut locked_hypervisor = hypervisor.lock().unwrap(); + self.base.irq_chip = Some(locked_hypervisor.create_interrupt_controller(&intc_conf)?); + self.base.irq_chip.as_ref().unwrap().realize()?; + + let irq_manager = locked_hypervisor.create_irq_manager()?; + self.base.sysbus.lock().unwrap().irq_manager = irq_manager.line_irq_manager; + Ok(()) + } + + fn add_rtc_device(&mut self) -> Result<()> { + let pl031 = PL031::new( + &self.base.sysbus, + MEM_LAYOUT[LayoutEntryType::Rtc as usize].0, + MEM_LAYOUT[LayoutEntryType::Rtc as usize].1, + )?; + pl031 + .realize() + .with_context(|| "Failed to realize pl031.")?; + Ok(()) + } + + fn add_serial_device(&mut self, config: &SerialConfig) -> Result<()> { + let region_base: u64 = MEM_LAYOUT[LayoutEntryType::Uart as usize].0; + let region_size: u64 = MEM_LAYOUT[LayoutEntryType::Uart as usize].1; + let pl011 = PL011::new(config.clone(), &self.base.sysbus, region_base, region_size) + .with_context(|| "Failed to create PL011")?; + pl011.realize().with_context(|| "Failed to realize PL011")?; + let mut bs = self.base.boot_source.lock().unwrap(); + bs.kernel_cmdline.push(Param { + param_type: "earlycon".to_string(), + value: format!("pl011,mmio,0x{:08x}", region_base), + }); + Ok(()) + } + + fn realize(vm: &Arc>, vm_config: &mut VmConfig) -> Result<()> { + let mut locked_vm = vm.lock().unwrap(); + + trace::sysbus(&locked_vm.base.sysbus.lock().unwrap()); + trace::vm_state(&locked_vm.base.vm_state); + + let topology = CPUTopology::new().set_topology(( + vm_config.machine_config.nr_threads, + vm_config.machine_config.nr_cores, + vm_config.machine_config.nr_dies, + )); + trace::cpu_topo(&topology); + locked_vm.base.numa_nodes = locked_vm.add_numa_nodes(vm_config)?; + let locked_hypervisor = locked_vm.base.hypervisor.lock().unwrap(); + locked_hypervisor.init_machine(&locked_vm.base.sys_mem)?; + drop(locked_hypervisor); + locked_vm.init_memory( + &vm_config.machine_config.mem_config, + &locked_vm.base.sys_mem, + vm_config.machine_config.nr_cpus, + )?; + + let boot_config = + locked_vm.load_boot_source(None, MEM_LAYOUT[LayoutEntryType::Mem as usize].0)?; + let cpu_config = locked_vm.load_cpu_features(vm_config)?; + + let hypervisor = locked_vm.base.hypervisor.clone(); + // vCPUs init,and apply CPU features (for aarch64) + locked_vm.base.cpus.extend(::init_vcpu( + vm.clone(), + hypervisor, + vm_config.machine_config.nr_cpus, + &topology, + &boot_config, + &cpu_config, + )?); + + locked_vm.init_interrupt_controller(u64::from(vm_config.machine_config.nr_cpus))?; + + locked_vm.cpu_post_init(&cpu_config)?; + + // Add mmio devices + locked_vm + .create_replaceable_devices() + .with_context(|| "Failed to create replaceable devices.")?; + locked_vm.add_devices(vm_config)?; + trace::replaceable_info(&locked_vm.replaceable_info); + + let mut fdt_helper = FdtBuilder::new(); + locked_vm + .generate_fdt_node(&mut fdt_helper) + .with_context(|| MachineError::GenFdtErr)?; + let fdt_vec = fdt_helper.finish()?; + locked_vm + .base + .sys_mem + .write( + &mut fdt_vec.as_slice(), + GuestAddress(boot_config.fdt_addr), + fdt_vec.len() as u64, + AddressAttr::Ram, + ) + .with_context(|| MachineError::WrtFdtErr(boot_config.fdt_addr, fdt_vec.len()))?; + register_shutdown_event(locked_vm.shutdown_req.clone(), vm.clone()) + .with_context(|| "Failed to register shutdown event")?; + + 
MigrationManager::register_vm_instance(vm.clone()); + MigrationManager::register_migration_instance(locked_vm.base.migration_hypervisor.clone()); + if let Err(e) = MigrationManager::set_status(MigrationStatus::Setup) { + bail!("Failed to set migration status {}", e); + } + + Ok(()) + } + + fn add_virtio_mmio_net(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { + self.add_virtio_mmio_net(vm_config, cfg_args) + } + + fn add_virtio_mmio_block(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { + self.add_virtio_mmio_block(vm_config, cfg_args) + } + + fn add_virtio_mmio_device( + &mut self, + name: String, + device: Arc>, + ) -> Result>> { + self.add_virtio_mmio_device(name, device) + } + + fn syscall_whitelist(&self) -> Vec { + syscall_whitelist() + } +} + +pub(crate) fn arch_ioctl_allow_list(bpf_rule: BpfRule) -> BpfRule { + bpf_rule + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_ONE_REG() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_DEVICE_ATTR() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_REG_LIST() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_ONE_REG() as u32) +} + +pub(crate) fn arch_syscall_whitelist() -> Vec { + vec![ + BpfRule::new(libc::SYS_epoll_pwait), + BpfRule::new(libc::SYS_newfstatat), + BpfRule::new(libc::SYS_unlinkat), + BpfRule::new(libc::SYS_mkdirat), + ] +} + +/// Trait that helps to generate all nodes in device-tree. +#[allow(clippy::upper_case_acronyms)] +trait CompileFDTHelper { + /// Function that helps to generate memory nodes. + fn generate_memory_node(&self, fdt: &mut FdtBuilder) -> Result<()>; + /// Function that helps to generate the chosen node. + fn generate_chosen_node(&self, fdt: &mut FdtBuilder) -> Result<()>; +} + +impl CompileFDTHelper for LightMachine { + fn generate_memory_node(&self, fdt: &mut FdtBuilder) -> Result<()> { + if self.base.numa_nodes.is_none() { + let mem_base = MEM_LAYOUT[LayoutEntryType::Mem as usize].0; + let mem_size = self.base.sys_mem.memory_end_address().raw_value() + - MEM_LAYOUT[LayoutEntryType::Mem as usize].0; + let node = "memory"; + let memory_node_dep = fdt.begin_node(node)?; + fdt.set_property_string("device_type", "memory")?; + fdt.set_property_array_u64("reg", &[mem_base, mem_size])?; + fdt.end_node(memory_node_dep)?; + + return Ok(()); + } + + // Set NUMA node information. 
+ let mut mem_base = MEM_LAYOUT[LayoutEntryType::Mem as usize].0; + for (id, node) in self.base.numa_nodes.as_ref().unwrap().iter().enumerate() { + let mem_size = node.1.size; + let node = format!("memory@{:x}", mem_base); + let memory_node_dep = fdt.begin_node(&node)?; + fdt.set_property_string("device_type", "memory")?; + fdt.set_property_array_u64("reg", &[mem_base, mem_size])?; + fdt.set_property_u32("numa-node-id", id as u32)?; + fdt.end_node(memory_node_dep)?; + mem_base += mem_size; + } + + Ok(()) + } + + fn generate_chosen_node(&self, fdt: &mut FdtBuilder) -> Result<()> { + let node = "chosen"; + let boot_source = self.base.boot_source.lock().unwrap(); + + let chosen_node_dep = fdt.begin_node(node)?; + let cmdline = &boot_source.kernel_cmdline.to_string(); + fdt.set_property_string("bootargs", cmdline.as_str())?; + + let pl011_property_string = + format!("/pl011@{:x}", MEM_LAYOUT[LayoutEntryType::Uart as usize].0); + fdt.set_property_string("stdout-path", &pl011_property_string)?; + + match &boot_source.initrd { + Some(initrd) => { + fdt.set_property_u64("linux,initrd-start", initrd.initrd_addr)?; + let initrd_end = initrd + .initrd_addr + .checked_add(initrd.initrd_size) + .with_context(|| "initrd end overflow")?; + fdt.set_property_u64("linux,initrd-end", initrd_end)?; + } + None => {} + } + fdt.end_node(chosen_node_dep) + } +} + +impl device_tree::CompileFDT for LightMachine { + fn generate_fdt_node(&self, fdt: &mut FdtBuilder) -> Result<()> { + let node_dep = fdt.begin_node("")?; + self.base.generate_fdt_node(fdt)?; + self.generate_memory_node(fdt)?; + self.generate_chosen_node(fdt)?; + fdt.end_node(node_dep) + } +} diff --git a/machine/src/aarch64/mod.rs b/machine/src/aarch64/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..3ffe949fe294ef86d09c4857269f5187e900a250 --- /dev/null +++ b/machine/src/aarch64/mod.rs @@ -0,0 +1,17 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
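A side note on generate_memory_node above: with NUMA configured, every node gets its own memory@<base> entry, and the bases are packed back to back starting at the RAM base from MEM_LAYOUT. A minimal sketch of that address assignment, using illustrative names rather than the crate's API:

/// Assign a contiguous guest-physical range to each NUMA node in order,
/// starting at `ram_base`; returns one (base, size) pair per node.
fn pack_numa_ranges(ram_base: u64, node_sizes: &[u64]) -> Vec<(u64, u64)> {
    let mut base = ram_base;
    node_sizes
        .iter()
        .map(|&size| {
            let range = (base, size);
            base += size; // the next node starts right after this one
            range
        })
        .collect()
}

// Example: two 1 GiB nodes above a 0x4000_0000 RAM base yield
// [(0x4000_0000, 0x4000_0000), (0x8000_0000, 0x4000_0000)].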
+ +pub mod micro; +pub mod pci_host_root; +pub mod standard; + +mod fdt; diff --git a/machine/src/standard_vm/aarch64/pci_host_root.rs b/machine/src/aarch64/pci_host_root.rs similarity index 32% rename from machine/src/standard_vm/aarch64/pci_host_root.rs rename to machine/src/aarch64/pci_host_root.rs index 36789f1debf3661a5e9ab418f8dc82c0cecbd69f..65ec778f8dbf03e5fd3f320c99a8f30e69cd45dd 100644 --- a/machine/src/standard_vm/aarch64/pci_host_root.rs +++ b/machine/src/aarch64/pci_host_root.rs @@ -12,112 +12,72 @@ use std::sync::{Arc, Mutex, Weak}; -use pci::{ - config::{ - PciConfig, CLASS_CODE_HOST_BRIDGE, DEVICE_ID, PCI_CONFIG_SPACE_SIZE, PCI_VENDOR_ID_REDHAT, - REVISION_ID, SUB_CLASS_CODE, VENDOR_ID, - }, - errors::Result as PciResult, - le_write_u16, PciBus, PciDevOps, +use anyhow::Result; + +use devices::pci::config::{ + PciConfig, CLASS_CODE_HOST_BRIDGE, DEVICE_ID, PCI_CONFIG_SPACE_SIZE, PCI_VENDOR_ID_REDHAT, + REVISION_ID, SUB_CLASS_CODE, VENDOR_ID, }; +use devices::pci::{le_write_u16, PciDevBase, PciDevOps}; +use devices::{Bus, Device, DeviceBase}; +use util::gen_base_func; const DEVICE_ID_PCIE_HOST: u16 = 0x0008; /// PciHost root (Device 0:Function 0). pub struct PciHostRoot { - /// Pci config space. - config: PciConfig, - /// Primary Bus. - parent_bus: Weak>, + base: PciDevBase, } impl PciHostRoot { - pub fn new(parent_bus: Weak>) -> Self { + pub fn new(parent_bus: Weak>) -> Self { Self { - config: PciConfig::new(PCI_CONFIG_SPACE_SIZE, 0), - parent_bus, + base: PciDevBase { + base: DeviceBase::new("PCI Host Root".to_string(), false, Some(parent_bus)), + config: PciConfig::new(0, PCI_CONFIG_SPACE_SIZE, 0), + devfn: 0, + }, } } } -impl PciDevOps for PciHostRoot { - fn init_write_mask(&mut self) -> PciResult<()> { - self.config.init_common_write_mask() - } - - fn init_write_clear_mask(&mut self) -> PciResult<()> { - self.config.init_common_write_clear_mask() - } +impl Device for PciHostRoot { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); - fn realize(mut self) -> PciResult<()> { - self.init_write_mask()?; - self.init_write_clear_mask()?; + fn realize(mut self) -> Result>> { + self.init_write_mask(false)?; + self.init_write_clear_mask(false)?; le_write_u16( - &mut self.config.config, + &mut self.base.config.config, VENDOR_ID as usize, PCI_VENDOR_ID_REDHAT, )?; le_write_u16( - &mut self.config.config, + &mut self.base.config.config, DEVICE_ID as usize, DEVICE_ID_PCIE_HOST, )?; le_write_u16( - &mut self.config.config, + &mut self.base.config.config, SUB_CLASS_CODE as usize, CLASS_CODE_HOST_BRIDGE, )?; - le_write_u16(&mut self.config.config, REVISION_ID as usize, 0)?; + le_write_u16(&mut self.base.config.config, REVISION_ID, 0)?; - let parent_bus = self.parent_bus.upgrade().unwrap(); - parent_bus - .lock() - .unwrap() - .devices - .insert(0, Arc::new(Mutex::new(self))); - Ok(()) - } + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + let mut locked_bus = parent_bus.lock().unwrap(); + let dev = Arc::new(Mutex::new(self)); + locked_bus.attach_child(0, dev.clone())?; - fn read_config(&self, offset: usize, data: &mut [u8]) { - let size = data.len(); - if size > 4 { - error!( - "Failed to read PciHostRoot config space: Invalid data size {}", - size - ); - return; - } - if offset + size > PCI_CONFIG_SPACE_SIZE { - debug!( - "Failed to read PciHostRoot config space: offset {}, size {}, config space size {}", - offset, size, PCI_CONFIG_SPACE_SIZE - ); - return; - } - self.config.read(offset, data); + Ok(dev) } +} - fn write_config(&mut self, offset: 
usize, data: &[u8]) { - let size = data.len(); - if size > 4 { - error!( - "Failed to write PciHostRoot config space: Invalid data size {}", - size - ); - return; - } - if offset + size > PCI_CONFIG_SPACE_SIZE { - debug!( - "Failed to write PciHostRoot config space: offset {}, size {}, config space size {}", - offset, size, PCI_CONFIG_SPACE_SIZE - ); - return; - } - self.config.write(offset, data, 0); - } +impl PciDevOps for PciHostRoot { + gen_base_func!(pci_base, pci_base_mut, PciDevBase, base); - fn name(&self) -> String { - "PCI Host Root".to_string() + fn write_config(&mut self, offset: usize, data: &[u8]) { + self.base.config.write(offset, data, 0, None); } } diff --git a/machine/src/aarch64/standard.rs b/machine/src/aarch64/standard.rs new file mode 100644 index 0000000000000000000000000000000000000000..fd1aa3495a1cc641756c77dfe9aa4cf7298c4e77 --- /dev/null +++ b/machine/src/aarch64/standard.rs @@ -0,0 +1,1316 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub use crate::error::MachineError; + +use std::mem::size_of; +#[cfg(all(target_env = "ohos", feature = "ohui_srv"))] +use std::sync::RwLock; +use std::sync::{Arc, Mutex}; + +use anyhow::{anyhow, bail, Context, Result}; +#[cfg(feature = "ramfb")] +use clap::Parser; + +use super::pci_host_root::PciHostRoot; +use crate::standard_common::syscall::syscall_whitelist; +use crate::standard_common::{AcpiBuilder, StdMachineOps}; +use crate::{register_shutdown_event, MachineBase, MachineOps, StdMachine}; +use acpi::{ + processor_append_priv_res, AcpiGicCpu, AcpiGicDistributor, AcpiGicIts, AcpiGicRedistributor, + AcpiSratGiccAffinity, AcpiSratMemoryAffinity, AcpiTable, AmlBuilder, AmlDevice, AmlInteger, + AmlNameDecl, AmlScope, AmlScopeBuilder, AmlString, CacheHierarchyNode, CacheType, + ProcessorHierarchyNode, TableLoader, ACPI_GTDT_ARCH_TIMER_NS_EL1_IRQ, + ACPI_GTDT_ARCH_TIMER_NS_EL2_IRQ, ACPI_GTDT_ARCH_TIMER_S_EL1_IRQ, ACPI_GTDT_ARCH_TIMER_VIRT_IRQ, + ACPI_GTDT_CAP_ALWAYS_ON, ACPI_GTDT_INTERRUPT_MODE_LEVEL, ACPI_IORT_NODE_ITS_GROUP, + ACPI_IORT_NODE_PCI_ROOT_COMPLEX, ACPI_MADT_GENERIC_CPU_INTERFACE, + ACPI_MADT_GENERIC_DISTRIBUTOR, ACPI_MADT_GENERIC_REDISTRIBUTOR, ACPI_MADT_GENERIC_TRANSLATOR, + ARCH_GIC_MAINT_IRQ, ID_MAPPING_ENTRY_SIZE, INTERRUPT_PPIS_COUNT, INTERRUPT_SGIS_COUNT, + ROOT_COMPLEX_ENTRY_SIZE, +}; +#[cfg(all(target_env = "ohos", feature = "ohui_srv"))] +use address_space::FileBackend; +use address_space::{AddressAttr, AddressSpace, GuestAddress, Region}; +use cpu::{CPUInterface, CPUTopology, CpuLifecycleState, PMU_INTR, PPI_BASE}; +use devices::acpi::ged::{acpi_dsdt_add_power_button, Ged, GedEvent}; +use devices::acpi::power::PowerDev; +use devices::legacy::{ + FwCfgEntryType, FwCfgMem, FwCfgOps, LegacyError as DevErrorKind, PFlash, PL011, PL031, +}; +#[cfg(feature = "ramfb")] +use devices::legacy::{Ramfb, RamfbConfig}; +use devices::pci::{PciBus, PciHost, PciIntxState}; +use devices::sysbus::{to_sysbusdevops, SysBusDevType}; +use devices::{ + convert_bus_mut, Device, ICGICConfig, ICGICv3Config, GIC_IRQ_MAX, 
MUT_PCI_BUS, SYS_BUS_DEVICE, +}; +use hypervisor::kvm::aarch64::*; +use hypervisor::kvm::*; +#[cfg(feature = "ramfb")] +use machine_manager::config::str_slip_to_clap; +#[cfg(feature = "gtk")] +use machine_manager::config::UiContext; +use machine_manager::config::{ + BootIndexInfo, DriveConfig, NumaNode, Param, SerialConfig, VmConfig, +}; +use machine_manager::event; +use machine_manager::machine::{MachineLifecycle, VmState}; +use machine_manager::qmp::{qmp_channel::QmpChannel, qmp_schema}; +use migration::{MigrationManager, MigrationStatus}; +#[cfg(feature = "gtk")] +use ui::gtk::gtk_display_init; +#[cfg(all(target_env = "ohos", feature = "ohui_srv"))] +use ui::ohui_srv::{ohui_init, OhUiServer}; +#[cfg(feature = "vnc")] +use ui::vnc::vnc_init; +use util::byte_code::ByteCode; +use util::device_tree::{self, CompileFDT, FdtBuilder}; +use util::gen_base_func; +use util::loop_context::create_new_eventfd; +use util::seccomp::{BpfRule, SeccompCmpOpt}; + +/// The type of memory layout entry on aarch64 +pub enum LayoutEntryType { + Flash = 0, + GicDist, + GicIts, + GicRedist, + Uart, + Rtc, + FwCfg, + Ged, + PowerDev, + Mmio, + PcieMmio, + PciePio, + Mem, + HighGicRedist, + HighPcieEcam, + HighPcieMmio, +} + +/// Layout of aarch64 +pub const MEM_LAYOUT: &[(u64, u64)] = &[ + (0, 0x0800_0000), // Flash + (0x0800_0000, 0x0001_0000), // GicDist + (0x0808_0000, 0x0002_0000), // GicIts + (0x080A_0000, 0x00F6_0000), // GicRedist (max 123 redistributors) + (0x0900_0000, 0x0000_1000), // Uart + (0x0901_0000, 0x0000_1000), // Rtc + (0x0902_0000, 0x0000_0018), // FwCfg + (0x0908_0000, 0x0000_0004), // Ged + (0x0909_0000, 0x0000_1000), // PowerDev + (0x0A00_0000, 0x0000_0200), // Mmio + (0x1000_0000, 0x2EFF_0000), // PcieMmio + (0x3EFF_0000, 0x0001_0000), // PciePio + (0x4000_0000, 0x7F_4000_0000), // Mem + (510 << 30, 0x200_0000), // HighGicRedist, (where remaining redistributors locates) + (511 << 30, 0x1000_0000), // HighPcieEcam + (512 << 30, 512 << 30), // HighPcieMmio +]; + +/// The type of Irq entry on aarch64 +enum IrqEntryType { + Sysbus, + Pcie, +} + +/// IRQ MAP of aarch64 +const IRQ_MAP: &[(i32, i32)] = &[ + (5, 15), // Sysbus + (16, 19), // Pcie +]; + +impl StdMachine { + pub fn new(vm_config: &VmConfig) -> Result { + let free_irqs = ( + IRQ_MAP[IrqEntryType::Sysbus as usize].0, + IRQ_MAP[IrqEntryType::Sysbus as usize].1, + ); + let mmio_region: (u64, u64) = ( + MEM_LAYOUT[LayoutEntryType::Mmio as usize].0, + MEM_LAYOUT[LayoutEntryType::Mmio as usize + 1].0, + ); + let base = MachineBase::new(vm_config, free_irqs, mmio_region)?; + let sys_mem = base.sys_mem.clone(); + + Ok(StdMachine { + base, + pci_host: Arc::new(Mutex::new(PciHost::new( + &sys_mem, + MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize], + MEM_LAYOUT[LayoutEntryType::PcieMmio as usize], + MEM_LAYOUT[LayoutEntryType::PciePio as usize], + MEM_LAYOUT[LayoutEntryType::HighPcieMmio as usize], + IRQ_MAP[IrqEntryType::Pcie as usize].0, + ))), + power_button: Arc::new( + create_new_eventfd() + .with_context(|| MachineError::InitEventFdErr("power_button".to_string()))?, + ), + shutdown_req: Arc::new( + create_new_eventfd() + .with_context(|| MachineError::InitEventFdErr("shutdown_req".to_string()))?, + ), + reset_req: Arc::new( + create_new_eventfd() + .with_context(|| MachineError::InitEventFdErr("reset_req".to_string()))?, + ), + pause_req: Arc::new( + create_new_eventfd() + .with_context(|| MachineError::InitEventFdErr("pause_req".to_string()))?, + ), + resume_req: Arc::new( + create_new_eventfd() + .with_context(|| 
MachineError::InitEventFdErr("resume_req".to_string()))?, + ), + dtb_vec: Vec::new(), + boot_order_list: Arc::new(Mutex::new(Vec::new())), + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + ohui_server: None, + }) + } + + pub fn handle_reset_request(vm: &Arc>) -> Result<()> { + let mut locked_vm = vm.lock().unwrap(); + let mut fdt_addr: u64 = 0; + + for (cpu_index, cpu) in locked_vm.base.cpus.iter().enumerate() { + cpu.pause() + .with_context(|| format!("Failed to pause vcpu{}", cpu_index))?; + + cpu.hypervisor_cpu.reset_vcpu(cpu.clone())?; + if cpu_index == 0 { + fdt_addr = cpu.arch().lock().unwrap().core_regs().regs.regs[0]; + } + } + + locked_vm + .base + .sys_mem + .write( + &mut locked_vm.dtb_vec.as_slice(), + GuestAddress(fdt_addr), + locked_vm.dtb_vec.len() as u64, + AddressAttr::Ram, + ) + .with_context(|| "Fail to write dtb into sysmem")?; + + locked_vm + .reset_all_devices() + .with_context(|| "Fail to reset all devices")?; + locked_vm + .reset_fwcfg_boot_order() + .with_context(|| "Fail to update boot order imformation to FwCfg device")?; + + if QmpChannel::is_connected() { + let reset_msg = qmp_schema::Reset { guest: true }; + event!(Reset; reset_msg); + } + + locked_vm.base.irq_chip.as_ref().unwrap().reset()?; + + for (cpu_index, cpu) in locked_vm.base.cpus.iter().enumerate() { + cpu.resume() + .with_context(|| format!("Failed to resume vcpu{}", cpu_index))?; + } + + Ok(()) + } + + fn build_pptt_cores(&self, pptt: &mut AcpiTable, cluster_offset: u32, uid: &mut u32) { + for core in 0..self.base.cpu_topo.cores { + let mut priv_resources = vec![0; 3]; + priv_resources[0] = pptt.table_len() as u32; + let mut cache_hierarchy_node = CacheHierarchyNode::new(0, CacheType::L2); + pptt.append_child(&cache_hierarchy_node.aml_bytes()); + priv_resources[1] = pptt.table_len() as u32; + cache_hierarchy_node = CacheHierarchyNode::new(priv_resources[0], CacheType::L1D); + pptt.append_child(&cache_hierarchy_node.aml_bytes()); + priv_resources[2] = pptt.table_len() as u32; + cache_hierarchy_node = CacheHierarchyNode::new(priv_resources[0], CacheType::L1I); + pptt.append_child(&cache_hierarchy_node.aml_bytes()); + + if self.base.cpu_topo.threads > 1 { + let core_offset = pptt.table_len(); + let core_hierarchy_node = + ProcessorHierarchyNode::new(0x0, cluster_offset, u32::from(core), 3); + pptt.append_child(&core_hierarchy_node.aml_bytes()); + processor_append_priv_res(pptt, priv_resources); + for _thread in 0..self.base.cpu_topo.threads { + let thread_hierarchy_node = + ProcessorHierarchyNode::new(0xE, core_offset as u32, *uid, 0); + pptt.append_child(&thread_hierarchy_node.aml_bytes()); + (*uid) += 1; + } + } else { + let core_hierarchy_node = ProcessorHierarchyNode::new(0xA, cluster_offset, *uid, 3); + pptt.append_child(&core_hierarchy_node.aml_bytes()); + (*uid) += 1; + processor_append_priv_res(pptt, priv_resources); + } + } + } + + fn build_pptt_clusters(&self, pptt: &mut AcpiTable, socket_offset: u32, uid: &mut u32) { + for cluster in 0..self.base.cpu_topo.clusters { + let cluster_offset = pptt.table_len(); + let cluster_hierarchy_node = + ProcessorHierarchyNode::new(0x0, socket_offset, u32::from(cluster), 0); + pptt.append_child(&cluster_hierarchy_node.aml_bytes()); + self.build_pptt_cores(pptt, cluster_offset as u32, uid); + } + } + + fn build_pptt_sockets(&self, pptt: &mut AcpiTable, uid: &mut u32) { + for socket in 0..self.base.cpu_topo.sockets { + let priv_resources = vec![pptt.table_len() as u32]; + let cache_hierarchy_node = CacheHierarchyNode::new(0, CacheType::L3); + 
pptt.append_child(&cache_hierarchy_node.aml_bytes()); + + let socket_offset = pptt.table_len(); + let socket_hierarchy_node = ProcessorHierarchyNode::new(0x1, 0, u32::from(socket), 1); + pptt.append_child(&socket_hierarchy_node.aml_bytes()); + processor_append_priv_res(pptt, priv_resources); + + self.build_pptt_clusters(pptt, socket_offset as u32, uid); + } + } + + pub fn get_vcpu_reg_val(&self, addr: u64, vcpu_index: usize) -> Option { + if let Some(vcpu) = self.get_cpus().get(vcpu_index) { + let (cpu_state, _) = vcpu.state(); + let cpu_state = *cpu_state.lock().unwrap(); + if cpu_state != CpuLifecycleState::Paused && !self.pause() { + self.notify_lifecycle(VmState::Paused, VmState::Running); + return None; + } + + let value = match vcpu.hypervisor_cpu.get_one_reg(addr) { + Ok(value) => Some(value), + _ => None, + }; + + if cpu_state != CpuLifecycleState::Paused { + self.resume(); + } + return value; + } + None + } + + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + fn add_ohui_server(&mut self, vm_config: &VmConfig) -> Result<()> { + if let Some(dpy) = vm_config.display.as_ref() { + if dpy.display_type != "ohui" { + return Ok(()); + } + self.ohui_server = Some(Arc::new(OhUiServer::new( + dpy.get_ui_path(), + dpy.get_sock_path(), + )?)); + } + Ok(()) + } +} + +impl StdMachineOps for StdMachine { + fn init_pci_host(&self) -> Result<()> { + let root_bus = Arc::downgrade(&self.pci_host.lock().unwrap().child_bus().unwrap()); + let mmconfig_region_ops = PciHost::build_mmconfig_ops(self.pci_host.clone()); + let mmconfig_region = Region::init_io_region( + MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].1, + mmconfig_region_ops, + "PcieEcamIo", + ); + self.base + .sys_mem + .root() + .add_subregion( + mmconfig_region, + MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].0, + ) + .with_context(|| "Failed to register ECAM in memory space.")?; + + let pcihost_root = PciHostRoot::new(root_bus); + pcihost_root + .realize() + .with_context(|| "Failed to realize pcihost root device.")?; + Ok(()) + } + + fn add_fwcfg_device(&mut self, nr_cpus: u8) -> Result>>> { + if self.base.vm_config.lock().unwrap().pflashs.is_none() { + return Ok(None); + } + + let mut fwcfg = FwCfgMem::new( + self.base.sys_mem.clone(), + &self.base.sysbus, + MEM_LAYOUT[LayoutEntryType::FwCfg as usize].0, + MEM_LAYOUT[LayoutEntryType::FwCfg as usize].1, + )?; + fwcfg + .add_data_entry(FwCfgEntryType::NbCpus, nr_cpus.as_bytes().to_vec()) + .with_context(|| DevErrorKind::AddEntryErr("NbCpus".to_string()))?; + + let cmdline = self + .base + .boot_source + .lock() + .unwrap() + .kernel_cmdline + .to_string(); + fwcfg + .add_data_entry( + FwCfgEntryType::CmdlineSize, + (cmdline.len() + 1).as_bytes().to_vec(), + ) + .with_context(|| DevErrorKind::AddEntryErr("CmdlineSize".to_string()))?; + fwcfg + .add_string_entry(FwCfgEntryType::CmdlineData, cmdline.as_str()) + .with_context(|| DevErrorKind::AddEntryErr("CmdlineData".to_string()))?; + + let boot_order = Vec::::new(); + fwcfg + .add_file_entry("bootorder", boot_order) + .with_context(|| DevErrorKind::AddEntryErr("bootorder".to_string()))?; + + let bios_geometry = Vec::::new(); + fwcfg + .add_file_entry("bios-geometry", bios_geometry) + .with_context(|| DevErrorKind::AddEntryErr("bios-geometry".to_string()))?; + + let fwcfg_dev = fwcfg + .realize() + .with_context(|| "Failed to realize fwcfg device")?; + self.base.fwcfg_dev = Some(fwcfg_dev.clone()); + + Ok(Some(fwcfg_dev)) + } +} + +impl MachineOps for StdMachine { + gen_base_func!(machine_base, machine_base_mut, MachineBase, 
base); + + fn init_machine_ram(&self, sys_mem: &Arc, mem_size: u64) -> Result<()> { + let vm_ram = self.get_vm_ram(); + + let layout_size = MEM_LAYOUT[LayoutEntryType::Mem as usize].1; + let ram = Region::init_alias_region( + vm_ram.clone(), + 0, + std::cmp::min(layout_size, mem_size), + "pc_ram", + ); + sys_mem + .root() + .add_subregion(ram, MEM_LAYOUT[LayoutEntryType::Mem as usize].0) + } + + fn init_interrupt_controller(&mut self, vcpu_count: u64) -> Result<()> { + let v3 = ICGICv3Config { + msi: true, + dist_range: MEM_LAYOUT[LayoutEntryType::GicDist as usize], + redist_region_ranges: vec![ + MEM_LAYOUT[LayoutEntryType::GicRedist as usize], + MEM_LAYOUT[LayoutEntryType::HighGicRedist as usize], + ], + its_range: Some(MEM_LAYOUT[LayoutEntryType::GicIts as usize]), + }; + let intc_conf = ICGICConfig { + version: None, + vcpu_count, + max_irq: GIC_IRQ_MAX, + v2: None, + v3: Some(v3), + }; + + let hypervisor = self.get_hypervisor(); + let mut locked_hypervisor = hypervisor.lock().unwrap(); + self.base.irq_chip = Some(locked_hypervisor.create_interrupt_controller(&intc_conf)?); + self.base.irq_chip.as_ref().unwrap().realize()?; + + let root_bus = &self.pci_host.lock().unwrap().child_bus().unwrap(); + MUT_PCI_BUS!(root_bus, locked_bus, root_pci_bus); + let irq_manager = locked_hypervisor.create_irq_manager()?; + root_pci_bus.msi_irq_manager = irq_manager.msi_irq_manager; + let line_irq_manager = irq_manager.line_irq_manager; + if let Some(line_irq_manager) = line_irq_manager.clone() { + let irq_state = Some(Arc::new(Mutex::new(PciIntxState::new( + IRQ_MAP[IrqEntryType::Pcie as usize].0 as u32, + line_irq_manager.clone(), + )))); + root_pci_bus.intx_state = irq_state; + } else { + return Err(anyhow!( + "Failed to create intx state: legacy irq manager is none." 
+ )); + } + self.base.sysbus.lock().unwrap().irq_manager = line_irq_manager; + + Ok(()) + } + + fn add_rtc_device(&mut self) -> Result<()> { + let rtc = PL031::new( + &self.base.sysbus, + MEM_LAYOUT[LayoutEntryType::Rtc as usize].0, + MEM_LAYOUT[LayoutEntryType::Rtc as usize].1, + )?; + rtc.realize().with_context(|| "Failed to realize PL031")?; + Ok(()) + } + + fn add_ged_device(&mut self) -> Result<()> { + let battery_present = self.base.vm_config.lock().unwrap().machine_config.battery; + let ged = Ged::new( + battery_present, + &self.base.sysbus, + MEM_LAYOUT[LayoutEntryType::Ged as usize].0, + MEM_LAYOUT[LayoutEntryType::Ged as usize].1, + GedEvent::new(self.power_button.clone()), + )?; + let ged_dev = ged.realize().with_context(|| "Failed to realize Ged")?; + if battery_present { + let pdev = PowerDev::new( + ged_dev, + &self.base.sysbus, + MEM_LAYOUT[LayoutEntryType::PowerDev as usize].0, + MEM_LAYOUT[LayoutEntryType::PowerDev as usize].1, + )?; + pdev.realize() + .with_context(|| "Failed to realize PowerDev")?; + } + Ok(()) + } + + fn add_serial_device(&mut self, config: &SerialConfig) -> Result<()> { + let region_base: u64 = MEM_LAYOUT[LayoutEntryType::Uart as usize].0; + let region_size: u64 = MEM_LAYOUT[LayoutEntryType::Uart as usize].1; + let pl011 = PL011::new(config.clone(), &self.base.sysbus, region_base, region_size) + .with_context(|| "Failed to create PL011")?; + pl011.realize().with_context(|| "Failed to realize PL011")?; + let mut bs = self.base.boot_source.lock().unwrap(); + bs.kernel_cmdline.push(Param { + param_type: "earlycon".to_string(), + value: format!("pl011,mmio,0x{:08x}", region_base), + }); + Ok(()) + } + + fn syscall_whitelist(&self) -> Vec { + syscall_whitelist() + } + + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + fn update_ohui_srv(&mut self, passthru: bool) { + self.ohui_server.as_ref().unwrap().set_passthru(passthru); + } + + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + fn get_ohui_fb(&self) -> Option { + match &self.ohui_server { + Some(server) => server.get_ohui_fb(), + None => None, + } + } + + fn realize(vm: &Arc>, vm_config: &mut VmConfig) -> Result<()> { + let nr_cpus = vm_config.machine_config.nr_cpus; + let mut locked_vm = vm.lock().unwrap(); + locked_vm.init_global_config(vm_config)?; + register_shutdown_event(locked_vm.shutdown_req.clone(), vm.clone()) + .with_context(|| "Failed to register shutdown event")?; + locked_vm + .register_reset_event(locked_vm.reset_req.clone(), vm.clone()) + .with_context(|| "Fail to register reset event")?; + locked_vm + .register_pause_event(locked_vm.pause_req.clone(), vm.clone()) + .with_context(|| "Fail to register pause event")?; + locked_vm + .register_resume_event(locked_vm.resume_req.clone(), vm.clone()) + .with_context(|| "Fail to register resume event")?; + + locked_vm.base.numa_nodes = locked_vm.add_numa_nodes(vm_config)?; + let locked_hypervisor = locked_vm.base.hypervisor.lock().unwrap(); + locked_hypervisor.init_machine(&locked_vm.base.sys_mem)?; + drop(locked_hypervisor); + locked_vm.init_memory( + &vm_config.machine_config.mem_config, + &locked_vm.base.sys_mem, + nr_cpus, + )?; + + locked_vm + .init_pci_host() + .with_context(|| MachineError::InitPCIeHostErr)?; + let fwcfg = locked_vm.add_fwcfg_device(nr_cpus)?; + + let boot_config = locked_vm + .load_boot_source(fwcfg.as_ref(), MEM_LAYOUT[LayoutEntryType::Mem as usize].0)?; + let cpu_config = locked_vm.load_cpu_features(vm_config)?; + + let hypervisor = locked_vm.base.hypervisor.clone(); + 
locked_vm.base.cpus.extend(::init_vcpu( + vm.clone(), + hypervisor, + nr_cpus, + &CPUTopology::new(), + &boot_config, + &cpu_config, + )?); + + // Interrupt Controller Chip init + locked_vm.init_interrupt_controller(u64::from(nr_cpus))?; + + locked_vm.cpu_post_init(&cpu_config)?; + + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + locked_vm.add_ohui_server(vm_config)?; + + locked_vm + .add_devices(vm_config) + .with_context(|| "Failed to add devices")?; + + let mut fdt_helper = FdtBuilder::new(); + locked_vm + .generate_fdt_node(&mut fdt_helper) + .with_context(|| MachineError::GenFdtErr)?; + let fdt_vec = fdt_helper.finish()?; + locked_vm.dtb_vec = fdt_vec.clone(); + locked_vm + .base + .sys_mem + .write( + &mut fdt_vec.as_slice(), + GuestAddress(boot_config.fdt_addr), + fdt_vec.len() as u64, + AddressAttr::Ram, + ) + .with_context(|| MachineError::WrtFdtErr(boot_config.fdt_addr, fdt_vec.len()))?; + + // If it is direct kernel boot mode, the ACPI can not be enabled. + if let Some(fw_cfg) = fwcfg { + let mut mem_array = Vec::new(); + let mem_size = vm_config.machine_config.mem_config.mem_size; + mem_array.push((MEM_LAYOUT[LayoutEntryType::Mem as usize].0, mem_size)); + locked_vm + .build_acpi_tables(&fw_cfg) + .with_context(|| "Failed to create ACPI tables")?; + locked_vm + .build_smbios(&fw_cfg, mem_array) + .with_context(|| "Failed to create smbios tables")?; + } + + locked_vm + .reset_fwcfg_boot_order() + .with_context(|| "Fail to update boot order imformation to FwCfg device")?; + + locked_vm + .display_init(vm_config) + .with_context(|| "Fail to init display")?; + + #[cfg(feature = "windows_emu_pid")] + crate::watch_windows_emu_pid( + vm_config, + locked_vm.power_button.clone(), + locked_vm.shutdown_req.clone(), + vm.clone(), + ); + + MigrationManager::register_vm_config(locked_vm.get_vm_config()); + MigrationManager::register_vm_instance(vm.clone()); + MigrationManager::register_migration_instance(locked_vm.base.migration_hypervisor.clone()); + if let Err(e) = MigrationManager::set_status(MigrationStatus::Setup) { + bail!("Failed to set migration status {}", e); + } + Ok(()) + } + + fn add_pflash_device(&mut self, configs: &[DriveConfig]) -> Result<()> { + let mut configs_vec = configs.to_vec(); + configs_vec.sort_by_key(|c| c.unit.unwrap()); + let sector_len: u32 = 1024 * 256; + let mut flash_base: u64 = MEM_LAYOUT[LayoutEntryType::Flash as usize].0; + let flash_size: u64 = MEM_LAYOUT[LayoutEntryType::Flash as usize].1 / 2; + for i in 0..=1 { + let (fd, read_only) = if i < configs_vec.len() { + let path = &configs_vec[i].path_on_host; + let read_only = configs_vec[i].readonly; + let fd = self.fetch_drive_file(path)?; + (Some(fd), read_only) + } else { + (None, false) + }; + + let pflash = PFlash::new( + flash_size, + fd, + sector_len, + 4, + 2, + read_only, + &self.base.sysbus, + flash_base, + ) + .with_context(|| MachineError::InitPflashErr)?; + pflash + .realize() + .with_context(|| MachineError::RlzPflashErr)?; + flash_base += flash_size; + } + + Ok(()) + } + + /// Create display. + #[allow(unused_variables)] + fn display_init(&mut self, vm_config: &mut VmConfig) -> Result<()> { + // GTK display init. 
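+        // The match below dispatches on the configured display backend (GTK or the OpenHarmony UI server); VNC, when that feature is enabled, is initialized unconditionally after the match.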
+ #[cfg(any(feature = "gtk", all(target_env = "ohos", feature = "ohui_srv")))] + match vm_config.display { + #[cfg(feature = "gtk")] + Some(ref ds_cfg) if ds_cfg.display_type == "gtk" => { + let ui_context = UiContext { + vm_name: vm_config.guest_name.clone(), + power_button: Some(self.power_button.clone()), + shutdown_req: Some(self.shutdown_req.clone()), + pause_req: Some(self.pause_req.clone()), + resume_req: Some(self.resume_req.clone()), + }; + gtk_display_init(ds_cfg, ui_context) + .with_context(|| "Failed to init GTK display!")?; + } + // OHUI server init. + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + Some(ref ds_cfg) if ds_cfg.display_type == "ohui" => { + ohui_init(self.ohui_server.as_ref().unwrap().clone(), ds_cfg) + .with_context(|| "Failed to init OH UI server!")?; + } + _ => {} + }; + + // VNC display init. + #[cfg(feature = "vnc")] + vnc_init(&vm_config.vnc, &vm_config.object) + .with_context(|| "Failed to init VNC server!")?; + Ok(()) + } + + #[cfg(feature = "ramfb")] + fn add_ramfb(&mut self, cfg_args: &str) -> Result<()> { + let config = RamfbConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let fwcfg_dev = self + .get_fwcfg_dev() + .with_context(|| "Ramfb device must be used UEFI to boot, please add pflash devices")?; + let sys_mem = self.get_sys_mem(); + let mut ramfb = Ramfb::new(sys_mem.clone(), &self.base.sysbus, config.install); + + ramfb.ramfb_state.setup(&fwcfg_dev)?; + ramfb.realize()?; + Ok(()) + } + + fn get_pci_host(&mut self) -> Result<&Arc>> { + Ok(&self.pci_host) + } + + fn get_boot_order_list(&self) -> Option>>> { + Some(self.boot_order_list.clone()) + } + + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + fn get_token_id(&self) -> Option>> { + self.ohui_server.as_ref().map(|srv| srv.token_id.clone()) + } +} + +pub(crate) fn arch_ioctl_allow_list(bpf_rule: BpfRule) -> BpfRule { + bpf_rule + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_ONE_REG() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_DEVICE_ATTR() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_REG_LIST() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_ARM_VCPU_INIT() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_IRQ_LINE() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_ONE_REG() as u32) +} + +pub(crate) fn arch_syscall_whitelist() -> Vec { + vec![ + BpfRule::new(libc::SYS_epoll_pwait), + BpfRule::new(libc::SYS_mkdirat), + BpfRule::new(libc::SYS_unlinkat), + BpfRule::new(libc::SYS_rt_sigaction), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_rseq), + #[cfg(target_env = "gnu")] + BpfRule::new(223), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_listen), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_fchmodat), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_shmctl), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_shmat), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_shmdt), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_lremovexattr), + ] +} + +impl AcpiBuilder for StdMachine { + fn build_gtdt_table( + &self, + acpi_data: &Arc>>, + loader: &mut TableLoader, + ) -> Result { + let mut gtdt = AcpiTable::new(*b"GTDT", 2, *b"STRATO", *b"VIRTGTDT", 1); + gtdt.set_table_len(96); + + // Counter control block physical address + gtdt.set_field(36, 0xFFFF_FFFF_FFFF_FFFF_u64); + // Secure EL1 interrupt + gtdt.set_field(48, ACPI_GTDT_ARCH_TIMER_S_EL1_IRQ + INTERRUPT_PPIS_COUNT); + // Secure EL1 flags + gtdt.set_field(52, ACPI_GTDT_INTERRUPT_MODE_LEVEL); + + // Non secure EL1 interrupt + 
gtdt.set_field(56, ACPI_GTDT_ARCH_TIMER_NS_EL1_IRQ + INTERRUPT_PPIS_COUNT); + // Non secure EL1 flags + gtdt.set_field(60, ACPI_GTDT_INTERRUPT_MODE_LEVEL | ACPI_GTDT_CAP_ALWAYS_ON); + + // Virtual timer interrupt + gtdt.set_field(64, ACPI_GTDT_ARCH_TIMER_VIRT_IRQ + INTERRUPT_PPIS_COUNT); + // Virtual timer flags + gtdt.set_field(68, ACPI_GTDT_INTERRUPT_MODE_LEVEL); + + // Non secure EL2 interrupt + gtdt.set_field(72, ACPI_GTDT_ARCH_TIMER_NS_EL2_IRQ + INTERRUPT_PPIS_COUNT); + // Non secure EL2 flags + gtdt.set_field(76, ACPI_GTDT_INTERRUPT_MODE_LEVEL); + // Counter read block physical address + gtdt.set_field(80, 0xFFFF_FFFF_FFFF_FFFF_u64); + + let gtdt_begin = StdMachine::add_table_to_loader(acpi_data, loader, >dt) + .with_context(|| "Fail to add GTDT table to loader")?; + Ok(gtdt_begin) + } + + fn build_dbg2_table( + &self, + acpi_data: &Arc>>, + loader: &mut TableLoader, + ) -> Result { + // Table format described at: + // https://learn.microsoft.com/en-us/windows-hardware/drivers/bringup/acpi-debug-port-table + + let dev_name = "COM0"; + let dev_name_length = dev_name.len() + 1; + + let dbg2_table_size = 82 // Fixed size part of table + + dev_name_length; + + let dbg2_info_size = 22 // BaseAddressRegister offset + + 12 // BaseAddressRegister + + 4 // AddressSize + + dev_name_length; + + let mut dbg2 = AcpiTable::new(*b"DBG2", 0, *b"STRATO", *b"VIRTDBG2", 1); + dbg2.set_table_len(dbg2_table_size); + + // Table 1. Debug Port Table 2 format + // OffsetDbgDeviceInfo + dbg2.set_field(36, 44_u32); + // NumberDbgDeviceInfo + dbg2.set_field(40, 1_u32); + + // Table 2. Debug Device Information structure format + let offset = 44_usize; + // Revision + dbg2.set_field(offset, 0_u8); + // Length + dbg2.set_field(offset + 1, dbg2_info_size as u16); + // NumberofGenericAddressRegisters + dbg2.set_field(offset + 3, 1_u8); + // NamespaceStringLength + dbg2.set_field(offset + 4, dev_name_length as u16); + // NamespaceStringOffset + dbg2.set_field(offset + 6, 38_u16); + // OemDataLength + dbg2.set_field(offset + 8, 0_u16); + // OemDataOffset + dbg2.set_field(offset + 10, 0_u16); + // Port Type: 0x8000 is serial + dbg2.set_field(offset + 12, 0x8000_u16); + // Port Subtype: 0x3 is ARM PL011 UART + dbg2.set_field(offset + 14, 0x3_u16); + + // BaseAddressRegisterOffset + dbg2.set_field(offset + 18, 22_u16); + // AddressSizeOffset + dbg2.set_field(offset + 20, 34_u16); + + let uart_memory_address = MEM_LAYOUT[LayoutEntryType::Uart as usize].0; + let uart_memory_size = MEM_LAYOUT[LayoutEntryType::Uart as usize].1; + + // BaseAddressRegister: aml address space + dbg2.set_field(offset + 22, 0_u8); + // BaseAddressRegister: bit width + dbg2.set_field(offset + 23, 8_u8); + // BaseAddressRegister: bit offset + dbg2.set_field(offset + 24, 0_u8); + // BaseAddressRegister: access width + dbg2.set_field(offset + 25, 1_u8); + // BaseAddressRegister: address + dbg2.set_field(offset + 26, uart_memory_address); + // AddressSize + dbg2.set_field(offset + 34, uart_memory_size as u32); + + // NamespaceString + let mut offset = offset + 38; + for ch in dev_name.chars() { + dbg2.set_field(offset, ch as u8); + offset += 1; + } + dbg2.set_field(offset, 0_u8); + + let dbg2_begin = StdMachine::add_table_to_loader(acpi_data, loader, &dbg2) + .with_context(|| "Fail to add DBG2 table to loader")?; + Ok(dbg2_begin) + } + + fn build_iort_table( + &self, + acpi_data: &Arc>>, + loader: &mut TableLoader, + ) -> Result { + let mut iort = AcpiTable::new(*b"IORT", 3, *b"STRATO", *b"VIRTIORT", 1); + iort.set_table_len(128); + + // Number of 
IORT nodes is 2: ITS group node and Root Complex Node. + iort.set_field(36, 2_u32); + // Node offset + iort.set_field(40, 48_u32); + + // ITS group node + iort.set_field(48, ACPI_IORT_NODE_ITS_GROUP); + // ITS node length + iort.set_field(49, 24_u16); + // ITS node revision + iort.set_field(51, 1_u8); + // ITS count + iort.set_field(64, 1_u32); + + // Root Complex Node + iort.set_field(72, ACPI_IORT_NODE_PCI_ROOT_COMPLEX); + // Length of Root Complex node + let len = ROOT_COMPLEX_ENTRY_SIZE + ID_MAPPING_ENTRY_SIZE; + iort.set_field(73, len); + // Revision of Root Complex node + iort.set_field(75, 3_u8); + // Identifier of Root Complex node + iort.set_field(76, 1_u32); + // Mapping counts of Root Complex Node + iort.set_field(80, 1_u32); + // Mapping offset of Root Complex Node + iort.set_field(84, u32::from(ROOT_COMPLEX_ENTRY_SIZE)); + // Cache of coherent device + iort.set_field(88, 1_u32); + // Memory flags of coherent device + iort.set_field(95, 3_u8); + // Memory address size limit + iort.set_field(104, 0x40_u8); + // Identity RID mapping + iort.set_field(112, 0xffff_u32); + // Without SMMU, id mapping is the first node in ITS group node + iort.set_field(120, 48_u32); + + let iort_begin = StdMachine::add_table_to_loader(acpi_data, loader, &iort) + .with_context(|| "Fail to add IORT table to loader")?; + Ok(iort_begin) + } + + fn build_spcr_table( + &self, + acpi_data: &Arc>>, + loader: &mut TableLoader, + ) -> Result { + let mut spcr = AcpiTable::new(*b"SPCR", 2, *b"STRATO", *b"VIRTSPCR", 1); + spcr.set_table_len(80); + + // Interface type: ARM PL011 UART + spcr.set_field(36, 3_u8); + // Bit width of AcpiGenericAddress + spcr.set_field(41, 8_u8); + // Access width of AcpiGenericAddress + spcr.set_field(43, 1_u8); + // Base addr of AcpiGenericAddress + spcr.set_field(44, MEM_LAYOUT[LayoutEntryType::Uart as usize].0); + // Interrupt Type: Arm GIC Interrupt + spcr.set_field(52, 1_u8 << 3); + // Irq number used by the UART + let mut uart_irq: u32 = 0; + let devices = self.get_sysbus_devices(); + for dev in devices.values() { + SYS_BUS_DEVICE!(dev, locked_dev, sysbusdev); + if sysbusdev.sysbusdev_base().dev_type == SysBusDevType::PL011 { + uart_irq = sysbusdev.sysbusdev_base().irq_state.irq as _; + break; + } + } + spcr.set_field(54, uart_irq + INTERRUPT_SGIS_COUNT + INTERRUPT_PPIS_COUNT); + // Set baud rate: 3 = 9600 + spcr.set_field(58, 3_u8); + // Stop bit + spcr.set_field(60, 1_u8); + // Hardware flow control + spcr.set_field(61, 2_u8); + // PCI Device ID: it is not a PCI device + spcr.set_field(64, 0xffff_u16); + // PCI Vendor ID: it is not a PCI device + spcr.set_field(66, 0xffff_u16); + + let spcr_begin = StdMachine::add_table_to_loader(acpi_data, loader, &spcr) + .with_context(|| "Fail to add SPCR table to loader")?; + Ok(spcr_begin) + } + + fn build_dsdt_table( + &self, + acpi_data: &Arc>>, + loader: &mut TableLoader, + ) -> Result { + let mut dsdt = AcpiTable::new(*b"DSDT", 2, *b"STRATO", *b"VIRTDSDT", 1); + + // 1. CPU info. + let cpus_count = self.base.cpus.len() as u64; + let mut sb_scope = AmlScope::new("\\_SB"); + for cpu_id in 0..cpus_count { + let mut dev = AmlDevice::new(format!("C{:03}", cpu_id).as_str()); + dev.append_child(AmlNameDecl::new("_HID", AmlString("ACPI0007".to_string()))); + dev.append_child(AmlNameDecl::new("_UID", AmlInteger(cpu_id))); + sb_scope.append_child(dev); + } + + // 2. Create pci host bridge node. 
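+        // The PCIe host bridge supplies its own AML description, so it is simply
+        // attached under the \_SB scope here.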
+ sb_scope.append_child(self.pci_host.lock().unwrap().clone()); + + sb_scope.append_child(acpi_dsdt_add_power_button()); + + dsdt.append_child(sb_scope.aml_bytes().as_slice()); + + // 3. Info of devices attached to system bus. + dsdt.append_child(self.base.sysbus.lock().unwrap().aml_bytes().as_slice()); + + let dsdt_begin = StdMachine::add_table_to_loader(acpi_data, loader, &dsdt) + .with_context(|| "Fail to add DSDT table to loader")?; + Ok(dsdt_begin) + } + + fn build_madt_table( + &self, + acpi_data: &Arc>>, + loader: &mut TableLoader, + ) -> Result { + let mut madt = AcpiTable::new(*b"APIC", 5, *b"STRATO", *b"VIRTAPIC", 1); + madt.set_table_len(44); + + // 1. GIC Distributor. + let mut gic_dist = AcpiGicDistributor::default(); + gic_dist.type_id = ACPI_MADT_GENERIC_DISTRIBUTOR; + gic_dist.length = 24; + gic_dist.base_addr = MEM_LAYOUT[LayoutEntryType::GicDist as usize].0; + gic_dist.gic_version = 3; + madt.append_child(&gic_dist.aml_bytes()); + + // 2. GIC CPU. + let cpus_count = self.base.cpus.len() as u64; + for cpu_index in 0..cpus_count { + let mpidr = self.base.cpus[cpu_index as usize] + .arch() + .lock() + .unwrap() + .mpidr(); + let mpidr_mask: u64 = 0x007f_ffff; + let mut gic_cpu = AcpiGicCpu::default(); + gic_cpu.type_id = ACPI_MADT_GENERIC_CPU_INTERFACE; + gic_cpu.length = 80; + gic_cpu.cpu_interface_num = cpu_index as u32; + gic_cpu.processor_uid = cpu_index as u32; + gic_cpu.flags = 5; + gic_cpu.mpidr = mpidr & mpidr_mask; + gic_cpu.vgic_interrupt = ARCH_GIC_MAINT_IRQ + INTERRUPT_PPIS_COUNT; + gic_cpu.perf_interrupt = PMU_INTR + PPI_BASE; + madt.append_child(&gic_cpu.aml_bytes()); + } + + // 3. GIC Redistributor. + let mut gic_redist = AcpiGicRedistributor::default(); + gic_redist.type_id = ACPI_MADT_GENERIC_REDISTRIBUTOR; + gic_redist.range_length = MEM_LAYOUT[LayoutEntryType::GicRedist as usize].1 as u32; + gic_redist.base_addr = MEM_LAYOUT[LayoutEntryType::GicRedist as usize].0; + gic_redist.length = 16; + madt.append_child(&gic_redist.aml_bytes()); + // SAFETY: ARM architecture must have interrupt controllers in user mode. + if self.base.irq_chip.as_ref().unwrap().get_redist_count() > 1 { + gic_redist.range_length = MEM_LAYOUT[LayoutEntryType::HighGicRedist as usize].1 as u32; + gic_redist.base_addr = MEM_LAYOUT[LayoutEntryType::HighGicRedist as usize].0; + madt.append_child(&gic_redist.aml_bytes()); + } + + // 4. GIC Its. 
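+        // A single ITS entry is reported for MSI support; its base address is
+        // taken from MEM_LAYOUT.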
+ let mut gic_its = AcpiGicIts::default(); + gic_its.type_id = ACPI_MADT_GENERIC_TRANSLATOR; + gic_its.length = 20; + gic_its.base_addr = MEM_LAYOUT[LayoutEntryType::GicIts as usize].0; + madt.append_child(&gic_its.aml_bytes()); + + let madt_begin = StdMachine::add_table_to_loader(acpi_data, loader, &madt) + .with_context(|| "Fail to add MADT table to loader")?; + Ok(madt_begin) + } + + fn build_srat_cpu(&self, proximity_domain: u32, node: &NumaNode, srat: &mut AcpiTable) { + for cpu in node.cpus.iter() { + srat.append_child( + &AcpiSratGiccAffinity { + type_id: 3_u8, + length: size_of::() as u8, + proximity_domain, + process_uid: u32::from(*cpu), + flags: 1, + clock_domain: 0_u32, + } + .aml_bytes(), + ); + } + } + + fn build_srat_mem( + &self, + base_addr: u64, + proximity_domain: u32, + node: &NumaNode, + srat: &mut AcpiTable, + ) -> u64 { + srat.append_child( + &AcpiSratMemoryAffinity { + type_id: 1, + length: size_of::() as u8, + proximity_domain, + base_addr, + range_length: node.size, + flags: 1, + ..Default::default() + } + .aml_bytes(), + ); + base_addr + node.size + } + + fn build_srat_table( + &self, + acpi_data: &Arc>>, + loader: &mut TableLoader, + ) -> Result { + let mut srat = AcpiTable::new(*b"SRAT", 1, *b"STRATO", *b"VIRTSRAT", 1); + // Reserved + srat.append_child(&[1_u8; 4_usize]); + // Reserved + srat.append_child(&[0_u8; 8_usize]); + + let mut next_base = MEM_LAYOUT[LayoutEntryType::Mem as usize].0; + // SAFETY: the SRAT table is created only when numa node configured. + for (id, node) in self.base.numa_nodes.as_ref().unwrap().iter() { + self.build_srat_cpu(*id, node, &mut srat); + next_base = self.build_srat_mem(next_base, *id, node, &mut srat); + } + + let srat_begin = StdMachine::add_table_to_loader(acpi_data, loader, &srat) + .with_context(|| "Fail to add SRAT table to loader")?; + Ok(srat_begin) + } + + fn build_pptt_table( + &self, + acpi_data: &Arc>>, + loader: &mut TableLoader, + ) -> Result { + let mut pptt = AcpiTable::new(*b"PPTT", 2, *b"STRATO", *b"VIRTPPTT", 1); + let mut uid = 0_u32; + self.build_pptt_sockets(&mut pptt, &mut uid); + let pptt_begin = StdMachine::add_table_to_loader(acpi_data, loader, &pptt) + .with_context(|| "Fail to add PPTT table to loader")?; + Ok(pptt_begin) + } + + fn get_hardware_signature(&self) -> Option { + let vm_config = self.machine_base().vm_config.lock().unwrap(); + vm_config.hardware_signature + } +} + +/// Function that helps to generate flash node in device-tree. +/// + +/// Trait that helps to generate all nodes in device-tree. +#[allow(clippy::upper_case_acronyms)] +trait CompileFDTHelper { + /// Function that helps to generate memory nodes. + fn generate_memory_node(&self, fdt: &mut FdtBuilder) -> Result<()>; + /// Function that helps to generate pci node in device-tree. + fn generate_pci_host_node(&self, fdt: &mut FdtBuilder) -> Result<()>; + /// Function that helps to generate the chosen node. 
+ fn generate_chosen_node(&self, fdt: &mut FdtBuilder) -> Result<()>; +} + +impl CompileFDTHelper for StdMachine { + fn generate_pci_host_node(&self, fdt: &mut FdtBuilder) -> Result<()> { + let pcie_ecam_base = MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].0; + let pcie_ecam_size = MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].1; + let pcie_buses_num = MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].1 >> 20; + let node = format!("pcie@{:x}", pcie_ecam_base); + let pci_node_dep = fdt.begin_node(&node)?; + fdt.set_property_string("compatible", "pci-host-ecam-generic")?; + fdt.set_property_string("device_type", "pci")?; + fdt.set_property_array_u64("reg", &[pcie_ecam_base, pcie_ecam_size])?; + fdt.set_property_array_u32("bus-range", &[0, (pcie_buses_num - 1) as u32])?; + fdt.set_property_u32("linux,pci-domain", 0)?; + fdt.set_property_u32("#address-cells", 3)?; + fdt.set_property_u32("#size-cells", 2)?; + + let high_pcie_mmio_base = MEM_LAYOUT[LayoutEntryType::HighPcieMmio as usize].0; + let high_pcie_mmio_size = MEM_LAYOUT[LayoutEntryType::HighPcieMmio as usize].1; + let fdt_pci_mmio_type_64bit: u32 = device_tree::FDT_PCI_RANGE_MMIO_64BIT; + let high_mmio_base_hi: u32 = (high_pcie_mmio_base >> 32) as u32; + let high_mmio_base_lo: u32 = (high_pcie_mmio_base & 0xffff_ffff) as u32; + let high_mmio_size_hi: u32 = (high_pcie_mmio_size >> 32) as u32; + let high_mmio_size_lo: u32 = (high_pcie_mmio_size & 0xffff_ffff) as u32; + + let pcie_mmio_base = MEM_LAYOUT[LayoutEntryType::PcieMmio as usize].0; + let pcie_mmio_size = MEM_LAYOUT[LayoutEntryType::PcieMmio as usize].1; + let fdt_pci_mmio_type: u32 = device_tree::FDT_PCI_RANGE_MMIO; + let mmio_base_hi: u32 = (pcie_mmio_base >> 32) as u32; + let mmio_base_lo: u32 = (pcie_mmio_base & 0xffff_ffff) as u32; + let mmio_size_hi: u32 = (pcie_mmio_size >> 32) as u32; + let mmio_size_lo: u32 = (pcie_mmio_size & 0xffff_ffff) as u32; + + let pcie_pio_base = MEM_LAYOUT[LayoutEntryType::PciePio as usize].0; + let pcie_pio_size = MEM_LAYOUT[LayoutEntryType::PciePio as usize].1; + let fdt_pci_pio_type: u32 = device_tree::FDT_PCI_RANGE_IOPORT; + let pio_base_hi: u32 = (pcie_pio_base >> 32) as u32; + let pio_base_lo: u32 = (pcie_pio_base & 0xffff_ffff) as u32; + let pio_size_hi: u32 = (pcie_pio_size >> 32) as u32; + let pio_size_lo: u32 = (pcie_pio_size & 0xffff_ffff) as u32; + + fdt.set_property_array_u32( + "ranges", + &[ + fdt_pci_pio_type, + 0, + 0, + pio_base_hi, + pio_base_lo, + pio_size_hi, + pio_size_lo, + fdt_pci_mmio_type, + mmio_base_hi, + mmio_base_lo, + mmio_base_hi, + mmio_base_lo, + mmio_size_hi, + mmio_size_lo, + fdt_pci_mmio_type_64bit, + high_mmio_base_hi, + high_mmio_base_lo, + high_mmio_base_hi, + high_mmio_base_lo, + high_mmio_size_hi, + high_mmio_size_lo, + ], + )?; + + fdt.set_property_u32("msi-parent", device_tree::GIC_ITS_PHANDLE)?; + fdt.end_node(pci_node_dep) + } + + fn generate_memory_node(&self, fdt: &mut FdtBuilder) -> Result<()> { + if self.base.numa_nodes.is_none() { + let mem_base = MEM_LAYOUT[LayoutEntryType::Mem as usize].0; + let mem_size = self.base.sys_mem.memory_end_address().raw_value() + - MEM_LAYOUT[LayoutEntryType::Mem as usize].0; + let node = "memory"; + let memory_node_dep = fdt.begin_node(node)?; + fdt.set_property_string("device_type", "memory")?; + fdt.set_property_array_u64("reg", &[mem_base, mem_size])?; + fdt.end_node(memory_node_dep)?; + + return Ok(()); + } + + // Set NUMA node information. 
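+        // One memory node is generated per NUMA node; the nodes are laid out
+        // contiguously starting from the base of guest RAM.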
+ let mut mem_base = MEM_LAYOUT[LayoutEntryType::Mem as usize].0; + for (id, node) in self.base.numa_nodes.as_ref().unwrap().iter().enumerate() { + let mem_size = node.1.size; + let node = format!("memory@{:x}", mem_base); + let memory_node_dep = fdt.begin_node(&node)?; + fdt.set_property_string("device_type", "memory")?; + fdt.set_property_array_u64("reg", &[mem_base, mem_size])?; + fdt.set_property_u32("numa-node-id", id as u32)?; + fdt.end_node(memory_node_dep)?; + mem_base += mem_size; + } + + Ok(()) + } + + fn generate_chosen_node(&self, fdt: &mut FdtBuilder) -> Result<()> { + let node = "chosen"; + + let boot_source = self.base.boot_source.lock().unwrap(); + + let chosen_node_dep = fdt.begin_node(node)?; + let cmdline = &boot_source.kernel_cmdline.to_string(); + fdt.set_property_string("bootargs", cmdline.as_str())?; + + let pl011_property_string = + format!("/pl011@{:x}", MEM_LAYOUT[LayoutEntryType::Uart as usize].0); + fdt.set_property_string("stdout-path", &pl011_property_string)?; + + match &boot_source.initrd { + Some(initrd) => { + fdt.set_property_u64("linux,initrd-start", initrd.initrd_addr)?; + let initrd_end = initrd + .initrd_addr + .checked_add(initrd.initrd_size) + .with_context(|| "initrd end overflow")?; + fdt.set_property_u64("linux,initrd-end", initrd_end)?; + } + None => {} + } + fdt.end_node(chosen_node_dep) + } +} + +impl device_tree::CompileFDT for StdMachine { + fn generate_fdt_node(&self, fdt: &mut FdtBuilder) -> Result<()> { + let node_dep = fdt.begin_node("")?; + self.base.generate_fdt_node(fdt)?; + self.generate_memory_node(fdt)?; + self.generate_chosen_node(fdt)?; + self.generate_pci_host_node(fdt)?; + fdt.end_node(node_dep) + } +} diff --git a/machine/src/error.rs b/machine/src/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..c8fb8ee67bae7eb2c5282d4b56ef91a72a00c534 --- /dev/null +++ b/machine/src/error.rs @@ -0,0 +1,124 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
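+
+//! Error definitions for the machine crate, expressed with `thiserror`
+//! (replacing the former `error_chain`-based `errors` module).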
+ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum MachineError { + #[error("")] + Cpu { + #[from] + source: cpu::error::CpuError, + }, + #[error("AddressSpace")] + AddressSpace { + #[from] + source: address_space::error::AddressSpaceError, + }, + #[error("IntCtrl")] + #[cfg(target_arch = "aarch64")] + IntCtrl { + #[from] + source: devices::IntCtrlErrs, + }, + #[error("Legacy")] + Legacy { + #[from] + source: devices::legacy::error::LegacyError, + }, + #[error("")] + PciErr { + #[from] + source: devices::pci::error::PciError, + }, + #[error("Util")] + Util { + #[from] + source: util::error::UtilError, + }, + #[error("")] + Acpi { + #[from] + source: acpi::error::AcpiError, + }, + #[error("Virtio")] + Virtio { + #[from] + source: virtio::error::VirtioError, + }, + #[error("MachineManager")] + MachineManager { + #[from] + source: machine_manager::config::error::ConfigError, + }, + #[error("Io")] + Io { + #[from] + source: std::io::Error, + }, + #[error("Failed to init PCIe host.")] + InitPCIeHostErr, + #[error("Failed to add {0} device.")] + AddDevErr(String), + #[error("Failed to load kernel.")] + LoadKernErr, + #[error("Failed to create memory address space")] + CrtMemSpaceErr, + #[error("Failed to create I/O address space")] + CrtIoSpaceErr, + #[error("Failed to register region in memory space: base={0},size={1}")] + RegMemRegionErr(u64, u64), + #[error("Failed to init eventfd {0}.")] + InitEventFdErr(String), + #[error("Failed to realize virtio mmio.")] + RlzVirtioMmioErr, + #[error("Failed to create irq chip.")] + #[cfg(target_arch = "x86_64")] + CrtIrqchipErr, + #[error("Failed to set identity map address.")] + #[cfg(target_arch = "x86_64")] + SetIdentityMapAddr, + #[error("Failed to set tss address.")] + #[cfg(target_arch = "x86_64")] + SetTssErr, + #[error("Failed to create PIT.")] + #[cfg(target_arch = "x86_64")] + CrtPitErr, + #[error("Failed to generate FDT.")] + #[cfg(target_arch = "aarch64")] + GenFdtErr, + #[error("Failed to write FDT: addr={0}, size={1}")] + #[cfg(target_arch = "aarch64")] + WrtFdtErr(u64, usize), + #[error("Failed to register event notifier.")] + RegNotifierErr, + #[error("Failed to run vcpu{0}.")] + StartVcpuErr(u8), + #[error("Failed to pause vcpu{0}.")] + PauseVcpuErr(u8), + #[error("Failed to resume vcpu{0}")] + ResumeVcpuErr(u8), + #[error("Failed to destroy vcpu{0}.")] + DestroyVcpuErr(u8), + #[error("A maximum of {0} {1} replaceable devices are supported.")] + RplDevLmtErr(String, usize), + #[error("The device type is {0}, but the target config is not for this type.")] + DevTypeErr(String), + #[error("{0}: failed to update config.")] + UpdCfgErr(String), + #[error("Failed to open file: {0}.")] + OpenFileErr(String), + #[error("Failed to init pflash device.")] + InitPflashErr, + #[error("Failed to realize pflash device.")] + RlzPflashErr, +} diff --git a/machine/src/lib.rs b/machine/src/lib.rs index e759bd8392de664cda81660b1a9108d2539d38ee..8ec8ad7782cfa1850d4a5fab7ed82be084234e77 100644 --- a/machine/src/lib.rs +++ b/machine/src/lib.rs @@ -10,153 +10,418 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -#[macro_use] -extern crate error_chain; -#[macro_use] -extern crate log; -#[macro_use] -extern crate machine_manager; +#[cfg(target_arch = "aarch64")] +pub mod aarch64; +pub mod error; +pub mod standard_common; #[cfg(target_arch = "x86_64")] -#[macro_use] -extern crate vmm_sys_util; +pub mod x86_64; -pub mod errors { - error_chain! 
{ - links { - AddressSpace(address_space::errors::Error, address_space::errors::ErrorKind); - IntCtrl(devices::IntCtrlErrs::Error, devices::IntCtrlErrs::ErrorKind) #[cfg(target_arch = "aarch64")]; - Legacy(devices::LegacyErrs::Error, devices::LegacyErrs::ErrorKind); - MicroVm(super::micro_vm::errors::Error, super::micro_vm::errors::ErrorKind); - StdVm(super::standard_vm::errors::Error, super::standard_vm::errors::ErrorKind); - Util(util::errors::Error, util::errors::ErrorKind); - Virtio(virtio::errors::Error, virtio::errors::ErrorKind); - MachineManager(machine_manager::config::errors::Error, machine_manager::config::errors::ErrorKind); - Hypervisor(hypervisor::errors::Error, hypervisor::errors::ErrorKind); - } +mod micro_common; - foreign_links { - KvmIoctl(kvm_ioctls::Error); - Io(std::io::Error); - } +pub use crate::error::MachineError; +pub use micro_common::LightMachine; +pub use standard_common::StdMachine; - errors { - AddDevErr(dev: String) { - display("Failed to add {} device.", dev) - } - LoadKernErr { - display("Failed to load kernel.") - } - CrtMemSpaceErr { - display("Failed to create memory address space") - } - CrtIoSpaceErr { - display("Failed to create I/O address space") - } - RegMemRegionErr(base: u64, size: u64) { - display("Failed to register region in memory space: base={},size={}", base, size) - } - InitEventFdErr(fd: String) { - display("Failed to init eventfd {}.", fd) - } - RlzVirtioMmioErr { - display("Failed to realize virtio mmio.") - } - #[cfg(target_arch = "x86_64")] - CrtIrqchipErr { - display("Failed to create irq chip.") - } - #[cfg(target_arch = "x86_64")] - SetTssErr { - display("Failed to set tss address.") - } +use std::collections::{BTreeMap, HashMap}; +use std::fs::{remove_file, File}; +use std::net::TcpListener; +use std::ops::Deref; +use std::os::unix::io::AsRawFd; +use std::os::unix::net::UnixListener; +#[cfg(any(feature = "windows_emu_pid", feature = "vfio_device"))] +use std::path::Path; +use std::rc::Rc; +use std::sync::{Arc, Barrier, Condvar, Mutex, RwLock, Weak}; +#[cfg(feature = "windows_emu_pid")] +use std::time::Duration; +use std::u64; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::Parser; +use log::{error, info, warn}; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; + +#[cfg(all(target_env = "ohos", feature = "ohui_srv"))] +use address_space::FileBackend; +use address_space::{ + create_backend_mem, create_default_mem, AddressAttr, AddressSpace, GuestAddress, Region, +}; +#[cfg(target_arch = "aarch64")] +use cpu::CPUFeatures; +use cpu::{ArchCPU, CPUBootConfig, CPUHypervisorOps, CPUInterface, CPUTopology, CpuTopology, CPU}; +use devices::legacy::FwCfgOps; +#[cfg(feature = "pvpanic")] +use devices::misc::pvpanic::{PvPanicPci, PvpanicDevConfig}; +#[cfg(feature = "scream")] +use devices::misc::scream::{Scream, ScreamConfig}; +#[cfg(feature = "demo_device")] +use devices::pci::demo_device::{DemoDev, DemoDevConfig}; +use devices::pci::{ + devices_register_pcidevops_type, register_pcidevops_type, PciBus, PciDevOps, PciHost, RootPort, + RootPortConfig, +}; +use devices::smbios::smbios_table::{build_smbios_ep30, SmbiosTable}; +use devices::smbios::{SMBIOS_ANCHOR_FILE, SMBIOS_TABLE_FILE}; +use devices::sysbus::{devices_register_sysbusdevops_type, to_sysbusdevops, SysBus, SysBusDevType}; +#[cfg(feature = "usb_camera")] +use devices::usb::camera::{UsbCamera, UsbCameraConfig}; +use devices::usb::keyboard::{UsbKeyboard, UsbKeyboardConfig}; +use devices::usb::storage::{UsbStorage, UsbStorageConfig}; +use 
devices::usb::tablet::{UsbTablet, UsbTabletConfig}; +#[cfg(feature = "usb_uas")] +use devices::usb::uas::{UsbUas, UsbUasConfig}; +#[cfg(feature = "usb_host")] +use devices::usb::usbhost::{UsbHost, UsbHostConfig}; +use devices::usb::xhci::xhci_pci::{XhciConfig, XhciPciDevice}; +use devices::usb::UsbDevice; +#[cfg(target_arch = "aarch64")] +use devices::InterruptController; +use devices::{convert_bus_ref, Bus, Device, PCI_BUS, SYS_BUS_DEVICE}; +#[cfg(feature = "virtio_scsi")] +use devices::{ + ScsiBus::get_scsi_key, + ScsiDisk::{ScsiDevConfig, ScsiDevice}, +}; +use hypervisor::{kvm::KvmHypervisor, test::TestHypervisor, HypervisorOps}; +#[cfg(feature = "usb_camera")] +use machine_manager::config::get_cameradev_by_id; +#[cfg(feature = "vhostuser_net")] +use machine_manager::config::get_chardev_socket_path; +use machine_manager::config::{ + complete_numa_node, get_class_type, get_pci_bdf, get_value_of_parameter, parse_numa_distance, + parse_numa_mem, str_slip_to_clap, BootIndexInfo, BootSource, ConfigCheck, DriveConfig, + DriveFile, Incoming, MachineMemConfig, MigrateMode, NetworkInterfaceConfig, NumaNode, + NumaNodes, PciBdf, SerialConfig, VirtioSerialInfo, VirtioSerialPortCfg, VmConfig, + FAST_UNPLUG_ON, MAX_VIRTIO_QUEUE, +}; +use machine_manager::event_loop::EventLoop; +use machine_manager::machine::{HypervisorType, MachineInterface, MachineLifecycle, VmState}; +use machine_manager::notifier::pause_notify; +use machine_manager::{check_arg_exist, check_arg_nonexist}; +use migration::{MigrateOps, MigrationManager}; +#[cfg(feature = "windows_emu_pid")] +use ui::console::{get_run_stage, VmRunningStage}; +use util::arg_parser; +use util::file::{clear_file, lock_file, unlock_file}; +use util::loop_context::{ + gen_delete_notifiers, EventNotifier, NotifierCallback, NotifierOperation, +}; +use util::seccomp::{BpfRule, SeccompOpt, SyscallFilter}; +#[cfg(feature = "vfio_device")] +use vfio::{vfio_register_pcidevops_type, VfioConfig, VfioDevice, VfioPciDevice, KVM_DEVICE_FD}; +#[cfg(feature = "virtio_scsi")] +use virtio::ScsiCntlr::{scsi_cntlr_create_scsi_bus, ScsiCntlr, ScsiCntlrConfig}; +#[cfg(any(feature = "vhost_vsock", feature = "vhost_net"))] +use virtio::VhostKern; +#[cfg(any(feature = "vhostuser_block", feature = "vhostuser_net"))] +use virtio::VhostUser; +#[cfg(all(target_env = "ohos", feature = "ohui_srv"))] +use virtio::VirtioDeviceQuirk; +use virtio::{ + balloon_allow_list, find_port_by_nr, get_max_nr, vhost, virtio_register_pcidevops_type, + virtio_register_sysbusdevops_type, Balloon, BalloonConfig, Block, BlockState, Serial, + SerialPort, VirtioBlkDevConfig, VirtioDevice, VirtioMmioDevice, VirtioMmioState, + VirtioNetState, VirtioPciDevice, VirtioSerialState, VIRTIO_TYPE_CONSOLE, +}; +#[cfg(feature = "virtio_gpu")] +use virtio::{Gpu, GpuDevConfig}; +#[cfg(feature = "virtio_rng")] +use virtio::{Rng, RngConfig, RngState}; + +#[cfg(feature = "windows_emu_pid")] +const WINDOWS_EMU_PID_DEFAULT_INTERVAL: u64 = 4000; +#[cfg(feature = "windows_emu_pid")] +const WINDOWS_EMU_PID_SHUTDOWN_INTERVAL: u64 = 1000; +#[cfg(feature = "windows_emu_pid")] +const WINDOWS_EMU_PID_POWERDOWN_INTERVAL: u64 = 30000; + +/// Machine structure include base members. +pub struct MachineBase { + /// `vCPU` topology, support sockets, cores, threads. + cpu_topo: CpuTopology, + /// `vCPU` devices. + cpus: Vec>, + /// Interrupt controller device. + #[cfg(target_arch = "aarch64")] + irq_chip: Option>, + /// Memory address space. + sys_mem: Arc, + // IO address space. 
+ #[cfg(target_arch = "x86_64")] + sys_io: Arc, + /// System bus. + sysbus: Arc>, + /// VM running state. + vm_state: Arc<(Mutex, Condvar)>, + /// Vm boot_source config. + boot_source: Arc>, + /// All configuration information of virtual machine. + vm_config: Arc>, + /// List of guest NUMA nodes information. + numa_nodes: Option, + /// Drive backend files. + drive_files: Arc>>, + /// FwCfg device. + fwcfg_dev: Option>>, + /// machine all backend memory region tree + machine_ram: Arc, + /// machine hypervisor. + hypervisor: Arc>, + /// migrate hypervisor. + migration_hypervisor: Arc>, +} + +impl MachineBase { + pub fn new( + vm_config: &VmConfig, + free_irqs: (i32, i32), + mmio_region: (u64, u64), + ) -> Result { + let cpu_topo = CpuTopology::new( + vm_config.machine_config.nr_cpus, + vm_config.machine_config.nr_sockets, + vm_config.machine_config.nr_dies, + vm_config.machine_config.nr_clusters, + vm_config.machine_config.nr_cores, + vm_config.machine_config.nr_threads, + vm_config.machine_config.max_cpus, + ); + let machine_ram = Arc::new(Region::init_container_region(u64::MAX, "MachineRam")); + let sys_mem = AddressSpace::new( + Region::init_container_region(u64::MAX, "SysMem"), + "sys_mem", + Some(machine_ram.clone()), + ) + .with_context(|| MachineError::CrtIoSpaceErr)?; + + #[cfg(target_arch = "x86_64")] + let sys_io = AddressSpace::new( + Region::init_container_region(1 << 16, "SysIo"), + "SysIo", + None, + ) + .with_context(|| MachineError::CrtIoSpaceErr)?; + let sysbus = SysBus::new( #[cfg(target_arch = "x86_64")] - CrtPitErr { - display("Failed to create PIT.") + &sys_io, + &sys_mem, + free_irqs, + mmio_region, + ); + + let hypervisor: Arc>; + let migration_hypervisor: Arc>; + match vm_config.machine_config.hypervisor { + HypervisorType::Kvm => { + let kvm_hypervisor = Arc::new(Mutex::new(KvmHypervisor::new()?)); + hypervisor = kvm_hypervisor.clone(); + migration_hypervisor = kvm_hypervisor; } - #[cfg(target_arch = "aarch64")] - GenFdtErr { - display("Failed to generate FDT.") + HypervisorType::Test => { + let test_hypervisor = Arc::new(Mutex::new(TestHypervisor::new()?)); + hypervisor = test_hypervisor.clone(); + migration_hypervisor = test_hypervisor; } + }; + + Ok(MachineBase { + cpu_topo, + cpus: Vec::new(), #[cfg(target_arch = "aarch64")] - WrtFdtErr(addr: u64, size: usize) { - display("Failed to write FDT: addr={}, size={}", addr, size) - } - RegNotifierErr { - display("Failed to register event notifier.") - } - StartVcpuErr(id: u8) { - display("Failed to run vcpu{}.", id) - } - PauseVcpuErr(id: u8) { - display("Failed to pause vcpu{}.", id) - } - ResumeVcpuErr(id: u8) { - display("Failed to resume vcpu{}.", id) - } - DestroyVcpuErr(id: u8) { - display("Failed to destroy vcpu{}.", id) + irq_chip: None, + sys_mem, + #[cfg(target_arch = "x86_64")] + sys_io, + sysbus: Arc::new(Mutex::new(sysbus)), + vm_state: Arc::new((Mutex::new(VmState::Created), Condvar::new())), + boot_source: Arc::new(Mutex::new(vm_config.clone().boot_source)), + vm_config: Arc::new(Mutex::new(vm_config.clone())), + numa_nodes: None, + drive_files: Arc::new(Mutex::new(vm_config.init_drive_files()?)), + fwcfg_dev: None, + machine_ram, + hypervisor, + migration_hypervisor, + }) + } + + #[cfg(target_arch = "x86_64")] + fn pio_in(&self, addr: u64, mut data: &mut [u8]) -> bool { + // The function pit_calibrate_tsc() in kernel gets stuck if data read from + // io-port 0x61 is not 0x20. 
+ // This problem only happens before Linux version 4.18 (fixed by 368a540e0) + if addr == 0x61 { + data[0] = 0x20; + return true; + } + if addr == 0x64 { + // UEFI will read PS2 Keyboard's Status register 0x64 to detect if + // this device is present. + data[0] = 0xFF; + } + + let length = data.len() as u64; + self.sys_io + .read(&mut data, GuestAddress(addr), length, AddressAttr::MMIO) + .is_ok() + } + + #[cfg(target_arch = "x86_64")] + fn pio_out(&self, addr: u64, mut data: &[u8]) -> bool { + use crate::x86_64::ich9_lpc::SLEEP_CTRL_OFFSET; + + let count = data.len() as u64; + if addr == u64::from(SLEEP_CTRL_OFFSET) { + if let Err(e) = self.cpus[0].pause() { + log::error!("Fail to pause bsp, {:?}", e); } } + self.sys_io + .write(&mut data, GuestAddress(addr), count, AddressAttr::MMIO) + .is_ok() } + + fn mmio_read(&self, addr: u64, mut data: &mut [u8]) -> bool { + let length = data.len() as u64; + self.sys_mem + .read(&mut data, GuestAddress(addr), length, AddressAttr::MMIO) + .is_ok() + } + + fn mmio_write(&self, addr: u64, mut data: &[u8]) -> bool { + let count = data.len() as u64; + self.sys_mem + .write(&mut data, GuestAddress(addr), count, AddressAttr::MMIO) + .is_ok() + } +} + +macro_rules! create_device_add_matches { + ( $command:expr; $controller: expr; + $(($($driver_name:tt)|+, $function_name:tt, $($arg:tt),*)),*; + $(#[cfg($($features: tt)*)] + ($($driver_name1:tt)|+, $function_name1:tt, $($arg1:tt),*)),* + ) => { + match $command { + $( + $($driver_name)|+ => { + $controller.$function_name($($arg),*).with_context(|| format!("add {} fail.", $command))?; + }, + )* + $( + #[cfg($($features)*)] + $($driver_name1)|+ => { + $controller.$function_name1($($arg1),*).with_context(|| format!("add {} fail.", $command))?; + }, + )* + _ => bail!("Unsupported device: {:?}", $command), + } + }; } -mod micro_vm; -mod standard_vm; +pub trait MachineOps: MachineLifecycle { + fn machine_base(&self) -> &MachineBase; -pub use micro_vm::LightMachine; -use pci::{PciBus, PciDevOps, PciHost, RootPort}; -pub use standard_vm::StdMachine; -use virtio::{ - BlockState, RngState, VhostKern, VirtioConsoleState, VirtioDevice, VirtioMmioState, - VirtioNetState, -}; + fn machine_base_mut(&mut self) -> &mut MachineBase; -use std::os::unix::io::AsRawFd; -use std::path::Path; -use std::sync::{Arc, Barrier, Mutex, Weak}; + fn build_smbios( + &self, + fw_cfg: &Arc>, + mem_array: Vec<(u64, u64)>, + ) -> Result<()> { + let vm_config = self.get_vm_config(); + let vmcfg_lock = vm_config.lock().unwrap(); + + let mut smbios = SmbiosTable::new(); + let table = smbios.build_smbios_tables( + vmcfg_lock.smbios.clone(), + &vmcfg_lock.machine_config, + mem_array, + ); + let ep = build_smbios_ep30(table.len() as u32); -#[cfg(target_arch = "x86_64")] -use address_space::KvmIoListener; -use address_space::{create_host_mmaps, AddressSpace, KvmMemoryListener, Region}; -use cpu::{ArchCPU, CPUBootConfig, CPUInterface, CPU}; -use devices::legacy::FwCfgOps; -#[cfg(target_arch = "aarch64")] -use devices::InterruptController; -use hypervisor::kvm::KVM_FDS; -use kvm_ioctls::VcpuFd; -use machine_manager::config::{ - get_multi_function, get_pci_bdf, parse_balloon, parse_blk, parse_device_id, parse_net, - parse_rng_dev, parse_root_port, parse_vfio, parse_virtconsole, parse_virtio_serial, - parse_vsock, MachineMemConfig, PFlashConfig, PciBdf, SerialConfig, VfioConfig, VmConfig, - FAST_UNPLUG_ON, -}; -use machine_manager::event_loop::EventLoop; -use machine_manager::machine::{KvmVmState, MachineInterface}; -use migration::{MigrationManager, 
MigrationRestoreOrder}; -use util::loop_context::{EventNotifier, NotifierCallback, NotifierOperation}; -use util::seccomp::{BpfRule, SeccompOpt, SyscallFilter}; -use vfio::{VfioDevice, VfioPciDevice}; -use virtio::{balloon_allow_list, Balloon, Block, Console, Rng, VirtioMmioDevice, VirtioPciDevice}; -use vmm_sys_util::epoll::EventSet; -use vmm_sys_util::eventfd::EventFd; + let mut locked_fw_cfg = fw_cfg.lock().unwrap(); + locked_fw_cfg + .add_file_entry(SMBIOS_TABLE_FILE, table) + .with_context(|| "Failed to add smbios table file entry")?; + locked_fw_cfg + .add_file_entry(SMBIOS_ANCHOR_FILE, ep) + .with_context(|| "Failed to add smbios anchor file entry")?; + + Ok(()) + } + + #[cfg(target_arch = "x86_64")] + fn load_boot_source(&self, fwcfg: Option<&Arc>>) -> Result; + + #[cfg(target_arch = "aarch64")] + fn load_boot_source( + &self, + fwcfg: Option<&Arc>>, + mem_start: u64, + ) -> Result { + use boot_loader::{load_linux, BootLoaderConfig}; + + let mut boot_source = self.machine_base().boot_source.lock().unwrap(); + let initrd = boot_source.initrd.as_ref().map(|b| b.initrd_file.clone()); + + let bootloader_config = BootLoaderConfig { + kernel: boot_source.kernel_file.clone(), + initrd, + mem_start, + }; + let layout = load_linux(&bootloader_config, &self.machine_base().sys_mem, fwcfg) + .with_context(|| MachineError::LoadKernErr)?; + if let Some(rd) = &mut boot_source.initrd { + rd.initrd_addr = layout.initrd_start; + rd.initrd_size = layout.initrd_size; + } -use errors::{ErrorKind, Result, ResultExt}; -use standard_vm::errors::Result as StdResult; + Ok(CPUBootConfig { + fdt_addr: layout.dtb_start, + boot_pc: layout.boot_pc, + }) + } -pub trait MachineOps { - /// Calculate the ranges of memory according to architecture. + #[cfg(target_arch = "aarch64")] + fn load_cpu_features(&self, vmcfg: &VmConfig) -> Result { + Ok((&vmcfg.machine_config.cpu_config).into()) + } + + /// Init memory of vm to architecture. /// /// # Arguments /// /// * `mem_size` - memory size of VM. - /// - /// # Returns - /// - /// A array of ranges, it's element represents (start_addr, size). - /// On x86_64, there is a gap ranged from (4G - 768M) to 4G, which will be skipped. - fn arch_ram_ranges(&self, mem_size: u64) -> Vec<(u64, u64)>; + fn init_machine_ram(&self, sys_mem: &Arc, mem_size: u64) -> Result<()>; - fn load_boot_source(&self, fwcfg: Option<&Arc>>) -> Result; + fn create_machine_ram(&self, mem_config: &MachineMemConfig, thread_num: u8) -> Result<()> { + let root = self.get_vm_ram(); + let numa_nodes = self.get_numa_nodes(); + + if numa_nodes.is_none() || mem_config.mem_zones.is_none() { + let default_mem = create_default_mem(mem_config, thread_num)?; + root.add_subregion_not_update(default_mem, 0_u64)?; + return Ok(()); + } + let zones = mem_config.mem_zones.as_ref().unwrap(); + let mut offset = 0_u64; + for node in numa_nodes.as_ref().unwrap().iter() { + for zone in zones.iter() { + if zone.id.eq(&node.1.mem_dev) { + let ram = create_backend_mem(zone, thread_num)?; + root.add_subregion_not_update(ram, offset)?; + offset = offset + .checked_add(zone.size) + .with_context(|| "total zone size overflow")?; + break; + } + } + } + Ok(()) + } /// Init I/O & memory address space and mmap guest memory. /// @@ -168,41 +433,17 @@ pub trait MachineOps { fn init_memory( &self, mem_config: &MachineMemConfig, - #[cfg(target_arch = "x86_64")] sys_io: &Arc, sys_mem: &Arc, - is_migrate: bool, nr_cpus: u8, ) -> Result<()> { - // KVM_CREATE_VM system call is invoked when KVM_FDS is used for the first time. 
The system - // call registers some notifier functions in the KVM, which are frequently triggered when - // doing memory prealloc.To avoid affecting memory prealloc performance, create_host_mmaps - // needs to be invoked first. - let mut mem_mappings = Vec::new(); - if !is_migrate { - let ram_ranges = self.arch_ram_ranges(mem_config.mem_size); - mem_mappings = create_host_mmaps(&ram_ranges, mem_config, nr_cpus) - .chain_err(|| "Failed to mmap guest ram.")?; - } - - sys_mem - .register_listener(Arc::new(Mutex::new(KvmMemoryListener::new( - KVM_FDS.load().fd.as_ref().unwrap().get_nr_memslots() as u32, - )))) - .chain_err(|| "Failed to register KVM listener for memory space.")?; - #[cfg(target_arch = "x86_64")] - sys_io - .register_listener(Arc::new(Mutex::new(KvmIoListener::default()))) - .chain_err(|| "Failed to register KVM listener for I/O address space.")?; + trace::trace_scope_start!(init_memory); + let migrate_info = self.get_migrate_info(); + if migrate_info.0 != MigrateMode::File { + self.create_machine_ram(mem_config, nr_cpus)?; + } - if !is_migrate { - for mmap in mem_mappings.iter() { - let base = mmap.start_address().raw_value(); - let size = mmap.size(); - sys_mem - .root() - .add_subregion(Region::init_ram_region(mmap.clone()), base) - .chain_err(|| ErrorKind::RegMemRegionErr(base, size))?; - } + if migrate_info.0 != MigrateMode::File { + self.init_machine_ram(sys_mem, mem_config.mem_size)?; } MigrationManager::register_memory_instance(sys_mem.clone()); @@ -210,19 +451,63 @@ pub trait MachineOps { Ok(()) } + fn mem_show(&self) { + self.machine_base().sys_mem.memspace_show(); + #[cfg(target_arch = "x86_64")] + self.machine_base().sys_io.memspace_show(); + self.get_vm_ram().mtree(0_u32); + } + + /// Create vcpu for virtual machine. + /// + /// # Arguments + /// + /// * `vcpu_id` - The id number of vcpu. + /// * `vm` - `MachineInterface` to obtain functions cpu can use. + /// * `max_cpus` - max cpu number of virtual machine. + fn create_vcpu( + vcpu_id: u8, + vm: Arc>, + hypervisor: Arc>, + #[cfg(target_arch = "x86_64")] max_cpus: u8, + ) -> Result> + where + Self: Sized, + { + let locked_hypervisor = hypervisor.lock().unwrap(); + let hypervisor_cpu: Arc = + locked_hypervisor.create_hypervisor_cpu(vcpu_id)?; + + #[cfg(target_arch = "aarch64")] + let arch_cpu = ArchCPU::new(u32::from(vcpu_id)); + #[cfg(target_arch = "x86_64")] + let arch_cpu = ArchCPU::new(u32::from(vcpu_id), max_cpus); + + let cpu = Arc::new(CPU::new( + hypervisor_cpu, + vcpu_id, + Arc::new(Mutex::new(arch_cpu)), + vm.clone(), + )); + Ok(cpu) + } + /// Init vcpu register with boot message. /// /// # Arguments /// /// * `vm` - `MachineInterface` to obtain functions cpu can use. /// * `nr_cpus` - The number of vcpus. - /// * `fds` - File descriptors obtained by creating new Vcpu in KVM. + /// * `max_cpus` - The max number of vcpus. /// * `boot_cfg` - Boot message generated by reading boot source to guest memory. 
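+    /// * `hypervisor` - Hypervisor used to create each hypervisor vCPU.
+    /// * `topology` - `CPUTopology` applied when each vCPU is realized.
+    /// * `vcpu_cfg` - Architecture specific vCPU features (aarch64 only).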
fn init_vcpu( vm: Arc>, + hypervisor: Arc>, nr_cpus: u8, - fds: &[Arc], - boot_cfg: &Option, + #[cfg(target_arch = "x86_64")] max_cpus: u8, + topology: &CPUTopology, + boot_cfg: &CPUBootConfig, + #[cfg(target_arch = "aarch64")] vcpu_cfg: &CPUFeatures, ) -> Result>> where Self: Sized, @@ -230,42 +515,51 @@ pub trait MachineOps { let mut cpus = Vec::>::new(); for vcpu_id in 0..nr_cpus { - #[cfg(target_arch = "aarch64")] - let arch_cpu = ArchCPU::new(u32::from(vcpu_id)); - #[cfg(target_arch = "x86_64")] - let arch_cpu = ArchCPU::new(u32::from(vcpu_id), u32::from(nr_cpus)); - - let cpu = Arc::new(CPU::new( - fds[vcpu_id as usize].clone(), + let cpu = Self::create_vcpu( vcpu_id, - Arc::new(Mutex::new(arch_cpu)), vm.clone(), - )); + hypervisor.clone(), + #[cfg(target_arch = "x86_64")] + max_cpus, + )?; cpus.push(cpu.clone()); - MigrationManager::register_device_instance( - cpu::ArchCPU::descriptor(), - cpu, - MigrationRestoreOrder::Default, - ); + MigrationManager::register_cpu_instance(cpu::ArchCPU::descriptor(), cpu, vcpu_id); } - if let Some(boot_config) = boot_cfg { - for cpu_index in 0..nr_cpus as usize { - cpus[cpu_index as usize] - .realize(boot_config) - .chain_err(|| { - format!( - "Failed to realize arch cpu register for CPU {}/KVM", - cpu_index - ) - })?; - } + for (cpu_index, cpu) in cpus.iter().enumerate() { + cpu.realize( + boot_cfg, + topology, + #[cfg(target_arch = "aarch64")] + vcpu_cfg, + ) + .with_context(|| { + format!( + "Failed to realize arch cpu register/features for CPU {}", + cpu_index + ) + })?; } Ok(cpus) } + /// Must be called after the CPUs have been realized and GIC has been created. + /// + /// # Arguments + /// + /// * `CPUFeatures` - The features of vcpu. + #[cfg(target_arch = "aarch64")] + fn cpu_post_init(&self, vcpu_cfg: &CPUFeatures) -> Result<()> { + if vcpu_cfg.pmu { + for cpu in self.machine_base().cpus.iter() { + cpu.hypervisor_cpu.init_pmu()?; + } + } + Ok(()) + } + /// Add interrupt controller. /// /// # Arguments @@ -274,7 +568,14 @@ pub trait MachineOps { fn init_interrupt_controller(&mut self, vcpu_count: u64) -> Result<()>; /// Add RTC device. - fn add_rtc_device(&mut self, #[cfg(target_arch = "x86_64")] mem_size: u64) -> Result<()>; + fn add_rtc_device(&mut self, #[cfg(target_arch = "x86_64")] _mem_size: u64) -> Result<()> { + Ok(()) + } + + /// Add Generic event device. + fn add_ged_device(&mut self) -> Result<()> { + Ok(()) + } /// Add serial device. /// @@ -298,49 +599,95 @@ pub trait MachineOps { /// # Arguments /// /// * `cfg_args` - Device configuration. 
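+    ///
+    /// The `classtype` in `cfg_args` selects the transport: `vhost-vsock-device`
+    /// creates a virtio-mmio device, any other value creates a virtio-pci device.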
+ #[cfg(feature = "vhost_vsock")] fn add_virtio_vsock(&mut self, cfg_args: &str) -> Result<()> { - let device_cfg = parse_vsock(cfg_args)?; + let device_cfg = + VhostKern::VsockConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; let sys_mem = self.get_sys_mem().clone(); let vsock = Arc::new(Mutex::new(VhostKern::Vsock::new(&device_cfg, &sys_mem))); - if cfg_args.contains("vhost-vsock-device") { - let device = VirtioMmioDevice::new(&sys_mem, vsock.clone()); - MigrationManager::register_device_instance_mutex( - VirtioMmioState::descriptor(), - self.realize_virtio_mmio_device(device) - .chain_err(|| ErrorKind::RlzVirtioMmioErr)?, - ); - } else { - let bdf = get_pci_bdf(cfg_args)?; - let multi_func = get_multi_function(cfg_args)?; - let (devfn, parent_bus) = self.get_devfn_and_parent_bus(&bdf)?; - let virtio_pci_device = VirtioPciDevice::new( - device_cfg.id, - devfn, - sys_mem, - vsock.clone(), - parent_bus, - multi_func, - ); - virtio_pci_device - .realize() - .chain_err(|| "Failed to add virtio pci vsock device")?; + match device_cfg.classtype.as_str() { + "vhost-vsock-device" => { + check_arg_nonexist!( + ("bus", device_cfg.bus), + ("addr", device_cfg.addr), + ("multifunction", device_cfg.multifunction) + ); + let device = self + .add_virtio_mmio_device(device_cfg.id.clone(), vsock.clone()) + .with_context(|| MachineError::RlzVirtioMmioErr)?; + MigrationManager::register_device_instance( + VirtioMmioState::descriptor(), + device, + &device_cfg.id, + ); + } + _ => { + check_arg_exist!(("bus", device_cfg.bus), ("addr", device_cfg.addr)); + let bdf = PciBdf::new(device_cfg.bus.clone().unwrap(), device_cfg.addr.unwrap()); + let multi_func = device_cfg.multifunction.unwrap_or_default(); + self.add_virtio_pci_device(&device_cfg.id, &bdf, vsock.clone(), multi_func, true) + .with_context(|| "Failed to add virtio pci vsock device")?; + } } - MigrationManager::register_device_instance_mutex( + + MigrationManager::register_device_instance( VhostKern::VsockState::descriptor(), vsock, + &device_cfg.id, ); Ok(()) } - fn realize_virtio_mmio_device( + fn add_virtio_mmio_device( &mut self, - _dev: VirtioMmioDevice, + _name: String, + _device: Arc>, ) -> Result>> { bail!("Virtio mmio devices not supported"); } - fn get_sys_mem(&mut self) -> &Arc; + fn get_cpu_topo(&self) -> &CpuTopology { + &self.machine_base().cpu_topo + } + + fn get_cpus(&self) -> &Vec> { + &self.machine_base().cpus + } + + fn get_sys_mem(&mut self) -> &Arc { + &self.machine_base().sys_mem + } + + fn get_vm_config(&self) -> Arc> { + self.machine_base().vm_config.clone() + } + + fn get_vm_state(&self) -> &Arc<(Mutex, Condvar)> { + &self.machine_base().vm_state + } + + fn get_vm_ram(&self) -> &Arc { + &self.machine_base().machine_ram + } + + fn get_numa_nodes(&self) -> &Option { + &self.machine_base().numa_nodes + } + + fn get_hypervisor(&self) -> Arc> { + self.machine_base().hypervisor.clone() + } + + /// Get migration mode and path from VM config. There are four modes in total: + /// Tcp, Unix, File and Unknown. + fn get_migrate_info(&self) -> Incoming { + if let Some((mode, path)) = self.get_vm_config().lock().unwrap().incoming.as_ref() { + return (*mode, path.to_string()); + } + + (MigrateMode::Unknown, String::new()) + } /// Add net device. 
/// @@ -353,81 +700,168 @@ pub trait MachineOps { } fn add_virtio_balloon(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { - let device_cfg = parse_balloon(vm_config, cfg_args)?; + if vm_config.dev_name.contains_key("balloon") { + bail!("Only one balloon device is supported for each vm."); + } + let config = BalloonConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + vm_config.dev_name.insert("balloon".to_string(), 1); + let sys_mem = self.get_sys_mem(); - let balloon = Arc::new(Mutex::new(Balloon::new(&device_cfg, sys_mem.clone()))); + let balloon = Arc::new(Mutex::new(Balloon::new(config.clone(), sys_mem.clone()))); Balloon::object_init(balloon.clone()); - if cfg_args.contains("virtio-balloon-device") { - let device = VirtioMmioDevice::new(sys_mem, balloon); - self.realize_virtio_mmio_device(device)?; - } else { - let name = device_cfg.id; - let bdf = get_pci_bdf(cfg_args)?; - let multi_func = get_multi_function(cfg_args)?; - let (devfn, parent_bus) = self.get_devfn_and_parent_bus(&bdf)?; - let sys_mem = self.get_sys_mem().clone(); - let virtio_pci_device = - VirtioPciDevice::new(name, devfn, sys_mem, balloon, parent_bus, multi_func); - virtio_pci_device - .realize() - .chain_err(|| "Failed to add virtio pci balloon device")?; + match config.classtype.as_str() { + "virtio-balloon-device" => { + check_arg_nonexist!( + ("bus", config.bus), + ("addr", config.addr), + ("multifunction", config.multifunction) + ); + self.add_virtio_mmio_device(config.id.clone(), balloon)?; + } + _ => { + check_arg_exist!(("bus", config.bus), ("addr", config.addr)); + let bdf = PciBdf::new(config.bus.unwrap(), config.addr.unwrap()); + let multi_func = config.multifunction.unwrap_or_default(); + self.add_virtio_pci_device(&config.id, &bdf, balloon, multi_func, false) + .with_context(|| "Failed to add virtio pci balloon device")?; + } } Ok(()) } - /// Add console device. + /// Add virtio serial device. /// /// # Arguments /// /// * `vm_config` - VM configuration. /// * `cfg_args` - Device configuration args. - fn add_virtio_console(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { - let device_cfg = parse_virtconsole(vm_config, cfg_args)?; - let sys_mem = self.get_sys_mem(); - let console = Arc::new(Mutex::new(Console::new(device_cfg.clone()))); - if let Some(serial) = &vm_config.virtio_serial { - if serial.pci_bdf.is_none() { - let device = VirtioMmioDevice::new(sys_mem, console.clone()); - MigrationManager::register_device_instance_mutex( - VirtioMmioState::descriptor(), - self.realize_virtio_mmio_device(device) - .chain_err(|| ErrorKind::RlzVirtioMmioErr)?, + fn add_virtio_serial(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { + if vm_config.virtio_serial.is_some() { + bail!("Only one virtio serial device is supported"); + } + let mut serial_cfg = + VirtioSerialInfo::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + serial_cfg.auto_max_ports(); + let serial = Arc::new(Mutex::new(Serial::new(serial_cfg.clone()))); + + match serial_cfg.classtype.as_str() { + "virtio-serial-device" => { + check_arg_nonexist!( + ("bus", serial_cfg.bus), + ("addr", serial_cfg.addr), + ("multifunction", serial_cfg.multifunction) ); - } else { - let name = device_cfg.id; - let virtio_serial_info = if let Some(serial_info) = &vm_config.virtio_serial { - serial_info - } else { - bail!("No virtio-serial-pci device configured for virtconsole"); - }; - // Reasonable, because for virtio-serial-pci device, the bdf has been checked. 
- let bdf = virtio_serial_info.pci_bdf.clone().unwrap(); - let multi_func = virtio_serial_info.multifunction; - let (devfn, parent_bus) = self.get_devfn_and_parent_bus(&bdf)?; - let sys_mem = self.get_sys_mem().clone(); - let virtio_pci_device = VirtioPciDevice::new( - name, - devfn, - sys_mem, - console.clone(), - parent_bus, - multi_func, + let device = self + .add_virtio_mmio_device(serial_cfg.id.clone(), serial.clone()) + .with_context(|| MachineError::RlzVirtioMmioErr)?; + MigrationManager::register_device_instance( + VirtioMmioState::descriptor(), + device, + &serial_cfg.id, ); - virtio_pci_device - .realize() - .chain_err(|| "Failed to add virtio pci console device")?; } - } else { - bail!("No virtio-serial-bus specified"); + _ => { + check_arg_exist!(("bus", serial_cfg.bus), ("addr", serial_cfg.addr)); + let bdf = PciBdf::new(serial_cfg.bus.clone().unwrap(), serial_cfg.addr.unwrap()); + let multi_func = serial_cfg.multifunction.unwrap_or_default(); + self.add_virtio_pci_device(&serial_cfg.id, &bdf, serial.clone(), multi_func, false) + .with_context(|| "Failed to add virtio pci serial device")?; + } } - MigrationManager::register_device_instance_mutex(VirtioConsoleState::descriptor(), console); + MigrationManager::register_device_instance( + VirtioSerialState::descriptor(), + serial, + &serial_cfg.id, + ); + + vm_config.virtio_serial = Some(serial_cfg); Ok(()) } - fn add_virtio_serial(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { - parse_virtio_serial(vm_config, cfg_args)?; + /// Add virtio serial port. + /// + /// # Arguments + /// + /// * `vm_config` - VM configuration. + /// * `cfg_args` - Device configuration args. + fn add_virtio_serial_port(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { + let serial_cfg = vm_config + .virtio_serial + .as_ref() + .with_context(|| "No virtio serial device specified")?; + + let mut virtio_device = None; + if serial_cfg.bus.is_none() { + // Micro_vm. + for dev in self.get_sysbus_devices().values() { + SYS_BUS_DEVICE!(dev, locked_busdev, sysbusdev); + if sysbusdev.sysbusdev_base().dev_type == SysBusDevType::VirtioMmio { + let virtio_mmio_dev = locked_busdev + .as_any() + .downcast_ref::() + .unwrap(); + if virtio_mmio_dev.device.lock().unwrap().device_type() == VIRTIO_TYPE_CONSOLE { + virtio_device = Some(virtio_mmio_dev.device.clone()); + break; + } + } + } + } else { + // Standard_vm. + let pci_dev = self + .get_pci_dev_by_id_and_type(vm_config, Some(&serial_cfg.id), "virtio-serial-pci") + .with_context(|| { + format!( + "Can not find virtio serial pci device {} from pci bus", + serial_cfg.id + ) + })?; + let locked_pcidev = pci_dev.lock().unwrap(); + let virtio_pcidev = locked_pcidev + .as_any() + .downcast_ref::() + .unwrap(); + virtio_device = Some(virtio_pcidev.get_virtio_device().clone()); + } + + let virtio_dev = virtio_device.with_context(|| "No virtio serial device found")?; + let mut virtio_dev_h = virtio_dev.lock().unwrap(); + let serial = virtio_dev_h.as_any_mut().downcast_mut::().unwrap(); + + let mut serialport_cfg = + VirtioSerialPortCfg::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let free_port0 = find_port_by_nr(&serial.ports, 0).is_none(); + // Note: port 0 is reserved for a virtconsole. 
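+        // If no `nr` was given on the command line, the next unused port number
+        // is chosen by `auto_nr()` below.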
+ let free_nr = get_max_nr(&serial.ports) + 1; + serialport_cfg.auto_nr(free_port0, free_nr, serial.max_nr_ports)?; + serialport_cfg.check()?; + if find_port_by_nr(&serial.ports, serialport_cfg.nr.unwrap()).is_some() { + bail!( + "Repetitive virtio serial port nr {}.", + serialport_cfg.nr.unwrap() + ); + } + let is_console = matches!(serialport_cfg.classtype.as_str(), "virtconsole"); + let chardev_cfg = vm_config + .chardev + .remove(&serialport_cfg.chardev) + .with_context(|| { + format!( + "Chardev {:?} not found or is in use", + &serialport_cfg.chardev + ) + })?; + + let mut serial_port = SerialPort::new(serialport_cfg, chardev_cfg); + let port = Arc::new(Mutex::new(serial_port.clone())); + serial_port.realize()?; + if !is_console { + serial_port.chardev.lock().unwrap().set_device(port.clone()); + } + serial.ports.lock().unwrap().push(port); + Ok(()) } @@ -437,126 +871,620 @@ pub trait MachineOps { /// /// * `vm_config` - VM configuration. /// * `cfg_args` - Device configuration arguments. + #[cfg(feature = "virtio_rng")] fn add_virtio_rng(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { - let device_cfg = parse_rng_dev(vm_config, cfg_args)?; - let sys_mem = self.get_sys_mem(); - let rng_dev = Arc::new(Mutex::new(Rng::new(device_cfg.clone()))); - if cfg_args.contains("virtio-rng-device") { - let device = VirtioMmioDevice::new(sys_mem, rng_dev.clone()); - self.realize_virtio_mmio_device(device) - .chain_err(|| "Failed to add virtio mmio rng device")?; - } else { - let bdf = get_pci_bdf(cfg_args)?; - let multi_func = get_multi_function(cfg_args)?; - let (devfn, parent_bus) = self.get_devfn_and_parent_bus(&bdf)?; - let sys_mem = self.get_sys_mem().clone(); - let vitio_pci_device = VirtioPciDevice::new( - device_cfg.id, - devfn, - sys_mem, - rng_dev.clone(), - parent_bus, - multi_func, - ); - vitio_pci_device - .realize() - .chain_err(|| "Failed to add pci rng device")?; + let rng_cfg = RngConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + rng_cfg.bytes_per_sec()?; + let rngobj_cfg = vm_config + .object + .rng_object + .remove(&rng_cfg.rng) + .with_context(|| "Object for rng-random device not found")?; + let rng_dev = Arc::new(Mutex::new(Rng::new(rng_cfg.clone(), rngobj_cfg))); + + match rng_cfg.classtype.as_str() { + "virtio-rng-device" => { + check_arg_nonexist!( + ("bus", rng_cfg.bus), + ("addr", rng_cfg.addr), + ("multifunction", rng_cfg.multifunction) + ); + self.add_virtio_mmio_device(rng_cfg.id.clone(), rng_dev.clone()) + .with_context(|| "Failed to add virtio mmio rng device")?; + } + _ => { + check_arg_exist!(("bus", rng_cfg.bus), ("addr", rng_cfg.addr)); + let bdf = PciBdf::new(rng_cfg.bus.clone().unwrap(), rng_cfg.addr.unwrap()); + let multi_func = rng_cfg.multifunction.unwrap_or_default(); + self.add_virtio_pci_device(&rng_cfg.id, &bdf, rng_dev.clone(), multi_func, false) + .with_context(|| "Failed to add pci rng device")?; + } } - MigrationManager::register_device_instance_mutex(RngState::descriptor(), rng_dev); + + MigrationManager::register_device_instance(RngState::descriptor(), rng_dev, &rng_cfg.id); Ok(()) } - fn get_pci_host(&mut self) -> StdResult<&Arc>> { + fn get_pci_host(&mut self) -> Result<&Arc>> { bail!("No pci host found"); } + /// Add virtioFs device. + /// + /// # Arguments + /// + /// * 'vm_config' - VM configuration. + /// * 'cfg_args' - Device configuration arguments. 
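+    ///
+    /// Note: vhost-user-fs requires shared guest memory
+    /// (`machine_config.mem_config.mem_share`); otherwise adding the device fails.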
+ fn add_virtio_fs(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { + let dev_cfg = + vhost::user::FsConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let char_dev = vm_config + .chardev + .remove(&dev_cfg.chardev) + .with_context(|| format!("Chardev {:?} not found or is in use", &dev_cfg.chardev))?; + let sys_mem = self.get_sys_mem().clone(); + + if !vm_config.machine_config.mem_config.mem_share { + bail!("When configuring the vhost-user-fs-device or vhost-user-fs-pci device, the memory must be shared."); + } + + let device = Arc::new(Mutex::new(vhost::user::Fs::new( + dev_cfg.clone(), + char_dev, + sys_mem, + ))); + match dev_cfg.classtype.as_str() { + "vhost-user-fs-device" => { + check_arg_nonexist!( + ("bus", dev_cfg.bus), + ("addr", dev_cfg.addr), + ("multifunction", dev_cfg.multifunction) + ); + self.add_virtio_mmio_device(dev_cfg.id.clone(), device) + .with_context(|| "Failed to add vhost user fs device")?; + } + _ => { + check_arg_exist!(("bus", dev_cfg.bus), ("addr", dev_cfg.addr)); + let bdf = PciBdf::new(dev_cfg.bus.clone().unwrap(), dev_cfg.addr.unwrap()); + let multi_func = dev_cfg.multifunction.unwrap_or_default(); + let root_bus = self.get_pci_host()?.lock().unwrap().child_bus().unwrap(); + PCI_BUS!(root_bus, locked_bus, root_pci_bus); + let msi_irq_manager = root_pci_bus.msi_irq_manager.clone(); + drop(locked_bus); + let need_irqfd = msi_irq_manager.as_ref().unwrap().irqfd_enable(); + self.add_virtio_pci_device(&dev_cfg.id, &bdf, device, multi_func, need_irqfd) + .with_context(|| "Failed to add pci fs device")?; + } + } + + Ok(()) + } + + fn get_sysbus_devices(&self) -> BTreeMap>> { + self.machine_base().sysbus.lock().unwrap().child_devices() + } + + fn get_fwcfg_dev(&mut self) -> Option>> { + self.machine_base().fwcfg_dev.clone() + } + + fn get_boot_order_list(&self) -> Option>>> { + None + } + + fn reset_all_devices(&mut self) -> Result<()> { + let sysbus = self.machine_base().sysbus.clone(); + sysbus.lock().unwrap().reset()?; + + // Todo: this logic will be deleted after deleting pci_host in machine struct. + if let Ok(pci_host) = self.get_pci_host() { + pci_host + .lock() + .unwrap() + .reset(true) + .with_context(|| "Fail to reset pci host")?; + } + + Ok(()) + } + + fn check_id_existed_in_xhci(&mut self, id: &str) -> Result { + let vm_config = self.get_vm_config(); + let locked_vmconfig = vm_config.lock().unwrap(); + let parent_dev = self + .get_pci_dev_by_id_and_type(&locked_vmconfig, None, "nec-usb-xhci") + .with_context(|| "Can not find parent device from pci bus")?; + let locked_parent_dev = parent_dev.lock().unwrap(); + let xhci_pci = locked_parent_dev + .as_any() + .downcast_ref::() + .with_context(|| "PciDevOps can not downcast to XhciPciDevice")?; + let mut locked_xhci = xhci_pci.xhci.lock().unwrap(); + let port = locked_xhci.find_usb_port_by_id(id); + Ok(port.is_some()) + } + fn check_device_id_existed(&mut self, name: &str) -> Result<()> { // If there is no pci bus, skip the id check, such as micro vm. if let Ok(pci_host) = self.get_pci_host() { - // Because device_del needs an id when removing a device, it's necessary to ensure that the id is unique. + // Because device_del needs an id when removing a device, it's necessary to ensure that + // the id is unique. 
if name.is_empty() { bail!("Device id is empty"); } - if PciBus::find_attached_bus(&pci_host.lock().unwrap().root_bus, name).is_some() { + if PciBus::find_attached_bus(&pci_host.lock().unwrap().child_bus().unwrap(), name) + .is_some() + { bail!("Device id {} existed", name); } + if self.check_id_existed_in_xhci(name).unwrap_or_default() { + bail!("Device id {} existed in xhci", name); + } } Ok(()) } - fn add_virtio_pci_blk(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { - let bdf = get_pci_bdf(cfg_args)?; - let multi_func = get_multi_function(cfg_args)?; - let device_cfg = parse_blk(vm_config, cfg_args)?; - let device = Arc::new(Mutex::new(Block::new(device_cfg.clone()))); - self.add_virtio_pci_device(&device_cfg.id, &bdf, device.clone(), multi_func)?; - MigrationManager::register_device_instance_mutex_with_id( + fn reset_fwcfg_boot_order(&mut self) -> Result<()> { + // SAFETY: unwrap is safe because stand machine always make sure it not return null. + let boot_order_vec = self.get_boot_order_list().unwrap(); + let mut locked_boot_order_vec = boot_order_vec.lock().unwrap().clone(); + if locked_boot_order_vec.is_empty() { + return Ok(()); + } + locked_boot_order_vec.sort_by(|x, y| x.boot_index.cmp(&y.boot_index)); + let mut fwcfg_boot_order_string = String::new(); + for item in &locked_boot_order_vec { + fwcfg_boot_order_string.push_str(&item.dev_path); + fwcfg_boot_order_string.push('\n'); + } + fwcfg_boot_order_string.push('\0'); + + let fwcfg = self.get_fwcfg_dev(); + if fwcfg.is_none() { + warn!("Direct kernel boot mode don't support set boot order"); + return Ok(()); + } + fwcfg + .unwrap() + .lock() + .unwrap() + .modify_file_entry("bootorder", fwcfg_boot_order_string.as_bytes().to_vec()) + .with_context(|| "Fail to add bootorder entry for standard VM.")?; + Ok(()) + } + + /// Check the boot index of device is duplicated or not. + /// + /// # Arguments + /// + /// * `bootindex` - The boot index of the device. + fn check_bootindex(&mut self, boot_index: u8) -> Result<()> { + // SAFETY: Unwrap is safe because StdMachine will overwrite this function, + // which ensure boot_order_list is not None. + let boot_order_list = self.get_boot_order_list().unwrap(); + if boot_order_list + .lock() + .unwrap() + .iter() + .any(|item| item.boot_index == boot_index) + { + bail!("Failed to add duplicated bootindex {}.", boot_index); + } + + Ok(()) + } + + /// Add boot index of device. + /// + /// # Arguments + /// + /// * `bootindex` - The boot index of the device. + /// * `dev_path` - The firmware device path of the device. + /// * `dev_id` - The id of the device. + fn add_bootindex_devices(&mut self, boot_index: u8, dev_path: &str, dev_id: &str) { + // SAFETY: Unwrap is safe because StdMachine will overwrite this function, + // which ensure boot_order_list is not None. + let boot_order_list = self.get_boot_order_list().unwrap(); + boot_order_list.lock().unwrap().push(BootIndexInfo { + boot_index, + id: dev_id.to_string(), + dev_path: dev_path.to_string(), + }); + } + + /// Delete boot index of device. + /// + /// # Arguments + /// + /// * `dev_id` - The id of the device. + fn del_bootindex_devices(&self, dev_id: &str) { + // Unwrap is safe because StdMachine will overwrite this function, + // which ensure boot_order_list is not None. 
+ let boot_order_list = self.get_boot_order_list().unwrap(); + let mut locked_boot_order_list = boot_order_list.lock().unwrap(); + locked_boot_order_list.retain(|item| item.id != dev_id); + } + + #[cfg(feature = "pvpanic")] + fn add_pvpanic(&mut self, cfg_args: &str) -> Result<()> { + let config = PvpanicDevConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let bdf = PciBdf::new(config.bus.clone(), config.addr); + let (devfn, parent_bus) = self.get_devfn_and_parent_bus(&bdf)?; + let pcidev = PvPanicPci::new(&config, devfn, parent_bus); + pcidev + .realize() + .with_context(|| "Failed to realize pvpanic device")?; + + Ok(()) + } + + fn add_virtio_pci_blk( + &mut self, + vm_config: &mut VmConfig, + cfg_args: &str, + hotplug: bool, + ) -> Result<()> { + let mut device_cfg = + VirtioBlkDevConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + check_arg_exist!(("bus", device_cfg.bus), ("addr", device_cfg.addr)); + let bdf = PciBdf::new(device_cfg.bus.clone().unwrap(), device_cfg.addr.unwrap()); + let multi_func = device_cfg.multifunction.unwrap_or_default(); + if device_cfg.num_queues.is_none() { + let queues_auto = VirtioPciDevice::virtio_pci_auto_queues_num( + 0, + vm_config.machine_config.nr_cpus, + MAX_VIRTIO_QUEUE, + ); + device_cfg.num_queues = Some(queues_auto); + } + if let Some(bootindex) = device_cfg.bootindex { + self.check_bootindex(bootindex) + .with_context(|| "Fail to add virtio pci blk device for invalid bootindex")?; + } + + let drive_cfg = vm_config + .drives + .remove(&device_cfg.drive) + .with_context(|| "No drive configured matched for blk device")?; + + let device = Arc::new(Mutex::new(Block::new( + device_cfg.clone(), + drive_cfg, + self.get_drive_files(), + ))); + let pci_dev = self + .add_virtio_pci_device(&device_cfg.id, &bdf, device.clone(), multi_func, false) + .with_context(|| "Failed to add virtio pci device")?; + if let Some(bootindex) = device_cfg.bootindex { + // Eg: OpenFirmware device path(virtio-blk disk): + // /pci@i0cf8/scsi@6[,3]/disk@0,0 + // | | | | | + // | | | | | + // | | | fixed 0. + // | PCI slot,[function] holding disk. + // PCI root as system bus port. 
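+ // Each (bootindex, dev_path) pair recorded here is later sorted by bootindex in
+ // reset_fwcfg_boot_order() and written to the fwcfg "bootorder" file, one device
+ // path per line, terminated by '\0'.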
+ if let Some(dev_path) = pci_dev.lock().unwrap().get_dev_path() { + self.add_bootindex_devices(bootindex, &dev_path, &device_cfg.id); + } + } + MigrationManager::register_device_instance( BlockState::descriptor(), device, &device_cfg.id, ); - self.reset_bus(&device_cfg.id)?; + if !hotplug { + self.reset_bus(&device_cfg.id)?; + } Ok(()) } - fn add_virtio_pci_net(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { - let bdf = get_pci_bdf(cfg_args)?; - let multi_func = get_multi_function(cfg_args)?; - let device_cfg = parse_net(vm_config, cfg_args)?; - let device: Arc> = if device_cfg.vhost_type.is_some() { - Arc::new(Mutex::new(VhostKern::Net::new( - &device_cfg, - self.get_sys_mem(), - ))) + #[cfg(feature = "virtio_scsi")] + fn add_virtio_pci_scsi( + &mut self, + vm_config: &mut VmConfig, + cfg_args: &str, + hotplug: bool, + ) -> Result<()> { + let mut device_cfg = + ScsiCntlrConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let bdf = PciBdf::new(device_cfg.bus.clone(), device_cfg.addr); + let multi_func = device_cfg.multifunction.unwrap_or_default(); + if device_cfg.num_queues.is_none() { + let queues_auto = VirtioPciDevice::virtio_pci_auto_queues_num( + 0, + vm_config.machine_config.nr_cpus, + MAX_VIRTIO_QUEUE, + ); + device_cfg.num_queues = Some(u32::from(queues_auto)); + } + let device = Arc::new(Mutex::new(ScsiCntlr::new(device_cfg.clone()))); + + let bus_name = format!("{}.0", device_cfg.id); + scsi_cntlr_create_scsi_bus(&bus_name, &device)?; + + self.add_virtio_pci_device(&device_cfg.id, &bdf, device, multi_func, false) + .with_context(|| "Failed to add virtio scsi controller")?; + if !hotplug { + self.reset_bus(&device_cfg.id)?; + } + Ok(()) + } + + #[cfg(feature = "virtio_scsi")] + fn add_scsi_device(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { + let device_cfg = ScsiDevConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let drive_arg = vm_config + .drives + .remove(&device_cfg.drive) + .with_context(|| "No drive configured matched for scsi device")?; + if let Some(bootindex) = device_cfg.bootindex { + self.check_bootindex(bootindex) + .with_context(|| "Failed to add scsi device for invalid bootindex")?; + } + + // Bus name `$parent_cntlr_name.0` is checked when parsing by clap. + let cntlr = device_cfg.bus.split('.').collect::>()[0].to_string(); + let pci_dev = self + .get_pci_dev_by_id_and_type(vm_config, Some(&cntlr), "virtio-scsi-pci") + .with_context(|| format!("Can not find scsi controller from pci bus {}", cntlr))?; + let locked_pcidev = pci_dev.lock().unwrap(); + let virtio_pcidev = locked_pcidev + .as_any() + .downcast_ref::() + .unwrap(); + let prefix = virtio_pcidev.get_dev_path().unwrap(); + let virtio_device = virtio_pcidev.get_virtio_device().lock().unwrap(); + let cntlr = virtio_device.as_any().downcast_ref::().unwrap(); + let bus = cntlr.bus.as_ref().unwrap(); + let key = get_scsi_key(device_cfg.target, device_cfg.lun); + if bus.lock().unwrap().child_dev(key).is_some() { + bail!("Wrong! Two scsi devices have the same scsi-id and lun!"); + } + let iothread = cntlr.config.iothread.clone(); + let scsi_device = ScsiDevice::new( + device_cfg.clone(), + drive_arg, + self.get_drive_files(), + iothread, + bus.clone(), + ); + let device = scsi_device.realize()?; + bus.lock().unwrap().attach_child(key, device)?; + + if let Some(bootindex) = device_cfg.bootindex { + // Eg: OpenFirmware device path(virtio-scsi disk): + // /pci@i0cf8/scsi@7[,3]/channel@0/disk@2,3 + // | | | | | | + // | | | | target,lun. 
+ // | | | channel(unused, fixed 0). + // | PCI slot,[function] holding SCSI controller. + // PCI root as system bus port. + let dev_path = + format! {"{}/channel@0/disk@{:x},{:x}", prefix, device_cfg.target, device_cfg.lun}; + self.add_bootindex_devices(bootindex, &dev_path, &device_cfg.id); + } + Ok(()) + } + + fn add_virtio_pci_net( + &mut self, + vm_config: &mut VmConfig, + cfg_args: &str, + hotplug: bool, + ) -> Result<()> { + let mut net_cfg = + NetworkInterfaceConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + net_cfg.auto_iothread(); + let netdev_cfg = vm_config + .netdevs + .remove(&net_cfg.netdev) + .with_context(|| format!("Netdev: {:?} not found for net device", &net_cfg.netdev))?; + check_arg_exist!(("bus", net_cfg.bus), ("addr", net_cfg.addr)); + let bdf = PciBdf::new(net_cfg.bus.clone().unwrap(), net_cfg.addr.unwrap()); + let multi_func = net_cfg.multifunction.unwrap_or_default(); + + #[cfg(all(not(feature = "vhost_net"), not(feature = "vhostuser_net")))] + let need_irqfd = false; + #[cfg(any(feature = "vhost_net", feature = "vhostuser_net"))] + let mut need_irqfd = false; + let device: Arc> = if netdev_cfg.vhost_type().is_some() { + if netdev_cfg.vhost_type().unwrap() == "vhost-kernel" { + #[cfg(not(feature = "vhost_net"))] + bail!("Unsupported Vhost_net"); + + #[cfg(feature = "vhost_net")] + { + need_irqfd = true; + Arc::new(Mutex::new(VhostKern::Net::new( + &net_cfg, + netdev_cfg, + self.get_sys_mem(), + ))) + } + } else { + #[cfg(not(feature = "vhostuser_net"))] + bail!("Unsupported Vhostuser_net"); + + #[cfg(feature = "vhostuser_net")] + { + need_irqfd = true; + let chardev = netdev_cfg.chardev.clone().with_context(|| { + format!("Chardev not configured for netdev {:?}", netdev_cfg.id) + })?; + let chardev_cfg = vm_config + .chardev + .remove(&chardev) + .with_context(|| format!("Chardev: {:?} not found for netdev", chardev))?; + let sock_path = get_chardev_socket_path(chardev_cfg)?; + Arc::new(Mutex::new(VhostUser::Net::new( + &net_cfg, + netdev_cfg, + sock_path, + self.get_sys_mem(), + ))) + } + } } else { - let device = Arc::new(Mutex::new(virtio::Net::new(device_cfg.clone()))); - MigrationManager::register_device_instance_mutex_with_id( + let device = Arc::new(Mutex::new(virtio::Net::new(net_cfg.clone(), netdev_cfg))); + MigrationManager::register_device_instance( VirtioNetState::descriptor(), device.clone(), - &device_cfg.id, + &net_cfg.id, ); device }; - self.add_virtio_pci_device(&device_cfg.id, &bdf, device, multi_func)?; - self.reset_bus(&device_cfg.id)?; + self.add_virtio_pci_device(&net_cfg.id, &bdf, device, multi_func, need_irqfd)?; + if !hotplug { + self.reset_bus(&net_cfg.id)?; + } Ok(()) } - fn create_vfio_pci_device( + #[cfg(feature = "vhostuser_block")] + fn add_vhost_user_blk_pci( &mut self, - id: &str, - bdf: &PciBdf, - host: &str, - multifunc: bool, + vm_config: &mut VmConfig, + cfg_args: &str, + hotplug: bool, ) -> Result<()> { - let (devfn, parent_bus) = self.get_devfn_and_parent_bus(bdf)?; - let path = format!("/sys/bus/pci/devices/{}", host); + let mut device_cfg = VhostUser::VhostUserBlkDevConfig::try_parse_from(str_slip_to_clap( + cfg_args, true, false, + ))?; + check_arg_exist!(("bus", device_cfg.bus), ("addr", device_cfg.addr)); + let bdf = PciBdf::new(device_cfg.bus.clone().unwrap(), device_cfg.addr.unwrap()); + if device_cfg.num_queues.is_none() { + let queues_auto = VirtioPciDevice::virtio_pci_auto_queues_num( + 0, + vm_config.machine_config.nr_cpus, + MAX_VIRTIO_QUEUE, + ); + device_cfg.num_queues = Some(queues_auto); + } 
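+ // As with virtio-blk-pci above, an unspecified `num-queues` is derived from the
+ // vCPU count, bounded by MAX_VIRTIO_QUEUE.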
+ let chardev_cfg = vm_config + .chardev + .remove(&device_cfg.chardev) + .with_context(|| { + format!( + "Chardev: {:?} not found for vhost user blk", + &device_cfg.chardev + ) + })?; + + let device: Arc> = Arc::new(Mutex::new(VhostUser::Block::new( + &device_cfg, + chardev_cfg, + self.get_sys_mem(), + ))); + let pci_dev = self + .add_virtio_pci_device(&device_cfg.id, &bdf, device.clone(), false, true) + .with_context(|| { + format!( + "Failed to add virtio pci device, device id: {}", + &device_cfg.id + ) + })?; + if let Some(bootindex) = device_cfg.bootindex { + if let Some(dev_path) = pci_dev.lock().unwrap().get_dev_path() { + self.add_bootindex_devices(bootindex, &dev_path, &device_cfg.id); + } + } + if !hotplug { + self.reset_bus(&device_cfg.id)?; + } + Ok(()) + } + + #[cfg(feature = "vhostuser_block")] + fn add_vhost_user_blk_device( + &mut self, + vm_config: &mut VmConfig, + cfg_args: &str, + ) -> Result<()> { + let device_cfg = VhostUser::VhostUserBlkDevConfig::try_parse_from(str_slip_to_clap( + cfg_args, true, false, + ))?; + check_arg_nonexist!(("bus", device_cfg.bus), ("addr", device_cfg.addr)); + let chardev_cfg = vm_config + .chardev + .remove(&device_cfg.chardev) + .with_context(|| { + format!( + "Chardev: {:?} not found for vhost user blk", + &device_cfg.chardev + ) + })?; + let device: Arc> = Arc::new(Mutex::new(VhostUser::Block::new( + &device_cfg, + chardev_cfg, + self.get_sys_mem(), + ))); + self.add_virtio_mmio_device(device_cfg.id.clone(), device) + .with_context(|| "Failed to add vhost user block device")?; + Ok(()) + } + #[cfg(feature = "vfio_device")] + fn add_vfio_device(&mut self, cfg_args: &str, hotplug: bool) -> Result<()> { + let hypervisor = self.get_hypervisor(); + let locked_hypervisor = hypervisor.lock().unwrap(); + *KVM_DEVICE_FD.lock().unwrap() = locked_hypervisor.create_vfio_device(); + + let device_cfg = VfioConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let bdf = PciBdf::new(device_cfg.bus.clone(), device_cfg.addr); + let multi_func = device_cfg.multifunction.unwrap_or_default(); + let (devfn, parent_bus) = self.get_devfn_and_parent_bus(&bdf)?; + let path = if device_cfg.host.is_some() { + format!("/sys/bus/pci/devices/{}", device_cfg.host.unwrap()) + } else { + device_cfg.sysfsdev.unwrap() + }; let device = VfioDevice::new(Path::new(&path), self.get_sys_mem()) - .chain_err(|| "Failed to create vfio device.")?; + .with_context(|| "Failed to create vfio device.")?; let vfio_pci = VfioPciDevice::new( device, devfn, - id.to_string(), + device_cfg.id.to_string(), parent_bus, - multifunc, + multi_func, self.get_sys_mem().clone(), ); - VfioPciDevice::realize(vfio_pci).chain_err(|| "Failed to realize vfio-pci device.")?; + VfioPciDevice::realize(vfio_pci).with_context(|| "Failed to realize vfio-pci device.")?; + + if !hotplug { + self.reset_bus(&device_cfg.id)?; + } Ok(()) } - fn add_vfio_device(&mut self, cfg_args: &str) -> Result<()> { - let device_cfg: VfioConfig = parse_vfio(cfg_args)?; - let bdf = get_pci_bdf(cfg_args)?; - let multifunc = get_multi_function(cfg_args)?; - self.create_vfio_pci_device(&device_cfg.id, &bdf, &device_cfg.host, multifunc)?; - self.reset_bus(&device_cfg.id)?; + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + fn update_ohui_srv(&mut self, _passthru: bool) {} + + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + fn get_ohui_fb(&self) -> Option { + None + } + + #[cfg(feature = "virtio_gpu")] + fn add_virtio_pci_gpu(&mut self, cfg_args: &str) -> Result<()> { + let config = 
GpuDevConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + config.check(); + let bdf = PciBdf::new(config.bus.clone(), config.addr); + let device = Arc::new(Mutex::new(Gpu::new(config.clone()))); + + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + if device.lock().unwrap().device_quirk() == Some(VirtioDeviceQuirk::VirtioGpuEnableBar0) + && self.get_ohui_fb().is_some() + { + self.update_ohui_srv(true); + device.lock().unwrap().set_bar0_fb(self.get_ohui_fb()); + } + + self.add_virtio_pci_device(&config.id, &bdf, device, false, false)?; Ok(()) } - fn get_devfn_and_parent_bus(&mut self, bdf: &PciBdf) -> StdResult<(u8, Weak>)> { + fn get_devfn_and_parent_bus(&mut self, bdf: &PciBdf) -> Result<(u8, Weak>)> { let pci_host = self.get_pci_host()?; - let bus = pci_host.lock().unwrap().root_bus.clone(); + let bus = pci_host.lock().unwrap().child_bus().unwrap().clone(); let pci_bus = PciBus::find_bus_by_name(&bus, &bdf.bus); if pci_bus.is_none() { bail!("Parent bus :{} not found", &bdf.bus); @@ -567,24 +1495,18 @@ pub trait MachineOps { } fn add_pci_root_port(&mut self, cfg_args: &str) -> Result<()> { - let bdf = get_pci_bdf(cfg_args)?; - let (devfn, parent_bus) = self.get_devfn_and_parent_bus(&bdf)?; - let device_cfg = parse_root_port(cfg_args)?; + let dev_cfg = RootPortConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let bdf = PciBdf::new(dev_cfg.bus.clone(), dev_cfg.addr); + let (_, parent_bus) = self.get_devfn_and_parent_bus(&bdf)?; let pci_host = self.get_pci_host()?; - let bus = pci_host.lock().unwrap().root_bus.clone(); - if PciBus::find_bus_by_name(&bus, &device_cfg.id).is_some() { - bail!("ID {} already exists."); + let bus = pci_host.lock().unwrap().child_bus().unwrap().clone(); + if PciBus::find_bus_by_name(&bus, &dev_cfg.id).is_some() { + bail!("ID {} already exists.", &dev_cfg.id); } - let rootport = RootPort::new( - device_cfg.id, - devfn, - device_cfg.port, - parent_bus, - device_cfg.multifunction, - ); + let rootport = RootPort::new(dev_cfg, parent_bus); rootport .realize() - .chain_err(|| "Failed to add pci root port")?; + .with_context(|| "Failed to add pci root port")?; Ok(()) } @@ -594,7 +1516,8 @@ pub trait MachineOps { bdf: &PciBdf, device: Arc>, multi_func: bool, - ) -> Result<()> { + need_irqfd: bool, + ) -> Result>> { let (devfn, parent_bus) = self.get_devfn_and_parent_bus(bdf)?; let sys_mem = self.get_sys_mem(); let pcidev = VirtioPciDevice::new( @@ -604,40 +1527,38 @@ pub trait MachineOps { device, parent_bus, multi_func, + need_irqfd, ); + let clone_pcidev = Arc::new(Mutex::new(pcidev.clone())); pcidev .realize() - .chain_err(|| "Failed to add virtio pci device")?; - Ok(()) + .with_context(|| "Failed to add virtio pci device")?; + Ok(clone_pcidev) } /// Set the parent bus slot on when device attached fn reset_bus(&mut self, dev_id: &str) -> Result<()> { let pci_host = self.get_pci_host()?; let locked_pci_host = pci_host.lock().unwrap(); - let bus = - if let Some((bus, _)) = PciBus::find_attached_bus(&locked_pci_host.root_bus, dev_id) { - bus - } else { - bail!("Bus not found, dev id {}", dev_id); - }; + let bus = PciBus::find_attached_bus(&locked_pci_host.child_bus().unwrap(), dev_id) + .with_context(|| format!("Bus not found, dev id {}", dev_id))? 
+ .0; let locked_bus = bus.lock().unwrap(); - if locked_bus.name == "pcie.0" { + if locked_bus.name() == "pcie.0" { // No need to reset root bus return Ok(()); } - let parent_bridge = if let Some(bridge) = locked_bus.parent_bridge.as_ref() { - bridge - } else { - bail!("Parent bridge does not exist, dev id {}", dev_id); - }; + let parent_bridge = locked_bus + .parent_device() + .with_context(|| format!("Parent bridge does not exist, dev id {}", dev_id))?; let dev = parent_bridge.upgrade().unwrap(); let locked_dev = dev.lock().unwrap(); let name = locked_dev.name(); drop(locked_dev); let mut devfn = None; - let locked_bus = locked_pci_host.root_bus.lock().unwrap(); - for (id, dev) in &locked_bus.devices { + let bus = locked_pci_host.child_bus().unwrap(); + let locked_bus = bus.lock().unwrap(); + for (id, dev) in &locked_bus.child_devices() { if dev.lock().unwrap().name() == name { devfn = Some(*id); break; @@ -645,12 +1566,12 @@ pub trait MachineOps { } drop(locked_bus); // It's safe to call devfn.unwrap(), because the bus exists. - match locked_pci_host.find_device(0, devfn.unwrap()) { + match locked_pci_host.find_device(0, u8::try_from(devfn.unwrap())?) { Some(dev) => dev .lock() .unwrap() .reset(false) - .chain_err(|| "Failed to reset bus"), + .with_context(|| "Failed to reset bus"), None => bail!("Failed to found device"), } } @@ -670,6 +1591,307 @@ pub trait MachineOps { Ok(()) } + /// Add numa nodes information to standard machine. + /// + /// # Arguments + /// + /// * `vm_config` - VM Configuration. + fn add_numa_nodes(&mut self, vm_config: &mut VmConfig) -> Result> { + if vm_config.numa_nodes.is_empty() { + return Ok(None); + } + + let mut numa_nodes: NumaNodes = BTreeMap::new(); + vm_config.numa_nodes.sort_by(|p, n| n.0.cmp(&p.0)); + for numa in vm_config.numa_nodes.iter() { + match numa.0.as_str() { + "node" => { + let node_config = parse_numa_mem(numa.1.as_str())?; + if numa_nodes.contains_key(&node_config.numa_id) { + bail!("Numa node id is repeated {}", node_config.numa_id); + } + let mut numa_node = NumaNode { + cpus: node_config.cpus, + mem_dev: node_config.mem_dev.clone(), + ..Default::default() + }; + + numa_node.size = vm_config + .object + .mem_object + .remove(&node_config.mem_dev) + .map(|mem_conf| mem_conf.size) + .with_context(|| { + format!( + "Object for memory-backend {} config not found", + node_config.mem_dev + ) + })?; + numa_nodes.insert(node_config.numa_id, numa_node); + } + "dist" => { + let dist_config = parse_numa_distance(numa.1.as_str())?; + if !numa_nodes.contains_key(&dist_config.numa_id) { + bail!("Numa node id is not found {}", dist_config.numa_id); + } + if !numa_nodes.contains_key(&dist_config.destination) { + bail!("Numa node id is not found {}", dist_config.destination); + } + + if let Some(n) = numa_nodes.get_mut(&dist_config.numa_id) { + if n.distances.contains_key(&dist_config.destination) { + bail!( + "Numa destination info {} repeat settings", + dist_config.destination + ); + } + n.distances + .insert(dist_config.destination, dist_config.distance); + } + } + _ => { + bail!("Unsupported args for NUMA node: {}", numa.0.as_str()); + } + } + } + + // Complete user parameters if necessary. + complete_numa_node( + &mut numa_nodes, + vm_config.machine_config.nr_cpus, + vm_config.machine_config.mem_config.mem_size, + )?; + + Ok(Some(numa_nodes)) + } + + /// Add usb xhci controller. + /// + /// # Arguments + /// + /// * `cfg_args` - XHCI Configuration. 
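+ ///
+ /// An xhci controller must be configured before any usb-* device can be attached,
+ /// e.g. (illustrative) `-device nec-usb-xhci,id=xhci0,bus=pcie.0,addr=0x5`.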
+ fn add_usb_xhci(&mut self, cfg_args: &str) -> Result<()> { + let device_cfg = XhciConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let bdf = PciBdf::new(device_cfg.bus.clone(), device_cfg.addr); + let (devfn, parent_bus) = self.get_devfn_and_parent_bus(&bdf)?; + + let pcidev = XhciPciDevice::new(&device_cfg, devfn, parent_bus, self.get_sys_mem()); + + pcidev + .realize() + .with_context(|| "Failed to realize usb xhci device")?; + Ok(()) + } + + /// Add scream sound based on ivshmem. + /// + /// # Arguments + /// + /// * `cfg_args` - scream configuration. + #[cfg(feature = "scream")] + fn add_ivshmem_scream( + &mut self, + vm_config: &mut VmConfig, + cfg_args: &str, + token_id: Option>>, + ) -> Result<()> { + let config = ScreamConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let bdf = PciBdf::new(config.bus.clone(), config.addr); + let (_, parent_bus) = self.get_devfn_and_parent_bus(&bdf)?; + + let mem_cfg = vm_config + .object + .mem_object + .remove(&config.memdev) + .with_context(|| { + format!( + "Object for memory-backend-ram {} config not found", + config.memdev + ) + })?; + + if !mem_cfg.share { + bail!("Object for share config is not on"); + } + + let mut scream = Scream::new(mem_cfg.size, config, token_id)?; + scream + .realize(parent_bus) + .with_context(|| "Failed to realize scream device") + } + + /// Get the corresponding device from the PCI bus based on the device id and device type name. + /// + /// # Arguments + /// + /// * `vm_config` - VM configuration. + /// * `id` - Device id. + /// * `dev_type` - Device type name. + fn get_pci_dev_by_id_and_type( + &mut self, + vm_config: &VmConfig, + id: Option<&str>, + dev_type: &str, + ) -> Option>> { + let (id_check, id_str) = if id.is_some() { + (true, format! {"id={}", id.unwrap()}) + } else { + (false, "".to_string()) + }; + + for dev in &vm_config.devices { + if dev.0.as_str() != dev_type || id_check && !dev.1.contains(&id_str) { + continue; + } + + let cfg_args = dev.1.as_str(); + let bdf = get_pci_bdf(cfg_args).ok()?; + let devfn = (bdf.addr.0 << 3) + bdf.addr.1; + let pci_host = self.get_pci_host().ok()?; + let root_bus = pci_host.lock().unwrap().child_bus().unwrap().clone(); + if let Some(bus) = PciBus::find_bus_by_name(&root_bus, &bdf.bus) { + PCI_BUS!(bus, locked_bus, pci_bus); + return pci_bus.get_device(0, devfn); + } else { + return None; + } + } + None + } + + /// Attach usb device to xhci controller. + /// + /// # Arguments + /// + /// * `vm_config` - VM configuration. + /// * `usb_dev` - Usb device. + fn attach_usb_to_xhci_controller( + &mut self, + vm_config: &mut VmConfig, + usb_dev: Arc>, + ) -> Result<()> { + let parent_dev = self + .get_pci_dev_by_id_and_type(vm_config, None, "nec-usb-xhci") + .with_context(|| "Can not find parent device from pci bus")?; + let locked_parent_dev = parent_dev.lock().unwrap(); + let xhci_pci = locked_parent_dev + .as_any() + .downcast_ref::() + .with_context(|| "PciDevOps can not downcast to XhciPciDevice")?; + xhci_pci.attach_device(&(usb_dev))?; + + Ok(()) + } + + /// Detach usb device from xhci controller. + /// + /// # Arguments + /// + /// * `vm_config` - VM configuration. + /// * `id` - id of the usb device. 
+ fn detach_usb_from_xhci_controller( + &mut self, + vm_config: &mut VmConfig, + id: String, + ) -> Result<()> { + let parent_dev = self + .get_pci_dev_by_id_and_type(vm_config, None, "nec-usb-xhci") + .with_context(|| "Can not find parent device from pci bus")?; + let locked_parent_dev = parent_dev.lock().unwrap(); + let xhci_pci = locked_parent_dev + .as_any() + .downcast_ref::() + .with_context(|| "PciDevOps can not downcast to XhciPciDevice")?; + xhci_pci.detach_device(id)?; + + Ok(()) + } + + /// Add usb device. + /// + /// # Arguments + /// + /// * `driver` - USB device class. + /// * `cfg_args` - USB device Configuration. + fn add_usb_device(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { + let usb_device = match get_class_type(cfg_args)?.as_str() { + "usb-kbd" => { + let config = + UsbKeyboardConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let keyboard = UsbKeyboard::new(config); + keyboard + .realize() + .with_context(|| "Failed to realize usb keyboard device")? + } + "usb-tablet" => { + let config = + UsbTabletConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let tablet = UsbTablet::new(config); + tablet + .realize() + .with_context(|| "Failed to realize usb tablet device")? + } + #[cfg(feature = "usb_camera")] + "usb-camera" => { + let token_id = match self.get_token_id() { + Some(id) => *id.read().unwrap(), + None => 0, + }; + let config = + UsbCameraConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let cameradev = get_cameradev_by_id(vm_config, config.cameradev.clone()) + .with_context(|| { + format!( + "no cameradev found with id {:?} for usb-camera", + config.cameradev + ) + })?; + + let camera = UsbCamera::new(config, cameradev, token_id)?; + camera + .realize() + .with_context(|| "Failed to realize usb camera device")? + } + "usb-storage" => { + let device_cfg = + UsbStorageConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let drive_cfg = vm_config + .drives + .remove(&device_cfg.drive) + .with_context(|| "No drive configured matched for usb storage device.")?; + let storage = UsbStorage::new(device_cfg, drive_cfg, self.get_drive_files())?; + storage + .realize() + .with_context(|| "Failed to realize usb storage device")? + } + #[cfg(feature = "usb_uas")] + "usb-uas" => { + let device_cfg = + UsbUasConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let drive_cfg = vm_config + .drives + .remove(&device_cfg.drive) + .with_context(|| "No drive configured matched for usb uas device.")?; + let uas = UsbUas::new(device_cfg, drive_cfg, self.get_drive_files())?; + uas.realize() + .with_context(|| "Failed to realize usb uas device")? + } + #[cfg(feature = "usb_host")] + "usb-host" => { + let config = + UsbHostConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + let usbhost = UsbHost::new(config)?; + usbhost + .realize() + .with_context(|| "Failed to realize usb host device")? + } + _ => bail!("Unknown usb device classes."), + }; + + self.attach_usb_to_xhci_controller(vm_config, usb_device)?; + Ok(()) + } + /// Add peripheral devices. 
/// /// # Arguments @@ -680,72 +1902,103 @@ pub trait MachineOps { #[cfg(target_arch = "x86_64")] vm_config.machine_config.mem_config.mem_size, ) - .chain_err(|| ErrorKind::AddDevErr("RTC".to_string()))?; + .with_context(|| MachineError::AddDevErr("RTC".to_string()))?; + + self.add_ged_device() + .with_context(|| MachineError::AddDevErr("Ged".to_string()))?; let cloned_vm_config = vm_config.clone(); if let Some(serial) = cloned_vm_config.serial.as_ref() { self.add_serial_device(serial) - .chain_err(|| ErrorKind::AddDevErr("serial".to_string()))?; + .with_context(|| MachineError::AddDevErr("serial".to_string()))?; } if let Some(pflashs) = cloned_vm_config.pflashs.as_ref() { self.add_pflash_device(pflashs) - .chain_err(|| ErrorKind::AddDevErr("pflash".to_string()))?; + .with_context(|| MachineError::AddDevErr("pflash".to_string()))?; } for dev in &cloned_vm_config.devices { let cfg_args = dev.1.as_str(); // Check whether the device id exists to ensure device uniqueness. - let id = parse_device_id(cfg_args)?; + let id = get_value_of_parameter("id", cfg_args)?; self.check_device_id_existed(&id) - .chain_err(|| format!("Failed to check device id: config {}", cfg_args))?; - match dev.0.as_str() { - "virtio-blk-device" => { - self.add_virtio_mmio_block(vm_config, cfg_args)?; - } - "virtio-blk-pci" => { - self.add_virtio_pci_blk(vm_config, cfg_args)?; - } - "virtio-net-device" => { - self.add_virtio_mmio_net(vm_config, cfg_args)?; - } - "virtio-net-pci" => { - self.add_virtio_pci_net(vm_config, cfg_args)?; - } - "pcie-root-port" => { - self.add_pci_root_port(cfg_args)?; - } - "vhost-vsock-pci" | "vhost-vsock-device" => { - self.add_virtio_vsock(cfg_args)?; - } - "virtio-balloon-device" | "virtio-balloon-pci" => { - self.add_virtio_balloon(vm_config, cfg_args)?; - } - "virtio-serial-device" | "virtio-serial-pci" => { - self.add_virtio_serial(vm_config, cfg_args)?; - } - "virtconsole" => { - self.add_virtio_console(vm_config, cfg_args)?; - } - "virtio-rng-device" | "virtio-rng-pci" => { - self.add_virtio_rng(vm_config, cfg_args)?; - } - "vfio-pci" => { - self.add_vfio_device(cfg_args)?; - } - _ => { - bail!("Unsupported device: {:?}", dev.0.as_str()); - } - } + .with_context(|| format!("Failed to check device id: config {}", cfg_args))?; + #[cfg(feature = "scream")] + let token_id = self.get_token_id(); + + create_device_add_matches!( + dev.0.as_str(); self; + ("virtio-blk-device", add_virtio_mmio_block, vm_config, cfg_args), + ("virtio-blk-pci", add_virtio_pci_blk, vm_config, cfg_args, false), + ("virtio-net-device", add_virtio_mmio_net, vm_config, cfg_args), + ("virtio-net-pci", add_virtio_pci_net, vm_config, cfg_args, false), + ("pcie-root-port", add_pci_root_port, cfg_args), + ("virtio-balloon-device" | "virtio-balloon-pci", add_virtio_balloon, vm_config, cfg_args), + ("virtio-serial-device" | "virtio-serial-pci", add_virtio_serial, vm_config, cfg_args), + ("virtconsole" | "virtserialport", add_virtio_serial_port, vm_config, cfg_args), + ("vhost-user-fs-pci" | "vhost-user-fs-device", add_virtio_fs, vm_config, cfg_args), + ("nec-usb-xhci", add_usb_xhci, cfg_args), + ("usb-kbd" | "usb-storage" | "usb-uas" | "usb-tablet" | "usb-camera" | "usb-host", add_usb_device, vm_config, cfg_args); + #[cfg(feature = "vhostuser_block")] + ("vhost-user-blk-device",add_vhost_user_blk_device, vm_config, cfg_args), + #[cfg(feature = "vhostuser_block")] + ("vhost-user-blk-pci",add_vhost_user_blk_pci, vm_config, cfg_args, false), + #[cfg(feature = "vhost_vsock")] + ("vhost-vsock-pci" | "vhost-vsock-device", 
add_virtio_vsock, cfg_args), + #[cfg(feature = "virtio_rng")] + ("virtio-rng-device" | "virtio-rng-pci", add_virtio_rng, vm_config, cfg_args), + #[cfg(feature = "vfio_device")] + ("vfio-pci", add_vfio_device, cfg_args, false), + #[cfg(feature = "virtio_gpu")] + ("virtio-gpu-pci", add_virtio_pci_gpu, cfg_args), + #[cfg(feature = "virtio_scsi")] + ("virtio-scsi-pci", add_virtio_pci_scsi, vm_config, cfg_args, false), + #[cfg(feature = "virtio_scsi")] + ("scsi-hd" | "scsi-cd", add_scsi_device, vm_config, cfg_args), + #[cfg(feature = "ramfb")] + ("ramfb", add_ramfb, cfg_args), + #[cfg(feature = "demo_device")] + ("pcie-demo-dev", add_demo_dev, cfg_args), + #[cfg(feature = "scream")] + ("ivshmem-scream", add_ivshmem_scream, vm_config, cfg_args, token_id), + #[cfg(feature = "pvpanic")] + ("pvpanic", add_pvpanic, cfg_args) + ); } Ok(()) } - fn add_pflash_device(&mut self, _configs: &[PFlashConfig]) -> Result<()> { + fn get_token_id(&self) -> Option>> { + None + } + + fn add_pflash_device(&mut self, _configs: &[DriveConfig]) -> Result<()> { bail!("Pflash device is not supported!"); } + fn add_ramfb(&mut self, _cfg_args: &str) -> Result<()> { + bail!("ramfb device is not supported!"); + } + + fn display_init(&mut self, _vm_config: &mut VmConfig) -> Result<()> { + bail!("Display is not supported."); + } + + #[cfg(feature = "demo_device")] + fn add_demo_dev(&mut self, cfg_args: &str) -> Result<()> { + let config = DemoDevConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false)) + .with_context(|| "failed to parse cmdline for demo dev.")?; + let bdf = PciBdf::new(config.bus.clone(), config.addr); + let (devfn, parent_bus) = self.get_devfn_and_parent_bus(&bdf)?; + let sys_mem = self.get_sys_mem().clone(); + let demo_dev = DemoDev::new(config, devfn, sys_mem, parent_bus); + + demo_dev.realize()?; + Ok(()) + } + /// Return the syscall whitelist for seccomp. fn syscall_whitelist(&self) -> Vec; @@ -757,37 +2010,86 @@ pub trait MachineOps { balloon_allow_list(&mut bpf_rules); } + if let Ok(cov_enable) = std::env::var("STRATOVIRT_COV") { + if cov_enable.eq("on") { + coverage_allow_list(&mut bpf_rules); + } + } + for bpf_rule in &mut bpf_rules { seccomp_filter.push(bpf_rule); } seccomp_filter .realize() - .chain_err(|| "Failed to init seccomp filter.")?; + .with_context(|| "Failed to init seccomp filter.")?; Ok(()) } - /// Register event notifier for power button of mainboard. - /// - /// # Arguments - /// - /// * `power_button` - Eventfd of the power button. - fn register_power_event(&self, power_button: &EventFd) -> Result<()> { - let power_button = power_button.try_clone().unwrap(); - let button_fd = power_button.as_raw_fd(); - let power_button_handler: Arc>> = - Arc::new(Mutex::new(Box::new(move |_, _| { - let _ret = power_button.read().unwrap(); - None - }))); - let notifier = EventNotifier::new( - NotifierOperation::AddShared, - button_fd, - None, - EventSet::IN, - vec![power_button_handler], - ); + /// Get the drive backend files. + fn get_drive_files(&self) -> Arc>> { + self.machine_base().drive_files.clone() + } + + /// Fetch a cloned file from drive backend files. + fn fetch_drive_file(&self, path: &str) -> Result> { + let files = self.get_drive_files(); + let drive_files = files.lock().unwrap(); + VmConfig::fetch_drive_file(&drive_files, path) + } + + /// Register a new drive backend file. 
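+ ///
+ /// If the VM is already running, the newly registered file is locked right away;
+ /// when locking fails, the registration is rolled back and the error returned.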
+ fn register_drive_file( + &self, + id: &str, + path: &str, + read_only: bool, + direct: bool, + ) -> Result<()> { + let files = self.get_drive_files(); + let mut drive_files = files.lock().unwrap(); + VmConfig::add_drive_file(&mut drive_files, id, path, read_only, direct)?; + + // Lock the added file if VM is running. + let drive_file = drive_files.get_mut(path).unwrap(); + let vm_state = self.get_vm_state().deref().0.lock().unwrap(); + if *vm_state == VmState::Running && !drive_file.locked { + if let Err(e) = lock_file(&drive_file.file, path, read_only) { + VmConfig::remove_drive_file(&mut drive_files, path)?; + return Err(e); + } + drive_file.locked = true; + } + Ok(()) + } + + /// Unregister a drive backend file. + fn unregister_drive_file(&self, path: &str) -> Result<()> { + let files = self.get_drive_files(); + let mut drive_files = files.lock().unwrap(); + VmConfig::remove_drive_file(&mut drive_files, path) + } + + /// Active drive backend files. i.e., Apply lock. + fn active_drive_files(&self) -> Result<()> { + for drive_file in self.get_drive_files().lock().unwrap().values_mut() { + if drive_file.locked { + continue; + } + lock_file(&drive_file.file, &drive_file.path, drive_file.read_only)?; + drive_file.locked = true; + } + Ok(()) + } - EventLoop::update_event(vec![notifier], None).chain_err(|| ErrorKind::RegNotifierErr)?; + /// Deactive drive backend files. i.e., Release lock. + fn deactive_drive_files(&self) -> Result<()> { + for drive_file in self.get_drive_files().lock().unwrap().values_mut() { + if !drive_file.locked { + continue; + } + unlock_file(&drive_file.file, &drive_file.path)?; + drive_file.locked = false; + } Ok(()) } @@ -797,7 +2099,7 @@ pub trait MachineOps { /// /// * `vm` - The machine structure. /// * `vm_config` - VM configuration. - fn realize(vm: &Arc>, vm_config: &mut VmConfig, is_migrate: bool) -> Result<()> + fn realize(vm: &Arc>, vm_config: &mut VmConfig) -> Result<()> where Self: Sized; @@ -806,7 +2108,13 @@ pub trait MachineOps { /// # Arguments /// /// * `paused` - Flag for `paused` when `LightMachine` starts to run. - fn run(&self, paused: bool) -> Result<()>; + fn run(&self, paused: bool) -> Result<()> { + self.vm_start( + paused, + &self.machine_base().cpus, + &mut self.machine_base().vm_state.0.lock().unwrap(), + ) + } /// Start machine as `Running` or `Paused` state. /// @@ -814,24 +2122,27 @@ pub trait MachineOps { /// /// * `paused` - After started, paused all vcpu or not. /// * `cpus` - Cpus vector restore cpu structure. - /// * `vm_state` - Vm kvm vm state. - fn vm_start(paused: bool, cpus: &[Arc], vm_state: &mut KvmVmState) -> Result<()> - where - Self: Sized, - { + /// * `vm_state` - Vm state. 
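+ ///
+ /// When not started paused, the event loop clock is enabled and the drive backend
+ /// files are locked before the vcpus run; if any vcpu fails to start, the file
+ /// locks are released again.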
+ fn vm_start(&self, paused: bool, cpus: &[Arc], vm_state: &mut VmState) -> Result<()> { + if !paused { + EventLoop::get_ctx(None).unwrap().enable_clock(); + self.active_drive_files()?; + } + let nr_vcpus = cpus.len(); - let cpus_thread_barrier = Arc::new(Barrier::new((nr_vcpus + 1) as usize)); - for cpu_index in 0..nr_vcpus { + let cpus_thread_barrier = Arc::new(Barrier::new(nr_vcpus + 1)); + for (cpu_index, cpu) in cpus.iter().enumerate() { let cpu_thread_barrier = cpus_thread_barrier.clone(); - let cpu = cpus[cpu_index as usize].clone(); - CPU::start(cpu, cpu_thread_barrier, paused) - .chain_err(|| format!("Failed to run vcpu{}", cpu_index))?; + if let Err(e) = CPU::start(cpu.clone(), cpu_thread_barrier, paused) { + self.deactive_drive_files()?; + return Err(anyhow!("Failed to run vcpu{}, {:?}", cpu_index, e)); + } } if paused { - *vm_state = KvmVmState::Paused; + *vm_state = VmState::Paused; } else { - *vm_state = KvmVmState::Running; + *vm_state = VmState::Running; } cpus_thread_barrier.wait(); @@ -843,24 +2154,32 @@ pub trait MachineOps { /// # Arguments /// /// * `cpus` - Cpus vector restore cpu structure. - /// * `vm_state` - Vm kvm vm state. + /// * `vm_state` - Vm state. fn vm_pause( + &self, cpus: &[Arc], #[cfg(target_arch = "aarch64")] irq_chip: &Option>, - vm_state: &mut KvmVmState, - ) -> Result<()> - where - Self: Sized, - { + vm_state: &mut VmState, + ) -> Result<()> { + EventLoop::get_ctx(None).unwrap().disable_clock(); + + self.deactive_drive_files()?; + for (cpu_index, cpu) in cpus.iter().enumerate() { - cpu.pause() - .chain_err(|| format!("Failed to pause vcpu{}", cpu_index))?; + if let Err(e) = cpu.pause() { + self.active_drive_files()?; + return Err(anyhow!("Failed to pause vcpu{}, {:?}", cpu_index, e)); + } } #[cfg(target_arch = "aarch64")] + // SAFETY: ARM architecture must have interrupt controllers in user mode. irq_chip.as_ref().unwrap().stop(); - *vm_state = KvmVmState::Paused; + *vm_state = VmState::Paused; + + // Notify VM paused. + pause_notify(true); Ok(()) } @@ -870,17 +2189,23 @@ pub trait MachineOps { /// # Arguments /// /// * `cpus` - Cpus vector restore cpu structure. - /// * `vm_state` - Vm kvm vm state. - fn vm_resume(cpus: &[Arc], vm_state: &mut KvmVmState) -> Result<()> - where - Self: Sized, - { + /// * `vm_state` - Vm state. + fn vm_resume(&self, cpus: &[Arc], vm_state: &mut VmState) -> Result<()> { + EventLoop::get_ctx(None).unwrap().enable_clock(); + + self.active_drive_files()?; + + // Notify VM resumed. + pause_notify(false); + for (cpu_index, cpu) in cpus.iter().enumerate() { - cpu.resume() - .chain_err(|| format!("Failed to resume vcpu{}", cpu_index))?; + if let Err(e) = cpu.resume() { + self.deactive_drive_files()?; + return Err(anyhow!("Failed to resume vcpu{}, {:?}", cpu_index, e)); + } } - *vm_state = KvmVmState::Running; + *vm_state = VmState::Running; Ok(()) } @@ -890,17 +2215,14 @@ pub trait MachineOps { /// # Arguments /// /// * `cpus` - Cpus vector restore cpu structure. - /// * `vm_state` - Vm kvm vm state. - fn vm_destroy(cpus: &[Arc], vm_state: &mut KvmVmState) -> Result<()> - where - Self: Sized, - { + /// * `vm_state` - Vm state. 
+ fn vm_destroy(&self, cpus: &[Arc], vm_state: &mut VmState) -> Result<()> { for (cpu_index, cpu) in cpus.iter().enumerate() { cpu.destroy() - .chain_err(|| format!("Failed to destroy vcpu{}", cpu_index))?; + .with_context(|| format!("Failed to destroy vcpu{}", cpu_index))?; } - *vm_state = KvmVmState::Shutdown; + *vm_state = VmState::Shutdown; Ok(()) } @@ -910,41 +2232,41 @@ pub trait MachineOps { /// # Arguments /// /// * `cpus` - Cpus vector restore cpu structure. - /// * `vm_state` - Vm kvm vm state. + /// * `vm_state` - Vm state. /// * `old_state` - Old vm state want to leave. /// * `new_state` - New vm state want to transfer to. fn vm_state_transfer( + &self, cpus: &[Arc], #[cfg(target_arch = "aarch64")] irq_chip: &Option>, - vm_state: &mut KvmVmState, - old_state: KvmVmState, - new_state: KvmVmState, - ) -> Result<()> - where - Self: Sized, - { - use KvmVmState::*; + vm_state: &mut VmState, + old_state: VmState, + new_state: VmState, + ) -> Result<()> { + use VmState::*; if *vm_state != old_state { bail!("Vm lifecycle error: state check failed."); } match (old_state, new_state) { - (Created, Running) => ::vm_start(false, cpus, vm_state) - .chain_err(|| "Failed to start vm.")?, - (Running, Paused) => ::vm_pause( - cpus, - #[cfg(target_arch = "aarch64")] - irq_chip, - vm_state, - ) - .chain_err(|| "Failed to pause vm.")?, - (Paused, Running) => ::vm_resume(cpus, vm_state) - .chain_err(|| "Failed to resume vm.")?, - (_, Shutdown) => { - ::vm_destroy(cpus, vm_state) - .chain_err(|| "Failed to destroy vm.")?; - } + (Created, Running) => self + .vm_start(false, cpus, vm_state) + .with_context(|| "Failed to start vm.")?, + (Running, Paused) => self + .vm_pause( + cpus, + #[cfg(target_arch = "aarch64")] + irq_chip, + vm_state, + ) + .with_context(|| "Failed to pause vm.")?, + (Paused, Running) => self + .vm_resume(cpus, vm_state) + .with_context(|| "Failed to resume vm.")?, + (_, Shutdown) => self + .vm_destroy(cpus, vm_state) + .with_context(|| "Failed to destroy vm.")?, (_, _) => { bail!("Vm lifecycle error: this transform is illegal."); } @@ -961,3 +2283,238 @@ pub trait MachineOps { Ok(()) } } + +fn register_shutdown_event( + shutdown_req: Arc, + vm: Arc>, +) -> Result<()> { + let shutdown_req_fd = shutdown_req.as_raw_fd(); + let shutdown_req_handler: Rc = Rc::new(move |_, _| { + let _ret = shutdown_req.read(); + if handle_destroy_request(&vm) { + Some(gen_delete_notifiers(&[shutdown_req_fd])) + } else { + warn!("Fail to shutdown VM, try again"); + if shutdown_req.write(1).is_err() { + error!("Failed to send shutdown request"); + } + None + } + }); + let notifier = EventNotifier::new( + NotifierOperation::AddShared, + shutdown_req_fd, + None, + EventSet::IN, + vec![shutdown_req_handler], + ); + EventLoop::update_event(vec![notifier], None) + .with_context(|| "Failed to register event notifier.") +} + +fn handle_destroy_request(vm: &Arc>) -> bool { + let locked_vm = vm.lock().unwrap(); + let vmstate: VmState = { + let state = locked_vm.machine_base().vm_state.deref().0.lock().unwrap(); + *state + }; + + if !locked_vm.notify_lifecycle(vmstate, VmState::Shutdown) { + return false; + } + + info!("vm destroy"); + EventLoop::kick_all(); + + true +} + +/// Normal run or resume virtual machine from migration/snapshot. +/// +/// # Arguments +/// +/// * `vm` - virtual machine that implement `MachineOps`. +/// * `cmd_args` - Command arguments from user. 
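+///
+/// Without an incoming migration configured (MigrateMode::Unknown) the VM is cold
+/// booted via run(); otherwise it is restored through start_incoming_migration().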
+pub fn vm_run( + vm: &Arc>, + cmd_args: &arg_parser::ArgMatches, +) -> Result<()> { + let migrate = vm.lock().unwrap().get_migrate_info(); + if migrate.0 == MigrateMode::Unknown { + vm.lock() + .unwrap() + .run(cmd_args.is_present("freeze_cpu")) + .with_context(|| "Failed to start VM.")?; + } else { + start_incoming_migration(vm).with_context(|| "Failed to start migration.")?; + } + + Ok(()) +} + +/// Start incoming migration from destination. +fn start_incoming_migration(vm: &Arc>) -> Result<()> { + let (mode, path) = vm.lock().unwrap().get_migrate_info(); + match mode { + MigrateMode::File => { + MigrationManager::restore_snapshot(&path) + .with_context(|| "Failed to restore snapshot")?; + vm.lock() + .unwrap() + .run(false) + .with_context(|| "Failed to start VM.")?; + } + MigrateMode::Unix => { + clear_file(path.clone())?; + let listener = UnixListener::bind(&path)?; + let (mut sock, _) = listener.accept()?; + remove_file(&path)?; + + MigrationManager::recv_migration(&mut sock) + .with_context(|| "Failed to receive migration with unix mode")?; + vm.lock() + .unwrap() + .run(false) + .with_context(|| "Failed to start VM.")?; + MigrationManager::finish_migration(&mut sock) + .with_context(|| "Failed to finish migraton.")?; + } + MigrateMode::Tcp => { + let listener = TcpListener::bind(&path)?; + let mut sock = listener.accept().map(|(stream, _)| stream)?; + + MigrationManager::recv_migration(&mut sock) + .with_context(|| "Failed to receive migration with tcp mode")?; + vm.lock() + .unwrap() + .run(false) + .with_context(|| "Failed to start VM.")?; + MigrationManager::finish_migration(&mut sock) + .with_context(|| "Failed to finish migraton.")?; + } + MigrateMode::Unknown => { + bail!("Unknown migration mode"); + } + } + + // End the migration and reset the mode. + let locked_vm = vm.lock().unwrap(); + let vm_config = locked_vm.get_vm_config(); + if let Some((mode, _)) = vm_config.lock().unwrap().incoming.as_mut() { + *mode = MigrateMode::Unknown; + } + + Ok(()) +} + +fn coverage_allow_list(syscall_allow_list: &mut Vec) { + syscall_allow_list.extend(vec![ + BpfRule::new(libc::SYS_fcntl), + BpfRule::new(libc::SYS_ftruncate), + ]) +} + +#[cfg(feature = "windows_emu_pid")] +fn check_windows_emu_pid( + pid_path: String, + powerdown_req: Arc, + shutdown_req: Arc, + vm: Arc>, +) { + let mut check_delay = Duration::from_millis(WINDOWS_EMU_PID_DEFAULT_INTERVAL); + if !Path::new(&pid_path).exists() { + info!("Detect emulator exited, let VM exits now"); + let locked_vm = vm.lock().unwrap(); + let mut vm_state = locked_vm.get_vm_state().deref().0.lock().unwrap(); + if *vm_state == VmState::Paused { + info!("VM state is paused, resume VM before exit"); + if let Err(e) = locked_vm.vm_resume(&locked_vm.machine_base().cpus, &mut vm_state) { + log::error!("Failed to resume VM when check windows emu pid: {:?}", e); + } + } + drop(vm_state); + drop(locked_vm); + if get_run_stage() == VmRunningStage::Os { + // Wait 30s for windows normal exit. + check_delay = Duration::from_millis(WINDOWS_EMU_PID_POWERDOWN_INTERVAL); + if let Err(e) = powerdown_req.write(1) { + log::error!("Failed to send powerdown request after emu exits: {:?}", e); + } + } else { + // Wait 1s for windows shutdown. 
+ check_delay = Duration::from_millis(WINDOWS_EMU_PID_SHUTDOWN_INTERVAL); + if let Err(e) = shutdown_req.write(1) { + log::error!("Failed to send shutdown request after emu exits: {:?}", e); + } + } + } + + let check_emu_alive = Box::new(move || { + check_windows_emu_pid( + pid_path.clone(), + powerdown_req.clone(), + shutdown_req.clone(), + vm.clone(), + ); + }); + EventLoop::get_ctx(None) + .unwrap() + .timer_add(check_emu_alive, check_delay); +} + +/// When windows emu exits, stratovirt should exits too. +#[cfg(feature = "windows_emu_pid")] +pub(crate) fn watch_windows_emu_pid( + vm_config: &VmConfig, + power_button: Arc, + shutdown_req: Arc, + vm: Arc>, +) { + let emu_pid = vm_config.emulator_pid.as_ref(); + if emu_pid.is_none() { + return; + } + info!("Watching on emulator lifetime"); + let pid_path = "/proc/".to_owned() + emu_pid.unwrap(); + let check_delay = Duration::from_millis(WINDOWS_EMU_PID_DEFAULT_INTERVAL); + let check_emu_alive = Box::new(move || { + check_windows_emu_pid( + pid_path.clone(), + power_button.clone(), + shutdown_req.clone(), + vm.clone(), + ); + }); + EventLoop::get_ctx(None) + .unwrap() + .timer_add(check_emu_alive, check_delay); +} + +fn machine_register_pcidevops_type() -> Result<()> { + #[cfg(target_arch = "x86_64")] + { + register_pcidevops_type::()?; + register_pcidevops_type::()?; + } + #[cfg(target_arch = "aarch64")] + { + register_pcidevops_type::()?; + } + + Ok(()) +} + +pub fn type_init() -> Result<()> { + // Register all sysbus devices type. + virtio_register_sysbusdevops_type()?; + devices_register_sysbusdevops_type()?; + + // Register all pci devices type. + machine_register_pcidevops_type()?; + #[cfg(feature = "vfio_device")] + vfio_register_pcidevops_type()?; + virtio_register_pcidevops_type()?; + devices_register_pcidevops_type()?; + + Ok(()) +} diff --git a/machine/src/micro_vm/mod.rs b/machine/src/micro_common/mod.rs similarity index 31% rename from machine/src/micro_vm/mod.rs rename to machine/src/micro_common/mod.rs index ad5850afbcfcbf26d42a08b498dfff29a316655e..200028d568d0b0f827435933e7fc2315d0603953 100644 --- a/machine/src/micro_vm/mod.rs +++ b/machine/src/micro_common/mod.rs @@ -28,89 +28,58 @@ //! - `x86_64` //! - `aarch64` -pub mod errors { - error_chain! 
{ - links { - Util(util::errors::Error, util::errors::ErrorKind); - Virtio(virtio::errors::Error, virtio::errors::ErrorKind); - } - foreign_links { - Io(std::io::Error); - Kvm(kvm_ioctls::Error); - Nul(std::ffi::NulError); - } - errors { - RplDevLmtErr(dev: String, nr: usize) { - display("A maximum of {} {} replaceable devices are supported.", nr, dev) - } - UpdCfgErr(id: String) { - display("{}: failed to update config.", id) - } - RlzVirtioMmioErr { - display("Failed to realize virtio mmio.") - } - } - } -} - -mod mem_layout; -mod syscall; +pub mod syscall; -use std::fs::metadata; +use std::fmt; +use std::fmt::Debug; use std::ops::Deref; -use std::os::linux::fs::MetadataExt; use std::os::unix::io::RawFd; -use std::path::Path; -use std::sync::{Arc, Condvar, Mutex}; +use std::sync::{Arc, Mutex}; use std::vec::Vec; -use address_space::{AddressSpace, GuestAddress, Region}; -use boot_loader::{load_linux, BootLoaderConfig}; -use cpu::{CPUBootConfig, CpuLifecycleState, CpuTopology, CPU}; +use anyhow::{anyhow, bail, Context, Result}; +use clap::Parser; +use log::error; +use vmm_sys_util::eventfd::EventFd; + #[cfg(target_arch = "aarch64")] -use devices::legacy::PL031; +use crate::aarch64::micro::{LayoutEntryType, MEM_LAYOUT}; #[cfg(target_arch = "x86_64")] -use devices::legacy::SERIAL_ADDR; -use devices::legacy::{FwCfgOps, Serial}; -#[cfg(target_arch = "aarch64")] -use devices::{InterruptController, InterruptControllerConfig}; -use error_chain::ChainedError; -use hypervisor::kvm::KVM_FDS; +use crate::x86_64::micro::{LayoutEntryType, MEM_LAYOUT}; +use crate::{MachineBase, MachineError, MachineOps}; +use cpu::CpuLifecycleState; #[cfg(target_arch = "x86_64")] -use kvm_bindings::{kvm_pit_config, KVM_PIT_SPEAKER_DUMMY}; -use machine_manager::config::parse_blk; -use machine_manager::config::parse_net; -use machine_manager::config::BlkDevConfig; +use devices::sysbus::SysBusDevOps; +use devices::sysbus::{IRQ_BASE, IRQ_MAX}; +use devices::Device; +#[cfg(feature = "vhostuser_net")] +use machine_manager::config::get_chardev_socket_path; +#[cfg(target_arch = "x86_64")] +use machine_manager::config::Param; +use machine_manager::config::{ + parse_incoming_uri, str_slip_to_clap, ConfigCheck, DriveConfig, MigrateMode, NetDevcfg, + NetworkInterfaceConfig, VmConfig, +}; use machine_manager::machine::{ - DeviceInterface, KvmVmState, MachineAddressInterface, MachineExternalInterface, - MachineInterface, MachineLifecycle, MigrateInterface, + DeviceInterface, MachineAddressInterface, MachineExternalInterface, MachineInterface, + MachineLifecycle, MigrateInterface, VmState, }; -use machine_manager::{ - config::{BootSource, ConfigCheck, NetworkInterfaceConfig, SerialConfig, VmConfig}, - qmp::{qmp_schema, QmpChannel, Response}, +use machine_manager::qmp::{ + qmp_channel::QmpChannel, qmp_response::Response, qmp_schema, qmp_schema::UpdateRegionArgument, }; -use migration::{MigrationManager, MigrationStatus}; -use sysbus::SysBus; -#[cfg(target_arch = "aarch64")] -use sysbus::{SysBusDevType, SysRes}; -#[cfg(target_arch = "aarch64")] -use util::device_tree::{self, CompileFDT, FdtBuilder}; -use util::loop_context::EventLoopManager; -use util::seccomp::BpfRule; -use util::set_termi_canon_mode; +use machine_manager::{check_arg_nonexist, event}; +use migration::MigrationManager; +use util::loop_context::{create_new_eventfd, EventLoopManager}; +use util::{num_ops::str_to_num, set_termi_canon_mode}; +use virtio::device::block::VirtioBlkDevConfig; +#[cfg(feature = "vhost_net")] +use virtio::VhostKern; +#[cfg(feature = 
"vhostuser_net")] +use virtio::VhostUser; use virtio::{ - create_tap, qmp_balloon, qmp_query_balloon, Block, BlockState, Net, VhostKern, VirtioDevice, + create_tap, qmp_balloon, qmp_query_balloon, Block, BlockState, Net, VirtioDevice, VirtioMmioDevice, VirtioMmioState, VirtioNetState, }; -use vmm_sys_util::eventfd::EventFd; - -use super::{ - errors::{ErrorKind as MachineErrorKind, Result as MachineResult}, - MachineOps, -}; -use errors::{ErrorKind, Result}; -use mem_layout::{LayoutEntryType, MEM_LAYOUT}; -use syscall::syscall_whitelist; // The replaceable block device maximum count. const MMIO_REPLACEABLE_BLK_NR: usize = 4; @@ -118,11 +87,13 @@ const MMIO_REPLACEABLE_BLK_NR: usize = 4; const MMIO_REPLACEABLE_NET_NR: usize = 2; // The config of replaceable device. +#[derive(Debug)] struct MmioReplaceableConfig { // Device id. id: String, - // The dev_config of the related backend device. - dev_config: Arc, + // The config of the related backend device. + // Eg: Drive config of virtio mmio block. Netdev config of virtio mmio net. + back_config: Arc, } // The device information of replaceable device. @@ -135,16 +106,27 @@ struct MmioReplaceableDevInfo { used: bool, } +impl fmt::Debug for MmioReplaceableDevInfo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MmioReplaceableDevInfo") + .field("device_type", &self.device.lock().unwrap().device_type()) + .field("id", &self.id) + .field("used", &self.used) + .finish() + } +} + // The gather of config, info and count of all replaceable devices. -struct MmioReplaceableInfo { +#[derive(Debug)] +pub(crate) struct MmioReplaceableInfo { // The arrays of all replaceable configs. configs: Arc>>, // The arrays of all replaceable device information. devices: Arc>>, // The count of block device which is plugin. - block_count: usize, + pub(crate) block_count: usize, // The count of network device which is plugin. - net_count: usize, + pub(crate) net_count: usize, } impl MmioReplaceableInfo { @@ -158,30 +140,14 @@ impl MmioReplaceableInfo { } } -/// A wrapper around creating and using a kvm-based micro VM. +/// A wrapper around creating and using a micro VM. pub struct LightMachine { - // `vCPU` topology, support sockets, cores, threads. - cpu_topo: CpuTopology, - // `vCPU` devices. - cpus: Vec>, - // Interrupt controller device. - #[cfg(target_arch = "aarch64")] - irq_chip: Option>, - // Memory address space. - sys_mem: Arc, - // IO address space. - #[cfg(target_arch = "x86_64")] - sys_io: Arc, - // System bus. - sysbus: SysBus, + // Machine base members. + pub(crate) base: MachineBase, // All replaceable device information. - replaceable_info: MmioReplaceableInfo, - // VM running state. - vm_state: Arc<(Mutex, Condvar)>, - // Vm boot_source config. - boot_source: Arc>, - // VM power button, handle VM `Shutdown` event. - power_button: EventFd, + pub(crate) replaceable_info: MmioReplaceableInfo, + /// Shutdown request, handle VM `shutdown` event. + pub(crate) shutdown_req: Arc, } impl LightMachine { @@ -190,134 +156,85 @@ impl LightMachine { /// # Arguments /// /// * `vm_config` - Represents the configuration for VM. 
- pub fn new(vm_config: &VmConfig) -> MachineResult { - use crate::errors::ResultExt; - - let sys_mem = AddressSpace::new(Region::init_container_region(u64::max_value())) - .chain_err(|| MachineErrorKind::CrtMemSpaceErr)?; - #[cfg(target_arch = "x86_64")] - let sys_io = AddressSpace::new(Region::init_container_region(1 << 16)) - .chain_err(|| MachineErrorKind::CrtIoSpaceErr)?; - #[cfg(target_arch = "x86_64")] - let free_irqs: (i32, i32) = (5, 15); - #[cfg(target_arch = "aarch64")] - let free_irqs: (i32, i32) = (32, 191); + pub fn new(vm_config: &VmConfig) -> Result { + let free_irqs: (i32, i32) = (IRQ_BASE, IRQ_MAX); let mmio_region: (u64, u64) = ( MEM_LAYOUT[LayoutEntryType::Mmio as usize].0, MEM_LAYOUT[LayoutEntryType::Mmio as usize + 1].0, ); - let sysbus = SysBus::new( - #[cfg(target_arch = "x86_64")] - &sys_io, - &sys_mem, - free_irqs, - mmio_region, - ); - - // Machine state init - let vm_state = Arc::new((Mutex::new(KvmVmState::Created), Condvar::new())); - let power_button = EventFd::new(libc::EFD_NONBLOCK) - .chain_err(|| MachineErrorKind::InitEventFdErr("power_button".to_string()))?; - - if let Err(e) = MigrationManager::set_status(MigrationStatus::Setup) { - error!("{}", e); - } + let base = MachineBase::new(vm_config, free_irqs, mmio_region)?; Ok(LightMachine { - cpu_topo: CpuTopology::new(vm_config.machine_config.nr_cpus), - cpus: Vec::new(), - #[cfg(target_arch = "aarch64")] - irq_chip: None, - sys_mem, - #[cfg(target_arch = "x86_64")] - sys_io, - sysbus, + base, replaceable_info: MmioReplaceableInfo::new(), - boot_source: Arc::new(Mutex::new(vm_config.clone().boot_source)), - vm_state, - power_button, + shutdown_req: Arc::new( + create_new_eventfd() + .with_context(|| MachineError::InitEventFdErr("shutdown_req".to_string()))?, + ), }) } - #[cfg(target_arch = "x86_64")] - fn arch_init() -> MachineResult<()> { - use crate::errors::ResultExt; - - let kvm_fds = KVM_FDS.load(); - let vm_fd = kvm_fds.vm_fd.as_ref().unwrap(); - vm_fd - .set_tss_address(0xfffb_d000_usize) - .chain_err(|| MachineErrorKind::SetTssErr)?; - - let pit_config = kvm_pit_config { - flags: KVM_PIT_SPEAKER_DUMMY, - pad: Default::default(), - }; - vm_fd - .create_pit2(pit_config) - .chain_err(|| MachineErrorKind::CrtPitErr)?; - - Ok(()) - } - - fn create_replaceable_devices(&mut self) -> Result<()> { - use errors::ResultExt; - - let mut rpl_devs: Vec = Vec::new(); - for _ in 0..MMIO_REPLACEABLE_BLK_NR { - let block = Arc::new(Mutex::new(Block::default())); - let virtio_mmio = VirtioMmioDevice::new(&self.sys_mem, block.clone()); - rpl_devs.push(virtio_mmio); - - MigrationManager::register_device_instance_mutex(BlockState::descriptor(), block); - } - for _ in 0..MMIO_REPLACEABLE_NET_NR { - let net = Arc::new(Mutex::new(Net::default())); - let virtio_mmio = VirtioMmioDevice::new(&self.sys_mem, net.clone()); - rpl_devs.push(virtio_mmio); + pub(crate) fn create_replaceable_devices(&mut self) -> Result<()> { + for id in 0..MMIO_REPLACEABLE_BLK_NR { + let block = Arc::new(Mutex::new(Block::new( + VirtioBlkDevConfig::default(), + DriveConfig::default(), + self.get_drive_files(), + ))); + MigrationManager::register_device_instance( + BlockState::descriptor(), + block.clone(), + &id.to_string(), + ); - MigrationManager::register_device_instance_mutex(VirtioNetState::descriptor(), net); + let blk_mmio = self.add_virtio_mmio_device(id.to_string(), block.clone())?; + let info = MmioReplaceableDevInfo { + device: block, + id: id.to_string(), + used: false, + }; + self.replaceable_info.devices.lock().unwrap().push(info); + 
MigrationManager::register_transport_instance( + VirtioMmioState::descriptor(), + blk_mmio, + &id.to_string(), + ); } + for id in 0..MMIO_REPLACEABLE_NET_NR { + let total_id = id + MMIO_REPLACEABLE_BLK_NR; + let net = Arc::new(Mutex::new(Net::new( + NetworkInterfaceConfig::default(), + NetDevcfg::default(), + ))); + MigrationManager::register_device_instance( + VirtioNetState::descriptor(), + net.clone(), + &total_id.to_string(), + ); - let mut region_base = self.sysbus.min_free_base; - let region_size = MEM_LAYOUT[LayoutEntryType::Mmio as usize].1; - for dev in rpl_devs { - self.replaceable_info - .devices - .lock() - .unwrap() - .push(MmioReplaceableDevInfo { - device: dev.device.clone(), - id: "".to_string(), - used: false, - }); - - MigrationManager::register_device_instance_mutex( + let net_mmio = self.add_virtio_mmio_device(total_id.to_string(), net.clone())?; + let info = MmioReplaceableDevInfo { + device: net, + id: total_id.to_string(), + used: false, + }; + self.replaceable_info.devices.lock().unwrap().push(info); + MigrationManager::register_transport_instance( VirtioMmioState::descriptor(), - VirtioMmioDevice::realize( - dev, - &mut self.sysbus, - region_base, - MEM_LAYOUT[LayoutEntryType::Mmio as usize].1, - #[cfg(target_arch = "x86_64")] - &self.boot_source, - ) - .chain_err(|| ErrorKind::RlzVirtioMmioErr)?, + net_mmio, + &total_id.to_string(), ); - region_base += region_size; } - self.sysbus.min_free_base = region_base; + Ok(()) } - fn fill_replaceable_device( + pub(crate) fn fill_replaceable_device( &mut self, id: &str, - dev_config: Arc, + dev_config: Vec>, index: usize, ) -> Result<()> { - use errors::ResultExt; - let mut replaceable_devices = self.replaceable_info.devices.lock().unwrap(); if let Some(device_info) = replaceable_devices.get_mut(index) { if device_info.used { @@ -330,19 +247,18 @@ impl LightMachine { .device .lock() .unwrap() - .update_config(Some(dev_config.clone())) - .chain_err(|| ErrorKind::UpdCfgErr(id.to_string()))?; + .update_config(dev_config.clone()) + .with_context(|| MachineError::UpdCfgErr(id.to_string()))?; } - self.add_replaceable_config(id, dev_config)?; - Ok(()) + self.add_replaceable_config(id, dev_config[0].clone()) } - fn add_replaceable_config(&self, id: &str, dev_config: Arc) -> Result<()> { + fn add_replaceable_config(&self, id: &str, back_config: Arc) -> Result<()> { let mut configs_lock = self.replaceable_info.configs.lock().unwrap(); let limit = MMIO_REPLACEABLE_BLK_NR + MMIO_REPLACEABLE_NET_NR; if configs_lock.len() >= limit { - return Err(ErrorKind::RplDevLmtErr("".to_string(), limit).into()); + return Err(anyhow!(MachineError::RplDevLmtErr("".to_string(), limit))); } for config in configs_lock.iter() { @@ -353,45 +269,82 @@ impl LightMachine { let config = MmioReplaceableConfig { id: id.to_string(), - dev_config, + back_config, }; + + trace::mmio_replaceable_config(&config); configs_lock.push(config); Ok(()) } - fn add_replaceable_device(&self, id: &str, driver: &str, slot: usize) -> Result<()> { - use errors::ResultExt; + fn add_replaceable_device( + &self, + args: Box, + slot: usize, + ) -> Result<()> { + let id = args.id; + let driver = args.driver; + + // Find the configuration by id. + let configs_lock = self.replaceable_info.configs.lock().unwrap(); + let mut configs = Vec::new(); + for config in configs_lock.iter() { + if config.id == id { + configs.push(config.back_config.clone()); + } + } + if configs.is_empty() { + bail!("Failed to find device configuration."); + } + // Sanity check for config, driver and slot. 
+ let cfg_any = configs[0].as_any(); let index = if driver.contains("net") { if slot >= MMIO_REPLACEABLE_NET_NR { - return Err( - ErrorKind::RplDevLmtErr("net".to_string(), MMIO_REPLACEABLE_NET_NR).into(), - ); + return Err(anyhow!(MachineError::RplDevLmtErr( + "net".to_string(), + MMIO_REPLACEABLE_NET_NR + ))); + } + if cfg_any.downcast_ref::().is_none() { + return Err(anyhow!(MachineError::DevTypeErr("net".to_string()))); } + let mut net_config = NetworkInterfaceConfig { + classtype: driver, + id: id.clone(), + netdev: args.chardev.with_context(|| "No chardev set")?, + mac: args.mac, + iothread: args.iothread, + ..Default::default() + }; + net_config.auto_iothread(); + configs.push(Arc::new(net_config)); slot + MMIO_REPLACEABLE_BLK_NR } else if driver.contains("blk") { if slot >= MMIO_REPLACEABLE_BLK_NR { - return Err( - ErrorKind::RplDevLmtErr("block".to_string(), MMIO_REPLACEABLE_BLK_NR).into(), - ); + return Err(anyhow!(MachineError::RplDevLmtErr( + "block".to_string(), + MMIO_REPLACEABLE_BLK_NR + ))); + } + if cfg_any.downcast_ref::().is_none() { + return Err(anyhow!(MachineError::DevTypeErr("blk".to_string()))); } + let dev_config = VirtioBlkDevConfig { + classtype: driver, + id: id.clone(), + drive: args.drive.with_context(|| "No drive set")?, + bootindex: args.boot_index, + iothread: args.iothread, + serial: args.serial_num, + ..Default::default() + }; + configs.push(Arc::new(dev_config)); slot } else { bail!("Unsupported replaceable device type."); }; - // Find the configuration by id. - let configs_lock = self.replaceable_info.configs.lock().unwrap(); - let mut dev_config = None; - for config in configs_lock.iter() { - if config.id == id { - dev_config = Some(config.dev_config.clone()); - } - } - if dev_config.is_none() { - bail!("Failed to find device configuration."); - } - // Find the replaceable device and replace it. 
let mut replaceable_devices = self.replaceable_info.devices.lock().unwrap(); if let Some(device_info) = replaceable_devices.get_mut(index) { @@ -405,20 +358,23 @@ impl LightMachine { .device .lock() .unwrap() - .update_config(dev_config) - .chain_err(|| ErrorKind::UpdCfgErr(id.to_string()))?; + .update_config(configs) + .with_context(|| MachineError::UpdCfgErr(id.to_string()))?; } Ok(()) } fn del_replaceable_device(&self, id: &str) -> Result { - use errors::ResultExt; - // find the index of configuration by name and remove it let mut is_exist = false; let mut configs_lock = self.replaceable_info.configs.lock().unwrap(); for (index, config) in configs_lock.iter().enumerate() { if config.id == id { + if let Some(drive_config) = + config.back_config.as_any().downcast_ref::() + { + self.unregister_drive_file(&drive_config.path_on_host)?; + } configs_lock.remove(index); is_exist = true; break; @@ -435,8 +391,8 @@ impl LightMachine { .device .lock() .unwrap() - .update_config(None) - .chain_err(|| ErrorKind::UpdCfgErr(id.to_string()))?; + .update_config(Vec::new()) + .with_context(|| MachineError::UpdCfgErr(id.to_string()))?; } } @@ -445,216 +401,61 @@ impl LightMachine { } Ok(id.to_string()) } -} - -impl MachineOps for LightMachine { - fn arch_ram_ranges(&self, mem_size: u64) -> Vec<(u64, u64)> { - #[allow(unused_mut)] - let mut ranges: Vec<(u64, u64)>; - - #[cfg(target_arch = "aarch64")] - { - let mem_start = MEM_LAYOUT[LayoutEntryType::Mem as usize].0; - ranges = vec![(mem_start, mem_size)]; - } - #[cfg(target_arch = "x86_64")] - { - let gap_start = MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0 - + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1; - ranges = vec![(0, std::cmp::min(gap_start, mem_size))]; - if mem_size > gap_start { - let gap_end = MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0; - ranges.push((gap_end, mem_size - gap_start)); - } - } - ranges - } - - #[cfg(target_arch = "x86_64")] - fn init_interrupt_controller(&mut self, _vcpu_count: u64) -> MachineResult<()> { - use crate::errors::ResultExt; - - KVM_FDS - .load() - .vm_fd - .as_ref() - .unwrap() - .create_irq_chip() - .chain_err(|| MachineErrorKind::CrtIrqchipErr)?; - Ok(()) - } - - #[cfg(target_arch = "aarch64")] - fn init_interrupt_controller(&mut self, vcpu_count: u64) -> MachineResult<()> { - // Interrupt Controller Chip init - let intc_conf = InterruptControllerConfig { - version: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, - vcpu_count, - max_irq: 192, - msi: true, - dist_range: MEM_LAYOUT[LayoutEntryType::GicDist as usize], - redist_region_ranges: vec![ - MEM_LAYOUT[LayoutEntryType::GicRedist as usize], - MEM_LAYOUT[LayoutEntryType::HighGicRedist as usize], - ], - its_range: Some(MEM_LAYOUT[LayoutEntryType::GicIts as usize]), - }; - let irq_chip = InterruptController::new(&intc_conf)?; - self.irq_chip = Some(Arc::new(irq_chip)); - self.irq_chip.as_ref().unwrap().realize()?; - Ok(()) - } - - #[cfg(target_arch = "x86_64")] - fn load_boot_source( - &self, - fwcfg: Option<&Arc>>, - ) -> MachineResult { - use crate::errors::ResultExt; - - let boot_source = self.boot_source.lock().unwrap(); - let initrd = boot_source.initrd.as_ref().map(|b| b.initrd_file.clone()); - - let gap_start = MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0 - + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1; - let gap_end = MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0; - let bootloader_config = BootLoaderConfig { - kernel: boot_source.kernel_file.clone(), - initrd, - kernel_cmdline: 
boot_source.kernel_cmdline.to_string(), - cpu_count: self.cpu_topo.nrcpus, - gap_range: (gap_start, gap_end - gap_start), - ioapic_addr: MEM_LAYOUT[LayoutEntryType::IoApic as usize].0 as u32, - lapic_addr: MEM_LAYOUT[LayoutEntryType::LocalApic as usize].0 as u32, - ident_tss_range: None, - prot64_mode: true, - }; - let layout = load_linux(&bootloader_config, &self.sys_mem, fwcfg) - .chain_err(|| MachineErrorKind::LoadKernErr)?; - - Ok(CPUBootConfig { - prot64_mode: true, - boot_ip: layout.boot_ip, - boot_sp: layout.boot_sp, - boot_selector: layout.boot_selector, - zero_page: layout.zero_page_addr, - code_segment: layout.segments.code_segment, - data_segment: layout.segments.data_segment, - gdt_base: layout.segments.gdt_base, - gdt_size: layout.segments.gdt_limit, - idt_base: layout.segments.idt_base, - idt_size: layout.segments.idt_limit, - pml4_start: layout.boot_pml4_addr, - }) - } - - #[cfg(target_arch = "aarch64")] - fn load_boot_source( - &self, - fwcfg: Option<&Arc>>, - ) -> MachineResult { - use crate::errors::ResultExt; - - let mut boot_source = self.boot_source.lock().unwrap(); - let initrd = boot_source.initrd.as_ref().map(|b| b.initrd_file.clone()); - - let bootloader_config = BootLoaderConfig { - kernel: boot_source.kernel_file.clone(), - initrd, - mem_start: MEM_LAYOUT[LayoutEntryType::Mem as usize].0, - }; - let layout = load_linux(&bootloader_config, &self.sys_mem, fwcfg) - .chain_err(|| MachineErrorKind::LoadKernErr)?; - if let Some(rd) = &mut boot_source.initrd { - rd.initrd_addr = layout.initrd_start; - rd.initrd_size = layout.initrd_size; - } - - Ok(CPUBootConfig { - fdt_addr: layout.dtb_start, - boot_pc: layout.boot_pc, - }) - } - fn realize_virtio_mmio_device( - &mut self, - dev: VirtioMmioDevice, - ) -> MachineResult>> { - use errors::ResultExt; - - let region_base = self.sysbus.min_free_base; - let region_size = MEM_LAYOUT[LayoutEntryType::Mmio as usize].1; - let realized_virtio_mmio_device = VirtioMmioDevice::realize( - dev, - &mut self.sysbus, - region_base, - region_size, - #[cfg(target_arch = "x86_64")] - &self.boot_source, - ) - .chain_err(|| ErrorKind::RlzVirtioMmioErr)?; - self.sysbus.min_free_base += region_size; - Ok(realized_virtio_mmio_device) - } - - fn get_sys_mem(&mut self) -> &Arc { - &self.sys_mem - } - - #[cfg(target_arch = "aarch64")] - fn add_rtc_device(&mut self) -> MachineResult<()> { - use crate::errors::ResultExt; - - PL031::realize( - PL031::default(), - &mut self.sysbus, - MEM_LAYOUT[LayoutEntryType::Rtc as usize].0, - MEM_LAYOUT[LayoutEntryType::Rtc as usize].1, - ) - .chain_err(|| "Failed to realize pl031.")?; - Ok(()) - } - - #[cfg(target_arch = "x86_64")] - fn add_rtc_device(&mut self, _mem_size: u64) -> MachineResult<()> { - Ok(()) - } - - fn add_serial_device(&mut self, config: &SerialConfig) -> MachineResult<()> { - use crate::errors::ResultExt; - - #[cfg(target_arch = "x86_64")] - let region_base: u64 = SERIAL_ADDR; - #[cfg(target_arch = "aarch64")] - let region_base: u64 = MEM_LAYOUT[LayoutEntryType::Uart as usize].0; - #[cfg(target_arch = "x86_64")] - let region_size: u64 = 8; - #[cfg(target_arch = "aarch64")] - let region_size: u64 = MEM_LAYOUT[LayoutEntryType::Uart as usize].1; - - let serial = Serial::new(config.clone()); - serial - .realize( - &mut self.sysbus, - region_base, - region_size, - #[cfg(target_arch = "aarch64")] - &self.boot_source, - ) - .chain_err(|| "Failed to realize serial device.")?; - Ok(()) - } - - fn add_virtio_mmio_net( + pub(crate) fn add_virtio_mmio_net( &mut self, vm_config: &mut VmConfig, cfg_args: 
&str, - ) -> MachineResult<()> { - let device_cfg = parse_net(vm_config, cfg_args)?; - if device_cfg.vhost_type.is_some() { - let net = Arc::new(Mutex::new(VhostKern::Net::new(&device_cfg, &self.sys_mem))); - let device = VirtioMmioDevice::new(&self.sys_mem, net); - self.realize_virtio_mmio_device(device)?; + ) -> Result<()> { + let mut net_cfg = + NetworkInterfaceConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + net_cfg.auto_iothread(); + check_arg_nonexist!( + ("bus", net_cfg.bus), + ("addr", net_cfg.addr), + ("multifunction", net_cfg.multifunction) + ); + let netdev_cfg = vm_config + .netdevs + .remove(&net_cfg.netdev) + .with_context(|| format!("Netdev: {:?} not found for net device", &net_cfg.netdev))?; + if netdev_cfg.vhost_type().is_some() { + if netdev_cfg.vhost_type().unwrap() == "vhost-kernel" { + #[cfg(not(feature = "vhost_net"))] + bail!("Unsupported Vhost_Net"); + + #[cfg(feature = "vhost_net")] + { + let net = Arc::new(Mutex::new(VhostKern::Net::new( + &net_cfg, + netdev_cfg, + &self.base.sys_mem, + ))); + self.add_virtio_mmio_device(net_cfg.id.clone(), net)?; + } + } else { + #[cfg(not(feature = "vhostuser_net"))] + bail!("Unsupported Vhostuser_Net"); + + #[cfg(feature = "vhostuser_net")] + { + let chardev = netdev_cfg.chardev.clone().with_context(|| { + format!("Chardev not configured for netdev {:?}", netdev_cfg.id) + })?; + let chardev_cfg = vm_config + .chardev + .remove(&chardev) + .with_context(|| format!("Chardev: {:?} not found for netdev", chardev))?; + let sock_path = get_chardev_socket_path(chardev_cfg)?; + let net = Arc::new(Mutex::new(VhostUser::Net::new( + &net_cfg, + netdev_cfg, + sock_path, + &self.base.sys_mem, + ))); + self.add_virtio_mmio_device(net_cfg.id.clone(), net)?; + } + }; } else { let index = MMIO_REPLACEABLE_BLK_NR + self.replaceable_info.net_count; if index >= MMIO_REPLACEABLE_BLK_NR + MMIO_REPLACEABLE_NET_NR { @@ -663,18 +464,30 @@ impl MachineOps for LightMachine { MMIO_REPLACEABLE_NET_NR ); } - self.fill_replaceable_device(&device_cfg.id, Arc::new(device_cfg.clone()), index)?; + let configs: Vec> = + vec![Arc::new(netdev_cfg), Arc::new(net_cfg.clone())]; + self.fill_replaceable_device(&net_cfg.id, configs, index)?; self.replaceable_info.net_count += 1; } Ok(()) } - fn add_virtio_mmio_block( + pub(crate) fn add_virtio_mmio_block( &mut self, vm_config: &mut VmConfig, cfg_args: &str, - ) -> MachineResult<()> { - let device_cfg = parse_blk(vm_config, cfg_args)?; + ) -> Result<()> { + let device_cfg = + VirtioBlkDevConfig::try_parse_from(str_slip_to_clap(cfg_args, true, false))?; + check_arg_nonexist!( + ("bus", device_cfg.bus), + ("addr", device_cfg.addr), + ("multifunction", device_cfg.multifunction) + ); + let drive_cfg = vm_config + .drives + .remove(&device_cfg.drive) + .with_context(|| "No drive configured matched for blk device")?; if self.replaceable_info.block_count >= MMIO_REPLACEABLE_BLK_NR { bail!( "A maximum of {} block replaceable devices are supported.", @@ -682,100 +495,49 @@ impl MachineOps for LightMachine { ); } let index = self.replaceable_info.block_count; - self.fill_replaceable_device(&device_cfg.id, Arc::new(device_cfg.clone()), index)?; + let configs: Vec> = + vec![Arc::new(drive_cfg), Arc::new(device_cfg.clone())]; + self.fill_replaceable_device(&device_cfg.id, configs, index)?; self.replaceable_info.block_count += 1; Ok(()) } - fn syscall_whitelist(&self) -> Vec { - syscall_whitelist() - } - - fn realize( - vm: &Arc>, - vm_config: &mut VmConfig, - is_migrate: bool, - ) -> MachineResult<()> { - use 
crate::errors::ResultExt; - - let mut locked_vm = vm.lock().unwrap(); - - locked_vm.init_memory( - &vm_config.machine_config.mem_config, - #[cfg(target_arch = "x86_64")] - &locked_vm.sys_io, - &locked_vm.sys_mem, - is_migrate, - vm_config.machine_config.nr_cpus, + pub(crate) fn add_virtio_mmio_device( + &mut self, + name: String, + device: Arc>, + ) -> Result>> { + let sys_mem = self.get_sys_mem().clone(); + let region_base = self.base.sysbus.lock().unwrap().min_free_base; + let region_size = MEM_LAYOUT[LayoutEntryType::Mmio as usize].1; + let dev = VirtioMmioDevice::new( + &sys_mem, + name, + device, + &self.base.sysbus, + region_base, + region_size, )?; - + let mmio_device = dev + .realize() + .with_context(|| MachineError::RlzVirtioMmioErr)?; #[cfg(target_arch = "x86_64")] { - locked_vm.init_interrupt_controller(u64::from(vm_config.machine_config.nr_cpus))?; - LightMachine::arch_init()?; - } - let mut vcpu_fds = vec![]; - for vcpu_id in 0..vm_config.machine_config.nr_cpus { - vcpu_fds.push(Arc::new( - KVM_FDS - .load() - .vm_fd - .as_ref() - .unwrap() - .create_vcpu(vcpu_id)?, - )); + let res = mmio_device.lock().unwrap().get_sys_resource().clone(); + let mut bs = self.base.boot_source.lock().unwrap(); + bs.kernel_cmdline.push(Param { + param_type: "virtio_mmio.device".to_string(), + value: format!("{}@0x{:08x}:{}", res.region_size, res.region_base, res.irq), + }); } - #[cfg(target_arch = "aarch64")] - locked_vm.init_interrupt_controller(u64::from(vm_config.machine_config.nr_cpus))?; - - // Add mmio devices - locked_vm - .create_replaceable_devices() - .chain_err(|| "Failed to create replaceable devices.")?; - locked_vm.add_devices(vm_config)?; - - let boot_config = if !is_migrate { - Some(locked_vm.load_boot_source(None)?) - } else { - None - }; - locked_vm.cpus.extend(::init_vcpu( - vm.clone(), - vm_config.machine_config.nr_cpus, - &vcpu_fds, - &boot_config, - )?); - - #[cfg(target_arch = "aarch64")] - if let Some(boot_cfg) = boot_config { - let mut fdt_helper = FdtBuilder::new(); - locked_vm - .generate_fdt_node(&mut fdt_helper) - .chain_err(|| MachineErrorKind::GenFdtErr)?; - let fdt_vec = fdt_helper.finish()?; - locked_vm - .sys_mem - .write( - &mut fdt_vec.as_slice(), - GuestAddress(boot_cfg.fdt_addr as u64), - fdt_vec.len() as u64, - ) - .chain_err(|| MachineErrorKind::WrtFdtErr(boot_cfg.fdt_addr, fdt_vec.len()))?; - } - locked_vm - .register_power_event(&locked_vm.power_button) - .chain_err(|| MachineErrorKind::InitEventFdErr("power_button".to_string()))?; - Ok(()) - } - - fn run(&self, paused: bool) -> MachineResult<()> { - ::vm_start(paused, &self.cpus, &mut self.vm_state.0.lock().unwrap()) + self.base.sysbus.lock().unwrap().min_free_base += region_size; + Ok(mmio_device) } } impl MachineLifecycle for LightMachine { fn pause(&self) -> bool { - if self.notify_lifecycle(KvmVmState::Running, KvmVmState::Paused) { + if self.notify_lifecycle(VmState::Running, VmState::Paused) { event!(Stop); true } else { @@ -784,7 +546,7 @@ impl MachineLifecycle for LightMachine { } fn resume(&self) -> bool { - if !self.notify_lifecycle(KvmVmState::Paused, KvmVmState::Running) { + if !self.notify_lifecycle(VmState::Paused, VmState::Running) { return false; } @@ -793,22 +555,17 @@ impl MachineLifecycle for LightMachine { } fn destroy(&self) -> bool { - let vmstate = { - let state = self.vm_state.deref().0.lock().unwrap(); - *state - }; - - if !self.notify_lifecycle(vmstate, KvmVmState::Shutdown) { + if self.shutdown_req.write(1).is_err() { + error!("Failed to send shutdown request."); return false; 
} - self.power_button.write(1).unwrap(); true } fn reset(&mut self) -> bool { // For micro vm, the reboot command is equivalent to the shutdown command. - for cpu in self.cpus.iter() { + for cpu in self.base.cpus.iter() { let (cpu_state, _) = cpu.state(); *cpu_state.lock().unwrap() = CpuLifecycleState::Stopped; } @@ -816,101 +573,91 @@ impl MachineLifecycle for LightMachine { self.destroy() } - fn notify_lifecycle(&self, old: KvmVmState, new: KvmVmState) -> bool { - ::vm_state_transfer( - &self.cpus, + fn notify_lifecycle(&self, old: VmState, new: VmState) -> bool { + if let Err(e) = self.vm_state_transfer( + &self.base.cpus, #[cfg(target_arch = "aarch64")] - &self.irq_chip, - &mut self.vm_state.0.lock().unwrap(), + &self.base.irq_chip, + &mut self.base.vm_state.0.lock().unwrap(), old, new, - ) - .is_ok() + ) { + error!("VM state transfer failed: {:?}", e); + return false; + } + true } } impl MachineAddressInterface for LightMachine { #[cfg(target_arch = "x86_64")] - fn pio_in(&self, addr: u64, mut data: &mut [u8]) -> bool { - // The function pit_calibrate_tsc() in kernel gets stuck if data read from - // io-port 0x61 is not 0x20. - // This problem only happens before Linux version 4.18 (fixed by 368a540e0) - if addr == 0x61 { - data[0] = 0x20; - return true; - } - let length = data.len() as u64; - self.sys_io - .read(&mut data, GuestAddress(addr), length) - .is_ok() + fn pio_in(&self, addr: u64, data: &mut [u8]) -> bool { + self.machine_base().pio_in(addr, data) } #[cfg(target_arch = "x86_64")] fn pio_out(&self, addr: u64, mut data: &[u8]) -> bool { + use address_space::AddressAttr; + use address_space::GuestAddress; + let count = data.len() as u64; - self.sys_io - .write(&mut data, GuestAddress(addr), count) + self.base + .sys_io + .write(&mut data, GuestAddress(addr), count, AddressAttr::MMIO) .is_ok() } - fn mmio_read(&self, addr: u64, mut data: &mut [u8]) -> bool { - let length = data.len() as u64; - self.sys_mem - .read(&mut data, GuestAddress(addr), length) - .is_ok() + fn mmio_read(&self, addr: u64, data: &mut [u8]) -> bool { + self.machine_base().mmio_read(addr, data) } - fn mmio_write(&self, addr: u64, mut data: &[u8]) -> bool { - let count = data.len() as u64; - self.sys_mem - .write(&mut data, GuestAddress(addr), count) - .is_ok() + fn mmio_write(&self, addr: u64, data: &[u8]) -> bool { + self.machine_base().mmio_write(addr, data) } } impl DeviceInterface for LightMachine { fn query_status(&self) -> Response { - let vmstate = self.vm_state.deref().0.lock().unwrap(); + let vmstate = self.get_vm_state().deref().0.lock().unwrap(); let qmp_state = match *vmstate { - KvmVmState::Running => qmp_schema::StatusInfo { + VmState::Running => qmp_schema::StatusInfo { singlestep: false, running: true, status: qmp_schema::RunState::running, }, - KvmVmState::Paused => qmp_schema::StatusInfo { + VmState::Paused => qmp_schema::StatusInfo { singlestep: false, - running: true, + running: false, status: qmp_schema::RunState::paused, }, _ => Default::default(), }; - Response::create_response(serde_json::to_value(&qmp_state).unwrap(), None) + Response::create_response(serde_json::to_value(qmp_state).unwrap(), None) } fn query_cpus(&self) -> Response { let mut cpu_vec: Vec = Vec::new(); - for cpu_index in 0..self.cpu_topo.max_cpus { - if self.cpu_topo.get_mask(cpu_index as usize) == 1 { - let thread_id = self.cpus[cpu_index as usize].tid(); - let (socketid, coreid, threadid) = self.cpu_topo.get_topo(cpu_index as usize); - let cpu_instance = qmp_schema::CpuInstanceProperties { - node_id: None, - 
socket_id: Some(socketid as isize), - core_id: Some(coreid as isize), - thread_id: Some(threadid as isize), + let cpu_topo = self.get_cpu_topo(); + let cpus = self.get_cpus(); + for cpu_index in 0..cpu_topo.max_cpus { + if cpu_topo.get_mask(cpu_index as usize) == 1 { + let thread_id = cpus[cpu_index as usize].tid(); + let cpu_instance = cpu_topo.get_topo_instance_for_qmp(cpu_index); + let cpu_common = qmp_schema::CpuInfoCommon { + current: true, + qom_path: String::from("/machine/unattached/device[") + + &cpu_index.to_string() + + "]", + halted: false, + props: Some(cpu_instance), + CPU: cpu_index as isize, + thread_id: thread_id as isize, }; #[cfg(target_arch = "x86_64")] { let cpu_info = qmp_schema::CpuInfo::x86 { - current: true, - qom_path: String::from("/machine/unattached/device[") - + &cpu_index.to_string() - + "]", - halted: false, - props: Some(cpu_instance), - CPU: cpu_index as isize, - thread_id: thread_id as isize, + common: cpu_common, x86: qmp_schema::CpuInfoX86 {}, }; cpu_vec.push(serde_json::to_value(cpu_info).unwrap()); @@ -918,14 +665,7 @@ impl DeviceInterface for LightMachine { #[cfg(target_arch = "aarch64")] { let cpu_info = qmp_schema::CpuInfo::Arm { - current: true, - qom_path: String::from("/machine/unattached/device[") - + &cpu_index.to_string() - + &"]".to_string(), - halted: false, - props: Some(cpu_instance), - CPU: cpu_index as isize, - thread_id: thread_id as isize, + common: cpu_common, arm: qmp_schema::CpuInfoArm {}, }; cpu_vec.push(serde_json::to_value(cpu_info).unwrap()); @@ -942,15 +682,9 @@ impl DeviceInterface for LightMachine { #[cfg(target_arch = "aarch64")] let cpu_type = String::from("host-aarch64-cpu"); - for cpu_index in 0..self.cpu_topo.max_cpus { - if self.cpu_topo.get_mask(cpu_index as usize) == 0 { - let (socketid, coreid, threadid) = self.cpu_topo.get_topo(cpu_index as usize); - let cpu_instance = qmp_schema::CpuInstanceProperties { - node_id: None, - socket_id: Some(socketid as isize), - core_id: Some(coreid as isize), - thread_id: Some(threadid as isize), - }; + for cpu_index in 0..self.base.cpu_topo.max_cpus { + if self.base.cpu_topo.get_mask(cpu_index as usize) == 0 { + let cpu_instance = self.base.cpu_topo.get_topo_instance_for_qmp(cpu_index); let hotpluggable_cpu = qmp_schema::HotpluggableCPU { type_: cpu_type.clone(), vcpus_count: 1, @@ -959,13 +693,7 @@ impl DeviceInterface for LightMachine { }; hotplug_vec.push(serde_json::to_value(hotpluggable_cpu).unwrap()); } else { - let (socketid, coreid, threadid) = self.cpu_topo.get_topo(cpu_index as usize); - let cpu_instance = qmp_schema::CpuInstanceProperties { - node_id: None, - socket_id: Some(socketid as isize), - core_id: Some(coreid as isize), - thread_id: Some(threadid as isize), - }; + let cpu_instance = self.base.cpu_topo.get_topo_instance_for_qmp(cpu_index); let hotpluggable_cpu = qmp_schema::HotpluggableCPU { type_: cpu_type.clone(), vcpus_count: 1, @@ -995,7 +723,7 @@ impl DeviceInterface for LightMachine { fn query_balloon(&self) -> Response { if let Some(actual) = qmp_query_balloon() { let ret = qmp_schema::BalloonInfo { actual }; - return Response::create_response(serde_json::to_value(&ret).unwrap(), None); + return Response::create_response(serde_json::to_value(ret).unwrap(), None); } Response::create_error_response( qmp_schema::QmpErrorClass::DeviceNotActive( @@ -1005,23 +733,53 @@ impl DeviceInterface for LightMachine { ) } + fn query_mem(&self) -> Response { + self.mem_show(); + Response::create_empty_response() + } + + /// VNC is not supported by light machine currently. 
+ fn query_vnc(&self) -> Response { + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "The service of VNC is not supported".to_string(), + ), + None, + ) + } + + fn query_display_image(&self) -> Response { + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "query-display-image is not supported".to_string(), + ), + None, + ) + } + fn device_add(&mut self, args: Box) -> Response { // get slot of bus by addr or lun - let mut slot = 0; - if let Some(addr) = args.addr { - let slot_str = addr.as_str().trim_start_matches("0x"); - - if let Ok(n) = usize::from_str_radix(slot_str, 16) { - slot = n; + let mut slot = 0_usize; + if let Some(addr) = args.addr.clone() { + if let Ok(num) = str_to_num::(&addr) { + slot = num; + } else { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(format!( + "Invalid addr for device {}", + args.id + )), + None, + ); } } else if let Some(lun) = args.lun { slot = lun + 1; } - match self.add_replaceable_device(&args.id, &args.driver, slot) { + match self.add_replaceable_device(args.clone(), slot) { Ok(()) => Response::create_empty_response(), Err(ref e) => { - error!("{}", e.display_chain()); + error!("{:?}", e); error!("Failed to add device: id {}, type {}", args.id, args.driver); Response::create_error_response( qmp_schema::QmpErrorClass::GenericError(e.to_string()), @@ -1043,7 +801,7 @@ impl DeviceInterface for LightMachine { Response::create_empty_response() } Err(ref e) => { - error!("Failed to delete device: {}", e.display_chain()); + error!("Failed to delete device: {:?}", e); Response::create_error_response( qmp_schema::QmpErrorClass::GenericError(e.to_string()), None, @@ -1053,71 +811,44 @@ impl DeviceInterface for LightMachine { } fn blockdev_add(&self, args: Box) -> Response { - const MAX_STRING_LENGTH: usize = 255; - let read_only = args.read_only.unwrap_or(false); + let readonly = args.read_only.unwrap_or(false); + let mut direct = true; + if args.cache.is_some() && !args.cache.unwrap().direct.unwrap_or(true) { + direct = false; + } - let direct = if let Some(cache) = args.cache { - match cache.direct { - Some(direct) => direct, - _ => true, - } - } else { - true + let config = DriveConfig { + id: args.node_name.clone(), + drive_type: "none".to_string(), + path_on_host: args.file.filename.clone(), + readonly, + direct, + aio: args.file.aio, + ..Default::default() }; - let blk = Path::new(&args.file.filename); - match metadata(blk) { - Ok(meta) => { - if (meta.st_mode() & libc::S_IFREG != libc::S_IFREG) - && (meta.st_mode() & libc::S_IFBLK != libc::S_IFBLK) - { - error!("File {:?} is not a regular file or block device", blk); - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError( - "File is not a regular file or block device".to_string(), - ), - None, - ); - } - } - Err(ref e) => { - error!("Blockdev_add failed: {}", e); - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ); - } + if let Err(e) = config.check() { + error!("{:?}", e); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); } - - if let Some(file_name) = blk.file_name() { - if file_name.len() > MAX_STRING_LENGTH { - error!("File name {:?} is illegal", file_name); - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError("Illegal block name".to_string()), - None, - ); - } - } else { - error!("Path: {:?} is not valid", blk); + // 
Register drive backend file for hotplugged drive. + if let Err(e) = self.register_drive_file(&config.id, &args.file.filename, readonly, direct) + { + error!("{:?}", e); return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError("Invalid block path".to_string()), + qmp_schema::QmpErrorClass::GenericError(e.to_string()), None, ); } - - let config = BlkDevConfig { - id: args.node_name.clone(), - path_on_host: args.file.filename, - read_only, - direct, - serial_num: None, - iothread: None, - iops: None, - }; match self.add_replaceable_config(&args.node_name, Arc::new(config)) { Ok(()) => Response::create_empty_response(), Err(ref e) => { - error!("{}", e.display_chain()); + error!("{:?}", e); + // It's safe to unwrap as the path has been registered. + self.unregister_drive_file(&args.file.filename).unwrap(); Response::create_error_response( qmp_schema::QmpErrorClass::GenericError(e.to_string()), None, @@ -1134,16 +865,9 @@ impl DeviceInterface for LightMachine { } fn netdev_add(&mut self, args: Box) -> Response { - let mut config = NetworkInterfaceConfig { + let mut netdev_cfg = NetDevcfg { id: args.id.clone(), - host_dev_name: "".to_string(), - mac: None, - tap_fds: None, - vhost_type: None, - vhost_fds: None, - iothread: None, - queues: 2, - mq: false, + ..Default::default() }; if let Some(fds) = args.fds { @@ -1155,7 +879,7 @@ impl DeviceInterface for LightMachine { }; if let Some(fd_num) = QmpChannel::get_fd(&netdev_fd) { - config.tap_fds = Some(vec![fd_num]); + netdev_cfg.tap_fds = Some(vec![fd_num]); } else { // try to convert string to RawFd let fd_num = match netdev_fd.parse::() { @@ -1173,10 +897,10 @@ impl DeviceInterface for LightMachine { ); } }; - config.tap_fds = Some(vec![fd_num]); + netdev_cfg.tap_fds = Some(vec![fd_num]); } } else if let Some(if_name) = args.if_name { - config.host_dev_name = if_name.clone(); + netdev_cfg.ifname = if_name.clone(); if create_tap(None, Some(&if_name), 1).is_err() { return Response::create_error_response( qmp_schema::QmpErrorClass::GenericError( @@ -1187,10 +911,10 @@ impl DeviceInterface for LightMachine { } } - match self.add_replaceable_config(&args.id, Arc::new(config)) { + match self.add_replaceable_config(&args.id, Arc::new(netdev_cfg)) { Ok(()) => Response::create_empty_response(), Err(ref e) => { - error!("{}", e.display_chain()); + error!("{:?}", e); Response::create_error_response( qmp_schema::QmpErrorClass::GenericError(e.to_string()), None, @@ -1206,6 +930,42 @@ impl DeviceInterface for LightMachine { ) } + fn chardev_add(&mut self, _args: qmp_schema::CharDevAddArgument) -> Response { + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "chardev_add not supported yet for microVM".to_string(), + ), + None, + ) + } + + fn chardev_remove(&mut self, _id: String) -> Response { + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "chardev_remove not supported yet for microVM".to_string(), + ), + None, + ) + } + + fn cameradev_add(&mut self, _args: qmp_schema::CameraDevAddArgument) -> Response { + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "cameradev_add not supported for MicroVM".to_string(), + ), + None, + ) + } + + fn cameradev_del(&mut self, _id: String) -> Response { + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "cameradev_del not supported for MicroVM".to_string(), + ), + None, + ) + } + fn getfd(&self, fd_name: String, if_fd: Option) -> Response { if let Some(fd) = if_fd { 
QmpChannel::set_fd(fd_name, fd); @@ -1216,46 +976,36 @@ impl DeviceInterface for LightMachine { Response::create_error_response(err_resp, None) } } + + fn update_region(&mut self, _args: UpdateRegionArgument) -> Response { + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError("The micro vm is not supported".to_string()), + None, + ) + } } impl MigrateInterface for LightMachine { fn migrate(&self, uri: String) -> Response { - use util::unix::{parse_uri, UnixPath}; - - match parse_uri(&uri) { - Ok((UnixPath::File, path)) => { - if let Err(e) = MigrationManager::save_snapshot(&path) { - error!( - "Failed to migrate to path \'{:?}\': {}", - path, - e.display_chain() - ); - let _ = MigrationManager::set_status(MigrationStatus::Failed) - .map_err(|e| error!("{}", e)); - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ); - } - } - _ => { - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(format!("Invalid uri: {}", uri)), + match parse_incoming_uri(&uri) { + Ok((MigrateMode::File, path)) => migration::snapshot(path), + Ok((MigrateMode::Unix, _)) | Ok((MigrateMode::Tcp, _)) => { + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "MicroVM does not support migration".to_string(), + ), None, - ); + ) } + _ => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(format!("Invalid uri: {}", uri)), + None, + ), } - - Response::create_empty_response() } fn query_migrate(&self) -> Response { - let status_str = MigrationManager::migration_get_status().to_string(); - let migration_info = qmp_schema::MigrationInfo { - status: Some(status_str), - }; - - Response::create_response(serde_json::to_value(migration_info).unwrap(), None) + migration::query_migrate() } } @@ -1264,291 +1014,12 @@ impl MachineExternalInterface for LightMachine {} impl EventLoopManager for LightMachine { fn loop_should_exit(&self) -> bool { - let vmstate = self.vm_state.deref().0.lock().unwrap(); - *vmstate == KvmVmState::Shutdown - } - - fn loop_cleanup(&self) -> util::errors::Result<()> { - use util::errors::ResultExt; - - set_termi_canon_mode().chain_err(|| "Failed to set terminal to canonical mode")?; - Ok(()) - } -} - -// Function that helps to generate serial node in device-tree. -// -// # Arguments -// -// * `dev_info` - Device resource info of serial device. -// * `fdt` - Flatted device-tree blob where serial node will be filled into. -#[cfg(target_arch = "aarch64")] -fn generate_serial_device_node(fdt: &mut FdtBuilder, res: &SysRes) -> util::errors::Result<()> { - let node = format!("uart@{:x}", res.region_base); - let serial_node_dep = fdt.begin_node(&node)?; - fdt.set_property_string("compatible", "ns16550a")?; - fdt.set_property_string("clock-names", "apb_pclk")?; - fdt.set_property_u32("clocks", device_tree::CLK_PHANDLE)?; - fdt.set_property_array_u64("reg", &[res.region_base, res.region_size])?; - fdt.set_property_array_u32( - "interrupts", - &[ - device_tree::GIC_FDT_IRQ_TYPE_SPI, - res.irq as u32, - device_tree::IRQ_TYPE_EDGE_RISING, - ], - )?; - fdt.end_node(serial_node_dep)?; - - Ok(()) -} - -// Function that helps to generate RTC node in device-tree. -// -// # Arguments -// -// * `dev_info` - Device resource info of RTC device. -// * `fdt` - Flatted device-tree blob where RTC node will be filled into. 
-#[cfg(target_arch = "aarch64")] -fn generate_rtc_device_node(fdt: &mut FdtBuilder, res: &SysRes) -> util::errors::Result<()> { - let node = format!("pl031@{:x}", res.region_base); - let rtc_node_dep = fdt.begin_node(&node)?; - fdt.set_property_string("compatible", "arm,pl031\0arm,primecell\0")?; - fdt.set_property_string("clock-names", "apb_pclk")?; - fdt.set_property_u32("clocks", device_tree::CLK_PHANDLE)?; - fdt.set_property_array_u64("reg", &[res.region_base, res.region_size])?; - fdt.set_property_array_u32( - "interrupts", - &[ - device_tree::GIC_FDT_IRQ_TYPE_SPI, - res.irq as u32, - device_tree::IRQ_TYPE_LEVEL_HIGH, - ], - )?; - fdt.end_node(rtc_node_dep)?; - - Ok(()) -} - -// Function that helps to generate Virtio-Mmio device's node in device-tree. -// -// # Arguments -// -// * `dev_info` - Device resource info of Virtio-Mmio device. -// * `fdt` - Flatted device-tree blob where node will be filled into. -#[cfg(target_arch = "aarch64")] -fn generate_virtio_devices_node(fdt: &mut FdtBuilder, res: &SysRes) -> util::errors::Result<()> { - let node = format!("virtio_mmio@{:x}", res.region_base); - let virtio_node_dep = fdt.begin_node(&node)?; - fdt.set_property_string("compatible", "virtio,mmio")?; - fdt.set_property_u32("interrupt-parent", device_tree::GIC_PHANDLE)?; - fdt.set_property_array_u64("reg", &[res.region_base, res.region_size])?; - fdt.set_property_array_u32( - "interrupts", - &[ - device_tree::GIC_FDT_IRQ_TYPE_SPI, - res.irq as u32, - device_tree::IRQ_TYPE_EDGE_RISING, - ], - )?; - fdt.end_node(virtio_node_dep)?; - Ok(()) -} - -/// Trait that helps to generate all nodes in device-tree. -#[allow(clippy::upper_case_acronyms)] -#[cfg(target_arch = "aarch64")] -trait CompileFDTHelper { - /// Function that helps to generate cpu nodes. - fn generate_cpu_nodes(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()>; - /// Function that helps to generate memory nodes. - fn generate_memory_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()>; - /// Function that helps to generate Virtio-mmio devices' nodes. - fn generate_devices_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()>; - /// Function that helps to generate the chosen node. 
- fn generate_chosen_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()>; -} - -#[cfg(target_arch = "aarch64")] -impl CompileFDTHelper for LightMachine { - fn generate_cpu_nodes(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()> { - let node = "cpus"; - - let cpus_node_dep = fdt.begin_node(node)?; - fdt.set_property_u32("#address-cells", 0x02)?; - fdt.set_property_u32("#size-cells", 0x0)?; - - // Generate CPU topology - if self.cpu_topo.max_cpus > 0 && self.cpu_topo.max_cpus % 8 == 0 { - let cpu_map_node_dep = fdt.begin_node("cpu-map")?; - - let sockets = self.cpu_topo.max_cpus / 8; - for cluster in 0..u32::from(sockets) { - let clster = format!("cluster{}", cluster); - let cluster_node_dep = fdt.begin_node(&clster)?; - - for i in 0..2_u32 { - let sub_cluster = format!("cluster{}", i); - let sub_cluster_node_dep = fdt.begin_node(&sub_cluster)?; - - let core0 = "core0".to_string(); - let core0_node_dep = fdt.begin_node(&core0)?; - - let thread0 = "thread0".to_string(); - let thread0_node_dep = fdt.begin_node(&thread0)?; - fdt.set_property_u32("cpu", cluster * 8 + i * 4 + 10)?; - fdt.end_node(thread0_node_dep)?; - - let thread1 = "thread1".to_string(); - let thread1_node_dep = fdt.begin_node(&thread1)?; - fdt.set_property_u32("cpu", cluster * 8 + i * 4 + 10 + 1)?; - fdt.end_node(thread1_node_dep)?; - - fdt.end_node(core0_node_dep)?; - - let core1 = "core1".to_string(); - let core1_node_dep = fdt.begin_node(&core1)?; - - let thread0 = "thread0".to_string(); - let thread0_node_dep = fdt.begin_node(&thread0)?; - fdt.set_property_u32("cpu", cluster * 8 + i * 4 + 10 + 2)?; - fdt.end_node(thread0_node_dep)?; - - let thread1 = "thread1".to_string(); - let thread1_node_dep = fdt.begin_node(&thread1)?; - fdt.set_property_u32("cpu", cluster * 8 + i * 4 + 10 + 3)?; - fdt.end_node(thread1_node_dep)?; - - fdt.end_node(core1_node_dep)?; - - fdt.end_node(sub_cluster_node_dep)?; - } - fdt.end_node(cluster_node_dep)?; - } - fdt.end_node(cpu_map_node_dep)?; - } - - for cpu_index in 0..self.cpu_topo.max_cpus { - let mpidr = self.cpus[cpu_index as usize].arch().lock().unwrap().mpidr(); - - let node = format!("cpu@{:x}", mpidr); - let mpidr_node_dep = fdt.begin_node(&node)?; - fdt.set_property_u32( - "phandle", - u32::from(cpu_index) + device_tree::CPU_PHANDLE_START, - )?; - fdt.set_property_string("device_type", "cpu")?; - fdt.set_property_string("compatible", "arm,arm-v8")?; - if self.cpu_topo.max_cpus > 1 { - fdt.set_property_string("enable-method", "psci")?; - } - fdt.set_property_u64("reg", mpidr & 0x007F_FFFF)?; - fdt.end_node(mpidr_node_dep)?; - } - - fdt.end_node(cpus_node_dep)?; - - Ok(()) + let vmstate = self.base.vm_state.deref().0.lock().unwrap(); + *vmstate == VmState::Shutdown } - fn generate_memory_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()> { - let mem_base = MEM_LAYOUT[LayoutEntryType::Mem as usize].0; - let mem_size = self.sys_mem.memory_end_address().raw_value() - - MEM_LAYOUT[LayoutEntryType::Mem as usize].0; - let node = "memory"; - let memory_node_dep = fdt.begin_node(node)?; - fdt.set_property_string("device_type", "memory")?; - fdt.set_property_array_u64("reg", &[mem_base, mem_size as u64])?; - fdt.end_node(memory_node_dep)?; - - Ok(()) - } - - fn generate_devices_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()> { - // timer - let mut cells: Vec = Vec::new(); - for &irq in [13, 14, 11, 10].iter() { - cells.push(device_tree::GIC_FDT_IRQ_TYPE_PPI); - cells.push(irq); - cells.push(device_tree::IRQ_TYPE_LEVEL_HIGH); - } - let node = "timer"; - 
let timer_node_dep = fdt.begin_node(node)?; - fdt.set_property_string("compatible", "arm,armv8-timer")?; - fdt.set_property("always-on", &Vec::new())?; - fdt.set_property_array_u32("interrupts", &cells)?; - fdt.end_node(timer_node_dep)?; - - // clock - let node = "apb-pclk"; - let clock_node_dep = fdt.begin_node(node)?; - fdt.set_property_string("compatible", "fixed-clock")?; - fdt.set_property_string("clock-output-names", "clk24mhz")?; - fdt.set_property_u32("#clock-cells", 0x0)?; - fdt.set_property_u32("clock-frequency", 24_000_000)?; - fdt.set_property_u32("phandle", device_tree::CLK_PHANDLE)?; - fdt.end_node(clock_node_dep)?; - - // psci - let node = "psci"; - let psci_node_dep = fdt.begin_node(node)?; - fdt.set_property_string("compatible", "arm,psci-0.2")?; - fdt.set_property_string("method", "hvc")?; - fdt.end_node(psci_node_dep)?; - - for dev in self.sysbus.devices.iter() { - let mut locked_dev = dev.lock().unwrap(); - let dev_type = locked_dev.get_type(); - let sys_res = locked_dev.get_sys_resource().unwrap(); - match dev_type { - SysBusDevType::Serial => generate_serial_device_node(fdt, sys_res)?, - SysBusDevType::Rtc => generate_rtc_device_node(fdt, sys_res)?, - SysBusDevType::VirtioMmio => generate_virtio_devices_node(fdt, sys_res)?, - _ => (), - } - } - Ok(()) - } - - fn generate_chosen_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()> { - let node = "chosen"; - let boot_source = self.boot_source.lock().unwrap(); - - let chosen_node_dep = fdt.begin_node(node)?; - let cmdline = &boot_source.kernel_cmdline.to_string(); - fdt.set_property_string("bootargs", cmdline.as_str())?; - - match &boot_source.initrd { - Some(initrd) => { - fdt.set_property_u64("linux,initrd-start", initrd.initrd_addr)?; - fdt.set_property_u64("linux,initrd-end", initrd.initrd_addr + initrd.initrd_size)?; - } - None => {} - } - fdt.end_node(chosen_node_dep)?; - - Ok(()) - } -} - -#[cfg(target_arch = "aarch64")] -impl device_tree::CompileFDT for LightMachine { - fn generate_fdt_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()> { - let node_dep = fdt.begin_node("")?; - - fdt.set_property_string("compatible", "linux,dummy-virt")?; - fdt.set_property_u32("#address-cells", 0x2)?; - fdt.set_property_u32("#size-cells", 0x2)?; - fdt.set_property_u32("interrupt-parent", device_tree::GIC_PHANDLE)?; - - self.generate_cpu_nodes(fdt)?; - self.generate_memory_node(fdt)?; - self.generate_devices_node(fdt)?; - self.generate_chosen_node(fdt)?; - self.irq_chip.as_ref().unwrap().generate_fdt_node(fdt)?; - - fdt.end_node(node_dep)?; - + fn loop_cleanup(&self) -> Result<()> { + set_termi_canon_mode().with_context(|| "Failed to set terminal to canonical mode")?; Ok(()) } } diff --git a/machine/src/micro_vm/syscall.rs b/machine/src/micro_common/syscall.rs similarity index 69% rename from machine/src/micro_vm/syscall.rs rename to machine/src/micro_common/syscall.rs index 81637a3880ad8e021d6b889e2175bd232a324d20..6ae9a56a28ef7957c0dab9f1e2dc010c7aa73967 100644 --- a/machine/src/micro_vm/syscall.rs +++ b/machine/src/micro_common/syscall.rs @@ -10,9 +10,13 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
+#[cfg(target_arch = "aarch64")] +use crate::aarch64::micro::{arch_ioctl_allow_list, arch_syscall_whitelist}; +#[cfg(target_arch = "x86_64")] +use crate::x86_64::micro::{arch_ioctl_allow_list, arch_syscall_whitelist}; use hypervisor::kvm::*; use util::seccomp::{BpfRule, SeccompCmpOpt}; -use util::tap::{TUNSETIFF, TUNSETOFFLOAD, TUNSETVNETHDRSZ}; +use util::tap::{TUNGETFEATURES, TUNSETIFF, TUNSETOFFLOAD, TUNSETQUEUE, TUNSETVNETHDRSZ}; use virtio::VhostKern::*; /// See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/linux/futex.h @@ -30,12 +34,6 @@ const FUTEX_CMP_REQUEUE_PRIVATE: u32 = FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG; const FUTEX_WAKE_OP_PRIVATE: u32 = FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG; const FUTEX_WAIT_BITSET_PRIVATE: u32 = FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG; -/// See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/linux/fcntl.h -const F_GETFD: u32 = 1; -const F_SETFD: u32 = 2; -const F_LINUX_SPECIFIC_BASE: u32 = 1024; -const F_DUPFD_CLOEXEC: u32 = F_LINUX_SPECIFIC_BASE + 6; - // See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/asm-generic/ioctls.h const TCGETS: u32 = 0x5401; const TCSETS: u32 = 0x5402; @@ -47,25 +45,21 @@ const KVM_RUN: u32 = 0xae80; /// Create a syscall whitelist for seccomp. /// /// # Notes -/// This allowlist limit syscall with: -/// * x86_64-unknown-gnu: 43 syscalls -/// * x86_64-unknown-musl: 43 syscalls -/// * aarch64-unknown-gnu: 41 syscalls -/// * aarch64-unknown-musl: 42 syscalls +/// /// To reduce performance losses, the syscall rules is ordered by frequency. pub fn syscall_whitelist() -> Vec { - vec![ + let mut syscall = vec![ BpfRule::new(libc::SYS_read), BpfRule::new(libc::SYS_readv), BpfRule::new(libc::SYS_write), BpfRule::new(libc::SYS_writev), ioctl_allow_list(), - #[cfg(not(all(target_env = "gnu", target_arch = "x86_64")))] - BpfRule::new(libc::SYS_epoll_pwait), - #[cfg(all(target_env = "gnu", target_arch = "x86_64"))] - BpfRule::new(libc::SYS_epoll_wait), BpfRule::new(libc::SYS_io_getevents), BpfRule::new(libc::SYS_io_submit), + BpfRule::new(libc::SYS_io_destroy), + BpfRule::new(libc::SYS_io_uring_enter), + BpfRule::new(libc::SYS_io_uring_setup), + BpfRule::new(libc::SYS_io_uring_register), BpfRule::new(libc::SYS_dup), BpfRule::new(libc::SYS_close), BpfRule::new(libc::SYS_eventfd2), @@ -73,17 +67,18 @@ pub fn syscall_whitelist() -> Vec { BpfRule::new(libc::SYS_fdatasync), BpfRule::new(libc::SYS_recvmsg), BpfRule::new(libc::SYS_sendmsg), + BpfRule::new(libc::SYS_sendto), BpfRule::new(libc::SYS_recvfrom), BpfRule::new(libc::SYS_mremap), BpfRule::new(libc::SYS_io_setup), BpfRule::new(libc::SYS_brk), BpfRule::new(libc::SYS_fcntl) - .add_constraint(SeccompCmpOpt::Eq, 1, F_DUPFD_CLOEXEC) - .add_constraint(SeccompCmpOpt::Eq, 1, F_SETFD) - .add_constraint(SeccompCmpOpt::Eq, 1, F_GETFD), + .add_constraint(SeccompCmpOpt::Eq, 1, libc::F_DUPFD_CLOEXEC as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, libc::F_SETFD as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, libc::F_GETFD as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, libc::F_SETLK as u32), + BpfRule::new(libc::SYS_flock), BpfRule::new(libc::SYS_rt_sigprocmask), - #[cfg(target_arch = "x86_64")] - BpfRule::new(libc::SYS_open), BpfRule::new(libc::SYS_openat), BpfRule::new(libc::SYS_sigaltstack), BpfRule::new(libc::SYS_mmap), @@ -91,10 +86,11 @@ pub fn syscall_whitelist() -> Vec { BpfRule::new(libc::SYS_accept4), BpfRule::new(libc::SYS_lseek), futex_rule(), + BpfRule::new(libc::SYS_clone), BpfRule::new(libc::SYS_exit), BpfRule::new(libc::SYS_exit_group), 
BpfRule::new(libc::SYS_rt_sigreturn), - #[cfg(target_env = "musl")] + #[cfg(any(target_env = "musl", target_env = "ohos"))] BpfRule::new(libc::SYS_tkill), #[cfg(target_env = "gnu")] BpfRule::new(libc::SYS_tgkill), @@ -102,24 +98,35 @@ pub fn syscall_whitelist() -> Vec { BpfRule::new(libc::SYS_getpid), BpfRule::new(libc::SYS_fstat), BpfRule::new(libc::SYS_pread64), + BpfRule::new(libc::SYS_preadv), BpfRule::new(libc::SYS_pwrite64), + BpfRule::new(libc::SYS_pwritev), BpfRule::new(libc::SYS_statx), - #[cfg(all(target_env = "musl", target_arch = "x86_64"))] - BpfRule::new(libc::SYS_stat), - #[cfg(all(target_env = "gnu", target_arch = "x86_64"))] - BpfRule::new(libc::SYS_newfstatat), - #[cfg(all(target_env = "musl", target_arch = "aarch64"))] - BpfRule::new(libc::SYS_newfstatat), - #[cfg(target_arch = "x86_64")] - BpfRule::new(libc::SYS_unlink), - #[cfg(target_arch = "aarch64")] - BpfRule::new(libc::SYS_unlinkat), - #[cfg(target_arch = "x86_64")] - BpfRule::new(libc::SYS_mkdir), - #[cfg(target_arch = "aarch64")] - BpfRule::new(libc::SYS_mkdirat), + BpfRule::new(libc::SYS_renameat), + BpfRule::new(libc::SYS_getrandom), + BpfRule::new(libc::SYS_fallocate), + BpfRule::new(libc::SYS_socket), + BpfRule::new(libc::SYS_mprotect), + BpfRule::new(libc::SYS_ppoll), + BpfRule::new(libc::SYS_connect), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_clone3), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_rt_sigaction), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_rseq), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_set_robust_list), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_sched_getaffinity), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_clock_gettime), + BpfRule::new(libc::SYS_prctl), madvise_rule(), - ] + ]; + syscall.append(&mut arch_syscall_whitelist()); + + syscall } /// Create a syscall bpf rule for syscall `ioctl`. 
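For readers less familiar with seccomp allow-lists, the pattern used by `syscall_whitelist()` above is simply "syscall number, optionally pinned arguments": a `BpfRule` with no constraints admits every invocation of that syscall, while `add_constraint(SeccompCmpOpt::Eq, arg, value)` narrows it to calls whose given argument equals one of the listed values (so the `fcntl` rule only admits `F_DUPFD_CLOEXEC`, `F_SETFD`, `F_GETFD` and `F_SETLK`). The sketch below models that matching behaviour with simplified stand-in types; `Rule`, `CmpOpt` and `allows` are illustrative names, not the `util::seccomp` API, and it assumes a 64-bit Linux target with the `libc` crate available.

// Minimal stand-in for the allow-list idea used by syscall_whitelist() above.
// Not the util::seccomp crate API; illustration only.
#[derive(Clone, Copy)]
enum CmpOpt {
    Eq,
}

struct Rule {
    nr: libc::c_long,
    // (argument index, comparison, accepted value)
    constraints: Vec<(u32, CmpOpt, u64)>,
}

impl Rule {
    fn new(nr: libc::c_long) -> Self {
        Rule { nr, constraints: Vec::new() }
    }

    fn add_constraint(mut self, op: CmpOpt, arg: u32, value: u64) -> Self {
        self.constraints.push((arg, op, value));
        self
    }

    // A call passes if the syscall number matches and, when constraints exist,
    // at least one constrained (argument, value) pair matches. This mirrors the
    // OR semantics of the fcntl rule above, where several commands share arg 1.
    fn allows(&self, nr: libc::c_long, args: &[u64; 6]) -> bool {
        if nr != self.nr {
            return false;
        }
        self.constraints.is_empty()
            || self.constraints.iter().any(|&(arg, op, value)| match op {
                CmpOpt::Eq => args[arg as usize] == value,
            })
    }
}

fn main() {
    // Mirrors the shape of the fcntl rule above: only listed commands pass.
    let fcntl = Rule::new(libc::SYS_fcntl)
        .add_constraint(CmpOpt::Eq, 1, libc::F_SETFD as u64)
        .add_constraint(CmpOpt::Eq, 1, libc::F_GETFD as u64);

    let mut args = [0u64; 6];
    args[1] = libc::F_GETFD as u64;
    assert!(fcntl.allows(libc::SYS_fcntl, &args));

    args[1] = libc::F_SETFL as u64; // not whitelisted, so the filter rejects it
    assert!(!fcntl.allows(libc::SYS_fcntl, &args));
}

The real filter is compiled to BPF and evaluated in the kernel, where rules are checked in order; that is why the doc comment above notes that the list is ordered by call frequency, so the most common syscalls match after the fewest comparisons.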
@@ -144,53 +151,37 @@ fn ioctl_allow_list() -> BpfRule { .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_FEATURES() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_MEM_TABLE() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_NET_SET_BACKEND() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, TUNGETFEATURES() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, TUNSETIFF() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, TUNSETOFFLOAD() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, TUNSETVNETHDRSZ() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, TUNSETQUEUE() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_API_VERSION() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_MP_STATE() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_MP_STATE() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_VCPU_EVENTS() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_VCPU_EVENTS() as u32); - ioctl_arch_allow_list(bpf_rule) -} - -#[cfg(target_arch = "x86_64")] -fn ioctl_arch_allow_list(bpf_rule: BpfRule) -> BpfRule { - bpf_rule - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_PIT2() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_CLOCK() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_IRQCHIP() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_REGS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_SREGS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_XSAVE() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_SREGS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_DEBUGREGS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_XCRS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_LAPIC() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_MSRS() as u32) -} - -#[cfg(target_arch = "aarch64")] -fn ioctl_arch_allow_list(bpf_rule: BpfRule) -> BpfRule { - bpf_rule - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_ONE_REG() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_DEVICE_ATTR() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_REG_LIST() as u32) + arch_ioctl_allow_list(bpf_rule) } fn madvise_rule() -> BpfRule { - #[cfg(all(target_env = "musl", target_arch = "x86_64"))] + #[cfg(any(target_env = "musl", target_env = "ohos"))] return BpfRule::new(libc::SYS_madvise) .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_FREE as u32) .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTNEED as u32) - .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_WILLNEED as u32); - #[cfg(not(all(target_env = "musl", target_arch = "x86_64")))] + .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_WILLNEED as u32) + .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTDUMP as u32) + .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_REMOVE as u32); + #[cfg(target_env = "gnu")] return BpfRule::new(libc::SYS_madvise) .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTNEED as u32) - .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_WILLNEED as u32); + .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_WILLNEED as u32) + .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTDUMP as u32) + .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_REMOVE as u32); } fn futex_rule() -> BpfRule { - #[cfg(target_env = "musl")] + #[cfg(any(target_env = "musl", target_env = "ohos"))] return BpfRule::new(libc::SYS_futex) .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAKE_PRIVATE) .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAIT_PRIVATE) diff --git a/machine/src/micro_vm/mem_layout.rs b/machine/src/micro_vm/mem_layout.rs deleted file mode 100644 index 
c9d6f56f5e14319263f22856bbdf420e79934a26..0000000000000000000000000000000000000000 --- a/machine/src/micro_vm/mem_layout.rs +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. -// -// StratoVirt is licensed under Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan -// PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. - -/// The type of memory layout entry on aarch64 -#[allow(dead_code)] -#[cfg(target_arch = "aarch64")] -#[repr(usize)] -pub enum LayoutEntryType { - GicDist, - GicCpu, - GicIts, - GicRedist, - Uart, - Rtc, - Mmio, - Mem, - HighGicRedist, -} - -/// Layout of aarch64 -#[cfg(target_arch = "aarch64")] -pub const MEM_LAYOUT: &[(u64, u64)] = &[ - (0x0800_0000, 0x0001_0000), // GicDist - (0x0801_0000, 0x0001_0000), // GicCpu - (0x0808_0000, 0x0002_0000), // GicIts - (0x080A_0000, 0x00F6_0000), // GicRedist (max 123 redistributors) - (0x0900_0000, 0x0000_1000), // Uart - (0x0901_0000, 0x0000_1000), // Rtc - (0x0A00_0000, 0x0000_0200), // Mmio - (0x4000_0000, 0x80_0000_0000), // Mem - (256 << 30, 0x200_0000), // HighGicRedist, (where remaining redistributors locates) -]; - -/// The type of memory layout entry on x86_64 -#[allow(dead_code)] -#[cfg(target_arch = "x86_64")] -#[repr(usize)] -pub enum LayoutEntryType { - MemBelow4g = 0_usize, - Mmio, - IoApic, - LocalApic, - MemAbove4g, -} - -/// Layout of x86_64 -#[cfg(target_arch = "x86_64")] -pub const MEM_LAYOUT: &[(u64, u64)] = &[ - (0, 0xC000_0000), // MemBelow4g - (0xF010_0000, 0x200), // Mmio - (0xFEC0_0000, 0x10_0000), // IoApic - (0xFEE0_0000, 0x10_0000), // LocalApic - (0x1_0000_0000, 0x80_0000_0000), // MemAbove4g -]; diff --git a/machine/src/standard_common/mod.rs b/machine/src/standard_common/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..7ac6bcbbcbe9273984930cdc707aff0285ea523a --- /dev/null +++ b/machine/src/standard_common/mod.rs @@ -0,0 +1,2174 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
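
Note: the micro-VM memory-layout table deleted above is superseded by per-arch `standard` modules (imported later in this file as LayoutEntryType/MEM_LAYOUT), which are not part of this excerpt. The sketch below shows the shape such a table takes, following the deleted micro_vm version and the entry names used by build_mcfg_table in this file; the numeric base/size values are placeholders, not StratoVirt's real layout.

// Hypothetical, simplified illustration of machine/src/<arch>/standard.rs.
// Entry names mirror the usages in this file; all values are placeholders.
#[repr(usize)]
pub enum LayoutEntryType {
    MemBelow4g = 0_usize,
    PcieEcam,
    PcieMmio,
    MemAbove4g,
}

/// Each entry is a (base, size) pair indexed by `LayoutEntryType`.
pub const MEM_LAYOUT: &[(u64, u64)] = &[
    (0, 0x8000_0000),                // MemBelow4g (placeholder)
    (0xB000_0000, 0x1000_0000),      // PcieEcam   (placeholder; size >> 20 = 256 buses)
    (0xC000_0000, 0x3000_0000),      // PcieMmio   (placeholder)
    (0x1_0000_0000, 0x80_0000_0000), // MemAbove4g (placeholder)
];

// Consumers index the table by entry type, e.g. the ECAM base consumed by
// the MCFG builder in this file:
// let ecam_base = MEM_LAYOUT[LayoutEntryType::PcieEcam as usize].0;
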
+ +pub mod syscall; + +pub use crate::error::MachineError; + +use std::mem::size_of; +use std::ops::Deref; +use std::os::unix::io::RawFd; +use std::os::unix::prelude::AsRawFd; +use std::rc::Rc; +use std::string::String; +use std::sync::{Arc, Mutex}; +use std::u64; + +use anyhow::{bail, Context, Result}; +use log::{error, warn}; +use serde_json::json; +use util::set_termi_canon_mode; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; + +#[cfg(target_arch = "aarch64")] +use crate::aarch64::standard::{LayoutEntryType, MEM_LAYOUT}; +#[cfg(target_arch = "x86_64")] +use crate::x86_64::ich9_lpc::{ + PM_CTRL_OFFSET, PM_EVENT_OFFSET, RST_CTRL_OFFSET, SLEEP_CTRL_OFFSET, +}; +#[cfg(target_arch = "x86_64")] +use crate::x86_64::standard::{LayoutEntryType, MEM_LAYOUT}; +use crate::{MachineBase, MachineOps}; +#[cfg(target_arch = "x86_64")] +use acpi::AcpiGenericAddress; +use acpi::{ + AcpiRsdp, AcpiTable, AmlBuilder, TableLoader, ACPI_RSDP_FILE, ACPI_TABLE_FILE, + ACPI_TABLE_LOADER_FILE, TABLE_CHECKSUM_OFFSET, +}; +use address_space::{ + AddressAttr, AddressRange, FileBackend, GuestAddress, HostMemMapping, Region, RegionIoEventFd, + RegionOps, +}; +use block_backend::{qcow2::QCOW2_LIST, BlockStatus}; +#[cfg(target_arch = "x86_64")] +use devices::acpi::cpu_controller::CpuController; +use devices::legacy::FwCfgOps; +#[cfg(feature = "scream")] +use devices::misc::scream::set_record_authority; +use devices::pci::hotplug::{handle_plug, handle_unplug_pci_request}; +use devices::pci::{PciBus, PciHost}; +use devices::Device; +#[cfg(feature = "usb_camera")] +use machine_manager::config::get_cameradev_config; +#[cfg(target_arch = "aarch64")] +use machine_manager::config::ShutdownAction; +use machine_manager::config::{ + get_chardev_config, get_netdev_config, memory_unit_conversion, parse_incoming_uri, + BootIndexInfo, ConfigCheck, DiskFormat, DriveConfig, ExBool, MigrateMode, NumaNode, NumaNodes, + M, +}; +use machine_manager::event; +use machine_manager::event_loop::EventLoop; +use machine_manager::machine::{ + DeviceInterface, MachineAddressInterface, MachineExternalInterface, MachineInterface, + MachineLifecycle, MachineTestInterface, MigrateInterface, VmState, +}; +use machine_manager::qmp::qmp_schema::{BlockDevAddArgument, UpdateRegionArgument}; +use machine_manager::qmp::{qmp_channel::QmpChannel, qmp_response::Response, qmp_schema}; +use machine_manager::state_query::query_workloads; +#[cfg(feature = "gtk")] +use ui::gtk::qmp_query_display_image; +use ui::input::{input_button, input_move_abs, input_point_sync, key_event, Axis}; +#[cfg(all(target_env = "ohos", feature = "ohui_srv"))] +use ui::ohui_srv::OhUiServer; +#[cfg(feature = "vnc")] +use ui::vnc::qmp_query_vnc; +use util::aio::{AioEngine, WriteZeroesState}; +use util::byte_code::ByteCode; +use util::loop_context::{ + create_new_eventfd, read_fd, EventLoopManager, EventNotifier, NotifierCallback, + NotifierOperation, +}; +use virtio::{qmp_balloon, qmp_query_balloon}; + +const MAX_REGION_SIZE: u64 = 65536; + +/// Standard machine structure. +pub struct StdMachine { + /// Machine base members. + pub(crate) base: MachineBase, + /// PCI/PCIe host bridge. + pub(crate) pci_host: Arc>, + /// Reset request, handle VM `Reset` event. + pub(crate) reset_req: Arc, + /// Shutdown request, handle VM `shutdown` event. + pub(crate) shutdown_req: Arc, + /// VM power button, handle VM `Shutdown` event. + pub(crate) power_button: Arc, + /// List contains the boot order of boot devices. 
+ pub(crate) boot_order_list: Arc>>, + /// CPU Resize request, handle vm cpu hot(un)plug event. + #[cfg(target_arch = "x86_64")] + pub(crate) cpu_resize_req: Arc, + /// Cpu Controller. + #[cfg(target_arch = "x86_64")] + pub(crate) cpu_controller: Option>>, + /// Pause request, handle VM `Pause` event. + #[cfg(target_arch = "aarch64")] + pub(crate) pause_req: Arc, + /// Resume request, handle VM `Resume` event. + #[cfg(target_arch = "aarch64")] + pub(crate) resume_req: Arc, + /// Device Tree Blob. + #[cfg(target_arch = "aarch64")] + pub(crate) dtb_vec: Vec, + /// OHUI server + #[cfg(all(target_arch = "aarch64", target_env = "ohos", feature = "ohui_srv"))] + pub(crate) ohui_server: Option>, +} + +pub(crate) trait StdMachineOps: AcpiBuilder + MachineOps { + fn init_pci_host(&self) -> Result<()>; + + /// Build all ACPI tables and RSDP, and add them to FwCfg as file entries. + /// + /// # Arguments + /// + /// `fw_cfg` - FwCfgOps trait object. + fn build_acpi_tables(&self, fw_cfg: &Arc>) -> Result<()> + where + Self: Sized, + { + let mut loader = TableLoader::new(); + let acpi_tables = Arc::new(Mutex::new(Vec::new())); + loader.add_alloc_entry(ACPI_TABLE_FILE, acpi_tables.clone(), 64_u32, false)?; + + let mut xsdt_entries = Vec::new(); + + let facs_addr = self + .build_facs_table(&acpi_tables) + .with_context(|| "Failed to build ACPI FACS table")?; + + let dsdt_addr = self + .build_dsdt_table(&acpi_tables, &mut loader) + .with_context(|| "Failed to build ACPI DSDT table")?; + let fadt_addr = Self::build_fadt_table(&acpi_tables, &mut loader, facs_addr, dsdt_addr) + .with_context(|| "Failed to build ACPI FADT table")?; + xsdt_entries.push(fadt_addr); + + let madt_addr = self + .build_madt_table(&acpi_tables, &mut loader) + .with_context(|| "Failed to build ACPI MADT table")?; + xsdt_entries.push(madt_addr); + + #[cfg(target_arch = "aarch64")] + { + let gtdt_addr = self + .build_gtdt_table(&acpi_tables, &mut loader) + .with_context(|| "Failed to build ACPI GTDT table")?; + xsdt_entries.push(gtdt_addr); + + let dbg2_addr = self + .build_dbg2_table(&acpi_tables, &mut loader) + .with_context(|| "Failed to build ACPI DBG2 table")?; + xsdt_entries.push(dbg2_addr); + + let iort_addr = self + .build_iort_table(&acpi_tables, &mut loader) + .with_context(|| "Failed to build ACPI IORT table")?; + xsdt_entries.push(iort_addr); + + let spcr_addr = self + .build_spcr_table(&acpi_tables, &mut loader) + .with_context(|| "Failed to build ACPI SPCR table")?; + xsdt_entries.push(spcr_addr); + } + + let mcfg_addr = Self::build_mcfg_table(&acpi_tables, &mut loader) + .with_context(|| "Failed to build ACPI MCFG table")?; + xsdt_entries.push(mcfg_addr); + + if let Some(numa_nodes) = self.get_numa_nodes() { + let srat_addr = self + .build_srat_table(&acpi_tables, &mut loader) + .with_context(|| "Failed to build ACPI SRAT table")?; + xsdt_entries.push(srat_addr); + + let slit_addr = Self::build_slit_table(numa_nodes, &acpi_tables, &mut loader) + .with_context(|| "Failed to build ACPI SLIT table")?; + xsdt_entries.push(slit_addr); + } + + #[cfg(target_arch = "aarch64")] + { + let pptt_addr = self + .build_pptt_table(&acpi_tables, &mut loader) + .with_context(|| "Failed to build ACPI PPTT table")?; + xsdt_entries.push(pptt_addr); + } + + let xsdt_addr = Self::build_xsdt_table(&acpi_tables, &mut loader, xsdt_entries)?; + + let mut locked_fw_cfg = fw_cfg.lock().unwrap(); + Self::build_rsdp( + &mut loader, + &mut *locked_fw_cfg as &mut dyn FwCfgOps, + xsdt_addr, + ) + .with_context(|| "Failed to build ACPI RSDP")?; + + 
locked_fw_cfg + .add_file_entry(ACPI_TABLE_LOADER_FILE, loader.cmd_entries()) + .with_context(|| "Failed to add ACPI table loader file entry")?; + locked_fw_cfg + .add_file_entry(ACPI_TABLE_FILE, acpi_tables.lock().unwrap().to_vec()) + .with_context(|| "Failed to add ACPI-tables file entry")?; + + Ok(()) + } + + fn add_fwcfg_device( + &mut self, + _nr_cpus: u8, + #[cfg(target_arch = "x86_64")] _max_cpus: u8, + ) -> Result>>> { + bail!("Not implemented"); + } + + /// Get cpu controller. + #[cfg(target_arch = "x86_64")] + fn get_cpu_controller(&self) -> &Arc>; + + /// Add new vcpu device. + /// + /// # Arguments + /// + /// * `clone_vm` - Reference of the StdMachine. + #[cfg(target_arch = "x86_64")] + fn add_vcpu_device(&mut self, clone_vm: Arc>) -> Result<()>; + + /// Register event notifier for hotplug vcpu event. + /// + /// # Arguments + /// + /// * `resize_req` - Eventfd of the cpu hotplug request. + /// * `clone_vm` - Reference of the StdMachine. + #[cfg(target_arch = "x86_64")] + fn register_hotplug_vcpu_event( + &self, + hotplug_req: Arc, + clone_vm: Arc>, + ) -> Result<()> { + let hotplug_req_fd = hotplug_req.as_raw_fd(); + let hotplug_req_handler: Rc = Rc::new(move |_, _| { + read_fd(hotplug_req_fd); + if let Err(e) = StdMachine::handle_hotplug_vcpu_request(&clone_vm) { + error!("Fail to hotplug vcpu, {}", e); + } + None + }); + let notifier = EventNotifier::new( + NotifierOperation::AddShared, + hotplug_req_fd, + None, + EventSet::IN, + vec![hotplug_req_handler], + ); + EventLoop::update_event(vec![notifier], None) + .with_context(|| "Failed to register event notifier.") + } + + /// Remove vcpu device. + /// + /// # Arguments + /// + /// * `vcpu_id` - The id number of vcpu. + #[cfg(target_arch = "x86_64")] + fn remove_vcpu_device(&mut self, vcpu_id: u8) -> Result<()>; + + /// Find cpu id by device id. + /// + /// # Arguments + /// + /// * `device_id` - The name of vcpu device. + #[cfg(target_arch = "x86_64")] + fn find_cpu_id_by_device_id(&mut self, device_id: &str) -> Option; + + /// Register event notifier for reset of standard machine. + /// + /// # Arguments + /// + /// * `reset_req` - Eventfd of the reset request. + /// * `clone_vm` - Reference of the StdMachine. 
+ fn register_reset_event( + &self, + reset_req: Arc, + clone_vm: Arc>, + ) -> Result<()> { + let reset_req_fd = reset_req.as_raw_fd(); + let reset_req_handler: Rc = Rc::new(move |_, _| { + read_fd(reset_req_fd); + if let Err(e) = StdMachine::handle_reset_request(&clone_vm) { + warn!("Fail to reboot standard VM, {:?}, try again", e); + if reset_req.write(1).is_err() { + error!("Failed to send VM reset request"); + } + } + + None + }); + let notifier = EventNotifier::new( + NotifierOperation::AddShared, + reset_req_fd, + None, + EventSet::IN, + vec![reset_req_handler], + ); + EventLoop::update_event(vec![notifier], None) + .with_context(|| "Failed to register event notifier.") + } + + #[cfg(target_arch = "aarch64")] + fn register_pause_event( + &self, + pause_req: Arc, + clone_vm: Arc>, + ) -> Result<()> { + let pause_req_fd = pause_req.as_raw_fd(); + let pause_req_handler: Rc = Rc::new(move |_, _| { + let _ret = pause_req.read(); + if !clone_vm.lock().unwrap().pause() { + warn!("VM pause failed, try again"); + if pause_req.write(1).is_err() { + error!("Failed to send VM pause request"); + } + } + None + }); + + let notifier = EventNotifier::new( + NotifierOperation::AddShared, + pause_req_fd, + None, + EventSet::IN, + vec![pause_req_handler], + ); + EventLoop::update_event(vec![notifier], None) + .with_context(|| "Failed to register event notifier.") + } + + #[cfg(target_arch = "aarch64")] + fn register_resume_event( + &self, + resume_req: Arc, + clone_vm: Arc>, + ) -> Result<()> { + let resume_req_fd = resume_req.as_raw_fd(); + let resume_req_handler: Rc = Rc::new(move |_, _| { + let _ret = resume_req.read(); + if !clone_vm.lock().unwrap().resume() { + error!("VM resume failed!"); + } + None + }); + + let notifier = EventNotifier::new( + NotifierOperation::AddShared, + resume_req_fd, + None, + EventSet::IN, + vec![resume_req_handler], + ); + EventLoop::update_event(vec![notifier], None) + .with_context(|| "Failed to register event notifier.") + } +} + +/// Trait that helps to build ACPI tables. +/// Standard machine struct should at least implement `build_dsdt_table`, `build_madt_table` +/// and `build_mcfg_table` function. +pub(crate) trait AcpiBuilder { + /// Add ACPI table to the end of table loader, returns the offset of ACPI table in `acpi_data`. + /// + /// # Arguments + /// + /// `acpi_data` - Bytes streams that ACPI tables converts to. + /// `loader` - ACPI table loader. + /// `table` - ACPI table. + fn add_table_to_loader( + acpi_data: &Arc>>, + loader: &mut TableLoader, + table: &AcpiTable, + ) -> Result { + let mut locked_acpi_data = acpi_data.lock().unwrap(); + let table_begin = locked_acpi_data.len() as u32; + locked_acpi_data.extend(table.aml_bytes()); + let table_end = locked_acpi_data.len() as u32; + // Drop the lock of acpi_data to avoid dead-lock when adding entry to + // TableLoader, because TableLoader also needs to acquire this lock. + drop(locked_acpi_data); + + loader.add_cksum_entry( + ACPI_TABLE_FILE, + // table_begin is much less than u32::MAX, will not overflow. + table_begin + TABLE_CHECKSUM_OFFSET, + table_begin, + table_end - table_begin, + )?; + + Ok(u64::from(table_begin)) + } + + /// Build ACPI DSDT table, returns the offset of ACPI DSDT table in `acpi_data`. + /// + /// # Arguments + /// + /// `acpi_data` - Bytes streams that ACPI tables converts to. + /// `loader` - ACPI table loader. 
+ fn build_dsdt_table( + &self, + _acpi_data: &Arc>>, + _loader: &mut TableLoader, + ) -> Result { + bail!("Not implemented"); + } + + /// Build ACPI MADT table, returns the offset of ACPI MADT table in `acpi_data`. + /// + /// # Arguments + /// + /// `acpi_data` - Bytes streams that ACPI tables converts to. + /// `loader` - ACPI table loader. + fn build_madt_table( + &self, + _acpi_data: &Arc>>, + _loader: &mut TableLoader, + ) -> Result { + bail!("Not implemented"); + } + + /// Build ACPI GTDT table, returns the offset of ACPI GTDT table in `acpi_data`. + /// + /// # Arguments + /// + /// `acpi_data` - Bytes streams that ACPI tables converts to. + /// `loader` - ACPI table loader. + #[cfg(target_arch = "aarch64")] + fn build_gtdt_table( + &self, + _acpi_data: &Arc>>, + _loader: &mut TableLoader, + ) -> Result + where + Self: Sized, + { + Ok(0) + } + + /// Build ACPI DBG2 table, returns the offset of ACPI DBG2 table in `acpi_data`. + /// + /// # Arguments + /// + /// `acpi_data` - Bytes streams that ACPI tables converts to. + /// `loader` - ACPI table loader. + #[cfg(target_arch = "aarch64")] + fn build_dbg2_table( + &self, + _acpi_data: &Arc>>, + _loader: &mut TableLoader, + ) -> Result + where + Self: Sized, + { + bail!("Not implemented"); + } + + /// Build ACPI IORT table, returns the offset of ACPI IORT table in `acpi_data`. + /// + /// # Arguments + /// + /// `acpi_data` - Bytes streams that ACPI tables converts to. + /// `loader` - ACPI table loader. + #[cfg(target_arch = "aarch64")] + fn build_iort_table( + &self, + _acpi_data: &Arc>>, + _loader: &mut TableLoader, + ) -> Result + where + Self: Sized, + { + Ok(0) + } + + /// Build ACPI SPCR table, returns the offset of ACPI SPCR table in `acpi_data`. + /// + /// # Arguments + /// + /// `acpi_data` - Bytes streams that ACPI tables converts to. + /// `loader` - ACPI table loader. + #[cfg(target_arch = "aarch64")] + fn build_spcr_table( + &self, + _acpi_data: &Arc>>, + _loader: &mut TableLoader, + ) -> Result + where + Self: Sized, + { + Ok(0) + } + + /// Build ACPI PPTT table, returns the offset of ACPI PPTT table in `acpi_data`. + /// + /// # Arguments + /// + /// `acpi_data` - Bytes streams that ACPI tables converts to. + /// `Loader` - ACPI table loader. + #[cfg(target_arch = "aarch64")] + fn build_pptt_table( + &self, + _acpi_data: &Arc>>, + _loader: &mut TableLoader, + ) -> Result + where + Self: Sized, + { + Ok(0) + } + + /// Build ACPI MCFG table, returns the offset of ACPI MCFG table in `acpi_data`. + /// + /// # Arguments + /// + /// `acpi_data` - Bytes streams that ACPI tables converts to. + /// `loader` - ACPI table loader. + fn build_mcfg_table(acpi_data: &Arc>>, loader: &mut TableLoader) -> Result + where + Self: Sized, + { + let mut mcfg = AcpiTable::new(*b"MCFG", 1, *b"STRATO", *b"VIRTMCFG", 1); + // Bits 20~28 (totally 9 bits) in PCIE ECAM represents bus number. 
+ let bus_number_mask = (1u64 << 9) - 1; + let ecam_addr: u64; + let max_nr_bus: u64; + #[cfg(target_arch = "x86_64")] + { + ecam_addr = MEM_LAYOUT[LayoutEntryType::PcieEcam as usize].0; + max_nr_bus = (MEM_LAYOUT[LayoutEntryType::PcieEcam as usize].1 >> 20) & bus_number_mask; + } + #[cfg(target_arch = "aarch64")] + { + ecam_addr = MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].0; + max_nr_bus = + (MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].1 >> 20) & bus_number_mask; + } + + // Reserved + mcfg.append_child(&[0_u8; 8]); + // Base address of PCIE ECAM + mcfg.append_child(ecam_addr.as_bytes()); + // PCI Segment Group Number + mcfg.append_child(0_u16.as_bytes()); + // Start Bus Number and End Bus Number. max_nr_bus is no less than 1. + mcfg.append_child(&[0_u8, u8::try_from(max_nr_bus - 1)?]); + // Reserved + mcfg.append_child(&[0_u8; 4]); + + let mut acpi_data_locked = acpi_data.lock().unwrap(); + let mcfg_begin = acpi_data_locked.len() as u32; + acpi_data_locked.extend(mcfg.aml_bytes()); + let mcfg_end = acpi_data_locked.len() as u32; + drop(acpi_data_locked); + + loader.add_cksum_entry( + ACPI_TABLE_FILE, + // mcfg_begin is much less than u32::MAX, will not overflow. + mcfg_begin + TABLE_CHECKSUM_OFFSET, + mcfg_begin, + mcfg_end - mcfg_begin, + )?; + Ok(u64::from(mcfg_begin)) + } + + /// Build ACPI FADT table, returns the offset of ACPI FADT table in `acpi_data`. + /// + /// # Arguments + /// + /// `acpi_data` - Bytes streams that ACPI tables converts to. + /// `loader` - ACPI table loader. + /// `facs_addr` - Offset of ACPI FACS table in `acpi_data`. + /// `dsdt_addr` - Offset of ACPI DSDT table in `acpi_data`. + fn build_fadt_table( + acpi_data: &Arc>>, + loader: &mut TableLoader, + facs_addr: u64, + dsdt_addr: u64, + ) -> Result + where + Self: Sized, + { + let mut fadt = AcpiTable::new(*b"FACP", 6, *b"STRATO", *b"VIRTFACP", 1); + + fadt.set_table_len(208_usize); + // PM1A_EVENT bit, offset is 56. + #[cfg(target_arch = "x86_64")] + fadt.set_field(56, 0x600_u32); + // PM1A_CONTROL bit, offset is 64. + #[cfg(target_arch = "x86_64")] + fadt.set_field(64, 0x604_u32); + // PM_TMR_BLK bit, offset is 76. + #[cfg(target_arch = "x86_64")] + fadt.set_field(76, 0x608_u32); + // PM1_EVT_LEN, offset is 88. + #[cfg(target_arch = "x86_64")] + fadt.set_field(88, 4_u8); + // PM1_CNT_LEN, offset is 89. + #[cfg(target_arch = "x86_64")] + fadt.set_field(89, 2_u8); + // PM_TMR_LEN, offset is 91. + #[cfg(target_arch = "x86_64")] + fadt.set_field(91, 4_u8); + #[cfg(target_arch = "aarch64")] + { + // FADT flag: enable HW_REDUCED_ACPI bit on aarch64 plantform. + fadt.set_field(112, 1_u32 << 20 | 1_u32 << 10 | 1_u32 << 8); + // ARM Boot Architecture Flags + fadt.set_field(129, 0x3_u16); + } + // FADT minor revision + fadt.set_field(131, 3_u8); + // X_PM_TMR_BLK bit, offset is 208. + #[cfg(target_arch = "x86_64")] + fadt.append_child(&AcpiGenericAddress::new_io_address(0x608_u32).aml_bytes()); + // FADT table size is fixed. + fadt.set_table_len(276_usize); + + #[cfg(target_arch = "x86_64")] + { + // FADT flag: disable HW_REDUCED_ACPI bit on x86 plantform. + fadt.set_field(112, 1_u32 << 10 | 1_u32 << 8); + // Reset Register bit, offset is 116. + fadt.set_field(116, 0x01_u8); + fadt.set_field(117, 0x08_u8); + fadt.set_field(120, u64::from(RST_CTRL_OFFSET)); + fadt.set_field(128, 0x0F_u8); + // PM1a event register bit, offset is 148. + fadt.set_field(148, 0x01_u8); + fadt.set_field(149, 0x20_u8); + fadt.set_field(152, u64::from(PM_EVENT_OFFSET)); + // PM1a control register bit, offset is 172. 
+ fadt.set_field(172, 0x01_u8); + fadt.set_field(173, 0x10_u8); + fadt.set_field(176, u64::from(PM_CTRL_OFFSET)); + // Sleep control register, offset is 244. + fadt.set_field(244, 0x01_u8); + fadt.set_field(245, 0x08_u8); + fadt.set_field(248, u64::from(SLEEP_CTRL_OFFSET)); + // Sleep status tegister, offset is 256. + fadt.set_field(256, 0x01_u8); + fadt.set_field(257, 0x08_u8); + fadt.set_field(260, u64::from(SLEEP_CTRL_OFFSET)); + } + + let mut locked_acpi_data = acpi_data.lock().unwrap(); + let fadt_begin = locked_acpi_data.len() as u32; + locked_acpi_data.extend(fadt.aml_bytes()); + let fadt_end = locked_acpi_data.len() as u32; + drop(locked_acpi_data); + + // FACS address field's offset in FADT. + let facs_offset = 36_u32; + // Size of FACS address. + let facs_size = 4_u8; + loader.add_pointer_entry( + ACPI_TABLE_FILE, + // fadt_begin is much less than u32::MAX, will not overflow. + fadt_begin + facs_offset, + facs_size, + ACPI_TABLE_FILE, + u32::try_from(facs_addr)?, + )?; + + // xDSDT address field's offset in FADT. + let xdsdt_offset = 140_u32; + // Size of xDSDT address. + let xdsdt_size = 8_u8; + loader.add_pointer_entry( + ACPI_TABLE_FILE, + // fadt_begin is much less than u32::MAX, will not overflow. + fadt_begin + xdsdt_offset, + xdsdt_size, + ACPI_TABLE_FILE, + u32::try_from(dsdt_addr)?, + )?; + + loader.add_cksum_entry( + ACPI_TABLE_FILE, + // fadt_begin is much less than u32::MAX, will not overflow. + fadt_begin + TABLE_CHECKSUM_OFFSET, + fadt_begin, + fadt_end - fadt_begin, + )?; + + Ok(u64::from(fadt_begin)) + } + + /// Get the Hardware Signature used to build FACS table. + fn get_hardware_signature(&self) -> Option; + + /// Build ACPI FACS table, returns the offset of ACPI FACS table in `acpi_data`. + /// + /// # Arguments + /// + /// `acpi_data` - Bytes streams that ACPI tables converts to. + fn build_facs_table(&self, acpi_data: &Arc>>) -> Result + where + Self: Sized, + { + let mut facs_data = vec![0_u8; 0x40]; + // FACS table signature. + facs_data[0] = b'F'; + facs_data[1] = b'A'; + facs_data[2] = b'C'; + facs_data[3] = b'S'; + // FACS table length. + facs_data[4] = 0x40; + + // FACS table Hardware Signature. + if let Some(signature) = self.get_hardware_signature() { + let signature = signature.as_bytes(); + facs_data[8] = signature[0]; + facs_data[9] = signature[1]; + facs_data[10] = signature[2]; + facs_data[11] = signature[3]; + } + + let mut locked_acpi_data = acpi_data.lock().unwrap(); + let facs_begin = locked_acpi_data.len() as u32; + locked_acpi_data.extend(facs_data); + drop(locked_acpi_data); + + Ok(u64::from(facs_begin)) + } + + /// Build ACPI SRAT CPU table. + /// # Arguments + /// + /// `proximity_domain` - The proximity domain. + /// `node` - The NUMA node. + /// `srat` - The SRAT table. + fn build_srat_cpu(&self, proximity_domain: u32, node: &NumaNode, srat: &mut AcpiTable); + + /// Build ACPI SRAT memory table. + /// # Arguments + /// + /// `base_addr` - The base address of the memory range. + /// `proximity_domain` - The proximity domain. + /// `node` - The NUMA node. + /// `srat` - The SRAT table. + fn build_srat_mem( + &self, + base_addr: u64, + proximity_domain: u32, + node: &NumaNode, + srat: &mut AcpiTable, + ) -> u64; + + /// Build ACPI SRAT table, returns the offset of ACPI SRAT table in `acpi_data`. + /// + /// # Arguments + /// + /// `acpi_data` - Bytes streams that ACPI tables converts to. + /// `loader` - ACPI table loader. 
+ fn build_srat_table( + &self, + acpi_data: &Arc>>, + loader: &mut TableLoader, + ) -> Result; + + /// Build ACPI SLIT table, returns the offset of ACPI SLIT table in `acpi_data`. + /// + /// # Arguments + /// + /// `numa_nodes` - The information of NUMA nodes. + /// `acpi_data` - Bytes streams that ACPI tables converts to. + /// `loader` - ACPI table loader. + fn build_slit_table( + numa_nodes: &NumaNodes, + acpi_data: &Arc>>, + loader: &mut TableLoader, + ) -> Result { + let mut slit = AcpiTable::new(*b"SLIT", 1, *b"STRATO", *b"VIRTSLIT", 1); + slit.append_child((numa_nodes.len() as u64).as_bytes()); + + let existing_nodes: Vec = numa_nodes.keys().cloned().collect(); + for (id, node) in numa_nodes.iter().enumerate() { + let distances = &node.1.distances; + for i in existing_nodes.iter() { + let dist: u8 = if id as u32 == *i { + 10 + } else if let Some(distance) = distances.get(i) { + *distance + } else { + 20 + }; + slit.append_child(dist.as_bytes()); + } + } + + let slit_begin = StdMachine::add_table_to_loader(acpi_data, loader, &slit) + .with_context(|| "Fail to add SLIT table to loader")?; + Ok(slit_begin) + } + + /// Build ACPI XSDT table, returns the offset of ACPI XSDT table in `acpi_data`. + /// + /// # Arguments + /// + /// `acpi_data` - Bytes streams that ACPI tables converts to. + /// `loader` - ACPI table loader. + /// `xsdt_entries` - Offset of table entries in `acpi_data`, such as FADT, MADT, MCFG table. + fn build_xsdt_table( + acpi_data: &Arc>>, + loader: &mut TableLoader, + xsdt_entries: Vec, + ) -> Result + where + Self: Sized, + { + let mut xsdt = AcpiTable::new(*b"XSDT", 1, *b"STRATO", *b"VIRTXSDT", 1); + + // usize is enough for storing table len. + xsdt.set_table_len(xsdt.table_len() + size_of::() * xsdt_entries.len()); + + let mut locked_acpi_data = acpi_data.lock().unwrap(); + let xsdt_begin = locked_acpi_data.len() as u32; + locked_acpi_data.extend(xsdt.aml_bytes()); + let xsdt_end = locked_acpi_data.len() as u32; + drop(locked_acpi_data); + + // Offset of table entries in XSDT. + let mut entry_offset = 36_u32; + // Size of each entry. + let entry_size = size_of::() as u8; + for entry in xsdt_entries { + loader.add_pointer_entry( + ACPI_TABLE_FILE, + // xsdt_begin is much less than u32::MAX, will not overflow. + xsdt_begin + entry_offset, + entry_size, + ACPI_TABLE_FILE, + u32::try_from(entry)?, + )?; + // u32 is enough for storing offset. + entry_offset += u32::from(entry_size); + } + + loader.add_cksum_entry( + ACPI_TABLE_FILE, + // xsdt_begin is much less than u32::MAX, will not overflow. + xsdt_begin + TABLE_CHECKSUM_OFFSET, + xsdt_begin, + xsdt_end - xsdt_begin, + )?; + + Ok(u64::from(xsdt_begin)) + } + + /// Build ACPI RSDP and add it to FwCfg as file-entry. + /// + /// # Arguments + /// + /// `loader` - ACPI table loader. + /// `fw_cfg`: FwCfgOps trait object. + /// `xsdt_addr` - Offset of ACPI XSDT table in `acpi_data`. 
+ fn build_rsdp(loader: &mut TableLoader, fw_cfg: &mut dyn FwCfgOps, xsdt_addr: u64) -> Result<()> + where + Self: Sized, + { + let rsdp = AcpiRsdp::new(*b"STRATO"); + let rsdp_data = Arc::new(Mutex::new(rsdp.aml_bytes().to_vec())); + + loader.add_alloc_entry(ACPI_RSDP_FILE, rsdp_data.clone(), 16, true)?; + + let xsdt_offset = 24_u32; + let xsdt_size = 8_u8; + loader.add_pointer_entry( + ACPI_RSDP_FILE, + xsdt_offset, + xsdt_size, + ACPI_TABLE_FILE, + u32::try_from(xsdt_addr)?, + )?; + + let cksum_offset = 8_u32; + let exd_cksum_offset = 32_u32; + loader.add_cksum_entry(ACPI_RSDP_FILE, cksum_offset, 0, 20)?; + loader.add_cksum_entry(ACPI_RSDP_FILE, exd_cksum_offset, 0, 36)?; + + fw_cfg.add_file_entry(ACPI_RSDP_FILE, rsdp_data.lock().unwrap().to_vec())?; + + Ok(()) + } +} + +impl StdMachine { + fn handle_unplug_usb_request(&mut self, id: String) -> Result<()> { + let vm_config = self.get_vm_config(); + let mut locked_vmconfig = vm_config.lock().unwrap(); + self.detach_usb_from_xhci_controller(&mut locked_vmconfig, id) + } + + #[cfg(target_arch = "x86_64")] + fn plug_cpu_device(&mut self, args: &qmp_schema::DeviceAddArgument) -> Result<()> { + if self.get_numa_nodes().is_some() { + bail!("Not support to hotplug/hotunplug cpu in numa architecture now.") + } + let device_id = args.id.clone(); + if device_id.is_empty() { + bail!("Device id can't be empty.") + } + + if let Some(cpu_id) = args.cpu_id { + let nr_cpus = self.get_cpu_topo().nrcpus; + let max_cpus = self.get_cpu_topo().max_cpus; + + if nr_cpus == max_cpus { + bail!("There is no hotpluggable cpu-id for this VM.") + } + if cpu_id < nr_cpus { + bail!("Cpu-id {} already exist.", cpu_id) + } + if cpu_id >= max_cpus { + // max_cpus is no less than 1. + bail!("Max cpu-id is {}", max_cpus - 1) + } + + let mut locked_controller = self.get_cpu_controller().lock().unwrap(); + locked_controller.check_id_existed(&device_id, cpu_id)?; + locked_controller.set_hotplug_cpu_info(device_id, cpu_id)?; + locked_controller.trigger_hotplug_cpu() + } else { + bail!("Argument cpu-id is required.") + } + } +} + +impl MachineLifecycle for StdMachine { + fn pause(&self) -> bool { + if self.notify_lifecycle(VmState::Running, VmState::Paused) { + event!(Stop); + true + } else { + false + } + } + + fn resume(&self) -> bool { + if !self.notify_lifecycle(VmState::Paused, VmState::Running) { + return false; + } + event!(Resume); + true + } + + fn destroy(&self) -> bool { + if self.shutdown_req.write(1).is_err() { + error!("Failed to send shutdown request."); + return false; + } + + true + } + + #[cfg(target_arch = "aarch64")] + fn powerdown(&self) -> bool { + if self.power_button.write(1).is_err() { + error!("Standard vm write power button failed"); + return false; + } + true + } + + #[cfg(target_arch = "aarch64")] + fn get_shutdown_action(&self) -> ShutdownAction { + self.base + .vm_config + .lock() + .unwrap() + .machine_config + .shutdown_action + } + + fn reset(&mut self) -> bool { + if self.reset_req.write(1).is_err() { + error!("Standard vm write reset request failed"); + return false; + } + true + } + + fn notify_lifecycle(&self, old: VmState, new: VmState) -> bool { + if let Err(e) = self.vm_state_transfer( + &self.base.cpus, + #[cfg(target_arch = "aarch64")] + &self.base.irq_chip, + &mut self.base.vm_state.0.lock().unwrap(), + old, + new, + ) { + error!("VM state transfer failed: {:?}", e); + return false; + } + true + } +} + +impl MigrateInterface for StdMachine { + fn migrate(&self, uri: String) -> Response { + match parse_incoming_uri(&uri) { + 
Ok((MigrateMode::File, path)) => migration::snapshot(path), + Ok((MigrateMode::Unix, path)) => migration::migration_unix_mode(path), + Ok((MigrateMode::Tcp, path)) => migration::migration_tcp_mode(path), + _ => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(format!("Invalid uri: {}", uri)), + None, + ), + } + } + + fn query_migrate(&self) -> Response { + migration::query_migrate() + } + + fn cancel_migrate(&self) -> Response { + migration::cancel_migrate() + } +} + +impl MachineInterface for StdMachine {} +impl MachineExternalInterface for StdMachine {} +impl MachineTestInterface for StdMachine {} + +impl EventLoopManager for StdMachine { + fn loop_should_exit(&self) -> bool { + let vmstate = self.base.vm_state.deref().0.lock().unwrap(); + *vmstate == VmState::Shutdown + } + + fn loop_cleanup(&self) -> Result<()> { + set_termi_canon_mode().with_context(|| "Failed to set terminal to canonical mode")?; + Ok(()) + } +} + +impl MachineAddressInterface for StdMachine { + #[cfg(target_arch = "x86_64")] + fn pio_in(&self, addr: u64, data: &mut [u8]) -> bool { + self.machine_base().pio_in(addr, data) + } + + #[cfg(target_arch = "x86_64")] + fn pio_out(&self, addr: u64, data: &[u8]) -> bool { + self.machine_base().pio_out(addr, data) + } + + fn mmio_read(&self, addr: u64, data: &mut [u8]) -> bool { + self.machine_base().mmio_read(addr, data) + } + + fn mmio_write(&self, addr: u64, data: &[u8]) -> bool { + self.machine_base().mmio_write(addr, data) + } +} + +impl DeviceInterface for StdMachine { + fn query_status(&self) -> Response { + let vm_state = self.get_vm_state(); + let vmstate = vm_state.deref().0.lock().unwrap(); + let qmp_state = match *vmstate { + VmState::Running => qmp_schema::StatusInfo { + singlestep: false, + running: true, + status: qmp_schema::RunState::running, + }, + VmState::Paused => qmp_schema::StatusInfo { + singlestep: false, + running: false, + status: qmp_schema::RunState::paused, + }, + _ => Default::default(), + }; + + Response::create_response(serde_json::to_value(qmp_state).unwrap(), None) + } + + fn query_cpus(&self) -> Response { + let mut cpu_vec: Vec = Vec::new(); + let cpu_topo = self.get_cpu_topo(); + let cpus = self.get_cpus(); + for cpu_index in 0..cpu_topo.max_cpus { + if cpu_topo.get_mask(cpu_index as usize) == 1 { + let thread_id = cpus[cpu_index as usize].tid(); + let cpu_instance = cpu_topo.get_topo_instance_for_qmp(cpu_index); + let cpu_common = qmp_schema::CpuInfoCommon { + current: true, + qom_path: String::from("/machine/unattached/device[") + + &cpu_index.to_string() + + "]", + halted: false, + props: Some(cpu_instance), + CPU: cpu_index as isize, + thread_id: thread_id as isize, + }; + #[cfg(target_arch = "x86_64")] + { + let cpu_info = qmp_schema::CpuInfo::x86 { + common: cpu_common, + x86: qmp_schema::CpuInfoX86 {}, + }; + cpu_vec.push(serde_json::to_value(cpu_info).unwrap()); + } + #[cfg(target_arch = "aarch64")] + { + let cpu_info = qmp_schema::CpuInfo::Arm { + common: cpu_common, + arm: qmp_schema::CpuInfoArm {}, + }; + cpu_vec.push(serde_json::to_value(cpu_info).unwrap()); + } + } + } + Response::create_response(cpu_vec.into(), None) + } + + fn query_hotpluggable_cpus(&self) -> Response { + Response::create_empty_response() + } + + fn balloon(&self, value: u64) -> Response { + if qmp_balloon(value) { + return Response::create_empty_response(); + } + Response::create_error_response( + qmp_schema::QmpErrorClass::DeviceNotActive( + "No balloon device has been activated".to_string(), + ), + None, + ) + } + + fn 
query_balloon(&self) -> Response { + if let Some(actual) = qmp_query_balloon() { + let ret = qmp_schema::BalloonInfo { actual }; + return Response::create_response(serde_json::to_value(ret).unwrap(), None); + } + Response::create_error_response( + qmp_schema::QmpErrorClass::DeviceNotActive( + "No balloon device has been activated".to_string(), + ), + None, + ) + } + + fn query_mem(&self) -> Response { + self.mem_show(); + Response::create_empty_response() + } + + fn query_vnc(&self) -> Response { + #[cfg(feature = "vnc")] + if let Some(vnc_info) = qmp_query_vnc() { + return Response::create_response(serde_json::to_value(vnc_info).unwrap(), None); + } + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "The service of VNC is not supported".to_string(), + ), + None, + ) + } + + fn query_display_image(&self) -> Response { + let mut _err = String::from("The gtk feature is not supported"); + #[cfg(feature = "gtk")] + match qmp_query_display_image() { + Ok(gpu_info) => { + return Response::create_response(serde_json::to_value(gpu_info).unwrap(), None); + } + Err(e) => { + _err = format!("Failed to query_display_image: {:?}", e); + } + }; + Response::create_error_response(qmp_schema::QmpErrorClass::GenericError(_err), None) + } + + fn device_add(&mut self, args: Box) -> Response { + if let Err(e) = self.check_device_id_existed(&args.id) { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + + let driver = args.driver.as_str(); + let vm_config = self.get_vm_config(); + let mut locked_vmconfig = vm_config.lock().unwrap(); + let mut vm_config_clone = locked_vmconfig.clone(); + match driver { + "virtio-blk-pci" => { + let cfg_args = locked_vmconfig.add_device_config(args.as_ref()); + if let Err(e) = self.add_virtio_pci_blk(&mut vm_config_clone, &cfg_args, true) { + error!("{:?}", e); + locked_vmconfig.del_device_by_id(args.id); + let err_str = format!("Failed to add virtio pci blk: {}", e); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(err_str), + None, + ); + } + } + #[cfg(feature = "virtio_scsi")] + "virtio-scsi-pci" => { + let cfg_args = locked_vmconfig.add_device_config(args.as_ref()); + if let Err(e) = self.add_virtio_pci_scsi(&mut vm_config_clone, &cfg_args, true) { + error!("{:?}", e); + locked_vmconfig.del_device_by_id(args.id); + let err_str = format!("Failed to add virtio scsi controller: {}", e); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(err_str), + None, + ); + } + } + #[cfg(feature = "vhostuser_block")] + "vhost-user-blk-pci" => { + let cfg_args = locked_vmconfig.add_device_config(args.as_ref()); + if let Err(e) = self.add_vhost_user_blk_pci(&mut vm_config_clone, &cfg_args, true) { + error!("{:?}", e); + locked_vmconfig.del_device_by_id(args.id); + let err_str = format!("Failed to add vhost user blk pci: {}", e); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(err_str), + None, + ); + } + } + "virtio-net-pci" => { + let cfg_args = locked_vmconfig.add_device_config(args.as_ref()); + if let Err(e) = self.add_virtio_pci_net(&mut vm_config_clone, &cfg_args, true) { + error!("{:?}", e); + locked_vmconfig.del_device_by_id(args.id); + let err_str = format!("Failed to add virtio pci net: {}", e); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(err_str), + None, + ); + } + } + #[cfg(feature = "vfio_device")] + "vfio-pci" => { + let cfg_args = 
locked_vmconfig.add_device_config(args.as_ref()); + if let Err(e) = self.add_vfio_device(&cfg_args, true) { + error!("{:?}", e); + locked_vmconfig.del_device_by_id(args.id); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + } + "usb-kbd" | "usb-tablet" | "usb-camera" | "usb-host" | "usb-storage" | "usb-uas" => { + let cfg_args = locked_vmconfig.add_device_config(args.as_ref()); + if let Err(e) = self.add_usb_device(&mut vm_config_clone, &cfg_args) { + error!("{:?}", e); + locked_vmconfig.del_device_by_id(args.id); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + return Response::create_empty_response(); + } + #[cfg(target_arch = "x86_64")] + "generic-x86-cpu" => { + if let Err(e) = self.plug_cpu_device(args.as_ref()) { + error!("{:?}", e); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + return Response::create_empty_response(); + } + _ => { + let err_str = format!("Failed to add device: Driver {} is not support", driver); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(err_str), + None, + ); + } + } + drop(locked_vmconfig); + + // It's safe to call get_pci_host().unwrap() because it has been checked before. + let locked_pci_host = self.get_pci_host().unwrap().lock().unwrap(); + if let Some((bus, dev)) = + PciBus::find_attached_bus(&locked_pci_host.child_bus().unwrap(), &args.id) + { + match handle_plug(&bus, &dev) { + Ok(()) => Response::create_empty_response(), + Err(e) => { + if let Err(e) = PciBus::detach_device(&bus, &dev) { + error!("{:?}", e); + error!("Failed to detach device"); + } + let err_str = format!("Failed to plug device: {}", e); + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(err_str), + None, + ) + } + } + } else { + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "Failed to add device: Bus not found".to_string(), + ), + None, + ) + } + } + + fn device_del(&mut self, device_id: String) -> Response { + let pci_host = match self.get_pci_host() { + Ok(host) => host, + Err(e) => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ) + } + }; + + let locked_pci_host = pci_host.lock().unwrap(); + if let Some((bus, dev)) = + PciBus::find_attached_bus(&locked_pci_host.child_bus().unwrap(), &device_id) + { + return match handle_unplug_pci_request(&bus, &dev) { + Ok(()) => { + let locked_dev = dev.lock().unwrap(); + let dev_id = &locked_dev.name(); + drop(locked_pci_host); + self.del_bootindex_devices(dev_id); + let vm_config = self.get_vm_config(); + let mut locked_config = vm_config.lock().unwrap(); + locked_config.del_device_by_id(device_id); + drop(locked_config); + Response::create_empty_response() + } + Err(e) => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ), + }; + } + drop(locked_pci_host); + + // Assume it is a cpu device, try to find this device in cpu device. 
+ #[cfg(target_arch = "x86_64")] + if let Some(cpu_id) = self.find_cpu_id_by_device_id(&device_id) { + return match self.remove_vcpu_device(cpu_id) { + Ok(()) => Response::create_empty_response(), + Err(e) => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ), + }; + } + + // The device is not a pci device and not a cpu device, assume it is a usb device. + match self.handle_unplug_usb_request(device_id) { + Ok(()) => Response::create_empty_response(), + Err(e) => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ), + } + } + + fn blockdev_add(&self, args: Box) -> Response { + let config = match parse_blockdev(&args) { + Ok(config) => config, + Err(e) => { + error!("{:?}", e); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + }; + + // Register drive backend file for hotplug drive. + if let Err(e) = self.register_drive_file( + &config.id, + &args.file.filename, + config.readonly, + config.direct, + ) { + error!("{:?}", e); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + match self + .get_vm_config() + .lock() + .unwrap() + .add_drive_with_config(config) + { + Ok(()) => Response::create_empty_response(), + Err(e) => { + error!("{:?}", e); + // It's safe to unwrap as the path has been registered. + self.unregister_drive_file(&args.file.filename).unwrap(); + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ) + } + } + } + + fn blockdev_del(&self, node_name: String) -> Response { + match self + .get_vm_config() + .lock() + .unwrap() + .del_drive_by_id(&node_name) + { + Ok(path) => { + // It's safe to unwrap as the path has been registered. 
+ self.unregister_drive_file(&path).unwrap(); + Response::create_empty_response() + } + Err(e) => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ), + } + } + + fn chardev_add(&mut self, args: qmp_schema::CharDevAddArgument) -> Response { + let config = match get_chardev_config(args) { + Ok(conf) => conf, + Err(e) => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + }; + + if let Err(e) = config.check() { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + + match self + .get_vm_config() + .lock() + .unwrap() + .add_chardev_with_config(config) + { + Ok(()) => Response::create_empty_response(), + Err(e) => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ), + } + } + + fn chardev_remove(&mut self, id: String) -> Response { + match self.get_vm_config().lock().unwrap().del_chardev_by_id(&id) { + Ok(()) => Response::create_empty_response(), + Err(e) => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ), + } + } + + fn netdev_add(&mut self, args: Box) -> Response { + let config = match get_netdev_config(args) { + Ok(conf) => conf, + Err(e) => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + }; + + match self + .get_vm_config() + .lock() + .unwrap() + .add_netdev_with_config(config) + { + Ok(()) => Response::create_empty_response(), + Err(e) => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ), + } + } + + fn netdev_del(&mut self, id: String) -> Response { + match self.get_vm_config().lock().unwrap().del_netdev_by_id(&id) { + Ok(()) => Response::create_empty_response(), + Err(e) => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ), + } + } + + #[cfg(feature = "usb_camera")] + fn cameradev_add(&mut self, args: qmp_schema::CameraDevAddArgument) -> Response { + let config = match get_cameradev_config(args) { + Ok(conf) => conf, + Err(e) => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + }; + + match self + .get_vm_config() + .lock() + .unwrap() + .add_cameradev_with_config(config) + { + Ok(()) => Response::create_empty_response(), + Err(e) => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ), + } + } + + #[cfg(feature = "usb_camera")] + fn cameradev_del(&mut self, id: String) -> Response { + match self + .get_vm_config() + .lock() + .unwrap() + .del_cameradev_by_id(&id) + { + Ok(()) => Response::create_empty_response(), + Err(e) => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ), + } + } + + #[cfg(feature = "scream")] + fn switch_audio_record(&self, authorized: String) -> Response { + match authorized.as_str() { + "on" => set_record_authority(true), + "off" => set_record_authority(false), + _ => { + let err_str = format!("Failed to set audio capture authority: {:?}", authorized); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(err_str), + None, + ); + } + } + Response::create_empty_response() + } + + fn getfd(&self, fd_name: String, if_fd: Option) -> Response { + if let Some(fd) = if_fd { + QmpChannel::set_fd(fd_name, 
fd); + Response::create_empty_response() + } else { + let err_resp = + qmp_schema::QmpErrorClass::GenericError("Invalid SCM message".to_string()); + Response::create_error_response(err_resp, None) + } + } + + fn update_region(&mut self, args: UpdateRegionArgument) -> Response { + if args.size >= MAX_REGION_SIZE { + let err_resp = qmp_schema::QmpErrorClass::GenericError(format!( + "Region size {} is out of range", + args.size + )); + return Response::create_error_response(err_resp, None); + } + #[derive(Default)] + struct DummyDevice { + head: u64, + } + + impl DummyDevice { + fn read(&mut self, data: &mut [u8], _base: GuestAddress, _offset: u64) -> bool { + if data.len() != std::mem::size_of::() { + return false; + } + + for (i, data) in data.iter_mut().enumerate().take(std::mem::size_of::()) { + // i is less than 8, multiply will not overflow. + *data = (self.head >> (8 * i)) as u8; + } + true + } + + fn write(&mut self, data: &[u8], _addr: GuestAddress, _offset: u64) -> bool { + if data.len() != std::mem::size_of::() { + return false; + } + + let val = match u64::from_bytes(data) { + None => { + error!("DummyDevice: cannot read u64 from data"); + return false; + } + Some(v) => v, + }; + self.head = match val.checked_mul(2) { + None => { + error!("DummyDevice: val({}) * 2 is overflow", val); + return false; + } + Some(v) => v, + }; + true + } + } + + let dummy_dev = Arc::new(Mutex::new(DummyDevice::default())); + let dummy_dev_clone = dummy_dev.clone(); + let read_ops = move |data: &mut [u8], addr: GuestAddress, offset: u64| -> bool { + let mut device_locked = dummy_dev_clone.lock().unwrap(); + device_locked.read(data, addr, offset) + }; + let dummy_dev_clone = dummy_dev; + let write_ops = move |data: &[u8], addr: GuestAddress, offset: u64| -> bool { + let mut device_locked = dummy_dev_clone.lock().unwrap(); + device_locked.write(data, addr, offset) + }; + + let dummy_dev_ops = RegionOps { + read: Arc::new(read_ops), + write: Arc::new(write_ops), + }; + + let mut fd = None; + if args.region_type.eq("rom_device_region") || args.region_type.eq("ram_device_region") { + if let Some(file_name) = args.device_fd_path { + fd = Some(Arc::new( + std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(file_name) + .unwrap(), + )); + } + } + + let region; + match args.region_type.as_str() { + "io_region" => { + region = Region::init_io_region(args.size, dummy_dev_ops, "UpdateRegionTest"); + if args.ioeventfd.is_some() && args.ioeventfd.unwrap() { + let ioeventfds = vec![RegionIoEventFd { + fd: Arc::new(create_new_eventfd().unwrap()), + addr_range: AddressRange::from(( + 0, + args.ioeventfd_size.unwrap_or_default(), + )), + data_match: args.ioeventfd_data.is_some(), + data: args.ioeventfd_data.unwrap_or_default(), + }]; + region.set_ioeventfds(&ioeventfds); + } + } + "rom_device_region" => { + region = Region::init_rom_device_region( + Arc::new( + HostMemMapping::new( + GuestAddress(args.offset), + None, + args.size, + fd.map(FileBackend::new_common), + false, + true, + true, + ) + .unwrap(), + ), + dummy_dev_ops, + "RomDeviceRegionTest", + ); + } + "ram_device_region" => { + region = Region::init_ram_device_region( + Arc::new( + HostMemMapping::new( + GuestAddress(args.offset), + None, + args.size, + fd.map(FileBackend::new_common), + false, + true, + false, + ) + .unwrap(), + ), + "RamdeviceregionTest", + ); + } + _ => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError("invalid rergion_type".to_string()), + None, + ); + } + }; + + if 
i32::try_from(args.priority).is_err() { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError("priority illegal".to_string()), + None, + ); + } + region.set_priority(args.priority as i32); + if let Some(read_only) = args.romd { + if region.set_rom_device_romd(read_only).is_err() { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "set_rom_device_romd failed".to_string(), + ), + None, + ); + } + } + + let sys_mem = self.get_sys_mem(); + match args.update_type.as_str() { + "add" => { + if sys_mem.root().add_subregion(region, args.offset).is_err() { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError("add subregion failed".to_string()), + None, + ); + } + } + "delete" => { + region.set_offset(GuestAddress(args.offset)); + if sys_mem.root().delete_subregion(®ion).is_err() { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "delete subregion failed".to_string(), + ), + None, + ); + } + } + _ => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError("invalid update_type".to_string()), + None, + ) + } + }; + + Response::create_empty_response() + } + + fn input_event(&self, key: String, value: String) -> Response { + match send_input_event(key, value) { + Ok(()) => Response::create_empty_response(), + Err(e) => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ), + } + } + + fn human_monitor_command(&self, args: qmp_schema::HumanMonitorCmdArgument) -> Response { + let cmd_args: Vec<&str> = args.command_line.split(' ').collect(); + match cmd_args[0] { + "drive_add" => { + // The drive_add command has three arguments split by space: + // "drive_add dummy file=/path/to/file,format=raw,if=none,id=drive-id..." + // The 'dummy' here is a placeholder for pci address which is not needed for drive. 
+ if cmd_args.len() != 3 { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "Invalid number of arguments".to_string(), + ), + None, + ); + } + let drive_cfg = match self.get_vm_config().lock().unwrap().add_drive(cmd_args[2]) { + Ok(cfg) => cfg, + Err(ref e) => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + }; + if let Err(e) = self.register_drive_file( + &drive_cfg.id, + &drive_cfg.path_on_host, + drive_cfg.readonly, + drive_cfg.direct, + ) { + error!("{:?}", e); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + } + "drive_del" => { + // The drive_del command has two arguments split by space: + // "drive_del drive-id" + if cmd_args.len() != 2 { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "Invalid number of arguments".to_string(), + ), + None, + ); + } + return self.blockdev_del(cmd_args[1].to_string()); + } + "info" => { + // Only support to query snapshots information by: + // "info snapshots" + if cmd_args.len() != 2 { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "Invalid number of arguments".to_string(), + ), + None, + ); + } + if cmd_args[1] != "snapshots" { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(format!( + "Unsupported command: {} {}", + cmd_args[0], cmd_args[1] + )), + None, + ); + } + + let qcow2_list = QCOW2_LIST.lock().unwrap(); + if qcow2_list.is_empty() { + return Response::create_response( + serde_json::to_value("There is no snapshot available.\r\n").unwrap(), + None, + ); + } + + let mut info_str = "List of snapshots present on all disks:\r\n".to_string(); + // Note: VM state is "None" in disk snapshots. It's used for vm snapshots which we + // don't support. + let vmstate_str = "None\r\n".to_string(); + info_str += &vmstate_str; + + for (drive_name, qcow2driver) in qcow2_list.iter() { + let dev_str = format!( + "\r\nList of partial (non-loadable) snapshots on \'{}\':\r\n", + drive_name + ); + let snap_infos = qcow2driver.lock().unwrap().list_snapshots(); + info_str += &(dev_str + &snap_infos); + } + + return Response::create_response(serde_json::to_value(info_str).unwrap(), None); + } + _ => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(format!( + "Unsupported command: {}", + cmd_args[0] + )), + None, + ); + } + } + Response::create_empty_response() + } + + fn blockdev_snapshot_internal_sync( + &self, + args: qmp_schema::BlockdevSnapshotInternalArgument, + ) -> Response { + let qcow2_list = QCOW2_LIST.lock().unwrap(); + let qcow2driver = qcow2_list.get(&args.device); + if qcow2driver.is_none() { + return Response::create_error_response( + qmp_schema::QmpErrorClass::DeviceNotFound(format!( + "No device drive named {} while creating snapshot {}", + args.device, args.name + )), + None, + ); + } + + // Do not unlock or drop the locked_status in this function. 
+ let status = qcow2driver.unwrap().lock().unwrap().get_status(); + let mut locked_status = status.lock().unwrap(); + *locked_status = BlockStatus::Snapshot; + + let vm_clock_nsec = EventLoop::get_ctx(None) + .unwrap() + .get_virtual_clock() + .as_nanos() as u64; + if let Err(e) = qcow2driver + .unwrap() + .lock() + .unwrap() + .create_snapshot(args.name.clone(), vm_clock_nsec) + { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(format!( + "Device {} Creates snapshot {} error: {}.", + args.device, args.name, e + )), + None, + ); + } + + Response::create_empty_response() + } + + fn blockdev_snapshot_delete_internal_sync( + &self, + args: qmp_schema::BlockdevSnapshotInternalArgument, + ) -> Response { + let qcow2_list = QCOW2_LIST.lock().unwrap(); + let qcow2driver = qcow2_list.get(&args.device); + if qcow2driver.is_none() { + return Response::create_error_response( + qmp_schema::QmpErrorClass::DeviceNotFound(format!( + "No device drive named {} while deleting snapshot {}", + args.device, args.name + )), + None, + ); + } + + // Do not unlock or drop the locked_status in this function. + let status = qcow2driver.unwrap().lock().unwrap().get_status(); + let mut locked_status = status.lock().unwrap(); + *locked_status = BlockStatus::Snapshot; + + let result = qcow2driver + .unwrap() + .lock() + .unwrap() + .delete_snapshot(args.name.clone()); + match result { + Ok(snap_info) => { + Response::create_response(serde_json::to_value(snap_info).unwrap(), None) + } + Err(e) => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(format!( + "Device {} deletes snapshot {} error! {}", + args.device, args.name, e + )), + None, + ), + } + } + + fn query_vcpu_reg(&self, args: qmp_schema::QueryVcpuRegArgument) -> Response { + let vcpu = args.vcpu; + let addr = match u64::from_str_radix(args.addr.as_str(), 16) { + Ok(addr) => addr, + _ => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "address not in hexadecimal".to_string(), + ), + None, + ); + } + }; + + if let Some(val) = self.get_vcpu_reg_val(addr, vcpu) { + return Response::create_response( + serde_json::to_value(format!("{:X}", val)).unwrap(), + None, + ); + } + + Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError("not supported or arguments error".to_string()), + None, + ) + } + + fn query_mem_gpa(&self, args: qmp_schema::QueryMemGpaArgument) -> Response { + let gpa = match u64::from_str_radix(args.gpa.as_str(), 16) { + Ok(gpa) => gpa, + _ => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "address not in hexadecimal".to_string(), + ), + None, + ); + } + }; + + match self + .machine_base() + .sys_mem + .read_object::(GuestAddress(gpa), AddressAttr::Ram) + { + Ok(val) => { + Response::create_response(serde_json::to_value(format!("{:X}", val)).unwrap(), None) + } + _ => Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "this gpa does not support query".to_string(), + ), + None, + ), + } + } + + fn query_workloads(&self) -> Response { + let workloads = query_workloads(); + + if !workloads.is_empty() { + let status = workloads + .iter() + .map(|(module, state)| json!({ "module": module, "state": state })) + .collect(); + + Response::create_response(serde_json::Value::Array(status), None) + } else { + Response::create_empty_response() + } + } +} + +fn parse_blockdev(args: &BlockDevAddArgument) -> Result { + let mut config = DriveConfig { + id: args.node_name.clone(), 
+ drive_type: "none".to_string(), + unit: None, + path_on_host: args.file.filename.clone(), + readonly: args.read_only.unwrap_or(false), + direct: true, + iops: args.iops, + aio: args.file.aio, + media: "disk".to_string(), + discard: false, + write_zeroes: WriteZeroesState::Off, + format: DiskFormat::Raw, + l2_cache_size: None, + refcount_cache_size: None, + }; + if args.cache.is_some() && !args.cache.as_ref().unwrap().direct.unwrap_or(true) { + config.direct = false; + config.aio = AioEngine::Off; + } + if let Some(media) = args.media.as_ref() { + match media.as_str() { + "disk" | "cdrom" => config.media = media.clone(), + _ => bail!("Invalid media argument '{}', expect 'disk | cdrom'", media), + } + } + if let Some(discard) = args.discard.as_ref() { + config.discard = discard + .as_str() + .parse::() + .with_context(|| { + format!( + "Invalid discard argument '{}', expected 'unwrap' or 'ignore'", + discard + ) + })? + .into(); + } + if let Some(detect_zeroes) = args.detect_zeroes.as_ref() { + config.write_zeroes = detect_zeroes + .as_str() + .parse::() + .with_context(|| { + format!( + "Invalid write-zeroes argument '{}', expected 'on | off | unmap'", + detect_zeroes + ) + })?; + } + if let Some(format) = args.driver.as_ref() { + config.format = format.as_str().parse::()?; + } + if let Some(l2_cache) = args.l2_cache_size.as_ref() { + let sz = memory_unit_conversion(l2_cache, M) + .with_context(|| format!("Invalid l2 cache size: {}", l2_cache))?; + config.l2_cache_size = Some(sz); + } + if let Some(rc_cache) = args.refcount_cache_size.as_ref() { + let sz = memory_unit_conversion(rc_cache, M) + .with_context(|| format!("Invalid refcount cache size: {}", rc_cache))?; + config.refcount_cache_size = Some(sz); + } + config.check()?; + config.check_path()?; + Ok(config) +} + +fn send_input_event(key: String, value: String) -> Result<()> { + match key.as_str() { + "keyboard" => { + let vec: Vec<&str> = value.split(',').collect(); + if vec.len() != 2 { + bail!("Invalid keyboard format: {}", value); + } + let keycode = vec[0].parse::()?; + let down = vec[1].parse::()? == 1; + key_event(keycode, down)?; + } + "pointer" => { + let vec: Vec<&str> = value.split(',').collect(); + // There are four expected parameters for input_event, + // includes: x, y, button and down. + if vec.len() != 4 { + bail!("Invalid pointer format: {}", value); + } + let x = vec[0].parse::()?; + let y = vec[1].parse::()?; + let btn = vec[2].parse::()?; + let down = vec[3].parse::()?; + input_move_abs(Axis::X, x)?; + input_move_abs(Axis::Y, y)?; + input_button(btn, down != 0)?; + input_point_sync()?; + } + _ => { + bail!("Invalid input type: {}", key); + } + }; + Ok(()) +} diff --git a/machine/src/standard_vm/x86_64/syscall.rs b/machine/src/standard_common/syscall.rs similarity index 58% rename from machine/src/standard_vm/x86_64/syscall.rs rename to machine/src/standard_common/syscall.rs index 1fe711b695219158077083fdf9a3467e77226c27..30e1998ea25c40bb9052044fe6abea0b05fc01ce 100644 --- a/machine/src/standard_vm/x86_64/syscall.rs +++ b/machine/src/standard_common/syscall.rs @@ -10,14 +10,24 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
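// The common allowlist below is completed by the two #[cfg]-gated architecture
// hooks imported next: arch_syscall_whitelist(), whose rules get appended to the
// shared list, and arch_ioctl_allow_list(), which extends the shared ioctl rule.
// A rough, non-authoritative sketch of what the aarch64 side of that contract
// might look like (the ioctl names are taken from the allowlist that previously
// lived in machine/src/standard_vm/aarch64/syscall.rs; the real module may allow
// a different set):
//
//   pub fn arch_syscall_whitelist() -> Vec<BpfRule> {
//       vec![BpfRule::new(libc::SYS_epoll_pwait)]
//   }
//
//   pub fn arch_ioctl_allow_list(bpf_rule: BpfRule) -> BpfRule {
//       bpf_rule
//           .add_constraint(SeccompCmpOpt::Eq, 1, KVM_ARM_VCPU_INIT() as u32)
//           .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_ONE_REG() as u32)
//           .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_REG_LIST() as u32)
//           .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_DEVICE_ATTR() as u32)
//   }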
+#[cfg(target_arch = "aarch64")] +use crate::aarch64::standard::{arch_ioctl_allow_list, arch_syscall_whitelist}; +#[cfg(target_arch = "x86_64")] +use crate::x86_64::standard::{arch_ioctl_allow_list, arch_syscall_whitelist}; use hypervisor::kvm::*; use util::seccomp::{BpfRule, SeccompCmpOpt}; -use util::tap::{TUNSETIFF, TUNSETOFFLOAD, TUNSETVNETHDRSZ}; +use util::tap::{TUNGETFEATURES, TUNSETIFF, TUNSETOFFLOAD, TUNSETQUEUE, TUNSETVNETHDRSZ}; +#[cfg(feature = "usb_camera_v4l2")] +use util::v4l2::{ + VIDIOC_DQBUF, VIDIOC_ENUM_FMT, VIDIOC_ENUM_FRAMEINTERVALS, VIDIOC_ENUM_FRAMESIZES, + VIDIOC_G_FMT, VIDIOC_QBUF, VIDIOC_QUERYBUF, VIDIOC_QUERYCAP, VIDIOC_REQBUFS, VIDIOC_STREAMOFF, + VIDIOC_STREAMON, VIDIOC_S_FMT, VIDIOC_S_PARM, +}; +#[cfg(feature = "vfio_device")] use vfio::{ - VFIO_CHECK_EXTENSION, VFIO_DEVICE_GET_INFO, VFIO_DEVICE_GET_IRQ_INFO, - VFIO_DEVICE_GET_REGION_INFO, VFIO_DEVICE_RESET, VFIO_DEVICE_SET_IRQS, VFIO_GET_API_VERSION, - VFIO_GROUP_GET_DEVICE_FD, VFIO_GROUP_GET_STATUS, VFIO_GROUP_SET_CONTAINER, VFIO_IOMMU_MAP_DMA, - VFIO_IOMMU_UNMAP_DMA, VFIO_SET_IOMMU, + VFIO_CHECK_EXTENSION, VFIO_DEVICE_GET_INFO, VFIO_DEVICE_GET_REGION_INFO, VFIO_DEVICE_RESET, + VFIO_DEVICE_SET_IRQS, VFIO_GET_API_VERSION, VFIO_GROUP_GET_DEVICE_FD, VFIO_GROUP_GET_STATUS, + VFIO_GROUP_SET_CONTAINER, VFIO_IOMMU_MAP_DMA, VFIO_IOMMU_UNMAP_DMA, VFIO_SET_IOMMU, }; use virtio::VhostKern::*; @@ -36,96 +46,156 @@ const FUTEX_CMP_REQUEUE_PRIVATE: u32 = FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG; const FUTEX_WAKE_OP_PRIVATE: u32 = FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG; const FUTEX_WAIT_BITSET_PRIVATE: u32 = FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG; -/// See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/linux/fcntl.h -const F_GETFD: u32 = 1; -const F_SETFD: u32 = 2; -const F_SETFL: u32 = 4; -const F_LINUX_SPECIFIC_BASE: u32 = 1024; -const F_DUPFD_CLOEXEC: u32 = F_LINUX_SPECIFIC_BASE + 6; - // See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/asm-generic/ioctls.h const TCGETS: u32 = 0x5401; const TCSETS: u32 = 0x5402; const TIOCGWINSZ: u32 = 0x5413; +const FIONREAD: u32 = 0x541B; const FIOCLEX: u32 = 0x5451; const FIONBIO: u32 = 0x5421; const KVM_RUN: u32 = 0xae80; -/// Create a syscall whitelist for seccomp. +/// Create a syscall allowlist for seccomp. /// /// # Notes -/// This allowlist limit syscall with: -/// * x86_64-unknown-gnu: 46 syscalls -/// * x86_64-unknown-musl: 48 syscalls +/// /// To reduce performance losses, the syscall rules is ordered by frequency. 
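///
/// Individual rules can also be narrowed to particular argument values with
/// `add_constraint`. As an illustrative sketch (it simply mirrors the `fcntl`
/// rule further down, where argument index 1 is the fcntl `cmd`), a rule that
/// only admitted `fcntl(fd, F_GETFL, ...)` would be written as:
///
///     BpfRule::new(libc::SYS_fcntl)
///         .add_constraint(SeccompCmpOpt::Eq, 1, libc::F_GETFL as u32)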
pub fn syscall_whitelist() -> Vec { - vec![ + let mut syscall = vec![ BpfRule::new(libc::SYS_read), BpfRule::new(libc::SYS_readv), BpfRule::new(libc::SYS_write), BpfRule::new(libc::SYS_writev), ioctl_allow_list(), - #[cfg(not(target_env = "gnu"))] - BpfRule::new(libc::SYS_epoll_pwait), - BpfRule::new(libc::SYS_epoll_wait), BpfRule::new(libc::SYS_io_getevents), BpfRule::new(libc::SYS_io_submit), + BpfRule::new(libc::SYS_io_destroy), + BpfRule::new(libc::SYS_io_uring_setup), + BpfRule::new(libc::SYS_io_uring_register), + BpfRule::new(libc::SYS_io_uring_enter), BpfRule::new(libc::SYS_dup), BpfRule::new(libc::SYS_close), BpfRule::new(libc::SYS_eventfd2), BpfRule::new(libc::SYS_epoll_ctl), + BpfRule::new(libc::SYS_ppoll), BpfRule::new(libc::SYS_fdatasync), BpfRule::new(libc::SYS_recvmsg), BpfRule::new(libc::SYS_sendmsg), + BpfRule::new(libc::SYS_sendto), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_sendmmsg), BpfRule::new(libc::SYS_recvfrom), BpfRule::new(libc::SYS_mremap), BpfRule::new(libc::SYS_io_setup), BpfRule::new(libc::SYS_brk), BpfRule::new(libc::SYS_fcntl) - .add_constraint(SeccompCmpOpt::Eq, 1, F_DUPFD_CLOEXEC) - .add_constraint(SeccompCmpOpt::Eq, 1, F_SETFD) - .add_constraint(SeccompCmpOpt::Eq, 1, F_GETFD) - .add_constraint(SeccompCmpOpt::Eq, 1, F_SETFL), + .add_constraint(SeccompCmpOpt::Eq, 1, libc::F_DUPFD_CLOEXEC as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, libc::F_SETFD as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, libc::F_GETFD as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, libc::F_SETLK as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, libc::F_GETFL as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, libc::F_SETFL as u32), + BpfRule::new(libc::SYS_flock), BpfRule::new(libc::SYS_rt_sigprocmask), - BpfRule::new(libc::SYS_open), BpfRule::new(libc::SYS_openat), BpfRule::new(libc::SYS_sigaltstack), BpfRule::new(libc::SYS_mmap), - #[cfg(target_env = "gnu")] BpfRule::new(libc::SYS_mprotect), BpfRule::new(libc::SYS_munmap), BpfRule::new(libc::SYS_accept4), BpfRule::new(libc::SYS_lseek), futex_rule(), + BpfRule::new(libc::SYS_clone), BpfRule::new(libc::SYS_exit), BpfRule::new(libc::SYS_exit_group), BpfRule::new(libc::SYS_rt_sigreturn), - #[cfg(target_env = "musl")] + #[cfg(any(target_env = "musl", target_env = "ohos"))] BpfRule::new(libc::SYS_tkill), - #[cfg(target_env = "musl")] - BpfRule::new(libc::SYS_stat), + BpfRule::new(libc::SYS_newfstatat), #[cfg(target_env = "gnu")] BpfRule::new(libc::SYS_tgkill), BpfRule::new(libc::SYS_gettid), BpfRule::new(libc::SYS_getpid), BpfRule::new(libc::SYS_fstat), - BpfRule::new(libc::SYS_newfstatat), BpfRule::new(libc::SYS_pread64), + BpfRule::new(libc::SYS_preadv), BpfRule::new(libc::SYS_pwrite64), + BpfRule::new(libc::SYS_pwritev), BpfRule::new(libc::SYS_statx), - BpfRule::new(libc::SYS_mkdir), - BpfRule::new(libc::SYS_unlink), madvise_rule(), BpfRule::new(libc::SYS_msync), BpfRule::new(libc::SYS_readlinkat), - #[cfg(target_env = "musl")] - BpfRule::new(libc::SYS_readlink), - ] + BpfRule::new(libc::SYS_renameat), + BpfRule::new(libc::SYS_socket), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_bind), + BpfRule::new(libc::SYS_connect), + BpfRule::new(libc::SYS_getcwd), + BpfRule::new(libc::SYS_prctl), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_getsockname), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_getpeername), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_nanosleep), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_clock_nanosleep), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_getuid), + 
#[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_geteuid), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_getgid), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_getegid), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_gettid), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_getdents64), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_clock_gettime), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_getsockopt), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_uname), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_sysinfo), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_faccessat), + BpfRule::new(libc::SYS_getrandom), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_shutdown), + BpfRule::new(libc::SYS_setsockopt), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_set_robust_list), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_sched_getaffinity), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_pipe2), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_memfd_create), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_ftruncate), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_futex), + BpfRule::new(libc::SYS_fallocate), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_getresuid), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_getresgid), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_fstatfs), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_shmget), + ]; + syscall.append(&mut arch_syscall_whitelist()); + + syscall } /// Create a syscall bpf rule for syscall `ioctl`. fn ioctl_allow_list() -> BpfRule { - BpfRule::new(libc::SYS_ioctl) + let bpf_rule = BpfRule::new(libc::SYS_ioctl) .add_constraint(SeccompCmpOpt::Eq, 1, TCGETS) .add_constraint(SeccompCmpOpt::Eq, 1, TCSETS) .add_constraint(SeccompCmpOpt::Eq, 1, TIOCGWINSZ) @@ -136,13 +206,14 @@ fn ioctl_allow_list() -> BpfRule { .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_USER_MEMORY_REGION) .add_constraint(SeccompCmpOpt::Eq, 1, KVM_IOEVENTFD) .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SIGNAL_MSI) + .add_constraint(SeccompCmpOpt::Eq, 1, FIONREAD) .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_VSOCK_SET_GUEST_CID() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_VSOCK_SET_RUNNING() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_VRING_CALL() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_VRING_NUM() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_GET_VRING_BASE() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_VRING_ADDR() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_VRING_BASE() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_GET_VRING_BASE() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_VRING_KICK() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_OWNER() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_FEATURES() as u32) @@ -151,69 +222,75 @@ fn ioctl_allow_list() -> BpfRule { .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_NET_SET_BACKEND() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_GET_FEATURES() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_RESET_OWNER() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, TUNGETFEATURES() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, TUNSETIFF() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, TUNSETOFFLOAD() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, TUNSETVNETHDRSZ() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, TUNSETQUEUE() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_GSI_ROUTING() as u32) 
.add_constraint(SeccompCmpOpt::Eq, 1, KVM_IRQFD() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_CREATE_DEVICE() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_API_VERSION() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_MP_STATE() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_VCPU_EVENTS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_DIRTY_LOG() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_MP_STATE() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_VCPU_EVENTS() as u32); + + #[cfg(feature = "vfio_device")] + let bpf_rule = bpf_rule .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_DEVICE_SET_IRQS() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_GROUP_GET_STATUS() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_GET_API_VERSION() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_CHECK_EXTENSION() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_GROUP_SET_CONTAINER() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_SET_IOMMU() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_CREATE_DEVICE() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_IOMMU_MAP_DMA() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_IOMMU_UNMAP_DMA() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_GROUP_GET_DEVICE_FD() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_DEVICE_GET_INFO() as u32) .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_DEVICE_RESET() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_DEVICE_GET_REGION_INFO() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_DEVICE_GET_IRQ_INFO() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_API_VERSION() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_MP_STATE() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_VCPU_EVENTS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_PIT2() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_CLOCK() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_IRQCHIP() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_REGS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_SREGS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_XSAVE() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_SREGS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_DEBUGREGS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_XCRS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_LAPIC() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_MSRS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_SUPPORTED_CPUID() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_CPUID2() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_MP_STATE() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_SREGS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_REGS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_XSAVE() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_XCRS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_DEBUGREGS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_LAPIC() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_MSRS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_VCPU_EVENTS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_DEVICE_GET_REGION_INFO() as u32); + + #[cfg(feature = "usb_camera_v4l2")] + let bpf_rule = bpf_rule + .add_constraint(SeccompCmpOpt::Eq, 1, VIDIOC_QUERYCAP() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VIDIOC_ENUM_FMT() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VIDIOC_G_FMT() as 
u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VIDIOC_S_FMT() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VIDIOC_REQBUFS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VIDIOC_QUERYBUF() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VIDIOC_QBUF() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VIDIOC_DQBUF() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VIDIOC_STREAMON() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VIDIOC_STREAMOFF() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VIDIOC_S_PARM() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VIDIOC_ENUM_FRAMESIZES() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, VIDIOC_ENUM_FRAMEINTERVALS() as u32); + + arch_ioctl_allow_list(bpf_rule) } fn madvise_rule() -> BpfRule { - #[cfg(target_env = "musl")] + #[cfg(any(target_env = "musl", target_env = "ohos"))] return BpfRule::new(libc::SYS_madvise) .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_FREE as u32) .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTNEED as u32) .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_WILLNEED as u32) - .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTDUMP as u32); + .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTDUMP as u32) + .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_REMOVE as u32); #[cfg(target_env = "gnu")] return BpfRule::new(libc::SYS_madvise) .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTNEED as u32) .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_WILLNEED as u32) - .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTDUMP as u32); + .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTDUMP as u32) + .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_REMOVE as u32); } fn futex_rule() -> BpfRule { - #[cfg(target_env = "musl")] + #[cfg(any(target_env = "musl", target_env = "ohos"))] return BpfRule::new(libc::SYS_futex) + .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAIT) .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAKE_PRIVATE) .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAIT_PRIVATE) .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_CMP_REQUEUE_PRIVATE) @@ -226,6 +303,7 @@ fn futex_rule() -> BpfRule { 1, FUTEX_WAIT_BITSET_PRIVATE | FUTEX_CLOCK_REALTIME, ) + .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAIT) .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAKE_PRIVATE) .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAIT_PRIVATE) .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_CMP_REQUEUE_PRIVATE) diff --git a/machine/src/standard_vm/aarch64/mod.rs b/machine/src/standard_vm/aarch64/mod.rs deleted file mode 100644 index 3fd2de30fd79c56f3c201d5180a846e5e01bf16b..0000000000000000000000000000000000000000 --- a/machine/src/standard_vm/aarch64/mod.rs +++ /dev/null @@ -1,1018 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. -// -// StratoVirt is licensed under Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan -// PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. 
- -mod pci_host_root; -mod syscall; - -use std::fs::OpenOptions; -use std::ops::Deref; -use std::sync::{Arc, Condvar, Mutex}; - -use address_space::{AddressSpace, GuestAddress, Region}; -use boot_loader::{load_linux, BootLoaderConfig}; -use cpu::{CPUBootConfig, CPUInterface, CpuTopology, CPU}; -use devices::legacy::{ - errors::ErrorKind as DevErrorKind, FwCfgEntryType, FwCfgMem, FwCfgOps, PFlash, PL011, PL031, -}; -use devices::{InterruptController, InterruptControllerConfig}; -use error_chain::ChainedError; -use hypervisor::kvm::KVM_FDS; -use machine_manager::config::{BootSource, PFlashConfig, SerialConfig, VmConfig}; -use machine_manager::machine::{ - KvmVmState, MachineAddressInterface, MachineExternalInterface, MachineInterface, - MachineLifecycle, MigrateInterface, -}; -use machine_manager::qmp::{qmp_schema, QmpChannel, Response}; -use migration::{MigrationManager, MigrationStatus}; -use pci::{PciDevOps, PciHost}; -use sysbus::{SysBus, SysBusDevOps, SysBusDevType, SysRes}; -use util::byte_code::ByteCode; -use util::device_tree::{self, CompileFDT, FdtBuilder}; -use util::loop_context::EventLoopManager; -use util::seccomp::BpfRule; -use util::set_termi_canon_mode; -use vmm_sys_util::eventfd::EventFd; - -use super::{errors::Result as StdResult, AcpiBuilder, StdMachineOps}; -use crate::errors::{ErrorKind, Result}; -use crate::MachineOps; -use pci_host_root::PciHostRoot; -use syscall::syscall_whitelist; - -/// The type of memory layout entry on aarch64 -#[allow(dead_code)] -pub enum LayoutEntryType { - Flash = 0, - GicDist, - GicCpu, - GicIts, - GicRedist, - Uart, - Rtc, - FwCfg, - Mmio, - PcieMmio, - PciePio, - PcieEcam, - Mem, - HighGicRedist, - HighPcieEcam, - HighPcieMmio, -} - -/// Layout of aarch64 -pub const MEM_LAYOUT: &[(u64, u64)] = &[ - (0, 0x0800_0000), // Flash - (0x0800_0000, 0x0001_0000), // GicDist - (0x0801_0000, 0x0001_0000), // GicCpu - (0x0808_0000, 0x0002_0000), // GicIts - (0x080A_0000, 0x00F6_0000), // GicRedist (max 123 redistributors) - (0x0900_0000, 0x0000_1000), // Uart - (0x0901_0000, 0x0000_1000), // Rtc - (0x0902_0000, 0x0000_0018), // FwCfg - (0x0A00_0000, 0x0000_0200), // Mmio - (0x1000_0000, 0x2EFF_0000), // PcieMmio - (0x3EFF_0000, 0x0001_0000), // PciePio - (0x3F00_0000, 0x0100_0000), // PcieEcam - (0x4000_0000, 0x80_0000_0000), // Mem - (256 << 30, 0x200_0000), // HighGicRedist, (where remaining redistributors locates) - (257 << 30, 0x1000_0000), // HighPcieEcam - (258 << 30, 512 << 30), // HighPcieMmio -]; - -/// Standard machine structure. -pub struct StdMachine { - /// `vCPU` topology, support sockets, cores, threads. - cpu_topo: CpuTopology, - /// `vCPU` devices. - cpus: Vec>, - // Interrupt controller device. - #[cfg(target_arch = "aarch64")] - irq_chip: Option>, - /// Memory address space. - pub sys_mem: Arc, - /// System bus. - sysbus: SysBus, - /// PCI/PCIe host bridge. - pci_host: Arc>, - /// VM running state. - vm_state: Arc<(Mutex, Condvar)>, - /// Vm boot_source config. - boot_source: Arc>, - /// VM power button, handle VM `Shutdown` event. - power_button: EventFd, - vm_config: Mutex, - /// Reset request, handle VM `Reset` event. - reset_req: EventFd, - /// Device Tree Blob. 
- dtb_vec: Vec, -} - -impl StdMachine { - pub fn new(vm_config: &VmConfig) -> Result { - use crate::errors::ResultExt; - - let cpu_topo = CpuTopology::new(vm_config.machine_config.nr_cpus); - let sys_mem = AddressSpace::new(Region::init_container_region(u64::max_value())) - .chain_err(|| ErrorKind::CrtIoSpaceErr)?; - let sysbus = SysBus::new( - &sys_mem, - (32, 192), - ( - MEM_LAYOUT[LayoutEntryType::Mmio as usize].0, - MEM_LAYOUT[LayoutEntryType::Mmio as usize + 1].0, - ), - ); - - Ok(StdMachine { - cpu_topo, - cpus: Vec::new(), - irq_chip: None, - sys_mem: sys_mem.clone(), - sysbus, - pci_host: Arc::new(Mutex::new(PciHost::new( - &sys_mem, - MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize], - MEM_LAYOUT[LayoutEntryType::HighPcieMmio as usize], - ))), - boot_source: Arc::new(Mutex::new(vm_config.clone().boot_source)), - vm_state: Arc::new((Mutex::new(KvmVmState::Created), Condvar::new())), - power_button: EventFd::new(libc::EFD_NONBLOCK) - .chain_err(|| ErrorKind::InitEventFdErr("power_button".to_string()))?, - vm_config: Mutex::new(vm_config.clone()), - reset_req: EventFd::new(libc::EFD_NONBLOCK) - .chain_err(|| ErrorKind::InitEventFdErr("reset_req".to_string()))?, - dtb_vec: Vec::new(), - }) - } - - pub fn handle_reset_request(vm: &Arc>) -> Result<()> { - use crate::errors::ResultExt; - - let locked_vm = vm.lock().unwrap(); - let mut fdt_addr: u64 = 0; - - for (cpu_index, cpu) in locked_vm.cpus.iter().enumerate() { - cpu.pause() - .chain_err(|| format!("Failed to pause vcpu{}", cpu_index))?; - - cpu.set_to_boot_state(); - if cpu_index == 0 { - fdt_addr = cpu.arch().lock().unwrap().core_regs().regs.regs[0]; - } - cpu.fd() - .vcpu_init(&cpu.arch().lock().unwrap().kvi()) - .chain_err(|| "Failed to init vcpu fd")?; - } - - locked_vm - .sys_mem - .write( - &mut locked_vm.dtb_vec.as_slice(), - GuestAddress(fdt_addr as u64), - locked_vm.dtb_vec.len() as u64, - ) - .chain_err(|| "Fail to write dtb into sysmem")?; - - for dev in locked_vm.sysbus.devices.iter() { - dev.lock() - .unwrap() - .reset() - .chain_err(|| "Fail to reset sysbus device")?; - } - locked_vm - .pci_host - .lock() - .unwrap() - .reset() - .chain_err(|| "Fail to reset pci host")?; - - for (cpu_index, cpu) in locked_vm.cpus.iter().enumerate() { - cpu.resume() - .chain_err(|| format!("Failed to resume vcpu{}", cpu_index))?; - } - - Ok(()) - } -} - -impl StdMachineOps for StdMachine { - fn init_pci_host(&self) -> StdResult<()> { - use super::errors::ResultExt; - - let root_bus = Arc::downgrade(&self.pci_host.lock().unwrap().root_bus); - let mmconfig_region_ops = PciHost::build_mmconfig_ops(self.pci_host.clone()); - let mmconfig_region = Region::init_io_region( - MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].1, - mmconfig_region_ops, - ); - self.sys_mem - .root() - .add_subregion( - mmconfig_region, - MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].0, - ) - .chain_err(|| "Failed to register ECAM in memory space.")?; - - let pcihost_root = PciHostRoot::new(root_bus); - pcihost_root - .realize() - .chain_err(|| "Failed to realize pcihost root device.")?; - - Ok(()) - } - - fn add_fwcfg_device(&mut self) -> StdResult>> { - use super::errors::ResultExt; - - let mut fwcfg = FwCfgMem::new(self.sys_mem.clone()); - let ncpus = self.cpus.len(); - fwcfg - .add_data_entry(FwCfgEntryType::NbCpus, ncpus.as_bytes().to_vec()) - .chain_err(|| DevErrorKind::AddEntryErr("NbCpus".to_string()))?; - - let cmdline = self.boot_source.lock().unwrap().kernel_cmdline.to_string(); - fwcfg - .add_data_entry( - FwCfgEntryType::CmdlineSize, - 
(cmdline.len() + 1).as_bytes().to_vec(), - ) - .chain_err(|| DevErrorKind::AddEntryErr("CmdlineSize".to_string()))?; - fwcfg - .add_string_entry(FwCfgEntryType::CmdlineData, cmdline.as_str()) - .chain_err(|| DevErrorKind::AddEntryErr("CmdlineData".to_string()))?; - - let boot_order = Vec::::new(); - fwcfg - .add_file_entry("bootorder", boot_order) - .chain_err(|| DevErrorKind::AddEntryErr("bootorder".to_string()))?; - - let bios_geometry = Vec::::new(); - fwcfg - .add_file_entry("bios-geometry", bios_geometry) - .chain_err(|| DevErrorKind::AddEntryErr("bios-geometry".to_string()))?; - - let fwcfg_dev = FwCfgMem::realize( - fwcfg, - &mut self.sysbus, - MEM_LAYOUT[LayoutEntryType::FwCfg as usize].0, - MEM_LAYOUT[LayoutEntryType::FwCfg as usize].1, - ) - .chain_err(|| "Failed to realize fwcfg device")?; - - Ok(fwcfg_dev) - } - - fn get_vm_state(&self) -> &Arc<(Mutex, Condvar)> { - &self.vm_state - } - - fn get_cpu_topo(&self) -> &CpuTopology { - &self.cpu_topo - } - - fn get_cpus(&self) -> &Vec> { - &self.cpus - } - - fn get_vm_config(&self) -> &Mutex { - &self.vm_config - } -} - -impl MachineOps for StdMachine { - fn arch_ram_ranges(&self, mem_size: u64) -> Vec<(u64, u64)> { - vec![(MEM_LAYOUT[LayoutEntryType::Mem as usize].0, mem_size)] - } - - fn init_interrupt_controller(&mut self, vcpu_count: u64) -> Result<()> { - let intc_conf = InterruptControllerConfig { - version: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3, - vcpu_count, - max_irq: 192, - msi: true, - dist_range: MEM_LAYOUT[LayoutEntryType::GicDist as usize], - redist_region_ranges: vec![ - MEM_LAYOUT[LayoutEntryType::GicRedist as usize], - MEM_LAYOUT[LayoutEntryType::HighGicRedist as usize], - ], - its_range: Some(MEM_LAYOUT[LayoutEntryType::GicIts as usize]), - }; - let irq_chip = InterruptController::new(&intc_conf)?; - self.irq_chip = Some(Arc::new(irq_chip)); - self.irq_chip.as_ref().unwrap().realize()?; - KVM_FDS - .load() - .irq_route_table - .lock() - .unwrap() - .init_irq_route_table(); - KVM_FDS.load().commit_irq_routing()?; - Ok(()) - } - - fn load_boot_source(&self, fwcfg: Option<&Arc>>) -> Result { - use crate::errors::ResultExt; - - let mut boot_source = self.boot_source.lock().unwrap(); - let initrd = boot_source.initrd.as_ref().map(|b| b.initrd_file.clone()); - - let bootloader_config = BootLoaderConfig { - kernel: boot_source.kernel_file.clone(), - initrd, - mem_start: MEM_LAYOUT[LayoutEntryType::Mem as usize].0, - }; - let layout = load_linux(&bootloader_config, &self.sys_mem, fwcfg) - .chain_err(|| ErrorKind::LoadKernErr)?; - if let Some(rd) = &mut boot_source.initrd { - rd.initrd_addr = layout.initrd_start; - rd.initrd_size = layout.initrd_size; - } - - Ok(CPUBootConfig { - fdt_addr: layout.dtb_start, - boot_pc: layout.boot_pc, - }) - } - - fn add_rtc_device(&mut self) -> Result<()> { - use crate::errors::ResultExt; - - let rtc = PL031::default(); - PL031::realize( - rtc, - &mut self.sysbus, - MEM_LAYOUT[LayoutEntryType::Rtc as usize].0, - MEM_LAYOUT[LayoutEntryType::Rtc as usize].1, - ) - .chain_err(|| "Failed to realize PL031")?; - Ok(()) - } - - fn add_serial_device(&mut self, config: &SerialConfig) -> Result<()> { - use crate::errors::ResultExt; - - let region_base: u64 = MEM_LAYOUT[LayoutEntryType::Uart as usize].0; - let region_size: u64 = MEM_LAYOUT[LayoutEntryType::Uart as usize].1; - - let pl011 = PL011::new(config.clone()).chain_err(|| "Failed to create PL011")?; - pl011 - .realize( - &mut self.sysbus, - region_base, - region_size, - &self.boot_source, - ) - .chain_err(|| "Failed to realize 
PL011")?; - Ok(()) - } - - fn syscall_whitelist(&self) -> Vec { - syscall_whitelist() - } - - fn realize(vm: &Arc>, vm_config: &mut VmConfig, is_migrate: bool) -> Result<()> { - use super::errors::ErrorKind as StdErrorKind; - use crate::errors::ResultExt; - - let clone_vm = vm.clone(); - let mut locked_vm = vm.lock().unwrap(); - locked_vm.init_global_config(vm_config)?; - locked_vm - .register_reset_event(&locked_vm.reset_req, clone_vm) - .chain_err(|| "Fail to register reset event")?; - locked_vm.init_memory( - &vm_config.machine_config.mem_config, - &locked_vm.sys_mem, - is_migrate, - vm_config.machine_config.nr_cpus, - )?; - - let vcpu_fds = { - let mut fds = vec![]; - for vcpu_id in 0..vm_config.machine_config.nr_cpus { - fds.push(Arc::new( - KVM_FDS - .load() - .vm_fd - .as_ref() - .unwrap() - .create_vcpu(vcpu_id)?, - )); - } - fds - }; - - // Interrupt Controller Chip init - locked_vm.init_interrupt_controller(u64::from(vm_config.machine_config.nr_cpus))?; - locked_vm - .init_pci_host() - .chain_err(|| StdErrorKind::InitPCIeHostErr)?; - locked_vm - .add_devices(vm_config) - .chain_err(|| "Failed to add devices")?; - - let boot_config = if !is_migrate { - let fwcfg = locked_vm.add_fwcfg_device()?; - Some(locked_vm.load_boot_source(Some(&fwcfg))?) - } else { - None - }; - - locked_vm.cpus.extend(::init_vcpu( - vm.clone(), - vm_config.machine_config.nr_cpus, - &vcpu_fds, - &boot_config, - )?); - - if let Some(boot_cfg) = boot_config { - let mut fdt_helper = FdtBuilder::new(); - locked_vm - .generate_fdt_node(&mut fdt_helper) - .chain_err(|| ErrorKind::GenFdtErr)?; - let fdt_vec = fdt_helper.finish()?; - locked_vm.dtb_vec = fdt_vec.clone(); - locked_vm - .sys_mem - .write( - &mut fdt_vec.as_slice(), - GuestAddress(boot_cfg.fdt_addr as u64), - fdt_vec.len() as u64, - ) - .chain_err(|| ErrorKind::WrtFdtErr(boot_cfg.fdt_addr, fdt_vec.len()))?; - } - - locked_vm.register_power_event(&locked_vm.power_button)?; - - if let Err(e) = MigrationManager::set_status(MigrationStatus::Setup) { - bail!("Failed to set migration status {}", e); - } - Ok(()) - } - - fn add_pflash_device(&mut self, configs: &[PFlashConfig]) -> Result<()> { - use super::errors::ErrorKind as StdErrorKind; - use crate::errors::ResultExt; - - let mut configs_vec = configs.to_vec(); - configs_vec.sort_by_key(|c| c.unit); - let sector_len: u32 = 1024 * 256; - let mut flash_base: u64 = MEM_LAYOUT[LayoutEntryType::Flash as usize].0; - let flash_size: u64 = MEM_LAYOUT[LayoutEntryType::Flash as usize].1 / 2; - for i in 0..=1 { - let (fd, read_only) = if i < configs_vec.len() { - let path = &configs_vec[i].path_on_host; - let read_only = configs_vec[i].read_only; - let fd = OpenOptions::new() - .read(true) - .write(!read_only) - .open(path) - .chain_err(|| StdErrorKind::OpenFileErr(path.to_string()))?; - (Some(fd), read_only) - } else { - (None, false) - }; - - let pflash = PFlash::new(flash_size, &fd, sector_len, 4, 2, read_only) - .chain_err(|| StdErrorKind::InitPflashErr)?; - PFlash::realize(pflash, &mut self.sysbus, flash_base, flash_size, fd) - .chain_err(|| StdErrorKind::RlzPflashErr)?; - flash_base += flash_size; - } - - Ok(()) - } - - fn run(&self, paused: bool) -> Result<()> { - ::vm_start(paused, &self.cpus, &mut self.vm_state.0.lock().unwrap()) - } - - fn get_sys_mem(&mut self) -> &Arc { - &self.sys_mem - } - - fn get_pci_host(&mut self) -> StdResult<&Arc>> { - Ok(&self.pci_host) - } -} - -impl AcpiBuilder for StdMachine {} - -impl MachineLifecycle for StdMachine { - fn pause(&self) -> bool { - if 
self.notify_lifecycle(KvmVmState::Running, KvmVmState::Paused) { - event!(Stop); - true - } else { - false - } - } - - fn resume(&self) -> bool { - if !self.notify_lifecycle(KvmVmState::Paused, KvmVmState::Running) { - return false; - } - event!(Resume); - true - } - - fn destroy(&self) -> bool { - let vmstate = { - let state = self.vm_state.deref().0.lock().unwrap(); - *state - }; - - if !self.notify_lifecycle(vmstate, KvmVmState::Shutdown) { - return false; - } - true - } - - fn reset(&mut self) -> bool { - self.reset_req.write(1).unwrap(); - true - } - - fn notify_lifecycle(&self, old: KvmVmState, new: KvmVmState) -> bool { - ::vm_state_transfer( - &self.cpus, - #[cfg(target_arch = "aarch64")] - &self.irq_chip, - &mut self.vm_state.0.lock().unwrap(), - old, - new, - ) - .is_ok() - } -} - -impl MachineAddressInterface for StdMachine { - fn mmio_read(&self, addr: u64, mut data: &mut [u8]) -> bool { - let length = data.len() as u64; - self.sys_mem - .read(&mut data, GuestAddress(addr), length) - .is_ok() - } - - fn mmio_write(&self, addr: u64, mut data: &[u8]) -> bool { - let count = data.len() as u64; - self.sys_mem - .write(&mut data, GuestAddress(addr), count) - .is_ok() - } -} - -impl MigrateInterface for StdMachine { - fn migrate(&self, uri: String) -> Response { - use util::unix::{parse_uri, UnixPath}; - - match parse_uri(&uri) { - Ok((UnixPath::File, path)) => { - if let Err(e) = MigrationManager::save_snapshot(&path) { - error!( - "Failed to migrate to path \'{:?}\': {}", - path, - e.display_chain() - ); - let _ = MigrationManager::set_status(MigrationStatus::Failed) - .map_err(|e| error!("{}", e)); - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ); - } - } - _ => { - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(format!("Invalid uri: {}", uri)), - None, - ); - } - } - - Response::create_empty_response() - } - - fn query_migrate(&self) -> Response { - let status_str = MigrationManager::migration_get_status().to_string(); - let migration_info = qmp_schema::MigrationInfo { - status: Some(status_str), - }; - - Response::create_response(serde_json::to_value(migration_info).unwrap(), None) - } -} - -impl MachineInterface for StdMachine {} -impl MachineExternalInterface for StdMachine {} - -impl EventLoopManager for StdMachine { - fn loop_should_exit(&self) -> bool { - let vmstate = self.vm_state.deref().0.lock().unwrap(); - *vmstate == KvmVmState::Shutdown - } - - fn loop_cleanup(&self) -> util::errors::Result<()> { - use util::errors::ResultExt; - - set_termi_canon_mode().chain_err(|| "Failed to set terminal to canonical mode")?; - Ok(()) - } -} - -// Function that helps to generate pci node in device-tree. -// -// # Arguments -// -// * `fdt` - Flatted device-tree blob where node will be filled into. 
-fn generate_pci_host_node(fdt: &mut FdtBuilder) -> util::errors::Result<()> { - let pcie_ecam_base = MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].0; - let pcie_ecam_size = MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].1; - let pcie_buses_num = MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].1 >> 20; - let node = format!("pcie@{:x}", pcie_ecam_base); - let pci_node_dep = fdt.begin_node(&node)?; - fdt.set_property_string("compatible", "pci-host-ecam-generic")?; - fdt.set_property_string("device_type", "pci")?; - fdt.set_property_array_u64("reg", &[pcie_ecam_base, pcie_ecam_size])?; - fdt.set_property_array_u32("bus-range", &[0, (pcie_buses_num - 1) as u32])?; - fdt.set_property_u32("linux,pci-domain", 0)?; - fdt.set_property_u32("#address-cells", 3)?; - fdt.set_property_u32("#size-cells", 2)?; - - let pcie_mmio_base = MEM_LAYOUT[LayoutEntryType::PcieMmio as usize].0; - let pcie_mmio_size = MEM_LAYOUT[LayoutEntryType::PcieMmio as usize].1; - let fdt_pci_mmio_type: u32 = 0x0200_0000; - let mmio_base_hi: u32 = (pcie_mmio_base >> 32) as u32; - let mmio_base_lo: u32 = (pcie_mmio_base & 0xffff_ffff) as u32; - let mmio_size_hi: u32 = (pcie_mmio_size >> 32) as u32; - let mmio_size_lo: u32 = (pcie_mmio_size & 0xffff_ffff) as u32; - - let pcie_pio_base = MEM_LAYOUT[LayoutEntryType::PciePio as usize].0; - let pcie_pio_size = MEM_LAYOUT[LayoutEntryType::PciePio as usize].1; - let fdt_pci_pio_type: u32 = 0x0100_0000; - let pio_base_hi: u32 = (pcie_pio_base >> 32) as u32; - let pio_base_lo: u32 = (pcie_pio_base & 0xffff_ffff) as u32; - let pio_size_hi: u32 = (pcie_pio_size >> 32) as u32; - let pio_size_lo: u32 = (pcie_pio_size & 0xffff_ffff) as u32; - - fdt.set_property_array_u32( - "ranges", - &[ - fdt_pci_pio_type, - 0, - 0, - pio_base_hi, - pio_base_lo, - pio_size_hi, - pio_size_lo, - fdt_pci_mmio_type, - mmio_base_hi, - mmio_base_lo, - mmio_base_hi, - mmio_base_lo, - mmio_size_hi, - mmio_size_lo, - ], - )?; - - fdt.set_property_u32("msi-parent", device_tree::GIC_ITS_PHANDLE)?; - fdt.end_node(pci_node_dep)?; - Ok(()) -} - -// Function that helps to generate Virtio-Mmio device's node in device-tree. -// -// # Arguments -// -// * `dev_info` - Device resource info of Virtio-Mmio device. -// * `fdt` - Flatted device-tree blob where node will be filled into. -fn generate_virtio_devices_node(fdt: &mut FdtBuilder, res: &SysRes) -> util::errors::Result<()> { - let node = format!("virtio_mmio@{:x}", res.region_base); - let virtio_node_dep = fdt.begin_node(&node)?; - fdt.set_property_string("compatible", "virtio,mmio")?; - fdt.set_property_u32("interrupt-parent", device_tree::GIC_PHANDLE)?; - fdt.set_property_array_u64("reg", &[res.region_base, res.region_size])?; - fdt.set_property_array_u32( - "interrupts", - &[ - device_tree::GIC_FDT_IRQ_TYPE_SPI, - res.irq as u32, - device_tree::IRQ_TYPE_EDGE_RISING, - ], - )?; - fdt.end_node(virtio_node_dep)?; - Ok(()) -} - -/// Function that helps to generate flash node in device-tree. -/// -/// # Arguments -/// -/// * `dev_info` - Device resource info of fw-cfg device. -/// * `flash` - Flatted device-tree blob where fw-cfg node will be filled into. 
-fn generate_flash_device_node(fdt: &mut FdtBuilder) -> util::errors::Result<()> { - let flash_base = MEM_LAYOUT[LayoutEntryType::Flash as usize].0; - let flash_size = MEM_LAYOUT[LayoutEntryType::Flash as usize].1 / 2; - let node = format!("flash@{:x}", flash_base); - let flash_node_dep = fdt.begin_node(&node)?; - fdt.set_property_string("compatible", "cfi-flash")?; - fdt.set_property_array_u64( - "reg", - &[flash_base, flash_size, flash_base + flash_size, flash_size], - )?; - fdt.set_property_u32("bank-width", 4)?; - fdt.end_node(flash_node_dep)?; - Ok(()) -} - -/// Function that helps to generate fw-cfg node in device-tree. -/// -/// # Arguments -/// -/// * `dev_info` - Device resource info of fw-cfg device. -/// * `fdt` - Flatted device-tree blob where fw-cfg node will be filled into. -fn generate_fwcfg_device_node(fdt: &mut FdtBuilder, res: &SysRes) -> util::errors::Result<()> { - let node = format!("fw-cfg@{:x}", res.region_base); - let fwcfg_node_dep = fdt.begin_node(&node)?; - fdt.set_property_string("compatible", "qemu,fw-cfg-mmio")?; - fdt.set_property_array_u64("reg", &[res.region_base, res.region_size])?; - fdt.end_node(fwcfg_node_dep)?; - - Ok(()) -} - -// Function that helps to generate serial node in device-tree. -// -// # Arguments -// -// * `dev_info` - Device resource info of serial device. -// * `fdt` - Flatted device-tree blob where serial node will be filled into. -fn generate_serial_device_node(fdt: &mut FdtBuilder, res: &SysRes) -> util::errors::Result<()> { - let node = format!("pl011@{:x}", res.region_base); - let serial_node_dep = fdt.begin_node(&node)?; - fdt.set_property_string("compatible", "arm,pl011\0arm,primecell")?; - fdt.set_property_string("clock-names", "uartclk\0apb_pclk")?; - fdt.set_property_array_u32( - "clocks", - &[device_tree::CLK_PHANDLE, device_tree::CLK_PHANDLE], - )?; - fdt.set_property_array_u64("reg", &[res.region_base, res.region_size])?; - fdt.set_property_array_u32( - "interrupts", - &[ - device_tree::GIC_FDT_IRQ_TYPE_SPI, - res.irq as u32, - device_tree::IRQ_TYPE_EDGE_RISING, - ], - )?; - fdt.end_node(serial_node_dep)?; - - Ok(()) -} - -// Function that helps to generate RTC node in device-tree. -// -// # Arguments -// -// * `dev_info` - Device resource info of RTC device. -// * `fdt` - Flatted device-tree blob where RTC node will be filled into. -fn generate_rtc_device_node(fdt: &mut FdtBuilder, res: &SysRes) -> util::errors::Result<()> { - let node = format!("pl031@{:x}", res.region_base); - let rtc_node_dep = fdt.begin_node(&node)?; - fdt.set_property_string("compatible", "arm,pl031\0arm,primecell\0")?; - fdt.set_property_string("clock-names", "apb_pclk")?; - fdt.set_property_u32("clocks", device_tree::CLK_PHANDLE)?; - fdt.set_property_array_u64("reg", &[res.region_base, res.region_size])?; - fdt.set_property_array_u32( - "interrupts", - &[ - device_tree::GIC_FDT_IRQ_TYPE_SPI, - res.irq as u32, - device_tree::IRQ_TYPE_LEVEL_HIGH, - ], - )?; - fdt.end_node(rtc_node_dep)?; - - Ok(()) -} - -/// Trait that helps to generate all nodes in device-tree. -#[allow(clippy::upper_case_acronyms)] -trait CompileFDTHelper { - /// Function that helps to generate cpu nodes. - fn generate_cpu_nodes(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()>; - /// Function that helps to generate memory nodes. - fn generate_memory_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()>; - /// Function that helps to generate Virtio-mmio devices' nodes. 
- fn generate_devices_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()>; - /// Function that helps to generate the chosen node. - fn generate_chosen_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()>; -} - -impl CompileFDTHelper for StdMachine { - fn generate_cpu_nodes(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()> { - let node = "cpus"; - - let cpus_node_dep = fdt.begin_node(node)?; - fdt.set_property_u32("#address-cells", 0x02)?; - fdt.set_property_u32("#size-cells", 0x0)?; - - // Generate CPU topology - if self.cpu_topo.max_cpus > 0 && self.cpu_topo.max_cpus % 8 == 0 { - let cpu_map_node_dep = fdt.begin_node("cpu-map")?; - - let sockets = self.cpu_topo.max_cpus / 8; - for cluster in 0..u32::from(sockets) { - let clster = format!("cluster{}", cluster); - let cluster_node_dep = fdt.begin_node(&clster)?; - - for i in 0..2_u32 { - let sub_cluster = format!("cluster{}", i); - let sub_cluster_node_dep = fdt.begin_node(&sub_cluster)?; - - let core0 = "core0".to_string(); - let core0_node_dep = fdt.begin_node(&core0)?; - - let thread0 = "thread0".to_string(); - let thread0_node_dep = fdt.begin_node(&thread0)?; - fdt.set_property_u32("cpu", cluster * 8 + i * 4 + 10)?; - fdt.end_node(thread0_node_dep)?; - - let thread1 = "thread1".to_string(); - let thread1_node_dep = fdt.begin_node(&thread1)?; - fdt.set_property_u32("cpu", cluster * 8 + i * 4 + 10 + 1)?; - fdt.end_node(thread1_node_dep)?; - - fdt.end_node(core0_node_dep)?; - - let core1 = "core1".to_string(); - let core1_node_dep = fdt.begin_node(&core1)?; - - let thread0 = "thread0".to_string(); - let thread0_node_dep = fdt.begin_node(&thread0)?; - fdt.set_property_u32("cpu", cluster * 8 + i * 4 + 10 + 2)?; - fdt.end_node(thread0_node_dep)?; - - let thread1 = "thread1".to_string(); - let thread1_node_dep = fdt.begin_node(&thread1)?; - fdt.set_property_u32("cpu", cluster * 8 + i * 4 + 10 + 3)?; - fdt.end_node(thread1_node_dep)?; - - fdt.end_node(core1_node_dep)?; - - fdt.end_node(sub_cluster_node_dep)?; - } - fdt.end_node(cluster_node_dep)?; - } - fdt.end_node(cpu_map_node_dep)?; - } - - for cpu_index in 0..self.cpu_topo.max_cpus { - let mpidr = self.cpus[cpu_index as usize].arch().lock().unwrap().mpidr(); - - let node = format!("cpu@{:x}", mpidr); - let mpidr_node_dep = fdt.begin_node(&node)?; - fdt.set_property_u32( - "phandle", - u32::from(cpu_index) + device_tree::CPU_PHANDLE_START, - )?; - fdt.set_property_string("device_type", "cpu")?; - fdt.set_property_string("compatible", "arm,arm-v8")?; - if self.cpu_topo.max_cpus > 1 { - fdt.set_property_string("enable-method", "psci")?; - } - fdt.set_property_u64("reg", mpidr & 0x007F_FFFF)?; - fdt.end_node(mpidr_node_dep)?; - } - - fdt.end_node(cpus_node_dep)?; - - Ok(()) - } - - fn generate_memory_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()> { - let mem_base = MEM_LAYOUT[LayoutEntryType::Mem as usize].0; - let mem_size = self.sys_mem.memory_end_address().raw_value() - - MEM_LAYOUT[LayoutEntryType::Mem as usize].0; - let node = "memory"; - let memory_node_dep = fdt.begin_node(node)?; - fdt.set_property_string("device_type", "memory")?; - fdt.set_property_array_u64("reg", &[mem_base, mem_size as u64])?; - fdt.end_node(memory_node_dep)?; - - Ok(()) - } - - fn generate_devices_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()> { - // timer - let mut cells: Vec = Vec::new(); - for &irq in [13, 14, 11, 10].iter() { - cells.push(device_tree::GIC_FDT_IRQ_TYPE_PPI); - cells.push(irq); - cells.push(device_tree::IRQ_TYPE_LEVEL_HIGH); - } - let node 
= "timer"; - let timer_node_dep = fdt.begin_node(node)?; - fdt.set_property_string("compatible", "arm,armv8-timer")?; - fdt.set_property("always-on", &Vec::new())?; - fdt.set_property_array_u32("interrupts", &cells)?; - fdt.end_node(timer_node_dep)?; - - // clock - let node = "apb-pclk"; - let clock_node_dep = fdt.begin_node(node)?; - fdt.set_property_string("compatible", "fixed-clock")?; - fdt.set_property_string("clock-output-names", "clk24mhz")?; - fdt.set_property_u32("#clock-cells", 0x0)?; - fdt.set_property_u32("clock-frequency", 24_000_000)?; - fdt.set_property_u32("phandle", device_tree::CLK_PHANDLE)?; - fdt.end_node(clock_node_dep)?; - - // psci - let node = "psci"; - let psci_node_dep = fdt.begin_node(node)?; - fdt.set_property_string("compatible", "arm,psci-0.2")?; - fdt.set_property_string("method", "hvc")?; - fdt.end_node(psci_node_dep)?; - - for dev in self.sysbus.devices.iter() { - let mut locked_dev = dev.lock().unwrap(); - match locked_dev.get_type() { - SysBusDevType::PL011 => { - generate_serial_device_node(fdt, locked_dev.get_sys_resource().unwrap())? - } - SysBusDevType::Rtc => { - generate_rtc_device_node(fdt, locked_dev.get_sys_resource().unwrap())? - } - SysBusDevType::VirtioMmio => { - generate_virtio_devices_node(fdt, locked_dev.get_sys_resource().unwrap())? - } - SysBusDevType::FwCfg => { - generate_fwcfg_device_node(fdt, locked_dev.get_sys_resource().unwrap())?; - } - _ => (), - } - } - generate_flash_device_node(fdt)?; - - generate_pci_host_node(fdt)?; - - Ok(()) - } - - fn generate_chosen_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()> { - let node = "chosen"; - - let boot_source = self.boot_source.lock().unwrap(); - - let chosen_node_dep = fdt.begin_node(node)?; - let cmdline = &boot_source.kernel_cmdline.to_string(); - fdt.set_property_string("bootargs", cmdline.as_str())?; - - let pl011_property_string = - format!("/pl011@{:x}", MEM_LAYOUT[LayoutEntryType::Uart as usize].0); - fdt.set_property_string("stdout-path", &pl011_property_string)?; - - match &boot_source.initrd { - Some(initrd) => { - fdt.set_property_u64("linux,initrd-start", initrd.initrd_addr)?; - fdt.set_property_u64("linux,initrd-end", initrd.initrd_addr + initrd.initrd_size)?; - } - None => {} - } - fdt.end_node(chosen_node_dep)?; - - Ok(()) - } -} - -impl device_tree::CompileFDT for StdMachine { - fn generate_fdt_node(&self, fdt: &mut FdtBuilder) -> util::errors::Result<()> { - let node_dep = fdt.begin_node("")?; - - fdt.set_property_string("compatible", "linux,dummy-virt")?; - fdt.set_property_u32("#address-cells", 0x2)?; - fdt.set_property_u32("#size-cells", 0x2)?; - fdt.set_property_u32("interrupt-parent", device_tree::GIC_PHANDLE)?; - - self.generate_cpu_nodes(fdt)?; - self.generate_memory_node(fdt)?; - self.generate_devices_node(fdt)?; - self.generate_chosen_node(fdt)?; - self.irq_chip.as_ref().unwrap().generate_fdt_node(fdt)?; - - fdt.end_node(node_dep)?; - - Ok(()) - } -} diff --git a/machine/src/standard_vm/aarch64/syscall.rs b/machine/src/standard_vm/aarch64/syscall.rs deleted file mode 100644 index f44c5f765f701a3e72e94d43d7cd4a61afdc7065..0000000000000000000000000000000000000000 --- a/machine/src/standard_vm/aarch64/syscall.rs +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. -// -// StratoVirt is licensed under Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan -// PSL v2. 
-// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. - -use hypervisor::kvm::*; -use util::seccomp::{BpfRule, SeccompCmpOpt}; -use util::tap::{TUNSETIFF, TUNSETOFFLOAD, TUNSETVNETHDRSZ}; -use vfio::{ - VFIO_CHECK_EXTENSION, VFIO_DEVICE_GET_INFO, VFIO_DEVICE_GET_IRQ_INFO, - VFIO_DEVICE_GET_REGION_INFO, VFIO_DEVICE_RESET, VFIO_DEVICE_SET_IRQS, VFIO_GET_API_VERSION, - VFIO_GROUP_GET_DEVICE_FD, VFIO_GROUP_GET_STATUS, VFIO_GROUP_SET_CONTAINER, VFIO_IOMMU_MAP_DMA, - VFIO_IOMMU_UNMAP_DMA, VFIO_SET_IOMMU, -}; -use virtio::VhostKern::*; - -/// See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/linux/futex.h -const FUTEX_WAIT: u32 = 0; -const FUTEX_WAKE: u32 = 1; -const FUTEX_CMP_REQUEUE: u32 = 4; -const FUTEX_WAKE_OP: u32 = 5; -const FUTEX_WAIT_BITSET: u32 = 9; -const FUTEX_PRIVATE_FLAG: u32 = 128; -#[cfg(target_env = "gnu")] -const FUTEX_CLOCK_REALTIME: u32 = 256; -const FUTEX_WAIT_PRIVATE: u32 = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; -const FUTEX_WAKE_PRIVATE: u32 = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; -const FUTEX_CMP_REQUEUE_PRIVATE: u32 = FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG; -const FUTEX_WAKE_OP_PRIVATE: u32 = FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG; -const FUTEX_WAIT_BITSET_PRIVATE: u32 = FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG; - -/// See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/linux/fcntl.h -const F_GETFD: u32 = 1; -const F_SETFD: u32 = 2; -const F_SETFL: u32 = 4; -const F_LINUX_SPECIFIC_BASE: u32 = 1024; -const F_DUPFD_CLOEXEC: u32 = F_LINUX_SPECIFIC_BASE + 6; - -// See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/asm-generic/ioctls.h -const TCGETS: u32 = 0x5401; -const TCSETS: u32 = 0x5402; -const TIOCGWINSZ: u32 = 0x5413; -const FIOCLEX: u32 = 0x5451; -const FIONBIO: u32 = 0x5421; -const KVM_RUN: u32 = 0xae80; - -/// Create a syscall allowlist for seccomp. -/// -/// # Notes -/// This allowlist limit syscall with: -/// * aarch64-unknown-gnu: 45 syscalls -/// * aarch64-unknown-musl: 44 syscalls -/// To reduce performance losses, the syscall rules is ordered by frequency. 
-pub fn syscall_whitelist() -> Vec { - vec![ - BpfRule::new(libc::SYS_read), - BpfRule::new(libc::SYS_readv), - BpfRule::new(libc::SYS_write), - BpfRule::new(libc::SYS_writev), - ioctl_allow_list(), - BpfRule::new(libc::SYS_epoll_pwait), - BpfRule::new(libc::SYS_io_getevents), - BpfRule::new(libc::SYS_io_submit), - BpfRule::new(libc::SYS_dup), - BpfRule::new(libc::SYS_close), - BpfRule::new(libc::SYS_eventfd2), - BpfRule::new(libc::SYS_epoll_ctl), - BpfRule::new(libc::SYS_fdatasync), - BpfRule::new(libc::SYS_recvmsg), - BpfRule::new(libc::SYS_sendmsg), - BpfRule::new(libc::SYS_recvfrom), - BpfRule::new(libc::SYS_mremap), - BpfRule::new(libc::SYS_io_setup), - BpfRule::new(libc::SYS_brk), - BpfRule::new(libc::SYS_fcntl) - .add_constraint(SeccompCmpOpt::Eq, 1, F_DUPFD_CLOEXEC) - .add_constraint(SeccompCmpOpt::Eq, 1, F_SETFD) - .add_constraint(SeccompCmpOpt::Eq, 1, F_GETFD) - .add_constraint(SeccompCmpOpt::Eq, 1, F_SETFL), - BpfRule::new(libc::SYS_rt_sigprocmask), - BpfRule::new(libc::SYS_openat), - BpfRule::new(libc::SYS_sigaltstack), - BpfRule::new(libc::SYS_mmap), - #[cfg(target_env = "gnu")] - BpfRule::new(libc::SYS_mprotect), - BpfRule::new(libc::SYS_munmap), - BpfRule::new(libc::SYS_accept4), - BpfRule::new(libc::SYS_lseek), - futex_rule(), - BpfRule::new(libc::SYS_exit), - BpfRule::new(libc::SYS_exit_group), - BpfRule::new(libc::SYS_rt_sigreturn), - #[cfg(target_env = "musl")] - BpfRule::new(libc::SYS_tkill), - BpfRule::new(libc::SYS_newfstatat), - #[cfg(target_env = "gnu")] - BpfRule::new(libc::SYS_tgkill), - BpfRule::new(libc::SYS_gettid), - BpfRule::new(libc::SYS_getpid), - BpfRule::new(libc::SYS_fstat), - BpfRule::new(libc::SYS_pread64), - BpfRule::new(libc::SYS_pwrite64), - BpfRule::new(libc::SYS_statx), - BpfRule::new(libc::SYS_mkdirat), - BpfRule::new(libc::SYS_unlinkat), - madvise_rule(), - BpfRule::new(libc::SYS_msync), - BpfRule::new(libc::SYS_readlinkat), - ] -} - -/// Create a syscall bpf rule for syscall `ioctl`. 
-fn ioctl_allow_list() -> BpfRule { - BpfRule::new(libc::SYS_ioctl) - .add_constraint(SeccompCmpOpt::Eq, 1, TCGETS) - .add_constraint(SeccompCmpOpt::Eq, 1, TCSETS) - .add_constraint(SeccompCmpOpt::Eq, 1, TIOCGWINSZ) - .add_constraint(SeccompCmpOpt::Eq, 1, FIOCLEX) - .add_constraint(SeccompCmpOpt::Eq, 1, FIONBIO) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_RUN) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_DEVICE_ATTR) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_USER_MEMORY_REGION) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_IOEVENTFD) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SIGNAL_MSI) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_VSOCK_SET_GUEST_CID() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_VSOCK_SET_RUNNING() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_VRING_CALL() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_VRING_NUM() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_VRING_ADDR() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_VRING_BASE() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_GET_VRING_BASE() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_VRING_KICK() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_OWNER() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_FEATURES() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_GET_FEATURES() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_SET_MEM_TABLE() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_NET_SET_BACKEND() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_GET_FEATURES() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VHOST_RESET_OWNER() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, TUNSETIFF() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, TUNSETOFFLOAD() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, TUNSETVNETHDRSZ() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_GSI_ROUTING() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_IRQFD() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_DEVICE_SET_IRQS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_GROUP_GET_STATUS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_GET_API_VERSION() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_CHECK_EXTENSION() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_GROUP_SET_CONTAINER() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_SET_IOMMU() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_CREATE_DEVICE() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_IOMMU_MAP_DMA() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_IOMMU_UNMAP_DMA() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_GROUP_GET_DEVICE_FD() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_DEVICE_GET_INFO() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_DEVICE_RESET() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_DEVICE_GET_REGION_INFO() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, VFIO_DEVICE_GET_IRQ_INFO() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_API_VERSION() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_MP_STATE() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_VCPU_EVENTS() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_ONE_REG() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_DEVICE_ATTR() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_REG_LIST() as u32) - .add_constraint(SeccompCmpOpt::Eq, 1, KVM_ARM_VCPU_INIT() as u32) -} - -fn madvise_rule() -> BpfRule { - #[cfg(target_env = "musl")] - return 
BpfRule::new(libc::SYS_madvise) - .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_FREE as u32) - .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTNEED as u32) - .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_WILLNEED as u32) - .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTDUMP as u32); - #[cfg(target_env = "gnu")] - return BpfRule::new(libc::SYS_madvise) - .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTNEED as u32) - .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_WILLNEED as u32) - .add_constraint(SeccompCmpOpt::Eq, 2, libc::MADV_DONTDUMP as u32); -} - -fn futex_rule() -> BpfRule { - #[cfg(target_env = "musl")] - return BpfRule::new(libc::SYS_futex) - .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAIT) - .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAKE_PRIVATE) - .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAIT_PRIVATE) - .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_CMP_REQUEUE_PRIVATE) - .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAKE_OP_PRIVATE) - .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAIT_BITSET_PRIVATE); - #[cfg(target_env = "gnu")] - return BpfRule::new(libc::SYS_futex) - .add_constraint( - SeccompCmpOpt::Eq, - 1, - FUTEX_WAIT_BITSET_PRIVATE | FUTEX_CLOCK_REALTIME, - ) - .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAIT) - .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAKE_PRIVATE) - .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAIT_PRIVATE) - .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_CMP_REQUEUE_PRIVATE) - .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAKE_OP_PRIVATE) - .add_constraint(SeccompCmpOpt::Eq, 1, FUTEX_WAIT_BITSET_PRIVATE); -} diff --git a/machine/src/standard_vm/mod.rs b/machine/src/standard_vm/mod.rs deleted file mode 100644 index 05914a02e1040caff9ece2c16aebe7569f094265..0000000000000000000000000000000000000000 --- a/machine/src/standard_vm/mod.rs +++ /dev/null @@ -1,951 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. -// -// StratoVirt is licensed under Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan -// PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. - -#[allow(dead_code)] -#[cfg(target_arch = "aarch64")] -mod aarch64; -#[cfg(target_arch = "x86_64")] -mod x86_64; - -#[cfg(target_arch = "aarch64")] -pub use aarch64::StdMachine; -use machine_manager::event_loop::EventLoop; -use util::loop_context::{EventNotifier, NotifierCallback, NotifierOperation}; -use vmm_sys_util::epoll::EventSet; -use vmm_sys_util::eventfd::EventFd; -#[cfg(target_arch = "x86_64")] -pub use x86_64::StdMachine; - -#[allow(clippy::upper_case_acronyms)] -pub mod errors { - error_chain! 
{ - links { - AddressSpace(address_space::errors::Error, address_space::errors::ErrorKind); - Cpu(cpu::errors::Error, cpu::errors::ErrorKind); - Legacy(devices::LegacyErrs::Error, devices::LegacyErrs::ErrorKind); - PciErr(pci::errors::Error, pci::errors::ErrorKind); - Acpi(acpi::errors::Error, acpi::errors::ErrorKind); - MachineManager(machine_manager::config::errors::Error, machine_manager::config::errors::ErrorKind); - } - foreign_links{ - Io(std::io::Error); - } - errors { - InitPCIeHostErr { - display("Failed to init PCIe host.") - } - OpenFileErr(path: String) { - display("Failed to open file: {}.", path) - } - InitPflashErr { - display("Failed to init pflash device.") - } - RlzPflashErr { - display("Failed to realize pflash device.") - } - } - } -} - -use std::mem::size_of; -use std::ops::Deref; -use std::os::unix::io::RawFd; -use std::os::unix::prelude::AsRawFd; -use std::sync::{Arc, Condvar, Mutex}; - -use crate::errors::Result as MachineResult; -use crate::MachineOps; -#[cfg(target_arch = "x86_64")] -use acpi::AcpiGenericAddress; -use acpi::{ - AcpiRsdp, AcpiTable, AmlBuilder, TableLoader, ACPI_RSDP_FILE, ACPI_TABLE_FILE, - ACPI_TABLE_LOADER_FILE, TABLE_CHECKSUM_OFFSET, -}; -use cpu::{CpuTopology, CPU}; -use devices::legacy::FwCfgOps; -use error_chain::ChainedError; -use errors::{Result, ResultExt}; -use machine_manager::config::{ - get_netdev_config, get_pci_df, BlkDevConfig, ConfigCheck, DriveConfig, NetworkInterfaceConfig, - PciBdf, VmConfig, -}; -use machine_manager::machine::{DeviceInterface, KvmVmState}; -use machine_manager::qmp::{qmp_schema, QmpChannel, Response}; -use migration::MigrationManager; -use pci::hotplug::{handle_plug, handle_unplug_request}; -use pci::PciBus; -use util::byte_code::ByteCode; -use virtio::{qmp_balloon, qmp_query_balloon, Block, BlockState, VhostKern, VirtioNetState}; - -#[cfg(target_arch = "aarch64")] -use aarch64::{LayoutEntryType, MEM_LAYOUT}; -#[cfg(target_arch = "x86_64")] -use x86_64::{LayoutEntryType, MEM_LAYOUT}; - -#[cfg(target_arch = "x86_64")] -use self::x86_64::ich9_lpc::{PM_CTRL_OFFSET, PM_EVENT_OFFSET, RST_CTRL_OFFSET, SLEEP_CTRL_OFFSET}; - -trait StdMachineOps: AcpiBuilder { - fn init_pci_host(&self) -> Result<()>; - - /// Build all ACPI tables and RSDP, and add them to FwCfg as file entries. - /// - /// # Arguments - /// - /// `fw_cfg` - FwCfgOps trait object. 
-    fn build_acpi_tables(&self, fw_cfg: &Arc<Mutex<dyn FwCfgOps>>) -> Result<()>
-    where
-        Self: Sized,
-    {
-        let mut loader = TableLoader::new();
-        let acpi_tables = Arc::new(Mutex::new(Vec::new()));
-        loader.add_alloc_entry(ACPI_TABLE_FILE, acpi_tables.clone(), 64_u32, false)?;
-
-        let mut xsdt_entries = Vec::new();
-
-        #[cfg(target_arch = "x86_64")]
-        {
-            let facs_addr = Self::build_facs_table(&acpi_tables, &mut loader)
-                .chain_err(|| "Failed to build ACPI FACS table")?;
-            xsdt_entries.push(facs_addr);
-        }
-
-        let dsdt_addr = self
-            .build_dsdt_table(&acpi_tables, &mut loader)
-            .chain_err(|| "Failed to build ACPI DSDT table")?;
-        let fadt_addr = Self::build_fadt_table(&acpi_tables, &mut loader, dsdt_addr)
-            .chain_err(|| "Failed to build ACPI FADT table")?;
-        xsdt_entries.push(fadt_addr);
-
-        let madt_addr = self
-            .build_madt_table(&acpi_tables, &mut loader)
-            .chain_err(|| "Failed to build ACPI MADT table")?;
-        xsdt_entries.push(madt_addr);
-
-        let mcfg_addr = Self::build_mcfg_table(&acpi_tables, &mut loader)
-            .chain_err(|| "Failed to build ACPI MCFG table")?;
-        xsdt_entries.push(mcfg_addr);
-
-        let xsdt_addr = Self::build_xsdt_table(&acpi_tables, &mut loader, xsdt_entries)?;
-
-        let mut locked_fw_cfg = fw_cfg.lock().unwrap();
-        Self::build_rsdp(
-            &mut loader,
-            &mut *locked_fw_cfg as &mut dyn FwCfgOps,
-            xsdt_addr,
-        )
-        .chain_err(|| "Failed to build ACPI RSDP")?;
-
-        locked_fw_cfg
-            .add_file_entry(ACPI_TABLE_LOADER_FILE, loader.cmd_entries())
-            .chain_err(|| "Failed to add ACPI table loader file entry")?;
-        locked_fw_cfg
-            .add_file_entry(ACPI_TABLE_FILE, acpi_tables.lock().unwrap().to_vec())
-            .chain_err(|| "Failed to add ACPI-tables file entry")?;
-
-        Ok(())
-    }
-
-    fn add_fwcfg_device(&mut self) -> Result<Arc<Mutex<dyn FwCfgOps>>> {
-        bail!("Not implemented");
-    }
-
-    fn get_vm_state(&self) -> &Arc<(Mutex<KvmVmState>, Condvar)>;
-
-    fn get_cpu_topo(&self) -> &CpuTopology;
-
-    fn get_cpus(&self) -> &Vec<Arc<CPU>>;
-
-    fn get_vm_config(&self) -> &Mutex<VmConfig>;
-
-    /// Register event notifier for reset of standard machine.
-    ///
-    /// # Arguments
-    ///
-    /// * `reset_req` - Eventfd of the reset request.
-    /// * `clone_vm` - Reference of the StdMachine.
- fn register_reset_event( - &self, - reset_req: &EventFd, - clone_vm: Arc>, - ) -> MachineResult<()> { - let reset_req = reset_req.try_clone().unwrap(); - let reset_req_fd = reset_req.as_raw_fd(); - let reset_req_handler: Arc>> = - Arc::new(Mutex::new(Box::new(move |_, _| { - let _ret = reset_req.read().unwrap(); - if let Err(e) = StdMachine::handle_reset_request(&clone_vm) { - error!( - "Fail to reboot standard VM, {}", - error_chain::ChainedError::display_chain(&e) - ); - } - - None - }))); - let notifier = EventNotifier::new( - NotifierOperation::AddShared, - reset_req_fd, - None, - EventSet::IN, - vec![reset_req_handler], - ); - EventLoop::update_event(vec![notifier], None) - .chain_err(|| "Failed to register event notifier.")?; - Ok(()) - } - - #[cfg(target_arch = "x86_64")] - fn register_acpi_shutdown_event( - &self, - shutdown_req: &EventFd, - clone_vm: Arc>, - ) -> MachineResult<()> { - let shutdown_req = shutdown_req.try_clone().unwrap(); - let shutdown_req_fd = shutdown_req.as_raw_fd(); - let shutdown_req_handler: Arc>> = - Arc::new(Mutex::new(Box::new(move |_, _| { - let _ret = shutdown_req.read().unwrap(); - StdMachine::handle_shutdown_request(&clone_vm); - let notifiers = vec![EventNotifier::new( - NotifierOperation::Delete, - shutdown_req_fd, - None, - EventSet::IN, - Vec::new(), - )]; - Some(notifiers) - }))); - let notifier = EventNotifier::new( - NotifierOperation::AddShared, - shutdown_req_fd, - None, - EventSet::IN, - vec![shutdown_req_handler], - ); - EventLoop::update_event(vec![notifier], None) - .chain_err(|| "Failed to register event notifier.")?; - Ok(()) - } -} - -/// Trait that helps to build ACPI tables. -/// Standard machine struct should at least implement `build_dsdt_table`, `build_madt_table` -/// and `build_mcfg_table` function. -trait AcpiBuilder { - /// Build ACPI DSDT table, returns the offset of ACPI DSDT table in `acpi_data`. - /// - /// # Arguments - /// - /// `acpi_data` - Bytes streams that ACPI tables converts to. - /// `loader` - ACPI table loader. - fn build_dsdt_table( - &self, - _acpi_data: &Arc>>, - _loader: &mut TableLoader, - ) -> Result { - bail!("Not implemented"); - } - - /// Build ACPI MADT table, returns the offset of ACPI MADT table in `acpi_data`. - /// - /// # Arguments - /// - /// `acpi_data` - Bytes streams that ACPI tables converts to. - /// `loader` - ACPI table loader. - fn build_madt_table( - &self, - _acpi_data: &Arc>>, - _loader: &mut TableLoader, - ) -> Result { - bail!("Not implemented"); - } - - /// Build ACPI MCFG table, returns the offset of ACPI MCFG table in `acpi_data`. - /// - /// # Arguments - /// - /// `acpi_data` - Bytes streams that ACPI tables converts to. - /// `loader` - ACPI table loader. - fn build_mcfg_table(acpi_data: &Arc>>, loader: &mut TableLoader) -> Result - where - Self: Sized, - { - let mut mcfg = AcpiTable::new(*b"MCFG", 1, *b"STRATO", *b"VIRTMCFG", 1); - let ecam_addr: u64 = MEM_LAYOUT[LayoutEntryType::PcieEcam as usize].0; - // Bits 20~28 (totally 9 bits) in PCIE ECAM represents bus number. 
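As a concrete check of the comment above and the two assignments that follow: each PCIe bus needs 1 MiB of ECAM space (32 devices x 8 functions x 4 KiB of config space per function), so shifting the window size right by 20 counts buses. Taking the x86_64 layout that appears later in this diff (a PcieEcam window of 0x1000_0000 bytes), a small sketch:

    // 0x1000_0000 >> 20 = 256 buses; the 9-bit mask leaves the value unchanged,
    // so the MCFG "End Bus Number" byte below is written as 256 - 1 = 255.
    let max_nr_bus = (0x1000_0000_u64 >> 20) & ((1_u64 << 9) - 1);
    assert_eq!(max_nr_bus, 256);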
- let bus_number_mask = (1 << 9) - 1; - let max_nr_bus = (MEM_LAYOUT[LayoutEntryType::PcieEcam as usize].1 >> 20) & bus_number_mask; - - // Reserved - mcfg.append_child(&[0_u8; 8]); - // Base address of PCIE ECAM - mcfg.append_child(ecam_addr.as_bytes()); - // PCI Segment Group Number - mcfg.append_child(0_u16.as_bytes()); - // Start Bus Number and End Bus Number - mcfg.append_child(&[0_u8, (max_nr_bus - 1) as u8]); - // Reserved - mcfg.append_child(&[0_u8; 4]); - - let mut acpi_data_locked = acpi_data.lock().unwrap(); - let mcfg_begin = acpi_data_locked.len() as u32; - acpi_data_locked.extend(mcfg.aml_bytes()); - let mcfg_end = acpi_data_locked.len() as u32; - drop(acpi_data_locked); - - loader.add_cksum_entry( - ACPI_TABLE_FILE, - mcfg_begin + TABLE_CHECKSUM_OFFSET, - mcfg_begin, - mcfg_end - mcfg_begin, - )?; - Ok(mcfg_begin as u64) - } - - /// Build ACPI FADT table, returns the offset of ACPI FADT table in `acpi_data`. - /// - /// # Arguments - /// - /// `acpi_data` - Bytes streams that ACPI tables converts to. - /// `loader` - ACPI table loader. - /// `dsdt_addr` - Offset of ACPI DSDT table in `acpi_data`. - fn build_fadt_table( - acpi_data: &Arc>>, - loader: &mut TableLoader, - dsdt_addr: u64, - ) -> Result - where - Self: Sized, - { - let mut fadt = AcpiTable::new(*b"FACP", 6, *b"STRATO", *b"VIRTFSCP", 1); - - fadt.set_table_len(208_usize); - // PM1A_EVENT bit, offset is 56. - #[cfg(target_arch = "x86_64")] - fadt.set_field(56, 0x600); - // PM1A_CONTROL bit, offset is 64. - #[cfg(target_arch = "x86_64")] - fadt.set_field(64, 0x604); - // PM_TMR_BLK bit, offset is 76. - #[cfg(target_arch = "x86_64")] - fadt.set_field(76, 0x608); - // FADT flag: disable HW_REDUCED_ACPI bit. - fadt.set_field(112, 1 << 10 | 1 << 8); - // FADT minor revision - fadt.set_field(131, 3); - // X_PM_TMR_BLK bit, offset is 208. - #[cfg(target_arch = "x86_64")] - fadt.append_child(&AcpiGenericAddress::new_io_address(0x608_u32).aml_bytes()); - // FADT table size is fixed. - fadt.set_table_len(276_usize); - - #[cfg(target_arch = "x86_64")] - { - // Reset Register bit, offset is 116. - fadt.set_field(116, 0x01_u8); - fadt.set_field(117, 0x08_u8); - fadt.set_field(120, RST_CTRL_OFFSET as u64); - fadt.set_field(128, 0x0F_u8); - // PM1a event register bit, offset is 148. - fadt.set_field(148, 0x01_u8); - fadt.set_field(149, 0x20_u8); - fadt.set_field(152, PM_EVENT_OFFSET as u64); - // PM1a contol register bit, offset is 172. - fadt.set_field(172, 0x01_u8); - fadt.set_field(173, 0x10_u8); - fadt.set_field(176, PM_CTRL_OFFSET as u64); - // Sleep control register, offset is 244. - fadt.set_field(244, 0x01_u8); - fadt.set_field(245, 0x08_u8); - fadt.set_field(248, SLEEP_CTRL_OFFSET as u64); - // Sleep status tegister, offset is 256. - fadt.set_field(256, 0x01_u8); - fadt.set_field(257, 0x08_u8); - fadt.set_field(260, SLEEP_CTRL_OFFSET as u64); - } - - let mut locked_acpi_data = acpi_data.lock().unwrap(); - let fadt_begin = locked_acpi_data.len() as u32; - locked_acpi_data.extend(fadt.aml_bytes()); - let fadt_end = locked_acpi_data.len() as u32; - drop(locked_acpi_data); - - // xDSDT address field's offset in FADT. - let xdsdt_offset = 140_u32; - // Size of xDSDT address. 
- let xdsdt_size = 8_u8; - loader.add_pointer_entry( - ACPI_TABLE_FILE, - fadt_begin + xdsdt_offset, - xdsdt_size, - ACPI_TABLE_FILE, - dsdt_addr as u32, - )?; - - loader.add_cksum_entry( - ACPI_TABLE_FILE, - fadt_begin + TABLE_CHECKSUM_OFFSET, - fadt_begin, - fadt_end - fadt_begin, - )?; - - Ok(fadt_begin as u64) - } - - /// Build ACPI FACS table, returns the offset of ACPI FACS table in `acpi_data`. - /// - /// # Arguments - /// - /// `acpi_data` - Bytes streams that ACPI tables converts to. - /// `loader` - ACPI table loader. - #[cfg(target_arch = "x86_64")] - fn build_facs_table(acpi_data: &Arc>>, loader: &mut TableLoader) -> Result - where - Self: Sized, - { - let mut facs_data = vec![0_u8; 0x40]; - // FACS table signature. - facs_data[0] = b'F'; - facs_data[1] = b'A'; - facs_data[2] = b'C'; - facs_data[3] = b'S'; - // FACS table length. - facs_data[4] = 0x40; - - let mut locked_acpi_data = acpi_data.lock().unwrap(); - let facs_begin = locked_acpi_data.len() as u32; - locked_acpi_data.extend(facs_data); - let facs_end = locked_acpi_data.len() as u32; - drop(locked_acpi_data); - - loader.add_cksum_entry( - ACPI_TABLE_FILE, - facs_begin + TABLE_CHECKSUM_OFFSET, - facs_begin, - facs_end - facs_begin, - )?; - - Ok(facs_begin as u64) - } - - /// Build ACPI XSDT table, returns the offset of ACPI XSDT table in `acpi_data`. - /// - /// # Arguments - /// - /// `acpi_data` - Bytes streams that ACPI tables converts to. - /// `loader` - ACPI table loader. - /// `xsdt_entries` - Offset of table entries in `acpi_data`, such as FADT, MADT, MCFG table. - fn build_xsdt_table( - acpi_data: &Arc>>, - loader: &mut TableLoader, - xsdt_entries: Vec, - ) -> Result - where - Self: Sized, - { - let mut xsdt = AcpiTable::new(*b"XSDT", 1, *b"STRATO", *b"VIRTXSDT", 1); - - xsdt.set_table_len(xsdt.table_len() + size_of::() * xsdt_entries.len()); - - let mut locked_acpi_data = acpi_data.lock().unwrap(); - let xsdt_begin = locked_acpi_data.len() as u32; - locked_acpi_data.extend(xsdt.aml_bytes()); - let xsdt_end = locked_acpi_data.len() as u32; - drop(locked_acpi_data); - - // Offset of table entries in XSDT. - let mut entry_offset = 36_u32; - // Size of each entry. - let entry_size = size_of::() as u8; - for entry in xsdt_entries { - loader.add_pointer_entry( - ACPI_TABLE_FILE, - xsdt_begin + entry_offset, - entry_size, - ACPI_TABLE_FILE, - entry as u32, - )?; - entry_offset += u32::from(entry_size); - } - - loader.add_cksum_entry( - ACPI_TABLE_FILE, - xsdt_begin + TABLE_CHECKSUM_OFFSET, - xsdt_begin, - xsdt_end - xsdt_begin, - )?; - - Ok(xsdt_begin as u64) - } - - /// Build ACPI RSDP and add it to FwCfg as file-entry. - /// - /// # Arguments - /// - /// `loader` - ACPI table loader. - /// `fw_cfg`: FwCfgOps trait object. - /// `xsdt_addr` - Offset of ACPI XSDT table in `acpi_data`. 
- fn build_rsdp(loader: &mut TableLoader, fw_cfg: &mut dyn FwCfgOps, xsdt_addr: u64) -> Result<()> - where - Self: Sized, - { - let rsdp = AcpiRsdp::new(*b"STRATO"); - let rsdp_data = Arc::new(Mutex::new(rsdp.aml_bytes().to_vec())); - - loader.add_alloc_entry(ACPI_RSDP_FILE, rsdp_data.clone(), 16, true)?; - - let xsdt_offset = 24_u32; - let xsdt_size = 8_u8; - loader.add_pointer_entry( - ACPI_RSDP_FILE, - xsdt_offset, - xsdt_size, - ACPI_TABLE_FILE, - xsdt_addr as u32, - )?; - - let cksum_offset = 8_u32; - let exd_cksum_offset = 32_u32; - loader.add_cksum_entry(ACPI_RSDP_FILE, cksum_offset, 0, 20)?; - loader.add_cksum_entry(ACPI_RSDP_FILE, exd_cksum_offset, 0, 36)?; - - fw_cfg.add_file_entry(ACPI_RSDP_FILE, rsdp_data.lock().unwrap().to_vec())?; - - Ok(()) - } -} - -fn get_device_bdf(bus: Option, addr: Option) -> Result { - let mut pci_bdf = PciBdf { - bus: bus.unwrap_or_else(|| String::from("pcie.0")), - addr: (0, 0), - }; - let addr = addr.unwrap_or_else(|| String::from("0x0")); - pci_bdf.addr = get_pci_df(&addr).chain_err(|| "Failed to get device num or function num")?; - Ok(pci_bdf) -} - -impl StdMachine { - fn plug_virtio_pci_blk( - &mut self, - pci_bdf: &PciBdf, - args: &qmp_schema::DeviceAddArgument, - ) -> Result<()> { - let multifunction = args.multifunction.unwrap_or(false); - let drive = if let Some(drv) = &args.drive { - drv - } else { - bail!("Drive not set"); - }; - - let blk = if let Some(conf) = self.get_vm_config().lock().unwrap().drives.get(drive) { - let dev = BlkDevConfig { - id: args.id.clone(), - path_on_host: conf.path_on_host.clone(), - read_only: conf.read_only, - direct: conf.direct, - serial_num: args.serial_num.clone(), - iothread: args.iothread.clone(), - iops: conf.iops, - }; - dev.check()?; - dev - } else { - bail!("Drive not found"); - }; - - let blk_id = blk.id.clone(); - let blk = Arc::new(Mutex::new(Block::new(blk))); - self.add_virtio_pci_device(&args.id, pci_bdf, blk.clone(), multifunction) - .chain_err(|| "Failed to add virtio pci block device")?; - - MigrationManager::register_device_instance_mutex_with_id( - BlockState::descriptor(), - blk, - &blk_id, - ); - Ok(()) - } - - fn plug_virtio_pci_net( - &mut self, - pci_bdf: &PciBdf, - args: &qmp_schema::DeviceAddArgument, - ) -> Result<()> { - let multifunction = args.multifunction.unwrap_or(false); - let netdev = if let Some(dev) = &args.netdev { - dev - } else { - bail!("Netdev not set"); - }; - - let dev = if let Some(conf) = self.get_vm_config().lock().unwrap().netdevs.get(netdev) { - let dev = NetworkInterfaceConfig { - id: args.id.clone(), - host_dev_name: conf.ifname.clone(), - mac: args.mac.clone(), - tap_fds: conf.tap_fds.clone(), - vhost_type: conf.vhost_type.clone(), - vhost_fds: conf.vhost_fds.clone(), - iothread: args.iothread.clone(), - queues: conf.queues, - mq: conf.queues > 2, - }; - dev.check()?; - dev - } else { - bail!("Netdev not found"); - }; - - if dev.vhost_type.is_some() { - let net = Arc::new(Mutex::new(VhostKern::Net::new(&dev, self.get_sys_mem()))); - self.add_virtio_pci_device(&args.id, pci_bdf, net, multifunction) - .chain_err(|| "Failed to add virtio net device")?; - } else { - let net_id = dev.id.clone(); - let net = Arc::new(Mutex::new(virtio::Net::new(dev))); - self.add_virtio_pci_device(&args.id, pci_bdf, net.clone(), multifunction) - .chain_err(|| "Failed to add virtio net device")?; - MigrationManager::register_device_instance_mutex_with_id( - VirtioNetState::descriptor(), - net, - &net_id, - ); - } - Ok(()) - } - - fn plug_vfio_pci_device( - &mut self, - bdf: &PciBdf, 
- args: &qmp_schema::DeviceAddArgument, - ) -> Result<()> { - if args.host.is_none() { - bail!("Option \"host\" not provided."); - } - - if let Err(e) = self.create_vfio_pci_device( - &args.id, - bdf, - args.host.as_ref().unwrap(), - args.multifunction.map_or(false, |m| m), - ) { - error!("{}", e.display_chain()); - bail!("Failed to plug vfio-pci device."); - } - Ok(()) - } -} - -impl DeviceInterface for StdMachine { - fn query_status(&self) -> Response { - let vm_state = self.get_vm_state(); - let vmstate = vm_state.deref().0.lock().unwrap(); - let qmp_state = match *vmstate { - KvmVmState::Running => qmp_schema::StatusInfo { - singlestep: false, - running: true, - status: qmp_schema::RunState::running, - }, - KvmVmState::Paused => qmp_schema::StatusInfo { - singlestep: false, - running: true, - status: qmp_schema::RunState::paused, - }, - _ => Default::default(), - }; - - Response::create_response(serde_json::to_value(&qmp_state).unwrap(), None) - } - - fn query_cpus(&self) -> Response { - let mut cpu_vec: Vec = Vec::new(); - let cpu_topo = self.get_cpu_topo(); - let cpus = self.get_cpus(); - for cpu_index in 0..cpu_topo.max_cpus { - if cpu_topo.get_mask(cpu_index as usize) == 1 { - let thread_id = cpus[cpu_index as usize].tid(); - let (socketid, coreid, threadid) = cpu_topo.get_topo(cpu_index as usize); - let cpu_instance = qmp_schema::CpuInstanceProperties { - node_id: None, - socket_id: Some(socketid as isize), - core_id: Some(coreid as isize), - thread_id: Some(threadid as isize), - }; - let cpu_info = qmp_schema::CpuInfo::x86 { - current: true, - qom_path: String::from("/machine/unattached/device[") - + &cpu_index.to_string() - + "]", - halted: false, - props: Some(cpu_instance), - CPU: cpu_index as isize, - thread_id: thread_id as isize, - x86: qmp_schema::CpuInfoX86 {}, - }; - cpu_vec.push(serde_json::to_value(cpu_info).unwrap()); - } - } - Response::create_response(cpu_vec.into(), None) - } - - fn query_hotpluggable_cpus(&self) -> Response { - Response::create_empty_response() - } - - fn balloon(&self, value: u64) -> Response { - if qmp_balloon(value) { - return Response::create_empty_response(); - } - Response::create_error_response( - qmp_schema::QmpErrorClass::DeviceNotActive( - "No balloon device has been activated".to_string(), - ), - None, - ) - } - - fn query_balloon(&self) -> Response { - if let Some(actual) = qmp_query_balloon() { - let ret = qmp_schema::BalloonInfo { actual }; - return Response::create_response(serde_json::to_value(&ret).unwrap(), None); - } - Response::create_error_response( - qmp_schema::QmpErrorClass::DeviceNotActive( - "No balloon device has been activated".to_string(), - ), - None, - ) - } - - fn device_add(&mut self, args: Box) -> Response { - if let Err(e) = self.check_device_id_existed(&args.id) { - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ); - } - - // Use args.bus.clone() and args.addr.clone() because args borrowed in the following process. 
- let pci_bdf = match get_device_bdf(args.bus.clone(), args.addr.clone()) { - Ok(bdf) => bdf, - Err(e) => { - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ) - } - }; - - let driver = args.driver.as_str(); - match driver { - "virtio-blk-pci" => { - if let Err(e) = self.plug_virtio_pci_blk(&pci_bdf, args.as_ref()) { - error!("{}", e.display_chain()); - let err_str = format!("Failed to add virtio pci blk: {}", e); - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(err_str), - None, - ); - } - } - "virtio-net-pci" => { - if let Err(e) = self.plug_virtio_pci_net(&pci_bdf, args.as_ref()) { - error!("{}", e.display_chain()); - let err_str = format!("Failed to add virtio pci net: {}", e); - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(err_str), - None, - ); - } - } - "vfio-pci" => { - if let Err(e) = self.plug_vfio_pci_device(&pci_bdf, args.as_ref()) { - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ); - } - } - _ => { - let err_str = format!("Failed to add device: Driver {} is not support", driver); - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(err_str), - None, - ); - } - } - - // It's safe to call get_pci_host().unwrap() because it has been checked before. - let locked_pci_host = self.get_pci_host().unwrap().lock().unwrap(); - if let Some((bus, dev)) = PciBus::find_attached_bus(&locked_pci_host.root_bus, &args.id) { - match handle_plug(&bus, &dev) { - Ok(()) => Response::create_empty_response(), - Err(e) => { - if let Err(e) = PciBus::detach_device(&bus, &dev) { - error!("{}", e.display_chain()); - error!("Failed to detach device"); - } - let err_str = format!("Failed to plug device: {}", e); - Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(err_str), - None, - ) - } - } - } else { - Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError( - "Failed to add device: Bus not found".to_string(), - ), - None, - ) - } - } - - fn device_del(&mut self, device_id: String) -> Response { - let pci_host = match self.get_pci_host() { - Ok(host) => host, - Err(e) => { - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ) - } - }; - - let locked_pci_host = pci_host.lock().unwrap(); - if let Some((bus, dev)) = PciBus::find_attached_bus(&locked_pci_host.root_bus, &device_id) { - match handle_unplug_request(&bus, &dev) { - Ok(()) => Response::create_empty_response(), - Err(e) => Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ), - } - } else { - let err_str = format!("Failed to remove device: id {} not found", &device_id); - Response::create_error_response(qmp_schema::QmpErrorClass::GenericError(err_str), None) - } - } - - fn blockdev_add(&self, args: Box) -> Response { - let read_only = args.read_only.unwrap_or(false); - let direct = if let Some(cache) = args.cache { - cache.direct.unwrap_or(true) - } else { - true - }; - let config = DriveConfig { - id: args.node_name, - path_on_host: args.file.filename, - read_only, - direct, - iops: args.iops, - }; - - if let Err(e) = config.check() { - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ); - } - // Check whether path is valid after configuration check - if let Err(e) = config.check_path() { - return 
Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ); - } - match self - .get_vm_config() - .lock() - .unwrap() - .add_drive_with_config(config) - { - Ok(()) => Response::create_empty_response(), - Err(e) => Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ), - } - } - - fn blockdev_del(&self, node_name: String) -> Response { - match self - .get_vm_config() - .lock() - .unwrap() - .del_drive_by_id(&node_name) - { - Ok(()) => Response::create_empty_response(), - Err(e) => Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ), - } - } - - fn netdev_add(&mut self, args: Box) -> Response { - let config = match get_netdev_config(args) { - Ok(conf) => conf, - Err(e) => { - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ); - } - }; - - if let Err(e) = config.check() { - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ); - } - - match self - .get_vm_config() - .lock() - .unwrap() - .add_netdev_with_config(config) - { - Ok(()) => Response::create_empty_response(), - Err(e) => Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ), - } - } - - fn netdev_del(&mut self, id: String) -> Response { - match self.get_vm_config().lock().unwrap().del_netdev_by_id(&id) { - Ok(()) => Response::create_empty_response(), - Err(e) => Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ), - } - } - - fn getfd(&self, fd_name: String, if_fd: Option) -> Response { - if let Some(fd) = if_fd { - QmpChannel::set_fd(fd_name, fd); - Response::create_empty_response() - } else { - let err_resp = - qmp_schema::QmpErrorClass::GenericError("Invalid SCM message".to_string()); - Response::create_error_response(err_resp, None) - } - } -} diff --git a/machine/src/standard_vm/x86_64/mod.rs b/machine/src/standard_vm/x86_64/mod.rs deleted file mode 100644 index 954013f167bb962bd00ed3e4ff799e86202565cb..0000000000000000000000000000000000000000 --- a/machine/src/standard_vm/x86_64/mod.rs +++ /dev/null @@ -1,804 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. -// -// StratoVirt is licensed under Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan -// PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. 
- -pub(crate) mod ich9_lpc; -mod mch; -mod syscall; - -use std::fs::OpenOptions; -use std::io::{Seek, SeekFrom}; -use std::mem::size_of; -use std::ops::Deref; -use std::sync::{Arc, Condvar, Mutex}; - -use acpi::{ - AcpiIoApic, AcpiLocalApic, AcpiTable, AmlBuilder, AmlDevice, AmlInteger, AmlNameDecl, - AmlPackage, AmlScope, AmlScopeBuilder, AmlString, TableLoader, ACPI_TABLE_FILE, - IOAPIC_BASE_ADDR, LAPIC_BASE_ADDR, TABLE_CHECKSUM_OFFSET, -}; -use address_space::{AddressSpace, GuestAddress, HostMemMapping, Region}; -use boot_loader::{load_linux, BootLoaderConfig}; -use cpu::{CPUBootConfig, CPUInterface, CpuTopology, CPU}; -use devices::legacy::{FwCfgEntryType, FwCfgIO, FwCfgOps, PFlash, Serial, RTC, SERIAL_ADDR}; -use error_chain::ChainedError; -use hypervisor::kvm::KVM_FDS; -use kvm_bindings::{kvm_pit_config, KVM_PIT_SPEAKER_DUMMY}; -use machine_manager::config::{BootSource, PFlashConfig, SerialConfig, VmConfig}; -use machine_manager::machine::{ - KvmVmState, MachineAddressInterface, MachineExternalInterface, MachineInterface, - MachineLifecycle, MigrateInterface, -}; -use machine_manager::qmp::{qmp_schema, QmpChannel, Response}; -use migration::{MigrationManager, MigrationStatus}; -use pci::{PciDevOps, PciHost}; -use sysbus::{SysBus, SysBusDevOps}; -use util::loop_context::EventLoopManager; -use util::seccomp::BpfRule; -use util::set_termi_canon_mode; -use vmm_sys_util::eventfd::EventFd; - -use self::ich9_lpc::SLEEP_CTRL_OFFSET; - -use super::errors::{ErrorKind, Result}; -use super::{AcpiBuilder, StdMachineOps}; -use crate::errors::{ErrorKind as MachineErrorKind, Result as MachineResult}; -use crate::MachineOps; -use mch::Mch; -use syscall::syscall_whitelist; -use util::byte_code::ByteCode; - -const VENDOR_ID_INTEL: u16 = 0x8086; - -/// The type of memory layout entry on x86_64 -#[repr(usize)] -#[allow(dead_code)] -pub enum LayoutEntryType { - MemBelow4g = 0_usize, - PcieEcam, - PcieMmio, - Mmio, - IoApic, - LocalApic, - IdentTss, - MemAbove4g, -} - -/// Layout of x86_64 -pub const MEM_LAYOUT: &[(u64, u64)] = &[ - (0, 0x8000_0000), // MemBelow4g - (0xB000_0000, 0x1000_0000), // PcieEcam - (0xC000_0000, 0x3000_0000), // PcieMmio - (0xF010_0000, 0x200), // Mmio - (0xFEC0_0000, 0x10_0000), // IoApic - (0xFEE0_0000, 0x10_0000), // LocalApic - (0xFEF0_C000, 0x4000), // Identity map address and TSS - (0x1_0000_0000, 0x80_0000_0000), // MemAbove4g -]; - -/// Standard machine structure. -pub struct StdMachine { - /// `vCPU` topology, support sockets, cores, threads. - cpu_topo: CpuTopology, - /// `vCPU` devices. - cpus: Vec>, - /// IO address space. - sys_io: Arc, - /// Memory address space. - pub sys_mem: Arc, - /// System bus. - sysbus: SysBus, - /// PCI/PCIe host bridge. - pci_host: Arc>, - /// VM running state. - vm_state: Arc<(Mutex, Condvar)>, - /// Vm boot_source config. - boot_source: Arc>, - /// VM power button, handle VM `Shutdown` event. 
- power_button: EventFd, - vm_config: Mutex, -} - -impl StdMachine { - pub fn new(vm_config: &VmConfig) -> MachineResult { - use crate::errors::ResultExt; - - let cpu_topo = CpuTopology::new(vm_config.machine_config.nr_cpus); - let sys_io = AddressSpace::new(Region::init_container_region(1 << 16)) - .chain_err(|| MachineErrorKind::CrtMemSpaceErr)?; - let sys_mem = AddressSpace::new(Region::init_container_region(u64::max_value())) - .chain_err(|| MachineErrorKind::CrtIoSpaceErr)?; - let sysbus = SysBus::new( - &sys_io, - &sys_mem, - (5, 15), - ( - MEM_LAYOUT[LayoutEntryType::Mmio as usize].0, - MEM_LAYOUT[LayoutEntryType::Mmio as usize + 1].0, - ), - ); - // Machine state init - let vm_state = Arc::new((Mutex::new(KvmVmState::Created), Condvar::new())); - - Ok(StdMachine { - cpu_topo, - cpus: Vec::new(), - sys_io: sys_io.clone(), - sys_mem: sys_mem.clone(), - sysbus, - pci_host: Arc::new(Mutex::new(PciHost::new( - &sys_io, - &sys_mem, - MEM_LAYOUT[LayoutEntryType::PcieEcam as usize], - MEM_LAYOUT[LayoutEntryType::PcieMmio as usize], - ))), - boot_source: Arc::new(Mutex::new(vm_config.clone().boot_source)), - vm_state, - power_button: EventFd::new(libc::EFD_NONBLOCK) - .chain_err(|| MachineErrorKind::InitEventFdErr("power_button".to_string()))?, - vm_config: Mutex::new(vm_config.clone()), - }) - } - - pub fn handle_reset_request(vm: &Arc>) -> MachineResult<()> { - use crate::errors::ResultExt as MachineResultExt; - - let locked_vm = vm.lock().unwrap(); - - for (cpu_index, cpu) in locked_vm.cpus.iter().enumerate() { - MachineResultExt::chain_err(cpu.kick(), || { - format!("Failed to kick vcpu{}", cpu_index) - })?; - - cpu.set_to_boot_state(); - } - - for dev in locked_vm.sysbus.devices.iter() { - MachineResultExt::chain_err(dev.lock().unwrap().reset(), || { - "Fail to reset sysbus device" - })?; - } - MachineResultExt::chain_err(locked_vm.pci_host.lock().unwrap().reset(), || { - "Fail to reset pci host" - })?; - - for (cpu_index, cpu) in locked_vm.cpus.iter().enumerate() { - MachineResultExt::chain_err(cpu.reset(), || { - format!("Failed to reset vcpu{}", cpu_index) - })?; - } - - Ok(()) - } - - pub fn handle_shutdown_request(vm: &Arc>) -> bool { - let locked_vm = vm.lock().unwrap(); - for (cpu_index, cpu) in locked_vm.cpus.iter().enumerate() { - if let Err(e) = cpu.destroy() { - error!( - "Failed to destroy vcpu{}, error is {}", - cpu_index, - e.display_chain() - ); - } - } - - let mut vmstate = locked_vm.vm_state.0.lock().unwrap(); - *vmstate = KvmVmState::Shutdown; - true - } - - fn arch_init() -> MachineResult<()> { - use crate::errors::ResultExt; - - let kvm_fds = KVM_FDS.load(); - let vm_fd = kvm_fds.vm_fd.as_ref().unwrap(); - let identity_addr: u64 = MEM_LAYOUT[LayoutEntryType::IdentTss as usize].0; - - ioctl_iow_nr!( - KVM_SET_IDENTITY_MAP_ADDR, - kvm_bindings::KVMIO, - 0x48, - std::os::raw::c_ulong - ); - // Safe because the following ioctl only sets identity map address to KVM. - unsafe { - vmm_sys_util::ioctl::ioctl_with_ref(vm_fd, KVM_SET_IDENTITY_MAP_ADDR(), &identity_addr); - } - // Page table takes 1 page, TSS takes the following 3 pages. 
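To make the comment above concrete (a sketch based on the MEM_LAYOUT values defined earlier in this file): the IdentTss entry reserves 0x4000 bytes at 0xFEF0_C000, that is four 4 KiB pages, where the first page holds the identity map and the next three hold the TSS, so the TSS address handed to KVM below works out to:

    // Identity map page at 0xFEF0_C000; the TSS occupies the following three pages.
    let identity_addr: u64 = 0xFEF0_C000;
    let tss_addr = identity_addr + 0x1000;
    assert_eq!(tss_addr, 0xFEF0_D000);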
- vm_fd - .set_tss_address((identity_addr + 0x1000) as usize) - .chain_err(|| MachineErrorKind::SetTssErr)?; - - let pit_config = kvm_pit_config { - flags: KVM_PIT_SPEAKER_DUMMY, - pad: Default::default(), - }; - vm_fd - .create_pit2(pit_config) - .chain_err(|| MachineErrorKind::CrtPitErr)?; - Ok(()) - } - - fn init_ich9_lpc(&self, vm: Arc>) -> Result<()> { - use super::errors::ResultExt; - - let clone_vm = vm.clone(); - let root_bus = Arc::downgrade(&self.pci_host.lock().unwrap().root_bus); - let ich = ich9_lpc::LPCBridge::new(root_bus, self.sys_io.clone()); - self.register_reset_event(&ich.reset_req, vm) - .chain_err(|| "Fail to register reset event in LPC")?; - self.register_acpi_shutdown_event(&ich.shutdown_req, clone_vm) - .chain_err(|| "Fail to register shutdown event in LPC")?; - PciDevOps::realize(ich)?; - Ok(()) - } -} - -impl StdMachineOps for StdMachine { - fn init_pci_host(&self) -> Result<()> { - use super::errors::ResultExt; - - let root_bus = Arc::downgrade(&self.pci_host.lock().unwrap().root_bus); - let mmconfig_region_ops = PciHost::build_mmconfig_ops(self.pci_host.clone()); - let mmconfig_region = Region::init_io_region( - MEM_LAYOUT[LayoutEntryType::PcieEcam as usize].1, - mmconfig_region_ops.clone(), - ); - self.sys_mem - .root() - .add_subregion( - mmconfig_region.clone(), - MEM_LAYOUT[LayoutEntryType::PcieEcam as usize].0, - ) - .chain_err(|| "Failed to register ECAM in memory space.")?; - - let pio_addr_ops = PciHost::build_pio_addr_ops(self.pci_host.clone()); - let pio_addr_region = Region::init_io_region(4, pio_addr_ops); - self.sys_io - .root() - .add_subregion(pio_addr_region, 0xcf8) - .chain_err(|| "Failed to register CONFIG_ADDR port in I/O space.")?; - let pio_data_ops = PciHost::build_pio_data_ops(self.pci_host.clone()); - let pio_data_region = Region::init_io_region(4, pio_data_ops); - self.sys_io - .root() - .add_subregion(pio_data_region, 0xcfc) - .chain_err(|| "Failed to register CONFIG_DATA port in I/O space.")?; - - let mch = Mch::new(root_bus, mmconfig_region, mmconfig_region_ops); - PciDevOps::realize(mch)?; - Ok(()) - } - - fn add_fwcfg_device(&mut self) -> super::errors::Result>> { - use super::errors::ResultExt; - - let mut fwcfg = FwCfgIO::new(self.sys_mem.clone()); - let ncpus = self.cpus.len(); - fwcfg.add_data_entry(FwCfgEntryType::NbCpus, ncpus.as_bytes().to_vec())?; - fwcfg.add_data_entry(FwCfgEntryType::MaxCpus, ncpus.as_bytes().to_vec())?; - fwcfg.add_data_entry(FwCfgEntryType::Irq0Override, 1_u32.as_bytes().to_vec())?; - - let fwcfg_dev = FwCfgIO::realize(fwcfg, &mut self.sysbus) - .chain_err(|| "Failed to realize fwcfg device")?; - - Ok(fwcfg_dev) - } - - fn get_vm_state(&self) -> &Arc<(Mutex, Condvar)> { - &self.vm_state - } - - fn get_cpu_topo(&self) -> &CpuTopology { - &self.cpu_topo - } - - fn get_cpus(&self) -> &Vec> { - &self.cpus - } - - fn get_vm_config(&self) -> &Mutex { - &self.vm_config - } -} - -impl MachineOps for StdMachine { - fn arch_ram_ranges(&self, mem_size: u64) -> Vec<(u64, u64)> { - let gap_start = MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0 - + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1; - - let mut ranges = vec![(0, std::cmp::min(gap_start, mem_size))]; - if mem_size > gap_start { - let gap_end = MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0; - ranges.push((gap_end, mem_size - gap_start)); - } - ranges - } - - fn init_interrupt_controller(&mut self, _vcpu_count: u64) -> MachineResult<()> { - use crate::errors::ResultExt; - - KVM_FDS - .load() - .vm_fd - .as_ref() - .unwrap() - .create_irq_chip() 
- .chain_err(|| MachineErrorKind::CrtIrqchipErr)?; - KVM_FDS - .load() - .irq_route_table - .lock() - .unwrap() - .init_irq_route_table(); - KVM_FDS.load().commit_irq_routing()?; - Ok(()) - } - - fn load_boot_source( - &self, - fwcfg: Option<&Arc>>, - ) -> MachineResult { - use crate::errors::ResultExt; - - let boot_source = self.boot_source.lock().unwrap(); - let initrd = boot_source.initrd.as_ref().map(|b| b.initrd_file.clone()); - - let gap_start = MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0 - + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1; - let gap_end = MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0; - let bootloader_config = BootLoaderConfig { - kernel: boot_source.kernel_file.clone(), - initrd, - kernel_cmdline: boot_source.kernel_cmdline.to_string(), - cpu_count: self.cpu_topo.nrcpus, - gap_range: (gap_start, gap_end - gap_start), - ioapic_addr: MEM_LAYOUT[LayoutEntryType::IoApic as usize].0 as u32, - lapic_addr: MEM_LAYOUT[LayoutEntryType::LocalApic as usize].0 as u32, - ident_tss_range: Some(MEM_LAYOUT[LayoutEntryType::IdentTss as usize]), - prot64_mode: false, - }; - let layout = load_linux(&bootloader_config, &self.sys_mem, fwcfg) - .chain_err(|| MachineErrorKind::LoadKernErr)?; - - Ok(CPUBootConfig { - prot64_mode: false, - boot_ip: layout.boot_ip, - boot_sp: layout.boot_sp, - boot_selector: layout.boot_selector, - ..Default::default() - }) - } - - fn add_rtc_device(&mut self, mem_size: u64) -> MachineResult<()> { - use crate::errors::ResultExt; - - let mut rtc = RTC::new().chain_err(|| "Failed to create RTC device")?; - rtc.set_memory( - mem_size, - MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0 - + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1, - ); - RTC::realize(rtc, &mut self.sysbus).chain_err(|| "Failed to realize RTC device")?; - - Ok(()) - } - - fn add_serial_device(&mut self, config: &SerialConfig) -> MachineResult<()> { - use crate::errors::ResultExt; - let region_base: u64 = SERIAL_ADDR; - let region_size: u64 = 8; - let serial = Serial::new(config.clone()); - serial - .realize(&mut self.sysbus, region_base, region_size) - .chain_err(|| "Failed to realize serial device.")?; - Ok(()) - } - - fn syscall_whitelist(&self) -> Vec { - syscall_whitelist() - } - - fn realize( - vm: &Arc>, - vm_config: &mut VmConfig, - is_migrate: bool, - ) -> MachineResult<()> { - use crate::errors::ResultExt; - - let clone_vm = vm.clone(); - let mut locked_vm = vm.lock().unwrap(); - locked_vm.init_global_config(vm_config)?; - locked_vm.init_memory( - &vm_config.machine_config.mem_config, - &locked_vm.sys_io, - &locked_vm.sys_mem, - is_migrate, - vm_config.machine_config.nr_cpus, - )?; - - locked_vm.init_interrupt_controller(u64::from(vm_config.machine_config.nr_cpus))?; - let kvm_fds = KVM_FDS.load(); - let vm_fd = kvm_fds.vm_fd.as_ref().unwrap(); - let nr_cpus = vm_config.machine_config.nr_cpus; - let mut vcpu_fds = vec![]; - for cpu_id in 0..nr_cpus { - vcpu_fds.push(Arc::new(vm_fd.create_vcpu(cpu_id)?)); - } - - locked_vm - .init_pci_host() - .chain_err(|| ErrorKind::InitPCIeHostErr)?; - locked_vm - .init_ich9_lpc(clone_vm) - .chain_err(|| "Fail to init LPC bridge")?; - locked_vm.add_devices(vm_config)?; - - let (boot_config, fwcfg) = if !is_migrate { - let fwcfg = locked_vm.add_fwcfg_device()?; - (Some(locked_vm.load_boot_source(Some(&fwcfg))?), Some(fwcfg)) - } else { - (None, None) - }; - locked_vm.cpus.extend(::init_vcpu( - vm.clone(), - vm_config.machine_config.nr_cpus, - &vcpu_fds, - &boot_config, - )?); - - if let Some(fwcfg) = fwcfg { - locked_vm - 
.build_acpi_tables(&fwcfg) - .chain_err(|| "Failed to create ACPI tables")?; - } - StdMachine::arch_init()?; - locked_vm.register_power_event(&locked_vm.power_button)?; - - if let Err(e) = MigrationManager::set_status(MigrationStatus::Setup) { - bail!("Failed to set migration status {}", e); - } - - Ok(()) - } - - fn add_pflash_device(&mut self, configs: &[PFlashConfig]) -> MachineResult<()> { - use super::errors::ResultExt; - - let mut configs_vec = configs.to_vec(); - configs_vec.sort_by_key(|c| c.unit); - // The two PFlash devices locates below 4GB, this variable represents the end address - // of current PFlash device. - let mut flash_end: u64 = MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0; - for config in configs_vec { - let mut fd = OpenOptions::new() - .read(true) - .write(!config.read_only) - .open(&config.path_on_host) - .chain_err(|| ErrorKind::OpenFileErr(config.path_on_host.clone()))?; - let pfl_size = fd.metadata().unwrap().len(); - - if config.unit == 0 { - // According to the Linux/x86 boot protocol, the memory region of - // 0x000000 - 0x100000 (1 MiB) is for BIOS usage. And the top 128 - // KiB is for BIOS code which is stored in the first PFlash. - let rom_base = 0xe0000; - let rom_size = 0x20000; - fd.seek(SeekFrom::Start(pfl_size - rom_size))?; - - let ram1 = Arc::new(HostMemMapping::new( - GuestAddress(rom_base), - None, - rom_size, - None, - false, - false, - false, - )?); - let rom_region = Region::init_ram_region(ram1); - rom_region.write(&mut fd, GuestAddress(rom_base), 0, rom_size)?; - rom_region.set_priority(10); - self.sys_mem.root().add_subregion(rom_region, rom_base)?; - - fd.seek(SeekFrom::Start(0))?; - } - - let sector_len: u32 = 1024 * 4; - let backend = Some(fd); - let pflash = PFlash::new( - pfl_size, - &backend, - sector_len, - 4_u32, - 1_u32, - config.read_only, - ) - .chain_err(|| ErrorKind::InitPflashErr)?; - PFlash::realize( - pflash, - &mut self.sysbus, - flash_end - pfl_size, - pfl_size, - backend, - ) - .chain_err(|| ErrorKind::RlzPflashErr)?; - flash_end -= pfl_size; - } - - Ok(()) - } - - fn run(&self, paused: bool) -> MachineResult<()> { - ::vm_start(paused, &self.cpus, &mut self.vm_state.0.lock().unwrap()) - } - - fn get_sys_mem(&mut self) -> &Arc { - &self.sys_mem - } - - fn get_pci_host(&mut self) -> Result<&Arc>> { - Ok(&self.pci_host) - } -} - -impl AcpiBuilder for StdMachine { - fn build_dsdt_table( - &self, - acpi_data: &Arc>>, - loader: &mut TableLoader, - ) -> super::errors::Result { - let mut dsdt = AcpiTable::new(*b"DSDT", 2, *b"STRATO", *b"VIRTDSDT", 1); - - // 1. CPU info. - let cpus_count = self.cpus.len() as u64; - let mut sb_scope = AmlScope::new("\\_SB"); - for cpu_id in 0..cpus_count { - let mut dev = AmlDevice::new(format!("C{:03}", cpu_id).as_str()); - dev.append_child(AmlNameDecl::new("_HID", AmlString("ACPI0007".to_string()))); - dev.append_child(AmlNameDecl::new("_UID", AmlInteger(cpu_id))); - sb_scope.append_child(dev); - } - - // 2. Create pci host bridge node. - sb_scope.append_child(self.pci_host.lock().unwrap().clone()); - dsdt.append_child(sb_scope.aml_bytes().as_slice()); - - // 3. Info of devices attached to system bus. - dsdt.append_child(self.sysbus.aml_bytes().as_slice()); - - // 4. Add _S5 sleep state. 
- let mut package = AmlPackage::new(4); - package.append_child(AmlInteger(5)); - package.append_child(AmlInteger(0)); - package.append_child(AmlInteger(0)); - package.append_child(AmlInteger(0)); - dsdt.append_child(AmlNameDecl::new("_S5", package).aml_bytes().as_slice()); - - let mut locked_acpi_data = acpi_data.lock().unwrap(); - let dsdt_begin = locked_acpi_data.len() as u32; - locked_acpi_data.extend(dsdt.aml_bytes()); - let dsdt_end = locked_acpi_data.len() as u32; - // Drop the lock of acpi_data to avoid dead-lock when adding entry to - // TableLoader, because TableLoader also needs to acquire this lock. - drop(locked_acpi_data); - - loader.add_cksum_entry( - ACPI_TABLE_FILE, - dsdt_begin + TABLE_CHECKSUM_OFFSET, - dsdt_begin, - dsdt_end - dsdt_begin, - )?; - - Ok(dsdt_begin as u64) - } - - fn build_madt_table( - &self, - acpi_data: &Arc>>, - loader: &mut TableLoader, - ) -> super::errors::Result { - let mut madt = AcpiTable::new(*b"APIC", 5, *b"STRATO", *b"VIRTAPIC", 1); - - madt.append_child(LAPIC_BASE_ADDR.as_bytes()); - // Flags: PC-AT-compatible dual-8259 setup - madt.append_child(1_u32.as_bytes()); - - let ioapic = AcpiIoApic { - type_id: 1_u8, - length: size_of::() as u8, - io_apic_id: 0, - reserved: 0, - io_apic_addr: IOAPIC_BASE_ADDR, - gsi_base: 0, - }; - madt.append_child(ioapic.aml_bytes().as_ref()); - - self.cpus.iter().for_each(|cpu| { - let lapic = AcpiLocalApic { - type_id: 0, - length: size_of::() as u8, - processor_uid: cpu.id(), - apic_id: cpu.id(), - flags: 1, // Flags: enabled. - }; - madt.append_child(&lapic.aml_bytes()); - }); - - let mut locked_acpi_data = acpi_data.lock().unwrap(); - let madt_begin = locked_acpi_data.len() as u32; - locked_acpi_data.extend(madt.aml_bytes()); - let madt_end = locked_acpi_data.len() as u32; - // Drop the lock of acpi_data to avoid dead-lock when adding entry to - // TableLoader, because TableLoader also needs to acquire this lock. - drop(locked_acpi_data); - - loader.add_cksum_entry( - ACPI_TABLE_FILE, - madt_begin + TABLE_CHECKSUM_OFFSET, - madt_begin, - madt_end - madt_begin, - )?; - - Ok(madt_begin as u64) - } -} - -impl MachineLifecycle for StdMachine { - fn pause(&self) -> bool { - if self.notify_lifecycle(KvmVmState::Running, KvmVmState::Paused) { - event!(Stop); - true - } else { - false - } - } - - fn resume(&self) -> bool { - if !self.notify_lifecycle(KvmVmState::Paused, KvmVmState::Running) { - return false; - } - event!(Resume); - true - } - - fn destroy(&self) -> bool { - let vmstate = { - let state = self.vm_state.deref().0.lock().unwrap(); - *state - }; - - if !self.notify_lifecycle(vmstate, KvmVmState::Shutdown) { - return false; - } - - self.power_button.write(1).unwrap(); - true - } - - fn notify_lifecycle(&self, old: KvmVmState, new: KvmVmState) -> bool { - ::vm_state_transfer( - &self.cpus, - &mut self.vm_state.0.lock().unwrap(), - old, - new, - ) - .is_ok() - } -} - -impl MachineAddressInterface for StdMachine { - fn pio_in(&self, addr: u64, mut data: &mut [u8]) -> bool { - if (0x60..=0x64).contains(&addr) { - // The function pit_calibrate_tsc() in kernel gets stuck if data read from - // io-port 0x61 is not 0x20. - // This problem only happens before Linux version 4.18 (fixed by 368a540e0) - if addr == 0x61 { - data[0] = 0x20; - return true; - } - if addr == 0x64 { - // UEFI will read PS2 Keyboard's Status register 0x64 to detect if - // this device is present. 
- data[0] = 0xFF; - } - } - - let length = data.len() as u64; - self.sys_io - .read(&mut data, GuestAddress(addr), length) - .is_ok() - } - - fn pio_out(&self, addr: u64, mut data: &[u8]) -> bool { - let count = data.len() as u64; - if addr == SLEEP_CTRL_OFFSET as u64 { - if let Err(e) = self.cpus[0].pause() { - error!("Fail to pause bsp, {}", e.display_chain()); - } - } - self.sys_io - .write(&mut data, GuestAddress(addr), count) - .is_ok() - } - - fn mmio_read(&self, addr: u64, mut data: &mut [u8]) -> bool { - let length = data.len() as u64; - self.sys_mem - .read(&mut data, GuestAddress(addr), length) - .is_ok() - } - - fn mmio_write(&self, addr: u64, mut data: &[u8]) -> bool { - let count = data.len() as u64; - self.sys_mem - .write(&mut data, GuestAddress(addr), count) - .is_ok() - } -} - -impl MigrateInterface for StdMachine { - fn migrate(&self, uri: String) -> Response { - use util::unix::{parse_uri, UnixPath}; - - match parse_uri(&uri) { - Ok((UnixPath::File, path)) => { - if let Err(e) = MigrationManager::save_snapshot(&path) { - error!( - "Failed to migrate to path \'{:?}\': {}", - path, - e.display_chain() - ); - let _ = MigrationManager::set_status(MigrationStatus::Failed) - .map_err(|e| error!("{}", e)); - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(e.to_string()), - None, - ); - } - } - _ => { - return Response::create_error_response( - qmp_schema::QmpErrorClass::GenericError(format!("Invalid uri: {}", uri)), - None, - ); - } - } - - Response::create_empty_response() - } - - fn query_migrate(&self) -> Response { - let status_str = MigrationManager::migration_get_status().to_string(); - let migration_info = qmp_schema::MigrationInfo { - status: Some(status_str), - }; - - Response::create_response(serde_json::to_value(migration_info).unwrap(), None) - } -} - -impl MachineInterface for StdMachine {} -impl MachineExternalInterface for StdMachine {} - -impl EventLoopManager for StdMachine { - fn loop_should_exit(&self) -> bool { - let vmstate = self.vm_state.deref().0.lock().unwrap(); - *vmstate == KvmVmState::Shutdown - } - - fn loop_cleanup(&self) -> util::errors::Result<()> { - use util::errors::ResultExt; - - set_termi_canon_mode().chain_err(|| "Failed to set terminal to canonical mode")?; - Ok(()) - } -} diff --git a/machine/src/standard_vm/x86_64/ich9_lpc.rs b/machine/src/x86_64/ich9_lpc.rs similarity index 57% rename from machine/src/standard_vm/x86_64/ich9_lpc.rs rename to machine/src/x86_64/ich9_lpc.rs index 24a8770d3961b775e0c5b9437b3f9de137d9f200..e4c40c5833929204face54b548d5e7a75f6bcf2e 100644 --- a/machine/src/standard_vm/x86_64/ich9_lpc.rs +++ b/machine/src/x86_64/ich9_lpc.rs @@ -15,21 +15,22 @@ use std::sync::{ Arc, Mutex, Weak, }; +use anyhow::{Context, Result}; +use log::error; +use vmm_sys_util::eventfd::EventFd; + +use crate::x86_64::standard::VENDOR_ID_INTEL; use acpi::{AcpiPMTimer, AcpiPmCtrl, AcpiPmEvent}; use address_space::{AddressSpace, GuestAddress, Region, RegionOps}; -use error_chain::ChainedError; -use pci::config::{ - PciConfig, DEVICE_ID, HEADER_TYPE, HEADER_TYPE_BRIDGE, HEADER_TYPE_MULTIFUNC, - PCI_CONFIG_SPACE_SIZE, SUB_CLASS_CODE, VENDOR_ID, +use devices::pci::config::{ + PciConfig, CLASS_CODE_ISA_BRIDGE, DEVICE_ID, HEADER_TYPE, HEADER_TYPE_BRIDGE, + HEADER_TYPE_MULTIFUNC, PCI_CONFIG_SPACE_SIZE, SUB_CLASS_CODE, VENDOR_ID, }; -use pci::errors::Result as PciResult; -use pci::{le_write_u16, le_write_u32, ranges_overlap, PciBus, PciDevOps}; +use devices::pci::{le_write_u16, le_write_u32, PciDevBase, PciDevOps}; 
+use devices::{Bus, Device, DeviceBase}; use util::byte_code::ByteCode; -use vmm_sys_util::eventfd::EventFd; - -use super::VENDOR_ID_INTEL; -use crate::standard_vm::errors::Result; -use pci::config::CLASS_CODE_ISA_BRIDGE; +use util::gen_base_func; +use util::num_ops::ranges_overlap; const DEVICE_ID_INTEL_ICH9: u16 = 0x2918; @@ -43,34 +44,41 @@ pub const RST_CTRL_OFFSET: u16 = 0xCF9; /// LPC bridge of ICH9 (IO controller hub 9), Device 1F : Function 0 #[allow(clippy::upper_case_acronyms)] pub struct LPCBridge { - config: PciConfig, - parent_bus: Weak>, + base: PciDevBase, sys_io: Arc, pm_timer: Arc>, rst_ctrl: Arc, pm_evt: Arc>, pm_ctrl: Arc>, - /// Reset request trigged by ACPI PM1 Control Registers. - pub reset_req: EventFd, - pub shutdown_req: EventFd, + /// Reset request triggered by ACPI PM1 Control Registers. + pub reset_req: Arc, + pub shutdown_req: Arc, } impl LPCBridge { - pub fn new(parent_bus: Weak>, sys_io: Arc) -> Self { - Self { - config: PciConfig::new(PCI_CONFIG_SPACE_SIZE, 0), - parent_bus, + pub fn new( + parent_bus: Weak>, + sys_io: Arc, + reset_req: Arc, + shutdown_req: Arc, + ) -> Result { + Ok(Self { + base: PciDevBase { + base: DeviceBase::new("ICH9 LPC bridge".to_string(), false, Some(parent_bus)), + config: PciConfig::new(0x1F << 3, PCI_CONFIG_SPACE_SIZE, 0), + devfn: 0x1F << 3, + }, sys_io, pm_timer: Arc::new(Mutex::new(AcpiPMTimer::new())), pm_evt: Arc::new(Mutex::new(AcpiPmEvent::new())), pm_ctrl: Arc::new(Mutex::new(AcpiPmCtrl::new())), rst_ctrl: Arc::new(AtomicU8::new(0)), - reset_req: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - shutdown_req: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - } + reset_req, + shutdown_req, + }) } - fn update_pm_base(&self) -> Result<()> { + fn update_pm_base(&mut self) -> Result<()> { let cloned_pmtmr = self.pm_timer.clone(); let read_ops = move |data: &mut [u8], addr: GuestAddress, offset: u64| -> bool { cloned_pmtmr.lock().unwrap().read(data, addr, offset) @@ -80,14 +88,16 @@ impl LPCBridge { read: Arc::new(read_ops), write: Arc::new(write_ops), }; - let pmtmr_region = Region::init_io_region(0x8, ops); + let pmtmr_region = Region::init_io_region(0x8, ops, "PmtmrRegion"); let mut pm_base_addr = 0_u32; - self.config + self.base + .config .read(PM_BASE_OFFSET as usize, pm_base_addr.as_mut_bytes()); - self.sys_io - .root() - .add_subregion(pmtmr_region, pm_base_addr as u64 + PM_TIMER_OFFSET as u64)?; + self.sys_io.root().add_subregion( + pmtmr_region, + u64::from(pm_base_addr) + u64::from(PM_TIMER_OFFSET), + )?; Ok(()) } @@ -107,17 +117,20 @@ impl LPCBridge { }; let cloned_rst_ctrl = self.rst_ctrl.clone(); - let cloned_reset_fd = self.reset_req.try_clone().unwrap(); + let cloned_reset_fd = self.reset_req.clone(); let write_ops = move |data: &[u8], _addr: GuestAddress, _offset: u64| -> bool { let value: u8 = match data.len() { - 1 => data[0] as u8, + 1 => data[0], n => { error!("Invalid data length {}", n); return false; } }; if value & 0x4_u8 != 0 { - cloned_reset_fd.write(1).unwrap(); + if cloned_reset_fd.write(1).is_err() { + error!("X86 standard vm write reset fd failed"); + return false; + } return true; } cloned_rst_ctrl.store(value & 0xA, Ordering::SeqCst); @@ -128,10 +141,10 @@ impl LPCBridge { read: Arc::new(read_ops), write: Arc::new(write_ops), }; - let rst_ctrl_region = Region::init_io_region(0x1, ops); + let rst_ctrl_region = Region::init_io_region(0x1, ops, "RstCtrlRegion"); self.sys_io .root() - .add_subregion(rst_ctrl_region, RST_CTRL_OFFSET as u64)?; + .add_subregion(rst_ctrl_region, u64::from(RST_CTRL_OFFSET))?; 
Ok(()) } @@ -142,9 +155,12 @@ impl LPCBridge { true }; - let cloned_shutdown_fd = self.shutdown_req.try_clone().unwrap(); + let cloned_shutdown_fd = self.shutdown_req.clone(); let write_ops = move |_data: &[u8], _addr: GuestAddress, _offset: u64| -> bool { - cloned_shutdown_fd.write(1).unwrap(); + if cloned_shutdown_fd.write(1).is_err() { + error!("X86 standard vm write shutdown fd failed"); + return false; + } true }; @@ -152,10 +168,10 @@ impl LPCBridge { read: Arc::new(read_ops), write: Arc::new(write_ops), }; - let sleep_reg_region = Region::init_io_region(0x1, ops); + let sleep_reg_region = Region::init_io_region(0x1, ops, "SleepReg"); self.sys_io .root() - .add_subregion(sleep_reg_region, SLEEP_CTRL_OFFSET as u64)?; + .add_subregion(sleep_reg_region, u64::from(SLEEP_CTRL_OFFSET))?; Ok(()) } @@ -174,10 +190,10 @@ impl LPCBridge { read: Arc::new(read_ops), write: Arc::new(write_ops), }; - let pm_evt_region = Region::init_io_region(0x4, ops); + let pm_evt_region = Region::init_io_region(0x4, ops, "PmEvtRegion"); self.sys_io .root() - .add_subregion(pm_evt_region, PM_EVENT_OFFSET as u64)?; + .add_subregion(pm_evt_region, u64::from(PM_EVENT_OFFSET))?; Ok(()) } @@ -189,10 +205,13 @@ impl LPCBridge { }; let clone_pmctrl = self.pm_ctrl.clone(); - let cloned_shutdown_fd = self.shutdown_req.try_clone().unwrap(); + let cloned_shutdown_fd = self.shutdown_req.clone(); let write_ops = move |data: &[u8], addr: GuestAddress, offset: u64| -> bool { - if clone_pmctrl.lock().unwrap().write(data, addr, offset) { - cloned_shutdown_fd.write(1).unwrap(); + if clone_pmctrl.lock().unwrap().write(data, addr, offset) + && cloned_shutdown_fd.write(1).is_err() + { + error!("X86 standard vm write shutdown fd failed"); + return false; } true }; @@ -201,107 +220,77 @@ impl LPCBridge { read: Arc::new(read_ops), write: Arc::new(write_ops), }; - let pm_ctrl_region = Region::init_io_region(0x4, ops); + let pm_ctrl_region = Region::init_io_region(0x4, ops, "PmCtrl"); self.sys_io .root() - .add_subregion(pm_ctrl_region, PM_CTRL_OFFSET as u64)?; + .add_subregion(pm_ctrl_region, u64::from(PM_CTRL_OFFSET))?; Ok(()) } } -impl PciDevOps for LPCBridge { - fn init_write_mask(&mut self) -> PciResult<()> { - self.config.init_common_write_mask() - } - - fn init_write_clear_mask(&mut self) -> PciResult<()> { - self.config.init_common_write_clear_mask() - } - - fn realize(mut self) -> PciResult<()> { - use pci::errors::ResultExt; +impl Device for LPCBridge { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); - self.init_write_mask()?; - self.init_write_clear_mask()?; + fn realize(mut self) -> Result>> { + self.init_write_mask(false)?; + self.init_write_clear_mask(false)?; - le_write_u16(&mut self.config.config, VENDOR_ID as usize, VENDOR_ID_INTEL)?; le_write_u16( - &mut self.config.config, + &mut self.base.config.config, + VENDOR_ID as usize, + VENDOR_ID_INTEL, + )?; + le_write_u16( + &mut self.base.config.config, DEVICE_ID as usize, DEVICE_ID_INTEL_ICH9, )?; le_write_u16( - &mut self.config.config, + &mut self.base.config.config, SUB_CLASS_CODE as usize, CLASS_CODE_ISA_BRIDGE, )?; - le_write_u32(&mut self.config.write_mask, PM_BASE_OFFSET as usize, 0xff80)?; + le_write_u32( + &mut self.base.config.write_mask, + PM_BASE_OFFSET as usize, + 0xff80, + )?; le_write_u16( - &mut self.config.config, + &mut self.base.config.config, HEADER_TYPE as usize, - (HEADER_TYPE_BRIDGE | HEADER_TYPE_MULTIFUNC) as u16, + u16::from(HEADER_TYPE_BRIDGE | HEADER_TYPE_MULTIFUNC), )?; self.init_sleep_reg() - .chain_err(|| "Fail to init IO 
region for sleep control register")?; + .with_context(|| "Fail to init IO region for sleep control register")?; self.init_reset_ctrl_reg() - .chain_err(|| "Fail to init IO region for reset control register")?; + .with_context(|| "Fail to init IO region for reset control register")?; self.init_pm_evt_reg() - .chain_err(|| "Fail to init IO region for PM events register")?; + .with_context(|| "Fail to init IO region for PM events register")?; self.init_pm_ctrl_reg() - .chain_err(|| "Fail to init IO region for PM control register")?; - - let parent_bus = self.parent_bus.clone(); - parent_bus - .upgrade() - .unwrap() - .lock() - .unwrap() - .devices - .insert(0x1F << 3, Arc::new(Mutex::new(self))); - Ok(()) - } + .with_context(|| "Fail to init IO region for PM control register")?; - fn read_config(&self, offset: usize, data: &mut [u8]) { - let size = data.len(); - if offset + size > PCI_CONFIG_SPACE_SIZE || size > 4 { - debug!( - "Failed to read LPC bridge's pci config space: offset {}, data size {}", - offset, size - ); - return; - } - self.config.read(offset, data); + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + let mut locked_bus = parent_bus.lock().unwrap(); + let dev = Arc::new(Mutex::new(self)); + locked_bus.attach_child(0x1F << 3, dev.clone())?; + Ok(dev) } +} - fn write_config(&mut self, offset: usize, data: &[u8]) { - let size = data.len(); - let end = offset + size; - if end > PCI_CONFIG_SPACE_SIZE || size > 4 { - debug!( - "Failed to write LPC bridge's pci config space: offset {}, data size {}", - offset, size - ); - return; - } +impl PciDevOps for LPCBridge { + gen_base_func!(pci_base, pci_base_mut, PciDevBase, base); - self.config.write(offset, data, 0); - if ranges_overlap( - offset, - end, - PM_BASE_OFFSET as usize, - PM_BASE_OFFSET as usize + 4, - ) { + fn write_config(&mut self, offset: usize, data: &[u8]) { + self.base.config.write(offset, data, 0, None, None); + // SAFETY: offset is no more than 0xfff. 
+ if ranges_overlap(offset, data.len(), PM_BASE_OFFSET as usize, 4).unwrap() { if let Err(e) = self.update_pm_base() { - error!("Failed to update PM base addr: {}", e.display_chain()); + error!("Failed to update PM base addr: {:?}", e); } } } - - fn name(&self) -> String { - "ICH9 LPC bridge".to_string() - } } diff --git a/machine/src/standard_vm/x86_64/mch.rs b/machine/src/x86_64/mch.rs similarity index 49% rename from machine/src/standard_vm/x86_64/mch.rs rename to machine/src/x86_64/mch.rs index 2c93b28a469a418b34bcaf9b0348720989cc5b6c..13a47cdf20d203818939aebc68c4e63e3d66b57d 100644 --- a/machine/src/standard_vm/x86_64/mch.rs +++ b/machine/src/x86_64/mch.rs @@ -12,19 +12,21 @@ use std::sync::{Arc, Mutex, Weak}; +use anyhow::{bail, Result}; +use log::error; + +use crate::x86_64::standard::VENDOR_ID_INTEL; use address_space::{Region, RegionOps}; -use error_chain::ChainedError; -use pci::{ +use devices::pci::{ config::{ PciConfig, CLASS_CODE_HOST_BRIDGE, DEVICE_ID, PCI_CONFIG_SPACE_SIZE, SUB_CLASS_CODE, VENDOR_ID, }, - errors::Result as PciResult, - le_read_u64, le_write_u16, ranges_overlap, PciBus, PciDevOps, + le_read_u64, le_write_u16, PciBus, PciDevBase, PciDevOps, }; - -use super::VENDOR_ID_INTEL; -use crate::standard_vm::errors::Result; +use devices::{convert_bus_ref, Bus, Device, DeviceBase, PCI_BUS}; +use util::gen_base_func; +use util::num_ops::ranges_overlap; const DEVICE_ID_INTEL_Q35_MCH: u16 = 0x29c0; @@ -37,31 +39,35 @@ const PCIEXBAR_LENGTH_128MB: u64 = 0x2; const PCIEXBAR_LENGTH_64MB: u64 = 0x4; const PCIEXBAR_128MB_ADDR_MASK: u64 = 1 << 26; const PCIEXBAR_64MB_ADDR_MASK: u64 = 1 << 25; +// Bit 25:3 of PCIEXBAR is reserved. +const PCIEXBAR_RESERVED_MASK: u64 = 0x3ff_fff8; /// Memory controller hub (Device 0:Function 0) pub struct Mch { - config: PciConfig, - parent_bus: Weak>, + base: PciDevBase, mmconfig_region: Option, mmconfig_ops: RegionOps, } impl Mch { pub fn new( - parent_bus: Weak>, + parent_bus: Weak>, mmconfig_region: Region, mmconfig_ops: RegionOps, ) -> Self { Self { - config: PciConfig::new(PCI_CONFIG_SPACE_SIZE, 0), - parent_bus, + base: PciDevBase { + base: DeviceBase::new("Memory Controller Hub".to_string(), false, Some(parent_bus)), + config: PciConfig::new(0, PCI_CONFIG_SPACE_SIZE, 0), + devfn: 0, + }, mmconfig_region: Some(mmconfig_region), mmconfig_ops, } } fn update_pciexbar_mapping(&mut self) -> Result<()> { - let pciexbar: u64 = le_read_u64(&self.config.config, PCIEXBAR as usize)?; + let pciexbar: u64 = le_read_u64(&self.base.config.config, PCIEXBAR as usize)?; let enable = pciexbar & PCIEXBAR_ENABLE_MASK; let length: u64; let mut addr_mask: u64 = PCIEXBAR_ADDR_MASK; @@ -79,112 +85,75 @@ impl Mch { } if let Some(region) = self.mmconfig_region.as_ref() { - self.parent_bus - .upgrade() - .unwrap() - .lock() - .unwrap() - .mem_region - .delete_subregion(region)?; + let bus = self.parent_bus().unwrap().upgrade().unwrap(); + PCI_BUS!(bus, locked_bus, pci_bus); + pci_bus.mem_region.delete_subregion(region)?; self.mmconfig_region = None; } if enable == 0x1 { - let region = Region::init_io_region(length, self.mmconfig_ops.clone()); + let region = Region::init_io_region(length, self.mmconfig_ops.clone(), "PcieXBar"); let base_addr: u64 = pciexbar & addr_mask; - self.parent_bus - .upgrade() - .unwrap() - .lock() - .unwrap() - .mem_region - .add_subregion(region, base_addr)?; + let bus = self.parent_bus().unwrap().upgrade().unwrap(); + PCI_BUS!(bus, locked_bus, pci_bus); + pci_bus.mem_region.add_subregion(region, base_addr)?; } Ok(()) } -} -impl PciDevOps for 
Mch { - fn init_write_mask(&mut self) -> PciResult<()> { - self.config.init_common_write_mask() - } + fn check_pciexbar_update(&self, old_pciexbar: u64) -> bool { + let cur_pciexbar: u64 = le_read_u64(&self.base.config.config, PCIEXBAR as usize).unwrap(); - fn init_write_clear_mask(&mut self) -> PciResult<()> { - self.config.init_common_write_clear_mask() + if (cur_pciexbar & !PCIEXBAR_RESERVED_MASK) == (old_pciexbar & !PCIEXBAR_RESERVED_MASK) { + return false; + } + true } +} + +impl Device for Mch { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); - fn realize(mut self) -> PciResult<()> { - self.init_write_mask()?; - self.init_write_clear_mask()?; + fn realize(mut self) -> Result>> { + self.init_write_mask(false)?; + self.init_write_clear_mask(false)?; - le_write_u16(&mut self.config.config, VENDOR_ID as usize, VENDOR_ID_INTEL)?; le_write_u16( - &mut self.config.config, + &mut self.base.config.config, + VENDOR_ID as usize, + VENDOR_ID_INTEL, + )?; + le_write_u16( + &mut self.base.config.config, DEVICE_ID as usize, DEVICE_ID_INTEL_Q35_MCH, )?; le_write_u16( - &mut self.config.config, + &mut self.base.config.config, SUB_CLASS_CODE as usize, CLASS_CODE_HOST_BRIDGE, )?; - let parent_bus = self.parent_bus.clone(); - parent_bus - .upgrade() - .unwrap() - .lock() - .unwrap() - .devices - .insert(0, Arc::new(Mutex::new(self))); - Ok(()) + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + let mut locked_bus = parent_bus.lock().unwrap(); + let dev = Arc::new(Mutex::new(self)); + locked_bus.attach_child(0, dev.clone())?; + Ok(dev) } +} - fn read_config(&self, offset: usize, data: &mut [u8]) { - let size = data.len(); - if size > 4 { - error!( - "Failed to read MCH config space: Invalid data size {}", - size - ); - return; - } - if offset + size > PCI_CONFIG_SPACE_SIZE { - debug!( - "Failed to read MCH config space: offset {}, size {}, config space size {}", - offset, size, PCI_CONFIG_SPACE_SIZE - ); - return; - } - self.config.read(offset, data); - } +impl PciDevOps for Mch { + gen_base_func!(pci_base, pci_base_mut, PciDevBase, base); fn write_config(&mut self, offset: usize, data: &[u8]) { - let size = data.len(); - let end = offset + size; - if size > 4 { - error!( - "Failed to write MCH config space: Invalid data size {}", - size - ); - return; - } - if offset + size > PCI_CONFIG_SPACE_SIZE { - debug!( - "Failed to write MCH config space: offset {}, size {}, config space size {}", - offset, size, PCI_CONFIG_SPACE_SIZE - ); - return; - } - - self.config.write(offset, data, 0); - if ranges_overlap(offset, end, PCIEXBAR as usize, PCIEXBAR as usize + 8) { + let old_pciexbar: u64 = le_read_u64(&self.base.config.config, PCIEXBAR as usize).unwrap(); + self.base.config.write(offset, data, 0, None, None); + // SAFETY: offset is no more than 0xfff. + if ranges_overlap(offset, data.len(), PCIEXBAR as usize, 8).unwrap() + && self.check_pciexbar_update(old_pciexbar) + { if let Err(e) = self.update_pciexbar_mapping() { - error!("{}", e.display_chain()); + error!("{:?}", e); } } } - - fn name(&self) -> String { - "Memory Controller Hub".to_string() - } } diff --git a/machine/src/x86_64/micro.rs b/machine/src/x86_64/micro.rs new file mode 100644 index 0000000000000000000000000000000000000000..80c3c1b9b1b7f0ca0610aa754a35fe2f8b99d8ae --- /dev/null +++ b/machine/src/x86_64/micro.rs @@ -0,0 +1,259 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Context, Result}; + +use crate::micro_common::syscall::syscall_whitelist; +use crate::{register_shutdown_event, LightMachine, MachineBase, MachineError, MachineOps}; +use address_space::{AddressSpace, Region}; +use cpu::{CPUBootConfig, CPUTopology}; +use devices::legacy::{FwCfgOps, Serial, SERIAL_ADDR}; +use devices::Device; +use hypervisor::kvm::x86_64::*; +use hypervisor::kvm::*; +use machine_manager::config::{SerialConfig, VmConfig}; +use migration::{MigrationManager, MigrationStatus}; +use util::gen_base_func; +use util::seccomp::{BpfRule, SeccompCmpOpt}; +use virtio::{VirtioDevice, VirtioMmioDevice}; + +#[repr(usize)] +pub enum LayoutEntryType { + MemBelow4g = 0_usize, + Mmio, + IoApic, + LocalApic, + IdentTss, + MemAbove4g, +} + +pub const MEM_LAYOUT: &[(u64, u64)] = &[ + (0, 0xC000_0000), // MemBelow4g + (0xF010_0000, 0x200), // Mmio + (0xFEC0_0000, 0x10_0000), // IoApic + (0xFEE0_0000, 0x10_0000), // LocalApic + (0xFEF0_C000, 0x4000), // Identity map address and TSS + (0x1_0000_0000, 0x80_0000_0000), // MemAbove4g +]; + +impl MachineOps for LightMachine { + gen_base_func!(machine_base, machine_base_mut, MachineBase, base); + + fn init_machine_ram(&self, sys_mem: &Arc, mem_size: u64) -> Result<()> { + let vm_ram = self.get_vm_ram(); + let below4g_size = MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1; + let below4g_ram = Region::init_alias_region( + vm_ram.clone(), + 0, + std::cmp::min(below4g_size, mem_size), + "below4g_ram", + ); + sys_mem.root().add_subregion( + below4g_ram, + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0, + )?; + + if mem_size > below4g_size { + let above4g_ram = Region::init_alias_region( + vm_ram.clone(), + below4g_size, + mem_size - below4g_size, + "above4g_ram", + ); + let above4g_start = MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0; + sys_mem.root().add_subregion(above4g_ram, above4g_start)?; + } + + Ok(()) + } + + fn init_interrupt_controller(&mut self, _vcpu_count: u64) -> Result<()> { + let hypervisor = self.get_hypervisor(); + let mut locked_hypervisor = hypervisor.lock().unwrap(); + locked_hypervisor.create_interrupt_controller()?; + + let irq_manager = locked_hypervisor.create_irq_manager()?; + self.base.sysbus.lock().unwrap().irq_manager = irq_manager.line_irq_manager; + + Ok(()) + } + + fn load_boot_source(&self, fwcfg: Option<&Arc>>) -> Result { + use boot_loader::{load_linux, BootLoaderConfig}; + + let boot_source = self.base.boot_source.lock().unwrap(); + let initrd = boot_source.initrd.as_ref().map(|b| b.initrd_file.clone()); + + // MEM_LAYOUT is defined statically, will not overflow. + let gap_start = MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0 + + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1; + let gap_end = MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0; + let bootloader_config = BootLoaderConfig { + kernel: boot_source.kernel_file.clone(), + initrd, + kernel_cmdline: boot_source.kernel_cmdline.to_string(), + cpu_count: self.base.cpu_topo.nrcpus, + // gap_end is bigger than gap_start, as MEM_LAYOUT is defined statically. 
+ gap_range: (gap_start, gap_end - gap_start), + ioapic_addr: MEM_LAYOUT[LayoutEntryType::IoApic as usize].0 as u32, + lapic_addr: MEM_LAYOUT[LayoutEntryType::LocalApic as usize].0 as u32, + ident_tss_range: None, + prot64_mode: true, + }; + let layout = load_linux(&bootloader_config, &self.base.sys_mem, fwcfg) + .with_context(|| MachineError::LoadKernErr)?; + + Ok(CPUBootConfig { + prot64_mode: true, + boot_ip: layout.boot_ip, + boot_sp: layout.boot_sp, + boot_selector: layout.boot_selector, + zero_page: layout.zero_page_addr, + code_segment: layout.segments.code_segment, + data_segment: layout.segments.data_segment, + gdt_base: layout.segments.gdt_base, + gdt_size: layout.segments.gdt_limit, + idt_base: layout.segments.idt_base, + idt_size: layout.segments.idt_limit, + pml4_start: layout.boot_pml4_addr, + }) + } + + fn add_serial_device(&mut self, config: &SerialConfig) -> Result<()> { + let region_base: u64 = SERIAL_ADDR; + let region_size: u64 = 8; + let serial = Serial::new(config.clone(), &self.base.sysbus, region_base, region_size)?; + serial + .realize() + .with_context(|| "Failed to realize serial device.")?; + Ok(()) + } + + fn realize(vm: &Arc>, vm_config: &mut VmConfig) -> Result<()> { + let mut locked_vm = vm.lock().unwrap(); + + trace::sysbus(&locked_vm.base.sysbus); + trace::vm_state(&locked_vm.base.vm_state); + + let topology = CPUTopology::new().set_topology(( + vm_config.machine_config.nr_threads, + vm_config.machine_config.nr_cores, + vm_config.machine_config.nr_dies, + )); + trace::cpu_topo(&topology); + locked_vm.base.numa_nodes = locked_vm.add_numa_nodes(vm_config)?; + locked_vm.init_interrupt_controller(u64::from(vm_config.machine_config.nr_cpus))?; + let locked_hypervisor = locked_vm.base.hypervisor.lock().unwrap(); + locked_hypervisor.init_machine(&locked_vm.base.sys_io, &locked_vm.base.sys_mem)?; + drop(locked_hypervisor); + locked_vm.init_memory( + &vm_config.machine_config.mem_config, + &locked_vm.base.sys_mem, + vm_config.machine_config.nr_cpus, + )?; + + // Add mmio devices + locked_vm + .create_replaceable_devices() + .with_context(|| "Failed to create replaceable devices.")?; + locked_vm.add_devices(vm_config)?; + trace::replaceable_info(&locked_vm.replaceable_info); + + let boot_config = locked_vm.load_boot_source(None)?; + let hypervisor = locked_vm.base.hypervisor.clone(); + locked_vm.base.cpus.extend(::init_vcpu( + vm.clone(), + hypervisor, + vm_config.machine_config.nr_cpus, + vm_config.machine_config.max_cpus, + &topology, + &boot_config, + )?); + register_shutdown_event(locked_vm.shutdown_req.clone(), vm.clone()) + .with_context(|| "Failed to register shutdown event")?; + + MigrationManager::register_vm_instance(vm.clone()); + let migration_hyp = locked_vm.base.migration_hypervisor.clone(); + migration_hyp.lock().unwrap().register_instance()?; + MigrationManager::register_migration_instance(migration_hyp); + if let Err(e) = MigrationManager::set_status(MigrationStatus::Setup) { + bail!("Failed to set migration status {}", e); + } + + Ok(()) + } + + fn add_virtio_mmio_net(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { + self.add_virtio_mmio_net(vm_config, cfg_args) + } + + fn add_virtio_mmio_block(&mut self, vm_config: &mut VmConfig, cfg_args: &str) -> Result<()> { + self.add_virtio_mmio_block(vm_config, cfg_args) + } + + fn add_virtio_mmio_device( + &mut self, + name: String, + device: Arc>, + ) -> Result>> { + self.add_virtio_mmio_device(name, device) + } + + fn syscall_whitelist(&self) -> Vec { + syscall_whitelist() + } +} + 
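// Illustrative sketch (not part of the original patch; the function name below is
// invented for the example): a worked instance of the gap_range arithmetic used in
// load_boot_source() above, plugging in the microvm MEM_LAYOUT constants from this
// file. The hole between low RAM and high RAM is what gets passed to the boot
// loader as (start, size).
fn example_gap_range() -> (u64, u64) {
    // MemBelow4g = (0, 0xC000_0000); MemAbove4g starts at 0x1_0000_0000.
    let gap_start: u64 = 0 + 0xC000_0000; // end of RAM below 4 GiB
    let gap_end: u64 = 0x1_0000_0000;     // start of RAM above 4 GiB
    // Result: (0xC000_0000, 0x4000_0000), i.e. a 1 GiB hole reserved for MMIO,
    // the IOAPIC, the local APIC and the identity-map/TSS pages.
    (gap_start, gap_end - gap_start)
}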
+pub(crate) fn arch_ioctl_allow_list(bpf_rule: BpfRule) -> BpfRule { + bpf_rule + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_PIT2() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_CLOCK() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_IRQCHIP() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_REGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_SREGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_XSAVE() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_SREGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_REGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_SREGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_XSAVE() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_FPU() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_SUPPORTED_CPUID() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_DEBUGREGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_DEBUGREGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_XCRS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_XCRS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_LAPIC() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_LAPIC() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_MSRS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_MSRS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_CPUID2() as u32) +} + +pub(crate) fn arch_syscall_whitelist() -> Vec { + vec![ + #[cfg(not(target_env = "gnu"))] + BpfRule::new(libc::SYS_epoll_pwait), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_epoll_wait), + BpfRule::new(libc::SYS_open), + #[cfg(target_env = "musl")] + BpfRule::new(libc::SYS_stat), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_newfstatat), + BpfRule::new(libc::SYS_unlink), + BpfRule::new(libc::SYS_mkdir), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_readlink), + ] +} diff --git a/machine/src/x86_64/mod.rs b/machine/src/x86_64/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..b3227f997a235f184f9d6cdfab8cab7505de5d0e --- /dev/null +++ b/machine/src/x86_64/mod.rs @@ -0,0 +1,16 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod ich9_lpc; +pub mod mch; +pub mod micro; +pub mod standard; diff --git a/machine/src/x86_64/standard.rs b/machine/src/x86_64/standard.rs new file mode 100644 index 0000000000000000000000000000000000000000..61007855188aca5eb436f2abc3e724bb624dca62 --- /dev/null +++ b/machine/src/x86_64/standard.rs @@ -0,0 +1,933 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::io::{Seek, SeekFrom}; +use std::mem::size_of; +use std::sync::{Arc, Barrier, Mutex}; + +use anyhow::{bail, Context, Result}; + +use super::ich9_lpc; +use super::mch::Mch; +use crate::error::MachineError; +use crate::standard_common::syscall::syscall_whitelist; +use crate::standard_common::{AcpiBuilder, StdMachineOps}; +use crate::{register_shutdown_event, MachineBase, MachineOps, StdMachine}; +use acpi::{ + AcpiIoApic, AcpiLocalApic, AcpiSratMemoryAffinity, AcpiSratProcessorAffinity, AcpiTable, + AmlBuilder, AmlInteger, AmlNameDecl, AmlPackage, AmlScope, AmlScopeBuilder, TableLoader, + IOAPIC_BASE_ADDR, LAPIC_BASE_ADDR, +}; +use address_space::{AddressSpace, GuestAddress, HostMemMapping, Region}; +use boot_loader::{load_linux, BootLoaderConfig}; +use cpu::{CPUBootConfig, CPUInterface, CPUTopology, CPU}; +use devices::acpi::cpu_controller::{CpuConfig, CpuController}; +use devices::acpi::ged::{Ged, GedEvent}; +use devices::legacy::{ + error::LegacyError as DevErrorKind, FwCfgEntryType, FwCfgIO, FwCfgOps, PFlash, Serial, RTC, + SERIAL_ADDR, +}; +use devices::pci::{PciBus, PciHost}; +use devices::{convert_bus_mut, Device, MUT_PCI_BUS}; +use hypervisor::kvm::x86_64::*; +use hypervisor::kvm::*; +#[cfg(feature = "gtk")] +use machine_manager::config::UiContext; +use machine_manager::config::{BootIndexInfo, DriveConfig, NumaNode, SerialConfig, VmConfig}; +use machine_manager::event; +use machine_manager::qmp::{qmp_channel::QmpChannel, qmp_schema}; +use migration::{MigrationManager, MigrationStatus}; +#[cfg(feature = "gtk")] +use ui::gtk::gtk_display_init; +#[cfg(feature = "vnc")] +use ui::vnc::vnc_init; +use util::byte_code::ByteCode; +use util::gen_base_func; +use util::loop_context::create_new_eventfd; +use util::seccomp::BpfRule; +use util::seccomp::SeccompCmpOpt; + +pub(crate) const VENDOR_ID_INTEL: u16 = 0x8086; +const HOLE_640K_START: u64 = 0x000A_0000; +const HOLE_640K_END: u64 = 0x0010_0000; + +/// The type of memory layout entry on x86_64 +#[repr(usize)] +pub enum LayoutEntryType { + MemBelow4g = 0_usize, + PcieEcam, + PcieMmio, + GedMmio, + CpuController, + Mmio, + IoApic, + LocalApic, + IdentTss, + MemAbove4g, +} + +/// Layout of x86_64 +pub const MEM_LAYOUT: &[(u64, u64)] = &[ + (0, 0x8000_0000), // MemBelow4g + (0xB000_0000, 0x1000_0000), // PcieEcam + (0xC000_0000, 0x3000_0000), // PcieMmio + (0xF000_0000, 0x04), // GedMmio + (0xF000_0004, 0x03), // CpuController + (0xF010_0000, 0x200), // Mmio + (0xFEC0_0000, 0x10_0000), // IoApic + (0xFEE0_0000, 0x10_0000), // LocalApic + (0xFEF0_C000, 0x4000), // Identity map address and TSS + (0x1_0000_0000, 0x80_0000_0000), // MemAbove4g +]; + +/// The type of Irq entry on aarch64 +enum IrqEntryType { + #[allow(unused)] + Uart, + Sysbus, + Pcie, +} + +/// IRQ MAP of x86_64 +const IRQ_MAP: &[(i32, i32)] = &[ + (4, 4), // Uart + (5, 15), // Sysbus + (16, 19), // Pcie +]; + +impl StdMachine { + pub fn new(vm_config: &VmConfig) -> Result { + let free_irqs = ( + IRQ_MAP[IrqEntryType::Sysbus as usize].0, + IRQ_MAP[IrqEntryType::Sysbus as usize].1, + ); + let mmio_region = ( + MEM_LAYOUT[LayoutEntryType::Mmio as usize].0, + MEM_LAYOUT[LayoutEntryType::Mmio as usize + 1].0, + ); + let base = 
MachineBase::new(vm_config, free_irqs, mmio_region)?; + let sys_mem = base.sys_mem.clone(); + let sys_io = base.sys_io.clone(); + + Ok(StdMachine { + base, + pci_host: Arc::new(Mutex::new(PciHost::new( + &sys_io, + &sys_mem, + MEM_LAYOUT[LayoutEntryType::PcieEcam as usize], + MEM_LAYOUT[LayoutEntryType::PcieMmio as usize], + IRQ_MAP[IrqEntryType::Pcie as usize].0, + ))), + reset_req: Arc::new( + create_new_eventfd() + .with_context(|| MachineError::InitEventFdErr("reset request".to_string()))?, + ), + shutdown_req: Arc::new( + create_new_eventfd().with_context(|| { + MachineError::InitEventFdErr("shutdown request".to_string()) + })?, + ), + power_button: Arc::new( + create_new_eventfd() + .with_context(|| MachineError::InitEventFdErr("power button".to_string()))?, + ), + cpu_resize_req: Arc::new( + create_new_eventfd() + .with_context(|| MachineError::InitEventFdErr("cpu resize".to_string()))?, + ), + boot_order_list: Arc::new(Mutex::new(Vec::new())), + cpu_controller: None, + }) + } + + pub fn handle_reset_request(vm: &Arc>) -> Result<()> { + let mut locked_vm = vm.lock().unwrap(); + + for (cpu_index, cpu) in locked_vm.base.cpus.iter().enumerate() { + cpu.pause() + .with_context(|| format!("Failed to pause vcpu{}", cpu_index))?; + + cpu.hypervisor_cpu.reset_vcpu(cpu.clone())?; + } + + locked_vm + .reset_all_devices() + .with_context(|| "Fail to reset all devices")?; + locked_vm + .reset_fwcfg_boot_order() + .with_context(|| "Fail to update boot order information to FwCfg device")?; + + if QmpChannel::is_connected() { + let reset_msg = qmp_schema::Reset { guest: true }; + event!(Reset; reset_msg); + } + + for (cpu_index, cpu) in locked_vm.base.cpus.iter().enumerate() { + cpu.resume() + .with_context(|| format!("Failed to resume vcpu{}", cpu_index))?; + } + + Ok(()) + } + + fn init_ich9_lpc(&self, vm: Arc>) -> Result<()> { + let root_bus = Arc::downgrade(&self.pci_host.lock().unwrap().child_bus().unwrap()); + let ich = ich9_lpc::LPCBridge::new( + root_bus, + self.base.sys_io.clone(), + self.reset_req.clone(), + self.shutdown_req.clone(), + )?; + self.register_reset_event(self.reset_req.clone(), vm.clone()) + .with_context(|| "Fail to register reset event in LPC")?; + register_shutdown_event(ich.shutdown_req.clone(), vm) + .with_context(|| "Fail to register shutdown event in LPC")?; + ich.realize()?; + Ok(()) + } + + pub fn get_vcpu_reg_val(&self, _addr: u64, _vcpu: usize) -> Option { + None + } + + pub fn handle_hotplug_vcpu_request(vm: &Arc>) -> Result<()> { + let mut locked_vm = vm.lock().unwrap(); + locked_vm.add_vcpu_device(vm.clone()) + } + + fn init_cpu_controller( + &mut self, + boot_config: CPUBootConfig, + cpu_topology: CPUTopology, + vm: Arc>, + ) -> Result<()> { + let region_base: u64 = MEM_LAYOUT[LayoutEntryType::CpuController as usize].0; + let region_size: u64 = MEM_LAYOUT[LayoutEntryType::CpuController as usize].1; + let cpu_config = CpuConfig::new(boot_config, cpu_topology); + let hotplug_cpu_req = Arc::new( + create_new_eventfd() + .with_context(|| MachineError::InitEventFdErr("hotplug cpu".to_string()))?, + ); + let cpu_controller = CpuController::new( + self.base.cpu_topo.max_cpus, + &self.base.sysbus, + region_base, + region_size, + cpu_config, + hotplug_cpu_req.clone(), + self.base.cpus.clone(), + )?; + let realize_controller = cpu_controller + .realize() + .with_context(|| "Failed to realize Cpu Controller")?; + self.register_hotplug_vcpu_event(hotplug_cpu_req, vm)?; + self.cpu_controller = Some(realize_controller); + Ok(()) + } +} + +impl StdMachineOps for StdMachine 
{ + fn init_pci_host(&self) -> Result<()> { + let root_bus = Arc::downgrade(&self.pci_host.lock().unwrap().child_bus().unwrap()); + let mmconfig_region_ops = PciHost::build_mmconfig_ops(self.pci_host.clone()); + let mmconfig_region = Region::init_io_region( + MEM_LAYOUT[LayoutEntryType::PcieEcam as usize].1, + mmconfig_region_ops.clone(), + "PcieEcamSpace", + ); + self.base + .sys_mem + .root() + .add_subregion( + mmconfig_region.clone(), + MEM_LAYOUT[LayoutEntryType::PcieEcam as usize].0, + ) + .with_context(|| "Failed to register ECAM in memory space.")?; + + let pio_addr_ops = PciHost::build_pio_addr_ops(self.pci_host.clone()); + let pio_addr_region = Region::init_io_region(4, pio_addr_ops, "PioAddr"); + self.base + .sys_io + .root() + .add_subregion(pio_addr_region, 0xcf8) + .with_context(|| "Failed to register CONFIG_ADDR port in I/O space.")?; + let pio_data_ops = PciHost::build_pio_data_ops(self.pci_host.clone()); + let pio_data_region = Region::init_io_region(4, pio_data_ops, "PioData"); + self.base + .sys_io + .root() + .add_subregion(pio_data_region, 0xcfc) + .with_context(|| "Failed to register CONFIG_DATA port in I/O space.")?; + + let mch = Mch::new(root_bus, mmconfig_region, mmconfig_region_ops); + mch.realize()?; + Ok(()) + } + + fn add_fwcfg_device( + &mut self, + nr_cpus: u8, + max_cpus: u8, + ) -> Result>>> { + let mut fwcfg = FwCfgIO::new(self.base.sys_mem.clone(), &self.base.sysbus)?; + fwcfg.add_data_entry(FwCfgEntryType::NbCpus, nr_cpus.as_bytes().to_vec())?; + fwcfg.add_data_entry(FwCfgEntryType::MaxCpus, max_cpus.as_bytes().to_vec())?; + fwcfg.add_data_entry(FwCfgEntryType::Irq0Override, 1_u32.as_bytes().to_vec())?; + + let boot_order = Vec::::new(); + fwcfg + .add_file_entry("bootorder", boot_order) + .with_context(|| DevErrorKind::AddEntryErr("bootorder".to_string()))?; + + let fwcfg_dev = fwcfg + .realize() + .with_context(|| "Failed to realize fwcfg device")?; + self.base.fwcfg_dev = Some(fwcfg_dev.clone()); + + Ok(Some(fwcfg_dev)) + } + + fn get_cpu_controller(&self) -> &Arc> { + self.cpu_controller.as_ref().unwrap() + } + + fn add_vcpu_device(&mut self, clone_vm: Arc>) -> Result<()> { + let mut locked_controller = self.cpu_controller.as_ref().unwrap().lock().unwrap(); + let device_id; + let vcpu_id; + (device_id, vcpu_id) = locked_controller.get_hotplug_cpu_info(); + + // Check if there is a reusable CPU, and if not, create a new one. + let vcpu = if let Some(reuse_vcpu) = locked_controller.find_reusable_vcpu() { + locked_controller.setup_reuse_vcpu(reuse_vcpu.clone())?; + reuse_vcpu + } else { + let boot_cfg = locked_controller.get_boot_config(); + let topology = locked_controller.get_topology_config(); + + let hypervisor = clone_vm.lock().unwrap().base.hypervisor.clone(); + let vcpu = ::create_vcpu( + vcpu_id, + clone_vm, + hypervisor, + self.base.cpu_topo.max_cpus, + )?; + vcpu.realize(boot_cfg, topology).with_context(|| { + format!( + "Failed to realize arch cpu register/features for CPU {}", + vcpu_id + ) + })?; + + locked_controller.setup_hotplug_vcpu(device_id, vcpu_id, vcpu.clone())?; + self.base.cpus.push(vcpu.clone()); + vcpu + }; + // Start vcpu. + let cpu_thread_barrier = Arc::new(Barrier::new(1)); + if let Err(e) = CPU::start(vcpu, cpu_thread_barrier, false) { + bail!("Failed to run vcpu-{}, {:?}", vcpu_id, e) + }; + // Trigger GED cpu resize event. 
+ self.cpu_resize_req + .write(1) + .with_context(|| "Failed to write cpu resize request.") + } + + fn remove_vcpu_device(&mut self, vcpu_id: u8) -> Result<()> { + if self.base.numa_nodes.is_some() { + bail!("Not support to hotunplug cpu in numa architecture now.") + } + let mut locked_controller = self.cpu_controller.as_ref().unwrap().lock().unwrap(); + + // Trigger GED cpu resize event. + locked_controller.set_hotunplug_cpu(vcpu_id)?; + self.cpu_resize_req + .write(1) + .with_context(|| "Failed to write cpu resize request.") + } + + fn find_cpu_id_by_device_id(&mut self, device_id: &str) -> Option { + let locked_controller = self.cpu_controller.as_ref().unwrap().lock().unwrap(); + locked_controller.find_cpu_by_device_id(device_id) + } +} + +impl MachineOps for StdMachine { + gen_base_func!(machine_base, machine_base_mut, MachineBase, base); + + fn init_machine_ram(&self, sys_mem: &Arc, mem_size: u64) -> Result<()> { + let ram = self.get_vm_ram(); + let below4g_size = MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1; + + let below4g_ram = Region::init_alias_region( + ram.clone(), + 0, + std::cmp::min(below4g_size, mem_size), + "below4g_ram", + ); + sys_mem.root().add_subregion( + below4g_ram, + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0, + )?; + + if mem_size > below4g_size { + let above4g_ram = Region::init_alias_region( + ram.clone(), + below4g_size, + mem_size - below4g_size, + "above4g_ram", + ); + let above4g_start = MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0; + sys_mem.root().add_subregion(above4g_ram, above4g_start)?; + } + Ok(()) + } + + fn init_interrupt_controller(&mut self, _vcpu_count: u64) -> Result<()> { + let hypervisor = self.get_hypervisor(); + let mut locked_hypervisor = hypervisor.lock().unwrap(); + locked_hypervisor.create_interrupt_controller()?; + + let child_bus = self.pci_host.lock().unwrap().child_bus().unwrap(); + MUT_PCI_BUS!(child_bus, locked_bus, pci_bus); + let irq_manager = locked_hypervisor.create_irq_manager()?; + pci_bus.msi_irq_manager = irq_manager.msi_irq_manager; + self.base.sysbus.lock().unwrap().irq_manager = irq_manager.line_irq_manager; + + Ok(()) + } + + fn load_boot_source(&self, fwcfg: Option<&Arc>>) -> Result { + let boot_source = self.base.boot_source.lock().unwrap(); + let initrd = boot_source.initrd.as_ref().map(|b| b.initrd_file.clone()); + + // MEM_LAYOUT is defined statically, will not overflow. + let gap_start = MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0 + + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1; + let gap_end = MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0; + let bootloader_config = BootLoaderConfig { + kernel: boot_source.kernel_file.clone(), + initrd, + kernel_cmdline: boot_source.kernel_cmdline.to_string(), + cpu_count: self.base.cpu_topo.nrcpus, + // gap_end is bigger than gap_start, as MEM_LAYOUT is defined statically. 
+ gap_range: (gap_start, gap_end - gap_start), + ioapic_addr: MEM_LAYOUT[LayoutEntryType::IoApic as usize].0 as u32, + lapic_addr: MEM_LAYOUT[LayoutEntryType::LocalApic as usize].0 as u32, + ident_tss_range: Some(MEM_LAYOUT[LayoutEntryType::IdentTss as usize]), + prot64_mode: false, + }; + let layout = load_linux(&bootloader_config, &self.base.sys_mem, fwcfg) + .with_context(|| MachineError::LoadKernErr)?; + + Ok(CPUBootConfig { + prot64_mode: false, + boot_ip: layout.boot_ip, + boot_sp: layout.boot_sp, + boot_selector: layout.boot_selector, + ..Default::default() + }) + } + + fn add_rtc_device(&mut self, mem_size: u64) -> Result<()> { + let mut rtc = RTC::new(&self.base.sysbus).with_context(|| "Failed to create RTC device")?; + rtc.set_memory( + mem_size, + // MEM_LAYOUT is defined statically, will not overflow. + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0 + + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1, + ); + rtc.realize() + .with_context(|| "Failed to realize RTC device")?; + Ok(()) + } + + fn add_ged_device(&mut self) -> Result<()> { + let region_base: u64 = MEM_LAYOUT[LayoutEntryType::GedMmio as usize].0; + let region_size: u64 = MEM_LAYOUT[LayoutEntryType::GedMmio as usize].1; + let ged_event = GedEvent::new(self.power_button.clone(), self.cpu_resize_req.clone()); + let ged = Ged::new( + false, + &self.base.sysbus, + region_base, + region_size, + ged_event, + )?; + + ged.realize().with_context(|| "Failed to realize Ged")?; + Ok(()) + } + + fn add_serial_device(&mut self, config: &SerialConfig) -> Result<()> { + let region_base: u64 = SERIAL_ADDR; + let region_size: u64 = 8; + let serial = Serial::new(config.clone(), &self.base.sysbus, region_base, region_size)?; + serial + .realize() + .with_context(|| "Failed to realize serial device.")?; + Ok(()) + } + + fn syscall_whitelist(&self) -> Vec { + syscall_whitelist() + } + + fn realize(vm: &Arc>, vm_config: &mut VmConfig) -> Result<()> { + let nr_cpus = vm_config.machine_config.nr_cpus; + let max_cpus = vm_config.machine_config.max_cpus; + let mut locked_vm = vm.lock().unwrap(); + locked_vm.init_global_config(vm_config)?; + locked_vm.base.numa_nodes = locked_vm.add_numa_nodes(vm_config)?; + locked_vm.init_interrupt_controller(u64::from(nr_cpus))?; + let locked_hypervisor = locked_vm.base.hypervisor.lock().unwrap(); + locked_hypervisor.init_machine(&locked_vm.base.sys_io, &locked_vm.base.sys_mem)?; + drop(locked_hypervisor); + locked_vm.init_memory( + &vm_config.machine_config.mem_config, + &locked_vm.base.sys_mem, + nr_cpus, + )?; + + locked_vm + .init_pci_host() + .with_context(|| MachineError::InitPCIeHostErr)?; + locked_vm + .init_ich9_lpc(vm.clone()) + .with_context(|| "Fail to init LPC bridge")?; + locked_vm.add_devices(vm_config)?; + + let fwcfg = locked_vm.add_fwcfg_device(nr_cpus, max_cpus)?; + let boot_config = locked_vm.load_boot_source(fwcfg.as_ref())?; + let topology = CPUTopology::new().set_topology(( + vm_config.machine_config.nr_threads, + vm_config.machine_config.nr_cores, + vm_config.machine_config.nr_dies, + )); + let hypervisor = locked_vm.base.hypervisor.clone(); + locked_vm.base.cpus.extend(::init_vcpu( + vm.clone(), + hypervisor, + nr_cpus, + max_cpus, + &topology, + &boot_config, + )?); + + locked_vm.init_cpu_controller(boot_config, topology, vm.clone())?; + + if let Some(fw_cfg) = fwcfg { + locked_vm + .build_acpi_tables(&fw_cfg) + .with_context(|| "Failed to create ACPI tables")?; + let mut mem_array = Vec::new(); + let mem_size = vm_config.machine_config.mem_config.mem_size; + let below_size = + 
std::cmp::min(MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1, mem_size); + mem_array.push(( + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0, + below_size, + )); + if mem_size > below_size { + mem_array.push(( + MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0, + mem_size - below_size, + )); + } + + locked_vm + .build_smbios(&fw_cfg, mem_array) + .with_context(|| "Failed to create smbios tables")?; + } + + locked_vm + .reset_fwcfg_boot_order() + .with_context(|| "Fail to update boot order imformation to FwCfg device")?; + + locked_vm + .display_init(vm_config) + .with_context(|| "Fail to init display")?; + + #[cfg(feature = "windows_emu_pid")] + crate::watch_windows_emu_pid( + vm_config, + locked_vm.shutdown_req.clone(), + locked_vm.shutdown_req.clone(), + vm.clone(), + ); + + MigrationManager::register_vm_config(locked_vm.get_vm_config()); + MigrationManager::register_vm_instance(vm.clone()); + let migration_hyp = locked_vm.base.migration_hypervisor.clone(); + migration_hyp.lock().unwrap().register_instance()?; + MigrationManager::register_migration_instance(migration_hyp); + if let Err(e) = MigrationManager::set_status(MigrationStatus::Setup) { + bail!("Failed to set migration status {}", e); + } + + Ok(()) + } + + fn add_pflash_device(&mut self, configs: &[DriveConfig]) -> Result<()> { + let mut configs_vec = configs.to_vec(); + configs_vec.sort_by_key(|c| c.unit.unwrap()); + // The two PFlash devices locates below 4GB, this variable represents the end address + // of current PFlash device. + let mut flash_end: u64 = MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0; + for config in configs_vec { + let file = self.fetch_drive_file(&config.path_on_host)?; + let pfl_size = file.as_ref().metadata()?.len(); + + if config.unit.unwrap() == 0 { + // According to the Linux/x86 boot protocol, the memory region of + // 0x000000 - 0x100000 (1 MiB) is for BIOS usage. And the top 128 + // KiB is for BIOS code which is stored in the first PFlash. + let rom_base = 0xe0000; + let rom_size = 0x20000; + let seek_start = pfl_size + .checked_sub(rom_size) + .with_context(|| "pflash file size less than rom size")?; + file.as_ref().seek(SeekFrom::Start(seek_start))?; + + let ram1 = Arc::new(HostMemMapping::new( + GuestAddress(rom_base), + None, + rom_size, + None, + false, + false, + false, + )?); + let rom_region = Region::init_ram_region(ram1, "PflashRam"); + rom_region.write(&mut file.as_ref(), GuestAddress(rom_base), 0, rom_size)?; + rom_region.set_priority(10); + self.base + .sys_mem + .root() + .add_subregion(rom_region, rom_base)?; + + file.as_ref().rewind()? + } + + let sector_len: u32 = 1024 * 4; + let backend = Some(file); + let region_base = flash_end + .checked_sub(pfl_size) + .with_context(|| "flash end is less than flash size")?; + let pflash = PFlash::new( + pfl_size, + backend, + sector_len, + 4_u32, + 1_u32, + config.readonly, + &self.base.sysbus, + region_base, + ) + .with_context(|| MachineError::InitPflashErr)?; + pflash + .realize() + .with_context(|| MachineError::RlzPflashErr)?; + // sub has been checked above. + flash_end -= pfl_size; + } + + Ok(()) + } + + /// Create display. + #[allow(unused_variables)] + fn display_init(&mut self, vm_config: &mut VmConfig) -> Result<()> { + // GTK display init. 
+ #[cfg(feature = "gtk")] + match vm_config.display { + Some(ref ds_cfg) if ds_cfg.display_type == "gtk" => { + let ui_context = UiContext { + vm_name: vm_config.guest_name.clone(), + power_button: None, + shutdown_req: Some(self.shutdown_req.clone()), + pause_req: None, + resume_req: None, + }; + gtk_display_init(ds_cfg, ui_context) + .with_context(|| "Failed to init GTK display!")?; + } + _ => {} + }; + + // VNC display init. + #[cfg(feature = "vnc")] + vnc_init(&vm_config.vnc, &vm_config.object) + .with_context(|| "Failed to init VNC server!")?; + Ok(()) + } + + fn get_pci_host(&mut self) -> Result<&Arc>> { + Ok(&self.pci_host) + } + + fn get_boot_order_list(&self) -> Option>>> { + Some(self.boot_order_list.clone()) + } +} + +pub(crate) fn arch_ioctl_allow_list(bpf_rule: BpfRule) -> BpfRule { + bpf_rule + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_PIT2() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_CLOCK() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_IRQCHIP() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_REGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_SREGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_XSAVE() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_SREGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_DEBUGREGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_XCRS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_LAPIC() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_MSRS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_GET_SUPPORTED_CPUID() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_CPUID2() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_SREGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_REGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_XSAVE() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_XCRS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_DEBUGREGS() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_LAPIC() as u32) + .add_constraint(SeccompCmpOpt::Eq, 1, KVM_SET_MSRS() as u32) +} + +pub(crate) fn arch_syscall_whitelist() -> Vec { + vec![ + #[cfg(not(target_env = "gnu"))] + BpfRule::new(libc::SYS_epoll_pwait), + BpfRule::new(libc::SYS_epoll_wait), + BpfRule::new(libc::SYS_open), + #[cfg(target_env = "musl")] + BpfRule::new(libc::SYS_stat), + BpfRule::new(libc::SYS_mkdir), + BpfRule::new(libc::SYS_unlink), + BpfRule::new(libc::SYS_readlink), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_clone3), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_rt_sigaction), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_poll), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_access), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_sched_setattr), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_fadvise64), + #[cfg(target_env = "gnu")] + BpfRule::new(libc::SYS_rseq), + ] +} + +impl AcpiBuilder for StdMachine { + fn build_dsdt_table( + &self, + acpi_data: &Arc>>, + loader: &mut TableLoader, + ) -> Result { + let mut dsdt = AcpiTable::new(*b"DSDT", 2, *b"STRATO", *b"VIRTDSDT", 1); + + // 1. Create pci host bridge node. + let mut sb_scope = AmlScope::new("\\_SB"); + sb_scope.append_child(self.pci_host.lock().unwrap().clone()); + dsdt.append_child(sb_scope.aml_bytes().as_slice()); + + // 2. Info of devices attached to system bus. + dsdt.append_child(self.base.sysbus.lock().unwrap().aml_bytes().as_slice()); + + // 3. Add _S5 sleep state. 
+ let mut package = AmlPackage::new(4); + package.append_child(AmlInteger(5)); + package.append_child(AmlInteger(0)); + package.append_child(AmlInteger(0)); + package.append_child(AmlInteger(0)); + dsdt.append_child(AmlNameDecl::new("_S5", package).aml_bytes().as_slice()); + + let dsdt_begin = StdMachine::add_table_to_loader(acpi_data, loader, &dsdt) + .with_context(|| "Fail to add DSTD table to loader")?; + Ok(dsdt_begin) + } + + fn build_madt_table( + &self, + acpi_data: &Arc>>, + loader: &mut TableLoader, + ) -> Result { + let mut madt = AcpiTable::new(*b"APIC", 5, *b"STRATO", *b"VIRTAPIC", 1); + + madt.append_child(LAPIC_BASE_ADDR.as_bytes()); + // Flags: PC-AT-compatible dual-8259 setup + madt.append_child(1_u32.as_bytes()); + + let ioapic = AcpiIoApic { + type_id: 1_u8, + length: size_of::() as u8, + io_apic_id: 0, + reserved: 0, + io_apic_addr: IOAPIC_BASE_ADDR, + gsi_base: 0, + }; + madt.append_child(ioapic.aml_bytes().as_ref()); + + self.base.cpus.iter().for_each(|cpu| { + let lapic = AcpiLocalApic { + type_id: 0, + length: size_of::() as u8, + processor_uid: cpu.id(), + apic_id: cpu.id(), + flags: 1, // Flags: enabled. + }; + madt.append_child(&lapic.aml_bytes()); + }); + + // Add non boot cpu lapic. + for cpuid in self.base.cpu_topo.nrcpus..self.base.cpu_topo.max_cpus { + let lapic = AcpiLocalApic { + type_id: 0, + length: size_of::() as u8, + processor_uid: cpuid, + apic_id: cpuid, + flags: 2, // Flags: hotplug enabled. + }; + madt.append_child(&lapic.aml_bytes()); + } + + let madt_begin = StdMachine::add_table_to_loader(acpi_data, loader, &madt) + .with_context(|| "Fail to add MADT table to loader")?; + Ok(madt_begin) + } + + fn build_srat_cpu(&self, proximity_domain: u32, node: &NumaNode, srat: &mut AcpiTable) { + for cpu in node.cpus.iter() { + srat.append_child( + &AcpiSratProcessorAffinity { + length: size_of::() as u8, + proximity_lo: proximity_domain as u8, + local_apic_id: *cpu, + flags: 1, + ..Default::default() + } + .aml_bytes(), + ); + } + } + + fn build_srat_mem( + &self, + base_addr: u64, + proximity_domain: u32, + node: &NumaNode, + srat: &mut AcpiTable, + ) -> u64 { + // MEM_LAYOUT is defined statically, will not overflow. + let mem_below_4g = MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].0 + + MEM_LAYOUT[LayoutEntryType::MemBelow4g as usize].1; + let mem_above_4g = MEM_LAYOUT[LayoutEntryType::MemAbove4g as usize].0; + + let mut mem_base = base_addr; + let mut mem_len = node.size; + let mut next_base = mem_base + mem_len; + // It contains the hole from 604Kb to 1Mb + if mem_base <= HOLE_640K_START && next_base > HOLE_640K_START { + mem_len -= next_base - HOLE_640K_START; + if mem_len > 0 { + srat.append_child( + &AcpiSratMemoryAffinity { + type_id: 1, + length: size_of::() as u8, + proximity_domain, + base_addr: mem_base, + range_length: mem_len, + flags: 1, + ..Default::default() + } + .aml_bytes(), + ); + } + + if next_base <= HOLE_640K_END { + next_base = HOLE_640K_END; + return next_base; + } + mem_base = HOLE_640K_END; + mem_len = next_base - HOLE_640K_END; + } + + // It contains the hole possibly from mem_below_4g(2G) to mem_below_4g(4G). 
+ if mem_base <= mem_below_4g && next_base > mem_below_4g { + mem_len -= next_base - mem_below_4g; + if mem_len > 0 { + srat.append_child( + &AcpiSratMemoryAffinity { + type_id: 1, + length: size_of::() as u8, + proximity_domain, + base_addr: mem_base, + range_length: mem_len, + flags: 1, + ..Default::default() + } + .aml_bytes(), + ); + } + mem_base = mem_above_4g; + mem_len = next_base - mem_below_4g; + next_base = mem_base + mem_len; + } + + if mem_len > 0 { + srat.append_child( + &AcpiSratMemoryAffinity { + type_id: 1, + length: size_of::() as u8, + proximity_domain, + base_addr: mem_base, + range_length: mem_len, + flags: 1, + ..Default::default() + } + .aml_bytes(), + ); + } + + next_base + } + + fn build_srat_table( + &self, + acpi_data: &Arc>>, + loader: &mut TableLoader, + ) -> Result { + let mut srat = AcpiTable::new(*b"SRAT", 1, *b"STRATO", *b"VIRTSRAT", 1); + srat.append_child(&[1_u8; 4_usize]); + srat.append_child(&[0_u8; 8_usize]); + + let mut next_base = 0_u64; + for (id, node) in self.base.numa_nodes.as_ref().unwrap().iter() { + self.build_srat_cpu(*id, node, &mut srat); + next_base = self.build_srat_mem(next_base, *id, node, &mut srat); + } + + let srat_begin = StdMachine::add_table_to_loader(acpi_data, loader, &srat) + .with_context(|| "Fail to add SRAT table to loader")?; + Ok(srat_begin) + } + + fn get_hardware_signature(&self) -> Option { + let vm_config = self.machine_base().vm_config.lock().unwrap(); + vm_config.hardware_signature + } +} diff --git a/machine_manager/Cargo.toml b/machine_manager/Cargo.toml index ed573c5b604eacd7e7876292159abe0549f1a6d3..ec787bc510c1f4eb388d618034d58f700841d431 100644 --- a/machine_manager/Cargo.toml +++ b/machine_manager/Cargo.toml @@ -1,24 +1,45 @@ [package] name = "machine_manager" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" license = "Mulan PSL v2" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -error-chain = "0.12.4" -log = "0.4.8" -libc = ">=0.2.71" -serde_json = "1.0.55" -vmm-sys-util = ">=0.7.0" +regex = "1" +log = "0.4" +libc = "0.2" +serde_json = "1.0" +vmm-sys-util = "0.12.1" +hex = "0.4.3" +serde = { version = "1.0", features = ["derive"] } +strum = "0.24.1" +strum_macros = "0.24.3" +once_cell = "1.18.0" +thiserror = "1.0" +anyhow = "1.0" +trace = { path = "../trace" } util = { path = "../util" } -serde = { version = ">=1.0.114", features = ["derive"] } -strum = "0.20" -strum_macros = "0.20" -once_cell = "1.9.0" +clap = { version = "=4.1.4", default-features = false, features = ["std", "derive"] } [features] default = [] - +scream = [] +scream_alsa = ["scream"] +scream_pulseaudio = ["scream"] +scream_ohaudio = ["scream"] +demo_device = [] +usb_host = [] +usb_camera = [] +usb_camera_v4l2 = ["usb_camera"] +usb_camera_oh = ["usb_camera"] +windows_emu_pid = [] +ohui_srv = [] +gtk = [] +vnc = [] +vnc_auth = [] +ramfb = [] +virtio_gpu = [] +pvpanic = [] diff --git a/machine_manager/src/cmdline.rs b/machine_manager/src/cmdline.rs index 24c34abcd800846bf0f8e191bd2bf7750939c542..f29804e60004f680d381bb9360b79e9557b388f8 100644 --- a/machine_manager/src/cmdline.rs +++ b/machine_manager/src/cmdline.rs @@ -10,20 +10,20 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
-use std::os::unix::net::UnixListener; - -use error_chain::bail; -use util::arg_parser::{Arg, ArgMatches, ArgParser}; -use util::unix::{limit_permission, parse_uri}; +use anyhow::{bail, Context, Result}; +use clap::{ArgAction, Parser}; use crate::{ - config::{add_trace_events, ChardevType, CmdParser, MachineType, VmConfig}, - errors::{Result, ResultExt}, + config::{add_trace, str_slip_to_clap, ChardevType, MachineType, SocketType, VmConfig}, + qmp::qmp_socket::QmpSocketPath, temp_cleaner::TempCleaner, }; - -// Read the programe version in `Cargo.toml`. -const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION"); +use util::file::clear_file; +use util::unix::limit_permission; +use util::{ + arg_parser::{Arg, ArgMatches, ArgParser}, + socket::SocketListener, +}; /// This macro is to run struct $z 's function $s whose arg is $x 's inner member. /// There is a multi-macro-cast in cases of vec and bool. @@ -73,49 +73,95 @@ macro_rules! add_args_to_config_multi { /// This function is to define all commandline arguments. pub fn create_args_parser<'a>() -> ArgParser<'a> { - ArgParser::new("StratoVirt") - .version(VERSION.unwrap_or("unknown")) - .author("Huawei Technologies Co., Ltd") + let parser = ArgParser::new("StratoVirt") + .version(util::VERSION) + .author("The StratoVirt Project Developers") .about("A light kvm-based hypervisor.") .arg( Arg::with_name("name") .long("name") - .value_name("vm_name") + .value_name("[vm_name]") .help("set the name of the guest.") .takes_value(true), ) .arg( Arg::with_name("machine") .long("machine") - .value_name("[type=]name[,dump_guest_core=on|off][,mem-share=on|off]") - .help("selects emulated machine and set properties") + .value_name("[type=][,dump_guest_core=on|off][,mem-share=on|off]") + .help("'type' selects emulated machine type and set properties. \ + 'dump_guest_core' includes guest memory in a core dump. \ + 'mem-share' sets guest memory is shareable.") + .takes_value(true), + ) + .arg( + Arg::with_name("accel") + .long("accel") + .value_name("[accel]") + .help("select accelerator, only 'kvm' is supported now.") .takes_value(true), ) .arg( Arg::with_name("smp") .long("smp") - .value_name("[cpus=]n") - .help("set the number of CPUs to 'n' (default: 1)") + .value_name("[cpus=][,maxcpus=][,sockets=][,dies=][,clusters=][,cores=][,threads=]") + .help("'cpus' sets the number of CPUs to 'n' (default: 1). 'maxcpus' sets number of total CPUs, including online and offline CPUs. \ + 'sockets' is the number of sockets on the machine. \ + 'dies' is the number of dies in one socket. \ + 'clusters' is the number of clusters in one die. \ + 'cores' is the number of cores in one cluster. 
\ + 'threads' is the number of threads in one core") .takes_value(true), ) + .arg( + Arg::with_name("cpu") + .long("cpu") + .value_name("host[,pmu=on|off][,sve=on|off]") + .help("set CPU model and features.") + .can_no_value(false) + .takes_value(true) + ) + .arg( + Arg::with_name("freeze_cpu") + .short("S") + .long("freeze") + .help("freeze CPU at startup") + .takes_value(false) + .required(false), + ) .arg( Arg::with_name("memory") .long("m") - .value_name("[size=]megs[m|M|g|G]") - .help("configure guest RAM") + .value_name("[size=][m|M|g|G]") + .help("configure guest RAM(default unit: MiB).") .takes_value(true), ) .arg( Arg::with_name("mem-path") .long("mem-path") - .value_name("filebackend file path") + .value_name("") .help("configure file path that backs guest memory.") .takes_value(true), ) + .arg( + Arg::with_name("mem-prealloc") + .long("mem-prealloc") + .help("Prealloc memory for VM") + .takes_value(false) + .required(false), + ) + .arg( + Arg::with_name("numa") + .multiple(true) + .long("numa") + .value_name("") + .help("\n\t\tset numa node: -numa node,nodeid=<0>,cpus=<0-1>,memdev=; \ + \n\t\tset numa distance: -numa dist,src=<0>,dst=<1>,val=<20> ") + .takes_values(true), + ) .arg( Arg::with_name("kernel") .long("kernel") - .value_name("kernel_path") + .value_name("") .help("use uncompressed kernel image") .takes_value(true), ) @@ -123,30 +169,40 @@ pub fn create_args_parser<'a>() -> ArgParser<'a> { Arg::with_name("kernel-cmdline") .multiple(true) .long("append") - .value_name("kernel cmdline parameters") + .value_name("") .help("use 'cmdline' as kernel command line") .takes_values(true), ) .arg( Arg::with_name("initrd-file") .long("initrd") - .value_name("initrd_path") + .value_name("") .help("use 'initrd-file' as initial ram disk") .takes_value(true), ) .arg( Arg::with_name("qmp") .long("qmp") + .value_name("") + .help("\n\t\tset unix socket path: unix:,server,nowait; \ + \n\t\tset tcp socket path: tcp:ip:port,server,nowait") + .takes_value(true) + ) + .arg( + Arg::with_name("mod-test") + .long("mod-test") .value_name("unix:socket_path") - .help("set qmp's unixsocket path") + .help("set module test's unixsocket path") .takes_value(true) ) .arg( Arg::with_name("drive") .multiple(true) .long("drive") - .value_name("file=path,id=str[,readonly=][,direct=][,serial=][,iothread=][iops=]") - .help("use 'file' as a drive image") + .value_name("") + .help("\n\t\tset block drive image: -drive id=,file=[,readonly=on|off][,direct=on|off][,throttling.iops-total=<200>]; \ + \n\t\tset pflash drive image: -drive file=,if=pflash,unit=0|1[,readonly=true|false]; \ + \n\t\tset scsi drive image: -drive id=,file=[,readonly=true|false]") .takes_values(true), ) .arg( @@ -154,7 +210,7 @@ pub fn create_args_parser<'a>() -> ArgParser<'a> { .multiple(true) .long("netdev") .value_name( - "id=str,netdev=str[,mac=][,fds=][,vhost=on|off][,vhostfd=][,iothread=]", + "tap,id=,ifname=[,vhost=on|off][,queue=]", ) .help("configure a host TAP network with ID 'str'") .takes_values(true), @@ -163,29 +219,63 @@ pub fn create_args_parser<'a>() -> ArgParser<'a> { Arg::with_name("chardev") .multiple(true) .long("chardev") - .value_name("id=str,path=socket_path") - .help("set char device virtio console for vm") + .value_name("") + .help("\n\t\tadd standard i/o device: -chardev stdio,id=; \ + \n\t\tadd pseudo-terminal: -chardev pty,id=; \ + \n\t\tadd file: -chardev file,id=,path=; \ + \n\t\tadd unix-socket: -chardev socket,id=,path=[,server][,nowait]; \ + \n\t\tadd tcp-socket: -chardev 
socket,id=,port=[,host=host][,server][,nowait];") .takes_values(true), ) .arg( Arg::with_name("device") .multiple(true) .long("device") - .value_name("vsock,id=str,guest-cid=u32[,vhostfd=]") - .help("add virtio vsock device and sets properties") + .value_name("") + .help("\n\t\tadd virtio mmio block: -device virtio-blk-device,id=,drive=[,iothread=][,serial=]; \ + \n\t\tadd virtio pci block: -device virtio-blk-pci,id=,drive=,bus=,addr=<0x3>[,multifunction=on|off][,iothread=][,serial=][,num-queues=][,bootindex=]; \ + \n\t\tadd vhost user pci block: -device vhost-user-blk-pci,id=,chardev=,bus=,addr=<0x3>[,num-queues=][,bootindex=]; \ + \n\t\tadd virtio mmio net: -device virtio-net-device,id=,netdev=[,iothread=][,mac=<12:34:56:78:9A:BC>]; \ + \n\t\tadd virtio pci net: -device virtio-net-pci,id=,netdev=,bus=,addr=<0x2>[,multifunction=on|off][,iothread=][,mac=<12:34:56:78:9A:BC>][,mq=on|off]; \ + \n\t\tadd vhost mmio net: -device virtio-net-device,id=,netdev=[,iothread=][,mac=<12:34:56:78:9A:BC>]; \ + \n\t\tadd vhost pci net: -device virtio-net-pci,id=,netdev=,bus=,addr=<0x2>[,multifunction=on|off][,iothread=][,mac=<12:34:56:78:9A:BC>][,mq=on|off]; \ + \n\t\tadd virtio mmio console: -device virtio-serial-device[,id=] -device virtconsole,id=console_id,chardev=; \ + \n\t\tadd virtio pci console: -device virtio-serial-pci,id=,bus=,addr=<0x3>[,multifunction=on|off] -device virtconsole,id=,chardev=; \ + \n\t\tadd vhost mmio vsock: -device vhost-vsock-device,id=,guest-cid=; \ + \n\t\tadd vhost pci vsock: -device vhost-vsock-pci,id=,guest-cid=,bus=,addr=<0x3>[,multifunction=on|off]; \ + \n\t\tadd virtio mmio balloon: -device virtio-balloon-device[,deflate-on-oom=true|false][,free-page-reporting=true|false]; \ + \n\t\tadd virtio pci balloon: -device virtio-balloon-pci,id=,bus=,addr=<0x4>[,deflate-on-oom=true|false][,free-page-reporting=true|false][,multifunction=on|off]; \ + \n\t\tadd virtio mmio rng: -device virtio-rng-device,rng=,max-bytes=<1234>,period=<1000>; \ + \n\t\tadd virtio pci rng: -device virtio-rng-pci,id=,rng=,max-bytes=<1234>,period=<1000>,bus=,addr=<0x1>[,multifunction=on|off]; \ + \n\t\tadd pcie root port: -device pcie-root-port,id=,port=<0x1>,bus=,addr=<0x1>[,multifunction=on|off]; \ + \n\t\tadd vfio pci: -device vfio-pci,id=,host=<0000:1a:00.3>,bus=,addr=<0x03>[,multifunction=on|off]; \ + \n\t\tadd usb controller: -device nec-usb-xhci,id=,bus=,addr=<0xa>; \ + \n\t\tadd usb keyboard: -device usb-kbd,id=; \ + \n\t\tadd usb tablet: -device usb-tablet,id=; \ + \n\t\tadd usb storage: -device usb-storage,id=,drive=; \ + \n\t\tadd scsi controller: -device virtio-scsi-pci,id=,bus=,addr=<0x3>[,multifunction=on|off][,iothread=][,num-queues=]; \ + \n\t\tadd scsi hard disk: -device scsi-hd,scsi-id=<0>,bus=,lun=<0>,drive=,id=; \ + \n\t\tadd vhost user fs: -device vhost-user-fs-pci,id=,chardev=,tag=; \ + \n\t\tadd pvpanic: -device pvpanic,id=,bus=,addr=<0x7>[,supported-features=<0|1|2|3>];") .takes_values(true), ) .arg( Arg::with_name("serial") .long("serial") - .value_name("backend[,path=,server,nowait] or chardev:char_id") - .help("add serial and set chardev for it") + .value_name("") + .help("\n\t\tuse chardev device: -serial chardev:; \ + \n\t\tuse standard i/o device: -serial stdio; \ + \n\t\tuse pseudo-terminal: -serial pty; \ + \n\t\tuse file: -serial file,path=; \ + \n\t\tuse unix-socket: -serial socket,path=[,server][,nowait]; \ + \n\t\tuse tcp-socket: -serial socket,port=[,host=][,server][,nowait]; \ + ") .takes_value(true), ) .arg( Arg::with_name("display log") .long("D") - 
.value_name("log path") + .value_name("[log path]") .help("output log to logfile (default stderr)") .takes_value(true) .can_no_value(true), @@ -193,13 +283,14 @@ pub fn create_args_parser<'a>() -> ArgParser<'a> { .arg( Arg::with_name("pidfile") .long("pidfile") - .value_name("pidfile path") + .value_name("") .help("write PID to 'file'") .takes_value(true), ) .arg( Arg::with_name("daemonize") .long("daemonize") + .value_name("") .help("daemonize StratoVirt after initializing") .takes_value(false) .required(false), @@ -207,48 +298,44 @@ pub fn create_args_parser<'a>() -> ArgParser<'a> { .arg( Arg::with_name("disable-seccomp") .long("disable-seccomp") + .value_name("") .help("not use seccomp sandbox for StratoVirt") .takes_value(false) .required(false), ) - .arg( - Arg::with_name("freeze_cpu") - .short("S") - .long("freeze") - .help("Freeze CPU at startup") - .takes_value(false) - .required(false), - ) .arg( Arg::with_name("incoming") .long("incoming") - .help("wait for the URI to be specified via migrate_incoming") - .value_name("incoming") + .value_name("") + .help("\n\t\tdo the migration using tcp socket: -incoming tcp::; \ + \n\t\tdo the migration using unix socket: -incoming unix:; \ + \n\t\tdo the virtual machine snapshot: -incoming file:") .takes_value(true), ) .arg( Arg::with_name("object") .multiple(true) .long("object") - .value_name("-object virtio-rng-device,rng=rng_name,max-bytes=1234,period=1000") - .help("add object") + .value_name("") + .help("\n\t\tadd memory backend ram object: -object memory-backend-ram,size=,id=[,policy=] + [,host-nodes=<0>][,mem-prealloc=][,dump-guest-core=][,share=]; \ + \n\t\tadd memory backend file object: -object memory-backend-file,size=,id=[,host-nodes=<0-1>] \ + [,policy=bind][,mem-path=][,dump-guest-core=][,mem-prealloc=][,share=] \ + \n\t\tadd memory backend memfd object: -object memory-backend-memfd,size=,id=[,host-nodes=0-1][,policy=bind] \ + [,mem-prealloc=][,dump-guest-core=][,share=]; \ + \n\t\tadd iothread object: -object iothread,id=; \ + \n\t\tadd rng object: -object rng-random,id=,filename=; \ + \n\t\tadd vnc tls object: -object tls-creds-x509,id=,dir=; \ + \n\t\tadd authz object: -object authz-simple,id=,identity=") .takes_values(true), ) .arg( Arg::with_name("mon") .long("mon") - .value_name("chardev=chardev_id,id=mon_id[,mode=control]") + .value_name("chardev=,id=[,mode=control]") .help("-mon is another way to create qmp channel. 
To use it, the chardev should be specified") .takes_value(true), ) - .arg( - Arg::with_name("cpu") - .long("cpu") - .value_name("host") - .hidden(true) - .can_no_value(true) - .takes_value(true), - ) .arg( Arg::with_name("overcommit") .long("overcommit") @@ -310,6 +397,14 @@ pub fn create_args_parser<'a>() -> ArgParser<'a> { .can_no_value(true) .takes_value(true), ) + .arg( + Arg::with_name("battery") + .long("battery") + .value_name("") + .help("enable battery and power adapter devices") + .takes_value(false) + .required(false), + ) .arg( Arg::with_name("boot") .long("boot") @@ -348,30 +443,87 @@ pub fn create_args_parser<'a>() -> ArgParser<'a> { .can_no_value(true) .takes_value(true), ) - .arg( - Arg::with_name("mem-prealloc") - .long("mem-prealloc") - .help("Prealloc memory for VM") - .takes_value(false) - .required(false), - ) .arg( Arg::with_name("trace") .multiple(false) .long("trace") - .value_name("events=") - .help("specify the file lists trace events to enable") + .value_name("file=|type=") + .help("specify the trace state to enable") .takes_value(true), ) .arg( Arg::with_name("global") .multiple(true) .long("global") - .value_name("[key=value]") + .value_name("[key=]") .help("set global config") .takes_values(true) .required(false), ) + .arg( + Arg::with_name("hardware-signature") + .multiple(false) + .long("hardware-signature") + .value_name("<32bit integer>") + .help("set ACPI Hardware Signature") + .takes_value(true) + .required(false), + ) + .arg( + Arg::with_name("smbios") + .multiple(true) + .long("smbios") + .value_name("") + .help("\n\t\tadd type0 table: -smbios type=0[,vendor=str][,version=str][,date=str]; \ + \n\t\tadd type1 table: -smbios type=1[,manufacturer=str][,version=str][,product=str][,serial=str][,uuid=str][,sku=str][,family=str]; \ + \n\t\tadd type2 table: -smbios type=2[,manufacturer=str][,product=str][,version=str][,serial=str][,asset=str][,location=str]; \ + \n\t\tadd type3 table: -smbios type=3[,manufacturer=str][,version=str][,serial=str][,asset=str][,sku=str]; \ + \n\t\tadd type4 table: -smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,max-speed=%d][,current-speed=%d]; \ + \n\t\tadd type17 table: -smbios type=17[,loc_pfx=str][,bank=str][,manufacturer=str][,serial=str][,asset=str][,part=str][,speed=%d]") + .takes_values(true), + ); + + #[cfg(feature = "usb_camera")] + let parser = parser.arg( + Arg::with_name("cameradev") + .multiple(true) + .long("cameradev") + .value_name("") + .help("set cameradev: -cameradev v4l2,id=,path=") + .takes_values(true), + ); + + #[cfg(feature = "gtk")] + let parser = parser.arg( + Arg::with_name("display") + .multiple(false) + .long("display") + .value_name("gtk") + .help("set display for virtual machine: currently only supports gtk") + .takes_value(true), + ); + + #[cfg(feature = "vnc")] + let parser = parser.arg( + Arg::with_name("vnc") + .multiple(false) + .long("vnc") + .value_name("ip:port") + .help("specify the ip and port for vnc") + .takes_value(true), + ); + + #[cfg(feature = "windows_emu_pid")] + let parser = parser.arg( + Arg::with_name("windows_emu_pid") + .multiple(false) + .long("windows_emu_pid") + .value_name("pid") + .help("watch on the external emulator pid") + .takes_value(true), + ); + + parser } /// Create `VmConfig` from `ArgMatches`'s arg. 
@@ -396,12 +548,32 @@ pub fn create_vmconfig(args: &ArgMatches) -> Result { // Parse cmdline args which need to set in VmConfig add_args_to_config!((args.value_of("name")), vm_cfg, add_name); add_args_to_config!((args.value_of("machine")), vm_cfg, add_machine); + add_args_to_config!((args.value_of("accel")), vm_cfg, add_accel); add_args_to_config!((args.value_of("memory")), vm_cfg, add_memory); add_args_to_config!((args.value_of("mem-path")), vm_cfg, add_mem_path); add_args_to_config!((args.value_of("smp")), vm_cfg, add_cpu); + add_args_to_config!((args.value_of("cpu")), vm_cfg, add_cpu_feature); add_args_to_config!((args.value_of("kernel")), vm_cfg, add_kernel); add_args_to_config!((args.value_of("initrd-file")), vm_cfg, add_initrd); add_args_to_config!((args.value_of("serial")), vm_cfg, add_serial); + add_args_to_config!((args.value_of("incoming")), vm_cfg, add_incoming); + #[cfg(feature = "vnc")] + add_args_to_config!((args.value_of("vnc")), vm_cfg, add_vnc); + #[cfg(any(feature = "gtk", feature = "ohui_srv"))] + add_args_to_config!((args.value_of("display")), vm_cfg, add_display); + #[cfg(feature = "windows_emu_pid")] + add_args_to_config!( + (args.value_of("windows_emu_pid")), + vm_cfg, + add_windows_emu_pid + ); + add_args_to_config!( + (args.is_present("no-shutdown")), + vm_cfg, + add_no_shutdown, + bool + ); + add_args_to_config!((args.is_present("battery")), vm_cfg, add_battery, bool); add_args_to_config!( (args.is_present("mem-prealloc")), vm_cfg, @@ -414,26 +586,56 @@ pub fn create_vmconfig(args: &ArgMatches) -> Result { add_kernel_cmdline, vec ); + add_args_to_config!( + (args.value_of("hardware-signature")), + vm_cfg, + add_hw_signature + ); add_args_to_config_multi!((args.values_of("drive")), vm_cfg, add_drive); add_args_to_config_multi!((args.values_of("object")), vm_cfg, add_object); add_args_to_config_multi!((args.values_of("netdev")), vm_cfg, add_netdev); add_args_to_config_multi!((args.values_of("chardev")), vm_cfg, add_chardev); add_args_to_config_multi!((args.values_of("device")), vm_cfg, add_device); add_args_to_config_multi!((args.values_of("global")), vm_cfg, add_global_config); - - if let Some(s) = args.value_of("trace") { - add_trace_events(&s)?; + add_args_to_config_multi!((args.values_of("numa")), vm_cfg, add_numa); + #[cfg(feature = "usb_camera")] + add_args_to_config_multi!((args.values_of("cameradev")), vm_cfg, add_camera_backend); + add_args_to_config_multi!((args.values_of("smbios")), vm_cfg, add_smbios); + if let Some(opt) = args.value_of("trace") { + add_trace(&opt)?; } // Check the mini-set for Vm to start is ok if vm_cfg.machine_config.mach_type != MachineType::None { vm_cfg .check_vmconfig(args.is_present("daemonize")) - .chain_err(|| "Precheck failed, VmConfig is unhealthy, stop running")?; + .with_context(|| "Precheck failed, VmConfig is unhealthy, stop running")?; } Ok(vm_cfg) } +#[derive(Parser)] +#[command(no_binary_name(true))] +struct QmpConfig { + #[arg(long, alias = "classtype")] + uri: String, + #[arg(long, action = ArgAction::SetTrue, required = true)] + server: bool, + #[arg(long, action = ArgAction::SetTrue, required = true)] + nowait: bool, +} + +#[derive(Parser)] +#[command(no_binary_name(true))] +struct MonConfig { + #[arg(long, default_value = "")] + id: String, + #[arg(long, value_parser = ["control"])] + mode: String, + #[arg(long)] + chardev: String, +} + /// This function is to parse qmp socket path and type. 
 ///
 /// # Arguments
@@ -443,55 +645,39 @@ pub fn create_vmconfig(args: &ArgMatches) -> Result<VmConfig> {
 /// # Errors
 ///
 /// The value of `qmp` is illegel.
-pub fn check_api_channel(args: &ArgMatches, vm_config: &mut VmConfig) -> Result<Vec<UnixListener>> {
+pub fn check_api_channel(
+    args: &ArgMatches,
+    vm_config: &mut VmConfig,
+) -> Result<Vec<SocketListener>> {
     let mut sock_paths = Vec::new();
-    if let Some(qmp_config) = args.value_of("qmp") {
-        let mut cmd_parser = CmdParser::new("qmp");
-        cmd_parser.push("").push("server").push("nowait");
-
-        cmd_parser.parse(&qmp_config)?;
-        if let Some(uri) = cmd_parser.get_value::<String>("")? {
-            let (_api_type, api_path) =
-                parse_uri(&uri).chain_err(|| "Failed to parse qmp socket path")?;
-            sock_paths.push(api_path);
-        } else {
-            bail!("No uri found for qmp");
-        }
-        if cmd_parser.get_value::<String>("server")?.is_none() {
-            bail!("Argument \'server\' is needed for qmp");
-        }
-        if cmd_parser.get_value::<String>("nowait")?.is_none() {
-            bail!("Argument \'nowait\' is needed for qmp");
-        }
+    if let Some(qmp_args) = args.value_of("qmp") {
+        let qmp_cfg = QmpConfig::try_parse_from(str_slip_to_clap(&qmp_args, true, false))?;
+        let sock_path =
+            QmpSocketPath::new(qmp_cfg.uri).with_context(|| "Failed to parse qmp socket path")?;
+        sock_paths.push(sock_path);
     }
-    if let Some(mon_config) = args.value_of("mon") {
-        let mut cmd_parser = CmdParser::new("monitor");
-        cmd_parser.push("id").push("mode").push("chardev");
-
-        cmd_parser.parse(&mon_config)?;
-
-        let chardev = if let Some(dev) = cmd_parser.get_value::<String>("chardev")? {
-            dev
-        } else {
-            bail!("Argument \'chardev\' is missing for \'mon\'");
-        };
-
-        if let Some(mode) = cmd_parser.get_value::<String>("mode")? {
-            if mode != *"control" {
-                bail!("Invalid \'mode\' parameter: {:?} for monitor", &mode);
+    if let Some(mon_args) = args.value_of("mon") {
+        let mon_cfg = MonConfig::try_parse_from(str_slip_to_clap(&mon_args, false, false))?;
+        let cfg = vm_config
+            .chardev
+            .remove(&mon_cfg.chardev)
+            .with_context(|| format!("No chardev found: {}", &mon_cfg.chardev))?;
+        let socket = cfg
+            .classtype
+            .socket_type()
+            .with_context(|| "Only chardev of unix-socket type can be used for monitor")?;
+        if let ChardevType::Socket { server, nowait, .. } = cfg.classtype {
+            if !server || !nowait {
+                bail!(
+                    "Argument \'server\' and \'nowait\' are both required for chardev \'{}\'",
+                    cfg.id()
+                );
             }
-        } else {
-            bail!("Argument \'mode\' of \'mon\' should be set to \'control\'.");
         }
-
-        if let Some(cfg) = vm_config.chardev.remove(&chardev) {
-            if let ChardevType::Socket(path) = cfg.backend {
-                sock_paths.push(path);
-            } else {
-                bail!("Only socket-type of chardev can be used for monitor");
-            }
-        } else {
-            bail!("No chardev found: {}", &chardev);
+        if let SocketType::Tcp { host, port } = socket {
+            sock_paths.push(QmpSocketPath::Tcp { host, port });
+        } else if let SocketType::Unix { path } = socket {
+            sock_paths.push(QmpSocketPath::Unix { path });
        }
     }
@@ -499,22 +685,34 @@ pub fn check_api_channel(args: &ArgMatches, vm_config: &mut VmConfig) -> Result<
         bail!("Please use \'-qmp\' or \'-mon\' to give a qmp path for Unix socket");
     }
     let mut listeners = Vec::new();
-    for path in sock_paths {
-        listeners.push(
-            bind_socket(path.clone())
-                .chain_err(|| format!("Failed to bind socket for path: {:?}", &path))?,
-        )
+    for sock_path in sock_paths {
+        listeners.push(bind_socket(&sock_path).with_context(|| {
+            format!(
+                "Failed to bind socket for path: {:?}",
+                sock_path.to_string()
+            )
+        })?)
     }
     Ok(listeners)
 }
-fn bind_socket(path: String) -> Result<UnixListener> {
-    let listener =
-        UnixListener::bind(&path).chain_err(|| format!("Failed to bind socket file {}", &path))?;
-    // Add file to temporary pool, so it could be cleaned when vm exits.
-    TempCleaner::add_path(path.clone());
-    limit_permission(&path)
-        .chain_err(|| format!("Failed to limit permission for socket file {}", &path))?;
-    Ok(listener)
+fn bind_socket(path: &QmpSocketPath) -> Result<SocketListener> {
+    match path {
+        QmpSocketPath::Tcp { host, port } => {
+            let listener = SocketListener::bind_by_tcp(host, *port)
+                .with_context(|| format!("Failed to bind tcp socket {}:{}", &host, &port))?;
+            Ok(listener)
+        }
+        QmpSocketPath::Unix { path } => {
+            clear_file(path.clone())?;
+            let listener = SocketListener::bind_by_uds(path)
+                .with_context(|| format!("Failed to bind socket file {}", &path))?;
+            // Add file to temporary pool, so it could be cleaned when vm exits.
+            TempCleaner::add_path(path.clone());
+            limit_permission(path)
+                .with_context(|| format!("Failed to limit permission for socket file {}", &path))?;
+            Ok(listener)
+        }
+    }
 }
diff --git a/machine_manager/src/config/balloon.rs b/machine_manager/src/config/balloon.rs
deleted file mode 100644
index ec19445f955cada18e1d0aadb856eab68b5513d1..0000000000000000000000000000000000000000
--- a/machine_manager/src/config/balloon.rs
+++ /dev/null
@@ -1,122 +0,0 @@
-// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved.
-//
-// StratoVirt is licensed under Mulan PSL v2.
-// You can use this software according to the terms and conditions of the Mulan
-// PSL v2.
-// You may obtain a copy of Mulan PSL v2 at:
-// http://license.coscl.org.cn/MulanPSL2
-// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
-// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
-// See the Mulan PSL v2 for more details.
-
-use serde::{Deserialize, Serialize};
-
-use super::{
-    errors::{ErrorKind, Result},
-    pci_args_check, ConfigCheck, MAX_STRING_LENGTH,
-};
-use crate::config::{CmdParser, ExBool, VmConfig};
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
-pub struct BalloonConfig {
-    pub id: String,
-    pub deflate_on_oom: bool,
-}
-
-impl ConfigCheck for BalloonConfig {
-    fn check(&self) -> Result<()> {
-        if self.id.len() > MAX_STRING_LENGTH {
-            return Err(ErrorKind::StringLengthTooLong(
-                "balloon id".to_string(),
-                MAX_STRING_LENGTH,
-            )
-            .into());
-        }
-
-        Ok(())
-    }
-}
-
-pub fn parse_balloon(vm_config: &mut VmConfig, balloon_config: &str) -> Result<BalloonConfig> {
-    if vm_config.dev_name.get("balloon").is_some() {
-        bail!("Only one balloon device is supported for each vm.");
-    }
-    let mut cmd_parser = CmdParser::new("virtio-balloon");
-    cmd_parser
-        .push("")
-        .push("bus")
-        .push("addr")
-        .push("multifunction")
-        .push("id")
-        .push("deflate-on-oom");
-    cmd_parser.parse(balloon_config)?;
-
-    pci_args_check(&cmd_parser)?;
-    let mut balloon: BalloonConfig = Default::default();
-    if let Some(default) = cmd_parser.get_value::<ExBool>("deflate-on-oom")? {
-        balloon.deflate_on_oom = default.into();
-    }
-    if let Some(id) = cmd_parser.get_value::<String>("id")?
{ - balloon.id = id; - } - balloon.check()?; - vm_config.dev_name.insert("balloon".to_string(), 1); - Ok(balloon) -} - -#[cfg(test)] -mod tests { - use crate::config::get_pci_bdf; - - use super::*; - - #[test] - fn test_balloon_config_cmdline_parser() { - let mut vm_config = VmConfig::default(); - let bln_cfg_res = parse_balloon( - &mut vm_config, - "virtio-balloon-device,deflate-on-oom=true,id=balloon0", - ); - assert!(bln_cfg_res.is_ok()); - let balloon_configs = bln_cfg_res.unwrap(); - assert_eq!(balloon_configs.id, "balloon0".to_string()); - assert_eq!(balloon_configs.deflate_on_oom, true); - } - - #[test] - fn test_pci_balloon_config_cmdline_parser() { - let mut vm_config = VmConfig::default(); - let bln_cfg = "virtio-balloon-pci,deflate-on-oom=true,bus=pcie.0,addr=0x1.0x2,id=balloon0"; - let bln_cfg_res = parse_balloon(&mut vm_config, bln_cfg); - assert!(bln_cfg_res.is_ok()); - let balloon_configs = bln_cfg_res.unwrap(); - assert_eq!(balloon_configs.id, "balloon0".to_string()); - assert_eq!(balloon_configs.deflate_on_oom, true); - - let pci_bdf = get_pci_bdf(bln_cfg); - assert!(pci_bdf.is_ok()); - let pci = pci_bdf.unwrap(); - assert_eq!(pci.bus, "pcie.0".to_string()); - assert_eq!(pci.addr, (1, 2)); - - let mut vm_config = VmConfig::default(); - let bln_cfg = "virtio-balloon-pci,deflate-on-oom=true,bus=pcie.0,addr=0x1.0x2,id=balloon0,multifunction=on"; - assert!(parse_balloon(&mut vm_config, bln_cfg).is_ok()); - } - - #[test] - fn test_two_balloon_config_cmdline_parser() { - let mut vm_config = VmConfig::default(); - let bln_cfg_res1 = parse_balloon( - &mut vm_config, - "virtio-balloon-device,deflate-on-oom=true,id=balloon0", - ); - assert!(bln_cfg_res1.is_ok()); - let bln_cfg_res2 = parse_balloon( - &mut vm_config, - "virtio-balloon-device,deflate-on-oom=true,id=balloon1", - ); - assert!(bln_cfg_res2.is_err()); - } -} diff --git a/machine_manager/src/config/boot_source.rs b/machine_manager/src/config/boot_source.rs index 81cc0f515b769472de3a7df863073f2892e2b1d3..997b841044396e43edcf5a75ba67c1c263ab1095 100644 --- a/machine_manager/src/config/boot_source.rs +++ b/machine_manager/src/config/boot_source.rs @@ -13,10 +13,11 @@ use std::fmt; use std::path::PathBuf; +use anyhow::{anyhow, Result}; use serde::{Deserialize, Serialize}; -use super::errors::{ErrorKind, Result}; -use crate::config::{ConfigCheck, VmConfig, MAX_PATH_LENGTH, MAX_STRING_LENGTH}; +use super::error::ConfigError; +use crate::config::{check_arg_too_long, ConfigCheck, VmConfig, MAX_PATH_LENGTH}; /// Config struct for boot-source. /// Contains `kernel_file`, `kernel_cmdline` and `initrd`. 
@@ -41,14 +42,15 @@ impl ConfigCheck for BootSource {
     fn check(&self) -> Result<()> {
         if let Some(kernel_file) = &self.kernel_file {
             if kernel_file.to_str().unwrap().len() > MAX_PATH_LENGTH {
-                return Err(ErrorKind::StringLengthTooLong(
+                return Err(anyhow!(ConfigError::StringLengthTooLong(
                     "kernel_file path".to_string(),
                     MAX_PATH_LENGTH,
-                )
-                .into());
+                )));
             }
             if !kernel_file.is_file() {
-                return Err(ErrorKind::UnRegularFile("Input kernel_file".to_string()).into());
+                return Err(anyhow!(ConfigError::UnRegularFile(
+                    "Input kernel_file".to_string()
+                )));
             }
         }
@@ -81,16 +83,12 @@ impl InitrdConfig {
 impl ConfigCheck for InitrdConfig {
     fn check(&self) -> Result<()> {
-        if self.initrd_file.to_str().unwrap().len() > MAX_STRING_LENGTH {
-            return Err(ErrorKind::StringLengthTooLong(
-                "initrd_file".to_string(),
-                MAX_STRING_LENGTH,
-            )
-            .into());
-        }
+        check_arg_too_long(self.initrd_file.to_str().unwrap(), "initrd_file")?;
         if !self.initrd_file.is_file() {
-            return Err(ErrorKind::UnRegularFile("Input initrd_file".to_string()).into());
+            return Err(anyhow!(ConfigError::UnRegularFile(
+                "Input initrd_file".to_string()
+            )));
         }
         Ok(())
@@ -98,23 +96,15 @@ impl ConfigCheck for InitrdConfig {
 }
 /// Struct `KernelParams` used to parse kernel cmdline to config.
-/// Contains a `Vec` and its `len()`.
 #[derive(Default, Clone, Debug, Serialize, Deserialize)]
 pub struct KernelParams {
     pub params: Vec<Param>,
-    pub length: usize,
 }
 impl ConfigCheck for KernelParams {
     fn check(&self) -> Result<()> {
-        for param in self.params.clone() {
-            if param.value.len() > MAX_STRING_LENGTH {
-                return Err(ErrorKind::StringLengthTooLong(
-                    "kernel params".to_string(),
-                    MAX_STRING_LENGTH,
-                )
-                .into());
-            }
+        for param in self.params.iter() {
+            check_arg_too_long(&param.value, "kernel params")?;
         }
         Ok(())
@@ -126,37 +116,27 @@ impl KernelParams {
     fn from_str(kernel_cmdline: String) -> Self {
         let split = kernel_cmdline.split(' ');
         let vec = split.collect::<Vec<&str>>();
-        let mut params: Vec<Param> = Vec::new();
-        let mut length: usize = 0;
+        let mut params: Vec<Param> = Vec::with_capacity(vec.len());
         for item in vec {
             params.push(Param::from_str(item));
-            length += 1;
         }
-        KernelParams { params, length }
+        KernelParams { params }
     }
     /// Push new `Param` to `KernelParams`.
     pub fn push(&mut self, item: Param) {
         self.params.push(item);
-        self.length = self
-            .length
-            .checked_add(1)
-            .unwrap_or_else(|| panic!("Kernel params length is too long: {}", self.length));
     }
     /// Move all the `Param` into `KernelParams`.
     pub fn append(&mut self, items: &mut Vec<Param>) {
-        self.length = self
-            .length
-            .checked_add(items.len())
-            .unwrap_or_else(|| panic!("Kernel params length is too long: {}", self.length));
         self.params.append(items);
     }
     /// Check `KernelParam` whether contains `item` or not.
     pub fn contains(&self, item: &str) -> bool {
-        for i in 0..self.length {
-            if self.params[i].param_type == item {
+        for param in self.params.iter() {
+            if param.param_type == item {
                 return true;
             }
         }
@@ -166,9 +146,9 @@ impl KernelParams {
 impl fmt::Display for KernelParams {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let mut vec: Vec<String> = Vec::with_capacity(self.length);
-        for i in 0..self.length {
-            vec.push(self.params[i].to_string());
+        let mut vec: Vec<String> = Vec::with_capacity(self.params.len());
+        for param in self.params.iter() {
+            vec.push(param.to_string());
         }
         write!(f, "{}", vec.join(" "))
     }
@@ -195,7 +175,7 @@ impl Param {
     ///
     /// * `item` - The `str` transformed to `Param`.
fn from_str(item: &str) -> Self { - let split = item.split('='); + let split = item.splitn(2, '='); let vec = split.collect::>(); if vec.len() == 1 { Param { @@ -246,20 +226,21 @@ impl VmConfig { #[cfg(test)] mod tests { - use super::*; use std::fs::File; + use super::*; + #[test] fn test_kernel_params() { let test_kernel = "reboot=k panic=1 pci=off nomodules 8250.nr_uarts=0"; let mut test_kernel_param = KernelParams::from_str(test_kernel.to_string()); - assert_eq!(test_kernel_param.length, 5); + assert_eq!(test_kernel_param.params.len(), 5); test_kernel_param.push(Param::from_str("maxcpus=8")); - assert_eq!(test_kernel_param.length, 6); - assert_eq!(test_kernel_param.contains("maxcpus"), true); - assert_eq!(test_kernel_param.contains("cpus"), false); + assert_eq!(test_kernel_param.params.len(), 6); + assert!(test_kernel_param.contains("maxcpus")); + assert!(!test_kernel_param.contains("cpus")); assert_eq!( test_kernel_param.to_string(), "reboot=k panic=1 pci=off nomodules 8250.nr_uarts=0 maxcpus=8" @@ -276,7 +257,7 @@ mod tests { initrd_file.set_len(100_u64).unwrap(); let mut vm_config = VmConfig::default(); assert!(vm_config.add_kernel(&kernel_path).is_ok()); - vm_config.add_kernel_cmdline(&vec![ + vm_config.add_kernel_cmdline(&[ String::from("console=ttyS0"), String::from("reboot=k"), String::from("panic=1"), diff --git a/machine_manager/src/config/camera.rs b/machine_manager/src/config/camera.rs new file mode 100644 index 0000000000000000000000000000000000000000..b3964f4f2f3cd65e67af0958b5bffe42595851ad --- /dev/null +++ b/machine_manager/src/config/camera.rs @@ -0,0 +1,119 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
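[Editor's note, not part of the patch: the `Param::from_str` hunk above switches from `split('=')` to `splitn(2, '=')` so that only the first `=` separates a kernel parameter's key from its value. A minimal standalone sketch of the behaviour this buys; the helper name and sample values below are illustrative only, not code from the repository.]

```rust
// Illustrative only: mimic the key/value split that Param::from_str performs
// after the patch. Values such as "root=PARTUUID=1234" keep their inner '='.
fn split_param(item: &str) -> (String, Option<String>) {
    let mut parts = item.splitn(2, '=');
    let key = parts.next().unwrap_or("").to_string();
    let value = parts.next().map(|v| v.to_string());
    (key, value)
}

fn main() {
    assert_eq!(
        split_param("root=PARTUUID=1234"),
        ("root".to_string(), Some("PARTUUID=1234".to_string()))
    );
    // A bare flag has no value part at all.
    assert_eq!(split_param("nomodules"), ("nomodules".to_string(), None));
}
```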
+ +use std::str::FromStr; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::Parser; +use serde::{Deserialize, Serialize}; + +use crate::{ + config::{str_slip_to_clap, valid_id, VmConfig}, + qmp::qmp_schema, +}; + +#[derive(Parser, Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[command(no_binary_name(true))] +pub struct CameraDevConfig { + #[arg(long)] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub path: String, + #[arg(long)] + pub backend: CamBackendType, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub enum CamBackendType { + #[cfg(feature = "usb_camera_v4l2")] + V4l2, + #[cfg(all(target_env = "ohos", feature = "usb_camera_oh"))] + OhCamera, + Demo, +} + +impl FromStr for CamBackendType { + type Err = anyhow::Error; + + fn from_str(s: &str) -> std::result::Result { + match s { + #[cfg(feature = "usb_camera_v4l2")] + "v4l2" => Ok(CamBackendType::V4l2), + #[cfg(all(target_env = "ohos", feature = "usb_camera_oh"))] + "ohcamera" => Ok(CamBackendType::OhCamera), + "demo" => Ok(CamBackendType::Demo), + _ => Err(anyhow!("Unknown camera backend type")), + } + } +} + +impl VmConfig { + pub fn add_camera_backend(&mut self, camera_config: &str) -> Result<()> { + let cfg = format!("cameradev,backend={}", camera_config); + let config = CameraDevConfig::try_parse_from(str_slip_to_clap(&cfg, true, false))?; + + self.add_cameradev_with_config(config) + } + + fn camera_backend_repeated(&self, id: &str, path: &str, backend: CamBackendType) -> bool { + for (key, cam) in self.camera_backend.iter() { + if key != id && cam.backend == backend && cam.path == *path { + return true; + } + } + + false + } + + pub fn add_cameradev_with_config(&mut self, conf: CameraDevConfig) -> Result<()> { + let cam = self.camera_backend.get(&conf.id); + + if cam.is_some() { + bail!("cameradev with id {:?} has already existed", conf.id); + } + + if self.camera_backend_repeated(&conf.id, &conf.path, conf.backend) { + bail!("another cameradev has the same backend device"); + } + + self.camera_backend.insert(conf.id.clone(), conf); + + Ok(()) + } + + pub fn del_cameradev_by_id(&mut self, id: &str) -> Result<()> { + if !self.camera_backend.contains_key(id) { + bail!("no cameradev with id {}", id); + } + self.camera_backend.remove(id); + + Ok(()) + } +} + +pub fn get_cameradev_config(args: qmp_schema::CameraDevAddArgument) -> Result { + let path = args.path.with_context(|| "cameradev config path is null")?; + let config = CameraDevConfig { + classtype: "cameradev".to_string(), + id: args.id, + path, + backend: CamBackendType::from_str(&args.driver)?, + }; + + Ok(config) +} + +pub fn get_cameradev_by_id(vm_config: &mut VmConfig, id: String) -> Option { + vm_config.camera_backend.get(&id).cloned() +} diff --git a/machine_manager/src/config/chardev.rs b/machine_manager/src/config/chardev.rs index 2eb435bef81f1a2f04f2c56f1222d8e344d274c6..1eb0d489c67483c727c416a141072447a6197a32 100644 --- a/machine_manager/src/config/chardev.rs +++ b/machine_manager/src/config/chardev.rs @@ -10,197 +10,283 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
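[Editor's note, not part of the patch: the chardev.rs hunk that follows replaces the old CmdParser string handling with clap subcommands, and the new `ChardevType::socket_type()` helper decides between a unix-socket and a tcp-socket chardev from which fields were supplied. A standalone sketch of that dispatch, using hypothetical names rather than the project's own types:]

```rust
// Illustrative only: a socket chardev is either path-based (unix) or
// host/port-based (tcp); supplying both or neither is rejected, which is the
// shape of the check the patched socket_type() performs.
#[derive(Debug, PartialEq)]
enum SocketKind {
    Unix { path: String },
    Tcp { host: String, port: u16 },
}

fn socket_kind(path: Option<String>, host: String, port: Option<u16>) -> Result<SocketKind, String> {
    match (path, port) {
        (Some(path), None) => Ok(SocketKind::Unix { path }),
        (None, Some(port)) => Ok(SocketKind::Tcp { host, port }),
        _ => Err("Not socket type or invalid socket type".to_string()),
    }
}

fn main() {
    assert_eq!(
        socket_kind(Some("/tmp/qmp.sock".into()), "0.0.0.0".into(), None),
        Ok(SocketKind::Unix { path: "/tmp/qmp.sock".into() })
    );
    assert_eq!(
        socket_kind(None, "127.0.0.1".into(), Some(9090)),
        Ok(SocketKind::Tcp { host: "127.0.0.1".into(), port: 9090 })
    );
    assert!(socket_kind(None, "0.0.0.0".into(), None).is_err());
}
```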
+use std::net::IpAddr; +use std::str::FromStr; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::{ArgAction, Parser, Subcommand}; +use log::error; use serde::{Deserialize, Serialize}; -use super::{ - errors::{ErrorKind, Result, ResultExt}, - get_pci_bdf, pci_args_check, PciBdf, -}; -use crate::config::{CmdParser, ConfigCheck, ExBool, VmConfig, MAX_PATH_LENGTH, MAX_STRING_LENGTH}; +use super::{error::ConfigError, str_slip_to_clap}; +use super::{get_pci_df, parse_bool}; +use crate::config::{valid_id, valid_path, valid_socket_path, ConfigCheck, VmConfig}; +use crate::qmp::qmp_schema; + +/// Default value of max ports for virtio-serial. +const DEFAULT_SERIAL_PORTS_NUMBER: u32 = 31; + +/// Config structure for virtio-serial-port. +#[derive(Parser, Debug, Clone)] +#[command(no_binary_name(true))] +pub struct VirtioSerialPortCfg { + #[arg(long, value_parser = ["virtconsole", "virtserialport"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub chardev: String, + #[arg(long)] + pub nr: Option, +} -const MAX_GUEST_CID: u64 = 4_294_967_295; -const MIN_GUEST_CID: u64 = 3; +impl ConfigCheck for VirtioSerialPortCfg { + fn check(&self) -> Result<()> { + if self.classtype != "virtconsole" && self.nr.unwrap() == 0 { + bail!("Port number 0 on virtio-serial devices reserved for virtconsole device."); + } -/// Charecter device options. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum ChardevType { - Stdio, - Pty, - Socket(String), - File(String), + Ok(()) + } } -/// Config structure for virtio-console. -#[derive(Debug, Clone)] -pub struct VirtioConsole { - pub id: String, - pub chardev: ChardevConfig, +impl VirtioSerialPortCfg { + /// If nr is not set in command line. Configure incremental maximum value for virtconsole. + /// Configure incremental maximum value(except 0) for virtserialport. + pub fn auto_nr(&mut self, free_port0: bool, free_nr: u32, max_nr_ports: u32) -> Result<()> { + let free_console_nr = if free_port0 { 0 } else { free_nr }; + let auto_nr = match self.classtype.as_str() { + "virtconsole" => free_console_nr, + "virtserialport" => free_nr, + _ => bail!("Invalid classtype."), + }; + let nr = self.nr.unwrap_or(auto_nr); + if nr >= max_nr_ports { + bail!( + "virtio serial port nr {} should be less than virtio serial's max_nr_ports {}", + nr, + max_nr_ports + ); + } + + self.nr = Some(nr); + Ok(()) + } } /// Config structure for character device. -#[derive(Debug, Clone)] +#[derive(Parser, Debug, Clone, Serialize, Deserialize)] +#[command(no_binary_name(true))] pub struct ChardevConfig { - pub id: String, - pub backend: ChardevType, + #[command(subcommand)] + pub classtype: ChardevType, +} + +impl ChardevConfig { + pub fn id(&self) -> String { + match &self.classtype { + ChardevType::Stdio { id } => id, + ChardevType::Pty { id } => id, + ChardevType::Socket { id, .. } => id, + ChardevType::File { id, .. } => id, + } + .clone() + } } impl ConfigCheck for ChardevConfig { fn check(&self) -> Result<()> { - if self.id.len() > MAX_STRING_LENGTH { - return Err(ErrorKind::StringLengthTooLong( - "chardev id".to_string(), - MAX_STRING_LENGTH, - ) - .into()); + if let ChardevType::Socket { .. 
} = self.classtype { + self.classtype.socket_type()?; } - if let ChardevType::Socket(path) | ChardevType::File(path) = &self.backend { - if path.len() > MAX_PATH_LENGTH { - return Err(ErrorKind::StringLengthTooLong( - "socket path".to_string(), - MAX_PATH_LENGTH, - ) - .into()); - } - } Ok(()) } } -fn check_chardev_args(cmd_parser: CmdParser) -> Result<()> { - if let Some(chardev_type) = cmd_parser.get_value::("")? { - let chardev_str = chardev_type.as_str(); - let server = cmd_parser.get_value::("server")?; - let nowait = cmd_parser.get_value::("nowait")?; - match chardev_str { - "stdio" | "pty" | "file" => { - if server.is_some() { - bail!( - "Chardev of {}-type does not support \'server\' argument", - chardev_str - ); - } - if nowait.is_some() { - bail!( - "Chardev of {}-type does not support \'nowait\' argument", - chardev_str - ); - } - } - "socket" => { - if let Some(server) = server { - if server.ne("") { - bail!("No parameter needed for server"); - } - } else { - bail!("Argument \'server\' is needed for socket-type chardev."); - } - if let Some(nowait) = nowait { - if nowait.ne("") { - bail!("No parameter needed for nowait"); - } - } else { - bail!("Argument \'nowait\' is needed for socket-type chardev."); - } +/// Character device options. +#[derive(Subcommand, Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum ChardevType { + Stdio { + #[arg(long, value_parser = valid_id)] + id: String, + }, + Pty { + #[arg(long, value_parser = valid_id)] + id: String, + }, + // Unix Socket: use `path`. + // Tcp Socket: use `host` and `port`. + #[clap(group = clap::ArgGroup::new("unix-socket").args(&["host", "port"]).requires("port").multiple(true).conflicts_with("tcp-socket"))] + #[clap(group = clap::ArgGroup::new("tcp-socket").arg("path").conflicts_with("unix-socket"))] + Socket { + #[arg(long, value_parser = valid_id)] + id: String, + #[arg(long, value_parser = valid_socket_path)] + path: Option, + #[arg(long, value_parser = valid_host, default_value = "0.0.0.0")] + host: String, + #[arg(long, value_parser = clap::value_parser!(u16).range(1..))] + port: Option, + #[arg(long, action = ArgAction::SetTrue)] + server: bool, + #[arg(long, action = ArgAction::SetTrue)] + nowait: bool, + }, + File { + #[arg(long, value_parser = valid_id)] + id: String, + #[arg(long, value_parser = valid_path)] + path: String, + }, +} + +impl ChardevType { + pub fn socket_type(&self) -> Result { + if let ChardevType::Socket { + path, host, port, .. + } = self + { + if path.is_some() && port.is_none() { + return Ok(SocketType::Unix { + path: path.clone().unwrap(), + }); + } else if port.is_some() && path.is_none() { + return Ok(SocketType::Tcp { + host: host.clone(), + port: (*port).unwrap(), + }); } - _ => (), } + bail!("Not socket type or invalid socket type"); } - Ok(()) } -pub fn parse_chardev(cmd_parser: CmdParser) -> Result { - let chardev_id = if let Some(chardev_id) = cmd_parser.get_value::("id")? 
{ - chardev_id - } else { - return Err(ErrorKind::FieldIsMissing("id", "chardev").into()); - }; - let backend = cmd_parser.get_value::("")?; - let path = cmd_parser.get_value::("path")?; - check_chardev_args(cmd_parser)?; - let chardev_type = if let Some(backend) = backend { - match backend.as_str() { - "stdio" => ChardevType::Stdio, - "pty" => ChardevType::Pty, - "socket" => { - if let Some(path) = path { - ChardevType::Socket(path) - } else { - return Err(ErrorKind::FieldIsMissing("path", "socket-type chardev").into()); - } - } - "file" => { - if let Some(path) = path { - ChardevType::File(path) - } else { - return Err(ErrorKind::FieldIsMissing("path", "file-type chardev").into()); - } - } - _ => return Err(ErrorKind::InvalidParam(backend, "chardev".to_string()).into()), - } - } else { - return Err(ErrorKind::FieldIsMissing("backend", "chardev").into()); - }; +pub enum SocketType { + Unix { path: String }, + Tcp { host: String, port: u16 }, +} + +fn valid_host(host: &str) -> Result { + let ip_address = IpAddr::from_str(host); + if ip_address.is_err() { + return Err(anyhow!(ConfigError::InvalidParam( + "host".to_string(), + "tcp-socket".to_string() + ))); + } + Ok(host.to_string()) +} + +/// Get chardev config from qmp arguments. +/// +/// # Arguments +/// +/// * `args` - The qmp arguments. +pub fn get_chardev_config(args: qmp_schema::CharDevAddArgument) -> Result { + let backend = args.backend; + if backend.backend_type.as_str() != "socket" { + return Err(anyhow!(ConfigError::InvalidParam( + "backend".to_string(), + backend.backend_type + ))); + } + + let data = backend.backend_data; + if data.server { + error!("Not support chardev socket as server now."); + return Err(anyhow!(ConfigError::InvalidParam( + "backend".to_string(), + "server".to_string() + ))); + } + + let addr = data.addr; + if addr.addr_type.as_str() != "unix" { + error!("Just support \"unix\" addr type option now."); + return Err(anyhow!(ConfigError::InvalidParam( + "backend".to_string(), + "addr".to_string() + ))); + } Ok(ChardevConfig { - id: chardev_id, - backend: chardev_type, + classtype: ChardevType::Socket { + id: args.id, + path: Some(addr.addr_data.path), + host: "0.0.0.0".to_string(), + port: None, + server: data.server, + nowait: false, + }, }) } -pub fn parse_virtconsole(vm_config: &mut VmConfig, config_args: &str) -> Result { - let mut cmd_parser = CmdParser::new("virtconsole"); - cmd_parser.push("").push("id").push("chardev"); - cmd_parser.parse(config_args)?; - - let chardev_name = if let Some(chardev) = cmd_parser.get_value::("chardev")? { - chardev - } else { - return Err(ErrorKind::FieldIsMissing("chardev", "virtconsole").into()); - }; - - let id = if let Some(chardev_id) = cmd_parser.get_value::("id")? { - chardev_id - } else { - return Err(ErrorKind::FieldIsMissing("id", "virtconsole").into()); - }; - - if let Some(char_dev) = vm_config.chardev.remove(&chardev_name) { - return Ok(VirtioConsole { - id, - chardev: char_dev, - }); +/// Get chardev socket path from ChardevConfig struct. +/// +/// # Arguments +/// +/// * `char_dev` - ChardevConfig struct reference. +pub fn get_chardev_socket_path(chardev: ChardevConfig) -> Result { + let id = chardev.id(); + if let ChardevType::Socket { + path, + server, + nowait, + .. 
+ } = chardev.classtype + { + path.clone() + .with_context(|| format!("Chardev {:?} backend should be unix-socket type.", id))?; + if server || nowait { + bail!( + "Argument \'server\' or \'nowait\' is not need for chardev \'{}\'", + path.unwrap() + ); + } + return Ok(path.unwrap()); } - bail!("Chardev {:?} not found or is in use", &chardev_name); + bail!("Chardev {:?} backend should be unix-socket type.", id); } impl VmConfig { /// Add chardev config to `VmConfig`. pub fn add_chardev(&mut self, chardev_config: &str) -> Result<()> { - let mut cmd_parser = CmdParser::new("chardev"); - cmd_parser - .push("") - .push("id") - .push("path") - .push("server") - .push("nowait"); - - cmd_parser.parse(chardev_config)?; - - let chardev = parse_chardev(cmd_parser)?; + let chardev = ChardevConfig::try_parse_from(str_slip_to_clap(chardev_config, true, true))?; chardev.check()?; - let chardev_id = chardev.id.clone(); - if self.chardev.get(&chardev_id).is_none() { - self.chardev.insert(chardev_id, chardev); - } else { - bail!("Chardev {:?} has been added", &chardev_id); + self.add_chardev_with_config(chardev)?; + Ok(()) + } + + /// Add chardev config to vm config. + /// + /// # Arguments + /// + /// * `conf` - The chardev config to be added to the vm. + pub fn add_chardev_with_config(&mut self, conf: ChardevConfig) -> Result<()> { + let chardev_id = conf.id(); + if self.chardev.contains_key(&chardev_id) { + bail!("Chardev {:?} has been added", chardev_id); } + self.chardev.insert(chardev_id, conf); + Ok(()) + } + + /// Delete chardev config from vm config. + /// + /// # Arguments + /// + /// * `id` - The chardev id which is used to delete chardev config. + pub fn del_chardev_by_id(&mut self, id: &str) -> Result<()> { + self.chardev + .remove(id) + .with_context(|| format!("Chardev {} not found", id))?; Ok(()) } } /// Config structure for serial. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct SerialConfig { pub chardev: ChardevConfig, } @@ -213,17 +299,16 @@ impl VmConfig { if parse_vec.len() == 2 { parse_vec[1] } else { - return Err(ErrorKind::InvalidParam( + return Err(anyhow!(ConfigError::InvalidParam( serial_config.to_string(), "serial".to_string(), - ) - .into()); + ))); } } _ => { let chardev_config = serial_config.to_string() + ",id=serial_chardev"; self.add_chardev(&chardev_config) - .chain_err(|| "Failed to add chardev")?; + .with_context(|| "Failed to add chardev")?; "serial_chardev" } }; @@ -235,236 +320,223 @@ impl VmConfig { } } -/// Config structure for virtio-vsock. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct VsockConfig { - pub id: String, - pub guest_cid: u64, - pub vhost_fd: Option, -} - -impl ConfigCheck for VsockConfig { - fn check(&self) -> Result<()> { - if self.id.len() > MAX_STRING_LENGTH { - return Err( - ErrorKind::StringLengthTooLong("vsock id".to_string(), MAX_STRING_LENGTH).into(), - ); - } - - if self.guest_cid < MIN_GUEST_CID || self.guest_cid >= MAX_GUEST_CID { - return Err(ErrorKind::IllegalValue( - "Vsock guest-cid".to_string(), - MIN_GUEST_CID, - true, - MAX_GUEST_CID, - false, - ) - .into()); - } - - Ok(()) - } -} - -pub fn parse_vsock(vsock_config: &str) -> Result { - let mut cmd_parser = CmdParser::new("vhost-vsock"); - cmd_parser - .push("") - .push("id") - .push("bus") - .push("addr") - .push("multifunction") - .push("guest-cid") - .push("vhostfd"); - cmd_parser.parse(vsock_config)?; - pci_args_check(&cmd_parser)?; - let id = if let Some(vsock_id) = cmd_parser.get_value::("id")? 
{ - vsock_id - } else { - return Err(ErrorKind::FieldIsMissing("id", "vsock").into()); - }; - - let guest_cid = if let Some(cid) = cmd_parser.get_value::("guest-cid")? { - cid - } else { - return Err(ErrorKind::FieldIsMissing("guest-cid", "vsock").into()); - }; - - let vhost_fd = cmd_parser.get_value::("vhostfd")?; - let vsock = VsockConfig { - id, - guest_cid, - vhost_fd, - }; - Ok(vsock) -} - -#[derive(Clone, Default, Debug)] +#[derive(Parser, Clone, Debug, Serialize, Deserialize)] +#[command(no_binary_name(true))] pub struct VirtioSerialInfo { + #[arg(long, value_parser = ["virtio-serial-pci", "virtio-serial-device"])] + pub classtype: String, + #[arg(long, default_value = "", value_parser = valid_id)] pub id: String, - pub pci_bdf: Option, - pub multifunction: bool, + #[arg(long)] + pub bus: Option, + #[arg(long, value_parser = get_pci_df)] + pub addr: Option<(u8, u8)>, + #[arg(long, value_parser = parse_bool, action = ArgAction::Append)] + pub multifunction: Option, + #[arg(long, default_value = "31", value_parser = clap::value_parser!(u32).range(1..=DEFAULT_SERIAL_PORTS_NUMBER as i64))] + pub max_ports: u32, } -impl ConfigCheck for VirtioSerialInfo { - fn check(&self) -> Result<()> { - if self.id.len() > MAX_STRING_LENGTH { - return Err(ErrorKind::StringLengthTooLong( - "virtio-serial id".to_string(), - MAX_STRING_LENGTH, - ) - .into()); +impl VirtioSerialInfo { + pub fn auto_max_ports(&mut self) { + if self.classtype == "virtio-serial-device" { + // Micro_vm does not support multi-ports in virtio-serial-device. + self.max_ports = 1; } - - Ok(()) - } -} - -pub fn parse_virtio_serial(vm_config: &mut VmConfig, serial_config: &str) -> Result<()> { - let mut cmd_parser = CmdParser::new("virtio-serial"); - cmd_parser - .push("") - .push("id") - .push("bus") - .push("addr") - .push("multifunction"); - cmd_parser.parse(serial_config)?; - pci_args_check(&cmd_parser)?; - - if vm_config.virtio_serial.is_none() { - let id = if let Some(id) = cmd_parser.get_value::("id")? { - id - } else { - "".to_string() - }; - let multifunction = if let Some(switch) = cmd_parser.get_value::("multifunction")? 
{ - switch.into() - } else { - false - }; - let virtio_serial = if serial_config.contains("-pci") { - let pci_bdf = get_pci_bdf(serial_config)?; - VirtioSerialInfo { - id, - pci_bdf: Some(pci_bdf), - multifunction, - } - } else { - VirtioSerialInfo { - id, - pci_bdf: None, - multifunction, - } - }; - virtio_serial.check()?; - vm_config.virtio_serial = Some(virtio_serial); - } else { - bail!("Only one virtio serial device is supported"); } - - Ok(()) } #[cfg(test)] mod tests { use super::*; - use crate::config::parse_virtio_serial; - #[test] - fn test_mmio_console_config_cmdline_parser() { + fn test_mmio_console_config_cmdline_parser(chardev_cfg: &str, expected_chardev: ChardevType) { let mut vm_config = VmConfig::default(); - assert!(parse_virtio_serial(&mut vm_config, "virtio-serial-device").is_ok()); - assert!(vm_config - .add_chardev("socket,id=test_console,path=/path/to/socket,server,nowait") - .is_ok()); - let virt_console = parse_virtconsole( - &mut vm_config, - "virtconsole,chardev=test_console,id=console1", - ); - assert!(virt_console.is_ok()); - let console_cfg = virt_console.unwrap(); - assert_eq!(console_cfg.id, "console1"); - assert_eq!( - console_cfg.chardev.backend, - ChardevType::Socket("/path/to/socket".to_string()) - ); + let serial_cmd = "virtio-serial-device"; + let mut serial_cfg = + VirtioSerialInfo::try_parse_from(str_slip_to_clap(serial_cmd, true, false)).unwrap(); + serial_cfg.auto_max_ports(); + vm_config.virtio_serial = Some(serial_cfg.clone()); + assert!(vm_config.add_chardev(chardev_cfg).is_ok()); + + let port_cmd = "virtconsole,chardev=test_console,id=console1,nr=0"; + let mut port_cfg = + VirtioSerialPortCfg::try_parse_from(str_slip_to_clap(port_cmd, true, false)).unwrap(); + assert!(port_cfg.auto_nr(true, 0, serial_cfg.max_ports).is_ok()); + let chardev = vm_config.chardev.remove(&port_cfg.chardev).unwrap(); + assert_eq!(port_cfg.id, "console1"); + assert_eq!(port_cfg.nr.unwrap(), 0); + assert_eq!(chardev.classtype, expected_chardev); + + // Error: VirtioSerialPortCfg.nr >= VirtioSerialInfo.max_nr_ports. + let port_cmd = "virtconsole,chardev=test_console,id=console1,nr=1"; + let mut port_cfg = + VirtioSerialPortCfg::try_parse_from(str_slip_to_clap(port_cmd, true, false)).unwrap(); + assert!(port_cfg.auto_nr(true, 0, serial_cfg.max_ports).is_err()); let mut vm_config = VmConfig::default(); - assert!( - parse_virtio_serial(&mut vm_config, "virtio-serial-device,bus=pcie.0,addr=0x1") - .is_err() - ); assert!(vm_config .add_chardev("sock,id=test_console,path=/path/to/socket") .is_err()); + } - let mut vm_config = VmConfig::default(); - assert!(parse_virtio_serial(&mut vm_config, "virtio-serial-device").is_ok()); - assert!(vm_config - .add_chardev("socket,id=test_console,path=/path/to/socket,server,nowait") - .is_ok()); - let virt_console = parse_virtconsole( - &mut vm_config, - "virtconsole,chardev=test_console1,id=console1", - ); - // test_console1 does not exist. 
- assert!(virt_console.is_err()); + #[test] + fn test_mmio_console_config_cmdline_parser_1() { + let chardev_cfg = "socket,id=test_console,path=/path/to/socket,server,nowait"; + let expected_chardev = ChardevType::Socket { + id: "test_console".to_string(), + path: Some("/path/to/socket".to_string()), + host: "0.0.0.0".to_string(), + port: None, + server: true, + nowait: true, + }; + test_mmio_console_config_cmdline_parser(chardev_cfg, expected_chardev) } #[test] - fn test_pci_console_config_cmdline_parser() { - let mut vm_config = VmConfig::default(); - assert!( - parse_virtio_serial(&mut vm_config, "virtio-serial-pci,bus=pcie.0,addr=0x1.0x2") - .is_ok() - ); - assert!(vm_config - .add_chardev("socket,id=test_console,path=/path/to/socket,server,nowait") - .is_ok()); - let virt_console = parse_virtconsole( - &mut vm_config, - "virtconsole,chardev=test_console,id=console1", - ); - assert!(virt_console.is_ok()); - let console_cfg = virt_console.unwrap(); + fn test_mmio_console_config_cmdline_parser_2() { + let chardev_cfg = "socket,id=test_console,host=127.0.0.1,port=9090,server,nowait"; + let expected_chardev = ChardevType::Socket { + id: "test_console".to_string(), + path: None, + host: "127.0.0.1".to_string(), + port: Some(9090), + server: true, + nowait: true, + }; + test_mmio_console_config_cmdline_parser(chardev_cfg, expected_chardev) + } + fn test_pci_console_config_cmdline_parser(chardev_cfg: &str, expected_chardev: ChardevType) { + let mut vm_config = VmConfig::default(); + let serial_cmd = "virtio-serial-pci,bus=pcie.0,addr=0x1.0x2,multifunction=on"; + let mut serial_cfg = + VirtioSerialInfo::try_parse_from(str_slip_to_clap(serial_cmd, true, false)).unwrap(); + serial_cfg.auto_max_ports(); + vm_config.virtio_serial = Some(serial_cfg.clone()); + assert!(vm_config.add_chardev(chardev_cfg).is_ok()); + + let console_cmd = "virtconsole,chardev=test_console,id=console1,nr=1"; + let mut console_cfg = + VirtioSerialPortCfg::try_parse_from(str_slip_to_clap(console_cmd, true, false)) + .unwrap(); + assert!(console_cfg.auto_nr(true, 0, serial_cfg.max_ports).is_ok()); + let chardev = vm_config.chardev.remove(&console_cfg.chardev).unwrap(); assert_eq!(console_cfg.id, "console1"); let serial_info = vm_config.virtio_serial.clone().unwrap(); - assert!(serial_info.pci_bdf.is_some()); - let bdf = serial_info.pci_bdf.unwrap(); - assert_eq!(bdf.bus, "pcie.0"); - assert_eq!(bdf.addr, (1, 2)); - assert_eq!( - console_cfg.chardev.backend, - ChardevType::Socket("/path/to/socket".to_string()) - ); + assert_eq!(serial_info.bus.unwrap(), "pcie.0"); + assert_eq!(serial_info.addr.unwrap(), (1, 2)); + assert_eq!(chardev.classtype, expected_chardev); + } - let mut vm_config = VmConfig::default(); - assert!(parse_virtio_serial( - &mut vm_config, - "virtio-serial-pci,bus=pcie.0,addr=0x1.0x2,multifunction=on" - ) - .is_ok()); + #[test] + fn test_pci_console_config_cmdline_parser_1() { + let chardev_cfg = "socket,id=test_console,path=/path/to/socket,server,nowait"; + let expected_chardev = ChardevType::Socket { + id: "test_console".to_string(), + path: Some("/path/to/socket".to_string()), + host: "0.0.0.0".to_string(), + port: None, + server: true, + nowait: true, + }; + test_pci_console_config_cmdline_parser(chardev_cfg, expected_chardev) } #[test] - fn test_vsock_config_cmdline_parser() { - let vsock_cfg_op = parse_vsock("vhost-vsock-device,id=test_vsock,guest-cid=3"); - assert!(vsock_cfg_op.is_ok()); - - let vsock_config = vsock_cfg_op.unwrap(); - assert_eq!(vsock_config.id, "test_vsock"); - 
assert_eq!(vsock_config.guest_cid, 3); - assert_eq!(vsock_config.vhost_fd, None); - assert!(vsock_config.check().is_ok()); - - let vsock_cfg_op = parse_vsock("vhost-vsock-device,id=test_vsock,guest-cid=3,vhostfd=4"); - assert!(vsock_cfg_op.is_ok()); - - let vsock_config = vsock_cfg_op.unwrap(); - assert_eq!(vsock_config.id, "test_vsock"); - assert_eq!(vsock_config.guest_cid, 3); - assert_eq!(vsock_config.vhost_fd, Some(4)); - assert!(vsock_config.check().is_ok()); + fn test_pci_console_config_cmdline_parser_2() { + let chardev_cfg = "socket,id=test_console,host=127.0.0.1,port=9090,server,nowait"; + let expected_chardev = ChardevType::Socket { + id: "test_console".to_string(), + path: None, + host: "127.0.0.1".to_string(), + port: Some(9090), + server: true, + nowait: true, + }; + test_pci_console_config_cmdline_parser(chardev_cfg, expected_chardev) + } + + #[test] + fn test_chardev_config_cmdline_parser() { + let check_argument = |arg: String, expect: ChardevType| { + let mut vm_config = VmConfig::default(); + assert!(vm_config.add_chardev(&arg).is_ok()); + assert!(vm_config.add_chardev(&arg).is_err()); + + let device_id = "test_id"; + if let Some(char_dev) = vm_config.chardev.remove(device_id) { + assert_eq!(char_dev.classtype, expect); + } else { + assert!(false); + } + }; + + check_argument( + "stdio,id=test_id".to_string(), + ChardevType::Stdio { + id: "test_id".to_string(), + }, + ); + check_argument( + "pty,id=test_id".to_string(), + ChardevType::Pty { + id: "test_id".to_string(), + }, + ); + check_argument( + "file,id=test_id,path=/some/file".to_string(), + ChardevType::File { + id: "test_id".to_string(), + path: "/some/file".to_string(), + }, + ); + + let extra_params = [ + ("", false, false), + (",server", true, false), + (",nowait", false, true), + (",server,nowait", true, true), + (",nowait,server", true, true), + ]; + for (param, server_state, nowait_state) in extra_params { + check_argument( + format!("{}{}", "socket,id=test_id,path=/path/to/socket", param), + ChardevType::Socket { + id: "test_id".to_string(), + path: Some("/path/to/socket".to_string()), + host: "0.0.0.0".to_string(), + port: None, + server: server_state, + nowait: nowait_state, + }, + ); + check_argument( + format!("{}{}", "socket,id=test_id,port=9090", param), + ChardevType::Socket { + id: "test_id".to_string(), + path: None, + host: "0.0.0.0".to_string(), + port: Some(9090), + server: server_state, + nowait: nowait_state, + }, + ); + check_argument( + format!( + "{}{}", + "socket,id=test_id,host=172.56.16.12,port=7070", param + ), + ChardevType::Socket { + id: "test_id".to_string(), + path: None, + host: "172.56.16.12".to_string(), + port: Some(7070), + server: server_state, + nowait: nowait_state, + }, + ); + } } } diff --git a/machine_manager/src/config/devices.rs b/machine_manager/src/config/devices.rs index cfc972c5b004fad9dd069f010ba2505c23d6210f..e355b88f285739bbefee70fdf5b97885d7e600df 100644 --- a/machine_manager/src/config/devices.rs +++ b/machine_manager/src/config/devices.rs @@ -10,31 +10,128 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use super::errors::Result; -use super::{CmdParser, VmConfig}; +use anyhow::{Context, Result}; +use regex::Regex; + +use super::{get_class_type, VmConfig}; +use crate::qmp::qmp_schema; impl VmConfig { - pub fn add_device(&mut self, device_config: &str) -> Result<()> { - let mut cmd_params = CmdParser::new("device"); - cmd_params.push(""); + /// Add config of hot-plugged devices to `VmConfig`. 
+ pub fn add_device_config(&mut self, args: &qmp_schema::DeviceAddArgument) -> String { + let mut device_info = args.driver.clone(); + + device_info = format!("{},id={}", device_info, args.id); + if let Some(addr) = &args.addr { + device_info = format!("{},addr={}", device_info, addr); + } + if let Some(bus) = &args.bus { + device_info = format!("{},bus={}", device_info, bus); + } + if let Some(drive) = &args.drive { + device_info = format!("{},drive={}", device_info, drive); + } + if let Some(mq) = &args.mq { + device_info = format!("{},mq={}", device_info, mq); + } + if let Some(iothread) = &args.iothread { + device_info = format!("{},iothread={}", device_info, iothread); + } + if let Some(multi) = &args.multifunction { + if *multi { + device_info = format!("{},multifunction=on", device_info); + } else { + device_info = format!("{},multifunction=off", device_info); + } + } + if let Some(queues) = &args.queues { + device_info = format!("{},num-queues={}", device_info, queues); + } + if let Some(size) = &args.queue_size { + device_info = format!("{},queue-size={}", device_info, size); + } + + // For net devices only. + if let Some(mac) = &args.mac { + device_info = format!("{},mac={}", device_info, mac); + } + if let Some(netdev) = &args.netdev { + device_info = format!("{},netdev={}", device_info, netdev); + } + if let Some(mq) = &args.mq { + device_info = format!("{},mq={}", device_info, mq); + } + + // For vhost devices only. + if let Some(chardev) = &args.chardev { + device_info = format!("{},chardev={}", device_info, chardev); + } + + // For block devices only. + if let Some(serial_num) = &args.serial_num { + device_info = format!("{},serial={}", device_info, serial_num); + } + if let Some(boot_index) = &args.boot_index { + device_info = format!("{},bootindex={}", device_info, boot_index); + } + + // For vfio devices only. + if let Some(host) = &args.host { + device_info = format!("{},addr={}", device_info, host); + } + if let Some(sysfsdev) = &args.sysfsdev { + device_info = format!("{},addr={}", device_info, sysfsdev); + } + + // For usb camera devices only. + if let Some(cameradev) = &args.cameradev { + device_info = format!("{},cameradev={}", device_info, cameradev); + } - cmd_params.get_parameters(device_config)?; - if let Some(device_type) = cmd_params.get_value::("")? { - self.devices.push((device_type, device_config.to_string())); + // For usb host devices only. 
+ if args.driver == "usb-host" { + let default_value = "0".to_string(); + let hostbus = args.hostbus.as_ref().unwrap_or(&default_value); + let hostaddr = args.hostaddr.as_ref().unwrap_or(&default_value); + let vendorid = args.vendorid.as_ref().unwrap_or(&default_value); + let productid = args.productid.as_ref().unwrap_or(&default_value); + device_info = format!( + "{},hostbus={},hostaddr={},vendorid={},productid={}", + device_info, hostbus, hostaddr, vendorid, productid + ); + if let Some(hostport) = &args.hostport { + device_info = format!("{},hostport={}", device_info, hostport); + } + if let Some(isobufs) = &args.isobufs { + device_info = format!("{},isobufs={}", device_info, isobufs); + } + if let Some(isobsize) = &args.isobsize { + device_info = format!("{},isobsize={}", device_info, isobsize); + } } + self.devices + .push((args.driver.clone(), device_info.clone())); + + device_info + } + + pub fn add_device(&mut self, device_config: &str) -> Result<()> { + let device_type = get_class_type(device_config).with_context(|| "Missing driver field.")?; + self.devices.push((device_type, device_config.to_string())); + Ok(()) } -} -pub fn parse_device_id(device_config: &str) -> Result { - let mut cmd_parser = CmdParser::new("device"); - cmd_parser.push("id"); + pub fn del_device_by_id(&mut self, dev_id: String) { + let rex = format!("id={}(,|$)", dev_id); + let re = Regex::new(rex.as_str()).unwrap(); - cmd_parser.get_parameters(device_config)?; - if let Some(id) = cmd_parser.get_value::("id")? { - Ok(id) - } else { - Ok(String::new()) + for (index, (_, dev_info)) in self.devices.iter().enumerate() { + if re.is_match(dev_info.as_str()) { + self.devices.remove(index); + return; + } + } } } diff --git a/machine_manager/src/config/display.rs b/machine_manager/src/config/display.rs new file mode 100644 index 0000000000000000000000000000000000000000..7369e4298794ee8fb10381a8f6451aaa04db4028 --- /dev/null +++ b/machine_manager/src/config/display.rs @@ -0,0 +1,153 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +#[cfg(feature = "gtk")] +use std::sync::Arc; + +use anyhow::Result; +#[cfg(all(target_env = "ohos", feature = "ohui_srv"))] +use anyhow::{bail, Context}; +use clap::{ArgAction, Parser}; +use serde::{Deserialize, Serialize}; +#[cfg(feature = "gtk")] +use vmm_sys_util::eventfd::EventFd; + +use crate::config::{parse_bool, str_slip_to_clap, VmConfig}; + +/// Event fd related to power button in gtk. +#[cfg(feature = "gtk")] +pub struct UiContext { + /// Name of virtual machine. + pub vm_name: String, + /// Gracefully Shutdown. + pub power_button: Option>, + /// Forced Shutdown. + pub shutdown_req: Option>, + /// Pause Virtual Machine. + pub pause_req: Option>, + /// Resume Virtual Machine. 
+ pub resume_req: Option>, +} + +#[cfg(all(target_env = "ohos", feature = "ohui_srv"))] +fn get_dir_path(p: &str) -> Result { + if cfg!(debug_assertions) { + return Ok(p.to_string()); + } + + let path = std::fs::canonicalize(p) + .with_context(|| format!("Failed to get real directory path: {:?}", p))?; + if !path.exists() { + bail!( + "The defined directory {:?} path doesn't exist", + path.as_os_str() + ); + } + if !path.is_dir() { + bail!( + "The defined socks-path {:?} is not directory", + path.as_os_str() + ); + } + + Ok(path.to_str().unwrap().to_string()) +} + +/// GTK and OHUI related configuration. +#[derive(Parser, Debug, Clone, Default, Serialize, Deserialize)] +#[command(no_binary_name(true))] + +pub struct DisplayConfig { + #[arg(long, alias = "classtype", value_parser = ["gtk", "ohui"])] + pub display_type: String, + /// App name if configured. + #[arg(long)] + pub app_name: Option, + /// Keep the window fill the desktop. + #[arg(long, default_value = "off", action = ArgAction::Append, value_parser = parse_bool)] + pub full_screen: bool, + /// Create the OHUI thread. + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + #[arg(long)] + pub iothread: Option, + /// Confirm socket path. Default socket path is "/tmp". + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + #[arg(long, alias = "socks-path", default_value = "/tmp/", value_parser = get_dir_path)] + pub sock_path: String, + /// Define the directory path for OHUI framebuffer and cursor. + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + #[arg(long, alias = "ui-path", default_value_if("display_type", "ohui", "/dev/shm/hwf/"), default_value = "/tmp/", value_parser = get_dir_path)] + pub ui_path: String, +} + +#[cfg(all(target_env = "ohos", feature = "ohui_srv"))] +impl DisplayConfig { + pub fn get_sock_path(&self) -> String { + self.sock_path.clone() + } + + pub fn get_ui_path(&self) -> String { + self.ui_path.clone() + } +} + +impl VmConfig { + pub fn add_display(&mut self, vm_config: &str) -> Result<()> { + let display_config = + DisplayConfig::try_parse_from(str_slip_to_clap(vm_config, true, false))?; + self.display = Some(display_config); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_add_gtk() { + let mut vm_config = VmConfig::default(); + let config_line = ""; + assert!(vm_config.add_display(config_line).is_err()); + + let config_line = "gtk"; + assert!(vm_config.add_display(config_line).is_ok()); + let display_config = vm_config.display.unwrap(); + assert_eq!(display_config.display_type, "gtk"); + assert!(!display_config.full_screen); + + let mut vm_config = VmConfig::default(); + let config_line = "gtk,full-screen=on"; + assert!(vm_config.add_display(config_line).is_ok()); + let display_config = vm_config.display.unwrap(); + assert_eq!(display_config.display_type, "gtk"); + assert!(display_config.full_screen); + + let mut vm_config = VmConfig::default(); + let config_line = "gtk,full-screen=off"; + assert!(vm_config.add_display(config_line).is_ok()); + let display_config = vm_config.display.unwrap(); + assert_eq!(display_config.display_type, "gtk"); + assert!(!display_config.full_screen); + + let mut vm_config = VmConfig::default(); + let config_line = "gtk,app-name=desktopappengine"; + assert!(vm_config.add_display(config_line).is_ok()); + let display_config = vm_config.display.unwrap(); + assert_eq!(display_config.display_type, "gtk"); + assert!(!display_config.full_screen); + assert_eq!( + display_config.app_name, + Some("desktopappengine".to_string()) + ); + } 
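+
+    #[test]
+    fn test_add_display_invalid_type() {
+        // A minimal sketch of the failure path: "vnc" is not an accepted
+        // display_type ("gtk" or "ohui"), so parsing is expected to fail.
+        let mut vm_config = VmConfig::default();
+        assert!(vm_config.add_display("vnc").is_err());
+    }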
+} diff --git a/machine_manager/src/config/drive.rs b/machine_manager/src/config/drive.rs index 88d78f29f4601914479cc4974f53197766d453b4..a9a132eb2621b83f4f40ea588c16133f886fbcc3 100644 --- a/machine_manager/src/config/drive.rs +++ b/machine_manager/src/config/drive.rs @@ -10,70 +10,150 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use std::fs::metadata; +use std::fmt::Display; +use std::fs::{metadata, File}; use std::os::linux::fs::MetadataExt; use std::path::Path; +use std::str::FromStr; +use std::sync::Arc; +use anyhow::{anyhow, bail, Context, Result}; +use clap::{ArgAction, Parser}; +use log::error; use serde::{Deserialize, Serialize}; -use super::{ - errors::{ErrorKind, Result}, - pci_args_check, -}; -use crate::config::{CmdParser, ConfigCheck, ExBool, VmConfig, MAX_PATH_LENGTH, MAX_STRING_LENGTH}; +use super::{error::ConfigError, parse_size, valid_id, valid_path}; +use crate::config::{parse_bool, str_slip_to_clap, ConfigCheck, VmConfig, MAX_STRING_LENGTH}; +use util::aio::{aio_probe, AioEngine, WriteZeroesState}; -const MAX_SERIAL_NUM: usize = 20; const MAX_IOPS: u64 = 1_000_000; const MAX_UNIT_ID: usize = 2; -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -pub struct BlkDevConfig { +// L2 Cache max size is 32M. +pub const MAX_L2_CACHE_SIZE: u64 = 32 * (1 << 20); +// Refcount table cache max size is 32M. +const MAX_REFTABLE_CACHE_SIZE: u64 = 32 * (1 << 20); + +/// Represent a single drive backend file. +pub struct DriveFile { + /// Drive id. pub id: String, - pub path_on_host: String, + /// The opened file. + pub file: Arc, + /// The num of drives share same file. + pub count: u32, + /// File path. + pub path: String, + /// File is read only or not. pub read_only: bool, - pub direct: bool, - pub serial_num: Option, - pub iothread: Option, - pub iops: Option, + /// File lock status. + pub locked: bool, + /// The align requirement of request(offset/len). + pub req_align: u32, + /// The align requirement of buffer(iova_base). + pub buf_align: u32, +} + +#[derive(Debug, Clone)] +pub struct BootIndexInfo { + pub boot_index: u8, + pub id: String, + pub dev_path: String, +} + +#[derive(Default, Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +pub enum DiskFormat { + #[default] + Raw, + Qcow2, +} + +impl FromStr for DiskFormat { + type Err = anyhow::Error; + + fn from_str(s: &str) -> std::result::Result { + match s { + "raw" => Ok(DiskFormat::Raw), + "qcow2" => Ok(DiskFormat::Qcow2), + _ => Err(anyhow!("Unknown format type")), + } + } } -impl Default for BlkDevConfig { - fn default() -> Self { - BlkDevConfig { - id: "".to_string(), - path_on_host: "".to_string(), - read_only: false, - direct: true, - serial_num: None, - iothread: None, - iops: None, +impl Display for DiskFormat { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + DiskFormat::Raw => write!(f, "raw"), + DiskFormat::Qcow2 => write!(f, "qcow2"), } } } -/// Config struct for `drive`. -/// Contains block device's attr. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] +fn valid_l2_cache_size(s: &str) -> Result { + let size = parse_size(s)?; + if size > MAX_L2_CACHE_SIZE { + return Err(anyhow!(ConfigError::IllegalValue( + "l2-cache-size".to_string(), + 0, + true, + MAX_L2_CACHE_SIZE, + true + ))); + } + Ok(size) +} + +fn valid_refcount_cache_size(s: &str) -> Result { + let size = parse_size(s)?; + if size > MAX_REFTABLE_CACHE_SIZE { + return Err(anyhow!(ConfigError::IllegalValue( + "refcount-cache-size".to_string(), + 0, + true, + MAX_REFTABLE_CACHE_SIZE, + true + ))); + } + Ok(size) +} + +/// Config struct for `drive`, including `block drive` and `pflash drive`. +#[derive(Parser, Debug, Clone, Default, Serialize, Deserialize)] +#[command(no_binary_name(true))] pub struct DriveConfig { + #[arg(long, default_value = "")] pub id: String, + #[arg(long, alias = "if", default_value = "none", value_parser = ["none", "pflash"])] + pub drive_type: String, + #[arg(long, value_parser = clap::value_parser!(u8).range(..MAX_UNIT_ID as i64))] + pub unit: Option, + #[arg(long, alias = "file", value_parser = valid_path)] pub path_on_host: String, - pub read_only: bool, + #[arg(long, default_value = "off", value_parser = parse_bool, action = ArgAction::Append)] + pub readonly: bool, + #[arg(long, default_value = "true", value_parser = parse_bool, action = ArgAction::Append)] pub direct: bool, + #[arg(long, alias = "throttling.iops-total", value_parser = clap::value_parser!(u64).range(..=MAX_IOPS as u64))] pub iops: Option, -} - -impl Default for DriveConfig { - fn default() -> Self { - DriveConfig { - id: "".to_string(), - path_on_host: "".to_string(), - read_only: false, - direct: true, - iops: None, - } - } + #[arg( + long, + default_value = "native", + default_value_if("direct", "false", "off"), + default_value_if("direct", "off", "off") + )] + pub aio: AioEngine, + #[arg(long, default_value = "disk", value_parser = ["disk", "cdrom"])] + pub media: String, + #[arg(long, default_value = "ignore", value_parser = parse_bool, action = ArgAction::Append)] + pub discard: bool, + #[arg(long, alias = "detect-zeroes", default_value = "off")] + pub write_zeroes: WriteZeroesState, + #[arg(long, default_value = "raw")] + pub format: DiskFormat, + #[arg(long, value_parser = valid_l2_cache_size)] + pub l2_cache_size: Option, + #[arg(long, value_parser = valid_refcount_cache_size)] + pub refcount_cache_size: Option, } impl DriveConfig { @@ -85,25 +165,32 @@ impl DriveConfig { if ((meta.st_mode() & libc::S_IFREG) != libc::S_IFREG) && ((meta.st_mode() & libc::S_IFBLK) != libc::S_IFBLK) { - return Err(ErrorKind::UnRegularFile("Drive File".to_string()).into()); + return Err(anyhow!(ConfigError::UnRegularFileOrBlk( + self.path_on_host.clone() + ))); } } Err(e) => { - error!("Failed to check the drive metadata: {}", e); - return Err(ErrorKind::UnRegularFile("Drive File".to_string()).into()); + error!("Failed to check the drive metadata: {:?}", e); + return Err(anyhow!(ConfigError::NoMetadata( + self.path_on_host.clone(), + e.to_string(), + ))); } } if let Some(file_name) = blk.file_name() { if file_name.len() > MAX_STRING_LENGTH { - return Err(ErrorKind::StringLengthTooLong( + return Err(anyhow!(ConfigError::StringLengthTooLong( "File name".to_string(), MAX_STRING_LENGTH, - ) - .into()); + ))); } } else { error!("Failed to check the drive file name"); - return Err(ErrorKind::UnRegularFile("Drive File".to_string()).into()); + return Err(anyhow!(ConfigError::InvalidParam( + self.path_on_host.clone(), + 
"file".to_string(), + ))); } Ok(()) } @@ -111,235 +198,91 @@ impl DriveConfig { impl ConfigCheck for DriveConfig { fn check(&self) -> Result<()> { - if self.id.len() > MAX_STRING_LENGTH { - return Err( - ErrorKind::StringLengthTooLong("Drive id".to_string(), MAX_STRING_LENGTH).into(), - ); - } - if self.path_on_host.len() > MAX_PATH_LENGTH { - return Err(ErrorKind::StringLengthTooLong( - "Drive device path".to_string(), - MAX_PATH_LENGTH, - ) - .into()); - } - if self.iops.is_some() && self.iops.unwrap() > MAX_IOPS { - return Err(ErrorKind::IllegalValue( - "iops of block device".to_string(), - 0, - true, - MAX_IOPS, - true, - ) - .into()); - } - Ok(()) - } -} - -impl ConfigCheck for BlkDevConfig { - fn check(&self) -> Result<()> { - if self.id.len() > MAX_STRING_LENGTH { - return Err(ErrorKind::StringLengthTooLong( - "drive device id".to_string(), - MAX_STRING_LENGTH, - ) - .into()); - } - - if self.path_on_host.len() > MAX_PATH_LENGTH { - return Err(ErrorKind::StringLengthTooLong( - "drive device path".to_string(), - MAX_PATH_LENGTH, - ) - .into()); - } - - if self.serial_num.is_some() && self.serial_num.as_ref().unwrap().len() > MAX_SERIAL_NUM { - return Err(ErrorKind::StringLengthTooLong( - "drive serial number".to_string(), - MAX_SERIAL_NUM, - ) - .into()); - } - - if self.iothread.is_some() && self.iothread.as_ref().unwrap().len() > MAX_STRING_LENGTH { - return Err(ErrorKind::StringLengthTooLong( - "iothread name".to_string(), - MAX_STRING_LENGTH, - ) - .into()); - } - - if self.iops.is_some() && self.iops.unwrap() > MAX_IOPS { - return Err(ErrorKind::IllegalValue( - "iops of block device".to_string(), - 0, - true, - MAX_IOPS, - true, - ) - .into()); - } - - Ok(()) - } -} - -pub fn parse_drive(cmd_parser: CmdParser) -> Result { - let mut drive = DriveConfig::default(); + if self.drive_type == "pflash" { + self.unit.with_context(|| { + ConfigError::FieldIsMissing("unit".to_string(), "pflash".to_string()) + })?; + if self.format.to_string() != "raw" { + bail!("Only \'raw\' type of pflash is supported"); + } + } else { + if self.id.is_empty() { + return Err(anyhow!(ConfigError::FieldIsMissing( + "id".to_string(), + "blk".to_string() + ))); + } + valid_id(&self.id)?; + valid_path(&self.path_on_host)?; + if self.iops > Some(MAX_IOPS) { + return Err(anyhow!(ConfigError::IllegalValue( + "iops of block device".to_string(), + 0, + true, + MAX_IOPS, + true, + ))); + } + if self.l2_cache_size > Some(MAX_L2_CACHE_SIZE) { + return Err(anyhow!(ConfigError::IllegalValue( + "l2-cache-size".to_string(), + 0, + true, + MAX_L2_CACHE_SIZE, + true + ))); + } + if self.refcount_cache_size > Some(MAX_REFTABLE_CACHE_SIZE) { + return Err(anyhow!(ConfigError::IllegalValue( + "refcount-cache-size".to_string(), + 0, + true, + MAX_REFTABLE_CACHE_SIZE, + true + ))); + } - if let Some(format) = cmd_parser.get_value::("format")? { - if format.ne("raw") { - bail!("Only \'raw\' type of block is supported"); + if self.aio != AioEngine::Off { + if self.aio == AioEngine::Native && !self.direct { + return Err(anyhow!(ConfigError::InvalidParam( + "aio".to_string(), + "native aio type should be used with \"direct\" on".to_string(), + ))); + } + aio_probe(self.aio)?; + } else if self.direct { + return Err(anyhow!(ConfigError::InvalidParam( + "aio".to_string(), + "low performance expected when use sync io with \"direct\" on".to_string(), + ))); + } } - } - - if let Some(id) = cmd_parser.get_value::("id")? 
{ - drive.id = id; - } else { - return Err(ErrorKind::FieldIsMissing("id", "blk").into()); - } - - if let Some(file) = cmd_parser.get_value::("file")? { - drive.path_on_host = file; - } else { - return Err(ErrorKind::FieldIsMissing("file", "blk").into()); - } - - if let Some(read_only) = cmd_parser.get_value::("readonly")? { - drive.read_only = read_only.into(); - } - if let Some(direct) = cmd_parser.get_value::("direct")? { - drive.direct = direct.into(); - } - drive.iops = cmd_parser.get_value::("throttling.iops-total")?; - Ok(drive) -} - -pub fn parse_blk(vm_config: &mut VmConfig, drive_config: &str) -> Result { - let mut cmd_parser = CmdParser::new("virtio-blk"); - cmd_parser - .push("") - .push("id") - .push("bus") - .push("addr") - .push("multifunction") - .push("drive") - .push("bootindex") - .push("serial") - .push("iothread"); - - cmd_parser.parse(drive_config)?; - - pci_args_check(&cmd_parser)?; - - if let Err(ref e) = cmd_parser.get_value::("bootindex") { - bail!("Failed to parse \'bootindex\': {:?}", &e); - } - - let mut blkdevcfg = BlkDevConfig::default(); - let blkdrive = if let Some(drive) = cmd_parser.get_value::("drive")? { - drive - } else { - return Err(ErrorKind::FieldIsMissing("drive", "blk").into()); - }; - if let Some(iothread) = cmd_parser.get_value::("iothread")? { - blkdevcfg.iothread = Some(iothread); - } - - if let Some(serial) = cmd_parser.get_value::("serial")? { - blkdevcfg.serial_num = Some(serial); - } + #[cfg(not(test))] + self.check_path()?; - if let Some(id) = cmd_parser.get_value::("id")? { - blkdevcfg.id = id; - } else { - bail!("No id configured for blk device"); - } - - if let Some(drive_arg) = &vm_config.drives.remove(&blkdrive) { - blkdevcfg.path_on_host = drive_arg.path_on_host.clone(); - blkdevcfg.read_only = drive_arg.read_only; - blkdevcfg.direct = drive_arg.direct; - blkdevcfg.iops = drive_arg.iops; - } else { - bail!("No drive configured matched for blk device"); - } - blkdevcfg.check()?; - Ok(blkdevcfg) -} - -/// Config struct for `pflash`. -/// Contains pflash device's attr. -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -#[serde(deny_unknown_fields)] -pub struct PFlashConfig { - pub path_on_host: String, - pub read_only: bool, - pub unit: usize, -} - -impl ConfigCheck for PFlashConfig { - fn check(&self) -> Result<()> { - if self.path_on_host.len() > MAX_PATH_LENGTH { - return Err(ErrorKind::StringLengthTooLong( - "drive device path".to_string(), - MAX_PATH_LENGTH, - ) - .into()); - } - - if self.unit >= MAX_UNIT_ID { - return Err(ErrorKind::UnitIdError(self.unit, MAX_UNIT_ID).into()); - } Ok(()) } } impl VmConfig { - /// Add '-drive ...' drive config to `VmConfig`. - pub fn add_drive(&mut self, drive_config: &str) -> Result<()> { - let mut cmd_parser = CmdParser::new("drive"); - cmd_parser.push("if"); - - cmd_parser.get_parameters(drive_config)?; - let drive_type = if let Some(_type) = cmd_parser.get_value::("if")? { - _type - } else { - "none".to_string() - }; - match drive_type.as_str() { + /// Add '-drive ...' drive config to `VmConfig`, including `block drive` and `pflash drive`. 
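+    ///
+    /// The parsed and validated `DriveConfig` is returned so that callers can
+    /// read back the normalized values (e.g. `format`, `aio`, cache sizes).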
+ pub fn add_drive(&mut self, drive_config: &str) -> Result { + let drive_cfg = DriveConfig::try_parse_from(str_slip_to_clap(drive_config, false, false))?; + drive_cfg.check()?; + match drive_cfg.drive_type.as_str() { "none" => { - self.add_block_drive(drive_config)?; + self.add_drive_with_config(drive_cfg.clone())?; } "pflash" => { - self.add_pflash(drive_config)?; + self.add_flashdev(drive_cfg.clone())?; } _ => { - bail!("Unknow 'if' argument: {:?}", drive_type.as_str()); + bail!("Unknow 'if' argument: {:?}", &drive_cfg.drive_type); } } - Ok(()) - } - - fn add_block_drive(&mut self, block_config: &str) -> Result<()> { - let mut cmd_parser = CmdParser::new("drive"); - cmd_parser - .push("file") - .push("id") - .push("readonly") - .push("direct") - .push("format") - .push("if") - .push("throttling.iops-total") - .push("serial"); - - cmd_parser.parse(block_config)?; - let drive_cfg = parse_drive(cmd_parser)?; - self.add_drive_with_config(drive_cfg) + Ok(drive_cfg) } /// Add drive config to vm config. @@ -349,11 +292,10 @@ impl VmConfig { /// * `drive_conf` - The drive config to be added to the vm. pub fn add_drive_with_config(&mut self, drive_conf: DriveConfig) -> Result<()> { let drive_id = drive_conf.id.clone(); - if self.drives.get(&drive_id).is_none() { - self.drives.insert(drive_id, drive_conf); - } else { + if self.drives.contains_key(&drive_id) { bail!("Drive {} has been added", drive_id); } + self.drives.insert(drive_id, drive_conf); Ok(()) } @@ -362,23 +304,23 @@ impl VmConfig { /// # Arguments /// /// * `drive_id` - Drive id. - pub fn del_drive_by_id(&mut self, drive_id: &str) -> Result<()> { - if self.drives.get(drive_id).is_some() { - self.drives.remove(drive_id); + pub fn del_drive_by_id(&mut self, drive_id: &str) -> Result { + if self.drives.contains_key(drive_id) { + Ok(self.drives.remove(drive_id).unwrap().path_on_host) } else { bail!("Drive {} not found", drive_id); } - Ok(()) } /// Add new flash device to `VmConfig`. - fn add_flashdev(&mut self, pflash: PFlashConfig) -> Result<()> { + fn add_flashdev(&mut self, pflash: DriveConfig) -> Result<()> { if self.pflashs.is_some() { for pf in self.pflashs.as_ref().unwrap() { - if pf.unit == pflash.unit { - return Err( - ErrorKind::IdRepeat("pflash".to_string(), pf.unit.to_string()).into(), - ); + if pf.unit.unwrap() == pflash.unit.unwrap() { + return Err(anyhow!(ConfigError::IdRepeat( + "pflash".to_string(), + pf.unit.unwrap().to_string() + ))); } } self.pflashs.as_mut().unwrap().push(pflash); @@ -387,148 +329,38 @@ impl VmConfig { } Ok(()) } - - /// Add '-pflash ...' pflash config to `VmConfig`. - pub fn add_pflash(&mut self, pflash_config: &str) -> Result<()> { - let mut cmd_parser = CmdParser::new("pflash"); - cmd_parser - .push("if") - .push("file") - .push("format") - .push("readonly") - .push("unit"); - - cmd_parser.parse(pflash_config)?; - - let mut pflash = PFlashConfig::default(); - - if let Some(format) = cmd_parser.get_value::("format")? { - if format.ne("raw") { - bail!("Only \'raw\' type of pflash is supported"); - } - } - if let Some(drive_path) = cmd_parser.get_value::("file")? { - pflash.path_on_host = drive_path; - } else { - return Err(ErrorKind::FieldIsMissing("file", "pflash").into()); - } - - if let Some(read_only) = cmd_parser.get_value::("readonly")? { - pflash.read_only = read_only.into(); - } - - if let Some(unit_id) = cmd_parser.get_value::("unit")? 
{ - pflash.unit = unit_id as usize; - } else { - return Err(ErrorKind::FieldIsMissing("unit", "pflash").into()); - } - - pflash.check()?; - self.add_flashdev(pflash) - } } #[cfg(test)] mod tests { - use crate::config::get_pci_bdf; - use super::*; #[test] - fn test_drive_config_cmdline_parser() { - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_drive( - "id=rootfs,file=/path/to/rootfs,readonly=off,direct=on,throttling.iops-total=200" - ) - .is_ok()); - let blk_cfg_res = parse_blk( - &mut vm_config, - "virtio-blk-device,drive=rootfs,id=rootfs,iothread=iothread1,serial=111111", - ); - assert!(blk_cfg_res.is_ok()); - let blk_device_config = blk_cfg_res.unwrap(); - assert_eq!(blk_device_config.id, "rootfs"); - assert_eq!(blk_device_config.path_on_host, "/path/to/rootfs"); - assert_eq!(blk_device_config.direct, true); - assert_eq!(blk_device_config.read_only, false); - assert_eq!(blk_device_config.serial_num, Some(String::from("111111"))); - - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_drive("id=rootfs,file=/path/to/rootfs,readonly=off,direct=on") - .is_ok()); - let blk_cfg_res = parse_blk( - &mut vm_config, - "virtio-blk-device,drive=rootfs1,id=rootfs1,iothread=iothread1,iops=200,serial=111111", - ); - assert!(blk_cfg_res.is_err()); // Can not find drive named "rootfs1". - } - - #[test] - fn test_pci_block_config_cmdline_parser() { + fn test_pflash_drive_config_cmdline_parser() { + // Test1: Right. let mut vm_config = VmConfig::default(); assert!(vm_config - .add_drive("id=rootfs,file=/path/to/rootfs,readonly=off,direct=on") - .is_ok()); - let blk_cfg = "virtio-blk-pci,id=rootfs,bus=pcie.0,addr=0x1.0x2,drive=rootfs,serial=111111"; - let blk_cfg_res = parse_blk(&mut vm_config, blk_cfg); - assert!(blk_cfg_res.is_ok()); - let drive_configs = blk_cfg_res.unwrap(); - assert_eq!(drive_configs.id, "rootfs"); - assert_eq!(drive_configs.path_on_host, "/path/to/rootfs"); - assert_eq!(drive_configs.direct, true); - assert_eq!(drive_configs.read_only, false); - assert_eq!(drive_configs.serial_num, Some(String::from("111111"))); - - let pci_bdf = get_pci_bdf(blk_cfg); - assert!(pci_bdf.is_ok()); - let pci = pci_bdf.unwrap(); - assert_eq!(pci.bus, "pcie.0".to_string()); - assert_eq!(pci.addr, (1, 2)); - - // drive "rootfs" has been removed. - let blk_cfg_res = parse_blk(&mut vm_config, blk_cfg); - assert!(blk_cfg_res.is_err()); - - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_drive("id=rootfs,file=/path/to/rootfs,serial=111111,readonly=off,direct=on") - .is_ok()); - let blk_cfg = - "virtio-blk-pci,id=blk1,bus=pcie.0,addr=0x1.0x2,drive=rootfs,multifunction=on"; - assert!(parse_blk(&mut vm_config, blk_cfg).is_ok()); - } - - #[test] - fn test_pflash_config_cmdline_parser() { - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_drive("if=pflash,readonly=on,file=flash0.fd,unit=0") + .add_drive("if=pflash,readonly=on,file=flash0.fd,unit=0,format=raw") .is_ok()); assert!(vm_config.pflashs.is_some()); let pflash = vm_config.pflashs.unwrap(); assert!(pflash.len() == 1); let pflash_cfg = &pflash[0]; - assert_eq!(pflash_cfg.unit, 0); + assert_eq!(pflash_cfg.unit.unwrap(), 0); assert_eq!(pflash_cfg.path_on_host, "flash0.fd".to_string()); - assert_eq!(pflash_cfg.read_only, true); + assert!(pflash_cfg.readonly); + // Test2: Change parameters sequence. 
let mut vm_config = VmConfig::default(); assert!(vm_config .add_drive("readonly=on,file=flash0.fd,unit=0,if=pflash") .is_ok()); - let mut vm_config = VmConfig::default(); assert!(vm_config .add_drive("readonly=on,if=pflash,file=flash0.fd,unit=0") .is_ok()); - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_drive("if=pflash,readonly=on,file=flash0.fd,unit=2") - .is_err()); - + // Test3: Add duplicate pflash. let mut vm_config = VmConfig::default(); assert!(vm_config .add_drive("if=pflash,readonly=on,file=flash0.fd,unit=0") @@ -536,52 +368,103 @@ mod tests { assert!(vm_config .add_drive("if=pflash,file=flash1.fd,unit=1") .is_ok()); + assert!(vm_config + .add_drive("if=pflash,file=flash1.fd,unit=1") + .is_err()); assert!(vm_config.pflashs.is_some()); let pflash = vm_config.pflashs.unwrap(); assert!(pflash.len() == 2); let pflash_cfg = &pflash[0]; - assert_eq!(pflash_cfg.unit, 0); + assert_eq!(pflash_cfg.unit.unwrap(), 0); assert_eq!(pflash_cfg.path_on_host, "flash0.fd".to_string()); - assert_eq!(pflash_cfg.read_only, true); + assert!(pflash_cfg.readonly); let pflash_cfg = &pflash[1]; - assert_eq!(pflash_cfg.unit, 1); + assert_eq!(pflash_cfg.unit.unwrap(), 1); assert_eq!(pflash_cfg.path_on_host, "flash1.fd".to_string()); - assert_eq!(pflash_cfg.read_only, false); - } + assert!(!pflash_cfg.readonly); - #[test] - fn test_drive_config_check() { - let mut drive_conf = DriveConfig::default(); - for _ in 0..MAX_STRING_LENGTH { - drive_conf.id += "A"; - } - assert!(drive_conf.check().is_ok()); + // Test4: Illegal parameters unit/format. + let mut vm_config = VmConfig::default(); + assert!(vm_config + .add_drive("if=pflash,readonly=on,file=flash0.fd,unit=2") + .is_err()); + assert!(vm_config + .add_drive("if=pflash,readonly=on,file=flash0.fd,unit=0,format=qcow2") + .is_err()); - // Overflow - drive_conf.id += "A"; - assert!(drive_conf.check().is_err()); + // Test5: Missing parameters file/unit. + let mut vm_config = VmConfig::default(); + assert!(vm_config.add_drive("if=pflash,readonly=on,unit=2").is_err()); + assert!(vm_config + .add_drive("if=pflash,readonly=on,file=flash0.fd") + .is_err()); + } - let mut drive_conf = DriveConfig::default(); - for _ in 0..MAX_PATH_LENGTH { - drive_conf.path_on_host += "A"; - } - assert!(drive_conf.check().is_ok()); + #[test] + fn test_block_drive_config_cmdline_parser() { + // Test1: Right. + let mut vm_config = VmConfig::default(); + assert!(vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,format=qcow2,readonly=off,direct=on,throttling.iops-total=200,discard=unmap,detect-zeroes=unmap") + .is_ok()); + assert!(vm_config.drives.len() == 1); + let drive_cfg = &vm_config.drives.remove("rootfs").unwrap(); + + assert_eq!(drive_cfg.id, "rootfs"); + assert_eq!(drive_cfg.path_on_host, "/path/to/rootfs"); + assert_eq!(drive_cfg.format.to_string(), "qcow2"); + assert!(!drive_cfg.readonly); + assert!(drive_cfg.direct); + assert_eq!(drive_cfg.iops.unwrap(), 200); + assert!(drive_cfg.discard); + assert_eq!( + drive_cfg.write_zeroes, + WriteZeroesState::from_str("unmap").unwrap() + ); - // Overflow - drive_conf.path_on_host += "A"; - assert!(drive_conf.check().is_err()); + // Test2: Change parameters sequence. 
+ let mut vm_config = VmConfig::default(); + assert!(vm_config + .add_drive("throttling.iops-total=200,file=/path/to/rootfs,format=qcow2,id=rootfs,readonly=off,direct=on,discard=unmap,detect-zeroes=unmap") + .is_ok()); - let mut drive_conf = DriveConfig::default(); - drive_conf.iops = Some(MAX_IOPS); - assert!(drive_conf.check().is_ok()); + // Test3: Add duplicate block drive config. + let mut vm_config = VmConfig::default(); + assert!(vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,format=qcow2,readonly=off,direct=on") + .is_ok()); + assert!(vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,format=qcow2,readonly=off,direct=on") + .is_err()); + let drive_cfg = &vm_config.drives.remove("rootfs"); + assert!(drive_cfg.is_some()); - let mut drive_conf = DriveConfig::default(); - drive_conf.iops = None; - assert!(drive_conf.check().is_ok()); + // Test4: Illegal parameters. + let mut vm_config = VmConfig::default(); + assert!(vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,format=vhdx") + .is_err()); + assert!(vm_config + .add_drive("id=rootfs,if=illegal,file=/path/to/rootfs,format=vhdx") + .is_err()); + assert!(vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,format=raw,throttling.iops-total=1000001") + .is_err()); + assert!(vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,format=raw,media=illegal") + .is_err()); + assert!(vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,format=raw,detect-zeroes=illegal") + .is_err()); - // Overflow - drive_conf.iops = Some(MAX_IOPS + 1); - assert!(drive_conf.check().is_err()); + // Test5: Missing parameters id/file. + let mut vm_config = VmConfig::default(); + assert!(vm_config + .add_drive("file=/path/to/rootfs,format=qcow2,readonly=off,direct=on,throttling.iops-total=200") + .is_err()); + assert!(vm_config + .add_drive("id=rootfs,format=qcow2,readonly=off,direct=on,throttling.iops-total=200") + .is_err()); } #[test] @@ -620,8 +503,56 @@ mod tests { let mut drive_conf = DriveConfig::default(); drive_conf.id = String::from(*id); assert!(vm_config.drives.get(*id).is_some()); - assert!(vm_config.del_drive_by_id(*id).is_ok()); + assert!(vm_config.del_drive_by_id(id).is_ok()); assert!(vm_config.drives.get(*id).is_none()); } } + + #[test] + fn test_drive_config_discard() { + let mut vm_config = VmConfig::default(); + let drive_conf = vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,discard=ignore") + .unwrap(); + assert!(!drive_conf.discard); + + let mut vm_config = VmConfig::default(); + let drive_conf = vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,discard=unmap") + .unwrap(); + assert!(drive_conf.discard); + + let mut vm_config = VmConfig::default(); + let ret = vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,discard=invalid") + .is_err(); + assert!(ret); + } + + #[test] + fn test_drive_config_write_zeroes() { + let mut vm_config = VmConfig::default(); + let drive_conf = vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,detect-zeroes=off") + .unwrap(); + assert_eq!(drive_conf.write_zeroes, WriteZeroesState::Off); + + let mut vm_config = VmConfig::default(); + let drive_conf = vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,detect-zeroes=on") + .unwrap(); + assert_eq!(drive_conf.write_zeroes, WriteZeroesState::On); + + let mut vm_config = VmConfig::default(); + let drive_conf = vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,detect-zeroes=unmap") + .unwrap(); + assert_eq!(drive_conf.write_zeroes, WriteZeroesState::Unmap); + + let mut vm_config = VmConfig::default(); + let ret = 
vm_config + .add_drive("id=rootfs,file=/path/to/rootfs,detect-zeroes=invalid") + .is_err(); + assert!(ret); + } } diff --git a/machine_manager/src/config/error.rs b/machine_manager/src/config/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..694c9ef3d3b79db62140819cf0835d0dfda7326c --- /dev/null +++ b/machine_manager/src/config/error.rs @@ -0,0 +1,67 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum ConfigError { + #[error("UtilError")] + UtilError { + #[from] + source: util::error::UtilError, + }, + #[error("JsonSerde")] + JsonSerde { + #[from] + source: serde_json::Error, + }, + #[error("Invalid json field \'{0}\'")] + InvalidJsonField(String), + #[error("Invalid parameter \'{0}\' for \'{1}\'")] + InvalidParam(String, String), + #[error("Unable to parse \'{0}\' for \'{1}\'")] + ConvertValueFailed(String, String), + #[error("Input {0} string's length must be no more than {1}.")] + StringLengthTooLong(String, usize), + #[error("Input field \'{0}\' in {1} is offered more than once.")] + FieldRepeat(String, String), + #[error("Input id \'{0}\' for {1} repeat.")] + IdRepeat(String, String), + #[error("Integer overflow occurred during parse {0}!")] + IntegerOverflow(String), + #[error("Unknown device type: {0}!")] + UnknownDeviceType(String), + #[error("\'{0}\' is missing for \'{1}\' device.")] + FieldIsMissing(String, String), + #[error("{0} must >{} {1} and <{} {3}.", if *.2 {"="} else {""}, if *.4 {"="} else {""})] + IllegalValue(String, u64, bool, u64, bool), + #[error("{0} must {}{} {3}.", if *.1 {">"} else {"<"}, if *.2 {"="} else {""})] + IllegalValueUnilateral(String, bool, bool, u64), + #[error("Mac address is illegal.")] + MacFormatError, + #[error("Unknown vhost type.")] + UnknownVhostType, + #[error("{0} is not a regular File.")] + UnRegularFile(String), + #[error("{0} is not a regular file or block device.")] + UnRegularFileOrBlk(String), + #[error("Failed to get metadata of file {0}: {1}.")] + NoMetadata(String, String), + #[error("Input value {0} is unaligned with {1} for {2}.")] + Unaligned(String, u64, u64), + #[error("{0} given {1} should not be more than {2}")] + UnitIdError(String, usize, usize), + #[error("Directory {0} does not exist")] + DirNotExist(String), + #[error("File {0} does not exist")] + FileNotExist(String), +} diff --git a/machine_manager/src/config/incoming.rs b/machine_manager/src/config/incoming.rs new file mode 100644 index 0000000000000000000000000000000000000000..98c3abf0884c3c4322d1e0b7edb157e756b84078 --- /dev/null +++ b/machine_manager/src/config/incoming.rs @@ -0,0 +1,153 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::net::Ipv4Addr; + +use anyhow::{bail, Result}; +use serde::{Deserialize, Serialize}; + +use super::VmConfig; + +#[derive(PartialEq, Eq, Debug, Clone, Copy, Serialize, Deserialize)] +pub enum MigrateMode { + File, + Unix, + Tcp, + Unknown, +} + +impl From<&str> for MigrateMode { + fn from(s: &str) -> Self { + match s { + "file" | "File" | "FILE" => MigrateMode::File, + "unix" | "Unix" | "UNIX" => MigrateMode::Unix, + "tcp" | "Tcp" | "TCP" => MigrateMode::Tcp, + _ => MigrateMode::Unknown, + } + } +} + +/// Parse `-incoming` cmdline to migrate mode and path. +pub fn parse_incoming_uri(uri: &str) -> Result<(MigrateMode, String)> { + let parse_vec: Vec<&str> = uri.split(':').collect(); + if parse_vec.len() == 2 { + match MigrateMode::from(parse_vec[0]) { + MigrateMode::File => Ok((MigrateMode::File, String::from(parse_vec[1]))), + MigrateMode::Unix => Ok((MigrateMode::Unix, String::from(parse_vec[1]))), + _ => bail!("Invalid incoming uri {}", uri), + } + } else if parse_vec.len() == 3 { + match MigrateMode::from(parse_vec[0]) { + MigrateMode::Tcp => { + if parse_vec[1].parse::().is_err() { + bail!("Invalid ip address {}", parse_vec[1]); + } + if parse_vec[2].parse::().is_err() { + bail!("Invalid ip port {}", parse_vec[2]); + } + + Ok(( + MigrateMode::Tcp, + format!("{}:{}", parse_vec[1], parse_vec[2]), + )) + } + + _ => bail!("Invalid incoming uri {}", uri), + } + } else { + bail!("Invalid incoming uri {}", uri) + } +} + +pub type Incoming = (MigrateMode, String); + +impl VmConfig { + /// Add incoming mode and path. 
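+    ///
+    /// Accepted forms are "file:<path>", "unix:<path>" and "tcp:<ip>:<port>",
+    /// as recognized by `parse_incoming_uri` above.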
+ pub fn add_incoming(&mut self, config: &str) -> Result<()> { + let (mode, uri) = parse_incoming_uri(config)?; + let incoming = match mode { + MigrateMode::File => (MigrateMode::File, uri), + MigrateMode::Unix => (MigrateMode::Unix, uri), + MigrateMode::Tcp => (MigrateMode::Tcp, uri), + MigrateMode::Unknown => { + bail!("Unsupported incoming unix path type") + } + }; + + self.incoming = Some(incoming); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_migrate_mode() { + assert_eq!(MigrateMode::from("File"), MigrateMode::File); + assert_eq!(MigrateMode::from("UNIX"), MigrateMode::Unix); + assert_eq!(MigrateMode::from("tcp"), MigrateMode::Tcp); + assert_eq!(MigrateMode::from("fd"), MigrateMode::Unknown); + } + + #[test] + fn test_parse_incoming_uri() { + let incoming_case1 = "unix:/tmp/stratovirt.sock"; + let result = parse_incoming_uri(incoming_case1); + assert!(result.is_ok()); + let result_1 = result.unwrap(); + assert_eq!(result_1.0, MigrateMode::Unix); + assert_eq!(result_1.1, "/tmp/stratovirt.sock".to_string()); + + let incoming_case2 = "tcp:192.168.1.2:2022"; + let result = parse_incoming_uri(incoming_case2); + assert!(result.is_ok()); + let result_2 = result.unwrap(); + assert_eq!(result_2.0, MigrateMode::Tcp); + assert_eq!(result_2.1, "192.168.1.2:2022".to_string()); + + let incoming_case3 = "tcp:192.168.1.2:2:2"; + let result_3 = parse_incoming_uri(incoming_case3); + assert!(result_3.is_err()); + + let incoming_case4 = "tcp:300.168.1.2:22"; + let result_4 = parse_incoming_uri(incoming_case4); + assert!(result_4.is_err()); + + let incoming_case5 = "tcp:192.168.1.2:65568"; + let result_5 = parse_incoming_uri(incoming_case5); + assert!(result_5.is_err()); + } + + #[test] + fn test_add_incoming() { + let mut vm_config_case1 = VmConfig::default(); + assert!(vm_config_case1.add_incoming("tcp:192.168.1.2:2022").is_ok()); + assert_eq!( + vm_config_case1.incoming.unwrap(), + (MigrateMode::Tcp, "192.168.1.2:2022".to_string()) + ); + + let mut vm_config_case2 = VmConfig::default(); + assert!(vm_config_case2 + .add_incoming("unix:/tmp/stratovirt.sock") + .is_ok()); + assert_eq!( + vm_config_case2.incoming.unwrap(), + (MigrateMode::Unix, "/tmp/stratovirt.sock".to_string()) + ); + + let mut vm_config_case2 = VmConfig::default(); + assert!(vm_config_case2.add_incoming("unknown:/tmp/").is_err()); + } +} diff --git a/machine_manager/src/config/iothread.rs b/machine_manager/src/config/iothread.rs index 4d615d229cc49ff44273fdaf5c7dcb985e3e72ca..029d6583cc6f5df53858898baa92297c3a6c44ce 100644 --- a/machine_manager/src/config/iothread.rs +++ b/machine_manager/src/config/iothread.rs @@ -10,61 +10,48 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use super::errors::{ErrorKind, Result}; -use crate::config::{CmdParser, ConfigCheck, VmConfig, MAX_STRING_LENGTH}; +use anyhow::{anyhow, Result}; +use clap::Parser; +use serde::{Deserialize, Serialize}; + +use super::{error::ConfigError, str_slip_to_clap, valid_id}; +use crate::config::VmConfig; const MAX_IOTHREAD_NUM: usize = 8; /// Config structure for iothread. 
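+/// Parsed from an "iothread,id=<name>" style argument.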
-#[derive(Debug, Clone, Default)] +#[derive(Parser, Debug, Clone, Default, Serialize, Deserialize)] +#[command(no_binary_name(true))] pub struct IothreadConfig { + #[arg(long, value_parser = ["iothread"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] pub id: String, } -impl ConfigCheck for IothreadConfig { - fn check(&self) -> Result<()> { - if self.id.len() > MAX_STRING_LENGTH { - return Err(ErrorKind::StringLengthTooLong( - "iothread id".to_string(), - MAX_STRING_LENGTH, - ) - .into()); - } - - Ok(()) - } -} - impl VmConfig { /// Add new iothread device to `VmConfig`. pub fn add_iothread(&mut self, iothread_config: &str) -> Result<()> { - let mut cmd_parser = CmdParser::new("iothread"); - cmd_parser.push("").push("id"); - cmd_parser.parse(iothread_config)?; - - let mut iothread = IothreadConfig::default(); - if let Some(id) = cmd_parser.get_value::("id")? { - iothread.id = id; - } - iothread.check()?; + let iothread = + IothreadConfig::try_parse_from(str_slip_to_clap(iothread_config, true, false))?; if self.iothreads.is_some() { if self.iothreads.as_ref().unwrap().len() >= MAX_IOTHREAD_NUM { - return Err(ErrorKind::IllegalValue( + return Err(anyhow!(ConfigError::IllegalValue( "Iothread number".to_string(), 0, true, MAX_IOTHREAD_NUM as u64, true, - ) - .into()); + ))); } for t in self.iothreads.as_ref().unwrap() { if t.id == iothread.id { - return Err( - ErrorKind::IdRepeat("iothread".to_string(), t.id.to_string()).into(), - ); + return Err(anyhow!(ConfigError::IdRepeat( + "iothread".to_string(), + t.id.to_string() + ))); } } diff --git a/machine_manager/src/config/machine_config.rs b/machine_manager/src/config/machine_config.rs index 92fddab92f019fb1e3f6f2b35674bdb856348358..3d277ba155d3c22e708aaa0d44a60c05a19ce36a 100644 --- a/machine_manager/src/config/machine_config.rs +++ b/machine_manager/src/config/machine_config.rs @@ -12,19 +12,32 @@ use std::str::FromStr; +use anyhow::{anyhow, bail, Context, Result}; +use clap::{ArgAction, Parser}; use serde::{Deserialize, Serialize}; -use super::errors::{ErrorKind, Result, ResultExt}; -use crate::config::{CmdParser, ConfigCheck, ExBool, VmConfig}; +use super::error::ConfigError; +use super::{ + get_value_of_parameter, parse_bool, parse_size, str_slip_to_clap, valid_id, valid_path, +}; +use crate::config::{ConfigCheck, IntegerList, VmConfig, MAX_NODES}; +use crate::machine::HypervisorType; const DEFAULT_CPUS: u8 = 1; +const DEFAULT_THREADS: u8 = 1; +const DEFAULT_CORES: u8 = 1; +const DEFAULT_DIES: u8 = 1; +const DEFAULT_CLUSTERS: u8 = 1; +const DEFAULT_SOCKETS: u8 = 1; +const DEFAULT_MAX_CPUS: u8 = 1; const DEFAULT_MEMSIZE: u64 = 256; -const MAX_NR_CPUS: u64 = 254; -const MIN_NR_CPUS: u64 = 1; +const MAX_NR_CPUS: u8 = 254; +const MIN_NR_CPUS: u8 = 1; const MAX_MEMSIZE: u64 = 549_755_813_888; -const MIN_MEMSIZE: u64 = 268_435_456; -const M: u64 = 1024 * 1024; -const G: u64 = 1024 * 1024 * 1024; +const MIN_MEMSIZE: u64 = 134_217_728; +pub const K: u64 = 1024; +pub const M: u64 = 1024 * 1024; +pub const G: u64 = 1024 * 1024 * 1024; #[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq, Eq)] pub enum MachineType { @@ -34,7 +47,7 @@ pub enum MachineType { } impl FromStr for MachineType { - type Err = (); + type Err = anyhow::Error; fn from_str(s: &str) -> std::result::Result { match s.to_lowercase().as_str() { @@ -44,11 +57,67 @@ impl FromStr for MachineType { "q35" => Ok(MachineType::StandardVm), #[cfg(target_arch = "aarch64")] "virt" => Ok(MachineType::StandardVm), - _ => Err(()), + _ => Err(anyhow!("Invalid machine 
type.")), } } } +#[repr(u32)] +#[derive(PartialEq, Eq)] +pub enum HostMemPolicy { + Default = 0, + Preferred = 1, + Bind = 2, + Interleave = 3, + NotSupported = 4, +} + +impl From for HostMemPolicy { + fn from(str: String) -> HostMemPolicy { + match str.to_lowercase().as_str() { + "default" => HostMemPolicy::Default, + "preferred" => HostMemPolicy::Preferred, + "bind" => HostMemPolicy::Bind, + "interleave" => HostMemPolicy::Interleave, + _ => HostMemPolicy::NotSupported, + } + } +} + +#[derive(Parser, Clone, Debug, Serialize, Deserialize)] +#[command(no_binary_name(true))] +pub struct MemZoneConfig { + #[arg(long, alias = "classtype", value_parser = ["memory-backend-ram", "memory-backend-file", "memory-backend-memfd"])] + pub mem_type: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long, value_parser = parse_size)] + pub size: u64, + // Note: + // `Clap` will incorrectly assume that we're trying to get multiple arguments since we got + // a `Vec` from parser function `get_host_nodes`. Generally, we should use `Box` or a `new struct type` + // to encapsulate this `Vec`. And fortunately, there's a trick (using full qualified path of Vec) + // to avoid the new type wrapper. See: github.com/clap-rs/clap/issues/4626. + #[arg(long, alias = "host-nodes", value_parser = get_host_nodes)] + pub host_numa_nodes: Option<::std::vec::Vec>, + #[arg(long, default_value = "default", value_parser=["default", "preferred", "bind", "interleave"])] + pub policy: String, + #[arg(long, value_parser = valid_path)] + pub mem_path: Option, + #[arg(long, default_value = "true", value_parser = parse_bool, action = ArgAction::Append)] + pub dump_guest_core: bool, + #[arg(long, default_value = "off", value_parser = parse_bool, action = ArgAction::Append)] + pub share: bool, + #[arg(long, alias = "mem-prealloc", default_value = "false", value_parser = parse_bool, action = ArgAction::Append)] + pub prealloc: bool, +} + +impl MemZoneConfig { + pub fn memfd(&self) -> bool { + self.mem_type.eq("memory-backend-memfd") + } +} + /// Config that contains machine's memory information config. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct MachineMemConfig { @@ -57,6 +126,7 @@ pub struct MachineMemConfig { pub dump_guest_core: bool, pub mem_share: bool, pub mem_prealloc: bool, + pub mem_zones: Option>, } impl Default for MachineMemConfig { @@ -67,17 +137,88 @@ impl Default for MachineMemConfig { dump_guest_core: true, mem_share: false, mem_prealloc: false, + mem_zones: None, } } } +#[derive(Parser, Clone, Debug, Serialize, Deserialize, Default)] +#[command(no_binary_name(true))] +pub struct CpuConfig { + #[arg(long, alias = "classtype", value_parser = ["host"])] + pub family: String, + #[arg(long, default_value = "off")] + pub pmu: PmuConfig, + #[arg(long, default_value = "off")] + pub sve: SveConfig, +} + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default)] +pub enum PmuConfig { + On, + #[default] + Off, +} + +impl FromStr for PmuConfig { + type Err = anyhow::Error; + + fn from_str(s: &str) -> std::result::Result { + match s { + "on" => Ok(PmuConfig::On), + "off" => Ok(PmuConfig::Off), + _ => Err(anyhow!( + "Invalid PMU option,must be one of \'on\" or \"off\"." 
+ )), + } + } +} + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default)] +pub enum SveConfig { + On, + #[default] + Off, +} + +impl FromStr for SveConfig { + type Err = anyhow::Error; + + fn from_str(s: &str) -> std::result::Result { + match s { + "on" => Ok(SveConfig::On), + "off" => Ok(SveConfig::Off), + _ => Err(anyhow!( + "Invalid SVE option, must be one of \"on\" or \"off\"." + )), + } + } +} + +#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq, Eq, Default)] +pub enum ShutdownAction { + #[default] + ShutdownActionPoweroff, + ShutdownActionPause, +} + /// Config struct for machine-config. /// Contains some basic Vm config about cpu, memory, name. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct MachineConfig { pub mach_type: MachineType, + pub hypervisor: HypervisorType, pub nr_cpus: u8, + pub nr_threads: u8, + pub nr_cores: u8, + pub nr_dies: u8, + pub nr_clusters: u8, + pub nr_sockets: u8, + pub max_cpus: u8, pub mem_config: MachineMemConfig, + pub cpu_config: CpuConfig, + pub shutdown_action: ShutdownAction, + pub battery: bool, } impl Default for MachineConfig { @@ -85,8 +226,18 @@ impl Default for MachineConfig { fn default() -> Self { MachineConfig { mach_type: MachineType::MicroVm, + hypervisor: HypervisorType::Kvm, nr_cpus: DEFAULT_CPUS, + nr_threads: DEFAULT_THREADS, + nr_cores: DEFAULT_CORES, + nr_dies: DEFAULT_DIES, + nr_clusters: DEFAULT_CLUSTERS, + nr_sockets: DEFAULT_SOCKETS, + max_cpus: DEFAULT_MAX_CPUS, mem_config: MachineMemConfig::default(), + cpu_config: CpuConfig::default(), + shutdown_action: ShutdownAction::default(), + battery: false, } } } @@ -94,7 +245,7 @@ impl Default for MachineConfig { impl ConfigCheck for MachineConfig { fn check(&self) -> Result<()> { if self.mem_config.mem_size < MIN_MEMSIZE || self.mem_config.mem_size > MAX_MEMSIZE { - bail!("Memory size must >= 256MiB and <= 512GiB, default unit: MiB, current memory size: {:?} bytes", + bail!("Memory size must >= 128MiB and <= 512GiB, default unit: MiB, current memory size: {:?} bytes", &self.mem_config.mem_size); } @@ -102,134 +253,203 @@ impl ConfigCheck for MachineConfig { } } -impl VmConfig { - /// Add argument `name` to `VmConfig`. - /// - /// # Arguments - /// - /// * `name` - The name `String` added to `VmConfig`. - pub fn add_machine(&mut self, mach_config: &str) -> Result<()> { - let mut cmd_parser = CmdParser::new("machine"); - cmd_parser - .push("") - .push("type") - .push("accel") - .push("usb") - .push("dump-guest-core") - .push("mem-share"); - #[cfg(target_arch = "aarch64")] - cmd_parser.push("gic-version"); - cmd_parser.parse(mach_config)?; +#[derive(Parser)] +#[command(no_binary_name(true))] +struct AccelConfig { + #[arg(long, alias = "classtype")] + hypervisor: HypervisorType, +} - #[cfg(target_arch = "aarch64")] - if let Some(gic_version) = cmd_parser.get_value::("gic-version")? 
{ - if gic_version != 3 { - bail!("Unsupported gic version, only gicv3 is supported"); +#[derive(Parser)] +#[command(no_binary_name(true))] +struct MemSizeConfig { + #[arg(long, alias = "classtype", value_parser = parse_size)] + size: u64, +} + +#[derive(Parser)] +#[command(no_binary_name(true))] +struct MachineCmdConfig { + #[arg(long, aliases = ["classtype", "type"])] + mach_type: MachineType, + #[arg(long, default_value = "on", action = ArgAction::Append, value_parser = parse_bool)] + dump_guest_core: bool, + #[arg(long, default_value = "off", action = ArgAction::Append, value_parser = parse_bool)] + mem_share: bool, + #[arg(long, default_value = "kvm")] + accel: HypervisorType, + // The "usb" member is added for compatibility with libvirt and is currently not in use. + // It only supports configuration as "off". Currently, a `String` type is used to verify incoming values. + // When it will be used, it needs to be changed to a `bool` type. + #[arg(long, default_value = "off", value_parser = ["off"])] + usb: String, + #[cfg(target_arch = "aarch64")] + #[arg(long, default_value = "3", value_parser = clap::value_parser!(u8).range(3..=3))] + gic_version: u8, +} + +#[derive(Parser)] +#[command(no_binary_name(true))] +struct SmpConfig { + #[arg(long, alias = "classtype", value_parser = clap::value_parser!(u8).range(i64::from(MIN_NR_CPUS)..=i64::from(MAX_NR_CPUS)))] + cpus: u8, + #[arg(long, default_value = "0")] + maxcpus: u8, + #[arg(long, default_value = "0", value_parser = clap::value_parser!(u8).range(..i64::from(u8::MAX)))] + sockets: u8, + #[arg(long, default_value = "1", value_parser = clap::value_parser!(u8).range(1..i64::from(u8::MAX)))] + dies: u8, + #[arg(long, default_value = "1", value_parser = clap::value_parser!(u8).range(1..i64::from(u8::MAX)))] + clusters: u8, + #[arg(long, default_value = "0", value_parser = clap::value_parser!(u8).range(..i64::from(u8::MAX)))] + cores: u8, + #[arg(long, default_value = "0", value_parser = clap::value_parser!(u8).range(..i64::from(u8::MAX)))] + threads: u8, +} + +impl SmpConfig { + fn auto_adjust_topology(&mut self) -> Result<()> { + let mut max_cpus = self.maxcpus; + let mut sockets = self.sockets; + let mut cores = self.cores; + let mut threads = self.threads; + + if max_cpus == 0 { + let mut tmp_max = sockets + .checked_mul(self.dies) + .with_context(|| "Illegal smp config")?; + tmp_max = tmp_max + .checked_mul(self.clusters) + .with_context(|| "Illegal smp config")?; + tmp_max = tmp_max + .checked_mul(cores) + .with_context(|| "Illegal smp config")?; + tmp_max = tmp_max + .checked_mul(threads) + .with_context(|| "Illegal smp config")?; + + if tmp_max > 0 { + max_cpus = tmp_max; + } else { + max_cpus = self.cpus; } } - if let Some(accel) = cmd_parser.get_value::("accel")? { - if accel.ne("kvm:tcg") && accel.ne("tcg") && accel.ne("kvm") { - bail!("Only \'kvm\', \'kvm:tcg\' and \'tcg\' are supported for \'accel\' of \'machine\'"); + if cores == 0 { + if sockets == 0 { + sockets = 1; } - } - if let Some(usb) = cmd_parser.get_value::("usb")? { - if usb.into() { - bail!("Argument \'usb\' should be set to \'off\'"); + if threads == 0 { + threads = 1; } + cores = max_cpus / (sockets * self.dies * self.clusters * threads); + } else if sockets == 0 { + if threads == 0 { + threads = 1; + } + sockets = max_cpus / (self.dies * self.clusters * cores * threads); } - if let Some(mach_type) = cmd_parser - .get_value::("") - .chain_err(|| "Unrecognized machine type")? 
- { - self.machine_config.mach_type = mach_type; - } - if let Some(mach_type) = cmd_parser - .get_value::("type") - .chain_err(|| "Unrecognized machine type")? - { - self.machine_config.mach_type = mach_type; + + if threads == 0 { + threads = max_cpus / (sockets * self.dies * self.clusters * cores); } - if let Some(dump_guest) = cmd_parser.get_value::("dump-guest-core")? { - self.machine_config.mem_config.dump_guest_core = dump_guest.into(); + + let min_max_cpus = std::cmp::max(self.cpus, MIN_NR_CPUS); + + if !(min_max_cpus..=MAX_NR_CPUS).contains(&max_cpus) { + return Err(anyhow!(ConfigError::IllegalValue( + "MAX CPU number".to_string(), + u64::from(min_max_cpus), + true, + u64::from(MAX_NR_CPUS), + true, + ))); } - if let Some(mem_share) = cmd_parser.get_value::("mem-share")? { - self.machine_config.mem_config.mem_share = mem_share.into(); + + if sockets * self.dies * self.clusters * cores * threads != max_cpus { + bail!("sockets * dies * clusters * cores * threads must be equal to max_cpus"); } + self.maxcpus = max_cpus; + self.sockets = sockets; + self.cores = cores; + self.threads = threads; + Ok(()) } +} - /// Add '-m' memory config to `VmConfig`. - pub fn add_memory(&mut self, mem_config: &str) -> Result<()> { - let mut cmd_parser = CmdParser::new("m"); - cmd_parser.push("").push("size"); +impl VmConfig { + /// Add argument `name` to `VmConfig`. + /// + /// # Arguments + /// + /// * `name` - The name `String` added to `VmConfig`. + pub fn add_machine(&mut self, mach_config: &str) -> Result<()> { + let mut has_type_label = false; + if get_value_of_parameter("type", mach_config).is_ok() { + has_type_label = true; + } + let mach_cfg = MachineCmdConfig::try_parse_from(str_slip_to_clap( + mach_config, + !has_type_label, + false, + ))?; + // TODO: The current "accel" configuration in "-machine" command line and "-accel" command line are not foolproof. + // Later parsing will overwrite first parsing. We will optimize this in the future. + self.machine_config.hypervisor = mach_cfg.accel; + self.machine_config.mach_type = mach_cfg.mach_type; + self.machine_config.mem_config.dump_guest_core = mach_cfg.dump_guest_core; + self.machine_config.mem_config.mem_share = mach_cfg.mem_share; - cmd_parser.parse(mem_config)?; + Ok(()) + } - let mem = if let Some(mem_size) = cmd_parser.get_value::("")? { - memory_unit_conversion(&mem_size)? - } else if let Some(mem_size) = cmd_parser.get_value::("size")? { - memory_unit_conversion(&mem_size)? - } else { - return Err(ErrorKind::FieldIsMissing("size", "memory").into()); - }; + /// Add '-accel' accelerator config to `VmConfig`. + pub fn add_accel(&mut self, accel_config: &str) -> Result<()> { + let accel_cfg = AccelConfig::try_parse_from(str_slip_to_clap(accel_config, true, false))?; + self.machine_config.hypervisor = accel_cfg.hypervisor; + Ok(()) + } - self.machine_config.mem_config.mem_size = mem; + /// Add '-m' memory config to `VmConfig`. + pub fn add_memory(&mut self, mem_config: &str) -> Result<()> { + // Is there a "size=" prefix tag in the command line. + let mut has_size_label = false; + if get_value_of_parameter("size", mem_config).is_ok() { + has_size_label = true; + } + let mem_cfg = + MemSizeConfig::try_parse_from(str_slip_to_clap(mem_config, !has_size_label, false))?; + self.machine_config.mem_config.mem_size = mem_cfg.size; Ok(()) } /// Add '-smp' cpu config to `VmConfig`. 
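+    ///
+    /// Omitted topology members are derived by `SmpConfig::auto_adjust_topology`;
+    /// sockets * dies * clusters * cores * threads must equal maxcpus.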
pub fn add_cpu(&mut self, cpu_config: &str) -> Result<()> { - let mut cmd_parser = CmdParser::new("smp"); - cmd_parser - .push("") - .push("sockets") - .push("cores") - .push("threads") - .push("cpus"); - - cmd_parser.parse(cpu_config)?; - - let cpu = if let Some(cpu) = cmd_parser.get_value::("")? { - cpu - } else if let Some(cpu) = cmd_parser.get_value::("cpus")? { - cpu - } else { - return Err(ErrorKind::FieldIsMissing("cpus", "smp").into()); - }; - - if let Some(sockets) = cmd_parser.get_value::("sockets")? { - if sockets.ne(&cpu) { - bail!("Invalid \'sockets\' arguments for \'smp\', it should equal to the number of cpus"); - } - } - if let Some(cores) = cmd_parser.get_value::("cores")? { - if cores.ne(&1) { - bail!("Invalid \'cores\' arguments for \'smp\', it should be \'1\'"); - } - } - if let Some(threads) = cmd_parser.get_value::("threads")? { - if threads.ne(&1) { - bail!("Invalid \'threads\' arguments for \'smp\', it should be \'1\'"); - } + let mut has_cpus_label = false; + if get_value_of_parameter("cpus", cpu_config).is_ok() { + has_cpus_label = true; } + let mut smp_cfg = + SmpConfig::try_parse_from(str_slip_to_clap(cpu_config, !has_cpus_label, false))?; + smp_cfg.auto_adjust_topology()?; - // limit cpu count - if !(MIN_NR_CPUS..=MAX_NR_CPUS).contains(&cpu) { - return Err(ErrorKind::IllegalValue( - "CPU number".to_string(), - MIN_NR_CPUS, - true, - MAX_NR_CPUS, - true, - ) - .into()); - } + self.machine_config.nr_cpus = smp_cfg.cpus; + self.machine_config.nr_threads = smp_cfg.threads; + self.machine_config.nr_cores = smp_cfg.cores; + self.machine_config.nr_dies = smp_cfg.dies; + self.machine_config.nr_clusters = smp_cfg.clusters; + self.machine_config.nr_sockets = smp_cfg.sockets; + self.machine_config.max_cpus = smp_cfg.maxcpus; + + Ok(()) + } - // it is safe, as value limited before - self.machine_config.nr_cpus = cpu as u8; + pub fn add_cpu_feature(&mut self, features: &str) -> Result<()> { + let cpu_config = CpuConfig::try_parse_from(str_slip_to_clap(features, true, false))?; + self.machine_config.cpu_config = cpu_config; Ok(()) } @@ -242,10 +462,84 @@ impl VmConfig { pub fn enable_mem_prealloc(&mut self) { self.machine_config.mem_config.mem_prealloc = true; } + + pub fn add_no_shutdown(&mut self) -> bool { + self.machine_config.shutdown_action = ShutdownAction::ShutdownActionPause; + true + } + + pub fn add_battery(&mut self) -> bool { + self.machine_config.battery = true; + true + } + + pub fn add_hw_signature(&mut self, config: &str) -> Result<()> { + self.hardware_signature = Some(u32::from_str(config)?); + Ok(()) + } +} + +impl VmConfig { + /// Convert memory zone cmdline to VM config + /// + /// # Arguments + /// + /// * `mem_zone` - The memory zone cmdline string. 
+ pub fn add_mem_zone(&mut self, mem_zone: &str) -> Result { + let zone_config = MemZoneConfig::try_parse_from(str_slip_to_clap(mem_zone, true, false))?; + + if (zone_config.mem_path.is_none() && zone_config.mem_type.eq("memory-backend-file")) + || (zone_config.mem_path.is_some() && zone_config.mem_type.ne("memory-backend-file")) + { + bail!("Object type: {} config path err", zone_config.mem_type); + } + + if self.object.mem_object.contains_key(&zone_config.id) { + bail!("Object: {} has been added", zone_config.id); + } + self.object + .mem_object + .insert(zone_config.id.clone(), zone_config.clone()); + + if zone_config.host_numa_nodes.is_none() { + return Ok(zone_config); + } + + if self.machine_config.mem_config.mem_zones.is_some() { + self.machine_config + .mem_config + .mem_zones + .as_mut() + .unwrap() + .push(zone_config.clone()); + } else { + self.machine_config.mem_config.mem_zones = Some(vec![zone_config.clone()]); + } + + Ok(zone_config) + } } -fn memory_unit_conversion(origin_value: &str) -> Result { - if (origin_value.ends_with('M') | origin_value.ends_with('m')) +/// Convert memory units from GiB, Mib to Byte. +/// +/// # Arguments +/// +/// * `origin_value` - The origin memory value from user. +pub fn memory_unit_conversion(origin_value: &str, default_unit: u64) -> Result { + if (origin_value.ends_with('K') | origin_value.ends_with('k')) + && (origin_value.contains('K') ^ origin_value.contains('k')) + { + let value = origin_value.replacen('K', "", 1); + let value = value.replacen('k', "", 1); + get_inner( + value + .parse::() + .with_context(|| { + ConfigError::ConvertValueFailed(origin_value.to_string(), String::from("u64")) + })? + .checked_mul(K), + ) + } else if (origin_value.ends_with('M') | origin_value.ends_with('m')) && (origin_value.contains('M') ^ origin_value.contains('m')) { let value = origin_value.replacen('M', "", 1); @@ -253,8 +547,8 @@ fn memory_unit_conversion(origin_value: &str) -> Result { get_inner( value .parse::() - .map_err(|_| { - ErrorKind::ConvertValueFailed(origin_value.to_string(), String::from("u64")) + .with_context(|| { + ConfigError::ConvertValueFailed(origin_value.to_string(), String::from("u64")) })? .checked_mul(M), ) @@ -266,28 +560,52 @@ fn memory_unit_conversion(origin_value: &str) -> Result { get_inner( value .parse::() - .map_err(|_| { - ErrorKind::ConvertValueFailed(origin_value.to_string(), String::from("u64")) + .with_context(|| { + ConfigError::ConvertValueFailed(origin_value.to_string(), String::from("u64")) })? .checked_mul(G), ) } else { - let size = origin_value.parse::().map_err(|_| { - ErrorKind::ConvertValueFailed(origin_value.to_string(), String::from("u64")) + let size = origin_value.parse::().with_context(|| { + ConfigError::ConvertValueFailed(origin_value.to_string(), String::from("u64")) })?; - let memory_size = size.checked_mul(M); + let memory_size = size.checked_mul(default_unit); get_inner(memory_size) } } fn get_inner(outer: Option) -> Result { - if let Some(x) = outer { - Ok(x) - } else { - Err(ErrorKind::IntegerOverflow("-m".to_string()).into()) + outer.with_context(|| ConfigError::IntegerOverflow("-m".to_string())) +} + +fn get_host_nodes(nodes: &str) -> Result> { + let mut host_nodes = IntegerList::from_str(nodes) + .with_context(|| { + ConfigError::ConvertValueFailed(String::from("u32"), "host-nodes".to_string()) + })? 
+ .0 + .iter() + .map(|e| *e as u32) + .collect::>(); + + if host_nodes.is_empty() { + bail!("Got empty host nodes list!"); } + + host_nodes.sort_unstable(); + if host_nodes[host_nodes.len() - 1] >= MAX_NODES { + return Err(anyhow!(ConfigError::IllegalValue( + "host_nodes".to_string(), + 0, + true, + u64::from(MAX_NODES), + false, + ))); + } + + Ok(host_nodes) } #[cfg(test)] @@ -302,11 +620,22 @@ mod tests { mem_share: false, dump_guest_core: false, mem_prealloc: false, + mem_zones: None, }; let mut machine_config = MachineConfig { mach_type: MachineType::MicroVm, - nr_cpus: MIN_NR_CPUS as u8, + hypervisor: HypervisorType::Kvm, + nr_cpus: 1, + nr_cores: 1, + nr_threads: 1, + nr_dies: 1, + nr_clusters: 1, + nr_sockets: 1, + max_cpus: MIN_NR_CPUS as u8, mem_config: memory_config, + cpu_config: CpuConfig::default(), + shutdown_action: ShutdownAction::default(), + battery: false, }; assert!(machine_config.check().is_ok()); @@ -316,11 +645,408 @@ mod tests { machine_config.nr_cpus = MIN_NR_CPUS as u8; machine_config.mem_config.mem_size = MIN_MEMSIZE - 1; - assert!(!machine_config.check().is_ok()); + assert!(machine_config.check().is_err()); machine_config.mem_config.mem_size = MAX_MEMSIZE + 1; - assert!(!machine_config.check().is_ok()); + assert!(machine_config.check().is_err()); machine_config.mem_config.mem_size = MIN_MEMSIZE; assert!(machine_config.check().is_ok()); } + + #[test] + fn test_memory_unit_conversion() { + let test_string = "6G"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_ok()); + let ret = ret.unwrap(); + assert_eq!(ret, 6 * 1024 * 1024 * 1024); + + let test_string = "6g"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_ok()); + let ret = ret.unwrap(); + assert_eq!(ret, 6 * 1024 * 1024 * 1024); + + let test_string = "6M"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_ok()); + let ret = ret.unwrap(); + assert_eq!(ret, 6 * 1024 * 1024); + + let test_string = "6m"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_ok()); + let ret = ret.unwrap(); + assert_eq!(ret, 6 * 1024 * 1024); + + // default unit is MiB + let test_string = "6"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_ok()); + let ret = ret.unwrap(); + assert_eq!(ret, 6 * 1024 * 1024); + + let test_string = "G6"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_err()); + + let test_string = "G6G"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_err()); + + let test_string = "6Gg"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_err()); + + let test_string = "6gG"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_err()); + + let test_string = "g6G"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_err()); + + let test_string = "G6g"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_err()); + + let test_string = "M6"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_err()); + + let test_string = "M6M"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_err()); + + let test_string = "6Mm"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_err()); + + let test_string = "6mM"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_err()); + + let test_string = "m6M"; + let ret = memory_unit_conversion(test_string, M); + assert!(ret.is_err()); + + let test_string = "M6m"; + let ret = memory_unit_conversion(test_string, M); + 
assert!(ret.is_err()); + } + + #[test] + fn test_machine_type() { + let test_string = "none"; + let machine_type = MachineType::from_str(test_string); + assert!(machine_type.is_ok()); + let machine_type = machine_type.unwrap(); + assert_eq!(machine_type, MachineType::None); + + let test_string = "None"; + let machine_type = MachineType::from_str(test_string); + assert!(machine_type.is_ok()); + let machine_type = machine_type.unwrap(); + assert_eq!(machine_type, MachineType::None); + + let test_string = "NONE"; + let machine_type = MachineType::from_str(test_string); + assert!(machine_type.is_ok()); + let machine_type = machine_type.unwrap(); + assert_eq!(machine_type, MachineType::None); + + let test_string = "no"; + let machine_type = MachineType::from_str(test_string); + assert!(machine_type.is_err()); + + let test_string = "microvm"; + let machine_type = MachineType::from_str(test_string); + assert!(machine_type.is_ok()); + let machine_type = machine_type.unwrap(); + assert_eq!(machine_type, MachineType::MicroVm); + + let test_string = "MICROVM"; + let machine_type = MachineType::from_str(test_string); + assert!(machine_type.is_ok()); + let machine_type = machine_type.unwrap(); + assert_eq!(machine_type, MachineType::MicroVm); + + let test_string = "machine"; + let machine_type = MachineType::from_str(test_string); + assert!(machine_type.is_err()); + + #[cfg(target_arch = "x86_64")] + { + let test_string = "q35"; + let machine_type = MachineType::from_str(test_string); + assert!(machine_type.is_ok()); + let machine_type = machine_type.unwrap(); + assert_eq!(machine_type, MachineType::StandardVm); + + let test_string = "Q35"; + let machine_type = MachineType::from_str(test_string); + assert!(machine_type.is_ok()); + let machine_type = machine_type.unwrap(); + assert_eq!(machine_type, MachineType::StandardVm); + + let test_string = "virt"; + let machine_type = MachineType::from_str(test_string); + assert!(machine_type.is_err()); + } + + #[cfg(target_arch = "aarch64")] + { + let test_string = "virt"; + let machine_type = MachineType::from_str(test_string); + assert!(machine_type.is_ok()); + let machine_type = machine_type.unwrap(); + assert_eq!(machine_type, MachineType::StandardVm); + + let test_string = "VIRT"; + let machine_type = MachineType::from_str(test_string); + assert!(machine_type.is_ok()); + let machine_type = machine_type.unwrap(); + assert_eq!(machine_type, MachineType::StandardVm); + + let test_string = "q35"; + let machine_type = MachineType::from_str(test_string); + assert!(machine_type.is_err()); + } + } + + #[test] + fn test_add_memory() { + let mut vm_config = VmConfig::default(); + let memory_cfg = "size=8"; + let mem_cfg_ret = vm_config.add_memory(memory_cfg); + assert!(mem_cfg_ret.is_ok()); + let mem_size = vm_config.machine_config.mem_config.mem_size; + assert_eq!(mem_size, 8 * 1024 * 1024); + + let memory_cfg = "size=8m"; + let mem_cfg_ret = vm_config.add_memory(memory_cfg); + assert!(mem_cfg_ret.is_ok()); + let mem_size = vm_config.machine_config.mem_config.mem_size; + assert_eq!(mem_size, 8 * 1024 * 1024); + + let memory_cfg = "size=8G"; + let mem_cfg_ret = vm_config.add_memory(memory_cfg); + assert!(mem_cfg_ret.is_ok()); + let mem_size = vm_config.machine_config.mem_config.mem_size; + assert_eq!(mem_size, 8 * 1024 * 1024 * 1024); + } + + #[test] + fn test_add_machine() { + let mut vm_config = VmConfig::default(); + let memory_cfg_str = "type=none,dump-guest-core=on,mem-share=on,accel=kvm,usb=off"; + let machine_cfg_ret = vm_config.add_machine(memory_cfg_str); + 
assert!(machine_cfg_ret.is_ok()); + let machine_cfg = vm_config.machine_config; + assert_eq!(machine_cfg.mach_type, MachineType::None); + assert!(machine_cfg.mem_config.dump_guest_core); + assert!(machine_cfg.mem_config.mem_share); + + let mut vm_config = VmConfig::default(); + let memory_cfg_str = "none,dump-guest-core=off,mem-share=off,accel=kvm,usb=off"; + let machine_cfg_ret = vm_config.add_machine(memory_cfg_str); + assert!(machine_cfg_ret.is_ok()); + let machine_cfg = vm_config.machine_config; + assert_eq!(machine_cfg.mach_type, MachineType::None); + assert_eq!(machine_cfg.hypervisor, HypervisorType::Kvm); + assert!(!machine_cfg.mem_config.dump_guest_core); + assert!(!machine_cfg.mem_config.mem_share); + + let mut vm_config = VmConfig::default(); + let memory_cfg_str = "type=none,accel=kvm-tcg"; + let machine_cfg_ret = vm_config.add_machine(memory_cfg_str); + assert!(machine_cfg_ret.is_err()); + + let mut vm_config = VmConfig::default(); + let memory_cfg_str = "type=none,usb=on"; + let machine_cfg_ret = vm_config.add_machine(memory_cfg_str); + assert!(machine_cfg_ret.is_err()); + + #[cfg(target_arch = "aarch64")] + { + let mut vm_config = VmConfig::default(); + let memory_cfg_str = + "type=none,dump-guest-core=off,mem-share=off,accel=kvm,usb=off,gic-version=3"; + let machine_cfg_ret = vm_config.add_machine(memory_cfg_str); + assert!(machine_cfg_ret.is_ok()); + let machine_cfg = vm_config.machine_config; + assert_eq!(machine_cfg.mach_type, MachineType::None); + assert_eq!(machine_cfg.mem_config.dump_guest_core, false); + assert_eq!(machine_cfg.mem_config.mem_share, false); + + let mut vm_config = VmConfig::default(); + let memory_cfg_str = "type=none,gic-version=-1"; + let machine_cfg_ret = vm_config.add_machine(memory_cfg_str); + assert!(machine_cfg_ret.is_err()); + + let mut vm_config = VmConfig::default(); + let memory_cfg_str = "type=none,gic-version=256"; + let machine_cfg_ret = vm_config.add_machine(memory_cfg_str); + assert!(machine_cfg_ret.is_err()); + + let mut vm_config = VmConfig::default(); + let memory_cfg_str = "type=none,gic-version=4"; + let machine_cfg_ret = vm_config.add_machine(memory_cfg_str); + assert!(machine_cfg_ret.is_err()); + } + } + + #[test] + fn test_add_mem_path() { + let mut vm_config = VmConfig::default(); + let memory_path_str = "/path/to/memory-backend"; + let mem_path = vm_config.machine_config.mem_config.mem_path.clone(); + // default value is none. + assert!(mem_path.is_none()); + let mem_cfg_ret = vm_config.add_mem_path(memory_path_str); + assert!(mem_cfg_ret.is_ok()); + let mem_path = vm_config.machine_config.mem_config.mem_path; + assert!(mem_path.is_some()); + let mem_path = mem_path.unwrap(); + assert_eq!(mem_path, memory_path_str); + } + + #[test] + fn test_enable_memory_prealloc() { + let mut vm_config = VmConfig::default(); + let mem_prealloc = vm_config.machine_config.mem_config.mem_prealloc; + // default value is false. 
+ assert!(!mem_prealloc); + vm_config.enable_mem_prealloc(); + let mem_prealloc = vm_config.machine_config.mem_config.mem_prealloc; + assert!(mem_prealloc); + } + + #[test] + fn test_add_cpu() { + let mut vm_config = VmConfig::default(); + let cpu_cfg_str = "cpus=8,sockets=8,cores=1,threads=1"; + let cpu_cfg_ret = vm_config.add_cpu(cpu_cfg_str); + assert!(cpu_cfg_ret.is_ok()); + let nr_cpu = vm_config.machine_config.nr_cpus; + assert_eq!(nr_cpu, 8); + + let mut vm_config = VmConfig::default(); + let cpu_cfg_str = "cpus=9,sockets=8,cores=1,threads=1"; + let cpu_cfg_ret = vm_config.add_cpu(cpu_cfg_str); + assert!(cpu_cfg_ret.is_err()); + + let mut vm_config = VmConfig::default(); + let cpu_cfg_str = "cpus=0,sockets=0,cores=1,threads=1"; + let cpu_cfg_ret = vm_config.add_cpu(cpu_cfg_str); + assert!(cpu_cfg_ret.is_err()); + + let mut vm_config = VmConfig::default(); + let cpu_cfg_str = "cpus=254,sockets=254,cores=1,threads=1"; + let cpu_cfg_ret = vm_config.add_cpu(cpu_cfg_str); + assert!(cpu_cfg_ret.is_ok()); + let nr_cpu = vm_config.machine_config.nr_cpus; + assert_eq!(nr_cpu, 254); + + let mut vm_config = VmConfig::default(); + let cpu_cfg_str = "cpus=255,sockets=255,cores=1,threads=1"; + let cpu_cfg_ret = vm_config.add_cpu(cpu_cfg_str); + assert!(cpu_cfg_ret.is_err()); + } + + #[test] + fn test_add_mem_zone() { + let mut vm_config = VmConfig::default(); + let zone_config_1 = vm_config + .add_mem_zone("memory-backend-ram,size=2G,id=mem1,host-nodes=1,policy=bind") + .unwrap(); + assert_eq!(zone_config_1.id, "mem1"); + assert_eq!(zone_config_1.size, 2147483648); + assert_eq!(zone_config_1.host_numa_nodes, Some(vec![1])); + assert_eq!(zone_config_1.policy, "bind"); + + let zone_config_2 = vm_config + .add_mem_zone("memory-backend-ram,size=2G,id=mem2,host-nodes=1-2,policy=default") + .unwrap(); + assert_eq!(zone_config_2.host_numa_nodes, Some(vec![1, 2])); + + let zone_config_3 = vm_config + .add_mem_zone("memory-backend-ram,size=2M,id=mem3,share=on") + .unwrap(); + assert_eq!(zone_config_3.size, 2 * 1024 * 1024); + assert!(zone_config_3.share); + + let zone_config_4 = vm_config + .add_mem_zone("memory-backend-ram,size=2M,id=mem4") + .unwrap(); + assert!(!zone_config_4.share); + assert!(!zone_config_4.memfd()); + + let zone_config_5 = vm_config + .add_mem_zone("memory-backend-memfd,size=2M,id=mem5") + .unwrap(); + assert!(zone_config_5.memfd()); + } + + #[test] + fn test_host_mem_policy() { + let policy = HostMemPolicy::from(String::from("default")); + assert!(policy == HostMemPolicy::Default); + + let policy = HostMemPolicy::from(String::from("interleave")); + assert!(policy == HostMemPolicy::Interleave); + + let policy = HostMemPolicy::from(String::from("error")); + assert!(policy == HostMemPolicy::NotSupported); + } + + #[cfg(target_arch = "aarch64")] + #[test] + fn test_cpu_features() { + // Test PMU flags + let mut vm_config = VmConfig::default(); + vm_config.add_cpu_feature("host").unwrap(); + assert!(vm_config.machine_config.cpu_config.pmu == PmuConfig::Off); + vm_config.add_cpu_feature("host,pmu=off").unwrap(); + assert!(vm_config.machine_config.cpu_config.pmu == PmuConfig::Off); + vm_config.add_cpu_feature("host,pmu=on").unwrap(); + assert!(vm_config.machine_config.cpu_config.pmu == PmuConfig::On); + vm_config.add_cpu_feature("host,sve=on").unwrap(); + assert!(vm_config.machine_config.cpu_config.sve == SveConfig::On); + vm_config.add_cpu_feature("host,sve=off").unwrap(); + assert!(vm_config.machine_config.cpu_config.sve == SveConfig::Off); + + // Illegal cpu command lines: should set 
cpu family. + let result = vm_config.add_cpu_feature("pmu=off"); + assert!(result.is_err()); + let result = vm_config.add_cpu_feature("sve=on"); + assert!(result.is_err()); + + // Illegal parameters. + let result = vm_config.add_cpu_feature("host,sve1=on"); + assert!(result.is_err()); + + // Illegal values. + let result = vm_config.add_cpu_feature("host,sve=false"); + assert!(result.is_err()); + } + + #[test] + fn test_add_accel() { + let mut vm_config = VmConfig::default(); + let accel_cfg = "kvm"; + assert!(vm_config.add_accel(accel_cfg).is_ok()); + let machine_cfg = vm_config.machine_config; + assert_eq!(machine_cfg.hypervisor, HypervisorType::Kvm); + + let mut vm_config = VmConfig::default(); + let accel_cfg = "kvm:tcg"; + assert!(vm_config.add_accel(accel_cfg).is_ok()); + let machine_cfg = vm_config.machine_config; + assert_eq!(machine_cfg.hypervisor, HypervisorType::Kvm); + + let mut vm_config = VmConfig::default(); + let accel_cfg = "kvm1"; + assert!(vm_config.add_accel(accel_cfg).is_err()); + } } diff --git a/machine_manager/src/config/mod.rs b/machine_manager/src/config/mod.rs index 4c4e2bcd3071704965ea053562b7c1949771ca43..6f793d4e677a200d6b2cc60a0e1dfce5e200914a 100644 --- a/machine_manager/src/config/mod.rs +++ b/machine_manager/src/config/mod.rs @@ -10,138 +10,114 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -pub mod errors { - error_chain! { - links { - Util(util::errors::Error, util::errors::ErrorKind); - } - foreign_links { - JsonSerde(serde_json::Error); - } - errors { - InvalidJsonField(field: String) { - display("Invalid json field \'{}\'", field) - } - InvalidParam(param: String, name: String) { - display("Invalid parameter \'{}\' for \'{}\'", param, name) - } - ConvertValueFailed(param: String, value: String) { - display("Unable to parse \'{}\' for \'{}\'", value, param) - } - StringLengthTooLong(t: String, len: usize) { - display("Input {} string's length must be no more than {}.", t, len) - } - FieldRepeat(param: String, field: String) { - display("Input field \'{}\' in {} is offered more than once.", field, param) - } - IdRepeat(param: String, id: String) { - display("Input id \'{}\' for {} repeat.", id, param) - } - IntegerOverflow(item: String) { - display("Integer overflow occurred during parse {}!", item) - } - UnknownDeviceType(item: String) { - display("Unknown device type: {}!", item) - } - FieldIsMissing(field: &'static str, device: &'static str) { - display("\'{}\' is missing for \'{}\' device.", field, device) - } - IllegalValue(name: String, min: u64, min_include: bool, max: u64, max_include: bool) { - display( - "{} must >{} {} and <{} {}.", - name, - if *min_include {"="} else {""}, - min, - if *max_include {"="} else {""}, - max - ) - } - MacFormatError { - display("Mac address is illegal.") - } - UnknownVhostType { - display("Unknown vhost type.") - } - UnRegularFile(t: String) { - display("{} is not a regular File.", t) - } - Unaligned(param: String, value: u64, align: u64) { - display("Input value {} is unaligned with {} for {}.", value, align, param) - } - UnitIdError(id: usize, max: usize){ - description("Check unit id of pflash device.") - display("PFlash unit id given {} should not be more than {}", id, max) - } - } - } -} +#[cfg(feature = "usb_camera")] +pub mod camera; +#[cfg(any(feature = "gtk", feature = "ohui_srv"))] +pub mod display; +pub mod error; +#[cfg(feature = "vnc")] +pub mod vnc; -pub use self::errors::{ErrorKind, Result, ResultExt}; -pub use balloon::*; -pub use 
boot_source::*; -pub use chardev::*; -pub use devices::*; -pub use drive::*; -pub use iothread::*; -pub use machine_config::*; -pub use network::*; -pub use pci::*; -pub use rng::*; -pub use vfio::*; - -mod balloon; mod boot_source; mod chardev; mod devices; mod drive; +mod incoming; mod iothread; mod machine_config; mod network; +mod numa; mod pci; mod rng; -mod vfio; +#[cfg(feature = "vnc_auth")] +mod sasl_auth; +mod smbios; +#[cfg(feature = "vnc_auth")] +mod tls_creds; + +pub use boot_source::*; +#[cfg(feature = "usb_camera")] +pub use camera::*; +pub use chardev::*; +#[cfg(any(feature = "gtk", feature = "ohui_srv"))] +pub use display::*; +pub use drive::*; +pub use error::ConfigError; +pub use incoming::*; +pub use iothread::*; +pub use machine_config::*; +pub use network::*; +pub use numa::*; +pub use pci::*; +pub use rng::*; +#[cfg(feature = "vnc_auth")] +pub use sasl_auth::*; +pub use smbios::*; +#[cfg(feature = "vnc_auth")] +pub use tls_creds::*; +#[cfg(feature = "vnc")] +pub use vnc::*; -use std::any::Any; use std::collections::HashMap; +use std::fs::{canonicalize, File}; +use std::io::Read; +use std::os::unix::io::AsRawFd; +use std::path::Path; use std::str::FromStr; +use std::sync::Arc; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::Parser; +use log::{error, info}; +use serde::{Deserialize, Serialize}; +use trace::{enable_state_by_type, set_state_by_pattern, TraceType}; #[cfg(target_arch = "aarch64")] use util::device_tree::{self, FdtBuilder}; -use util::trace::enable_trace_events; +use util::{ + file::{get_file_alignment, open_file}, + num_ops::str_to_num, + test_helper::is_test_enabled, + AsAny, +}; pub const MAX_STRING_LENGTH: usize = 255; pub const MAX_PATH_LENGTH: usize = 4096; -pub const MAX_VIRTIO_QUEUE: usize = 1024; +// Maximum length of the socket path is restricted by linux. +pub const MAX_SOCK_PATH_LENGTH: usize = 108; +// FIXME: `queue_config` len in `VirtioPciState` struct needs to be modified together. +pub const MAX_VIRTIO_QUEUE: usize = 32; pub const FAST_UNPLUG_ON: &str = "1"; pub const FAST_UNPLUG_OFF: &str = "0"; - -#[derive(Debug, Clone)] -pub enum ObjConfig { - Rng(RngObjConfig), +pub const MAX_NODES: u32 = 128; +/// Default virtqueue size for virtio devices excepts virtio-fs. +pub const DEFAULT_VIRTQUEUE_SIZE: u16 = 256; +// Seg_max = queue_size - 2. So, size of each virtqueue for virtio-scsi/virtio-blk should be larger than 2. +pub const MIN_QUEUE_SIZE_BLOCK_DEVICE: u64 = 2; +// Max size of each virtqueue for virtio-scsi/virtio-blk. +pub const MAX_QUEUE_SIZE_BLOCK_DEVICE: u64 = 1024; +/// The bar0 size of enable_bar0 features +pub const VIRTIO_GPU_ENABLE_BAR0_SIZE: u64 = 64 * M; + +#[derive(Parser)] +#[command(no_binary_name(true))] +struct GlobalConfig { + #[arg(long, alias = "pcie-root-port.fast-unplug", value_parser = ["0", "1"])] + fast_unplug: Option, } -fn parse_rng_obj(object_args: &str) -> Result { - let mut cmd_params = CmdParser::new("rng-object"); - cmd_params.push("").push("id").push("filename"); - - cmd_params.parse(object_args)?; - let id = if let Some(obj_id) = cmd_params.get_value::("id")? { - obj_id - } else { - return Err(ErrorKind::FieldIsMissing("id", "rng-object").into()); - }; - let filename = if let Some(name) = cmd_params.get_value::("filename")? 
{ - name - } else { - return Err(ErrorKind::FieldIsMissing("filename", "rng-object").into()); - }; - let rng_obj_cfg = RngObjConfig { id, filename }; - - Ok(rng_obj_cfg) +#[derive(Clone, Default, Debug, Serialize, Deserialize)] +pub struct ObjectConfig { + pub rng_object: HashMap, + pub mem_object: HashMap, + #[cfg(feature = "vnc_auth")] + pub tls_object: HashMap, + #[cfg(feature = "vnc_auth")] + pub sasl_object: HashMap, } /// This main config structure for Vm, contains Vm's basic configuration and devices. -#[derive(Clone, Default, Debug)] +#[derive(Clone, Default, Debug, Serialize, Deserialize)] pub struct VmConfig { pub guest_name: String, pub machine_config: MachineConfig, @@ -153,10 +129,22 @@ pub struct VmConfig { pub devices: Vec<(String, String)>, pub serial: Option, pub iothreads: Option>, - pub object: HashMap, - pub pflashs: Option>, + pub object: ObjectConfig, + pub pflashs: Option>, pub dev_name: HashMap, pub global_config: HashMap, + pub numa_nodes: Vec<(String, String)>, + pub incoming: Option, + pub hardware_signature: Option, + #[cfg(feature = "vnc")] + pub vnc: Option, + #[cfg(any(feature = "gtk", all(target_env = "ohos", feature = "ohui_srv")))] + pub display: Option, + #[cfg(feature = "usb_camera")] + pub camera_backend: HashMap, + #[cfg(feature = "windows_emu_pid")] + pub emulator_pid: Option, + pub smbios: SmbiosConfig, } impl VmConfig { @@ -165,31 +153,30 @@ impl VmConfig { self.boot_source.check()?; self.machine_config.check()?; - if self.guest_name.len() > MAX_STRING_LENGTH { - return Err(self::errors::ErrorKind::StringLengthTooLong( - "name".to_string(), - MAX_STRING_LENGTH, - ) - .into()); - } + check_arg_too_long(&self.guest_name, "name")?; + if self.boot_source.kernel_file.is_none() && self.machine_config.mach_type == MachineType::MicroVm { bail!("kernel file is required for microvm machine type, which is not provided"); } - if self.boot_source.initrd.is_none() && self.drives.is_empty() { - bail!("Before Vm start, set a initrd or drive_file as rootfs"); + if self.boot_source.initrd.is_none() + && self.drives.is_empty() + && self.chardev.is_empty() + && !is_test_enabled() + { + bail!("Before Vm start, set a initrd or drive_file or vhost-user blk as rootfs"); } let mut stdio_count = 0; if let Some(serial) = self.serial.as_ref() { - if serial.chardev.backend == ChardevType::Stdio { + if let ChardevType::Stdio { .. } = serial.chardev.classtype { stdio_count += 1; } } for (_, char_dev) in self.chardev.clone() { - if char_dev.backend == ChardevType::Stdio { + if let ChardevType::Stdio { .. } = char_dev.classtype { stdio_count += 1; } } @@ -219,32 +206,35 @@ impl VmConfig { /// /// * `object_args` - The args of object. 
pub fn add_object(&mut self, object_args: &str) -> Result<()> { - let mut cmd_params = CmdParser::new("object"); - cmd_params.push(""); - - cmd_params.get_parameters(object_args)?; - let obj_type = cmd_params.get_value::("")?; - if obj_type.is_none() { - bail!("Object type not specified"); - } - let device_type = obj_type.unwrap(); - match device_type.as_str() { + let object_type = + get_class_type(object_args).with_context(|| "Object type not specified")?; + match object_type.as_str() { "iothread" => { self.add_iothread(object_args) - .chain_err(|| "Failed to add iothread")?; + .with_context(|| "Failed to add iothread")?; } "rng-random" => { - let rng_cfg = parse_rng_obj(object_args)?; + let rng_cfg = + RngObjConfig::try_parse_from(str_slip_to_clap(object_args, true, false))?; let id = rng_cfg.id.clone(); - let object_config = ObjConfig::Rng(rng_cfg); - if self.object.get(&id).is_none() { - self.object.insert(id, object_config); - } else { - bail!("Object: {:?} has been added"); + if self.object.rng_object.contains_key(&id) { + bail!("Object: {} has been added", id); } + self.object.rng_object.insert(id, rng_cfg); + } + "memory-backend-ram" | "memory-backend-file" | "memory-backend-memfd" => { + self.add_mem_zone(object_args)?; + } + #[cfg(feature = "vnc_auth")] + "tls-creds-x509" => { + self.add_tlscred(object_args)?; + } + #[cfg(feature = "vnc_auth")] + "authz-simple" => { + self.add_saslauth(object_args)?; } _ => { - bail!("Unknow object type: {:?}", &device_type); + bail!("Unknow object type: {:?}", &object_type); } } @@ -257,48 +247,164 @@ impl VmConfig { /// /// * `global_config` - The args of global config. pub fn add_global_config(&mut self, global_config: &str) -> Result<()> { - let mut cmd_parser = CmdParser::new("global"); - cmd_parser.push("pcie-root-port.fast-unplug"); - cmd_parser.parse(global_config)?; + let global_config = + GlobalConfig::try_parse_from(str_slip_to_clap(global_config, false, false))?; - if let Some(fast_unplug_value) = - cmd_parser.get_value::("pcie-root-port.fast-unplug")? - { - if fast_unplug_value != FAST_UNPLUG_ON && fast_unplug_value != FAST_UNPLUG_OFF { - bail!("The value of fast-unplug is invalid: {}", fast_unplug_value); - } + if let Some(fast_unplug_value) = global_config.fast_unplug { let fast_unplug_key = String::from("pcie-root-port.fast-unplug"); - if self.global_config.get(&fast_unplug_key).is_none() { - self.global_config - .insert(fast_unplug_key, fast_unplug_value); - } else { + if self.global_config.contains_key(&fast_unplug_key) { bail!("Global config {} has been added", fast_unplug_key); } + self.global_config + .insert(fast_unplug_key, fast_unplug_value); } + Ok(()) } -} -#[cfg(target_arch = "aarch64")] -impl device_tree::CompileFDT for VmConfig { - fn generate_fdt_node(&self, _fdt: &mut FdtBuilder) -> util::errors::Result<()> { + /// Add argument `windows_emu_pid` to `VmConfig`. + /// + /// # Arguments + /// + /// * `windows_emu_pid` - The args of windows_emu_pid. + #[cfg(feature = "windows_emu_pid")] + pub fn add_windows_emu_pid(&mut self, windows_emu_pid: &str) -> Result<()> { + if windows_emu_pid.is_empty() { + bail!("The arg of emulator_pid is empty!"); + } + self.emulator_pid = Some(windows_emu_pid.to_string()); Ok(()) } -} -/// This trait is to cast trait object to struct. -pub trait AsAny { - fn as_any(&self) -> &dyn Any; + /// Add a file to drive file store. 
+ pub fn add_drive_file( + drive_files: &mut HashMap, + id: &str, + path: &str, + read_only: bool, + direct: bool, + ) -> Result<()> { + if let Some(drive_file) = drive_files.get_mut(path) { + if drive_file.read_only && read_only { + // File can be shared with read_only. + drive_file.count += 1; + return Ok(()); + } else { + return Err(anyhow!( + "Failed to add drive {}, file can only be shared with read_only. \ + Is it used more than once or another process using the same file?", + path + )); + } + } + let file = open_file(path, read_only, direct)?; + let (req_align, buf_align) = get_file_alignment(&file, direct); + if req_align == 0 || buf_align == 0 { + bail!( + "Failed to detect alignment requirement of drive file {}.", + path + ); + } + let drive_file = DriveFile { + id: id.to_string(), + file: Arc::new(file), + count: 1, + read_only, + path: path.to_string(), + locked: false, + req_align, + buf_align, + }; + info!("Open file {}, fd: {}", path, drive_file.file.as_raw_fd()); + drive_files.insert(path.to_string(), drive_file); + Ok(()) + } + + /// Remove a file from drive file store. + pub fn remove_drive_file( + drive_files: &mut HashMap, + path: &str, + ) -> Result<()> { + if let Some(drive_file) = drive_files.get_mut(path) { + drive_file.count -= 1; + if drive_file.count == 0 { + drive_files.remove(path); + } + } else { + return Err(anyhow!( + "Failed to remove drive {}, it does not exist", + path + )); + } + Ok(()) + } + + /// Get a file from drive file store. + pub fn fetch_drive_file( + drive_files: &HashMap, + path: &str, + ) -> Result> { + match drive_files.get(path) { + Some(drive_file) => Ok(drive_file.file.clone()), + None => Err(anyhow!("The file {} is not in drive backend", path)), + } + } + + /// Get drive id from drive file store. + pub fn get_drive_id(drive_files: &HashMap, path: &str) -> Result { + match drive_files.get(path) { + Some(drive_file) => Ok(drive_file.id.clone()), + None => Err(anyhow!("The file {} is not in drive backend", path)), + } + } + + /// Get alignment requirement from drive file store. + pub fn fetch_drive_align( + drive_files: &HashMap, + path: &str, + ) -> Result<(u32, u32)> { + match drive_files.get(path) { + Some(drive_file) => Ok((drive_file.req_align, drive_file.buf_align)), + None => Err(anyhow!("The file {} is not in drive backend", path)), + } + } + + /// Create initial drive file store from cmdline drive. + pub fn init_drive_files(&self) -> Result> { + let mut drive_files: HashMap = HashMap::new(); + for drive in self.drives.values() { + Self::add_drive_file( + &mut drive_files, + &drive.id, + &drive.path_on_host, + drive.readonly, + drive.direct, + )?; + } + if let Some(pflashs) = self.pflashs.as_ref() { + for pflash in pflashs { + Self::add_drive_file( + &mut drive_files, + "", + &pflash.path_on_host, + pflash.readonly, + false, + )?; + } + } + Ok(drive_files) + } } -impl AsAny for T { - fn as_any(&self) -> &dyn Any { - self +#[cfg(target_arch = "aarch64")] +impl device_tree::CompileFDT for VmConfig { + fn generate_fdt_node(&self, _fdt: &mut FdtBuilder) -> Result<()> { + Ok(()) } } /// This trait is to check the legality of Config structure. -pub trait ConfigCheck: AsAny + Send + Sync { +pub trait ConfigCheck: AsAny + Send + Sync + std::fmt::Debug { /// To check the legality of Config structure. /// /// # Errors @@ -312,182 +418,355 @@ pub trait ConfigCheck: AsAny + Send + Sync { fn check(&self) -> Result<()>; } -/// Struct `CmdParser` used to parse and check cmdline parameters to vm config. 
-pub struct CmdParser { - name: String, - params: HashMap>, +/// This struct is a wrapper for `bool`. +/// More switch string can be transferred to this structure. +pub struct ExBool { + inner: bool, } -impl CmdParser { - /// Allocates an empty `CmdParser`. - pub fn new(name: &str) -> Self { - CmdParser { - name: name.to_string(), - params: HashMap::>::new(), +impl FromStr for ExBool { + type Err = anyhow::Error; + + fn from_str(s: &str) -> std::result::Result { + match s { + "true" | "on" | "yes" | "unmap" => Ok(ExBool { inner: true }), + "false" | "off" | "no" | "ignore" => Ok(ExBool { inner: false }), + _ => Err(anyhow!("Unknown Exbool value {}", s)), } } +} - /// Push a new param field into `params`. - /// - /// # Arguments - /// - /// * `param_field`: The cmdline parameter field name. - pub fn push(&mut self, param_field: &str) -> &mut Self { - self.params.insert(param_field.to_string(), None); +impl From for bool { + fn from(item: ExBool) -> Self { + item.inner + } +} - self +pub fn parse_bool(s: &str) -> Result { + match s { + "true" | "on" | "yes" | "unmap" => Ok(true), + "false" | "off" | "no" | "ignore" => Ok(false), + _ => Err(anyhow!("Unknow bool value {s}")), } +} - /// Parse cmdline parameters string into `params`. - /// - /// # Arguments - /// - /// * `cmd_param`: The whole cmdline parameter string. - pub fn parse(&mut self, cmd_param: &str) -> Result<()> { - if cmd_param.starts_with(',') || cmd_param.ends_with(',') { - return Err(ErrorKind::InvalidParam(cmd_param.to_string(), self.name.clone()).into()); +fn enable_trace_state_from_file(path: &str) -> Result<()> { + let mut file = File::open(path).with_context(|| format!("Failed to open {}", path))?; + let mut buf = String::new(); + file.read_to_string(&mut buf) + .with_context(|| format!("Failed to read {}", path))?; + + let state: Vec<&str> = buf.split('\n').filter(|&s| !s.is_empty()).collect(); + for s in state { + set_state_by_pattern(s.trim().to_string(), true).with_context(|| { + format!( + "Unable to set the state of {} according to {}", + s.trim(), + path + ) + })?; + } + Ok(()) +} + +fn enable_trace_state_from_type(type_str: &str) -> Result<()> { + match type_str { + "events" => enable_state_by_type(TraceType::Event)?, + "scopes" => enable_state_by_type(TraceType::Scope)?, + "all" => { + enable_state_by_type(TraceType::Event)?; + enable_state_by_type(TraceType::Scope)?; } - let param_items = cmd_param.split(',').collect::>(); - for (i, param_item) in param_items.iter().enumerate() { - if param_item.starts_with('=') || param_item.ends_with('=') { - return Err( - ErrorKind::InvalidParam(param_item.to_string(), self.name.clone()).into(), - ); + _ => bail!("Unknown trace type {}", type_str), + }; + + Ok(()) +} + +#[derive(Parser)] +#[command(no_binary_name(true))] +struct TraceConfig { + #[arg(long)] + file: Option, + #[arg(long, alias = "type")] + type_str: Option, +} + +pub fn add_trace(opt: &str) -> Result<()> { + let trace_cfg = TraceConfig::try_parse_from(str_slip_to_clap(opt, false, false))?; + if trace_cfg.type_str.is_none() && trace_cfg.file.is_none() { + bail!("No type or file after -trace"); + } + + if let Some(type_str) = trace_cfg.type_str { + enable_trace_state_from_type(&type_str)?; + } + if let Some(file) = trace_cfg.file { + enable_trace_state_from_file(&file)?; + } + Ok(()) +} + +/// This struct is a wrapper for `usize`. +/// Hexadecimal string can be converted to integers by this structure method. 
+pub struct UnsignedInteger(pub usize); + +impl FromStr for UnsignedInteger { + type Err = (); + + fn from_str(s: &str) -> std::result::Result { + let value = + str_to_num::(s).map_err(|e| error!("Invalid value {}, error is {:?}", s, e))?; + Ok(UnsignedInteger(value)) + } +} + +pub struct IntegerList(pub Vec); + +impl FromStr for IntegerList { + type Err = anyhow::Error; + + fn from_str(s: &str) -> std::result::Result { + let mut integer_list = Vec::new(); + let lists: Vec<&str> = s + .trim() + .trim_matches(|c| c == '[' || c == ']') + .split(':') + .collect(); + for list in lists.iter() { + let items: Vec<&str> = list.split('-').collect(); + if items.len() > 2 { + return Err(anyhow!( + "{} parameters connected by -, should be no more than 2.", + items.len() + )); } - let param = param_item.splitn(2, '=').collect::>(); - let (param_key, param_value) = match param.len() { - 1 => { - if i == 0 { - ("", param[0]) - } else { - (param[0], "") - } - } - 2 => (param[0], param[1]), - _ => { - return Err( - ErrorKind::InvalidParam(param_item.to_string(), self.name.clone()).into(), - ); + + let start = items[0] + .parse::() + .map_err(|e| anyhow!("Invalid value {}, error is {:?}", items[0], e))?; + integer_list.push(start); + if items.len() == 2 { + let end = items[1] + .parse::() + .map_err(|e| anyhow!("Invalid value {}, error is {:?}", items[1], e))?; + if start >= end { + return Err(anyhow!("start {} is bigger than end {}.", start, end)); } - }; - if self.params.contains_key(param_key) { - let field_value = self.params.get_mut(param_key).unwrap(); - if field_value.is_none() { - *field_value = Some(String::from(param_value)); - } else { - return Err( - ErrorKind::FieldRepeat(self.name.clone(), param_key.to_string()).into(), - ); + for i in start..end { + integer_list.push(i + 1); } - } else { - return Err( - ErrorKind::InvalidParam(param[0].to_string(), self.name.clone()).into(), - ); } } - Ok(()) + Ok(IntegerList(integer_list)) } +} - /// Parse all cmdline parameters string into `params`. - /// - /// # Arguments - /// - /// * `cmd_param`: The whole cmdline parameter string. - fn get_parameters(&mut self, cmd_param: &str) -> Result<()> { - if cmd_param.starts_with(',') || cmd_param.ends_with(',') { - return Err(ErrorKind::InvalidParam(cmd_param.to_string(), self.name.clone()).into()); - } - let param_items = cmd_param.split(',').collect::>(); - for param_item in param_items { - let param = param_item.splitn(2, '=').collect::>(); - let (param_key, param_value) = match param.len() { - 1 => ("", param[0]), - 2 => (param[0], param[1]), - _ => { - return Err( - ErrorKind::InvalidParam(param_item.to_string(), self.name.clone()).into(), - ); - } - }; +pub fn check_arg_too_long(arg: &str, name: &str) -> Result<()> { + if arg.len() > MAX_STRING_LENGTH { + bail!(ConfigError::StringLengthTooLong( + name.to_string(), + MAX_STRING_LENGTH + )); + } + Ok(()) +} - if self.params.contains_key(param_key) { - let field_value = self.params.get_mut(param_key).unwrap(); - if field_value.is_none() { - *field_value = Some(String::from(param_value)); - } else { - return Err( - ErrorKind::FieldRepeat(self.name.clone(), param_key.to_string()).into(), - ); - } - } - } +pub fn check_path_too_long(arg: &str, name: &str) -> Result<()> { + if arg.len() > MAX_PATH_LENGTH { + bail!(ConfigError::StringLengthTooLong( + name.to_string(), + MAX_PATH_LENGTH + )); + } + Ok(()) +} - Ok(()) +/// Make sure args are existed. +/// +/// arg_name: Name of arg. +/// arg_value: Value of arg. Should be Option<> class. 
+/// Eg: +/// check_arg_exist!(("id", id)); +/// check_arg_exist!(("bus", bus), ("addr", addr)); +#[macro_export] +macro_rules! check_arg_exist{ + ($(($arg_name:tt, $arg_value:expr)),*) => { + $($arg_value.clone().with_context(|| format!("Should set {}.", $arg_name))?;)* } +} - /// Get cmdline parameters value from param field name. - /// - /// # Arguments - /// - /// * `param_field`: The cmdline parameter field name. - pub fn get_value(&self, param_field: &str) -> Result> { - match self.params.get(param_field) { - Some(value) => { - let field_msg = if param_field.is_empty() { - &self.name - } else { - param_field - }; +/// Make sure args are existed. +/// +/// arg_name: Name of arg. +/// arg_value: Value of arg. Should be Option<> class. +/// Eg: +/// check_arg_nonexist!(("id", id)); +/// check_arg_nonexist!(("bus", bus), ("addr", addr)); +#[macro_export] +macro_rules! check_arg_nonexist{ + ($(($arg_name:tt, $arg_value:expr)),*) => { + $($arg_value.clone().map_or(Some(0), |_| None).with_context(|| format!("Should not set {}", $arg_name))?;)* + } +} - if let Some(raw_value) = value { - Ok(Some(raw_value.parse().map_err(|_| { - ErrorKind::ConvertValueFailed(field_msg.to_string(), raw_value.clone()) - })?)) +fn concat_classtype(args: &str, concat: bool) -> String { + if concat { + format!("classtype={}", args) + } else { + args.to_string() + } +} + +/// Configure StratoVirt parameters in clap format. +/// +/// The first parameter will be parsed as the `binary name` unless Command::no_binary_name is used when using `clap`. +/// Stratovirt command line may use the first parameter as class type. +/// Eg: +/// 1. drive config: "-drive file=,if=pflash,unit=0" +/// This cmdline has no class type. +/// 2. device config: "-device virtio-balloon-pci,id=,bus=,addr=<0x4>" +/// This cmdline sets device type `virtio-balloon-pci` as the first parameter. +/// +/// Use first_pos_is_type to indicate whether the first parameter is a type class which needs a separate analysis. +/// Eg: +/// 1. drive config: "-drive file=,if=pflash,unit=0" +/// Set first_pos_is_type false for this cmdline has no class type. +/// 2. device config: "-device virtio-balloon-pci,id=,bus=,addr=<0x4>" +/// Set first_pos_is_type true for this cmdline has device type "virtio-balloon-pci" as the first parameter. +/// +/// Use first_pos_is_subcommand to indicate whether the first parameter is a subclass. +/// Eg: +/// Chardev has stdio/unix-socket/tcp-socket/pty/file classes. These classes have different configurations but will be stored +/// in the same `ChardevConfig` structure by using `enum`. So, we will use class type as a subcommand to indicate which subtype +/// will be used to store the configuration in enumeration type. Subcommand in `clap` doesn't need `--` in parameter. +/// 1. -serial file,path= +/// Set first_pos_is_subcommand true for first parameter `file` is the subclass type for chardev. +pub fn str_slip_to_clap( + args: &str, + first_pos_is_type: bool, + first_pos_is_subcommand: bool, +) -> Vec { + let mut subcommand = first_pos_is_subcommand; + let args_str = concat_classtype(args, first_pos_is_type && !subcommand); + let args_vecs = args_str.split([',']).collect::>(); + let mut itr: Vec = Vec::with_capacity(args_vecs.len() * 2); + for params in args_vecs { + let key_value = params.split(['=']).collect::>(); + // Command line like "key=value" will be converted to "--key value". + // Command line like "key" will be converted to "--key". 
+ for (cnt, param) in key_value.iter().enumerate() { + if cnt % 2 == 0 { + if subcommand { + itr.push(param.to_string()); + subcommand = false; } else { - Ok(None) + itr.push(format!("--{}", param)); } + } else { + itr.push(param.to_string()); } - None => Ok(None), } } + itr } -/// This struct is a wrapper for `bool`. -/// More switch string can be transferred to this structure. -pub struct ExBool { - inner: bool, +/// Retrieve the value of the specified parameter from a string in the format "key=value". +pub fn get_value_of_parameter(parameter: &str, args_str: &str) -> Result { + let args_vecs = args_str.split([',']).collect::>(); + + for args in args_vecs { + let key_value = args.split(['=']).collect::>(); + if key_value.len() != 2 || key_value[0] != parameter { + continue; + } + if key_value[1].is_empty() { + bail!("Find empty arg {} in string {}.", key_value[0], args_str); + } + return Ok(key_value[1].to_string()); + } + + bail!("Cannot find {}'s value from string {}", parameter, args_str); } -impl FromStr for ExBool { - type Err = (); +pub fn get_class_type(args: &str) -> Result { + let args_str = concat_classtype(args, true); + get_value_of_parameter("classtype", &args_str) +} - fn from_str(s: &str) -> std::result::Result { - match s { - "true" | "on" | "yes" => Ok(ExBool { inner: true }), - "false" | "off" | "no" => Ok(ExBool { inner: false }), - _ => Err(()), - } +pub fn valid_id(id: &str) -> Result { + check_arg_too_long(id, "id")?; + Ok(id.to_string()) +} + +// Virtio queue size must be power of 2 and in range [min_size, max_size]. +pub fn valid_virtqueue_size(size: u64, min_size: u64, max_size: u64) -> Result<()> { + if size < min_size || size > max_size { + return Err(anyhow!(ConfigError::IllegalValue( + "virtqueue size".to_string(), + min_size, + true, + max_size, + true + ))); + } + + if size & (size - 1) != 0 { + bail!("Virtqueue size should be power of 2!"); } + + Ok(()) } -impl From for bool { - fn from(item: ExBool) -> Self { - item.inner +pub fn valid_path(path: &str) -> Result { + if path.len() > MAX_PATH_LENGTH { + return Err(anyhow!(ConfigError::StringLengthTooLong( + "path".to_string(), + MAX_PATH_LENGTH, + ))); } + + let canonical_path = canonicalize(path).map_or(path.to_string(), |pathbuf| { + String::from(pathbuf.to_str().unwrap()) + }); + + Ok(canonical_path) } -pub fn add_trace_events(config: &str) -> Result<()> { - let mut cmd_parser = CmdParser::new("trace"); - cmd_parser.push("events"); - cmd_parser.get_parameters(config)?; +pub fn valid_socket_path(sock_path: &str) -> Result { + if sock_path.len() > MAX_SOCK_PATH_LENGTH { + return Err(anyhow!(ConfigError::StringLengthTooLong( + "socket path".to_string(), + MAX_SOCK_PATH_LENGTH, + ))); + } + valid_path(sock_path) +} - if let Some(file) = cmd_parser.get_value::("events")? 
{ - enable_trace_events(&file)?; - return Ok(()); +pub fn valid_dir(d: &str) -> Result { + let dir = String::from(d); + if !Path::new(&dir).is_dir() { + return Err(anyhow!(ConfigError::DirNotExist(dir))); } - bail!("trace: events file must be set."); + Ok(dir) +} + +pub fn valid_block_device_virtqueue_size(s: &str) -> Result { + let size: u64 = s.parse()?; + valid_virtqueue_size( + size, + MIN_QUEUE_SIZE_BLOCK_DEVICE + 1, + MAX_QUEUE_SIZE_BLOCK_DEVICE, + )?; + + Ok(size as u16) +} + +pub fn parse_size(s: &str) -> Result { + let size = memory_unit_conversion(s, M).with_context(|| format!("Invalid size: {}", s))?; + Ok(size) } #[cfg(test)] @@ -495,114 +774,23 @@ mod tests { use super::*; #[test] - fn test_cmd_parser() { - let mut cmd_parser = CmdParser::new("test"); - cmd_parser - .push("") - .push("id") - .push("path") - .push("num") - .push("test1") - .push("test2") - .push("test3") - .push("test4") - .push("test5") - .push("test6") - .push("test7"); - assert!(cmd_parser - .parse("socket,id=charconsole0,path=/tmp/console.sock,num=1,test1=true,test2=on,test3=yes,test4=false,test5=off,test6=no,test7=random") - .is_ok()); - assert_eq!( - cmd_parser.get_value::("").unwrap().unwrap(), - "socket".to_string() - ); - assert_eq!( - cmd_parser.get_value::("id").unwrap().unwrap(), - "charconsole0".to_string() - ); - assert_eq!( - cmd_parser.get_value::("path").unwrap().unwrap(), - "/tmp/console.sock".to_string() - ); - assert_eq!(cmd_parser.get_value::("num").unwrap().unwrap(), 1_u64); - assert_eq!(cmd_parser.get_value::("num").unwrap().unwrap(), 1_u32); - assert_eq!(cmd_parser.get_value::("num").unwrap().unwrap(), 1_u16); - assert_eq!(cmd_parser.get_value::("num").unwrap().unwrap(), 1_u8); - assert_eq!(cmd_parser.get_value::("num").unwrap().unwrap(), 1_i64); - assert_eq!(cmd_parser.get_value::("num").unwrap().unwrap(), 1_i32); - assert_eq!(cmd_parser.get_value::("num").unwrap().unwrap(), 1_i16); - assert_eq!(cmd_parser.get_value::("num").unwrap().unwrap(), 1_i8); - assert!(cmd_parser.get_value::("test1").unwrap().unwrap()); - assert!( - cmd_parser - .get_value::("test1") - .unwrap() - .unwrap() - .inner - ); - assert!( - cmd_parser - .get_value::("test2") - .unwrap() - .unwrap() - .inner - ); - assert!( - cmd_parser - .get_value::("test3") - .unwrap() - .unwrap() - .inner - ); - assert!(!cmd_parser.get_value::("test4").unwrap().unwrap()); - assert!( - !cmd_parser - .get_value::("test4") - .unwrap() - .unwrap() - .inner - ); - assert!( - !cmd_parser - .get_value::("test5") - .unwrap() - .unwrap() - .inner - ); - assert!( - !cmd_parser - .get_value::("test6") - .unwrap() - .unwrap() - .inner - ); - assert!(cmd_parser.get_value::("test7").is_err()); - assert!(cmd_parser.get_value::("test7").is_err()); - assert!(cmd_parser.get_value::("random").unwrap().is_none()); - assert!(cmd_parser.parse("random=false").is_err()); - } + fn test_add_trace() { + assert!(std::fs::File::create("/tmp/trace_file").is_ok()); - #[test] - fn test_add_trace_events_01() { - assert!(add_trace_events("event=test_trace_events").is_err()); - assert!(add_trace_events("events").is_err()); - assert!(add_trace_events("events=test_trace_events").is_err()); - } + assert!(add_trace("file=/tmp/trace_file,type=all").is_ok()); + assert!(add_trace("fil=test_trace").is_err()); + assert!(add_trace("file").is_err()); + assert!(add_trace("file=test_trace").is_err()); - #[test] - fn test_add_trace_events_02() { - use std::fs::File; - use std::io::Write; - use util::trace::is_trace_event_enabled; + assert!(add_trace("type=events").is_ok()); + 
assert!(add_trace("type=scopes").is_ok()); + assert!(add_trace("type=all").is_ok()); + assert!(add_trace("type=xxxxx").is_err()); - let file = "/tmp/test_trace_events"; - let mut fd = File::create(file).unwrap(); - let event = "add_trace_events"; - fd.write(event.as_bytes()).unwrap(); - add_trace_events(format!("events={}", file).as_str()).unwrap(); + assert!(add_trace("").is_err()); + assert!(add_trace("file=/tmp/trace_file,type=all").is_ok()); - assert!(is_trace_event_enabled(event)); - std::fs::remove_file(file).unwrap(); + assert!(std::fs::remove_file("/tmp/trace_file").is_ok()); } #[test] @@ -637,4 +825,20 @@ mod tests { let res = vm_config.add_global_config("pcie-root-port.fast-unplug=1"); assert!(res.is_err()); } + + #[test] + fn test_get_value_of_parameter() { + let cmd = "scsi-hd,id=disk1,drive=scsi-drive-0"; + let id = get_value_of_parameter("id", cmd).unwrap(); + assert_eq!(id, "disk1"); + + let cmd = "id="; + assert!(get_value_of_parameter("id", cmd).is_err()); + + let cmd = "id"; + assert!(get_value_of_parameter("id", cmd).is_err()); + + let cmd = "scsi-hd,idxxx=disk1"; + assert!(get_value_of_parameter("id", cmd).is_err()); + } } diff --git a/machine_manager/src/config/network.rs b/machine_manager/src/config/network.rs index 70e29aa9aa8b9521e81d57fa939e44d732585280..30f1ec807dd55b040b179368df9d941dbec059f2 100644 --- a/machine_manager/src/config/network.rs +++ b/machine_manager/src/config/network.rs @@ -10,374 +10,336 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. +use std::os::unix::io::RawFd; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::{ArgAction, Parser}; use serde::{Deserialize, Serialize}; -use super::{ - errors::{ErrorKind, Result}, - pci_args_check, -}; -use crate::config::{ - CmdParser, ConfigCheck, ExBool, VmConfig, MAX_STRING_LENGTH, MAX_VIRTIO_QUEUE, -}; -use crate::qmp::{qmp_schema, QmpChannel}; +use super::error::ConfigError; +use super::{get_pci_df, parse_bool, str_slip_to_clap, valid_id, valid_virtqueue_size}; +use crate::config::{ConfigCheck, VmConfig, DEFAULT_VIRTQUEUE_SIZE, MAX_VIRTIO_QUEUE}; +use crate::qmp::{qmp_channel::QmpChannel, qmp_schema}; const MAC_ADDRESS_LENGTH: usize = 17; -#[derive(Debug, Clone, Serialize, Deserialize)] +/// Max virtqueue size of each virtqueue. +const MAX_QUEUE_SIZE_NET: u64 = 4096; +/// Max num of virtqueues. 
+const MAX_QUEUE_PAIRS: usize = MAX_VIRTIO_QUEUE / 2; + +#[derive(Parser, Debug, Clone, Serialize, Deserialize)] +#[command(no_binary_name(true))] pub struct NetDevcfg { + #[arg(long, alias="classtype", value_parser = ["tap", "vhost-user"])] + pub netdev_type: String, + #[arg(long, value_parser = valid_id)] pub id: String, + #[arg(long, aliases = ["fds", "fd"], use_value_delimiter = true, value_delimiter = ':')] pub tap_fds: Option>, - pub vhost_type: Option, + #[arg(long, alias = "vhost", default_value = "off", value_parser = parse_bool, action = ArgAction::Append)] + pub vhost_kernel: bool, + #[arg(long, aliases = ["vhostfds", "vhostfd"], use_value_delimiter = true, value_delimiter = ':')] pub vhost_fds: Option>, + #[arg(long, default_value = "", value_parser = valid_id)] pub ifname: String, + #[arg(long, default_value = "1", value_parser = parse_queues)] pub queues: u16, + #[arg(long)] + pub chardev: Option, +} + +impl NetDevcfg { + pub fn vhost_type(&self) -> Option { + if self.vhost_kernel { + return Some("vhost-kernel".to_string()); + } + if self.netdev_type == "vhost-user" { + return Some("vhost-user".to_string()); + } + // Default: virtio net. + None + } + + fn auto_queues(&mut self) -> Result<()> { + if let Some(fds) = &self.tap_fds { + let fds_num = fds + .len() + .checked_mul(2) + .with_context(|| format!("Invalid fds number {}", fds.len()))? + as u16; + if fds_num > self.queues { + self.queues = fds_num; + } + } + if let Some(fds) = &self.vhost_fds { + let fds_num = fds + .len() + .checked_mul(2) + .with_context(|| format!("Invalid vhostfds number {}", fds.len()))? + as u16; + if fds_num > self.queues { + self.queues = fds_num; + } + } + Ok(()) + } +} + +fn parse_queues(q: &str) -> Result { + let queues = q + .parse::()? + .checked_mul(2) + .with_context(|| "Invalid 'queues' value")?; + is_netdev_queues_valid(queues)?; + Ok(queues) } impl Default for NetDevcfg { fn default() -> Self { NetDevcfg { + netdev_type: "".to_string(), id: "".to_string(), tap_fds: None, - vhost_type: None, + vhost_kernel: false, vhost_fds: None, ifname: "".to_string(), queues: 2, + chardev: None, } } } impl ConfigCheck for NetDevcfg { fn check(&self) -> Result<()> { - if self.id.len() > MAX_STRING_LENGTH { - return Err(ErrorKind::StringLengthTooLong("id".to_string(), MAX_STRING_LENGTH).into()); + if self.vhost_kernel && self.netdev_type == "vhost-user" { + bail!("vhost-user netdev does not support 'vhost' option"); } - if self.ifname.len() > MAX_STRING_LENGTH { - return Err( - ErrorKind::StringLengthTooLong(self.ifname.clone(), MAX_STRING_LENGTH).into(), - ); + if self.vhost_fds.is_some() && self.vhost_type().is_none() { + bail!("Argument 'vhostfd' or 'vhostfds' are not needed for virtio-net device"); } - - if let Some(vhost_type) = self.vhost_type.as_ref() { - if vhost_type != "vhost-kernel" { - return Err(ErrorKind::UnknownVhostType.into()); - } + if self.tap_fds.is_none() && self.ifname.eq("") && self.netdev_type.ne("vhost-user") { + bail!("Tap device is missing, use \'ifname\' or \'fd\' to configure a tap device"); } + is_netdev_queues_valid(self.queues)?; + Ok(()) } } /// Config struct for network /// Contains network device config, such as `host_dev_name`, `mac`... 
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, Parser)]
#[serde(deny_unknown_fields)]
+#[command(no_binary_name(true))]
pub struct NetworkInterfaceConfig {
+ #[arg(long, value_parser = ["virtio-net-pci", "virtio-net-device"])]
+ pub classtype: String,
+ #[arg(long, default_value = "", value_parser = valid_id)]
pub id: String,
- pub host_dev_name: String,
+ #[arg(long)]
+ pub netdev: String,
+ #[arg(long)]
+ pub bus: Option<String>,
+ #[arg(long, value_parser = get_pci_df)]
+ pub addr: Option<(u8, u8)>,
+ #[arg(long, value_parser = parse_bool, action = ArgAction::Append)]
+ pub multifunction: Option<bool>,
+ #[arg(long, value_parser = valid_mac)]
pub mac: Option<String>,
- pub tap_fds: Option<Vec<i32>>,
- pub vhost_type: Option<String>,
- pub vhost_fds: Option<Vec<i32>>,
+ #[arg(long)]
pub iothread: Option<String>,
- pub queues: u16,
+ #[arg(long)]
+ pub rx_iothread: Option<String>,
+ #[arg(long)]
+ pub tx_iothread: Option<String>,
+ #[arg(long, default_value="off", value_parser = parse_bool, action = ArgAction::Append)]
pub mq: bool,
-}
-
-impl NetworkInterfaceConfig {
- pub fn set_mac(&mut self, mac_addr: String) {
- self.mac = Some(mac_addr);
- }
+ // All queues of a net device have the same queue size now.
+ #[arg(long, default_value = "256", alias = "queue-size", value_parser = valid_network_queue_size)]
+ pub queue_size: u16,
+ // MSI-X vectors that this network device has. This member isn't used in stratovirt now.
+ #[arg(long, default_value = "0")]
+ pub vectors: u16,
}
impl Default for NetworkInterfaceConfig {
fn default() -> Self {
NetworkInterfaceConfig {
+ classtype: "".to_string(),
id: "".to_string(),
- host_dev_name: "".to_string(),
+ netdev: "".to_string(),
+ bus: None,
+ addr: None,
+ multifunction: None,
mac: None,
- tap_fds: None,
- vhost_type: None,
- vhost_fds: None,
iothread: None,
- queues: 2,
+ rx_iothread: None,
+ tx_iothread: None,
mq: false,
+ queue_size: DEFAULT_VIRTQUEUE_SIZE,
+ vectors: 0,
}
}
}
-impl ConfigCheck for NetworkInterfaceConfig {
- fn check(&self) -> Result<()> {
- if self.id.len() > MAX_STRING_LENGTH {
- return Err(ErrorKind::StringLengthTooLong("id".to_string(), MAX_STRING_LENGTH).into());
+impl NetworkInterfaceConfig {
+ pub fn auto_iothread(&mut self) {
+ // If rx_iothread or tx_iothread is not configured, the default iothread will be used.
+ if self.rx_iothread.is_none() { + self.rx_iothread.clone_from(&self.iothread); } - - if self.host_dev_name.len() > MAX_STRING_LENGTH { - return Err(ErrorKind::StringLengthTooLong( - self.host_dev_name.clone(), - MAX_STRING_LENGTH, - ) - .into()); + if self.tx_iothread.is_none() { + self.tx_iothread.clone_from(&self.iothread); } + } +} - if self.mac.is_some() && !check_mac_address(self.mac.as_ref().unwrap()) { - return Err(ErrorKind::MacFormatError.into()); - } +fn valid_network_queue_size(s: &str) -> Result { + let size: u64 = s.parse()?; + valid_virtqueue_size(size, u64::from(DEFAULT_VIRTQUEUE_SIZE), MAX_QUEUE_SIZE_NET)?; - if let Some(vhost_type) = self.vhost_type.as_ref() { - if vhost_type != "vhost-kernel" { - return Err(ErrorKind::UnknownVhostType.into()); - } - } + Ok(size as u16) +} - if self.queues * 2 + 1 > MAX_VIRTIO_QUEUE as u16 { - return Err(ErrorKind::StringLengthTooLong( - "queues".to_string(), - (MAX_VIRTIO_QUEUE - 1) / 2, - ) - .into()); +impl ConfigCheck for NetworkInterfaceConfig { + fn check(&self) -> Result<()> { + if self.mac.is_some() && !check_mac_address(self.mac.as_ref().unwrap()) { + return Err(anyhow!(ConfigError::MacFormatError)); } - if self.iothread.is_some() && self.iothread.as_ref().unwrap().len() > MAX_STRING_LENGTH { - return Err(ErrorKind::StringLengthTooLong( - "iothread name".to_string(), - MAX_STRING_LENGTH, - ) - .into()); - } + valid_network_queue_size(&self.queue_size.to_string())?; Ok(()) } } -fn parse_fds(cmd_parser: &CmdParser, name: &str) -> Result>> { - if let Some(fds) = cmd_parser.get_value::(name)? { - let mut raw_fds = Vec::new(); - for fd in fds.split(':').collect::>().iter() { - raw_fds.push((*fd).parse::().map_err(|_| "Failed to parse fds")?); - } - Ok(Some(raw_fds)) +fn get_netdev_fd(fd_name: &str) -> Result { + if let Some(fd) = QmpChannel::get_fd(fd_name) { + Ok(fd) } else { - Ok(None) + // try to convert string to RawFd + let fd_num = fd_name + .parse::() + .with_context(|| format!("Failed to parse fd: {}", fd_name))?; + Ok(fd_num) } } -pub fn parse_netdev(cmd_parser: CmdParser) -> Result { - let mut net = NetDevcfg::default(); - let netdev_type = if let Some(netdev_type) = cmd_parser.get_value::("")? { - netdev_type - } else { - "".to_string() - }; - if netdev_type.ne("tap") { - bail!("Unsupported netdev type: {:?}", &netdev_type); - } - if let Some(net_id) = cmd_parser.get_value::("id")? { - net.id = net_id; - } else { - return Err(ErrorKind::FieldIsMissing("id", "netdev").into()); - } - if let Some(ifname) = cmd_parser.get_value::("ifname")? { - net.ifname = ifname; - } - if let Some(queue_pairs) = cmd_parser.get_value::("queues")? { - let queues = queue_pairs * 2; - if queues > net.queues { - net.queues = queues; - } - } - - if let Some(tap_fd) = parse_fds(&cmd_parser, "fd")? { - net.tap_fds = Some(tap_fd); - } else if let Some(tap_fds) = parse_fds(&cmd_parser, "fds")? { - net.tap_fds = Some(tap_fds); - } - if let Some(fds) = &net.tap_fds { - let fds_num = (fds.len() * 2) as u16; - if fds_num > net.queues { - net.queues = fds_num; - } - } - - if let Some(vhost) = cmd_parser.get_value::("vhost")? { - if vhost.into() { - net.vhost_type = Some(String::from("vhost-kernel")); - } - } - if let Some(vhost_fd) = parse_fds(&cmd_parser, "vhostfd")? { - net.vhost_fds = Some(vhost_fd); - } else if let Some(vhost_fds) = parse_fds(&cmd_parser, "vhostfds")? 
{ - net.vhost_fds = Some(vhost_fds); - } - if let Some(fds) = &net.vhost_fds { - let fds_num = (fds.len() * 2) as u16; - if fds_num > net.queues { - net.queues = fds_num; - } - } - - if net.vhost_fds.is_some() && net.vhost_type.is_none() { - bail!("Argument \'vhostfd\' is not needed for virtio-net device"); - } - if net.tap_fds.is_none() && net.ifname.eq("") { - bail!("Tap device is missing, use \'ifname\' or \'fd\' to configure a tap device"); - } - - Ok(net) -} - -pub fn parse_net(vm_config: &mut VmConfig, net_config: &str) -> Result { - let mut cmd_parser = CmdParser::new("virtio-net"); - cmd_parser - .push("") - .push("id") - .push("netdev") - .push("mq") - .push("vectors") - .push("bus") - .push("addr") - .push("multifunction") - .push("mac") - .push("iothread"); - - cmd_parser.parse(net_config)?; - pci_args_check(&cmd_parser)?; - let mut netdevinterfacecfg = NetworkInterfaceConfig::default(); - - let netdev = if let Some(devname) = cmd_parser.get_value::("netdev")? { - devname - } else { - return Err(ErrorKind::FieldIsMissing("netdev", "net").into()); - }; - let netid = if let Some(id) = cmd_parser.get_value::("id")? { - id - } else { - "".to_string() - }; - - if let Some(mq) = cmd_parser.get_value::("mq")? { - netdevinterfacecfg.mq = mq.inner; - } - netdevinterfacecfg.iothread = cmd_parser.get_value::("iothread")?; - netdevinterfacecfg.mac = cmd_parser.get_value::("mac")?; - - if let Some(netcfg) = &vm_config.netdevs.remove(&netdev) { - netdevinterfacecfg.id = netid; - netdevinterfacecfg.host_dev_name = netcfg.ifname.clone(); - netdevinterfacecfg.tap_fds = netcfg.tap_fds.clone(); - netdevinterfacecfg.vhost_fds = netcfg.vhost_fds.clone(); - netdevinterfacecfg.vhost_type = netcfg.vhost_type.clone(); - netdevinterfacecfg.queues = netcfg.queues; - } else { - bail!("Netdev: {:?} not found for net device", &netdev); +fn get_netdev_fds(fds_name: &str) -> Result> { + let fds_vec: Vec<&str> = fds_name.split(':').collect(); + let mut fds = Vec::new(); + for fd_name in fds_vec { + fds.push(get_netdev_fd(fd_name)?); + } + if fds.len() > MAX_QUEUE_PAIRS { + bail!( + "The num of fd {} is bigger than max queue num {}", + fds.len(), + MAX_QUEUE_PAIRS + ); } - - netdevinterfacecfg.check()?; - Ok(netdevinterfacecfg) + Ok(fds) } pub fn get_netdev_config(args: Box) -> Result { + let queues = args + .queues + .unwrap_or(1) + .checked_mul(2) + .with_context(|| "Invalid 'queues' value")?; + is_netdev_queues_valid(queues)?; let mut config = NetDevcfg { + netdev_type: args.net_type.unwrap_or_default(), id: args.id, tap_fds: None, - vhost_type: None, + vhost_kernel: args.vhost.unwrap_or_default(), vhost_fds: None, ifname: String::new(), - queues: 2, + queues, + chardev: args.chardev, }; - if let Some(fds) = args.fds { - let netdev_fd = if fds.contains(':') { - let col: Vec<_> = fds.split(':').collect(); - String::from(col[col.len() - 1]) - } else { - String::from(&fds) - }; - if let Some(fd_num) = QmpChannel::get_fd(&netdev_fd) { - config.tap_fds = Some(vec![fd_num]); - } else { - // try to convert string to RawFd - let fd_num = match netdev_fd.parse::() { - Ok(fd) => fd, - _ => { - bail!("Failed to parse fd: {}", netdev_fd); - } - }; - config.tap_fds = Some(vec![fd_num]); + if let Some(tap_fd) = args.fd { + if args.if_name.is_some() + || args.script.is_some() + || args.downscript.is_some() + || args.queues.is_some() + || args.fds.is_some() + || args.vhostfds.is_some() + { + bail!("fd is conflict with ifname/script/downscript/queues/fds/vhostfds"); + } + let fd = get_netdev_fd(&tap_fd)?; + config.tap_fds = 
Some(vec![fd]); + + if let Some(vhostfd) = args.vhostfd { + let fd = get_netdev_fd(&vhostfd)?; + config.vhost_fds = Some(vec![fd]); + } + } else if let Some(tap_fds) = args.fds { + if args.if_name.is_some() + || args.script.is_some() + || args.downscript.is_some() + || args.queues.is_some() + || args.vhostfd.is_some() + { + bail!("fds are conflict with ifname/script/downscript/queues/vhostfd"); + } + config.tap_fds = Some(get_netdev_fds(&tap_fds)?); + config.queues = 2 * config.tap_fds.as_ref().unwrap().len() as u16; + + if let Some(vhostfds) = args.vhostfds { + config.vhost_fds = Some(get_netdev_fds(&vhostfds)?); + if config.tap_fds.as_ref().unwrap().len() != config.vhost_fds.as_ref().unwrap().len() { + bail!("The num of vhostfds must equal to fds"); + } } } else if let Some(if_name) = args.if_name { config.ifname = if_name; } - if let Some(vhost) = args.vhost { - match vhost.parse::() { - Ok(vhost) => { - if vhost.into() { - config.vhost_type = Some(String::from("vhost-kernel")); - } - } - Err(_) => { - bail!("Failed to get vhost type: {}", vhost); - } - }; - } - if let Some(vhostfd) = args.vhostfds { - match vhostfd.parse::() { - Ok(fd) => config.vhost_fds = Some(vec![fd]), - Err(_e) => { - bail!("Failed to get vhost fd: {}", vhostfd); - } - }; - } - if config.vhost_fds.is_some() && config.vhost_type.is_none() { - bail!("Argument \'vhostfd\' is not needed for virtio-net device"); - } - if config.tap_fds.is_none() && config.ifname.eq("") { - bail!("Tap device is missing, use \'ifname\' or \'fd\' to configure a tap device"); - } + config.check()?; Ok(config) } impl VmConfig { pub fn add_netdev(&mut self, netdev_config: &str) -> Result<()> { - let mut cmd_parser = CmdParser::new("netdev"); - cmd_parser - .push("") - .push("id") - .push("fd") - .push("fds") - .push("vhost") - .push("ifname") - .push("vhostfd") - .push("vhostfds") - .push("queues"); - - cmd_parser.parse(netdev_config)?; - let drive_cfg = parse_netdev(cmd_parser)?; - self.add_netdev_with_config(drive_cfg) + let mut netdev_cfg = + NetDevcfg::try_parse_from(str_slip_to_clap(netdev_config, true, false))?; + netdev_cfg.auto_queues()?; + netdev_cfg.check()?; + self.add_netdev_with_config(netdev_cfg) } pub fn add_netdev_with_config(&mut self, conf: NetDevcfg) -> Result<()> { let netdev_id = conf.id.clone(); - if self.netdevs.get(&netdev_id).is_none() { - self.netdevs.insert(netdev_id, conf); - } else { + if self.netdevs.contains_key(&netdev_id) { bail!("Netdev {:?} has been added", netdev_id); } + self.netdevs.insert(netdev_id, conf); Ok(()) } pub fn del_netdev_by_id(&mut self, id: &str) -> Result<()> { - if self.netdevs.get(id).is_some() { - self.netdevs.remove(id); - } else { - bail!("Netdev {} not found", id); - } + self.netdevs + .remove(id) + .with_context(|| format!("Netdev {} not found", id))?; + Ok(()) } } +fn valid_mac(mac: &str) -> Result { + if !check_mac_address(mac) { + return Err(anyhow!(ConfigError::MacFormatError)); + } + Ok(mac.to_string()) +} + fn check_mac_address(mac: &str) -> bool { if mac.len() != MAC_ADDRESS_LENGTH { return false; @@ -407,185 +369,157 @@ fn check_mac_address(mac: &str) -> bool { true } +fn is_netdev_queues_valid(queues: u16) -> Result<()> { + if !(queues >= 2 && queues <= MAX_VIRTIO_QUEUE as u16) { + return Err(anyhow!(ConfigError::IllegalValue( + "number queues of net device".to_string(), + 1, + true, + MAX_QUEUE_PAIRS as u64, + true, + ))); + } + + Ok(()) +} + #[cfg(test)] mod tests { - use crate::config::get_pci_bdf; - use super::*; #[test] - fn test_network_config_cmdline_parser() { + fn 
test_netdev_config_cmdline_parser() { let mut vm_config = VmConfig::default(); + + // Test1: Right. assert!(vm_config.add_netdev("tap,id=eth0,ifname=tap0").is_ok()); - let net_cfg_res = parse_net( - &mut vm_config, - "virtio-net-device,id=net0,netdev=eth0,iothread=iothread0", - ); - assert!(net_cfg_res.is_ok()); - let network_configs = net_cfg_res.unwrap(); - assert_eq!(network_configs.id, "net0"); - assert_eq!(network_configs.host_dev_name, "tap0"); - assert_eq!(network_configs.iothread, Some("iothread0".to_string())); - assert!(network_configs.mac.is_none()); - assert!(network_configs.tap_fds.is_none()); - assert!(network_configs.vhost_type.is_none()); - assert!(network_configs.vhost_fds.is_none()); + assert!(vm_config.add_netdev("tap,id=eth0,ifname=tap0").is_err()); + let netdev_cfg = vm_config.netdevs.get("eth0").unwrap(); + assert_eq!(netdev_cfg.id, "eth0"); + assert_eq!(netdev_cfg.ifname, "tap0"); + assert!(netdev_cfg.tap_fds.is_none()); + assert!(!netdev_cfg.vhost_kernel); + assert!(netdev_cfg.vhost_fds.is_none()); + assert_eq!(netdev_cfg.queues, 2); + assert!(netdev_cfg.vhost_type().is_none()); - let mut vm_config = VmConfig::default(); assert!(vm_config .add_netdev("tap,id=eth1,ifname=tap1,vhost=on,vhostfd=4") .is_ok()); - let net_cfg_res = parse_net( - &mut vm_config, - "virtio-net-device,id=net1,netdev=eth1,mac=12:34:56:78:9A:BC", - ); - assert!(net_cfg_res.is_ok()); - let network_configs = net_cfg_res.unwrap(); - assert_eq!(network_configs.id, "net1"); - assert_eq!(network_configs.host_dev_name, "tap1"); - assert_eq!(network_configs.mac, Some(String::from("12:34:56:78:9A:BC"))); - assert!(network_configs.tap_fds.is_none()); - assert_eq!( - network_configs.vhost_type, - Some(String::from("vhost-kernel")) - ); - assert_eq!(network_configs.vhost_fds, Some(vec![4])); + let netdev_cfg = vm_config.netdevs.get("eth1").unwrap(); + assert_eq!(netdev_cfg.ifname, "tap1"); + assert_eq!(netdev_cfg.vhost_type().unwrap(), "vhost-kernel"); + assert_eq!(netdev_cfg.vhost_fds, Some(vec![4])); - let mut vm_config = VmConfig::default(); - assert!(vm_config.add_netdev("tap,id=eth1,fd=35").is_ok()); - let net_cfg_res = parse_net(&mut vm_config, "virtio-net-device,id=net1,netdev=eth1"); - assert!(net_cfg_res.is_ok()); - let network_configs = net_cfg_res.unwrap(); - assert_eq!(network_configs.id, "net1"); - assert_eq!(network_configs.host_dev_name, ""); - assert_eq!(network_configs.tap_fds, Some(vec![35])); + assert!(vm_config.add_netdev("tap,id=eth2,fd=35").is_ok()); + let netdev_cfg = vm_config.netdevs.get("eth2").unwrap(); + assert_eq!(netdev_cfg.tap_fds, Some(vec![35])); - let mut vm_config = VmConfig::default(); assert!(vm_config - .add_netdev("tap,id=eth1,ifname=tap1,vhost=on,vhostfd=4") + .add_netdev("tap,id=eth3,ifname=tap0,queues=4") .is_ok()); - let net_cfg_res = parse_net( - &mut vm_config, - "virtio-net-device,id=net1,netdev=eth2,mac=12:34:56:78:9A:BC", - ); - assert!(net_cfg_res.is_err()); + let netdev_cfg = vm_config.netdevs.get("eth3").unwrap(); + assert_eq!(netdev_cfg.queues, 8); - let mut vm_config = VmConfig::default(); - assert!(vm_config.add_netdev("tap,id=eth1,fd=35").is_ok()); - let net_cfg_res = parse_net(&mut vm_config, "virtio-net-device,id=net1,netdev=eth3"); - assert!(net_cfg_res.is_err()); - - // multi queue testcases - let mut vm_config = VmConfig::default(); assert!(vm_config - .add_netdev("tap,id=eth0,ifname=tap0,queues=4") + .add_netdev("tap,id=eth4,fds=34:35:36:37:38") .is_ok()); - let net_cfg_res = parse_net( - &mut vm_config, - 
"virtio-net-device,id=net0,netdev=eth0,iothread=iothread0,mq=on,vectors=6", - ); - assert!(net_cfg_res.is_ok()); - let network_configs = net_cfg_res.unwrap(); - assert_eq!(network_configs.queues, 8); - assert_eq!(network_configs.mq, true); + let netdev_cfg = vm_config.netdevs.get("eth4").unwrap(); + assert_eq!(netdev_cfg.queues, 10); + assert_eq!(netdev_cfg.tap_fds, Some(vec![34, 35, 36, 37, 38])); - let mut vm_config = VmConfig::default(); assert!(vm_config - .add_netdev("tap,id=eth0,fds=34:35:36:37:38") + .add_netdev("tap,id=eth5,fds=34:35:36:37:38,vhost=on,vhostfds=39:40:41:42:43") .is_ok()); - let net_cfg_res = parse_net( - &mut vm_config, - "virtio-net-device,id=net0,netdev=eth0,iothread=iothread0,mq=off,vectors=12", - ); - assert!(net_cfg_res.is_ok()); - let network_configs = net_cfg_res.unwrap(); - assert_eq!(network_configs.queues, 10); - assert_eq!(network_configs.tap_fds, Some(vec![34, 35, 36, 37, 38])); - assert_eq!(network_configs.mq, false); + let netdev_cfg = vm_config.netdevs.get("eth5").unwrap(); + assert_eq!(netdev_cfg.queues, 10); + assert_eq!(netdev_cfg.vhost_fds, Some(vec![39, 40, 41, 42, 43])); - let mut vm_config = VmConfig::default(); + // Test2: Missing values assert!(vm_config - .add_netdev("tap,id=eth0,fds=34:35:36:37:38,vhost=on,vhostfds=39:40:41:42:43") - .is_ok()); - let net_cfg_res = parse_net( - &mut vm_config, - "virtio-net-device,id=net0,netdev=eth0,iothread=iothread0,mq=off,vectors=12", - ); - assert!(net_cfg_res.is_ok()); - let network_configs = net_cfg_res.unwrap(); - assert_eq!(network_configs.queues, 10); - assert_eq!(network_configs.vhost_fds, Some(vec![39, 40, 41, 42, 43])); - assert_eq!(network_configs.mq, false); + .add_netdev("tap,fds=34:35:36:37:38,vhost=on") + .is_err()); + + // Test3: Illegal values. + assert!(vm_config + .add_netdev("tap,id=eth10,fds=34:35:36:37:38,vhost=on,vhostfds=39,40,41,42,43") + .is_err()); + assert!(vm_config.add_netdev("tap,id=eth10,queues=0").is_err()); + assert!(vm_config.add_netdev("tap,id=eth10,queues=17").is_err()); } #[test] - fn test_pci_network_config_cmdline_parser() { + fn test_networkinterface_config_cmdline_parser() { + // Test1: Right. 
let mut vm_config = VmConfig::default(); - assert!(vm_config .add_netdev("tap,id=eth1,ifname=tap1,vhost=on,vhostfd=4") .is_ok()); + let net_cmd = + "virtio-net-pci,id=net1,netdev=eth1,bus=pcie.0,addr=0x1.0x2,mac=12:34:56:78:9A:BC,mq=on,vectors=6,queue-size=2048,multifunction=on"; let net_cfg = - "virtio-net-pci,id=net1,netdev=eth1,bus=pcie.0,addr=0x1.0x2,mac=12:34:56:78:9A:BC"; - let net_cfg_res = parse_net(&mut vm_config, net_cfg); - assert!(net_cfg_res.is_ok()); - let network_configs = net_cfg_res.unwrap(); - assert_eq!(network_configs.id, "net1"); - assert_eq!(network_configs.host_dev_name, "tap1"); - assert_eq!(network_configs.mac, Some(String::from("12:34:56:78:9A:BC"))); - assert!(network_configs.tap_fds.is_none()); - assert_eq!( - network_configs.vhost_type, - Some(String::from("vhost-kernel")) - ); - assert_eq!(network_configs.vhost_fds.unwrap()[0], 4); - let pci_bdf = get_pci_bdf(net_cfg); - assert!(pci_bdf.is_ok()); - let pci = pci_bdf.unwrap(); - assert_eq!(pci.bus, "pcie.0".to_string()); - assert_eq!(pci.addr, (1, 2)); - - let net_cfg_res = parse_net(&mut vm_config, net_cfg); - assert!(net_cfg_res.is_err()); - + NetworkInterfaceConfig::try_parse_from(str_slip_to_clap(net_cmd, true, false)).unwrap(); + assert_eq!(net_cfg.id, "net1"); + assert_eq!(net_cfg.netdev, "eth1"); + assert_eq!(net_cfg.bus.unwrap(), "pcie.0"); + assert_eq!(net_cfg.addr.unwrap(), (1, 2)); + assert_eq!(net_cfg.mac.unwrap(), "12:34:56:78:9A:BC"); + assert_eq!(net_cfg.vectors, 6); + assert!(net_cfg.mq); + assert_eq!(net_cfg.queue_size, 2048); + assert_eq!(net_cfg.multifunction, Some(true)); + let netdev_cfg = vm_config.netdevs.get(&net_cfg.netdev).unwrap(); + assert_eq!(netdev_cfg.vhost_type().unwrap(), "vhost-kernel"); + + // Test2: Default values. let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_netdev("tap,id=eth1,ifname=tap1,vhost=on,vhostfd=4") - .is_ok()); + assert!(vm_config.add_netdev("vhost-user,id=netdevid").is_ok()); + let net_cmd = + "virtio-net-pci,id=netid,netdev=netdevid,bus=pcie.0,addr=0x2.0x0,mac=12:34:56:78:9A:BC"; let net_cfg = - "virtio-net-pci,id=net1,netdev=eth1,bus=pcie.0,addr=0x1.0x2,mac=12:34:56:78:9A:BC,multifunction=on"; - assert!(parse_net(&mut vm_config, net_cfg).is_ok()); + NetworkInterfaceConfig::try_parse_from(str_slip_to_clap(net_cmd, true, false)).unwrap(); + assert_eq!(net_cfg.queue_size, 256); + assert!(!net_cfg.mq); + assert_eq!(net_cfg.vectors, 0); + let netdev_cfg = vm_config.netdevs.get(&net_cfg.netdev).unwrap(); + assert_eq!(netdev_cfg.vhost_type().unwrap(), "vhost-user"); + + // Test3: Missing Parameters. + let net_cmd = "virtio-net-pci,id=netid"; + let result = NetworkInterfaceConfig::try_parse_from(str_slip_to_clap(net_cmd, true, false)); + assert!(result.is_err()); + + // Test4: Illegal Parameters. 
+ let net_cmd = "virtio-net-pci,id=netid,netdev=netdevid,mac=1:1:1"; + let result = NetworkInterfaceConfig::try_parse_from(str_slip_to_clap(net_cmd, true, false)); + assert!(result.is_err()); + let net_cmd = "virtio-net-pci,id=netid,netdev=netdevid,queue-size=128"; + let result = NetworkInterfaceConfig::try_parse_from(str_slip_to_clap(net_cmd, true, false)); + assert!(result.is_err()); + let net_cmd = "virtio-net-pci,id=netid,netdev=netdevid,queue-size=10240"; + let result = NetworkInterfaceConfig::try_parse_from(str_slip_to_clap(net_cmd, true, false)); + assert!(result.is_err()); } #[test] - fn test_netdev_config_check() { - let mut netdev_conf = NetDevcfg::default(); - for _ in 0..MAX_STRING_LENGTH { - netdev_conf.id += "A"; - } - assert!(netdev_conf.check().is_ok()); + fn test_add_netdev_with_different_queues() { + let mut vm_config = VmConfig::default(); - // Overflow - netdev_conf.id += "A"; - assert!(netdev_conf.check().is_err()); + let set_queues = |q: u16| { + format!( + "vhost-user,id=netdevid{num},chardev=chardevid,queues={num}", + num = q.to_string() + ) + }; - let mut netdev_conf = NetDevcfg::default(); - for _ in 0..MAX_STRING_LENGTH { - netdev_conf.ifname += "A"; - } - assert!(netdev_conf.check().is_ok()); - - // Overflow - netdev_conf.ifname += "A"; - assert!(netdev_conf.check().is_err()); - - let mut netdev_conf = NetDevcfg::default(); - netdev_conf.vhost_type = None; - assert!(netdev_conf.check().is_ok()); - netdev_conf.vhost_type = Some(String::from("vhost-kernel")); - assert!(netdev_conf.check().is_ok()); - netdev_conf.vhost_type = Some(String::from("vhost-")); - assert!(netdev_conf.check().is_err()); + assert!(vm_config.add_netdev(&set_queues(0)).is_err()); + assert!(vm_config.add_netdev(&set_queues(1)).is_ok()); + assert!(vm_config + .add_netdev(&set_queues(MAX_VIRTIO_QUEUE as u16 / 2)) + .is_ok()); + assert!(vm_config + .add_netdev(&set_queues(MAX_VIRTIO_QUEUE as u16 / 2 + 1)) + .is_err()); } #[test] @@ -627,112 +561,192 @@ mod tests { let mut net_conf = NetDevcfg::default(); net_conf.id = String::from(*id); assert!(vm_config.netdevs.get(*id).is_some()); - assert!(vm_config.del_netdev_by_id(*id).is_ok()); + assert!(vm_config.del_netdev_by_id(id).is_ok()); assert!(vm_config.netdevs.get(*id).is_none()); } } - fn create_netdev_add( - id: String, - if_name: Option, - fds: Option, - vhost: Option, - vhostfds: Option, - ) -> Box { - Box::new(qmp_schema::NetDevAddArgument { - id, - if_name, - fds, - dnssearch: None, - net_type: None, - vhost, - vhostfds, - ifname: None, - downscript: None, - script: None, - queues: None, - }) + fn check_err_msg(netdev: Box, err_msg: &str) { + if let Err(err) = get_netdev_config(netdev) { + assert_eq!(err.to_string(), err_msg); + } else { + assert!(false); + } } #[test] fn test_get_netdev_config() { - // Invalid vhost - let netdev_add = create_netdev_add( - String::from("netdev"), - None, - None, - Some(String::from("1")), - None, - ); - let net_cfg = get_netdev_config(netdev_add); - assert!(net_cfg.is_err()); - - // Invalid vhost fd - let netdev_add = create_netdev_add( - String::from("netdev"), - None, - None, - None, - Some(String::from("999999999999999999999")), - ); - let net_cfg = get_netdev_config(netdev_add); - assert!(net_cfg.is_err()); - - // No need to config vhost fd - let netdev_add = create_netdev_add( - String::from("netdev"), - None, - None, - None, - Some(String::from("55")), - ); - let net_cfg = get_netdev_config(netdev_add); - assert!(net_cfg.is_err()); - - // No ifname or fd - let netdev_add = create_netdev_add( - 
String::from("netdev"), - None, - None, - Some(String::from("on")), - Some(String::from("55")), - ); - let net_cfg = get_netdev_config(netdev_add); - assert!(net_cfg.is_err()); - - let netdev_add = create_netdev_add( - String::from("netdev"), - Some(String::from("tap0")), - None, - None, - None, - ); - let net_cfg = get_netdev_config(netdev_add); - assert!(net_cfg.is_ok()); - assert_eq!(net_cfg.unwrap().ifname, "tap0"); - - let netdev_add = create_netdev_add( - String::from("netdev"), - Some(String::from("tap0")), - None, - Some(String::from("on")), - None, + QmpChannel::object_init(); + // Normal test with common elem. + let netdev = Box::new(qmp_schema::NetDevAddArgument { + id: "netdev".to_string(), + if_name: Some("tap0".to_string()), + ..qmp_schema::NetDevAddArgument::default() + }); + let net_cfg = get_netdev_config(netdev).unwrap(); + assert_eq!(net_cfg.id, "netdev"); + assert_eq!(net_cfg.ifname, "tap0"); + + // Set fd_name and fd_value to qmp channel. + for i in 0..5 { + let fd_name = "fd-net0".to_string() + &i.to_string(); + QmpChannel::set_fd(fd_name, 11 + i); + let vhostfd_name = "vhostfd-net0".to_string() + &i.to_string(); + QmpChannel::set_fd(vhostfd_name, 21 + i); + } + + // Normal test with 'fd' value or name. + for value in ["11", "fd-net00"] { + let netdev = Box::new(qmp_schema::NetDevAddArgument { + fd: Some(value.to_string()), + ..qmp_schema::NetDevAddArgument::default() + }); + let net_cfg = get_netdev_config(netdev).unwrap(); + assert_eq!(net_cfg.tap_fds.unwrap()[0], 11); + } + + // Normal test with 'fds' value or name. + for value in ["11:12:13:14", "fd-net00:fd-net01:fd-net02:fd-net03"] { + let netdev = Box::new(qmp_schema::NetDevAddArgument { + fds: Some(value.to_string()), + ..qmp_schema::NetDevAddArgument::default() + }); + let net_cfg = get_netdev_config(netdev).unwrap(); + assert_eq!(net_cfg.tap_fds.unwrap(), [11, 12, 13, 14]); + } + + // Normal test with 'vhostfd'. + for (fd, vhostfd) in [("11", "21"), ("fd-net00", "vhostfd-net00")] { + let netdev = Box::new(qmp_schema::NetDevAddArgument { + fd: Some(fd.to_string()), + vhostfd: Some(vhostfd.to_string()), + vhost: Some(true), + ..qmp_schema::NetDevAddArgument::default() + }); + let net_cfg = get_netdev_config(netdev).unwrap(); + assert_eq!(net_cfg.vhost_type().unwrap(), "vhost-kernel"); + assert_eq!(net_cfg.tap_fds.unwrap()[0], 11); + assert_eq!(net_cfg.vhost_fds.unwrap()[0], 21); + } + + // Normal test with 'vhostfds'. + for (fds, vhostfds) in [ + ("11:12:13:14", "21:22:23:24"), + ( + "fd-net00:fd-net01:fd-net02:fd-net03", + "vhostfd-net00:vhostfd-net01:vhostfd-net02:vhostfd-net03", + ), + ] { + let netdev = Box::new(qmp_schema::NetDevAddArgument { + fds: Some(fds.to_string()), + vhostfds: Some(vhostfds.to_string()), + vhost: Some(true), + ..qmp_schema::NetDevAddArgument::default() + }); + let net_cfg = get_netdev_config(netdev).unwrap(); + assert_eq!(net_cfg.vhost_type().unwrap(), "vhost-kernel"); + assert_eq!(net_cfg.tap_fds.unwrap(), vec![11, 12, 13, 14]); + assert_eq!(net_cfg.vhost_fds.unwrap(), vec![21, 22, 23, 24]); + } + + let err_msgs = [ + "Invalid 'queues' value", + "fd is conflict with ifname/script/downscript/queues/fds/vhostfds", + "fds are conflict with ifname/script/downscript/queues/vhostfd", + "The num of vhostfds must equal to fds", + "vhost-user netdev does not support 'vhost' option", + "Argument 'vhostfd' or 'vhostfds' are not needed for virtio-net device", + "Tap device is missing, use 'ifname' or 'fd' to configure a tap device", + ]; + + // Abnornal test with invalid 'queues': u16::MAX. 
+ let netdev = Box::new(qmp_schema::NetDevAddArgument {
+ queues: Some(u16::MAX),
+ ..qmp_schema::NetDevAddArgument::default()
+ });
+ check_err_msg(netdev, err_msgs[0]);
+
+ // Abnormal test with invalid 'queues': MAX_QUEUE_PAIRS + 1.
+ let netdev = Box::new(qmp_schema::NetDevAddArgument {
+ queues: Some(MAX_QUEUE_PAIRS as u16 + 1),
+ ..qmp_schema::NetDevAddArgument::default()
+ });
+ let err_msg = format!(
+ "number queues of net device must >= 1 and <= {}.",
+ MAX_QUEUE_PAIRS
);
- let net_cfg = get_netdev_config(netdev_add);
- assert!(net_cfg.is_ok());
- assert_eq!(net_cfg.unwrap().vhost_type.unwrap(), "vhost-kernel");
-
- let netdev_add = create_netdev_add(
- String::from("netdev"),
- Some(String::from("tap0")),
- None,
- Some(String::from("on")),
- Some(String::from("12")),
+ check_err_msg(netdev, &err_msg);
+
+ // Abnormal test with 'fd' and 'vhostfds'.
+ let netdev = Box::new(qmp_schema::NetDevAddArgument {
+ fd: Some("11".to_string()),
+ vhostfds: Some("21:22:23:24".to_string()),
+ ..qmp_schema::NetDevAddArgument::default()
+ });
+ check_err_msg(netdev, err_msgs[1]);
+
+ // Abnormal test with 'fds' and 'vhostfd'.
+ let netdev = Box::new(qmp_schema::NetDevAddArgument {
+ fds: Some("11:12:13:14".to_string()),
+ vhostfd: Some("21".to_string()),
+ ..qmp_schema::NetDevAddArgument::default()
+ });
+ check_err_msg(netdev, err_msgs[2]);
+
+ // Abnormal test with different num of 'fds' and 'vhostfds'.
+ let netdev = Box::new(qmp_schema::NetDevAddArgument {
+ fds: Some("11:12:13:14".to_string()),
+ vhostfds: Some("21:22:23".to_string()),
+ ..qmp_schema::NetDevAddArgument::default()
+ });
+ check_err_msg(netdev, err_msgs[3]);
+
+ // Abnormal test with 'net_type=vhost-user'.
+ let netdev = Box::new(qmp_schema::NetDevAddArgument {
+ fd: Some("11".to_string()),
+ vhostfd: Some("21".to_string()),
+ vhost: Some(true),
+ net_type: Some("vhost-user".to_string()),
+ ..qmp_schema::NetDevAddArgument::default()
+ });
+ check_err_msg(netdev, err_msgs[4]);
+
+ // Abnormal test with 'fds/vhostfds' and no 'vhost'.
+ let netdev = Box::new(qmp_schema::NetDevAddArgument {
+ fds: Some("11:12:13:14".to_string()),
+ vhostfds: Some("21:22:23:24".to_string()),
+ ..qmp_schema::NetDevAddArgument::default()
+ });
+ check_err_msg(netdev, err_msgs[5]);
+
+ // Abnormal test with all default values.
+ let netdev = Box::new(qmp_schema::NetDevAddArgument {
+ ..qmp_schema::NetDevAddArgument::default()
+ });
+ check_err_msg(netdev, err_msgs[6]);
+
+ // Abnormal test with invalid fd value.
+ let netdev = Box::new(qmp_schema::NetDevAddArgument {
+ fd: Some("invalid_fd".to_string()),
+ ..qmp_schema::NetDevAddArgument::default()
+ });
+ check_err_msg(netdev, "Failed to parse fd: invalid_fd");
+
+ // Abnormal test with fd num bigger than MAX_QUEUE_PAIRS.
+ let mut fds = "0".to_string(); + for i in 1..MAX_QUEUE_PAIRS + 1 { + fds += &(":".to_string() + &i.to_string()); + } + let netdev = Box::new(qmp_schema::NetDevAddArgument { + fds: Some(fds.to_string()), + ..qmp_schema::NetDevAddArgument::default() + }); + // number queues of net device + let err_msg = format!( + "The num of fd {} is bigger than max queue num {}", + MAX_QUEUE_PAIRS + 1, + MAX_QUEUE_PAIRS ); - let net_cfg = get_netdev_config(netdev_add); - assert!(net_cfg.is_ok()); - let net_cfg = net_cfg.unwrap(); - assert_eq!(net_cfg.vhost_type.unwrap(), "vhost-kernel"); - assert_eq!(net_cfg.vhost_fds.unwrap()[0], 12); + check_err_msg(netdev, &err_msg); } } diff --git a/machine_manager/src/config/numa.rs b/machine_manager/src/config/numa.rs new file mode 100644 index 0000000000000000000000000000000000000000..f628fef7976e2c7225c789816933f1c8b5b11fc8 --- /dev/null +++ b/machine_manager/src/config/numa.rs @@ -0,0 +1,327 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::cmp::max; +use std::collections::{BTreeMap, HashSet}; +use std::str::FromStr; + +use anyhow::{bail, Context, Result}; +use clap::Parser; + +use super::error::ConfigError; +use super::{get_class_type, str_slip_to_clap}; +use crate::config::{IntegerList, VmConfig, MAX_NODES}; + +const MIN_NUMA_DISTANCE: u8 = 10; + +#[derive(Default)] +pub struct NumaNode { + pub cpus: Vec, + pub distances: BTreeMap, + pub size: u64, + pub mem_dev: String, +} + +pub type NumaNodes = BTreeMap; + +/// Complete the NUMA node parameters from user. +/// +/// # Arguments +/// +/// * `numa_nodes` - The NUMA node information parsing from user. +/// * `nr_cpus` - The VM cpus number. +/// * `mem_size` - The VM memory size. 
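A minimal usage sketch of this completion step (illustrative; the node layout mirrors test_check_numa_nodes further below):

    let mut nodes = NumaNodes::new();
    nodes.insert(0, NumaNode { cpus: vec![0, 1], size: 1 << 30, ..Default::default() });
    nodes.insert(1, NumaNode { cpus: vec![2, 3], size: 1 << 30, ..Default::default() });
    // 4 vCPUs and 2 GiB of guest RAM match the per-node totals, so this succeeds;
    // a repeated CPU id or a memory-size mismatch would return an error instead.
    complete_numa_node(&mut nodes, 4, 2 * (1 << 30)).unwrap();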
+pub fn complete_numa_node(numa_nodes: &mut NumaNodes, nr_cpus: u8, mem_size: u64) -> Result<()> {
+ if numa_nodes.len() > 8 {
+ bail!(
+ "NUMA nodes should be less than or equal to 8, now is {}",
+ numa_nodes.len()
+ );
+ }
+
+ let mut total_ram_size = 0_u64;
+ let mut max_cpu_id = 0_u8;
+ let mut cpus_id = HashSet::<u8>::new();
+ for (_, node) in numa_nodes.iter() {
+ total_ram_size += node.size;
+ for id in node.cpus.iter() {
+ if cpus_id.contains(id) {
+ bail!("CPU id {} is repeated, please check it again", *id);
+ }
+ cpus_id.insert(*id);
+ max_cpu_id = max(max_cpu_id, *id);
+ }
+ }
+
+ if cpus_id.len() < nr_cpus as usize {
+ if let Some(node_0) = numa_nodes.get_mut(&0) {
+ for id in 0..nr_cpus {
+ if !cpus_id.contains(&id) {
+ node_0.cpus.push(id);
+ }
+ }
+ }
+ }
+
+ if total_ram_size != mem_size {
+ bail!(
+ "Total memory {} of NUMA nodes is not equal to memory size {}",
+ total_ram_size,
+ mem_size,
+ );
+ }
+ if max_cpu_id >= nr_cpus {
+ bail!(
+ "CPU index {} should be smaller than max cpu {}",
+ max_cpu_id,
+ nr_cpus
+ );
+ }
+ if cpus_id.len() > nr_cpus as usize {
+ bail!(
+ "Total cpu numbers {} of NUMA nodes should be less than or equal to smp {}",
+ cpus_id.len(),
+ nr_cpus
+ );
+ }
+
+ Ok(())
+}
+
+#[derive(Parser)]
+#[command(no_binary_name(true))]
+pub struct NumaNodeConfig {
+ #[arg(long, value_parser = ["node"])]
+ pub classtype: String,
+ #[arg(long, alias = "nodeid", value_parser = clap::value_parser!(u32).range(..MAX_NODES as i64))]
+ pub numa_id: u32,
+ #[arg(long, value_parser = get_cpus)]
+ pub cpus: ::std::vec::Vec<u8>,
+ #[arg(long, alias = "memdev")]
+ pub mem_dev: String,
+}
+
+fn get_cpus(cpus_str: &str) -> Result<Vec<u8>> {
+ let mut cpus = IntegerList::from_str(cpus_str)
+ .with_context(|| ConfigError::ConvertValueFailed(String::from("u8"), "cpus".to_string()))?
+ .0
+ .iter()
+ .map(|e| *e as u8)
+ .collect::<Vec<u8>>();
+
+ if cpus.is_empty() {
+ bail!("Got empty cpus list!");
+ }
+
+ cpus.sort_unstable();
+
+ Ok(cpus)
+}
+
+/// Parse the NUMA node memory parameters.
+///
+/// # Arguments
+///
+/// * `numa_config` - The NUMA node configuration.
+pub fn parse_numa_mem(numa_config: &str) -> Result<NumaNodeConfig> {
+ let config = NumaNodeConfig::try_parse_from(str_slip_to_clap(numa_config, true, false))?;
+ Ok(config)
+}
+
+#[derive(Parser)]
+#[command(no_binary_name(true))]
+pub struct NumaDistConfig {
+ #[arg(long, value_parser = ["dist"])]
+ pub classtype: String,
+ #[arg(long, alias = "src", value_parser = clap::value_parser!(u32).range(..MAX_NODES as i64))]
+ pub numa_id: u32,
+ #[arg(long, alias = "dst", value_parser = clap::value_parser!(u32).range(..MAX_NODES as i64))]
+ pub destination: u32,
+ #[arg(long, alias = "val", value_parser = clap::value_parser!(u8).range(MIN_NUMA_DISTANCE as i64..))]
+ pub distance: u8,
+}
+
+impl NumaDistConfig {
+ fn check(&self) -> Result<()> {
+ if self.numa_id == self.destination && self.distance != MIN_NUMA_DISTANCE {
+ bail!("Local distance of node {} should be 10.", self.numa_id);
+ }
+ if self.numa_id != self.destination && self.distance == MIN_NUMA_DISTANCE {
+ bail!(
+ "Remote distance of node {} should be more than 10.",
+ self.numa_id
+ );
+ }
+ Ok(())
+ }
+}
+
+/// Parse the NUMA node distance parameters.
+///
+/// # Arguments
+///
+/// * `numa_dist` - The NUMA node distance configuration.
+pub fn parse_numa_distance(numa_dist: &str) -> Result<NumaDistConfig> {
+ let dist_cfg = NumaDistConfig::try_parse_from(str_slip_to_clap(numa_dist, true, false))?;
+ dist_cfg.check()?;
+ Ok(dist_cfg)
+}
+
+impl VmConfig {
+ /// Add the NUMA node config to vm config.
+ /// + /// # Arguments + /// + /// * `numa_config` - The NUMA node configuration. + pub fn add_numa(&mut self, numa_config: &str) -> Result<()> { + let numa_type = get_class_type(numa_config).with_context(|| "Numa type not specified")?; + self.numa_nodes.push((numa_type, numa_config.to_string())); + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_numa_mem() { + let mut vm_config = VmConfig::default(); + assert!(vm_config + .add_numa("node,nodeid=0,cpus=0-1,memdev=mem0") + .is_ok()); + assert!(vm_config + .add_numa("node,nodeid=1,cpus=2-1,memdev=mem1") + .is_ok()); + assert!(vm_config.add_numa("node,nodeid=2,memdev=mem2").is_ok()); + assert!(vm_config.add_numa("node,nodeid=3,cpus=3-4").is_ok()); + assert!(vm_config + .add_numa("node,nodeid=0,cpus=[0-1:3-5],memdev=mem0") + .is_ok()); + + let numa = vm_config.numa_nodes.first().unwrap(); + let numa_config = parse_numa_mem(numa.1.as_str()).unwrap(); + assert_eq!(numa_config.cpus, vec![0, 1]); + assert_eq!(numa_config.mem_dev, "mem0"); + + let numa = vm_config.numa_nodes.get(1).unwrap(); + assert!(parse_numa_mem(numa.1.as_str()).is_err()); + let numa = vm_config.numa_nodes.get(2).unwrap(); + assert!(parse_numa_mem(numa.1.as_str()).is_err()); + let numa = vm_config.numa_nodes.get(3).unwrap(); + assert!(parse_numa_mem(numa.1.as_str()).is_err()); + + let numa = vm_config.numa_nodes.get(4).unwrap(); + let numa_config = parse_numa_mem(numa.1.as_str()).unwrap(); + assert_eq!(numa_config.cpus, vec![0, 1, 3, 4, 5]); + } + + #[test] + fn test_parse_numa_distance() { + let mut vm_config = VmConfig::default(); + assert!(vm_config.add_numa("dist,src=0,dst=1,val=15").is_ok()); + assert!(vm_config.add_numa("dist,dst=1,val=10").is_ok()); + assert!(vm_config.add_numa("dist,src=0,val=10").is_ok()); + assert!(vm_config.add_numa("dist,src=0,dst=1").is_ok()); + assert!(vm_config.add_numa("dist,src=0,dst=1,val=10").is_ok()); + + let numa = vm_config.numa_nodes.first().unwrap(); + let dist_cfg = parse_numa_distance(numa.1.as_str()).unwrap(); + assert_eq!(dist_cfg.numa_id, 0); + assert_eq!(dist_cfg.destination, 1); + assert_eq!(dist_cfg.distance, 15); + + let numa = vm_config.numa_nodes.get(1).unwrap(); + assert!(parse_numa_distance(numa.1.as_str()).is_err()); + let numa = vm_config.numa_nodes.get(2).unwrap(); + assert!(parse_numa_distance(numa.1.as_str()).is_err()); + let numa = vm_config.numa_nodes.get(3).unwrap(); + assert!(parse_numa_distance(numa.1.as_str()).is_err()); + let numa = vm_config.numa_nodes.get(4).unwrap(); + assert!(parse_numa_distance(numa.1.as_str()).is_err()); + } + + #[test] + fn test_check_numa_nodes() { + let nr_cpus = 4; + let mem_size = 2147483648; + + let numa_node1 = NumaNode { + cpus: vec![0, 1], + distances: Default::default(), + size: 1073741824, + mem_dev: String::from("numa_node1"), + }; + let numa_node2 = NumaNode { + cpus: vec![2, 3], + distances: Default::default(), + size: 1073741824, + mem_dev: String::from("numa_node2"), + }; + + let mut numa_nodes = BTreeMap::new(); + numa_nodes.insert(0, numa_node1); + numa_nodes.insert(1, numa_node2); + assert!(complete_numa_node(&mut numa_nodes, nr_cpus, mem_size).is_ok()); + + let numa_node3 = NumaNode { + cpus: vec![2], + distances: Default::default(), + size: 1073741824, + mem_dev: String::from("numa_node3"), + }; + numa_nodes.remove(&1); + numa_nodes.insert(2, numa_node3); + assert!(complete_numa_node(&mut numa_nodes, nr_cpus, mem_size).is_ok()); + + let numa_node4 = NumaNode { + cpus: vec![2, 3, 4], + distances: Default::default(), + size: 
1073741824, + mem_dev: String::from("numa_node4"), + }; + numa_nodes.remove(&1); + numa_nodes.insert(1, numa_node4); + assert!(complete_numa_node(&mut numa_nodes, nr_cpus, mem_size).is_err()); + + let numa_node5 = NumaNode { + cpus: vec![3, 4], + distances: Default::default(), + size: 1073741824, + mem_dev: String::from("numa_node5"), + }; + numa_nodes.remove(&1); + numa_nodes.insert(1, numa_node5); + assert!(complete_numa_node(&mut numa_nodes, nr_cpus, mem_size).is_err()); + + let numa_node6 = NumaNode { + cpus: vec![0, 1], + distances: Default::default(), + size: 1073741824, + mem_dev: String::from("numa_node6"), + }; + numa_nodes.remove(&1); + numa_nodes.insert(1, numa_node6); + assert!(complete_numa_node(&mut numa_nodes, nr_cpus, mem_size).is_err()); + + let numa_node7 = NumaNode { + cpus: vec![2, 3], + distances: Default::default(), + size: 2147483648, + mem_dev: String::from("numa_node7"), + }; + numa_nodes.remove(&1); + numa_nodes.insert(1, numa_node7); + assert!(complete_numa_node(&mut numa_nodes, nr_cpus, mem_size).is_err()); + } +} diff --git a/machine_manager/src/config/pci.rs b/machine_manager/src/config/pci.rs index 9b64092809d2f8d53e685779cdf141b213e58236..6642f4f62bc4c6220e0986d22be5ecc4ddbc03be 100644 --- a/machine_manager/src/config/pci.rs +++ b/machine_manager/src/config/pci.rs @@ -10,13 +10,15 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use super::errors::{ErrorKind, Result, ResultExt}; -use super::{CmdParser, ConfigCheck, MAX_STRING_LENGTH}; -use crate::config::ExBool; +use anyhow::{bail, Context, Result}; +use serde::{Deserialize, Serialize}; + +use super::get_value_of_parameter; +use util::num_ops::str_to_num; /// Basic information of pci devices such as bus number, /// slot number and function number. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct PciBdf { /// Bus number pub bus: String, @@ -39,38 +41,6 @@ impl Default for PciBdf { } } -/// Basic information of RootPort like port number. -#[derive(Debug, Clone)] -pub struct RootPortConfig { - pub port: u8, - pub id: String, - pub multifunction: bool, -} - -impl ConfigCheck for RootPortConfig { - fn check(&self) -> Result<()> { - if self.id.len() > MAX_STRING_LENGTH { - return Err(ErrorKind::StringLengthTooLong( - "root_port id".to_string(), - MAX_STRING_LENGTH, - ) - .into()); - } - - Ok(()) - } -} - -impl Default for RootPortConfig { - fn default() -> Self { - RootPortConfig { - port: 0, - id: "".to_string(), - multifunction: false, - } - } -} - pub fn get_pci_df(addr: &str) -> Result<(u8, u8)> { let addr_vec: Vec<&str> = addr.split('.').collect(); if addr_vec.len() > 2 { @@ -79,115 +49,36 @@ pub fn get_pci_df(addr: &str) -> Result<(u8, u8)> { addr_vec.len() ); } - let slot = addr_vec.get(0).unwrap(); - let without_prefix = slot.trim_start_matches("0x"); - let slot = u8::from_str_radix(without_prefix, 16) - .chain_err(|| format!("Invalid slot num: {}", slot))?; + + let slot = addr_vec.first().unwrap(); + let slot = str_to_num::(slot).with_context(|| format!("Invalid slot num: {}", slot))?; if slot > 31 { bail!("Invalid slot num: {}", slot); } + let func = if addr_vec.get(1).is_some() { let function = addr_vec.get(1).unwrap(); - let without_prefix = function.trim_start_matches("0x"); - u8::from_str_radix(without_prefix, 16) - .chain_err(|| format!("Invalid function num: {}", function))? + str_to_num::(function).with_context(|| format!("Invalid function num: {}", function))? 
} else { 0 }; if func > 7 { bail!("Invalid function num: {}", func); } + Ok((slot, func)) } pub fn get_pci_bdf(pci_cfg: &str) -> Result { - let mut cmd_parser = CmdParser::new("bdf"); - cmd_parser.push("").push("bus").push("addr"); - cmd_parser.get_parameters(pci_cfg)?; - - let mut pci_bdf = PciBdf::default(); - if let Some(bus) = cmd_parser.get_value::("bus")? { - pci_bdf.bus = bus; - } else { - bail!("Bus not specified for pci device"); - } - if let Some(addr) = cmd_parser.get_value::("addr")? { - pci_bdf.addr = get_pci_df(&addr).chain_err(|| "Failed to get addr")?; - } else { - bail!("No addr found for pci device"); - } - Ok(pci_bdf) -} - -pub fn get_multi_function(pci_cfg: &str) -> Result { - let mut cmd_parser = CmdParser::new("multifunction"); - cmd_parser.push("").push("multifunction"); - cmd_parser.get_parameters(pci_cfg)?; - - if let Some(multi_func) = cmd_parser - .get_value::("multifunction") - .chain_err(|| "Failed to get multifunction parameter, please set on or off (default).")? - { - return Ok(multi_func.inner); - } - - Ok(false) -} - -pub fn parse_root_port(rootport_cfg: &str) -> Result { - let mut cmd_parser = CmdParser::new("pcie-root-port"); - cmd_parser - .push("") - .push("bus") - .push("addr") - .push("port") - .push("chassis") - .push("multifunction") - .push("id"); - cmd_parser.parse(rootport_cfg)?; - - let mut root_port = RootPortConfig::default(); - if let Some(port) = cmd_parser.get_value::("port")? { - let without_prefix = port.trim_start_matches("0x"); - root_port.port = u8::from_str_radix(without_prefix, 16).unwrap(); - } else { - return Err(ErrorKind::FieldIsMissing("port", "rootport").into()); - } - let _ = cmd_parser.get_value::("chassis")?; - - if let Some(id) = cmd_parser.get_value::("id")? { - root_port.id = id; - } else { - return Err(ErrorKind::FieldIsMissing("id", "rootport").into()); + let bus = get_value_of_parameter("bus", pci_cfg)?; + let addr_str = get_value_of_parameter("addr", pci_cfg)?; + if addr_str.is_empty() { + bail!("Invalid addr."); } - root_port.multifunction = - if let Some(multi_func) = cmd_parser.get_value::("multifunction")? { - multi_func.into() - } else { - false - }; - root_port.check()?; - - Ok(root_port) -} + let addr = get_pci_df(&addr_str).with_context(|| "Failed to get addr")?; + let pci_bdf = PciBdf::new(bus, addr); -pub fn pci_args_check(cmd_parser: &CmdParser) -> Result<()> { - let device_type = cmd_parser.get_value::("")?; - let dev_type = device_type.unwrap(); - // Safe, because this function only be called when certain - // devices type are added. 
- if dev_type.ends_with("-device") { - if cmd_parser.get_value::("bus")?.is_some() { - bail!("virtio mmio device does not support bus arguments"); - } - if cmd_parser.get_value::("addr")?.is_some() { - bail!("virtio mmio device does not support addr arguments"); - } - if cmd_parser.get_value::("multifunction")?.is_some() { - bail!("virtio mmio device does not support multifunction arguments"); - } - } - Ok(()) + Ok(pci_bdf) } #[cfg(test)] @@ -250,26 +141,4 @@ mod tests { let pci_bdf = get_pci_bdf("virtio-balloon-device,addr=0x1.0x2"); assert!(pci_bdf.is_err()); } - - #[test] - fn test_get_multi_function() { - assert_eq!( - get_multi_function("virtio-balloon-device,bus=pcie.0,addr=0x1.0x2").unwrap(), - false - ); - assert_eq!( - get_multi_function("virtio-balloon-device,bus=pcie.0,addr=0x1.0x2,multifunction=on") - .unwrap(), - true - ); - assert_eq!( - get_multi_function("virtio-balloon-device,bus=pcie.0,addr=0x1.0x2,multifunction=off") - .unwrap(), - false - ); - assert!(get_multi_function( - "virtio-balloon-device,bus=pcie.0,addr=0x1.0x2,multifunction=close" - ) - .is_err()); - } } diff --git a/machine_manager/src/config/rng.rs b/machine_manager/src/config/rng.rs index 349bf31cf165a0f36e5688e086ab3bd0150c8945..78c3ef79cc8139d662319057d37dba8decbfa5ac 100644 --- a/machine_manager/src/config/rng.rs +++ b/machine_manager/src/config/rng.rs @@ -10,236 +10,18 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use super::errors::{ErrorKind, Result}; -use super::{pci_args_check, ObjConfig}; -use crate::config::{CmdParser, ConfigCheck, VmConfig, MAX_PATH_LENGTH}; +use clap::Parser; +use serde::{Deserialize, Serialize}; -const MIN_BYTES_PER_SEC: u64 = 64; -const MAX_BYTES_PER_SEC: u64 = 1_000_000_000; +use crate::config::{valid_id, valid_path}; -#[derive(Debug, Clone, Default)] +#[derive(Parser, Debug, Clone, Default, Serialize, Deserialize)] +#[command(no_binary_name(true))] pub struct RngObjConfig { + #[arg(long, value_parser = ["rng-random"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] pub id: String, + #[arg(long, value_parser = valid_path)] pub filename: String, } - -/// Config structure for virtio-rng. -#[derive(Debug, Clone, Default)] -pub struct RngConfig { - pub id: String, - pub random_file: String, - pub bytes_per_sec: Option, -} - -impl ConfigCheck for RngConfig { - fn check(&self) -> Result<()> { - if self.id.len() > MAX_PATH_LENGTH { - return Err( - ErrorKind::StringLengthTooLong("rng id".to_string(), MAX_PATH_LENGTH).into(), - ); - } - - if self.random_file.len() > MAX_PATH_LENGTH { - return Err(ErrorKind::StringLengthTooLong( - "rng random file".to_string(), - MAX_PATH_LENGTH, - ) - .into()); - } - - if let Some(bytes_per_sec) = self.bytes_per_sec { - if !(MIN_BYTES_PER_SEC..=MAX_BYTES_PER_SEC).contains(&bytes_per_sec) { - return Err(ErrorKind::IllegalValue( - "The bytes per second of rng device".to_string(), - MIN_BYTES_PER_SEC, - true, - MAX_BYTES_PER_SEC, - true, - ) - .into()); - } - } - - Ok(()) - } -} - -pub fn parse_rng_dev(vm_config: &mut VmConfig, rng_config: &str) -> Result { - let mut cmd_parser = CmdParser::new("rng"); - cmd_parser - .push("") - .push("id") - .push("bus") - .push("addr") - .push("multifunction") - .push("max-bytes") - .push("period") - .push("rng"); - - cmd_parser.parse(rng_config)?; - pci_args_check(&cmd_parser)?; - let mut rng_cfg = RngConfig::default(); - let rng = if let Some(rng_id) = cmd_parser.get_value::("rng")? 
{ - rng_id - } else { - return Err(ErrorKind::FieldIsMissing("rng", "rng").into()); - }; - - rng_cfg.id = if let Some(rng_id) = cmd_parser.get_value::("id")? { - rng_id - } else { - "".to_string() - }; - - if let Some(max) = cmd_parser.get_value::("max-bytes")? { - if let Some(peri) = cmd_parser.get_value::("period")? { - let mul = if let Some(res) = max.checked_mul(1000) { - res - } else { - bail!("Illegal max-bytes arguments: {:?}", max) - }; - let div = if let Some(res) = mul.checked_div(peri) { - res - } else { - bail!("Illegal period arguments: {:?}", peri) - }; - rng_cfg.bytes_per_sec = Some(div); - } else { - bail!("Argument 'period' is missing"); - } - } else if cmd_parser.get_value::("period")?.is_some() { - bail!("Argument 'max-bytes' is missing"); - } - - if let Some(object_cfg) = vm_config.object.remove(&rng) { - #[allow(irrefutable_let_patterns)] - if let ObjConfig::Rng(obj_cfg) = object_cfg { - rng_cfg.random_file = obj_cfg.filename; - } - } else { - bail!("Object for rng-random device not found"); - } - - rng_cfg.check()?; - Ok(rng_cfg) -} - -#[cfg(test)] -mod tests { - use crate::config::get_pci_bdf; - - use super::*; - - #[test] - fn test_rng_config_cmdline_parser_01() { - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_object("rng-random,id=objrng0,filename=/path/to/random_file") - .is_ok()); - let rng_config = parse_rng_dev(&mut vm_config, "virtio-rng-device,rng=objrng0"); - assert!(rng_config.is_ok()); - let config = rng_config.unwrap(); - assert_eq!(config.random_file, "/path/to/random_file"); - assert_eq!(config.bytes_per_sec, None); - - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_object("rng-random,id=objrng0,filename=/path/to/random_file") - .is_ok()); - let rng_config = parse_rng_dev( - &mut vm_config, - "virtio-rng-device,rng=objrng0,max-bytes=1234,period=1000", - ); - assert!(rng_config.is_ok()); - let config = rng_config.unwrap(); - assert_eq!(config.random_file, "/path/to/random_file"); - assert_eq!(config.bytes_per_sec, Some(1234)); - } - - #[test] - fn test_rng_config_cmdline_parser_02() { - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_object("rng-random,id=objrng0,filename=/path/to/random_file") - .is_ok()); - let rng_config = parse_rng_dev( - &mut vm_config, - "virtio-rng-device,rng=objrng0,max-bytes=63,period=1000", - ); - assert!(rng_config.is_err()); - - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_object("rng-random,id=objrng0,filename=/path/to/random_file") - .is_ok()); - let rng_config = parse_rng_dev( - &mut vm_config, - "virtio-rng-device,rng=objrng0,max-bytes=64,period=1000", - ); - assert!(rng_config.is_ok()); - let config = rng_config.unwrap(); - assert_eq!(config.random_file, "/path/to/random_file"); - assert_eq!(config.bytes_per_sec, Some(64)); - - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_object("rng-random,id=objrng0,filename=/path/to/random_file") - .is_ok()); - let rng_config = parse_rng_dev( - &mut vm_config, - "virtio-rng-device,rng=objrng0,max-bytes=1000000000,period=1000", - ); - assert!(rng_config.is_ok()); - let config = rng_config.unwrap(); - assert_eq!(config.random_file, "/path/to/random_file"); - assert_eq!(config.bytes_per_sec, Some(1000000000)); - - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_object("rng-random,id=objrng0,filename=/path/to/random_file") - .is_ok()); - let rng_config = parse_rng_dev( - &mut vm_config, - "virtio-rng-device,rng=objrng0,max-bytes=1000000001,period=1000", - ); - 
assert!(rng_config.is_err()); - } - - #[test] - fn test_pci_rng_config_cmdline_parser() { - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_object("rng-random,id=objrng0,filename=/path/to/random_file") - .is_ok()); - let rng_cfg = "virtio-rng-pci,rng=objrng0,bus=pcie.0,addr=0x1.0x3"; - let rng_config = parse_rng_dev(&mut vm_config, rng_cfg); - assert!(rng_config.is_ok()); - let config = rng_config.unwrap(); - assert_eq!(config.random_file, "/path/to/random_file"); - assert_eq!(config.bytes_per_sec, None); - let pci_bdf = get_pci_bdf(rng_cfg); - assert!(pci_bdf.is_ok()); - let pci = pci_bdf.unwrap(); - assert_eq!(pci.bus, "pcie.0".to_string()); - assert_eq!(pci.addr, (1, 3)); - - // object "objrng0" has been removed. - let rng_config = parse_rng_dev(&mut vm_config, rng_cfg); - assert!(rng_config.is_err()); - - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_object("rng-random,id=objrng0,filename=/path/to/random_file") - .is_ok()); - let rng_cfg = "virtio-rng-device,rng=objrng0,bus=pcie.0,addr=0x1.0x3"; - let rng_config = parse_rng_dev(&mut vm_config, rng_cfg); - assert!(rng_config.is_err()); - - let mut vm_config = VmConfig::default(); - assert!(vm_config - .add_object("rng-random,id=objrng0,filename=/path/to/random_file") - .is_ok()); - let rng_cfg = "virtio-rng-pci,rng=objrng0,bus=pcie.0,addr=0x1.0x3,multifunction=on"; - assert!(parse_rng_dev(&mut vm_config, rng_cfg).is_ok()); - } -} diff --git a/machine_manager/src/config/sasl_auth.rs b/machine_manager/src/config/sasl_auth.rs new file mode 100644 index 0000000000000000000000000000000000000000..f01699a19bc6249f00481c7a8234ea8e54af4bcf --- /dev/null +++ b/machine_manager/src/config/sasl_auth.rs @@ -0,0 +1,68 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use anyhow::{anyhow, Result}; +use clap::Parser; +use serde::{Deserialize, Serialize}; + +use crate::config::{str_slip_to_clap, valid_id, ConfigError, VmConfig}; + +#[derive(Parser, Debug, Clone, Default, Serialize, Deserialize)] +#[command(no_binary_name(true))] +pub struct SaslAuthObjConfig { + #[arg(long, value_parser = ["authz-simple"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + /// Authentication User Name. 
+ #[arg(long, default_value = "")] + pub identity: String, +} + +impl VmConfig { + pub fn add_saslauth(&mut self, saslauth_config: &str) -> Result<()> { + let saslauth = + SaslAuthObjConfig::try_parse_from(str_slip_to_clap(saslauth_config, true, false))?; + let id = saslauth.id.clone(); + if self.object.sasl_object.contains_key(&id) { + return Err(anyhow!(ConfigError::IdRepeat("saslauth".to_string(), id))); + } + self.object.sasl_object.insert(id, saslauth); + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_add_saslauth() { + let id = String::from("authz0"); + let mut vm_config = VmConfig::default(); + assert!(vm_config + .add_object("authz-simple,id=authz0,identity=test") + .is_ok()); + assert!(vm_config.object.sasl_object.get(&id).is_some()); + if let Some(obj_cfg) = vm_config.object.sasl_object.get(&id) { + assert_eq!(obj_cfg.identity, "test".to_string()); + } + + let mut vm_config = VmConfig::default(); + assert!(vm_config.add_object("authz-simple,id=authz0").is_ok()); + assert!(vm_config.object.sasl_object.get(&id).is_some()); + if let Some(obj_cfg) = vm_config.object.sasl_object.get(&id) { + assert!(obj_cfg.identity == *""); + } + } +} diff --git a/machine_manager/src/config/smbios.rs b/machine_manager/src/config/smbios.rs new file mode 100644 index 0000000000000000000000000000000000000000..75220f456fbe1427d89b463d0974f00135c77e02 --- /dev/null +++ b/machine_manager/src/config/smbios.rs @@ -0,0 +1,377 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::str::FromStr; + +use anyhow::{anyhow, bail, Result}; +use clap::Parser; +use serde::{Deserialize, Serialize}; + +use super::{get_value_of_parameter, str_slip_to_clap}; +use crate::config::VmConfig; + +#[derive(Parser, Clone, Default, Debug, Serialize, Deserialize)] +#[command(no_binary_name(true))] +pub struct SmbiosType0Config { + #[arg(long, alias = "type", value_parser = ["0"])] + pub smbios_type: String, + #[arg(long)] + pub vendor: Option, + #[arg(long)] + pub version: Option, + #[arg(long)] + pub date: Option, + // Note: we don't set `ArgAction::Append` for `added`, so it cannot be specified + // from the command line, as command line will parse errors. 
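(In other words — an inference from the add_smbios_typeN() helpers further below, not stated explicitly in the patch — the derived Default leaves `added` as false, the clap default flips it to true once a `-smbios type=0,...` string has been parsed successfully, and a second `-smbios type=0,...` then trips the "smbios type0 has been added" bail.)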
+ #[arg(long, default_value = "true")] + pub added: bool, +} + +#[derive(Parser, Clone, Default, Debug, Serialize, Deserialize)] +#[command(no_binary_name(true))] +pub struct SmbiosType1Config { + #[arg(long, alias = "type", value_parser = ["1"])] + pub smbios_type: String, + #[arg(long)] + pub manufacturer: Option, + #[arg(long)] + pub product: Option, + #[arg(long)] + pub version: Option, + #[arg(long)] + pub serial: Option, + #[arg(long)] + pub sku: Option, + #[arg(long)] + pub family: Option, + #[arg(long, value_parser = get_uuid)] + pub uuid: Option, + #[arg(long, default_value = "true")] + pub added: bool, +} + +#[derive(Parser, Clone, Default, Debug, Serialize, Deserialize)] +#[command(no_binary_name(true))] +pub struct SmbiosType2Config { + #[arg(long, alias = "type", value_parser = ["2"])] + pub smbios_type: String, + #[arg(long)] + pub manufacturer: Option, + #[arg(long)] + pub product: Option, + #[arg(long)] + pub version: Option, + #[arg(long)] + pub serial: Option, + #[arg(long)] + pub asset: Option, + #[arg(long)] + pub location: Option, + #[arg(long, default_value = "true")] + pub added: bool, +} + +#[derive(Parser, Clone, Default, Debug, Serialize, Deserialize)] +#[command(no_binary_name(true))] +pub struct SmbiosType3Config { + #[arg(long, alias = "type", value_parser = ["3"])] + pub smbios_type: String, + #[arg(long)] + pub manufacturer: Option, + #[arg(long)] + pub version: Option, + #[arg(long)] + pub serial: Option, + #[arg(long)] + pub sku: Option, + #[arg(long)] + pub asset: Option, + #[arg(long, default_value = "true")] + pub added: bool, +} + +#[derive(Parser, Clone, Default, Debug, Serialize, Deserialize)] +#[command(no_binary_name(true))] +pub struct SmbiosType4Config { + #[arg(long, alias = "type", value_parser = ["4"])] + pub smbios_type: String, + #[arg(long)] + pub manufacturer: Option, + #[arg(long)] + pub version: Option, + #[arg(long)] + pub serial: Option, + #[arg(long)] + pub asset: Option, + #[arg(long, alias = "sock_pfx")] + pub sock_pfx: Option, + #[arg(long)] + pub part: Option, + #[arg(long)] + pub max_speed: Option, + #[arg(long)] + pub current_speed: Option, + #[arg(long, default_value = "true")] + pub added: bool, +} + +#[derive(Parser, Clone, Default, Debug, Serialize, Deserialize)] +#[command(no_binary_name(true))] +pub struct SmbiosType17Config { + #[arg(long, alias = "type", value_parser = ["17"])] + pub smbios_type: String, + #[arg(long)] + pub manufacturer: Option, + #[arg(long)] + pub serial: Option, + #[arg(long)] + pub asset: Option, + #[arg(long, alias = "loc_pfx")] + pub loc_pfx: Option, + #[arg(long)] + pub part: Option, + #[arg(long, default_value = "0")] + pub speed: u16, + #[arg(long)] + pub bank: Option, + #[arg(long, default_value = "true")] + pub added: bool, +} + +#[derive(Clone, Default, Debug, Serialize, Deserialize)] +pub struct SmbiosConfig { + pub type0: SmbiosType0Config, + pub type1: SmbiosType1Config, + pub type2: SmbiosType2Config, + pub type3: SmbiosType3Config, + pub type4: SmbiosType4Config, + pub type17: SmbiosType17Config, +} + +/// Check if the uuid is valid. +fn check_valid_uuid(uuid: &str) -> bool { + if uuid.len() != 36 { + return false; + } + + // Char located at 8, 13, 18, 23 should be `-` + let indexes = &[8, 13, 18, 23]; + for i in indexes { + if uuid.chars().nth(*i).unwrap() != '-' { + return false; + } + } + + for ch in uuid.chars() { + if ch != '-' && (!ch.is_ascii_hexdigit()) { + return false; + } + } + + true +} + +/// Convert an ASCII string to a 128-bit buffer. 
+/// format: 33DB4D5E-1FF7-401C-9657-7441C03DD766 +#[derive(Clone, Default, Debug, Serialize, Deserialize)] +pub struct Uuid { + pub name: Vec, +} + +impl FromStr for Uuid { + type Err = anyhow::Error; + + fn from_str(str: &str) -> std::result::Result { + let name = str.to_string(); + + if !check_valid_uuid(&name) { + return Err(anyhow!("Invalid uuid {}", name)); + } + + let mut uuid_bytes = Vec::new(); + // If the UUID is "aabbccdd-eeff-gghh-iijj-kkllmmnnoopp", then the encoded order is: + // dd cc bb aa ff ee hh gg ii jj kk ll mm nn oo pp + let index = &[6, 4, 2, 0, 11, 9, 16, 14, 19, 21, 24, 26, 28, 30, 32, 34]; + + for i in index { + let mut chars = name.chars(); + uuid_bytes.push( + (chars.nth(*i).unwrap().to_digit(16).unwrap() as u8) << 4 + | chars.next().unwrap().to_digit(16).unwrap() as u8, + ); + } + Ok(Uuid { name: uuid_bytes }) + } +} + +fn get_uuid(s: &str) -> Result { + let uuid = Uuid::from_str(s)?; + Ok(uuid) +} + +impl VmConfig { + /// # Arguments + /// + /// * `type0` - The type0 cmdline string. + fn add_smbios_type0(&mut self, type0: &str) -> Result<()> { + if self.smbios.type0.added { + bail!("smbios type0 has been added"); + } + + let type0_cfg = SmbiosType0Config::try_parse_from(str_slip_to_clap(type0, false, false))?; + self.smbios.type0 = type0_cfg; + + Ok(()) + } + + /// # Arguments + /// + /// * `type1` - The type1 cmdline string. + fn add_smbios_type1(&mut self, type1: &str) -> Result<()> { + if self.smbios.type1.added { + bail!("smbios type1 has been added"); + } + + let type1_cfg = SmbiosType1Config::try_parse_from(str_slip_to_clap(type1, false, false))?; + self.smbios.type1 = type1_cfg; + + Ok(()) + } + + /// # Arguments + /// + /// * `type2` - The type2 cmdline string. + fn add_smbios_type2(&mut self, type2: &str) -> Result<()> { + if self.smbios.type2.added { + bail!("smbios type2 has been added"); + } + let type2_cfg = SmbiosType2Config::try_parse_from(str_slip_to_clap(type2, false, false))?; + self.smbios.type2 = type2_cfg; + + Ok(()) + } + + /// # Arguments + /// + /// * `type3` - The type3 cmdline string. + fn add_smbios_type3(&mut self, type3: &str) -> Result<()> { + if self.smbios.type3.added { + bail!("smbios type3 has been added"); + } + + let type3_cfg = SmbiosType3Config::try_parse_from(str_slip_to_clap(type3, false, false))?; + self.smbios.type3 = type3_cfg; + + Ok(()) + } + + /// # Arguments + /// + /// * `type4` - The type4 cmdline string. + fn add_smbios_type4(&mut self, type4: &str) -> Result<()> { + if self.smbios.type4.added { + bail!("smbios type4 has been added"); + } + + let type4_cfg = SmbiosType4Config::try_parse_from(str_slip_to_clap(type4, false, false))?; + self.smbios.type4 = type4_cfg; + + Ok(()) + } + + /// # Arguments + /// + /// * `type17` - The type17 cmdline string. + fn add_smbios_type17(&mut self, type17: &str) -> Result<()> { + if self.smbios.type17.added { + bail!("smbios type17 has been added"); + } + + let type17_cfg = + SmbiosType17Config::try_parse_from(str_slip_to_clap(type17, false, false))?; + self.smbios.type17 = type17_cfg; + + Ok(()) + } + + /// Add argument `smbios_args` to `VmConfig`. + /// + /// # Arguments + /// + /// * `smbios_args` - The args of object. 
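+    ///
+    /// e.g. "type=1,manufacturer=fake,uuid=33DB4D5E-1FF7-401C-9657-7441C03DD766"
+    /// (a trimmed form of the string used in the tests below); the "type" parameter
+    /// selects which add_smbios_typeN() helper parses the remaining fields.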
+ pub fn add_smbios(&mut self, smbios_args: &str) -> Result<()> { + let smbios_type = get_value_of_parameter("type", smbios_args)?; + match smbios_type.as_str() { + "0" => { + self.add_smbios_type0(smbios_args)?; + } + "1" => { + self.add_smbios_type1(smbios_args)?; + } + "2" => { + self.add_smbios_type2(smbios_args)?; + } + "3" => { + self.add_smbios_type3(smbios_args)?; + } + "4" => { + self.add_smbios_type4(smbios_args)?; + } + "17" => { + self.add_smbios_type17(smbios_args)?; + } + _ => { + bail!("Unknow smbios type: {:?}", &smbios_type); + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_smbios_uuid() { + let uuid = Uuid::from_str("33DB4D5E-1FF7-401C-9657-7441C03DD766").unwrap(); + + assert_eq!( + uuid.name.to_vec(), + &[ + 0x5E, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, + 0xD7, 0x66 + ] + ); + } + + #[test] + fn test_add_smbios() { + let mut vm_config = VmConfig::default(); + + let smbios0 = "type=0,vendor=fake,version=fake,date=fake"; + let smbios1 = "type=1,manufacturer=fake,version=fake,product=fake,serial=fake,uuid=33DB4D5E-1FF7-401C-9657-7441C03DD766,sku=fake,family=fake"; + let smbios2 = "type=2,manufacturer=fake,product=fake,version=fake,serial=fake,asset=fake,location=fake"; + let smbios3 = "type=3,manufacturer=fake,version=fake,serial=fake,asset=fake,sku=fake"; + let smbios4 = "type=4,sock_pfx=fake,manufacturer=fake,version=fake,serial=fake,asset=fake,part=fake,max-speed=1,current-speed=1"; + let smbios17 = "type=17,loc_pfx=fake,bank=fake,manufacturer=fake,serial=fake,asset=fake,part=fake,speed=1"; + + assert!(vm_config.add_smbios(smbios0).is_ok()); + assert!(vm_config.add_smbios(smbios1).is_ok()); + assert!(vm_config.add_smbios(smbios2).is_ok()); + assert!(vm_config.add_smbios(smbios3).is_ok()); + assert!(vm_config.add_smbios(smbios4).is_ok()); + assert!(vm_config.add_smbios(smbios17).is_ok()); + assert!(vm_config.add_smbios(smbios0).is_err()); + } +} diff --git a/machine_manager/src/config/tls_creds.rs b/machine_manager/src/config/tls_creds.rs new file mode 100644 index 0000000000000000000000000000000000000000..a3b7396c07d0ed789c5f2e5ec7392585d4af0e6f --- /dev/null +++ b/machine_manager/src/config/tls_creds.rs @@ -0,0 +1,85 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
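+
+// A minimal usage sketch (the id and directory below are illustrative, mirroring the
+// unit test rather than any default):
+//   -object tls-creds-x509,id=vnc-tls-creds0,dir=/path/to/pki,endpoint=server,verify-peer=false
+// A successfully parsed object is stored in VmConfig.object.tls_object keyed by its id
+// (see add_tlscred() below).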
+ +use anyhow::{anyhow, Result}; +use clap::{ArgAction, Parser}; +use serde::{Deserialize, Serialize}; + +use crate::config::{str_slip_to_clap, valid_dir, valid_id, ConfigError, VmConfig}; + +#[derive(Parser, Debug, Clone, Default, Serialize, Deserialize)] +#[command(no_binary_name(true))] +pub struct TlsCredObjConfig { + #[arg(long)] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long, value_parser = valid_dir)] + pub dir: String, + #[arg(long)] + pub endpoint: Option, + #[arg(long, alias = "verify-peer", default_value= "false", action = ArgAction::Append)] + pub verifypeer: bool, +} + +impl VmConfig { + pub fn add_tlscred(&mut self, tlscred_config: &str) -> Result<()> { + let tlscred = + TlsCredObjConfig::try_parse_from(str_slip_to_clap(tlscred_config, true, false))?; + let id = tlscred.id.clone(); + if self.object.tls_object.contains_key(&id) { + return Err(anyhow!(ConfigError::IdRepeat("tlscred".to_string(), id))); + } + self.object.tls_object.insert(id, tlscred); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::{env, fs}; + + use super::*; + + #[test] + fn test_add_tlscred() { + let mut dir = env::current_dir().unwrap(); + dir.push("test_pki"); + // Create file. + if !dir.is_dir() { + fs::create_dir(dir.clone()).unwrap(); + } + assert!(dir.is_dir()); + + // Certificate directory is exist. + let tls_config: String = format!( + "tls-creds-x509,id=vnc-tls-creds0,dir={},endpoint=server,verify-peer=false", + dir.to_str().unwrap() + ); + let id = String::from("vnc-tls-creds0"); + let mut vm_config = VmConfig::default(); + assert!(vm_config.add_object(tls_config.as_str()).is_ok()); + assert!(vm_config.object.tls_object.get(&id).is_some()); + if let Some(tls_cred_cfg) = vm_config.object.tls_object.get(&id) { + assert_eq!(tls_cred_cfg.dir, dir.to_str().unwrap()); + assert_eq!(tls_cred_cfg.endpoint, Some("server".to_string())); + assert!(!tls_cred_cfg.verifypeer); + } + + // Delete file. + fs::remove_dir(dir.clone()).unwrap(); + assert!(!dir.is_dir()); + // Certificate directory does not exist. + let mut vm_config = VmConfig::default(); + assert!(vm_config.add_object(tls_config.as_str()).is_err()); + } +} diff --git a/machine_manager/src/config/vfio.rs b/machine_manager/src/config/vfio.rs deleted file mode 100644 index 8acb4df6fe9297565e7d30743b81aec2826c1f41..0000000000000000000000000000000000000000 --- a/machine_manager/src/config/vfio.rs +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. -// -// StratoVirt is licensed under Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan -// PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. 
- -use super::errors::{ErrorKind, Result}; -use crate::config::{CmdParser, ConfigCheck, MAX_STRING_LENGTH}; - -#[derive(Default)] -pub struct VfioConfig { - pub host: String, - pub id: String, -} - -impl ConfigCheck for VfioConfig { - fn check(&self) -> Result<()> { - if self.host.len() > MAX_STRING_LENGTH { - return Err( - ErrorKind::StringLengthTooLong("host".to_string(), MAX_STRING_LENGTH).into(), - ); - } - - if self.id.len() > MAX_STRING_LENGTH { - return Err(ErrorKind::StringLengthTooLong("id".to_string(), MAX_STRING_LENGTH).into()); - } - - Ok(()) - } -} - -pub fn parse_vfio(vfio_config: &str) -> Result { - let mut cmd_parser = CmdParser::new("vfio-pci"); - cmd_parser - .push("") - .push("host") - .push("id") - .push("bus") - .push("addr") - .push("multifunction"); - cmd_parser.parse(vfio_config)?; - - let mut vfio: VfioConfig = VfioConfig::default(); - if let Some(host) = cmd_parser.get_value::("host")? { - vfio.host = host; - } else { - return Err(ErrorKind::FieldIsMissing("host", "vfio").into()); - } - if let Some(id) = cmd_parser.get_value::("id")? { - vfio.id = id; - } - vfio.check()?; - - Ok(vfio) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::config::get_pci_bdf; - - #[test] - fn test_check_vfio_config() { - let mut vfio_config = - parse_vfio("vfio-pci,host=0000:1a:00.3,id=net,bus=pcie.0,addr=0x1.0x2").unwrap(); - assert!(vfio_config.check().is_ok()); - - vfio_config.host = "IYqUdAMXggoUMU28eBJCxQGUirYYSyW1cfGJI3ZpZAzMFCKnVPA5e7gnurLtXjCm\ - YoG5pfqRDbN7M2dpSd8fzSbufAJaor8UY9xbH7BybZ7WDEFmkxgCQp6PWgaBSmLOCe1tEMs4RQ938ZLnh8ej\ - Q81VovbrU7ecafacCn9AJQoidN3Seab3QOEd4SJbtd4hAPeYvsXLVa6xOZxtVjqjRxk9b36feF0C5JrucVcs\ - QsusZZtVfUFUZxOoV8JltVsBmdasnic" - .to_string(); - assert!(vfio_config.check().is_err()); - - vfio_config.id = "LPwM1h4QUTCjL4fX2gFdCdPrF9S0kGHf0onpU6E4fyI6Jmzg0DCM9sffvEVjaVu1ilp\ - 2OrgCWzvNBflYvUUihPj3ePPYs3erSHmSOmQZbnGEFsiBSTJHfPAsRtWJoipeIh9cgIR1tnU3OjwPPli4gmb6\ - E6GgSyMd0oQtUGFyNf5pRHlYqlx3s7PMPVUtRJP0bBnNd5eDwWAotInu33h6UI0zfKgckAxeVdEROKAExx5xWK\ - V3AgPhvvPzFx3chYymy" - .to_string(); - assert!(vfio_config.check().is_err()); - } - - #[test] - fn test_vfio_config_cmdline_parser() { - let vfio_cfg = parse_vfio("vfio-pci,host=0000:1a:00.3,id=net"); - assert!(vfio_cfg.is_ok()); - let vfio_config = vfio_cfg.unwrap(); - assert_eq!(vfio_config.host, "0000:1a:00.3"); - assert_eq!(vfio_config.id, "net"); - } - - #[test] - fn test_pci_vfio_config_cmdline_parser() { - let vfio_cfg1 = "vfio-pci,host=0000:1a:00.3,id=net,bus=pcie.0,addr=0x1.0x2"; - let config1 = parse_vfio(vfio_cfg1); - assert!(config1.is_ok()); - let vfio_cfg2 = "vfio-pci,host=0000:1a:00.3,bus=pcie.0,addr=0x1.0x2"; - let config2 = parse_vfio(vfio_cfg2); - assert!(config2.is_ok()); - let vfio_cfg3 = "vfio-pci,id=net,bus=pcie.0,addr=0x1.0x2"; - let config3 = parse_vfio(vfio_cfg3); - assert!(config3.is_err()); - - let pci_bdf = get_pci_bdf(vfio_cfg1); - assert!(pci_bdf.is_ok()); - let pci = pci_bdf.unwrap(); - assert_eq!(pci.bus, "pcie.0".to_string()); - assert_eq!(pci.addr, (1, 2)); - - let vfio_cfg1 = - "vfio-pci,host=0000:1a:00.3,id=net,bus=pcie.0,addr=0x1.0x2,multifunction=on"; - assert!(parse_vfio(vfio_cfg1).is_ok()); - } -} diff --git a/machine_manager/src/config/vnc.rs b/machine_manager/src/config/vnc.rs new file mode 100644 index 0000000000000000000000000000000000000000..f257ae07c56315a420a40fd53526224b4d4f9a4e --- /dev/null +++ b/machine_manager/src/config/vnc.rs @@ -0,0 +1,125 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. 
+// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::net::Ipv4Addr; + +use anyhow::{anyhow, Context, Result}; +use clap::{ArgAction, Parser}; +use serde::{Deserialize, Serialize}; + +use crate::config::{str_slip_to_clap, ConfigError, VmConfig}; + +/// Configuration of vnc. +#[derive(Parser, Debug, Clone, Default, Serialize, Deserialize)] +#[command(no_binary_name(true))] +pub struct VncConfig { + /// Vnc listening addr (ip, port). + #[arg(long, alias = "classtype", value_parser = parse_ip_port)] + pub addr: (String, u16), + /// Configuration of encryption. + #[arg(long, alias = "tls-creds", default_value = "")] + pub tls_creds: String, + /// Authentication switch. + #[arg(long, default_value = "false", action = ArgAction::SetTrue)] + pub sasl: bool, + /// Configuration of authentication. + #[arg(long, alias = "sasl-authz", default_value = "")] + pub sasl_authz: String, +} + +const VNC_MAX_PORT_NUM: i32 = 65535; +const VNC_PORT_OFFSET: i32 = 5900; + +impl VmConfig { + /// Make configuration for vnc: "chardev" -> "vnc". + pub fn add_vnc(&mut self, vnc_config: &str) -> Result<()> { + let vnc_config = VncConfig::try_parse_from(str_slip_to_clap(vnc_config, true, false))?; + self.vnc = Some(vnc_config); + Ok(()) + } +} + +fn parse_ip_port(addr: &str) -> Result<(String, u16)> { + let v: Vec<&str> = addr.split(':').collect(); + if v.len() != 2 { + return Err(anyhow!(ConfigError::FieldIsMissing( + "ip".to_string(), + "port".to_string() + ))); + } + let ip = v[0] + .parse::() + .with_context(|| "Invalid Ip param for vnc!")?; + let base_port = v[1] + .parse::() + .with_context(|| "Invalid Port param for vnc!")?; + // Prevent the base_port out of bounds. + if !(0..=VNC_MAX_PORT_NUM - VNC_PORT_OFFSET).contains(&base_port) { + return Err(anyhow!(ConfigError::InvalidParam( + base_port.to_string(), + "port".to_string() + ))); + } + + Ok((ip.to_string(), (base_port + VNC_PORT_OFFSET) as u16)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_add_vnc() { + let mut vm_config = VmConfig::default(); + let config_line = "0.0.0.0:1,tls-creds=vnc-tls-creds0,sasl,sasl-authz=authz0"; + assert!(vm_config.add_vnc(config_line).is_ok()); + let vnc_config = vm_config.vnc.unwrap(); + assert_eq!(vnc_config.addr.0, String::from("0.0.0.0")); + assert_eq!(vnc_config.addr.1, 5901); + assert_eq!(vnc_config.tls_creds, String::from("vnc-tls-creds0")); + assert!(vnc_config.sasl); + assert_eq!(vnc_config.sasl_authz, String::from("authz0")); + + let mut vm_config = VmConfig::default(); + let config_line = "0.0.0.0:5900,tls-creds=vnc-tls-creds0"; + assert!(vm_config.add_vnc(config_line).is_ok()); + let vnc_config = vm_config.vnc.unwrap(); + assert!(!vnc_config.sasl); + assert_eq!(vnc_config.addr.1, 11800); + + let mut vm_config = VmConfig::default(); + let config_line = "0.0.0.0:1,sasl,sasl-authz=authz0"; + assert!(vm_config.add_vnc(config_line).is_ok()); + let vnc_config = vm_config.vnc.unwrap(); + assert_eq!(vnc_config.tls_creds, "".to_string()); + + // Invalie format of ip:port. + let config_lines = [ + "tls-creds=vnc-tls-creds0", // No ip:port. 
+ "127.0.0.1", // No port. + "1", // No ip. + "0.0.0.0:65536", // Invalid port. + "0.0.0.0:59636", // Invalid port. + "0.0.0.0:2147483647", // Invalie port. + "0.0.0.0:-1", // Invalid port. + "0.0.0.0:123ab", // Invalid port. + "127.257.0.1:0", // Invalid ip. + "127.0.0.0.1:0", // Invalid ip. + "127.12ab.0.1:0", // Invalid ip. + "127.0.1:0", // Invalid ip. + ]; + for config_line in config_lines { + let mut vm_config = VmConfig::default(); + assert!(vm_config.add_vnc(config_line).is_err()); + } + } +} diff --git a/machine_manager/src/error.rs b/machine_manager/src/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..01bf611067833ced868550a013e2671b022a9ef7 --- /dev/null +++ b/machine_manager/src/error.rs @@ -0,0 +1,32 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum MachineManagerError { + #[error("ConfigParser")] + ConfigParser { + #[from] + source: crate::config::error::ConfigError, + }, + #[error("Io")] + Io { + #[from] + source: std::io::Error, + }, + #[error("Json")] + Json { + #[from] + source: serde_json::Error, + }, +} diff --git a/machine_manager/src/event_loop.rs b/machine_manager/src/event_loop.rs index 72ef64f7f616271054ddaf7d451b1120c385d07b..79d7cf6f94fee268c2ce9295ff68ffa079fe4dd4 100644 --- a/machine_manager/src/event_loop.rs +++ b/machine_manager/src/event_loop.rs @@ -11,14 +11,20 @@ // See the Mulan PSL v2 for more details. use std::collections::HashMap; -use std::sync::{Arc, Mutex}; +use std::os::unix::prelude::RawFd; +use std::sync::{Arc, Barrier, Mutex}; use std::{process, thread}; -use crate::machine::IOTHREADS; -use crate::qmp::qmp_schema::IothreadInfo; +use anyhow::{bail, Result}; +use log::info; use super::config::IothreadConfig; -use util::loop_context::{EventLoopContext, EventLoopManager, EventNotifier}; +use crate::machine::IOTHREADS; +use crate::qmp::qmp_schema::IothreadInfo; +use crate::signal_handler::get_signal; +use util::loop_context::{ + gen_delete_notifiers, get_notifiers_fds, EventLoopContext, EventLoopManager, EventNotifier, +}; /// This struct used to manage all events occur during VM lifetime. /// # Notes @@ -41,18 +47,29 @@ impl EventLoop { /// # Arguments /// /// * `iothreads` - refer to `-iothread` params - pub fn object_init(iothreads: &Option>) -> util::errors::Result<()> { + pub fn object_init(iothreads: &Option>) -> Result<()> { let mut io_threads = HashMap::new(); + let cnt = match iothreads { + Some(thrs) => thrs.len(), + None => 0, + }; + let thread_exit_barrier = Arc::new(Barrier::new(cnt + 1)); + if let Some(thrs) = iothreads { for thr in thrs { - io_threads.insert(thr.id.clone(), EventLoopContext::new()); + io_threads.insert( + thr.id.clone(), + EventLoopContext::new(thread_exit_barrier.clone()), + ); } } + // SAFETY: This function is called at startup thus no concurrent accessing to + // GLOBAL_EVENT_LOOP. And each iothread has a dedicated EventLoopContext. 
unsafe { if GLOBAL_EVENT_LOOP.is_none() { GLOBAL_EVENT_LOOP = Some(EventLoop { - main_loop: EventLoopContext::new(), + main_loop: EventLoopContext::new(thread_exit_barrier), io_threads, }); @@ -68,10 +85,11 @@ impl EventLoop { }; IOTHREADS.lock().unwrap().push(iothread_info); while let Ok(ret) = ctx.iothread_run() { - if !ret { + if !ret || get_signal() != 0 { break; } } + ctx.thread_exit_barrier.wait(); })?; } } else { @@ -89,6 +107,7 @@ impl EventLoop { /// /// * `name` - if None, return main loop, OR return io-thread-loop which is related to `name`. pub fn get_ctx(name: Option<&String>) -> Option<&mut EventLoopContext> { + // SAFETY: All concurrently accessed data of EventLoopContext is protected. unsafe { if let Some(event_loop) = GLOBAL_EVENT_LOOP.as_mut() { if let Some(name) = name { @@ -106,24 +125,26 @@ impl EventLoop { /// /// # Arguments /// - /// * `manager` - The main part to manager the event loop specified by name. - /// * `name` - specify which event loop to manage - pub fn set_manager(manager: Arc>, name: Option<&String>) { - if let Some(ctx) = Self::get_ctx(name) { - ctx.set_manager(manager) + /// * `manager` - The main part to manager the event loop. + pub fn set_manager(manager: Arc>) { + // SAFETY: All concurrently accessed data of EventLoopContext is protected. + unsafe { + if let Some(event_loop) = GLOBAL_EVENT_LOOP.as_mut() { + event_loop.main_loop.set_manager(manager.clone()); + for (_name, io_thread) in event_loop.io_threads.iter_mut() { + io_thread.set_manager(manager.clone()); + } + } } } - /// Update event notifiers to event loop + /// Update event notifiers to event loop /// /// # Arguments /// /// * `notifiers` - The wrapper of events will be handled in the event loop specified by name. /// * `name` - specify which event loop to manage - pub fn update_event( - notifiers: Vec, - name: Option<&String>, - ) -> util::errors::Result<()> { + pub fn update_event(notifiers: Vec, name: Option<&String>) -> Result<()> { if let Some(ctx) = Self::get_ctx(name) { ctx.update_events(notifiers) } else { @@ -137,12 +158,21 @@ impl EventLoop { /// /// Once run main loop, `epoll` in `MainLoopContext` will execute /// `epoll_wait()` function to wait for events. - pub fn loop_run() -> util::errors::Result<()> { + pub fn loop_run() -> Result<()> { + // SAFETY: the main_loop ctx is dedicated for main thread, thus no concurrent + // accessing. unsafe { if let Some(event_loop) = GLOBAL_EVENT_LOOP.as_mut() { loop { + let sig_num = get_signal(); + if sig_num != 0 { + info!("MainLoop exits due to receive signal {}", sig_num); + event_loop.main_loop.thread_exit_barrier.wait(); + return Ok(()); + } if !event_loop.main_loop.run()? { info!("MainLoop exits due to guest internal operation."); + event_loop.main_loop.thread_exit_barrier.wait(); return Ok(()); } } @@ -151,4 +181,44 @@ impl EventLoop { } } } + + pub fn loop_clean() { + // SAFETY: the main_loop ctx is dedicated for main thread, thus no concurrent + // accessing. + unsafe { + GLOBAL_EVENT_LOOP = None; + } + } + + pub fn kick_all() { + // SAFETY: All concurrently accessed data of EventLoopContext is protected. 
+ unsafe { + if let Some(event_loop) = GLOBAL_EVENT_LOOP.as_mut() { + for (_name, io_thread) in event_loop.io_threads.iter_mut() { + io_thread.kick(); + } + event_loop.main_loop.kick(); + } + } + } +} + +pub fn register_event_helper( + notifiers: Vec, + ctx_name: Option<&String>, + record_evts: &mut Vec, +) -> Result<()> { + let mut notifiers_fds = get_notifiers_fds(¬ifiers); + EventLoop::update_event(notifiers, ctx_name)?; + record_evts.append(&mut notifiers_fds); + Ok(()) +} + +pub fn unregister_event_helper( + ctx_name: Option<&String>, + record_evts: &mut Vec, +) -> Result<()> { + EventLoop::update_event(gen_delete_notifiers(record_evts), ctx_name)?; + record_evts.clear(); + Ok(()) } diff --git a/machine_manager/src/lib.rs b/machine_manager/src/lib.rs index a544c609d757ab093b008413aa117d347c0e75e8..cab6b880623bf93669ad786f016f0764dd7231fb 100644 --- a/machine_manager/src/lib.rs +++ b/machine_manager/src/lib.rs @@ -21,28 +21,17 @@ //! 2. The API interface over VM inside and outside. //! 3. Configuration for VM and its devices. -#[macro_use] -extern crate log; -#[macro_use] -extern crate error_chain; - pub mod cmdline; pub mod config; +pub mod error; pub mod event_loop; pub mod machine; +pub mod notifier; pub mod qmp; pub mod signal_handler; pub mod socket; +pub mod state_query; pub mod temp_cleaner; +pub mod test_server; -pub mod errors { - error_chain! { - links { - ConfigParser(crate::config::errors::Error, crate::config::errors::ErrorKind); - } - foreign_links { - Io(std::io::Error); - Json(serde_json::Error); - } - } -} +pub use error::MachineManagerError; diff --git a/machine_manager/src/machine.rs b/machine_manager/src/machine.rs index 02d8900006549b2d4743787cf4d3ec91b2cfb258..bc4a38ec9ecc012d1745503eb590d55ae3a8bf71 100644 --- a/machine_manager/src/machine.rs +++ b/machine_manager/src/machine.rs @@ -11,17 +11,23 @@ // See the Mulan PSL v2 for more details. use std::os::unix::io::RawFd; +use std::str::FromStr; use std::sync::Mutex; +use anyhow::anyhow; use once_cell::sync::Lazy; +use serde::{Deserialize, Serialize}; use strum::VariantNames; +use crate::config::ShutdownAction; +use crate::qmp::qmp_response::{Response, Version}; use crate::qmp::qmp_schema::{ - BlockDevAddArgument, ChardevInfo, Cmd, CmdLine, DeviceAddArgument, DeviceProps, Events, GicCap, - IothreadInfo, KvmInfo, MachineInfo, MigrateCapabilities, NetDevAddArgument, PropList, - QmpCommand, QmpEvent, Target, TypeLists, + BlockDevAddArgument, BlockdevSnapshotInternalArgument, CameraDevAddArgument, + CharDevAddArgument, ChardevInfo, Cmd, CmdLine, CmdParameter, DeviceAddArgument, DeviceProps, + Events, GicCap, HumanMonitorCmdArgument, IothreadInfo, KvmInfo, MachineInfo, + MigrateCapabilities, NetDevAddArgument, PropList, QmpCommand, QmpErrorClass, QmpEvent, + QueryMemGpaArgument, QueryVcpuRegArgument, Target, TypeLists, UpdateRegionArgument, }; -use crate::qmp::{Response, Version}; #[derive(Clone)] pub struct PathInfo { @@ -29,9 +35,9 @@ pub struct PathInfo { pub label: String, } -/// State for KVM VM. -#[derive(PartialEq, Copy, Clone, Debug)] -pub enum KvmVmState { +/// State for VM. +#[derive(PartialEq, Eq, Copy, Clone, Debug)] +pub enum VmState { Created = 1, Running = 2, InMigrating = 3, @@ -40,16 +46,26 @@ pub enum KvmVmState { Shutdown = 6, } -/// Event over StratoVirt lifetime. -pub enum VmEvent { - ShutdownCauseGuestReset, - ShutdownCauseGuestCrash, - ShutdownCauseFailEntry, - ShutdownCauseInternalError, +/// Type for Hypervisor. 
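+///
+/// Parsed from the command line via the FromStr impl below: "kvm" and "kvm:tcg"
+/// (the latter kept for libvirt compatibility) map to Kvm, "test" to Test, and any
+/// other value is rejected.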
+#[derive(Default, Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub enum HypervisorType { + #[default] + Kvm, + Test, } -unsafe impl Sync for VmEvent {} -unsafe impl Send for VmEvent {} +impl FromStr for HypervisorType { + type Err = anyhow::Error; + + fn from_str(s: &str) -> std::result::Result { + match s { + // Note: "kvm:tcg" is a configuration compatible with libvirt. + "kvm" | "kvm:tcg" => Ok(HypervisorType::Kvm), + "test" => Ok(HypervisorType::Test), + _ => Err(anyhow!("Not supported or invalid hypervisor type {}.", s)), + } + } +} /// Trait to handle virtual machine lifecycle. /// @@ -61,7 +77,7 @@ unsafe impl Send for VmEvent {} /// `Created` --`(start)`--> `Running` /// `Running` --`(pause)`--> `Paused` /// `Paused` --`(resume)`--> `Running` -/// `KVM_VMSTATE_*` --`(destroy)`--> `None` +/// `VMSTATE_*` --`(destroy)`--> `None` /// /// **Notice**: /// 1. Migrate state(`Migrated` and `InMigrating`), @@ -75,37 +91,47 @@ unsafe impl Send for VmEvent {} pub trait MachineLifecycle { /// Start VM or Device, VM or Device enter running state after this call return. fn start(&self) -> bool { - self.notify_lifecycle(KvmVmState::Created, KvmVmState::Paused) + self.notify_lifecycle(VmState::Created, VmState::Paused) } /// Pause VM or Device, VM or Device will temporarily stored in memory until it resumed /// or destroyed. fn pause(&self) -> bool { - self.notify_lifecycle(KvmVmState::Running, KvmVmState::Paused) + self.notify_lifecycle(VmState::Running, VmState::Paused) } /// Resume VM or Device, resume VM state to running state after this call return. fn resume(&self) -> bool { - self.notify_lifecycle(KvmVmState::Paused, KvmVmState::Running) + self.notify_lifecycle(VmState::Paused, VmState::Running) } /// Close VM or Device, stop running. fn destroy(&self) -> bool { - self.notify_lifecycle(KvmVmState::Running, KvmVmState::Shutdown) + self.notify_lifecycle(VmState::Running, VmState::Shutdown) + } + + /// Close VM by power_button. + fn powerdown(&self) -> bool { + self.notify_lifecycle(VmState::Running, VmState::Shutdown) } /// Reset VM, stop running and restart a new VM. fn reset(&mut self) -> bool { - self.notify_lifecycle(KvmVmState::Running, KvmVmState::Shutdown) + self.notify_lifecycle(VmState::Running, VmState::Shutdown) } /// When VM or Device life state changed, notify concerned entry. /// /// # Arguments /// - /// * `old` - The current `KvmVmState`. - /// * `new` - The new `KvmVmState` expected to transform. - fn notify_lifecycle(&self, old: KvmVmState, new: KvmVmState) -> bool; + /// * `old` - The current `VmState`. + /// * `new` - The new `VmState` expected to transform. + fn notify_lifecycle(&self, old: VmState, new: VmState) -> bool; + + /// Get shutdown_action to determine the poweroff operation. + fn get_shutdown_action(&self) -> ShutdownAction { + ShutdownAction::ShutdownActionPoweroff + } } /// `AddressSpace` access interface of `Machine`. @@ -166,19 +192,66 @@ pub trait DeviceInterface { fn netdev_del(&mut self, id: String) -> Response; + /// Create a new chardev device. + fn chardev_add(&mut self, _args: CharDevAddArgument) -> Response; + + /// Remove a chardev device. + fn chardev_remove(&mut self, _id: String) -> Response; + + /// Creates a new camera device. + fn cameradev_add(&mut self, _args: CameraDevAddArgument) -> Response { + Response::create_response( + serde_json::to_value("cameradev_add not supported for VM".to_string()).unwrap(), + None, + ) + } + + /// Delete a camera device. 
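+    /// The default implementation only reports that the command is not supported;
+    /// a machine type that supports cameras can override it.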
+ fn cameradev_del(&mut self, _id: String) -> Response { + Response::create_response( + serde_json::to_value("cameradev_del not supported for VM".to_string()).unwrap(), + None, + ) + } + + /// Control OH audio's control authority. + fn switch_audio_record(&self, _authorized: String) -> Response { + Response::create_response( + serde_json::to_value("switch_audio_record not supported for VM".to_string()).unwrap(), + None, + ) + } + /// Receive a file descriptor via SCM rights and assign it a name. fn getfd(&self, fd_name: String, if_fd: Option) -> Response; /// Query balloon's size. fn query_balloon(&self) -> Response; + /// Query machine mem size. + fn query_mem(&self) -> Response; + + /// Query the info of vnc server. + fn query_vnc(&self) -> Response; + + /// Query display of stratovirt. + fn query_display_image(&self) -> Response; + + /// Query state. + fn query_workloads(&self) -> Response { + Response::create_error_response( + QmpErrorClass::GenericError("query_workloads not supported for VM".to_string()), + None, + ) + } + /// Set balloon's size. fn balloon(&self, size: u64) -> Response; /// Query the version of StratoVirt. fn query_version(&self) -> Response { let version = Version::new(1, 0, 5); - Response::create_response(serde_json::to_value(&version).unwrap(), None) + Response::create_response(serde_json::to_value(version).unwrap(), None) } /// Query all commands of StratoVirt. @@ -203,7 +276,7 @@ pub trait DeviceInterface { let target = Target { arch: "aarch64".to_string(), }; - Response::create_response(serde_json::to_value(&target).unwrap(), None) + Response::create_response(serde_json::to_value(target).unwrap(), None) } /// Query all events of StratoVirt. @@ -224,7 +297,7 @@ pub trait DeviceInterface { enabled: true, present: true, }; - Response::create_response(serde_json::to_value(&kvm).unwrap(), None) + Response::create_response(serde_json::to_value(kvm).unwrap(), None) } /// Query machine types supported by StratoVirt. @@ -273,8 +346,7 @@ pub trait DeviceInterface { fn list_type(&self) -> Response { let mut vec_types = Vec::new(); // These devices are used to interconnect with libvirt, but not been implemented yet. 
- #[allow(unused_mut)] - let mut list_types: Vec<(&str, &str)> = vec![ + let list_types: Vec<(&str, &str)> = vec![ ("ioh3420", "pcie-root-port-base"), ("pcie-root-port", "pcie-root-port-base"), ("pcie-pci-bridge", "base-pci-bridge"), @@ -286,9 +358,15 @@ pub trait DeviceInterface { ("vfio-pci", "pci-device"), ("vhost-vsock-device", "virtio-device"), ("iothread", "object"), + #[cfg(target_arch = "aarch64")] + ("gpex-pcihost", "pcie-host-bridge"), + ("nec-usb-xhci", "base-xhci"), + ("usb-tablet", "usb-hid"), + ("usb-kbd", "usb-hid"), + ("usb-storage", "usb-storage-dev"), + ("virtio-gpu-pci", "virtio-gpu"), ]; - #[cfg(target_arch = "aarch64")] - list_types.push(("gpex-pcihost", "pcie-host-bridge")); + for list in list_types { let re = TypeLists::new(String::from(list.0), String::from(list.1)); vec_types.push(re); @@ -310,27 +388,49 @@ pub trait DeviceInterface { }; vec_props.push(prop); } + if typename.contains("virtio-blk") { + let prop = DeviceProps { + name: "num-queues".to_string(), + prop_type: "uint16".to_string(), + }; + vec_props.push(prop); + } Response::create_response(serde_json::to_value(&vec_props).unwrap(), None) } fn query_tpm_models(&self) -> Response { let tpm_models = Vec::::new(); - Response::create_response(serde_json::to_value(&tpm_models).unwrap(), None) + Response::create_response(serde_json::to_value(tpm_models).unwrap(), None) } fn query_tpm_types(&self) -> Response { let tpm_types = Vec::::new(); - Response::create_response(serde_json::to_value(&tpm_types).unwrap(), None) + Response::create_response(serde_json::to_value(tpm_types).unwrap(), None) } fn query_command_line_options(&self) -> Response { - let cmd_lines = Vec::::new(); - Response::create_response(serde_json::to_value(&cmd_lines).unwrap(), None) + let parameters = vec![ + CmdParameter { + name: "discard".to_string(), + help: "discard operation (unmap|ignore)".to_string(), + parameter_type: "string".to_string(), + }, + CmdParameter { + name: "detect-zeroes".to_string(), + help: "optimize zero writes (unmap|on|off)".to_string(), + parameter_type: "string".to_string(), + }, + ]; + let cmd_lines = vec![CmdLine { + parameters, + option: "drive".to_string(), + }]; + Response::create_response(serde_json::to_value(cmd_lines).unwrap(), None) } fn query_migrate_capabilities(&self) -> Response { let caps = Vec::::new(); - Response::create_response(serde_json::to_value(&caps).unwrap(), None) + Response::create_response(serde_json::to_value(caps).unwrap(), None) } fn query_qmp_schema(&self) -> Response { @@ -359,32 +459,38 @@ pub trait DeviceInterface { fn qom_list(&self) -> Response { let vec_cmd: Vec = Vec::new(); - Response::create_response(serde_json::to_value(&vec_cmd).unwrap(), None) + Response::create_response(serde_json::to_value(vec_cmd).unwrap(), None) } fn qom_get(&self) -> Response { let vec_cmd: Vec = Vec::new(); - Response::create_response(serde_json::to_value(&vec_cmd).unwrap(), None) + Response::create_response(serde_json::to_value(vec_cmd).unwrap(), None) } fn query_block(&self) -> Response { let vec_cmd: Vec = Vec::new(); - Response::create_response(serde_json::to_value(&vec_cmd).unwrap(), None) + Response::create_response(serde_json::to_value(vec_cmd).unwrap(), None) } fn query_named_block_nodes(&self) -> Response { let vec_cmd: Vec = Vec::new(); - Response::create_response(serde_json::to_value(&vec_cmd).unwrap(), None) + Response::create_response(serde_json::to_value(vec_cmd).unwrap(), None) } fn query_blockstats(&self) -> Response { let vec_cmd: Vec = Vec::new(); - 
Response::create_response(serde_json::to_value(&vec_cmd).unwrap(), None) + Response::create_response(serde_json::to_value(vec_cmd).unwrap(), None) + } + + fn query_block_jobs(&self) -> Response { + // Fix me: qmp command call, return none temporarily. + let vec_cmd: Vec = Vec::new(); + Response::create_response(serde_json::to_value(vec_cmd).unwrap(), None) } fn query_gic_capabilities(&self) -> Response { let vec_gic: Vec = Vec::new(); - Response::create_response(serde_json::to_value(&vec_gic).unwrap(), None) + Response::create_response(serde_json::to_value(vec_gic).unwrap(), None) } fn query_iothreads(&self) -> Response { @@ -395,6 +501,45 @@ pub trait DeviceInterface { } Response::create_response(serde_json::to_value(&vec_iothreads).unwrap(), None) } + + fn update_region(&mut self, args: UpdateRegionArgument) -> Response; + + // Send event to input device for testing only. + fn input_event(&self, _k: String, _v: String) -> Response { + Response::create_empty_response() + } + + fn human_monitor_command(&self, _args: HumanMonitorCmdArgument) -> Response { + Response::create_error_response( + QmpErrorClass::GenericError("human-monitor-command is not supported yet".to_string()), + None, + ) + } + + fn blockdev_snapshot_internal_sync(&self, _args: BlockdevSnapshotInternalArgument) -> Response { + Response::create_empty_response() + } + + fn blockdev_snapshot_delete_internal_sync( + &self, + _args: BlockdevSnapshotInternalArgument, + ) -> Response { + Response::create_empty_response() + } + + fn query_vcpu_reg(&self, _args: QueryVcpuRegArgument) -> Response { + Response::create_error_response( + QmpErrorClass::GenericError("query_vcpu_reg is not supported yet".to_string()), + None, + ) + } + + fn query_mem_gpa(&self, _args: QueryMemGpaArgument) -> Response { + Response::create_error_response( + QmpErrorClass::GenericError("query_mem_gpa is not supported yet".to_string()), + None, + ) + } } /// Migrate external api @@ -412,6 +557,10 @@ pub trait MigrateInterface { fn query_migrate(&self) -> Response { Response::create_empty_response() } + + fn cancel_migrate(&self) -> Response { + Response::create_empty_response() + } } /// Machine interface which is exposed to inner hypervisor. @@ -420,5 +569,8 @@ pub trait MachineInterface: MachineLifecycle + MachineAddressInterface {} /// Machine interface which is exposed to outer hypervisor. pub trait MachineExternalInterface: MachineLifecycle + DeviceInterface + MigrateInterface {} +/// Machine interface which is exposed to test server. +pub trait MachineTestInterface: MachineAddressInterface {} + pub static PTY_PATH: Lazy>> = Lazy::new(|| Mutex::new(Vec::new())); pub static IOTHREADS: Lazy>> = Lazy::new(|| Mutex::new(Vec::new())); diff --git a/machine_manager/src/notifier.rs b/machine_manager/src/notifier.rs new file mode 100644 index 0000000000000000000000000000000000000000..36285f36c5337d981a83458e925cb997e2abbb13 --- /dev/null +++ b/machine_manager/src/notifier.rs @@ -0,0 +1,73 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
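+
+// A minimal usage sketch (the closure body is illustrative): a component registers a
+// callback that runs on every VM pause/resume transition and drops it when no longer
+// needed:
+//   let id = register_vm_pause_notifier(Arc::new(|paused| { /* react to pause state */ }));
+//   unregister_vm_pause_notifier(id);
+// pause_notify(paused) is the call that fans the state change out to every registered
+// callback.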
+ +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; + +use log::error; +use once_cell::sync::Lazy; + +static NOTIFIER_MANAGER: Lazy> = + Lazy::new(|| RwLock::new(NotifierManager::new())); + +pub type PauseNOtifyCallback = dyn Fn(bool) + Send + Sync; + +struct NotifierManager { + pause_notifiers: HashMap>, + next_id: u64, +} + +impl NotifierManager { + fn new() -> Self { + Self { + pause_notifiers: HashMap::new(), + next_id: 1, + } + } + + fn register_pause_notifier(&mut self, notifier: Arc) -> u64 { + let id = self.next_id; + self.pause_notifiers.insert(id, notifier); + self.next_id += 1; + id + } + + fn unregister_pause_notifier(&mut self, id: u64) { + if self.pause_notifiers.remove(&id).is_none() { + error!("There is no pause notifier with id {}", id); + } + } + + fn pause_notify(&self, paused: bool) { + for (_, notify) in self.pause_notifiers.iter() { + notify(paused); + } + } +} + +pub fn register_vm_pause_notifier(notifier: Arc) -> u64 { + NOTIFIER_MANAGER + .write() + .unwrap() + .register_pause_notifier(notifier) +} + +pub fn unregister_vm_pause_notifier(id: u64) { + NOTIFIER_MANAGER + .write() + .unwrap() + .unregister_pause_notifier(id) +} + +pub fn pause_notify(paused: bool) { + NOTIFIER_MANAGER.read().unwrap().pause_notify(paused); +} diff --git a/machine_manager/src/qmp/mod.rs b/machine_manager/src/qmp/mod.rs index 12e493ab937f61d3f9c0536d26a3eb17f677ef56..53f87e25255c7c87326704a9eb79161a200f371e 100644 --- a/machine_manager/src/qmp/mod.rs +++ b/machine_manager/src/qmp/mod.rs @@ -26,773 +26,10 @@ //! `qmp-schema.json`. It's can be compatible by Qemu's zoology. Those //! transformed structures can be found in `machine_manager/src/qmp/qmp_schema.rs` +pub mod qmp_channel; +pub mod qmp_response; #[allow(non_upper_case_globals)] #[allow(non_camel_case_types)] #[allow(non_snake_case)] pub mod qmp_schema; - -use std::collections::BTreeMap; -use std::io::Write; -use std::os::unix::io::RawFd; -use std::sync::{Arc, Mutex, RwLock}; -use std::time::{SystemTime, UNIX_EPOCH}; - -use serde::de::DeserializeOwned; -use serde::{Deserialize, Serialize}; -use serde_json::Value; -use util::leak_bucket::LeakBucket; -use util::set_termi_canon_mode; - -use crate::event_loop::EventLoop; -use crate::machine::MachineExternalInterface; -use crate::socket::SocketRWHandler; -use crate::{ - errors::{Result, ResultExt}, - temp_cleaner::TempCleaner, -}; -use qmp_schema as schema; -use schema::QmpCommand; - -static mut QMP_CHANNEL: Option> = None; - -/// Macro `event!`: send event to qmp-client. -/// -/// # Arguments -/// -/// * `$x` - event type -/// * `$y` - event context -/// -/// # Example -/// -/// ```text -/// #[macro_use] -/// use machine_manager::qmp::*; -/// -/// event!(Shutdown; shutdown_msg); -/// event!(Stop); -/// event!(Resume); -/// ``` -#[macro_export] -macro_rules! event { - ( $x:tt ) => {{ - QmpChannel::send_event(&$crate::qmp::qmp_schema::QmpEvent::$x { - data: Default::default(), - timestamp: $crate::qmp::create_timestamp(), - }); - }}; - ( $x:tt;$y:expr ) => {{ - QmpChannel::send_event(&$crate::qmp::qmp_schema::QmpEvent::$x { - data: $y, - timestamp: $crate::qmp::create_timestamp(), - }); - }}; -} - -/// Macro `create_command_matches!`: Generate a match statement for qmp_command -/// , which is combined with its handle func. -/// -/// # Arguments -/// -/// `cmd_type_1` - The qmp command with no arguments. -/// `cmd_type_2` - The qmp command with arguments. -macro_rules! 
create_command_matches { - ( $command:expr; $executor:expr; $ret:expr; - $(($cmd_type_1:tt, $func_1:tt)),*; - $(($cmd_type_2:tt, $func_2:tt, $($arg:tt),*)),*; - $(($cmd_type_3:tt, $func_3:tt)),* - ) => { - match $command { - $( - $crate::qmp::qmp_schema::QmpCommand::$cmd_type_1{ id, ..} => { - qmp_command_match!($func_1, $executor, $ret); - id - }, - )* - $( - $crate::qmp::qmp_schema::QmpCommand::$cmd_type_2{ arguments, id } => { - qmp_command_match!($func_2, $executor, arguments, $ret, $($arg),*); - id - }, - )* - $( - $crate::qmp::qmp_schema::QmpCommand::$cmd_type_3{ arguments, id } => { - qmp_command_match_with_argument!($func_3, $executor, arguments, $ret); - id - }, - )* - _ => None, - } - }; -} - -/// Macro: to execute handle func with every arguments. -macro_rules! qmp_command_match { - ( $func:tt, $executor:expr, $ret:expr ) => { - $ret = $executor.$func().into(); - }; - ( $func:tt, $executor:expr, $cmd:expr, $ret:expr, $($arg:tt),* ) => { - $ret = $executor.$func( - $($cmd.$arg),* - ).into(); - }; -} - -/// Macro: to execute handle func with all arguments. -macro_rules! qmp_command_match_with_argument { - ( $func:tt, $executor:expr, $cmd:expr, $ret:expr ) => { - $ret = $executor.$func($cmd).into(); - }; -} - -/// Qmp greeting message. -/// -/// # Notes -/// -/// It contains the version of VM or fake Qemu version to adapt others. -#[derive(Default, Debug, Serialize, Deserialize, PartialEq)] -pub struct QmpGreeting { - #[serde(rename = "QMP")] - qmp: Greeting, -} - -#[derive(Default, Debug, Serialize, Deserialize, PartialEq)] -struct Greeting { - version: Version, - capabilities: Vec, -} - -#[derive(Default, Debug, Serialize, Deserialize, PartialEq)] -pub struct Version { - #[serde(rename = "qemu")] - application: VersionNumber, - package: String, -} - -impl Version { - pub fn new(micro: u8, minor: u8, major: u8) -> Self { - let version_number = VersionNumber { - micro, - minor, - major, - }; - Version { - application: version_number, - package: "StratoVirt-".to_string() + env!("CARGO_PKG_VERSION"), - } - } -} - -#[derive(Default, Debug, Serialize, Deserialize, PartialEq)] -struct VersionNumber { - micro: u8, - minor: u8, - major: u8, -} - -impl QmpGreeting { - /// Create qmp greeting message. - /// - /// # Arguments - /// - /// * `micro` - Micro version number. - /// * `minor` - Minor version number. - /// * `major` - Major version number. - pub fn create_greeting(micro: u8, minor: u8, major: u8) -> Self { - let version_number = VersionNumber { - micro, - minor, - major, - }; - let cap: Vec = Default::default(); - let version = Version { - application: version_number, - package: "".to_string(), - }; - let greeting = Greeting { - version, - capabilities: cap, - }; - QmpGreeting { qmp: greeting } - } -} - -/// Qmp response to client -/// -/// # Notes -/// -/// It contains two kind response: `BadResponse` and `GoodResponse`. This two -/// kind response are fit by executing qmp command by success and failure. -#[derive(Debug, Serialize, Deserialize, PartialEq)] -pub struct Response { - #[serde(rename = "return", default, skip_serializing_if = "Option::is_none")] - return_: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] - error: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, -} - -impl Response { - /// Create qmp response with inner `Value` and `id`. - /// - /// # Arguments - /// - /// * `v` - The `Value` of qmp `return` field. - /// * `id` - The `id` for qmp `Response`, it must be equal to `Request`'s - /// `id`. 
- pub fn create_response(v: Value, id: Option) -> Self { - Response { - return_: Some(v), - error: None, - id, - } - } - - /// Create a empty qmp response, `return` field will be empty. - pub fn create_empty_response() -> Self { - Response { - return_: Some(serde_json::to_value(Empty {}).unwrap()), - error: None, - id: None, - } - } - - /// Create a error qmo response with `err_class` and `id`. - /// # Arguments - /// - /// * `err_class` - The `QmpErrorClass` of qmp `error` field. - /// * `id` - The `id` for qmp `Response`, it must be equal to `Request`'s - /// `id`. - pub fn create_error_response(err_class: schema::QmpErrorClass, id: Option) -> Self { - Response { - return_: None, - error: Some(ErrorMessage::new(&err_class)), - id, - } - } - - fn change_id(&mut self, id: Option) { - self.id = id; - } -} - -impl From for Response { - fn from(value: bool) -> Self { - if value { - Response::create_empty_response() - } else { - Response::create_error_response( - schema::QmpErrorClass::GenericError(String::new()), - None, - ) - } - } -} - -/// `ErrorMessage` for Qmp Response. -#[derive(Default, Debug, Serialize, Deserialize, PartialEq)] -pub struct ErrorMessage { - #[serde(rename = "class")] - errorkind: String, - desc: String, -} - -impl ErrorMessage { - fn new(e: &schema::QmpErrorClass) -> Self { - let content = e.to_content(); - let serde_str = serde_json::to_string(&e).unwrap(); - let serde_vec: Vec<&str> = serde_str.split(':').collect(); - let class_name = serde_vec[0]; - let len: usize = class_name.len(); - ErrorMessage { - errorkind: class_name[2..len - 1].to_string(), - desc: content, - } - } -} - -/// Empty message for QMP. -#[derive(Default, Debug, Serialize, Deserialize, PartialEq)] -pub struct Empty {} - -/// Command trait for Deserialize and find back Response. -pub trait Command: Serialize { - type Res: DeserializeOwned; - fn back(self) -> Self::Res; -} - -/// `TimeStamp` structure for `QmpEvent`. -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -pub struct TimeStamp { - seconds: u64, - microseconds: u64, -} - -/// Constructs a `TimeStamp` struct. -pub fn create_timestamp() -> TimeStamp { - let start = SystemTime::now(); - let since_the_epoch = start - .duration_since(UNIX_EPOCH) - .expect("Time went backwards"); - let seconds = u128::from(since_the_epoch.as_secs()); - let microseconds = (since_the_epoch.as_nanos() - seconds * 1_000_000_000) / (1_000_u128); - TimeStamp { - seconds: seconds as u64, - microseconds: microseconds as u64, - } -} - -/// Accept qmp command, analyze and exec it. -/// -/// # Arguments -/// -/// * `stream_fd` - The input stream file description. -/// * `controller` - The controller which execute actual qmp command. -/// * `leak_bucket` - The LeakBucket flow controller for qmp command. -/// -/// # Errors -/// -/// This function will fail when json parser failed or socket file description broke. -pub fn handle_qmp( - stream_fd: RawFd, - controller: &Arc>, - leak_bucket: &mut LeakBucket, -) -> Result<()> { - let mut qmp_service = crate::socket::SocketHandler::new(stream_fd); - - // If flow over `LEAK_BUCKET_LIMIT` per seconds, discard the request and return - // a `OperationThrottled` error. - if leak_bucket.throttled(EventLoop::get_ctx(None).unwrap(), 1_u64) { - qmp_service.discard()?; - let err_resp = schema::QmpErrorClass::OperationThrottled(crate::socket::LEAK_BUCKET_LIMIT); - qmp_service - .send_str(&serde_json::to_string(&Response::create_error_response( - err_resp, None, - ))?) 
- .chain_err(|| "Failed to send message to qmp client.")?; - return Ok(()); - } - - match qmp_service.decode_line() { - (Ok(None), _) => Ok(()), - (Ok(buffer), if_fd) => { - info!("QMP: <-- {:?}", buffer); - let qmp_command: schema::QmpCommand = buffer.unwrap(); - let (return_msg, shutdown_flag) = qmp_command_exec(qmp_command, controller, if_fd); - info!("QMP: --> {:?}", return_msg); - qmp_service.send_str(&return_msg)?; - - // handle shutdown command - if shutdown_flag { - let shutdown_msg = schema::Shutdown { - guest: false, - reason: "host-qmp-quit".to_string(), - }; - event!(Shutdown; shutdown_msg); - TempCleaner::clean(); - set_termi_canon_mode().expect("Failed to set terminal to canonical mode."); - - std::process::exit(0); - } - - Ok(()) - } - (Err(e), _) => { - let err_resp = schema::QmpErrorClass::GenericError(format!("{}", &e)); - warn!("Qmp json parser made an error:{}", e); - qmp_service.send_str(&serde_json::to_string(&Response::create_error_response( - err_resp, None, - ))?)?; - Ok(()) - } - } -} - -/// Create a match , where `qmp_command` and its arguments matching by handle -/// function, and exec this qmp command. -fn qmp_command_exec( - qmp_command: QmpCommand, - controller: &Arc>, - if_fd: Option, -) -> (String, bool) { - let mut qmp_response = Response::create_empty_response(); - let mut shutdown_flag = false; - - // Use macro create match to cover most Qmp command - let mut id = create_command_matches!( - qmp_command.clone(); controller.lock().unwrap(); qmp_response; - (stop, pause), - (cont, resume), - (query_status, query_status), - (query_version, query_version), - (query_commands, query_commands), - (query_target, query_target), - (query_kvm, query_kvm), - (query_events, query_events), - (query_machines, query_machines), - (query_tpm_models, query_tpm_models), - (query_tpm_types, query_tpm_types), - (query_command_line_options, query_command_line_options), - (query_migrate_capabilities, query_migrate_capabilities), - (query_qmp_schema, query_qmp_schema), - (query_sev_capabilities, query_sev_capabilities), - (query_chardev, query_chardev), - (qom_list, qom_list), - (qom_get, qom_get), - (query_block, query_block), - (query_named_block_nodes, query_named_block_nodes), - (query_blockstats, query_blockstats), - (query_gic_capabilities, query_gic_capabilities), - (query_iothreads, query_iothreads), - (query_migrate, query_migrate), - (query_cpus, query_cpus), - (query_balloon, query_balloon), - (list_type, list_type), - (query_hotpluggable_cpus, query_hotpluggable_cpus); - (device_list_properties, device_list_properties, typename), - (device_del, device_del, id), - (blockdev_del, blockdev_del, node_name), - (netdev_del, netdev_del, id), - (balloon, balloon, value), - (migrate, migrate, uri); - (device_add, device_add), - (blockdev_add, blockdev_add), - (netdev_add, netdev_add) - ); - - // Handle the Qmp command which macro can't cover - if id.is_none() { - id = match qmp_command { - QmpCommand::quit { id, .. } => { - controller.lock().unwrap().destroy(); - shutdown_flag = true; - id - } - QmpCommand::getfd { arguments, id } => { - qmp_response = controller.lock().unwrap().getfd(arguments.fd_name, if_fd); - id - } - _ => None, - } - } - - // Change response id with input qmp message - qmp_response.change_id(id); - ( - serde_json::to_string(&qmp_response).unwrap() + "\r", - shutdown_flag, - ) -} - -/// The struct `QmpChannel` is the only struct can handle Global variable -/// `QMP_CHANNEL`. 
-/// It is used to send event to qmp client and restore some file descriptor -/// which was sended by client. -pub struct QmpChannel { - /// The `writer` to send `QmpEvent`. - event_writer: RwLock>, - /// Restore file descriptor received from client. - fds: Arc>>, -} - -impl QmpChannel { - /// Constructs a `QmpChannel` in global `QMP_CHANNEL`. - pub fn object_init() { - unsafe { - if QMP_CHANNEL.is_none() { - QMP_CHANNEL = Some(Arc::new(QmpChannel { - event_writer: RwLock::new(None), - fds: Arc::new(RwLock::new(BTreeMap::new())), - })); - } - } - } - - /// Bind a `SocketRWHandler` to `QMP_CHANNEL`. - /// - /// # Arguments - /// - /// * `writer` - The `SocketRWHandler` used to communicate with client. - pub fn bind_writer(writer: SocketRWHandler) { - *Self::inner().event_writer.write().unwrap() = Some(writer); - } - - /// Unbind `SocketRWHandler` from `QMP_CHANNEL`. - pub fn unbind() { - *Self::inner().event_writer.write().unwrap() = None; - } - - /// Check whether a `SocketRWHandler` bind with `QMP_CHANNEL` or not. - pub fn is_connected() -> bool { - Self::inner().event_writer.read().unwrap().is_some() - } - - /// Restore extern file descriptor in `QMP_CHANNEL`. - /// - /// # Arguments - /// - /// * `name` - Name of file descriptor. - /// * `fd` - File descriptor sent by client. - pub fn set_fd(name: String, fd: RawFd) { - Self::inner().fds.write().unwrap().insert(name, fd); - } - - /// Get extern file descriptor restored in `QMP_CHANNEL`. - /// - /// # Arguments - /// - /// * `name` - Name of file descriptor. - pub fn get_fd(name: &str) -> Option { - Self::inner().fds.read().unwrap().get(name).copied() - } - - /// Send a `QmpEvent` to client. - /// - /// # Arguments - /// - /// * `event` - The `QmpEvent` sent to client. - #[allow(clippy::unused_io_amount)] - pub fn send_event(event: &schema::QmpEvent) { - if Self::is_connected() { - let event_str = serde_json::to_string(&event).unwrap(); - let mut writer_unlocked = Self::inner().event_writer.write().unwrap(); - let writer = writer_unlocked.as_mut().unwrap(); - writer.flush().unwrap(); - writer.write(event_str.as_bytes()).unwrap(); - writer.write(&[b'\r']).unwrap(); - writer.write(&[b'\n']).unwrap(); - info!("EVENT: --> {:?}", event); - } - } - - fn inner() -> &'static std::sync::Arc { - unsafe { - match &QMP_CHANNEL { - Some(channel) => channel, - None => { - panic!("Qmp channel not initialized"); - } - } - } - } -} - -#[cfg(test)] -mod tests { - extern crate serde_json; - use super::*; - use std::os::unix::net::{UnixListener, UnixStream}; - - #[test] - fn test_qmp_greeting_msg() { - let greeting_msg = QmpGreeting::create_greeting(1, 0, 5); - - let json_msg = r#" - { - "QMP":{ - "version":{ - "qemu":{ - "micro": 1, - "minor": 0, - "major": 5 - }, - "package": "" - }, - "capabilities": [] - } - } - "#; - let greeting_from_json: QmpGreeting = serde_json::from_str(json_msg).unwrap(); - - assert_eq!(greeting_from_json, greeting_msg); - } - - #[test] - fn test_qmp_resp() { - // 1.Empty response and ID change; - let mut resp = Response::create_empty_response(); - resp.change_id(Some("0".to_string())); - - let json_msg = r#"{"return":{},"id":"0"}"#; - assert_eq!(serde_json::to_string(&resp).unwrap(), json_msg); - - resp.change_id(Some("1".to_string())); - let json_msg = r#"{"return":{},"id":"1"}"#; - assert_eq!(serde_json::to_string(&resp).unwrap(), json_msg); - - // 2.Normal response - let resp_value = schema::StatusInfo { - singlestep: false, - running: true, - status: schema::RunState::running, - }; - let resp = 
Response::create_response(serde_json::to_value(&resp_value).unwrap(), None); - - let json_msg = r#"{"return":{"running":true,"singlestep":false,"status":"running"}}"#; - assert_eq!(serde_json::to_string(&resp).unwrap(), json_msg); - - // 3.Error response - let qmp_err = - schema::QmpErrorClass::GenericError("Invalid Qmp command arguments!".to_string()); - let resp = Response::create_error_response(qmp_err, None); - - let json_msg = - r#"{"error":{"class":"GenericError","desc":"Invalid Qmp command arguments!"}}"#; - assert_eq!(serde_json::to_string(&resp).unwrap(), json_msg); - } - - #[test] - fn test_qmp_event_msg() { - let event_json = - r#"{"event":"STOP","data":{},"timestamp":{"seconds":1575531524,"microseconds":91519}}"#; - let qmp_event: schema::QmpEvent = serde_json::from_str(&event_json).unwrap(); - match qmp_event { - schema::QmpEvent::Stop { - data: _, - timestamp: _, - } => { - assert!(true); - } - _ => assert!(false), - } - } - - // Environment Preparation for UnixSocket - fn prepare_unix_socket_environment(socket_id: &str) -> (UnixListener, UnixStream, UnixStream) { - let socket_name: String = format!("test_{}.sock", socket_id); - let _ = std::fs::remove_file(&socket_name); - - let listener = UnixListener::bind(&socket_name).unwrap(); - let client = UnixStream::connect(&socket_name).unwrap(); - let (server, _) = listener.accept().unwrap(); - (listener, client, server) - } - - // Environment Recovery for UnixSocket - fn recover_unix_socket_environment(socket_id: &str) { - let socket_name: String = format!("test_{}.sock", socket_id); - std::fs::remove_file(&socket_name).unwrap(); - } - - #[test] - fn test_qmp_event_macro() { - use crate::socket::{Socket, SocketRWHandler}; - use std::io::Read; - - // Pre test. Environment preparation - QmpChannel::object_init(); - let mut buffer = [0u8; 200]; - let (listener, mut client, server) = prepare_unix_socket_environment("06"); - - // Use event! macro to send event msg to client - let socket = Socket::from_unix_listener(listener, None); - socket.bind_unix_stream(server); - QmpChannel::bind_writer(SocketRWHandler::new(socket.get_stream_fd())); - - // 1.send no-content event - event!(Stop); - let length = client.read(&mut buffer).unwrap(); - let qmp_event: schema::QmpEvent = - serde_json::from_str(&(String::from_utf8_lossy(&buffer[..length]))).unwrap(); - match qmp_event { - schema::QmpEvent::Stop { - data: _, - timestamp: _, - } => { - assert!(true); - } - _ => assert!(false), - } - - // 2.send with-content event - let shutdown_event = schema::Shutdown { - guest: true, - reason: "guest-shutdown".to_string(), - }; - event!(Shutdown; shutdown_event); - let length = client.read(&mut buffer).unwrap(); - let qmp_event: schema::QmpEvent = - serde_json::from_str(&(String::from_utf8_lossy(&buffer[..length]))).unwrap(); - match qmp_event { - schema::QmpEvent::Shutdown { data, timestamp: _ } => { - assert_eq!(data.guest, true); - assert_eq!(data.reason, "guest-shutdown".to_string()); - } - _ => assert!(false), - } - - // After test. Environment Recover - recover_unix_socket_environment("06"); - } - - #[test] - fn test_qmp_send_response() { - use crate::socket::Socket; - use std::io::Read; - - // Pre test. Environment preparation - let mut buffer = [0u8; 300]; - let (listener, mut client, server) = prepare_unix_socket_environment("07"); - - // Use event! 
macro to send event msg to client - let socket = Socket::from_unix_listener(listener, None); - socket.bind_unix_stream(server); - - // 1.send greeting response - socket.send_response(true); - let length = client.read(&mut buffer).unwrap(); - let qmp_response: QmpGreeting = - serde_json::from_str(&(String::from_utf8_lossy(&buffer[..length]))).unwrap(); - let qmp_greeting = QmpGreeting::create_greeting(1, 0, 5); - assert_eq!(qmp_greeting, qmp_response); - - // 2.send empty response - socket.send_response(false); - let length = client.read(&mut buffer).unwrap(); - let qmp_response: Response = - serde_json::from_str(&(String::from_utf8_lossy(&buffer[..length]))).unwrap(); - let qmp_empty_response = Response::create_empty_response(); - assert_eq!(qmp_empty_response, qmp_response); - - // After test. Environment Recover - recover_unix_socket_environment("07"); - drop(socket); - } - - #[test] - fn test_create_error_response() { - let strange_msg = "!?/.,、。’】= -~1!@#¥%……&*()——+".to_string(); - - let err_cls = schema::QmpErrorClass::GenericError(strange_msg.clone()); - let msg = ErrorMessage::new(&err_cls); - assert_eq!(msg.desc, strange_msg); - assert_eq!(msg.errorkind, "GenericError".to_string()); - let qmp_err = schema::QmpErrorClass::GenericError(strange_msg.clone()); - let resp = Response::create_error_response(qmp_err, None); - assert_eq!(resp.error, Some(msg)); - - let err_cls = schema::QmpErrorClass::CommandNotFound(strange_msg.clone()); - let msg = ErrorMessage::new(&err_cls); - assert_eq!(msg.desc, strange_msg); - assert_eq!(msg.errorkind, "CommandNotFound".to_string()); - let qmp_err = schema::QmpErrorClass::CommandNotFound(strange_msg.clone()); - let resp = Response::create_error_response(qmp_err, None); - assert_eq!(resp.error, Some(msg)); - - let err_cls = schema::QmpErrorClass::DeviceNotFound(strange_msg.clone()); - let msg = ErrorMessage::new(&err_cls); - assert_eq!(msg.desc, strange_msg); - assert_eq!(msg.errorkind, "DeviceNotFound".to_string()); - let qmp_err = schema::QmpErrorClass::DeviceNotFound(strange_msg.clone()); - let resp = Response::create_error_response(qmp_err, None); - assert_eq!(resp.error, Some(msg)); - - let err_cls = schema::QmpErrorClass::KVMMissingCap(strange_msg.clone()); - let msg = ErrorMessage::new(&err_cls); - assert_eq!(msg.desc, strange_msg); - assert_eq!(msg.errorkind, "KVMMissingCap".to_string()); - let qmp_err = schema::QmpErrorClass::KVMMissingCap(strange_msg.clone()); - let resp = Response::create_error_response(qmp_err, None); - assert_eq!(resp.error, Some(msg)); - } -} +pub mod qmp_socket; diff --git a/machine_manager/src/qmp/qmp_channel.rs b/machine_manager/src/qmp/qmp_channel.rs new file mode 100644 index 0000000000000000000000000000000000000000..51063331fa5e940dda2bc50c2018b5cb099eddb1 --- /dev/null +++ b/machine_manager/src/qmp/qmp_channel.rs @@ -0,0 +1,196 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
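+
+//! Global QMP event channel: sends `QmpEvent`s to the client and stores file
+//! descriptors received from it.
+//!
+//! A minimal usage sketch, assuming a `SocketRWHandler` bound to a connected
+//! client (illustrative only, not a doctest):
+//!
+//! ```text
+//! QmpChannel::object_init();          // create the global QMP_CHANNEL once
+//! QmpChannel::bind_writer(writer);    // attach the client's SocketRWHandler
+//! event!(Stop);                       // -> {"event":"STOP", ...}
+//! event!(Shutdown; shutdown_msg);     // event carrying a payload
+//! QmpChannel::unbind();               // detach the writer on disconnect
+//! ```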
+ +use std::collections::BTreeMap; +use std::io::Write; +use std::os::unix::io::RawFd; +use std::sync::{Arc, RwLock}; +use std::time::{SystemTime, UNIX_EPOCH}; + +use log::{error, info, warn}; +use serde::{Deserialize, Serialize}; + +use super::qmp_schema::{self as schema}; +use crate::socket::SocketRWHandler; +use util::time::NANOSECONDS_PER_SECOND; + +static mut QMP_CHANNEL: Option> = None; + +/// Macro `event!`: send event to qmp-client. +/// +/// # Arguments +/// +/// * `$x` - event type +/// * `$y` - event context +/// +/// # Example +/// +/// ```text +/// #[macro_use] +/// use machine_manager::qmp::*; +/// +/// event!(Shutdown; shutdown_msg); +/// event!(Stop); +/// event!(Resume); +/// ``` +#[macro_export] +macro_rules! event { + ( $x:tt ) => {{ + QmpChannel::send_event(&$crate::qmp::qmp_schema::QmpEvent::$x { + data: Default::default(), + timestamp: $crate::qmp::qmp_channel::create_timestamp(), + }); + }}; + ( $x:tt;$y:expr ) => {{ + QmpChannel::send_event(&$crate::qmp::qmp_schema::QmpEvent::$x { + data: $y, + timestamp: $crate::qmp::qmp_channel::create_timestamp(), + }); + }}; +} + +/// `TimeStamp` structure for `QmpEvent`. +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct TimeStamp { + seconds: u64, + microseconds: u64, +} + +/// Constructs a `TimeStamp` struct. +pub fn create_timestamp() -> TimeStamp { + let start = SystemTime::now(); + let since_the_epoch = start + .duration_since(UNIX_EPOCH) + .expect("Time went backwards"); + let seconds = u128::from(since_the_epoch.as_secs()); + let microseconds = + (since_the_epoch.as_nanos() - seconds * u128::from(NANOSECONDS_PER_SECOND)) / (1_000_u128); + TimeStamp { + seconds: seconds as u64, + microseconds: microseconds as u64, + } +} + +/// The struct `QmpChannel` is the only struct can handle Global variable +/// `QMP_CHANNEL`. +/// It is used to send event to qmp client and restore some file descriptor +/// which was sended by client. +pub struct QmpChannel { + /// The `writer` to send `QmpEvent`. + event_writer: RwLock>, + /// Restore file descriptor received from client. + fds: Arc>>, +} + +impl QmpChannel { + /// Constructs a `QmpChannel` in global `QMP_CHANNEL`. + pub fn object_init() { + // SAFETY: Global variable QMP_CHANNEL is only used in the main thread, + // so there are no competition or synchronization. + unsafe { + if QMP_CHANNEL.is_none() { + QMP_CHANNEL = Some(Arc::new(QmpChannel { + event_writer: RwLock::new(None), + fds: Arc::new(RwLock::new(BTreeMap::new())), + })); + } + } + } + + /// Bind a `SocketRWHandler` to `QMP_CHANNEL`. + /// + /// # Arguments + /// + /// * `writer` - The `SocketRWHandler` used to communicate with client. + pub(crate) fn bind_writer(writer: SocketRWHandler) { + *Self::inner().event_writer.write().unwrap() = Some(writer); + } + + /// Unbind `SocketRWHandler` from `QMP_CHANNEL`. + pub(crate) fn unbind() { + *Self::inner().event_writer.write().unwrap() = None; + } + + /// Check whether a `SocketRWHandler` bind with `QMP_CHANNEL` or not. + pub fn is_connected() -> bool { + Self::inner().event_writer.read().unwrap().is_some() + } + + /// Restore extern file descriptor in `QMP_CHANNEL`. + /// + /// # Arguments + /// + /// * `name` - Name of file descriptor. + /// * `fd` - File descriptor sent by client. + pub fn set_fd(name: String, fd: RawFd) { + Self::inner().fds.write().unwrap().insert(name, fd); + } + + /// Get extern file descriptor restored in `QMP_CHANNEL`. + /// + /// # Arguments + /// + /// * `name` - Name of file descriptor. 
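+    ///
+    /// # Returns
+    ///
+    /// `Some(fd)` if a descriptor was stored under `name`, otherwise `None`.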
+ pub fn get_fd(name: &str) -> Option { + Self::inner().fds.read().unwrap().get(name).copied() + } + + /// Send a `QmpEvent` to client. + /// + /// # Arguments + /// + /// * `event` - The `QmpEvent` sent to client. + #[allow(clippy::unused_io_amount)] + pub fn send_event(event: &schema::QmpEvent) { + if Self::is_connected() { + let mut event_str = serde_json::to_string(&event).unwrap(); + let mut writer_locked = Self::inner().event_writer.write().unwrap(); + let writer = writer_locked.as_mut().unwrap(); + + info!("EVENT: --> {:?}", event); + if let Err(e) = writer.flush() { + error!("flush err, {:?}", e); + return; + } + event_str.push_str("\r\n"); + if let Err(e) = writer.write(event_str.as_bytes()) { + error!("write err, {:?}", e); + } + } + } + + fn inner() -> &'static std::sync::Arc { + // SAFETY: Global variable QMP_CHANNEL is only used in the main thread, + // so there are no competition or synchronization. + unsafe { + match &QMP_CHANNEL { + Some(channel) => channel, + None => { + panic!("Qmp channel not initialized"); + } + } + } + } +} + +/// Send device deleted message to qmp client. +pub fn send_device_deleted_msg(id: &str) { + if QmpChannel::is_connected() { + let deleted_event = schema::DeviceDeleted { + device: Some(id.to_string()), + path: format!("/machine/peripheral/{}", id), + }; + event!(DeviceDeleted; deleted_event); + } else { + warn!("Qmp channel is not connected while sending device deleted message"); + } +} diff --git a/machine_manager/src/qmp/qmp_response.rs b/machine_manager/src/qmp/qmp_response.rs new file mode 100644 index 0000000000000000000000000000000000000000..bbf2d41ed45652075da7044825b0c28d376fae3a --- /dev/null +++ b/machine_manager/src/qmp/qmp_response.rs @@ -0,0 +1,280 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use super::qmp_schema::{self as schema}; + +/// Qmp greeting message. +/// +/// # Notes +/// +/// It contains the version of VM or fake Qemu version to adapt others. +#[derive(Default, Debug, Serialize, Deserialize, PartialEq)] +pub(crate) struct QmpGreeting { + #[serde(rename = "QMP")] + qmp: Greeting, +} + +#[derive(Default, Debug, Serialize, Deserialize, PartialEq)] +struct Greeting { + version: Version, + capabilities: Vec, +} + +#[derive(Default, Debug, Serialize, Deserialize, PartialEq)] +pub struct Version { + #[serde(rename = "qemu")] + application: VersionNumber, + package: String, +} + +impl Version { + pub fn new(micro: u8, minor: u8, major: u8) -> Self { + let version_number = VersionNumber { + micro, + minor, + major, + }; + Version { + application: version_number, + package: "StratoVirt-".to_string() + env!("CARGO_PKG_VERSION"), + } + } +} + +#[derive(Default, Debug, Serialize, Deserialize, PartialEq)] +struct VersionNumber { + micro: u8, + minor: u8, + major: u8, +} + +impl QmpGreeting { + /// Create qmp greeting message. + /// + /// # Arguments + /// + /// * `micro` - Micro version number. + /// * `minor` - Minor version number. 
+ /// * `major` - Major version number. + pub(crate) fn create_greeting(micro: u8, minor: u8, major: u8) -> Self { + let version = Version::new(micro, minor, major); + let cap: Vec = Default::default(); + let greeting = Greeting { + version, + capabilities: cap, + }; + QmpGreeting { qmp: greeting } + } +} + +/// `ErrorMessage` for Qmp Response. +#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Eq)] +struct ErrorMessage { + #[serde(rename = "class")] + errorkind: String, + desc: String, +} + +impl ErrorMessage { + fn new(e: &schema::QmpErrorClass) -> Self { + let content = e.to_content(); + let serde_str = serde_json::to_string(&e).unwrap(); + let serde_vec: Vec<&str> = serde_str.split(':').collect(); + let class_name = serde_vec[0]; + let len: usize = class_name.len(); + ErrorMessage { + errorkind: class_name[2..len - 1].to_string(), + desc: content, + } + } +} + +/// Empty message for QMP. +#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub(crate) struct Empty {} + +/// Qmp response to client +/// +/// # Notes +/// +/// It contains two kind response: `BadResponse` and `GoodResponse`. This two +/// kind response are fit by executing qmp command by success and failure. +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Response { + #[serde(rename = "return", default, skip_serializing_if = "Option::is_none")] + return_: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + error: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + id: Option, +} + +impl Response { + /// Create qmp response with inner `Value` and `id`. + /// + /// # Arguments + /// + /// * `v` - The `Value` of qmp `return` field. + /// * `id` - The `id` for qmp `Response`, it must be equal to `Request`'s `id`. + pub fn create_response(v: Value, id: Option) -> Self { + Response { + return_: Some(v), + error: None, + id, + } + } + + /// Create a empty qmp response, `return` field will be empty. + pub fn create_empty_response() -> Self { + Response { + return_: Some(serde_json::to_value(Empty {}).unwrap()), + error: None, + id: None, + } + } + + /// Create a error qmo response with `err_class` and `id`. + /// # Arguments + /// + /// * `err_class` - The `QmpErrorClass` of qmp `error` field. + /// * `id` - The `id` for qmp `Response`, it must be equal to `Request`'s `id`. 
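+    ///
+    /// # Examples
+    ///
+    /// A minimal sketch (the id value is illustrative):
+    ///
+    /// ```text
+    /// let e = QmpErrorClass::GenericError("Invalid Qmp command arguments!".to_string());
+    /// let resp = Response::create_error_response(e, Some("3".to_string()));
+    /// // serializes to:
+    /// // {"error":{"class":"GenericError","desc":"Invalid Qmp command arguments!"},"id":"3"}
+    /// ```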
+ pub fn create_error_response(err_class: schema::QmpErrorClass, id: Option) -> Self { + Response { + return_: None, + error: Some(ErrorMessage::new(&err_class)), + id, + } + } + + pub(crate) fn change_id(&mut self, id: Option) { + self.id = id; + } +} + +impl From for Response { + fn from(value: bool) -> Self { + if value { + Response::create_empty_response() + } else { + Response::create_error_response( + schema::QmpErrorClass::GenericError(String::new()), + None, + ) + } + } +} + +#[cfg(test)] +mod tests { + use serde_json; + + use super::*; + use crate::qmp::qmp_schema; + + #[test] + fn test_qmp_greeting_msg() { + let greeting_msg = QmpGreeting::create_greeting(1, 0, 5); + + let json_msg = r#" + { + "QMP":{ + "version":{ + "qemu":{ + "micro": 1, + "minor": 0, + "major": 5 + }, + "package": "StratoVirt-2.4.0" + }, + "capabilities": [] + } + } + "#; + let greeting_from_json: QmpGreeting = serde_json::from_str(json_msg).unwrap(); + + assert_eq!(greeting_from_json, greeting_msg); + } + + #[test] + fn test_qmp_resp() { + // 1.Empty response and ID change; + let mut resp = Response::create_empty_response(); + resp.change_id(Some("0".to_string())); + + let json_msg = r#"{"return":{},"id":"0"}"#; + assert_eq!(serde_json::to_string(&resp).unwrap(), json_msg); + + resp.change_id(Some("1".to_string())); + let json_msg = r#"{"return":{},"id":"1"}"#; + assert_eq!(serde_json::to_string(&resp).unwrap(), json_msg); + + // 2.Normal response + let resp_value = qmp_schema::StatusInfo { + singlestep: false, + running: true, + status: qmp_schema::RunState::running, + }; + let resp = Response::create_response(serde_json::to_value(resp_value).unwrap(), None); + + let json_msg = r#"{"return":{"running":true,"singlestep":false,"status":"running"}}"#; + assert_eq!(serde_json::to_string(&resp).unwrap(), json_msg); + + // 3.Error response + let qmp_err = + qmp_schema::QmpErrorClass::GenericError("Invalid Qmp command arguments!".to_string()); + let resp = Response::create_error_response(qmp_err, None); + + let json_msg = + r#"{"error":{"class":"GenericError","desc":"Invalid Qmp command arguments!"}}"#; + assert_eq!(serde_json::to_string(&resp).unwrap(), json_msg); + } + + #[test] + fn test_create_error_response() { + let strange_msg = "!?/.,、。’】= -~1!@#¥%……&*()——+".to_string(); + + let err_cls = qmp_schema::QmpErrorClass::GenericError(strange_msg.clone()); + let msg = ErrorMessage::new(&err_cls); + assert_eq!(msg.desc, strange_msg); + assert_eq!(msg.errorkind, "GenericError".to_string()); + let qmp_err = qmp_schema::QmpErrorClass::GenericError(strange_msg.clone()); + let resp = Response::create_error_response(qmp_err, None); + assert_eq!(resp.error, Some(msg)); + + let err_cls = qmp_schema::QmpErrorClass::CommandNotFound(strange_msg.clone()); + let msg = ErrorMessage::new(&err_cls); + assert_eq!(msg.desc, strange_msg); + assert_eq!(msg.errorkind, "CommandNotFound".to_string()); + let qmp_err = qmp_schema::QmpErrorClass::CommandNotFound(strange_msg.clone()); + let resp = Response::create_error_response(qmp_err, None); + assert_eq!(resp.error, Some(msg)); + + let err_cls = qmp_schema::QmpErrorClass::DeviceNotFound(strange_msg.clone()); + let msg = ErrorMessage::new(&err_cls); + assert_eq!(msg.desc, strange_msg); + assert_eq!(msg.errorkind, "DeviceNotFound".to_string()); + let qmp_err = qmp_schema::QmpErrorClass::DeviceNotFound(strange_msg.clone()); + let resp = Response::create_error_response(qmp_err, None); + assert_eq!(resp.error, Some(msg)); + + let err_cls = 
qmp_schema::QmpErrorClass::KVMMissingCap(strange_msg.clone()); + let msg = ErrorMessage::new(&err_cls); + assert_eq!(msg.desc, strange_msg); + assert_eq!(msg.errorkind, "KVMMissingCap".to_string()); + let qmp_err = qmp_schema::QmpErrorClass::KVMMissingCap(strange_msg); + let resp = Response::create_error_response(qmp_err, None); + assert_eq!(resp.error, Some(msg)); + } +} diff --git a/machine_manager/src/qmp/qmp_schema.rs b/machine_manager/src/qmp/qmp_schema.rs index 0058e9429b39ad02c160521a9f6d75512efd6720..ba43fc0113e0a2cf54905d492ca5a4e9b838f6cb 100644 --- a/machine_manager/src/qmp/qmp_schema.rs +++ b/machine_manager/src/qmp/qmp_schema.rs @@ -10,12 +10,15 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use serde::{Deserialize, Serialize}; pub use serde_json::Value as Any; + +use serde::de::DeserializeOwned; +use serde::{Deserialize, Serialize}; use strum_macros::{EnumIter, EnumString, EnumVariantNames}; -use super::Version; -use crate::qmp::{Command, Empty, TimeStamp}; +use super::qmp_channel::TimeStamp; +use super::qmp_response::{Empty, Version}; +use util::aio::AioEngine; /// A error enum for qmp #[allow(clippy::upper_case_acronyms)] @@ -50,294 +53,110 @@ impl QmpErrorClass { } } -/// A enum to store all command struct -#[derive(Debug, Clone, Serialize, Deserialize, EnumIter, EnumVariantNames, EnumString)] -#[serde(tag = "execute")] -#[serde(deny_unknown_fields)] -pub enum QmpCommand { - #[serde(rename = "qmp_capabilities")] - qmp_capabilities { - #[serde(default)] - arguments: qmp_capabilities, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - quit { - #[serde(default)] - arguments: quit, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - stop { - #[serde(default)] - arguments: stop, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - cont { - #[serde(default)] - arguments: cont, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - device_add { - arguments: Box, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - device_del { - arguments: device_del, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - netdev_add { - arguments: Box, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - netdev_del { - arguments: netdev_del, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-hotpluggable-cpus")] - #[strum(serialize = "query-hotpluggable-cpus")] - query_hotpluggable_cpus { - #[serde(default)] - arguments: query_hotpluggable_cpus, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-cpus")] - #[strum(serialize = "query-cpus")] - query_cpus { - #[serde(default)] - arguments: query_cpus, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-status")] - query_status { - #[serde(default)] - arguments: query_status, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - getfd { - arguments: getfd, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "blockdev-add")] - blockdev_add { - arguments: Box, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "blockdev-del")] - blockdev_del { - arguments: blockdev_del, - #[serde(default, 
skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "balloon")] - balloon { - #[serde(default)] - arguments: balloon, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-balloon")] - query_balloon { - #[serde(default)] - arguments: query_balloon, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "migrate")] - migrate { - arguments: migrate, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-migrate")] - query_migrate { - #[serde(default)] - arguments: query_migrate, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-version")] - query_version { - #[serde(default)] - arguments: query_version, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-commands")] - query_commands { - #[serde(default)] - arguments: query_commands, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-target")] - query_target { - #[serde(default)] - arguments: query_target, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-kvm")] - query_kvm { - #[serde(default)] - arguments: query_kvm, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-machines")] - query_machines { - #[serde(default)] - arguments: query_machines, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-events")] - #[strum(serialize = "query-events")] - query_events { - #[serde(default)] - arguments: query_events, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "qom-list-types")] - list_type { - #[serde(default)] - arguments: list_type, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "device-list-properties")] - device_list_properties { - #[serde(default)] - arguments: device_list_properties, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "block-commit")] - #[strum(serialize = "block-commit")] - block_commit { - #[serde(default)] - arguments: block_commit, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-tpm-models")] - query_tpm_models { - #[serde(default)] - arguments: query_tpm_models, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-tpm-types")] - query_tpm_types { - #[serde(default)] - arguments: query_tpm_types, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-command-line-options")] - query_command_line_options { - #[serde(default)] - arguments: query_command_line_options, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-migrate-capabilities")] - query_migrate_capabilities { - #[serde(default)] - arguments: query_migrate_capabilities, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-qmp-schema")] - query_qmp_schema { - #[serde(default)] - arguments: query_qmp_schema, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-sev-capabilities")] - query_sev_capabilities 
{ - #[serde(default)] - arguments: query_sev_capabilities, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-chardev")] - #[strum(serialize = "query-chardev")] - query_chardev { - #[serde(default)] - arguments: query_chardev, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "qom-list")] - #[strum(serialize = "qom-list")] - qom_list { - #[serde(default)] - arguments: qom_list, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "qom_get")] - #[strum(serialize = "qom_get")] - qom_get { - #[serde(default)] - arguments: qom_get, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-block")] - #[strum(serialize = "query-block")] - query_block { - #[serde(default)] - arguments: query_block, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-named-block-nodes")] - #[strum(serialize = "query-named-block-nodes")] - query_named_block_nodes { - #[serde(default)] - arguments: query_named_block_nodes, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-blockstats")] - #[strum(serialize = "query-blockstats")] - query_blockstats { - #[serde(default)] - arguments: query_blockstats, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-gic-capabilities")] - #[strum(serialize = "query-gic-capabilities")] - query_gic_capabilities { - #[serde(default)] - arguments: query_gic_capabilities, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, - #[serde(rename = "query-iothreads")] - #[strum(serialize = "query-iothreads")] - query_iothreads { - #[serde(default)] - arguments: query_iothreads, - #[serde(default, skip_serializing_if = "Option::is_none")] - id: Option, - }, +macro_rules! define_qmp_command_enum { + ($($command:ident($name:expr, $args_type:ty, $need_strum:ident $(, $serde_default:ident)?)),*) => { + /// A enum to store all command struct + #[derive(Debug, Clone, Serialize, Deserialize, EnumIter, EnumVariantNames, EnumString)] + #[serde(tag = "execute")] + #[serde(deny_unknown_fields)] + pub enum QmpCommand { + $( + #[serde(rename = $name)] + #[cfg_attr($need_strum, strum(serialize = $name))] + $command { + $(#[serde($serde_default)])? + arguments: $args_type, + #[serde(default, skip_serializing_if = "Option::is_none")] + id: Option, + }, + )* + } + }; +} + +// QMP command enum definition example: command("name", arguments, ..) 
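+// Each entry expands to one `QmpCommand` variant:
+//   * the string is the wire name, used for `#[serde(rename)]` (and, when the
+//     third field is TRUE, for an extra `#[strum(serialize)]` attribute);
+//   * the type is the struct carrying the command's arguments;
+//   * the optional trailing `default` adds `#[serde(default)]` so that the
+//     "arguments" field may be omitted on the wire.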
+define_qmp_command_enum!( + qmp_capabilities("qmp_capabilities", qmp_capabilities, FALSE, default), + quit("quit", quit, FALSE, default), + stop("stop", stop, FALSE, default), + cont("cont", cont, FALSE, default), + system_powerdown("system_powerdown", system_powerdown, FALSE, default), + system_reset("system_reset", system_reset, FALSE, default), + device_add("device_add", Box, FALSE), + device_del("device_del", device_del, FALSE), + chardev_add("chardev-add", chardev_add, FALSE), + chardev_remove("chardev-remove", chardev_remove, FALSE), + netdev_add("netdev_add", Box, FALSE), + netdev_del("netdev_del", netdev_del, FALSE), + cameradev_add("cameradev_add", cameradev_add, FALSE), + cameradev_del("cameradev_del", cameradev_del, FALSE), + query_hotpluggable_cpus("query-hotpluggable-cpus", query_hotpluggable_cpus, TRUE, default), + query_cpus("query-cpus", query_cpus, TRUE, default), + query_status("query-status", query_status, FALSE, default), + getfd("getfd", getfd, FALSE), + blockdev_add("blockdev-add", Box, FALSE), + blockdev_del("blockdev-del", blockdev_del, FALSE), + balloon("balloon", balloon, FALSE, default), + query_mem("query-mem", query_mem, FALSE, default), + query_mem_gpa("query-mem-gpa", query_mem_gpa, FALSE, default), + query_balloon("query-balloon", query_balloon, FALSE, default), + query_vnc("query-vnc", query_vnc, TRUE, default), + query_display_image("query-display-image", query_display_image, FALSE, default), + switch_audio_record("switch-audio-record", switch_audio_record, FALSE), + migrate("migrate", migrate, FALSE), + query_migrate("query-migrate", query_migrate, FALSE, default), + cancel_migrate("migrate_cancel", cancel_migrate, FALSE, default), + query_version("query-version", query_version, FALSE, default), + query_commands("query-commands", query_commands, FALSE, default), + query_target("query-target", query_target, FALSE, default), + query_kvm("query-kvm", query_kvm, FALSE, default), + query_machines("query-machines", query_machines, FALSE, default), + query_events("query-events", query_events, TRUE, default), + list_type("qom-list-types", list_type, FALSE, default), + device_list_properties("device-list-properties", device_list_properties, FALSE, default), + block_commit("block-commit", block_commit, TRUE, default), + query_tpm_models("query-tpm-models", query_tpm_models, FALSE, default), + query_tpm_types("query-tpm-types", query_tpm_types, FALSE, default), + query_command_line_options("query-command-line-options", query_command_line_options, FALSE, default), + query_migrate_capabilities("query-migrate-capabilities", query_migrate_capabilities, FALSE, default), + query_qmp_schema("query-qmp-schema", query_qmp_schema, FALSE, default), + query_sev_capabilities("query-sev-capabilities", query_sev_capabilities, FALSE, default), + query_chardev("query-chardev", query_chardev, TRUE, default), + qom_list("qom-list", qom_list, TRUE, default), + qom_get("qom-get", qom_get, TRUE, default), + query_block("query-block", query_block, TRUE, default), + query_named_block_nodes("query-named-block-nodes", query_named_block_nodes, TRUE, default), + query_blockstats("query-blockstats", query_blockstats, TRUE, default), + query_block_jobs("query-block-jobs", query_block_jobs, TRUE, default), + query_gic_capabilities("query-gic-capabilities", query_gic_capabilities, TRUE, default), + query_iothreads("query-iothreads", query_iothreads, TRUE, default), + update_region("update_region", update_region, TRUE, default), + input_event("input_event", input_event, FALSE, default), + 
human_monitor_command("human-monitor-command", human_monitor_command, FALSE), + blockdev_snapshot_internal_sync("blockdev-snapshot-internal-sync", blockdev_snapshot_internal, FALSE), + blockdev_snapshot_delete_internal_sync("blockdev-snapshot-delete-internal-sync", blockdev_snapshot_internal, FALSE), + query_vcpu_reg("query-vcpu-reg", query_vcpu_reg, FALSE), + trace_get_state("trace-get-state", trace_get_state, FALSE), + trace_set_state("trace-set-state", trace_set_state, FALSE), + query_workloads("query-workloads", query_workloads, FALSE) +); + +/// Command trait for Deserialize and find back Response. +trait Command: Serialize { + type Res: DeserializeOwned; + fn back(self) -> Self::Res; +} + +macro_rules! generate_command_impl { + ($name:ident, $res_type:ty) => { + impl Command for $name { + type Res = $res_type; + + fn back(self) -> Self::Res { + Default::default() + } + } + }; } /// qmp_capabilities @@ -353,43 +172,29 @@ pub enum QmpCommand { #[derive(Default, Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct qmp_capabilities {} - -impl Command for qmp_capabilities { - type Res = Empty; - - fn back(self) -> Empty { - Default::default() - } -} +generate_command_impl!(qmp_capabilities, Empty); /// quit /// /// This command will cause the StratoVirt process to exit gracefully. While every /// attempt is made to send the QMP response before terminating, this is not -/// guaranteed. When using this interface, a premature EOF would not be +/// guaranteed. When using this interface, a premature EOF would not be /// unexpected. /// /// # Examples /// /// ```text /// -> { "execute": "quit" } -/// <- { "return": {}} +/// <- { "return": {} } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct quit {} - -impl Command for quit { - type Res = Empty; - - fn back(self) -> Empty { - Default::default() - } -} +generate_command_impl!(quit, Empty); /// stop /// -/// Stop all guest VCPU execution +/// Stop all guest VCPU execution. /// /// # Examples /// @@ -400,14 +205,7 @@ impl Command for quit { #[derive(Default, Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct stop {} - -impl Command for stop { - type Res = Empty; - - fn back(self) -> Empty { - Default::default() - } -} +generate_command_impl!(stop, Empty); /// cont /// @@ -422,14 +220,37 @@ impl Command for stop { #[derive(Default, Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct cont {} +generate_command_impl!(cont, Empty); -impl Command for cont { - type Res = Empty; +/// system_powerdown +/// +/// Requests that a guest perform a powerdown operation. +/// +/// # Examples +/// +/// ```test +/// -> { "execute": "system_powerdown" } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct system_powerdown {} +generate_command_impl!(system_powerdown, Empty); - fn back(self) -> Empty { - Default::default() - } -} +/// system_reset +/// +/// Reset guest VCPU execution. 
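+/// The StratoVirt process keeps running; only the guest is reset.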
+/// +/// # Examples +/// +/// ```text +/// -> { "execute": "system_reset" } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct system_reset {} +generate_command_impl!(system_reset, Empty); /// device_add /// @@ -445,7 +266,7 @@ impl Command for cont { /// /// ```text /// -> { "execute": "device_add", -/// "arguments": { "id": "net-0", "driver": "virtio-net-mmio", "addr": "0x0"}} +/// "arguments": { "id": "net-0", "driver": "virtio-net-mmio", "addr": "0x0" } } /// <- { "return": {} } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] @@ -471,6 +292,8 @@ pub struct device_add { pub mac: Option, #[serde(rename = "netdev")] pub netdev: Option, + #[serde(rename = "chardev")] + pub chardev: Option, #[serde(rename = "disable-modern")] pub disable_modern: Option, #[serde(rename = "mq")] @@ -482,23 +305,89 @@ pub struct device_add { pub iothread: Option, pub multifunction: Option, pub host: Option, + #[serde(rename = "num-queues")] + pub queues: Option, + pub boot_index: Option, + pub sysfsdev: Option, + #[serde(rename = "queue-size")] + pub queue_size: Option, + pub port: Option, + pub backend: Option, + pub path: Option, + pub cameradev: Option, + pub hostbus: Option, + pub hostaddr: Option, + pub hostport: Option, + pub vendorid: Option, + pub productid: Option, + pub isobufs: Option, + pub isobsize: Option, + #[serde(rename = "cpu-id")] + pub cpu_id: Option, } pub type DeviceAddArgument = device_add; +generate_command_impl!(device_add, Empty); -impl Command for device_add { - type Res = Empty; - - fn back(self) -> Empty { - Default::default() - } -} +/// update_region +/// +/// # Arguments +/// +/// * `update_type` - update type: add or delete. +/// * `region_type` - the type of the region: io, ram_device, rom_device. +/// * `offset` - the offset of the father region. +/// * `size` - the size of the region. +/// * `priority` - the priority of the region. +/// * `romd` - read only mode. +/// * `ioeventfd` - is there an ioeventfd. +/// * `ioeventfd_data` - the matching data for ioeventfd. +/// * `ioeventfd_size` - the size of matching data. +/// +/// Additional arguments depend on the type. 
+/// +/// # Examples +/// +/// ```text +/// -> { "execute": "update_region", +/// "arguments": { "update_type": "add", "region_type": "io_region", +/// "offset": 0, "size": 4096, "priority": 99 } } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct update_region { + #[serde(rename = "update_type")] + pub update_type: String, + #[serde(rename = "region_type")] + pub region_type: String, + #[serde(rename = "offset")] + pub offset: u64, + #[serde(rename = "size")] + pub size: u64, + #[serde(rename = "priority")] + pub priority: u64, + #[serde(rename = "read_only_mode")] + pub romd: Option, + #[serde(rename = "ioeventfd")] + pub ioeventfd: Option, + #[serde(rename = "ioeventfd_data")] + pub ioeventfd_data: Option, + #[serde(rename = "ioeventfd_size")] + pub ioeventfd_size: Option, + #[serde(rename = "device_fd_path")] + pub device_fd_path: Option, +} + +pub type UpdateRegionArgument = update_region; +generate_command_impl!(update_region, Empty); #[derive(Default, Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct FileOptions { pub driver: String, pub filename: String, + #[serde(default)] + pub aio: AioEngine, } #[derive(Default, Debug, Clone, Serialize, Deserialize)] @@ -524,9 +413,9 @@ pub struct CacheOptions { /// /// ```text /// -> { "execute": "blockdev_add", -/// "arguments": {"node-name": "drive-0", -/// "file": {"driver": "file", "filename": "/path/to/block"}, -/// "cache": {"direct": true}, "read-only": false }} +/// "arguments": { "node-name": "drive-0", +/// "file": { "driver": "file", "filename": "/path/to/block" }, +/// "cache": { "direct": true }, "read-only": false } } /// <- { "return": {} } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] @@ -535,11 +424,12 @@ pub struct blockdev_add { #[serde(rename = "node-name")] pub node_name: String, pub file: FileOptions, + pub media: Option, pub cache: Option, #[serde(rename = "read-only")] pub read_only: Option, - #[serde(rename = "read-zeros")] - pub read_zeros: Option, + #[serde(rename = "detect-zeroes")] + pub detect_zeroes: Option, pub driver: Option, pub backing: Option, pub discard: Option, @@ -547,17 +437,14 @@ pub struct blockdev_add { pub options: Option, #[serde(rename = "throttling.iops-total")] pub iops: Option, + #[serde(rename = "l2-cache-size")] + pub l2_cache_size: Option, + #[serde(rename = "refcount-cache-size")] + pub refcount_cache_size: Option, } pub type BlockDevAddArgument = blockdev_add; - -impl Command for blockdev_add { - type Res = Empty; - - fn back(self) -> Empty { - Default::default() - } -} +generate_command_impl!(blockdev_add, Empty); /// netdev_add /// @@ -573,7 +460,7 @@ impl Command for blockdev_add { /// /// ```text /// -> { "execute": "netdev_add", -/// "arguments": {"id": "net-0", "ifname": "tap0", "fds": 123 }} +/// "arguments": { "id": "net-0", "ifname": "tap0", "fds": 123 } } /// <- { "return": {} } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] @@ -582,27 +469,132 @@ pub struct netdev_add { pub id: String, #[serde(rename = "ifname")] pub if_name: Option, + pub fd: Option, pub fds: Option, pub dnssearch: Option, #[serde(rename = "type")] pub net_type: Option, - pub vhost: Option, + pub vhost: Option, + pub vhostfd: Option, pub vhostfds: Option, - pub ifname: Option, pub downscript: Option, pub script: Option, - pub queues: Option, + pub queues: Option, + pub chardev: Option, } pub type NetDevAddArgument = netdev_add; +generate_command_impl!(netdev_add, 
Empty); + +/// cameradev_add +/// +/// # Arguments +/// +/// * `id` - the device's ID, must be unique. +/// * `path` - the backend camera file, eg. /dev/video0. +/// * `driver` - the backend type, eg. v4l2. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "cameradev_add", +/// "arguments": { "id": "cam0", "driver": "v4l2", "path": "/dev/video0" } } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct cameradev_add { + pub id: String, + pub path: Option, + pub driver: String, +} -impl Command for netdev_add { - type Res = Empty; +pub type CameraDevAddArgument = cameradev_add; +generate_command_impl!(cameradev_add, Empty); - fn back(self) -> Empty { - Default::default() - } +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct AddrDataOptions { + pub path: String, +} + +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct AddrOptions { + #[serde(rename = "type")] + pub addr_type: String, + #[serde(rename = "data")] + pub addr_data: AddrDataOptions, +} + +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct BackendDataOptions { + pub addr: AddrOptions, + pub server: bool, +} + +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct BackendOptions { + #[serde(rename = "type")] + pub backend_type: String, + #[serde(rename = "data")] + pub backend_data: BackendDataOptions, +} + +/// chardev-add +/// +/// # Arguments +/// +/// * `id` - the character device's ID, must be unique. +/// * `backend` - the chardev backend info. +/// +/// Additional arguments depend on the type. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "chardev-add", +/// "arguments": { "id": "chardev_id", "backend": { "type": "socket", "data": { +/// "addr": { "type": "unix", "data": { "path": "/path/to/socket" } }, +/// "server": false } } } } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct chardev_add { + pub id: String, + pub backend: BackendOptions, +} + +pub type CharDevAddArgument = chardev_add; +generate_command_impl!(chardev_add, Empty); + +/// chardev-remove +/// +/// Remove a chardev backend. +/// +/// # Arguments +/// +/// * `id` - The ID of the character device. +/// +/// # Errors +/// +/// If `id` is not a valid chardev backend, DeviceNotFound. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "chardev-remove", "arguments": { "id": "chardev_id" } } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct chardev_remove { + pub id: String, } +generate_command_impl!(chardev_remove, Empty); /// device_del /// @@ -628,8 +620,7 @@ impl Command for netdev_add { /// # Examples /// /// ```text -/// -> { "execute": "device_del", -/// "arguments": { "id": "net-0" } } +/// -> { "execute": "device_del", "arguments": { "id": "net-0" } } /// <- { "return": {} } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] @@ -637,29 +628,33 @@ impl Command for netdev_add { pub struct device_del { pub id: String, } +generate_command_impl!(device_del, Empty); -impl Command for device_del { - type Res = Empty; - - fn back(self) -> Empty { - Default::default() - } -} - +/// blockdev-del +/// +/// Remove a block device. 
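+/// The `node-name` refers to a node previously added with `blockdev-add`.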
+/// +/// # Arguments +/// +/// * `node_name` - The name of the device node to remove. +/// +/// # Errors +/// +/// If `node_name` is not a valid device, DeviceNotFound. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "blockdev-del", "arguments": { "node-name": "node0" } } +/// <- { "return": {} } +/// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct blockdev_del { #[serde(rename = "node-name")] pub node_name: String, } - -impl Command for blockdev_del { - type Res = Empty; - - fn back(self) -> Empty { - Default::default() - } -} +generate_command_impl!(blockdev_del, Empty); /// netdev_del /// @@ -671,7 +666,7 @@ impl Command for blockdev_del { /// /// # Errors /// -/// If `id` is not a valid network backend, DeviceNotFound +/// If `id` is not a valid network backend, DeviceNotFound. /// /// # Examples /// @@ -684,16 +679,36 @@ impl Command for blockdev_del { pub struct netdev_del { pub id: String, } +generate_command_impl!(netdev_del, Empty); -impl Command for netdev_del { - type Res = Empty; - - fn back(self) -> Empty { - Default::default() - } +/// cameradev_del +/// +/// Remove a camera backend. +/// +/// # Arguments +/// +/// * `id` - The name of the camera backend to remove. +/// +/// # Errors +/// +/// If `id` is not a valid camera backend, DeviceNotFound. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "cameradev_del", "arguments": { "id": "cam0" } } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct cameradev_del { + pub id: String, } +generate_command_impl!(cameradev_del, Empty); -/// query-hotpluggable-cpus: +/// query-hotpluggable-cpus +/// +/// Query which CPU types could be plugged. 
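+/// CPUs that are already present additionally report their `qom-path`.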
/// /// # Returns /// @@ -704,29 +719,21 @@ impl Command for netdev_del { /// For pc machine type started with -smp 1,maxcpus=2: /// ```text /// -> { "execute": "query-hotpluggable-cpus" } -/// <- {"return": [ +/// <- { "return": [ /// { /// "type": host-x-cpu", "vcpus-count": 1, -/// "props": {"core-id": 0, "socket-id": 1, "thread-id": 0} +/// "props": {"core-id": 0, "socket-id": 1, "thread-id": 0 } /// }, /// { /// "qom-path": "/machine/unattached/device[0]", /// "type": "host-x-cpu", "vcpus-count": 1, -/// "props": {"core-id": 0, "socket-id": 0, "thread-id": 0} -/// } -/// ]} +/// "props": { "core-id": 0, "socket-id": 0, "thread-id": 0 } +/// } ] } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct query_hotpluggable_cpus {} - -impl Command for query_hotpluggable_cpus { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} +generate_command_impl!(query_hotpluggable_cpus, Vec); #[allow(clippy::upper_case_acronyms)] #[derive(Default, Debug, Clone, Serialize, Deserialize)] @@ -747,13 +754,23 @@ pub struct CpuInstanceProperties { pub node_id: Option, #[serde(rename = "socket-id", default, skip_serializing_if = "Option::is_none")] pub socket_id: Option, + #[cfg(target_arch = "x86_64")] + #[serde(rename = "die_id", default, skip_serializing_if = "Option::is_none")] + pub die_id: Option, + #[cfg(target_arch = "aarch64")] + #[serde( + rename = "cluster_id", + default, + skip_serializing_if = "Option::is_none" + )] + pub cluster_id: Option, #[serde(rename = "thread-id", default, skip_serializing_if = "Option::is_none")] pub thread_id: Option, #[serde(rename = "core-id", default, skip_serializing_if = "Option::is_none")] pub core_id: Option, } -/// query-cpus: +/// query-cpus /// /// This command causes vCPU threads to exit to userspace, which causes /// a small interruption to guest CPU execution. 
This will have a negative @@ -785,20 +802,27 @@ pub struct CpuInstanceProperties { /// "qom_path":"/machine/unattached/device[2]", /// "arch":"x86", /// "thread_id":3135 -/// } -/// ] -/// } +/// } ] } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct query_cpus {} +generate_command_impl!(query_cpus, Vec); -impl Command for query_cpus { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CpuInfoCommon { + #[serde(rename = "current")] + pub current: bool, + #[serde(rename = "qom_path")] + pub qom_path: String, + #[serde(rename = "halted")] + pub halted: bool, + #[serde(rename = "props", default, skip_serializing_if = "Option::is_none")] + pub props: Option, + #[serde(rename = "CPU")] + pub CPU: isize, + #[serde(rename = "thread_id")] + pub thread_id: isize, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -806,36 +830,16 @@ impl Command for query_cpus { pub enum CpuInfo { #[serde(rename = "x86")] x86 { - #[serde(rename = "current")] - current: bool, - #[serde(rename = "qom_path")] - qom_path: String, - #[serde(rename = "halted")] - halted: bool, - #[serde(rename = "props", default, skip_serializing_if = "Option::is_none")] - props: Option, - #[serde(rename = "CPU")] - CPU: isize, - #[serde(rename = "thread_id")] - thread_id: isize, + #[serde(flatten)] + common: CpuInfoCommon, #[serde(flatten)] #[serde(rename = "x86")] x86: CpuInfoX86, }, #[serde(rename = "arm")] Arm { - #[serde(rename = "current")] - current: bool, - #[serde(rename = "qom_path")] - qom_path: String, - #[serde(rename = "halted")] - halted: bool, - #[serde(rename = "props", default, skip_serializing_if = "Option::is_none")] - props: Option, - #[serde(rename = "CPU")] - CPU: isize, - #[serde(rename = "thread_id")] - thread_id: isize, + #[serde(flatten)] + common: CpuInfoCommon, #[serde(flatten)] #[serde(rename = "Arm")] arm: CpuInfoArm, @@ -860,21 +864,12 @@ pub struct CpuInfoArm {} /// /// ```text /// -> { "execute": "query-status" } -/// <- { "return": { "running": true, -/// "singlestep": false, -/// "status": "running" } } +/// <- { "return": { "running": true, "singlestep": false, "status": "running" } } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct query_status {} - -impl Command for query_status { - type Res = StatusInfo; - - fn back(self) -> StatusInfo { - Default::default() - } -} +generate_command_impl!(query_status, StatusInfo); #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct StatusInfo { @@ -886,9 +881,10 @@ pub struct StatusInfo { pub status: RunState, } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, Default)] pub enum RunState { #[serde(rename = "debug")] + #[default] debug, #[serde(rename = "inmigrate")] inmigrate, @@ -924,12 +920,6 @@ pub enum RunState { preconfig, } -impl Default for RunState { - fn default() -> Self { - RunState::debug - } -} - /// migrate /// /// Migrates the current running guest to another VM or file. @@ -937,33 +927,47 @@ impl Default for RunState { /// # Arguments /// /// * `uri` - the Uniform Resource Identifier of the destination VM or file. 
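+///
+/// For a live migration the URI typically has the form `tcp:<ip>:<port>`, as in
+/// the example below.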
+/// +/// # Examples +/// +/// ```text +/// -> { "execute": "migrate", "arguments": { "uri": "tcp:0:4446" } } +/// <- { "return": {} } +/// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct migrate { #[serde(rename = "uri")] pub uri: String, } +generate_command_impl!(migrate, Empty); -impl Command for migrate { - type Res = Empty; - - fn back(self) -> Empty { - Default::default() - } -} - -/// query-migrate: +/// query-migrate /// /// Returns information about current migration. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "query-migrate" } +/// <- { "return": {} } +/// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_migrate {} +generate_command_impl!(query_migrate, MigrationInfo); -impl Command for query_migrate { - type Res = MigrationInfo; - - fn back(self) -> MigrationInfo { - Default::default() - } -} +/// migrate_cancel +/// +/// Cancel migrate the current VM. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "migrate_cancel" } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct cancel_migrate {} +generate_command_impl!(cancel_migrate, MigrationInfo); #[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct MigrationInfo { @@ -973,7 +977,7 @@ pub struct MigrationInfo { /// getfd /// -/// Receive a file descriptor via SCM rights and assign it a name +/// Receive a file descriptor via SCM rights and assign it a name. /// /// # Arguments /// @@ -991,150 +995,129 @@ pub struct getfd { #[serde(rename = "fdname")] pub fd_name: String, } +generate_command_impl!(getfd, Empty); -impl Command for getfd { - type Res = Empty; - - fn back(self) -> Empty { - Default::default() - } -} - -/// Shutdown +/// switch_audio_record /// -/// Emitted when the virtual machine has shut down, indicating that StratoVirt is -/// about to exit. -/// -/// # Notes +/// Control the authority of audio record /// -/// If the command-line option "-no-shutdown" has been specified, StratoVirt -/// will not exit, and a STOP event will eventually follow the SHUTDOWN event -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -#[serde(deny_unknown_fields)] -pub struct Shutdown { - /// If true, the shutdown was triggered by a guest request (such as - /// a guest-initiated ACPI shutdown request or other hardware-specific - /// action) rather than a host request (such as sending StratoVirt a SIGINT). - #[serde(rename = "guest")] - pub guest: bool, - pub reason: String, -} - -/// Reset -/// -/// Emitted when the virtual machine is reset -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -#[serde(deny_unknown_fields)] -pub struct Reset { - /// If true, the reset was triggered by a guest request (such as - /// a guest-initiated ACPI reboot request or other hardware-specific action - /// ) rather than a host request (such as the QMP command system_reset). - #[serde(rename = "guest")] - pub guest: bool, -} - -/// Stop -/// -/// Emitted when the virtual machine is stopped -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -#[serde(deny_unknown_fields)] -pub struct Stop {} - -/// Resume -/// -/// Emitted when the virtual machine resumes execution -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -#[serde(deny_unknown_fields)] -pub struct Resume {} - -/// DeviceDeleted +/// # Arguments /// -/// Emitted whenever the device removal completion is acknowledged by the guest. -/// At this point, it's safe to reuse the specified device ID. 
Device removal can -/// be initiated by the guest or by HMP/QMP commands. +/// * `authorized` - on or off. /// /// # Examples /// /// ```text -/// <- { "event": "DEVICE_DELETED", -/// "data": { "device": "virtio-net-mmio-0", -/// "path": "/machine/peripheral/virtio-net-mmio-0" }, -/// "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } +/// -> { "execute": "switch_audio_record", "arguments": { "authorized": "on" } } +/// <- { "return": {} } /// ``` -#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[derive(Default, Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] -pub struct DeviceDeleted { - /// Device name. - #[serde(rename = "device", default, skip_serializing_if = "Option::is_none")] - pub device: Option, - /// Device path. - #[serde(rename = "path")] - pub path: String, -} - -#[derive(Debug, Clone, Serialize, Deserialize, EnumIter, EnumVariantNames, EnumString)] -#[serde(tag = "event")] -pub enum QmpEvent { - #[serde(rename = "SHUTDOWN")] - Shutdown { - data: Shutdown, - timestamp: TimeStamp, - }, - #[serde(rename = "RESET")] - Reset { data: Reset, timestamp: TimeStamp }, - #[serde(rename = "STOP")] - Stop { - #[serde(default)] - data: Stop, - timestamp: TimeStamp, - }, - #[serde(rename = "RESUME")] - Resume { - #[serde(default)] - data: Resume, - timestamp: TimeStamp, - }, - #[serde(rename = "DEVICE_DELETED")] - DeviceDeleted { - data: DeviceDeleted, - timestamp: TimeStamp, - }, - #[serde(rename = "BALLOON_CHANGED")] - BalloonChanged { - data: BalloonInfo, - timestamp: TimeStamp, - }, +pub struct switch_audio_record { + #[serde(rename = "authorized")] + pub authorized: String, } +generate_command_impl!(switch_audio_record, Empty); -/// query-balloon: +/// query-balloon /// /// Query the actual size of memory of VM. /// /// # Returns /// -/// `BalloonInfo` includs the actual size of memory +/// `BalloonInfo` includs the actual size of memory. /// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "query-balloon" } -/// <- {"return":{"actual":8589934592}} +/// <- { "return": { "actual": 8589934592 } } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_balloon {} -impl Command for query_balloon { - type Res = BalloonInfo; - fn back(self) -> BalloonInfo { - Default::default() - } -} +generate_command_impl!(query_balloon, BalloonInfo); #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct BalloonInfo { pub actual: u64, } -/// balloon: +/// query-vnc +/// +/// Information about current VNC server. 
+/// +/// # Examples +/// +/// For pc machine type started with -vnc ip:port(for example: 0.0.0.0:0): +/// ```text +/// -> { "execute": "query-vnc" } +/// <- { "return": { +/// "enabled": true, +/// "host": "0.0.0.0", +/// "service": "50401", +/// "auth": "None", +/// "family": "ipv4", +/// "clients": [ +/// "host": "127.0.0.1", +/// "service": "50401", +/// "family": "ipv4", +/// ] } } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct query_vnc {} +generate_command_impl!(query_vnc, VncInfo); + +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct VncInfo { + #[serde(rename = "enabled")] + pub enabled: bool, + #[serde(rename = "host")] + pub host: String, + #[serde(rename = "service")] + pub service: String, + #[serde(rename = "auth")] + pub auth: String, + #[serde(rename = "family")] + pub family: String, + #[serde(rename = "clients")] + pub clients: Vec, +} + +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct VncClientInfo { + #[serde(rename = "host")] + pub host: String, + #[serde(rename = "service")] + pub service: String, + #[serde(rename = "family")] + pub family: String, +} + +/// query-display-image +/// +/// Information about image of stratovirt. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "query-display-image" } +/// <- { "return": { +/// "fileDir": /tmp/stratovirt-images, +/// "isSuccess": true, } } +/// `` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct query_display_image {} +generate_command_impl!(query_display_image, GpuInfo); + +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct GpuInfo { + #[serde(rename = "isSuccess")] + pub isSuccess: bool, + #[serde(rename = "fileDir")] + pub fileDir: String, +} + +/// balloon /// /// Advice VM to change memory size with the argument `value`. /// @@ -1144,126 +1127,105 @@ pub struct BalloonInfo { /// /// # Notes /// -/// This is only an advice instead of command to VM, -/// therefore, the VM changes its memory according to `value` and its condation. +/// This is only an advice instead of command to VM, therefore, the VM changes +/// its memory according to `value` and its condation. /// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "balloon", "arguments": { "value": 589934492 } } -/// <- {"return":{}} +/// <- { "return": {} } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct balloon { #[serde(rename = "value")] pub value: u64, } +generate_command_impl!(balloon, Empty); -impl Command for balloon { - type Res = Empty; - fn back(self) -> Empty { - Default::default() - } -} - -/// version: +/// query-version /// /// Query version of StratoVirt. /// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "query-version" } -/// <- {"return":{"package":"StratoVirt-0.3.0","qemu":{"major":4,"micro":0,"minor":1}}} +/// <- { "return": { +/// "version": { "qemu": { "minor": 1, "micro": 0, "major": 5 }, +/// "package": "StratoVirt-2.4.0" }, +/// "capabilities": [] } } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_version {} +generate_command_impl!(query_version, Version); -impl Command for query_version { - type Res = Version; - - fn back(self) -> Version { - Default::default() - } -} - -/// Query commands: +/// query-commands /// /// Query all qmp commands of StratoVirt. 
/// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "query-commands" } -/// <- {"return":[{"name":"qmp_capabilities"},{"name":"quit"},{"name":"stop"}, -/// {"name":"cont"},{"name":"device_add"},{"name":"device_del"},{"name":"netdev_add"}, -/// {"name":"netdev_del"},{"name":"query-hotpluggable-cpus"},{"name":"query-cpus"}, -/// {"name":"query_status"},{"name":"getfd"},{"name":"blockdev_add"}, -/// {"name":"blockdev_del"},{"name":"balloon"},{"name":"query_balloon"}, -/// {"name":"migrate"},{"name":"query_migrate"},{"name":"query_version"}, -/// {"name":"query_target"},{"name":"query_commands"}]} +/// <- { "return": [ { "name": "qmp_capabilities" }, { "name": "quit" }, { "name": "stop" }, +/// { "name": "cont" }, { "name": "system_powerdown" }, { "name": "system_reset" }, +/// { "name": "device_add" }, { "name": "device_del" }, { "name": "chardev_add" }, +/// { "name": "chardev_remove" }, { "name": "netdev_add" }, { "name": "netdev_del" }, +/// { "name" : "cameradev_add" }, { "name": "cameradev_del" }, +/// { "name": "query-hotpluggable-cpus" }, { "name": "query-cpus" } ] } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_commands {} +generate_command_impl!(query_commands, Vec); #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct Cmd { pub name: String, } -impl Command for query_commands { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} - -/// Query target: +/// query-target /// /// Query the target platform where the StratoVirt is running. /// -/// # Example +/// # Examples /// /// ```text /// # for X86 platform. /// -> { "execute": "query-target" } -/// <- {"return":{"arch":"x86_64"}} +/// <- { "return": { "arch": "x86_64" } } /// /// # for Aarch64 platform. /// -> { "execute": "query-target" } -/// <- {"return":{"arch":"aarch64"}} +/// <- { "return": { "arch": "aarch64" } } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_target {} +generate_command_impl!(query_target, Target); #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct Target { pub arch: String, } -impl Command for query_target { - type Res = Target; - - fn back(self) -> Target { - Default::default() - } -} - -/// Query machines: +/// query-machines /// /// Query machine information. 
/// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "query-machines" } -/// <- {"return":[{"cpu-max":255,"deprecated":false,"hotpluggable-cpus":true,"name":"none","numa-mem-supported":false}, -/// {"cpu-max":255,"deprecated":false,"hotpluggable-cpus":true,"name":"microvm","numa-mem-supported":false}, -/// {"cpu-max":255,"deprecated":false,"hotpluggable-cpus":true,"name":"standardvm","numa-mem-supported":false}]} +/// <- { "return": [ { "cpu-max": 255, "deprecated": false, "hotpluggable-cpus": true, +/// "name": "none", "numa-mem-supported": false }, +/// { "cpu-max": 255, "deprecated": false, "hotpluggable-cpus": true, +/// "name": "microvm", "numa-mem-supported": false }, +/// { "cpu-max": 255, "deprecated": false, "hotpluggable-cpus": true, +/// "name": "standardvm", "numa-mem-supported": false } ] } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_machines {} +generate_command_impl!(query_machines, Vec); #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct MachineInfo { @@ -1277,25 +1239,17 @@ pub struct MachineInfo { pub deprecated: bool, } -impl Command for query_machines { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} - -/// Query events: +/// query-events /// /// Query all events of StratoVirt. /// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "query-events" } -/// <- {"return":[{"name":"Shutdown"},{"name":"Reset"}, -/// {"name":"Stop"},{"name":"Resume"},{"name":"DeviceDeleted"}, -/// {"name":"BalloonChanged"}]} +/// <- { "return": [ { "name": "Shutdown" }, { "name": "Reset" }, +/// { "name": "Stop" }, { "name": "Resume" }, { "name": "DeviceDeleted" }, +/// { "name": "BalloonChanged" } ] } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct Events { @@ -1304,27 +1258,21 @@ pub struct Events { #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_events {} +generate_command_impl!(query_events, Vec); -impl Command for query_events { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} - -/// Query KVM: +/// query-kvm /// /// Query if KVM is enabled. /// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "query-kvm" } -/// <- {"return":{"enabled":true,"present":true}} +/// <- { "return": { "enabled": true, "present": true } } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_kvm {} +generate_command_impl!(query_kvm, KvmInfo); #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct KvmInfo { @@ -1332,27 +1280,22 @@ pub struct KvmInfo { pub present: bool, } -impl Command for query_kvm { - type Res = KvmInfo; - - fn back(self) -> KvmInfo { - Default::default() - } -} - -/// List all Qom type. +/// qom-list-types +/// +/// This command will return a list of types given search parameters. 
/// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "qom-list-types" } -/// <- {"return":[{"name":"ioh3420","parent":"pcie-root-port-base"}, -/// {"name":"pcie-root-port","parent":"pcie-root-port-base"}, -/// {"name":"pcie-pci-bridge","parent":"base-pci-bridge"}, -/// {"name":"pci-bridge","parent":"base-pci-bridge"}]} +/// <- { "return": [ { "name": "ioh3420", "parent": "pcie-root-port-base" }, +/// { "name": "pcie-root-port", "parent": "pcie-root-port-base" }, +/// { "name": "pcie-pci-bridge", "parent": "base-pci-bridge" }, +/// { "name": "pci-bridge", "parent": "base-pci-bridge" } ] } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct list_type {} +generate_command_impl!(list_type, Vec); #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct TypeLists { @@ -1366,26 +1309,21 @@ impl TypeLists { } } -impl Command for list_type { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} - -/// Get device list properties. +/// device-list-properties +/// +/// List properties associated with a device. /// -/// # Example +/// # Examples /// /// ```text -/// -> { "execute": "device-list-properties", "arguments": {"typename": "virtio-blk-pci"} } -/// <- {"return":[]} +/// -> { "execute": "device-list-properties", "arguments": { "typename": "virtio-blk-pci" } } +/// <- { "return": [] } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct device_list_properties { pub typename: String, } +generate_command_impl!(device_list_properties, Vec); #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct DeviceProps { @@ -1394,80 +1332,58 @@ pub struct DeviceProps { pub prop_type: String, } -impl Command for device_list_properties { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} - #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct block_commit {} +generate_command_impl!(block_commit, Vec); -impl Command for block_commit { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} - +/// query-tpm-models +/// /// Query tpm models of StratoVirt. /// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "query-tpm-models" } -/// <- {"return":[]} +/// <- { "return": [] } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_tpm_models {} +generate_command_impl!(query_tpm_models, Vec); -impl Command for query_tpm_models { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} - +/// query-tpm-types +/// /// Query target of StratoVirt. /// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "query-tpm-types" } -/// <- {"return":[]} +/// <- { "return": [] } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_tpm_types {} +generate_command_impl!(query_tpm_types, Vec); -impl Command for query_tpm_types { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} - +/// query-command-line-options +/// /// Query command line options. 
/// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "query-command-line-options" } -/// <- {"return":[]} +/// <- { "return": [] } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_command_line_options {} +generate_command_impl!(query_command_line_options, Vec); #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct CmdParameter { - name: String, - help: String, + pub name: String, + pub help: String, #[serde(rename = "type")] - paramter_type: String, + pub parameter_type: String, } #[derive(Default, Debug, Clone, Serialize, Deserialize)] @@ -1476,24 +1392,19 @@ pub struct CmdLine { pub option: String, } -impl Command for query_command_line_options { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} - +/// query-migrate-capabilities +/// /// Query capabilities of migration. /// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "query-migrate-capabilities" } -/// <- {"return":[]} +/// <- { "return": [] } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_migrate_capabilities {} +generate_command_impl!(query_migrate_capabilities, Vec); #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct MigrateCapabilities { @@ -1501,62 +1412,47 @@ pub struct MigrateCapabilities { pub capability: String, } -impl Command for query_migrate_capabilities { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} - +/// query-qmp-schema +/// /// Query target of StratoVirt. /// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "query-qmp-schema" } -/// <- {"return":{}} +/// <- { "return": {} } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_qmp_schema {} +generate_command_impl!(query_qmp_schema, Empty); -impl Command for query_qmp_schema { - type Res = Empty; - - fn back(self) -> Empty { - Default::default() - } -} - +/// query-sev-capabilities +/// /// Query capabilities of sev. /// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "query-sev-capabilities" } -/// <- {"return":{}} +/// <- { "return": {} } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct query_sev_capabilities {} +generate_command_impl!(query_sev_capabilities, Empty); -impl Command for query_sev_capabilities { - type Res = Empty; - - fn back(self) -> Empty { - Default::default() - } -} - +/// qom-list +/// /// List all Qom. /// -/// # Example +/// # Examples /// /// ```text /// -> { "execute": "qom-list" } -/// <- {"return":[]} +/// <- { "return": [] } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct qom_list {} +generate_command_impl!(qom_list, Vec); #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct PropList { @@ -1565,184 +1461,572 @@ pub struct PropList { pub prop_type: String, } -impl Command for qom_list { - type Res = Vec; +/// query-chardev +/// +/// Query char devices. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "query-chardev" } +/// <- { "return": [] } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct query_chardev {} +generate_command_impl!(query_chardev, Vec); - fn back(self) -> Vec { - Default::default() - } +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct ChardevInfo { + #[serde(rename = "frontend-open")] + pub open: bool, + pub filename: String, + pub label: String, } -/// Query char devices. +/// qom-get +/// +/// Get qom properties. 
+/// +/// # Examples +/// +/// ```text +/// -> { "execute": "qom-get" } +/// <- { "return": [] } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct qom_get {} +generate_command_impl!(qom_get, bool); + +/// query-block +/// +/// Query blocks of StratoVirt. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "query-block" } +/// <- { "return": [] } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct query_block {} +generate_command_impl!(query_block, Vec); + +/// query-named-block-nodes +/// +/// Query named block node. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "query-named-block-nodes" } +/// <- { "return": [] } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct query_named_block_nodes {} +generate_command_impl!(query_named_block_nodes, Vec); + +/// query-blockstats +/// +/// Query status of blocks. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "query-blockstats" } +/// <- { "return": [] } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct query_blockstats {} +generate_command_impl!(query_blockstats, Vec); + +/// query-block-jobs +/// +/// Query jobs of blocks. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "query-block-jobs" } +/// <- { "return": [] } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct query_block_jobs {} +generate_command_impl!(query_block_jobs, Vec); + +/// query-gic-capabilities +/// +/// Query capabilities of gic. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "query-gic-capabilities" } +/// <- { "return": [] } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct query_gic_capabilities {} +generate_command_impl!(query_gic_capabilities, Vec); + +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct GicCap { + emulated: bool, + version: u32, + kernel: bool, +} + +/// query-iothreads +/// +/// Query information of iothreads. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "query-iothreads" } +/// <- { "return": [] } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct query_iothreads {} +generate_command_impl!(query_iothreads, Vec); + +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct IothreadInfo { + #[serde(rename = "poll-shrink")] + pub shrink: u32, + #[serde(rename = "thread-id")] + pub pid: u32, + #[serde(rename = "poll-grow")] + pub grow: u32, + #[serde(rename = "poll-max-ns")] + pub max: u32, + pub id: String, +} + +/// input_event +/// +/// # Arguments +/// +/// * `key` - the input type such as 'keyboard' or 'pointer'. +/// * `value` - the input value. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "input_event", +/// "arguments": { "key": "pointer", "value": "100,200,1" } } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct input_event { + pub key: String, + pub value: String, +} +generate_command_impl!(input_event, Vec); + +/// human-monitor-command +/// +/// # Arguments +/// +/// * `command_line` - the command line will be executed. 
+/// +/// # Examples +/// +/// ```text +/// -> { "execute": "human-monitor-command", +/// "arguments": { "command-line": "drive_add dummy +/// file=/path/to/file,format=raw,if=none,id=drive-id" } } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct human_monitor_command { + #[serde(rename = "command-line")] + pub command_line: String, +} +pub type HumanMonitorCmdArgument = human_monitor_command; + +/// blockdev-snapshot-internal-sync +/// +/// Create disk internal snapshot. +/// +/// # Arguments +/// +/// * `device` - the valid block device. +/// * `name` - the snapshot name. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "blockdev-snapshot-internal-sync", +/// "arguments": { "device": "disk0", "name": "snapshot1" } } +/// <- { "return": {} } +/// ``` +/// +/// blockdev-snapshot-delete-internal-sync +/// +/// Delete disk internal snapshot. +/// +/// # Arguments +/// +/// * `device` - the valid block device. +/// * `name` - the snapshot name. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "blockdev-snapshot-delete-internal-sync", +/// "arguments": { "device": "disk0", "name": "snapshot1" } } +/// <- { "return": { +/// "id": "1", +/// "name": "snapshot0", +/// "vm-state-size": 0, +/// "date-sec": 1000012, +/// "date-nsec": 10, +/// "vm-clock-sec": 100, +/// "vm-clock-nsec": 20, +/// "icount": 220414 +/// } } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct blockdev_snapshot_internal { + pub device: String, + pub name: String, +} +pub type BlockdevSnapshotInternalArgument = blockdev_snapshot_internal; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SnapshotInfo { + #[serde(rename = "id")] + pub id: String, + #[serde(rename = "name")] + pub name: String, + #[serde(rename = "vm-state-size")] + pub vm_state_size: u64, + #[serde(rename = "date-sec")] + pub date_sec: u32, + #[serde(rename = "date-nsec")] + pub date_nsec: u32, + #[serde(rename = "vm-clock-nsec")] + pub vm_clock_nsec: u64, + #[serde(rename = "icount")] + pub icount: u64, +} + +/// query-mem +/// +/// Query memory address space flat. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "query-mem" } +/// <- { "return": {} } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct query_mem {} +generate_command_impl!(query_mem, Empty); + +/// query-vcpu-reg +/// +/// Query vcpu register value. +/// +/// # Arguments +/// +/// * `addr` - the register addr will be query. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "query-vcpu-reg", +/// "arguments": { "addr": "603000000013df1a", "vcpu": 0 } } +/// <- { "return": "348531C5" } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct query_vcpu_reg { + #[serde(rename = "addr")] + pub addr: String, + #[serde(rename = "vcpu")] + pub vcpu: usize, +} +pub type QueryVcpuRegArgument = query_vcpu_reg; + +/// query-mem-gpa +/// +/// Query the value of the guest physical address. +/// +/// # Examples +/// +/// ```text +/// -> { "execute": "query-mem-gpa", "arguments": { "gpa": "13c4d1d00" } } +/// <- { "return": "B9000001" } +/// ``` +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct query_mem_gpa { + #[serde(rename = "gpa")] + pub gpa: String, +} +pub type QueryMemGpaArgument = query_mem_gpa; + +macro_rules! 
define_qmp_event_enum { + ($($event:ident($name:expr, $data_type:ty $(, $serde_default:ident)?)),*) => { + /// A enum to store all event struct + #[derive(Debug, Clone, Serialize, Deserialize, EnumIter, EnumVariantNames, EnumString)] + #[serde(tag = "event")] + pub enum QmpEvent { + $( + #[serde(rename = $name)] + #[strum(serialize = $name)] + $event { + $(#[serde($serde_default)])? + data: $data_type, + timestamp: TimeStamp, + }, + )* + } + }; +} + +// QMP event enum definition example: event("name", data, ..) +define_qmp_event_enum!( + Shutdown("SHUTDOWN", Shutdown), + Reset("RESET", Reset), + Stop("STOP", Stop, default), + Resume("RESUME", Resume, default), + Powerdown("POWERDOWN", Powerdown, default), + CpuResize("CPU_RESIZE", CpuResize, default), + DeviceDeleted("DEVICE_DELETED", DeviceDeleted), + BalloonChanged("BALLOON_CHANGED", BalloonInfo) +); + +/// Shutdown +/// +/// Emitted when the virtual machine has shut down, indicating that StratoVirt is +/// about to exit. +/// +/// # Notes +/// +/// If the command-line option "-no-shutdown" has been specified, StratoVirt +/// will not exit, and a STOP event will eventually follow the SHUTDOWN event. +/// +/// # Examples +/// +/// ```text +/// <- { "event": "SHUTDOWN", +/// "data": { "guest": true, "reason": "guest-shutdown" }, +/// "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(deny_unknown_fields)] +pub struct Shutdown { + /// If true, the shutdown was triggered by a guest request (such as + /// a guest-initiated ACPI shutdown request or other hardware-specific + /// action) rather than a host request (such as sending StratoVirt a SIGINT). + #[serde(rename = "guest")] + pub guest: bool, + pub reason: String, +} + +/// Reset +/// +/// Emitted when the virtual machine is reset. +/// +/// # Examples +/// +/// ```text +/// <- { "event": "RESET", +/// "data": { "guest": false }, +/// "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(deny_unknown_fields)] +pub struct Reset { + /// If true, the reset was triggered by a guest request (such as + /// a guest-initiated ACPI reboot request or other hardware-specific action + /// ) rather than a host request (such as the QMP command system_reset). + #[serde(rename = "guest")] + pub guest: bool, +} + +/// Stop +/// +/// Emitted when the virtual machine is stopped. +/// +/// # Examples +/// +/// ```text +/// <- { "event": "STOP", +/// "data": {}, +/// "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(deny_unknown_fields)] +pub struct Stop {} + +/// Resume /// -/// # Example +/// Emitted when the virtual machine resumes execution. 
+/// +/// # Examples /// /// ```text -/// -> { "execute": "query-chardev" } -/// <- {"return":[]} +/// <- { "event": "RESUME", +/// "data": {}, +/// "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } /// ``` -#[derive(Default, Debug, Clone, Serialize, Deserialize)] -pub struct query_chardev {} - -#[derive(Default, Debug, Clone, Serialize, Deserialize)] -pub struct ChardevInfo { - #[serde(rename = "frontend-open")] - pub open: bool, - pub filename: String, - pub label: String, -} - -impl Command for query_chardev { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(deny_unknown_fields)] +pub struct Resume {} -/// Get qom properties. +/// Powerdown /// -/// # Example +/// Emitted when the virtual machine powerdown execution. +/// +/// # Examples /// /// ```text -/// -> { "execute": "qom_get" } -/// <- {"return":[]} +/// <- { "event": "POWERDOWN", +/// "data": {}, +/// "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } /// ``` -#[derive(Default, Debug, Clone, Serialize, Deserialize)] -pub struct qom_get {} - -impl Command for qom_get { - type Res = bool; - - fn back(self) -> bool { - Default::default() - } -} +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(deny_unknown_fields)] +pub struct Powerdown {} -/// Query blocks of StratoVirt. +/// CpuResize /// -/// # Example +/// Emitted when the virtual machine cpu hot(un)plug execution. +/// +/// # Examples /// /// ```text -/// -> { "execute": "query-block" } -/// <- {"return":[]} +/// <- { "event": "CPU_RESIZE", +/// "data": {}, +/// "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } /// ``` -#[derive(Default, Debug, Clone, Serialize, Deserialize)] -pub struct query_block {} - -impl Command for query_block { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(deny_unknown_fields)] +pub struct CpuResize {} -/// Query named block node. +/// DeviceDeleted /// -/// # Example +/// Emitted whenever the device removal completion is acknowledged by the guest. +/// At this point, it's safe to reuse the specified device ID. Device removal can +/// be initiated by the guest or by HMP/QMP commands. +/// +/// # Examples /// /// ```text -/// -> { "execute": "query-named-block-nodes" } -/// <- {"return":[]} +/// <- { "event": "DEVICE_DELETED", +/// "data": { "device": "virtio-net-mmio-0", +/// "path": "/machine/peripheral/virtio-net-mmio-0" }, +/// "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } /// ``` -#[derive(Default, Debug, Clone, Serialize, Deserialize)] -pub struct query_named_block_nodes {} - -impl Command for query_named_block_nodes { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(deny_unknown_fields)] +pub struct DeviceDeleted { + /// Device name. + #[serde(rename = "device", default, skip_serializing_if = "Option::is_none")] + pub device: Option, + /// Device path. + #[serde(rename = "path")] + pub path: String, } -/// Query status of blocks. 
+/// trace-get-state +/// +/// # Arguments +/// +/// * `name` - event name pattern /// -/// # Example +/// # Examples /// /// ```text -/// -> { "execute": "query-blockstats" } -/// <- {"return":[]} +/// -> { "execute": "trace-get-state", +/// "arguments": { "name": "event_name" } } +/// <- { "return": [ { "name": "event_name", "state": "disabled" } ] } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] -pub struct query_blockstats {} - -impl Command for query_blockstats { - type Res = Vec; +#[serde(deny_unknown_fields)] +pub struct trace_get_state { + #[serde(rename = "name")] + pub pattern: String, +} +pub type TraceGetArgument = trace_get_state; - fn back(self) -> Vec { - Default::default() - } +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub struct TraceInfo { + pub name: String, + pub state: bool, } -/// Query capabilities of gic. +/// trace-set-state +/// +/// # Arguments +/// +/// * `name` - event name pattern +/// * `enable` - whether to enable tracing /// -/// # Example +/// # Examples /// /// ```text -/// -> { "execute": "query-gic-capabilities" } -/// <- {"return":[]} +/// -> { "execute": "trace-set-state", +/// "arguments": { "name": "event_name", +/// "enable": true } } +/// <- { "return": {} } /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] -pub struct query_gic_capabilities {} - -#[derive(Default, Debug, Clone, Serialize, Deserialize)] -pub struct GicCap { - emulated: bool, - version: u32, - kernel: bool, -} - -impl Command for query_gic_capabilities { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } +#[serde(deny_unknown_fields)] +pub struct trace_set_state { + #[serde(rename = "name")] + pub pattern: String, + #[serde(rename = "enable")] + pub enable: bool, } +pub type TraceSetArgument = trace_set_state; -/// Query information of iothreads. +/// query_workloads /// -/// # Example +/// Query the current workloads of the running VM. +/// +/// # Examples /// /// ```text -/// -> { "execute": "query-iothreads" } -/// <- {"return":[]} +/// -> {"execute": "query-workloads", "arguments": {}} +/// <- {"return":[{"module":"scream-play","state":"Off"},{"module":"tap-0","state":"upload: 0 download: 0"}]} /// ``` #[derive(Default, Debug, Clone, Serialize, Deserialize)] -pub struct query_iothreads {} - -#[derive(Default, Debug, Clone, Serialize, Deserialize)] -pub struct IothreadInfo { - #[serde(rename = "poll-shrink")] - pub shrink: u32, - #[serde(rename = "thread-id")] - pub pid: u32, - #[serde(rename = "poll-grow")] - pub grow: u32, - #[serde(rename = "poll-max-ns")] - pub max: u32, - pub id: String, -} - -impl Command for query_iothreads { - type Res = Vec; - - fn back(self) -> Vec { - Default::default() - } -} +#[serde(deny_unknown_fields)] +pub struct query_workloads {} +generate_command_impl!(query_workloads, Empty); #[cfg(test)] mod tests { use super::*; + #[test] + fn test_qmp_event_msg() { + let event_json = + r#"{"event":"STOP","data":{},"timestamp":{"seconds":1575531524,"microseconds":91519}}"#; + let qmp_event: QmpEvent = serde_json::from_str(event_json).unwrap(); + match qmp_event { + QmpEvent::Stop { + data: _, + timestamp: _, + } => { + assert!(true); + } + _ => assert!(false), + } + } + #[test] fn test_qmp_unexpected_arguments() { // qmp: quit. let json_msg = r#" - { + { "execute": "quit" } "#; @@ -1755,8 +2039,8 @@ mod tests { // unexpected arguments for quit. let json_msg = r#" - { - "execute": "quit" , + { + "execute": "quit" , "arguments": "isdf" } "#; @@ -1769,7 +2053,7 @@ mod tests { // qmp: stop. 
let json_msg = r#" - { + { "execute": "stop" } "#; @@ -1782,8 +2066,8 @@ mod tests { // unexpected arguments for stop. let json_msg = r#" - { - "execute": "stop" , + { + "execute": "stop" , "arguments": "isdf" } "#; @@ -1796,7 +2080,7 @@ mod tests { // qmp: cont. let json_msg = r#" - { + { "execute": "cont" } "#; @@ -1809,8 +2093,8 @@ mod tests { // unexpected arguments for count. let json_msg = r#" - { - "execute": "cont" , + { + "execute": "cont" , "arguments": "isdf" } "#; @@ -1821,9 +2105,36 @@ mod tests { let ret_msg = r#"invalid type: string "isdf", expected struct cont"#; assert!(err_msg == ret_msg); + // qmp: system_reset. + let json_msg = r#" + { + "execute": "system_reset" + } + "#; + let err_msg = match serde_json::from_str::(json_msg) { + Ok(_) => "ok".to_string(), + Err(e) => e.to_string(), + }; + let ret_msg = r#"ok"#; + assert!(err_msg == ret_msg); + + // unexpected arguments for system_reset. + let json_msg = r#" + { + "execute": "system_reset" , + "arguments": "isdf" + } + "#; + let err_msg = match serde_json::from_str::(json_msg) { + Ok(_) => "ok".to_string(), + Err(e) => e.to_string(), + }; + let ret_msg = r#"invalid type: string "isdf", expected struct system_reset"#; + assert!(err_msg == ret_msg); + // qmp: query-hotpluggable-cpus. let json_msg = r#" - { + { "execute": "query-hotpluggable-cpus" } "#; @@ -1836,8 +2147,8 @@ mod tests { // unexpected arguments for query-hotpluggable-cpus. let json_msg = r#" - { - "execute": "query-hotpluggable-cpus" , + { + "execute": "query-hotpluggable-cpus" , "arguments": "isdf" } "#; @@ -1850,7 +2161,7 @@ mod tests { // qmp: query-cpus. let json_msg = r#" - { + { "execute": "query-cpus" } "#; @@ -1863,8 +2174,8 @@ mod tests { // unexpected arguments for query-cpus. let json_msg = r#" - { - "execute": "query-cpus" , + { + "execute": "query-cpus" , "arguments": "isdf" } "#; @@ -1877,7 +2188,7 @@ mod tests { // qmp: query-ststus. let json_msg = r#" - { + { "execute": "query-status" } "#; @@ -1890,8 +2201,8 @@ mod tests { // unexpected arguments for query-status. let json_msg = r#" - { - "execute": "query-status" , + { + "execute": "query-status" , "arguments": "isdf" } "#; @@ -1907,11 +2218,11 @@ mod tests { fn test_wrong_qmp_arguments() { // right arguments for device_add. let json_msg = r#" - { - "execute": "device_add" , + { + "execute": "device_add" , "arguments": { - "id":"net-0", - "driver":"virtio-net-mmio", + "id":"net-0", + "driver":"virtio-net-mmio", "addr":"0x0" } } @@ -1925,11 +2236,11 @@ mod tests { // unknow arguments for device_add. let json_msg = r#" - { - "execute": "device_add" , + { + "execute": "device_add" , "arguments": { - "id":"net-0", - "driver":"virtio-net-mmio", + "id":"net-0", + "driver":"virtio-net-mmio", "addr":"0x0", "UnknowArg": "should go to error" } @@ -1944,11 +2255,11 @@ mod tests { // wrong spelling arguments for device_add. let json_msg = r#" - { - "execute": "device_add" , + { + "execute": "device_add" , "arguments": { - "id":"net-0", - "driv":"virtio-net-mmio", + "id":"net-0", + "driv":"virtio-net-mmio", "addr":"0x0" } } @@ -1962,8 +2273,8 @@ mod tests { // right arguments for device_del. let json_msg = r#" - { - "execute": "device_del" , + { + "execute": "device_del" , "arguments": { "id": "net-1" } @@ -1978,8 +2289,8 @@ mod tests { // wrong arguments for device_del. let json_msg = r#" - { - "execute": "device_del" , + { + "execute": "device_del" , "arguments": { "value": "h8i" } @@ -1996,7 +2307,7 @@ mod tests { // missing arguments for getfd. 
let json_msg = r#" - { + { "execute": "getfd" } "#; @@ -2009,8 +2320,8 @@ mod tests { // unexpected arguments for getfd. let json_msg = r#" - { - "execute": "getfd" , + { + "execute": "getfd" , "arguments": "isdf" } "#; @@ -2023,10 +2334,10 @@ mod tests { // right arguments for getfd. let json_msg = r#" - { + { "execute": "getfd", - "arguments": { - "fdname": "fd1" + "arguments": { + "fdname": "fd1" } } "#; @@ -2039,17 +2350,17 @@ mod tests { // right arguments for blockdev-add. let json_msg = r#" - { + { "execute": "blockdev-add", "arguments": { - "node-name": "drive-0", + "node-name": "drive-0", "file": { - "driver": "file", + "driver": "file", "filename": "/path/to/block" - }, + }, "cache": { "direct": true - }, + }, "read-only": false } } @@ -2063,11 +2374,11 @@ mod tests { // right arguments for device-add. let json_msg = r#" - { + { "execute": "device_add", "arguments": { - "id": "drive-0", - "driver": "virtio-blk-mmio", + "id": "drive-0", + "driver": "virtio-blk-mmio", "addr": "0x1" } } @@ -2078,31 +2389,14 @@ mod tests { }; let ret_msg = r#"ok"#; assert!(err_msg == ret_msg); - - // right arguments for netdev-add. - let json_msg = r#" - { - "execute": "netdev_add", - "arguments": { - "id": "net-0", - "ifname":"tap0" - } - } - "#; - let err_msg = match serde_json::from_str::(json_msg) { - Ok(_) => "ok".to_string(), - Err(e) => e.to_string(), - }; - let ret_msg = r#"ok"#; - assert!(err_msg == ret_msg); } #[test] fn test_unsupported_commands() { // unsupported qmp command. let json_msg = r#" - { - "execute": "hello-world" , + { + "execute": "hello-world" , } "#; let err_msg = match serde_json::from_str::(json_msg) { @@ -2114,8 +2408,8 @@ mod tests { // unsupported qmp command, and unknow field. let json_msg = r#" - { - "execute": "hello-world" , + { + "execute": "hello-world" , "arguments": { "msg": "hello", } @@ -2133,8 +2427,8 @@ mod tests { fn test_qmp_commands() { // query-version let json_msg = r#" - { - "execute": "query-version" + { + "execute": "query-version" } "#; let err_msg = match serde_json::from_str::(json_msg) { @@ -2146,8 +2440,8 @@ mod tests { // query-target let json_msg = r#" - { - "execute": "query-target" + { + "execute": "query-target" } "#; let err_msg = match serde_json::from_str::(json_msg) { @@ -2159,8 +2453,126 @@ mod tests { // query-commands let json_msg = r#" - { - "execute": "query-commands" + { + "execute": "query-commands" + } + "#; + let err_msg = match serde_json::from_str::(json_msg) { + Ok(_) => "ok".to_string(), + Err(e) => e.to_string(), + }; + let part_msg = r#"ok"#; + assert!(err_msg.contains(part_msg)); + } + + #[test] + fn test_qmp_netdev_add() { + // Normal netdev_add test. + let json_msg = r#" + { + "execute": "netdev_add", + "arguments": { + "id": "net0", + "ifname": "tap0", + "fd": "11", + "fds": "fd-net00:fd-net01", + "dnssearch": "test", + "type": "vhost-user", + "vhost": true, + "vhostfd": "21", + "vhostfds": "vhostfd-net00:vhostfd-net01", + "downscript": "/etc/ifdown.sh", + "script": "/etc/ifup.sh", + "queues": 16, + "chardev": "char_dev_name" + } + } + "#; + let err_msg = match serde_json::from_str::(json_msg) { + Ok(_) => "ok".to_string(), + Err(e) => e.to_string(), + }; + let part_msg = r#"ok"#; + assert!(err_msg.contains(part_msg)); + + // Abnormal netdev_add test with invalid vhost type. 
+ let json_msg = r#" + { + "execute": "netdev_add", + "arguments": { + "vhost": "invalid_type" + } + } + "#; + let err_msg = match serde_json::from_str::(json_msg) { + Ok(_) => "ok".to_string(), + Err(e) => e.to_string(), + }; + let part_msg = r#"expected a boolean"#; + assert!(err_msg.contains(part_msg)); + + // Abnormal netdev_add test with invalid queues type. + let json_msg = r#" + { + "execute": "netdev_add", + "arguments": { + "queues": "invalid_type" + } + } + "#; + let err_msg = match serde_json::from_str::(json_msg) { + Ok(_) => "ok".to_string(), + Err(e) => e.to_string(), + }; + let part_msg = r#"expected u16"#; + assert!(err_msg.contains(part_msg)); + } + + #[test] + fn test_qmp_input_event() { + // key event + let json_msg = r#" + { + "execute": "input_event" , + "arguments": { + "key": "keyboard", + "value": "2,1" + } + } + "#; + let err_msg = match serde_json::from_str::(json_msg) { + Ok(_) => "ok".to_string(), + Err(e) => e.to_string(), + }; + let part_msg = r#"ok"#; + assert!(err_msg.contains(part_msg)); + // pointer event + let json_msg = r#" + { + "execute": "input_event" , + "arguments": { + "key": "pointer", + "value": "4,5,1" + } + } + "#; + let err_msg = match serde_json::from_str::(json_msg) { + Ok(_) => "ok".to_string(), + Err(e) => e.to_string(), + }; + let part_msg = r#"ok"#; + assert!(err_msg.contains(part_msg)); + } + + #[test] + fn test_qmp_human_monitor_command() { + // Normal test. + let json_msg = r#" + { + "execute": "human-monitor-command" , + "arguments": { + "command-line": "drive_add dummy file=/path/to/file,format=raw,if=none,id=drive-id" + } } "#; let err_msg = match serde_json::from_str::(json_msg) { @@ -2169,5 +2581,21 @@ mod tests { }; let part_msg = r#"ok"#; assert!(err_msg.contains(part_msg)); + + // Abnormal test with invalid arguments. + let json_msg = r#" + { + "execute": "human-monitor-command" , + "arguments": { + "invalid_key": "invalid_value" + } + } + "#; + let err_msg = match serde_json::from_str::(json_msg) { + Ok(_) => "ok".to_string(), + Err(e) => e.to_string(), + }; + let part_msg = r#"unknown field `invalid_key`, expected `command-line`"#; + assert!(err_msg.contains(part_msg)); } } diff --git a/machine_manager/src/qmp/qmp_socket.rs b/machine_manager/src/qmp/qmp_socket.rs new file mode 100644 index 0000000000000000000000000000000000000000..f0b4ace2663610990ef2c94787704988cec85071 --- /dev/null +++ b/machine_manager/src/qmp/qmp_socket.rs @@ -0,0 +1,711 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
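+
+//! Rough client-side sketch of the exchange this module serves. The socket
+//! path below is only illustrative; it assumes StratoVirt is already
+//! listening on that unix socket via its QMP option:
+//!
+//! ```no_run
+//! use std::io::{Read, Write};
+//! use std::os::unix::net::UnixStream;
+//!
+//! fn main() -> std::io::Result<()> {
+//!     let mut stream = UnixStream::connect("/tmp/stratovirt.qmp")?;
+//!
+//!     // The server pushes a greeting as soon as the connection is accepted.
+//!     let mut buf = [0u8; 1024];
+//!     let n = stream.read(&mut buf)?;
+//!     println!("greeting: {}", String::from_utf8_lossy(&buf[..n]));
+//!
+//!     // Commands are line-oriented JSON; responses mirror the `# Examples`
+//!     // sections in qmp_schema.rs.
+//!     stream.write_all(b"{ \"execute\": \"query-status\" }\n")?;
+//!     let n = stream.read(&mut buf)?;
+//!     println!("response: {}", String::from_utf8_lossy(&buf[..n]));
+//!     Ok(())
+//! }
+//! ```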
+ +use std::fmt::Display; +use std::net::IpAddr; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::rc::Rc; +use std::str::FromStr; +use std::sync::{Arc, Mutex, RwLock}; +use std::thread; +use std::time::{Duration, Instant}; + +use anyhow::{bail, Context, Result}; +use log::{error, info, warn}; +use vmm_sys_util::epoll::EventSet; + +use super::qmp_schema; +use super::qmp_schema::QmpCommand; +use super::{qmp_channel::QmpChannel, qmp_response::QmpGreeting, qmp_response::Response}; +use crate::event; +use crate::event_loop::EventLoop; +use crate::machine::{MachineExternalInterface, VmState}; +use crate::socket::SocketHandler; +use crate::socket::SocketRWHandler; +use crate::temp_cleaner::TempCleaner; +use util::leak_bucket::LeakBucket; +use util::loop_context::{ + gen_delete_notifiers, read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, + NotifierOperation, +}; +use util::set_termi_canon_mode; +use util::socket::{SocketListener, SocketStream}; +use util::unix::parse_unix_uri; + +const LEAK_BUCKET_LIMIT: u64 = 100; + +pub enum QmpSocketPath { + Unix { path: String }, + Tcp { host: String, port: u16 }, +} + +impl QmpSocketPath { + pub fn new(path: String) -> Result { + if path.starts_with('u') { + Ok(QmpSocketPath::Unix { + path: parse_unix_uri(&path)?, + }) + } else if path.starts_with('t') { + let (host, port) = parse_tcp_uri(&path)?; + Ok(QmpSocketPath::Tcp { host, port }) + } else { + bail!("invalid socket type: {}", path) + } + } +} + +impl Display for QmpSocketPath { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + QmpSocketPath::Tcp { host, port } => { + write!(f, "{}:{}", &host, &port) + } + QmpSocketPath::Unix { path } => write!(f, "{}", path), + } + } +} + +/// The wrapper over socket and socket handler. +/// +/// # Example +/// +/// ```no_run +/// use std::io::prelude::*; +/// use std::os::unix::io::AsRawFd; +/// use std::os::unix::net::UnixStream; +/// +/// use machine_manager::qmp::qmp_socket::Socket; +/// use util::socket::SocketListener; +/// +/// fn main() -> std::io::Result<()> { +/// let listener = SocketListener::bind_by_uds("/path/to/my/socket").unwrap(); +/// let socket = Socket::from_listener(listener, None); +/// assert!(!socket.is_connected()); +/// +/// let client_stream = UnixStream::connect("/path/to/my/socket")?; +/// socket.accept(); +/// assert!(socket.is_connected()); +/// Ok(()) +/// } +/// ``` +pub struct Socket { + /// Socket listener tuple + listener: SocketListener, + /// Socket stream with RwLock + stream: RwLock>, + /// Perform socket command + performer: Option>>, +} + +impl Socket { + /// Allocates a new `Socket` with `SocketListener`. + /// + /// # Arguments + /// + /// * `listener` - The `SocketListener` bind to `Socket`. + /// * `performer` - The `VM` to perform socket command. + pub fn from_listener( + listener: SocketListener, + performer: Option>>, + ) -> Self { + Socket { + listener, + stream: RwLock::new(None), + performer, + } + } + + /// Get listener's fd from `Socket`. + fn get_listener_fd(&self) -> RawFd { + self.listener.as_raw_fd() + } + + /// Accept stream and bind to Socket. + pub fn accept(&self) { + self.bind_stream(self.listener.accept().unwrap()); + } + + /// Bind `Socket` with a `SocketStream`. + /// + /// # Arguments + /// + /// * `sock_stream` - The `SocketStream` bind to `Socket`. + pub fn bind_stream(&self, stream: SocketStream) { + *self.stream.write().unwrap() = Some(stream); + } + + /// Unbind stream from `Socket`, reset the state. 
+ #[allow(unused)] + fn drop_stream(&self) { + *self.stream.write().unwrap() = None; + } + + /// Confirm whether socket stream bind to `Socket` or not. + pub fn is_connected(&self) -> bool { + self.stream.read().unwrap().is_some() + } + + /// Get socket fd from `Socket`, it a private function. + fn get_stream_fd(&self) -> RawFd { + if self.is_connected() { + self.stream.read().unwrap().as_ref().unwrap().as_raw_fd() + } else { + panic!("Failed to get socket fd!"); + } + } + + /// Get a `SocketHandler` from `Socket`. + fn get_socket_handler(&self) -> SocketHandler { + SocketHandler::new(self.get_stream_fd()) + } + + /// In qmp feature, send empty or greeting response to client. + /// + /// # Arguments + /// + /// * `is_greeting` - Whether sending greeting response or not. + fn send_response(&self, is_greeting: bool) -> std::io::Result<()> { + if self.is_connected() { + let mut handler = self.get_socket_handler(); + let resp = if is_greeting { + serde_json::to_string(&QmpGreeting::create_greeting(1, 0, 5)).unwrap() + } else { + serde_json::to_string(&Response::create_empty_response()).unwrap() + }; + handler.send_str(&resp)?; + info!("QMP: <-- {:?}", resp); + } + Ok(()) + } + + /// Create socket's accepted stream to `event_notifier`. + fn create_event_notifier(&mut self, shared_socket: Arc>) -> Vec { + let mut notifiers = Vec::new(); + + let leak_bucket = LeakBucket::new(LEAK_BUCKET_LIMIT); + if let Err(e) = leak_bucket { + error!("Failed to create leak bucket, {:?}", e); + return notifiers; + } + let leak_bucket = Arc::new(Mutex::new(leak_bucket.unwrap())); + let shared_leak_bucket = leak_bucket.clone(); + let leak_bucket_fd = leak_bucket.lock().unwrap().as_raw_fd(); + + self.accept(); + QmpChannel::bind_writer(SocketRWHandler::new(self.get_stream_fd())); + if let Err(e) = self.send_response(true) { + error!("{:?}", e); + QmpChannel::unbind(); + return notifiers; + } + let handler: Rc = Rc::new(move |event, _| { + if event == EventSet::IN { + let socket_mutexed = shared_socket.lock().unwrap(); + let stream_fd = socket_mutexed.get_stream_fd(); + + let performer = &socket_mutexed.performer.as_ref().unwrap(); + if let Err(e) = handle_qmp( + stream_fd, + performer, + &mut shared_leak_bucket.lock().unwrap(), + ) { + error!("{:?}", e); + } + } + if event & EventSet::HANG_UP == EventSet::HANG_UP { + let socket_mutexed = shared_socket.lock().unwrap(); + let stream_fd = socket_mutexed.get_stream_fd(); + + QmpChannel::unbind(); + Some(gen_delete_notifiers(&[stream_fd, leak_bucket_fd])) + } else { + None + } + }); + let qmp_notifier = EventNotifier::new( + NotifierOperation::AddShared, + self.get_stream_fd(), + Some(self.get_listener_fd()), + EventSet::IN | EventSet::HANG_UP, + vec![handler], + ); + notifiers.push(qmp_notifier); + + let leak_bucket_notifier = EventNotifier::new( + NotifierOperation::AddShared, + leak_bucket_fd, + None, + EventSet::IN, + vec![Rc::new(move |_, fd| { + read_fd(fd); + leak_bucket.lock().unwrap().clear_timer(); + None + })], + ); + notifiers.push(leak_bucket_notifier); + + notifiers + } +} + +impl EventNotifierHelper for Socket { + fn internal_notifiers(shared_socket: Arc>) -> Vec { + let mut notifiers = Vec::new(); + + let socket = shared_socket.clone(); + let handler: Rc = + Rc::new(move |_, _| Some(socket.lock().unwrap().create_event_notifier(socket.clone()))); + let notifier = EventNotifier::new( + NotifierOperation::AddShared, + shared_socket.lock().unwrap().get_listener_fd(), + None, + EventSet::IN, + vec![handler], + ); + notifiers.push(notifier); + + notifiers + } 
+} + +/// Macro: to execute handle func with every arguments. +macro_rules! qmp_command_match { + ( $func:tt, $executor:expr, $ret:expr ) => { + $ret = $executor.$func().into(); + }; + ( $func:tt, $executor:expr, $cmd:expr, $ret:expr, $($arg:tt),* ) => { + $ret = $executor.$func( + $($cmd.$arg),* + ).into(); + }; +} + +/// Macro: to execute handle func with all arguments. +macro_rules! qmp_command_match_with_argument { + ( $func:tt, $executor:expr, $cmd:expr, $ret:expr ) => { + $ret = $executor.$func($cmd).into(); + }; +} + +/// Macro `create_command_matches!`: Generate a match statement for qmp_command +/// , which is combined with its handle func. +/// +/// # Arguments +/// +/// `cmd_type_1` - The qmp command with no arguments. +/// `cmd_type_2` - The qmp command with arguments. +macro_rules! create_command_matches { + ( $command:expr; $executor:expr; $ret:expr; + $(($cmd_type_1:tt, $func_1:tt)),*; + $(($cmd_type_2:tt, $func_2:tt, $($arg:tt),*)),*; + $(($cmd_type_3:tt, $func_3:tt)),* + ) => { + match $command { + $( + $crate::qmp::qmp_schema::QmpCommand::$cmd_type_1{ id, ..} => { + qmp_command_match!($func_1, $executor, $ret); + id + }, + )* + $( + $crate::qmp::qmp_schema::QmpCommand::$cmd_type_2{ arguments, id } => { + qmp_command_match!($func_2, $executor, arguments, $ret, $($arg),*); + id + }, + )* + $( + $crate::qmp::qmp_schema::QmpCommand::$cmd_type_3{ arguments, id } => { + qmp_command_match_with_argument!($func_3, $executor, arguments, $ret); + id + }, + )* + _ => None, + } + }; +} + +/// Parse tcp uri to ip:port. +/// +/// # Notions +/// +/// tcp uri is the string as `tcp:ip:port`. +fn parse_tcp_uri(uri: &str) -> Result<(String, u16)> { + let parse_vec: Vec<&str> = uri.splitn(3, ':').collect(); + if parse_vec.len() == 3 && parse_vec[0] == "tcp" { + if let Ok(host) = IpAddr::from_str(parse_vec[1]) { + if let Ok(port) = parse_vec[2].parse::() { + Ok((host.to_string(), port)) + } else { + bail!("Invalid port used by tcp socket: {}", parse_vec[2]) + } + } else { + bail!("Invalid host used by tcp socket: {}", parse_vec[1]) + } + } else { + bail!("Invalid tcp uri: {}", uri) + } +} + +/// Accept qmp command, analyze and exec it. +/// +/// # Arguments +/// +/// * `stream_fd` - The input stream file description. +/// * `controller` - The controller which execute actual qmp command. +/// * `leak_bucket` - The LeakBucket flow controller for qmp command. +/// +/// # Errors +/// +/// This function will fail when json parser failed or socket file description broke. +fn handle_qmp( + stream_fd: RawFd, + controller: &Arc>, + leak_bucket: &mut LeakBucket, +) -> Result<()> { + let mut qmp_service = crate::socket::SocketHandler::new(stream_fd); + + // If flow over `LEAK_BUCKET_LIMIT` per seconds, discard the request and return + // a `OperationThrottled` error. + if leak_bucket.throttled(EventLoop::get_ctx(None).unwrap(), 1_u32) { + qmp_service.discard()?; + let err_resp = qmp_schema::QmpErrorClass::OperationThrottled(LEAK_BUCKET_LIMIT); + qmp_service + .send_str(&serde_json::to_string(&Response::create_error_response( + err_resp, None, + ))?) 
+ .with_context(|| "Failed to send message to qmp client.")?; + return Ok(()); + } + + match qmp_service.decode_line() { + (Ok(None), _) => Ok(()), + (Ok(buffer), if_fd) => { + info!("QMP: --> {:?}", buffer); + let qmp_command: QmpCommand = buffer.unwrap(); + let (return_msg, shutdown_flag) = qmp_command_exec(qmp_command, controller, if_fd); + info!("QMP: <-- {:?}", return_msg); + qmp_service.send_str(&return_msg)?; + + // handle shutdown command + if shutdown_flag { + let shutdown_msg = qmp_schema::Shutdown { + guest: false, + reason: "host-qmp-quit".to_string(), + }; + event!(Shutdown; shutdown_msg); + TempCleaner::clean(); + set_termi_canon_mode().expect("Failed to set terminal to canonical mode."); + + std::process::exit(0); + } + + Ok(()) + } + (Err(e), _) => { + let err_resp = qmp_schema::QmpErrorClass::GenericError(format!("{}", &e)); + warn!("Qmp json parser made an error: {:?}", e); + qmp_service.send_str(&serde_json::to_string(&Response::create_error_response( + err_resp, None, + ))?)?; + Ok(()) + } + } +} + +/// Create a match , where `qmp_command` and its arguments matching by handle +/// function, and exec this qmp command. +fn qmp_command_exec( + qmp_command: QmpCommand, + controller: &Arc>, + if_fd: Option, +) -> (String, bool) { + let mut qmp_response = Response::create_empty_response(); + let mut shutdown_flag = false; + + // Use macro create match to cover most Qmp command + let mut id = create_command_matches!( + qmp_command.clone(); controller.lock().unwrap(); qmp_response; + (cont, resume), + (system_powerdown, powerdown), + (system_reset, reset), + (query_status, query_status), + (query_version, query_version), + (query_commands, query_commands), + (query_target, query_target), + (query_kvm, query_kvm), + (query_events, query_events), + (query_machines, query_machines), + (query_tpm_models, query_tpm_models), + (query_tpm_types, query_tpm_types), + (query_command_line_options, query_command_line_options), + (query_migrate_capabilities, query_migrate_capabilities), + (query_qmp_schema, query_qmp_schema), + (query_sev_capabilities, query_sev_capabilities), + (query_chardev, query_chardev), + (qom_list, qom_list), + (qom_get, qom_get), + (query_block, query_block), + (query_named_block_nodes, query_named_block_nodes), + (query_blockstats, query_blockstats), + (query_block_jobs, query_block_jobs), + (query_gic_capabilities, query_gic_capabilities), + (query_iothreads, query_iothreads), + (query_migrate, query_migrate), + (cancel_migrate, cancel_migrate), + (query_cpus, query_cpus), + (query_balloon, query_balloon), + (query_mem, query_mem), + (query_vnc, query_vnc), + (query_display_image, query_display_image), + (list_type, list_type), + (query_hotpluggable_cpus, query_hotpluggable_cpus), + (query_workloads, query_workloads); + (input_event, input_event, key, value), + (device_list_properties, device_list_properties, typename), + (device_del, device_del, id), + (switch_audio_record, switch_audio_record, authorized), + (blockdev_del, blockdev_del, node_name), + (netdev_del, netdev_del, id), + (chardev_remove, chardev_remove, id), + (cameradev_del, cameradev_del,id), + (balloon, balloon, value), + (migrate, migrate, uri); + (device_add, device_add), + (blockdev_add, blockdev_add), + (netdev_add, netdev_add), + (chardev_add, chardev_add), + (cameradev_add, cameradev_add), + (update_region, update_region), + (human_monitor_command, human_monitor_command), + (blockdev_snapshot_internal_sync, blockdev_snapshot_internal_sync), + (blockdev_snapshot_delete_internal_sync, 
blockdev_snapshot_delete_internal_sync), + (query_vcpu_reg, query_vcpu_reg), + (query_mem_gpa, query_mem_gpa) + ); + + // Handle the Qmp command which macro can't cover + if id.is_none() { + id = match qmp_command { + QmpCommand::stop { arguments: _, id } => { + let now = Instant::now(); + while !controller.lock().unwrap().pause() { + thread::sleep(Duration::from_millis(5)); + if now.elapsed() > Duration::from_secs(2) { + // Not use resume() to avoid unnecessary qmp event. + controller + .lock() + .unwrap() + .notify_lifecycle(VmState::Paused, VmState::Running); + qmp_response = Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "Failed to pause VM".to_string(), + ), + None, + ); + break; + } + } + id + } + QmpCommand::quit { id, .. } => { + controller.lock().unwrap().destroy(); + shutdown_flag = true; + id + } + QmpCommand::getfd { arguments, id } => { + qmp_response = controller.lock().unwrap().getfd(arguments.fd_name, if_fd); + id + } + QmpCommand::trace_get_state { arguments, id } => { + match trace::get_state_by_pattern(arguments.pattern) { + Ok(events) => { + let mut ret = Vec::new(); + for (name, state) in events { + ret.push(qmp_schema::TraceInfo { name, state }); + } + qmp_response = + Response::create_response(serde_json::to_value(ret).unwrap(), None); + } + Err(_) => { + qmp_response = Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "Failed to get trace state".to_string(), + ), + None, + ) + } + } + id + } + QmpCommand::trace_set_state { arguments, id } => { + if trace::set_state_by_pattern(arguments.pattern, arguments.enable).is_err() { + qmp_response = Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError( + "Failed to set trace state".to_string(), + ), + None, + ) + } + id + } + _ => None, + } + } + + // Change response id with input qmp message + qmp_response.change_id(id); + (serde_json::to_string(&qmp_response).unwrap(), shutdown_flag) +} + +#[cfg(test)] +mod tests { + use std::os::unix::net::UnixStream; + use std::time::Duration; + + use super::*; + use serde_json; + + // Environment Preparation for UnixSocket + fn prepare_unix_socket_environment( + socket_id: &str, + ) -> (SocketListener, UnixStream, SocketStream) { + let socket_name: String = format!("test_{}.sock", socket_id); + let _ = std::fs::remove_file(&socket_name); + + let listener = SocketListener::bind_by_uds(&socket_name).unwrap(); + + std::thread::sleep(Duration::from_millis(100)); + let client = UnixStream::connect(&socket_name).unwrap(); + let server = listener.accept().unwrap(); + (listener, client, server) + } + + // Environment Recovery for UnixSocket + fn recover_unix_socket_environment(socket_id: &str) { + let socket_name: String = format!("test_{}.sock", socket_id); + std::fs::remove_file(socket_name).unwrap(); + } + + #[test] + fn test_socket_lifecycle() { + // Pre test. Environment Preparation + let (listener, _, server) = prepare_unix_socket_environment("04"); + let socket = Socket::from_listener(listener, None); + + // life cycle test + // 1.Unconnected + assert!(!socket.is_connected()); + + // 2.Connected + socket.bind_stream(server); + assert!(socket.is_connected()); + + // 3.Unbind SocketStream, reset state + socket.drop_stream(); + assert!(!socket.is_connected()); + + // 4.Accept and reconnect a new UnixStream + let _new_client = UnixStream::connect("test_04.sock"); + socket.accept(); + assert!(socket.is_connected()); + + // After test. 
Environment Recover + recover_unix_socket_environment("04"); + } + + #[test] + fn test_qmp_event_macro() { + use std::io::Read; + + use crate::socket::SocketRWHandler; + + // Pre test. Environment preparation + QmpChannel::object_init(); + let mut buffer = [0u8; 200]; + let (listener, mut client, server) = prepare_unix_socket_environment("06"); + + // Use event! macro to send event msg to client + let socket = Socket::from_listener(listener, None); + socket.bind_stream(server); + QmpChannel::bind_writer(SocketRWHandler::new(socket.get_stream_fd())); + + // 1.send no-content event + event!(Stop); + let length = client.read(&mut buffer).unwrap(); + let qmp_event: qmp_schema::QmpEvent = + serde_json::from_str(&(String::from_utf8_lossy(&buffer[..length]))).unwrap(); + match qmp_event { + qmp_schema::QmpEvent::Stop { + data: _, + timestamp: _, + } => { + assert!(true); + } + _ => assert!(false), + } + + // 2.send with-content event + let shutdown_event = qmp_schema::Shutdown { + guest: true, + reason: "guest-shutdown".to_string(), + }; + event!(Shutdown; shutdown_event); + let length = client.read(&mut buffer).unwrap(); + let qmp_event: qmp_schema::QmpEvent = + serde_json::from_str(&(String::from_utf8_lossy(&buffer[..length]))).unwrap(); + match qmp_event { + qmp_schema::QmpEvent::Shutdown { data, timestamp: _ } => { + assert!(data.guest); + assert_eq!(data.reason, "guest-shutdown".to_string()); + } + _ => assert!(false), + } + + // After test. Environment Recover + recover_unix_socket_environment("06"); + } + + #[test] + fn test_qmp_send_response() { + use std::io::Read; + + // Pre test. Environment preparation + let mut buffer = [0u8; 300]; + let (listener, mut client, server) = prepare_unix_socket_environment("07"); + + // Use event! macro to send event msg to client + let socket = Socket::from_listener(listener, None); + socket.bind_stream(server); + + // 1.send greeting response + let res = socket.send_response(true); + let length = client.read(&mut buffer).unwrap(); + let qmp_response: QmpGreeting = + serde_json::from_str(&(String::from_utf8_lossy(&buffer[..length]))).unwrap(); + let qmp_greeting = QmpGreeting::create_greeting(1, 0, 5); + assert_eq!(qmp_greeting, qmp_response); + assert!(res.is_ok()); + + // 2.send empty response + let res = socket.send_response(false); + let length = client.read(&mut buffer).unwrap(); + let qmp_response: Response = + serde_json::from_str(&(String::from_utf8_lossy(&buffer[..length]))).unwrap(); + let qmp_empty_response = Response::create_empty_response(); + assert_eq!(qmp_empty_response, qmp_response); + assert!(res.is_ok()); + + // After test. Environment Recover + recover_unix_socket_environment("07"); + drop(socket); + } +} diff --git a/machine_manager/src/signal_handler.rs b/machine_manager/src/signal_handler.rs index 0cd4524f5266e27f3d38ff2f3483cb7d86b91662..6d679192d27e6e8c9bce9c9939d771ec9da21587 100644 --- a/machine_manager/src/signal_handler.rs +++ b/machine_manager/src/signal_handler.rs @@ -9,60 +9,109 @@ // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
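The QMP unit tests above exercise the wire format end to end: the server pushes a QmpGreeting as soon as a client connects, every message is a single JSON object terminated by "\r\n" (send_str now appends the terminator), and asynchronous events such as Shutdown are emitted through the event! macro. A minimal external client sketch, assuming a hypothetical socket path and a QEMU-style command name such as "query-status" (any command from the dispatch table above would do):

```rust
use std::io::{BufRead, BufReader, Write};
use std::os::unix::net::UnixStream;

fn main() -> std::io::Result<()> {
    // Hypothetical path; use whatever was passed to StratoVirt for the QMP socket.
    let mut stream = UnixStream::connect("/run/stratovirt.sock")?;
    let mut reader = BufReader::new(stream.try_clone()?);

    // 1. The server speaks first: read the greeting line (a QmpGreeting JSON object).
    let mut greeting = String::new();
    reader.read_line(&mut greeting)?;
    println!("greeting: {}", greeting.trim());

    // 2. Send a command; requests are newline-terminated JSON objects as well.
    stream.write_all(b"{\"execute\":\"query-status\"}\n")?;

    // 3. Read the response produced by qmp_command_exec().
    let mut response = String::new();
    reader.read_line(&mut response)?;
    println!("response: {}", response.trim());
    Ok(())
}
```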
-extern crate vmm_sys_util; -use crate::temp_cleaner::TempCleaner; -use std::io::Write; + +use std::{ + io::Write, + sync::atomic::{AtomicI32, Ordering}, +}; use libc::{c_int, c_void, siginfo_t}; -use util::set_termi_canon_mode; use vmm_sys_util::signal::register_signal_handler; +use crate::{ + event, + event_loop::EventLoop, + qmp::{qmp_channel::QmpChannel, qmp_schema}, +}; +use util::set_termi_canon_mode; + pub const VM_EXIT_GENE_ERR: i32 = 1; const SYSTEMCALL_OFFSET: isize = 6; +const SYS_SECCOMP: i32 = 1; -fn basic_clean() { - // clean temporary file - TempCleaner::clean(); - - set_termi_canon_mode().expect("Failed to set terminal to canon mode."); -} +static mut RECEIVED_SIGNAL: AtomicI32 = AtomicI32::new(0); pub fn exit_with_code(code: i32) { - // Safe, because the basic_clean function has been executed before exit. + // SAFETY: The basic_clean function has been executed before exit. unsafe { libc::_exit(code); } } -extern "C" fn handle_signal_kill(num: c_int, _: *mut siginfo_t, _: *mut c_void) { - basic_clean(); +pub fn set_signal(num: c_int) { + /* + * Other three signals need to send shutdown message more than SIGSYS. + * So, if received other three signals, it should replace the SIGSYS + * which has been received before. The SIGTERM/SIGINT/SIGHUP has the + * same treatment, if received one of them, no need to replace it. + */ + if [0, libc::SIGSYS].contains(&get_signal()) { + // SAFETY: just write a global variable. + unsafe { + RECEIVED_SIGNAL.store(num, Ordering::SeqCst); + } + EventLoop::kick_all(); + } +} + +pub fn get_signal() -> i32 { + // SAFETY: just read a global variable. + unsafe { RECEIVED_SIGNAL.load(Ordering::SeqCst) } +} + +pub fn handle_signal() { + let sig_num = get_signal(); + if sig_num != 0 { + set_termi_canon_mode().expect("Failed to set terminal to canonical mode."); + if [libc::SIGTERM, libc::SIGINT, libc::SIGHUP].contains(&sig_num) + && QmpChannel::is_connected() + { + let shutdown_msg = qmp_schema::Shutdown { + guest: false, + reason: "Guest shutdown by signal ".to_string() + &sig_num.to_string(), + }; + event!(Shutdown; shutdown_msg); + } + } +} + +extern "C" fn receive_signal_kill(num: c_int, _: *mut siginfo_t, _: *mut c_void) { + set_signal(num); write!( &mut std::io::stderr(), "Received kill signal, signal number: {} \r\n", num ) .expect("Failed to write to stderr"); - exit_with_code(VM_EXIT_GENE_ERR); } -extern "C" fn handle_signal_sys(_: c_int, info: *mut siginfo_t, _: *mut c_void) { - basic_clean(); - let badcall = unsafe { *(info as *const i32).offset(SYSTEMCALL_OFFSET) as usize }; - write!( - &mut std::io::stderr(), - "Received a bad system call, number: {} \r\n", - badcall - ) - .expect("Failed to write to stderr"); - exit_with_code(VM_EXIT_GENE_ERR); +extern "C" fn receive_signal_sys(num: c_int, info: *mut siginfo_t, _: *mut c_void) { + set_signal(num); + // SAFETY: The safety of this function is guaranteed by caller. + if let Some(sig_info) = unsafe { info.as_ref() } { + if SYS_SECCOMP == sig_info.si_code { + eprintln!("seccomp violation, Try running with `strace -ff` to identify the cause."); + } + + // SAFETY: the pointer is not null. + let badcall = unsafe { *(info.cast::().offset(SYSTEMCALL_OFFSET)) }; + write!( + &mut std::io::stderr(), + "Received a bad system call, number: {} \r\n", + badcall + ) + .expect("Failed to write to stderr"); + } } /// Register kill signal handler. Signals supported now are SIGTERM and SIGSYS. 
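With this rework the handlers no longer clean up and exit inside signal context; they only record the signal in RECEIVED_SIGNAL and kick the event loop, so the heavier work (QMP Shutdown event, terminal restore, temporary-file cleanup) can run in normal context. A rough sketch of how the recorded signal is expected to be consumed after the event loop wakes up; the function name below is illustrative, not the actual StratoVirt call site:

```rust
// Illustrative only: poll the deferred signal state from the main loop.
fn consume_deferred_signal() {
    if get_signal() != 0 {
        // Restores the terminal to canonical mode and, for SIGTERM/SIGINT/SIGHUP
        // with a connected QMP client, emits the Shutdown event.
        handle_signal();
        // Unlink temporary sockets/files and run registered exit notifiers.
        crate::temp_cleaner::TempCleaner::clean();
        exit_with_code(VM_EXIT_GENE_ERR);
    }
}
```

EventLoop::kick_all() in set_signal() exists precisely so that this check runs promptly instead of waiting for the next unrelated event.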
pub fn register_kill_signal() { - register_signal_handler(libc::SIGTERM, handle_signal_kill) + register_signal_handler(libc::SIGTERM, receive_signal_kill) .expect("Register signal handler for SIGTERM failed!"); - register_signal_handler(libc::SIGSYS, handle_signal_sys) + register_signal_handler(libc::SIGSYS, receive_signal_sys) .expect("Register signal handler for SIGSYS failed!"); - register_signal_handler(libc::SIGINT, handle_signal_kill) + register_signal_handler(libc::SIGINT, receive_signal_kill) .expect("Register signal handler for SIGINT failed!"); + register_signal_handler(libc::SIGHUP, receive_signal_kill) + .expect("Register signal handler for SIGHUP failed!"); } diff --git a/machine_manager/src/socket.rs b/machine_manager/src/socket.rs index 594ef64a32a48776d75ecebf36669ffc74b26215..a2c0f3369bf588006a0e6c401b1c6729962494f8 100644 --- a/machine_manager/src/socket.rs +++ b/machine_manager/src/socket.rs @@ -10,314 +10,31 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use serde::Deserialize; use std::io::{Error, ErrorKind, Read, Write}; -use std::os::unix::io::{AsRawFd, RawFd}; -use std::os::unix::net::{UnixListener, UnixStream}; -use std::sync::{Arc, Mutex, RwLock}; - -use util::leak_bucket::LeakBucket; -use util::loop_context::{read_fd, EventNotifier, EventNotifierHelper, NotifierOperation}; -use vmm_sys_util::epoll::EventSet; - -use super::errors::Result; -use crate::machine::MachineExternalInterface; -use crate::{ - qmp::qmp_schema::QmpEvent, - qmp::{QmpChannel, QmpGreeting, Response}, +use std::mem::size_of; +use std::os::unix::io::RawFd; + +use anyhow::{bail, Result}; +use libc::{ + c_void, iovec, msghdr, recvmsg, sendmsg, CMSG_DATA, CMSG_FIRSTHDR, CMSG_LEN, CMSG_NXTHDR, + MSG_DONTWAIT, MSG_NOSIGNAL, SCM_RIGHTS, SOL_SOCKET, }; +use serde::Deserialize; const MAX_SOCKET_MSG_LENGTH: usize = 8192; -pub(crate) const LEAK_BUCKET_LIMIT: u64 = 100; - -/// The wrapper over Unix socket and socket handler. -/// -/// # Example -/// -/// ```no_run -/// use std::os::unix::net::{UnixListener, UnixStream}; -/// use std::os::unix::io::AsRawFd; -/// use std::io::prelude::*; -/// -/// use machine_manager::socket::Socket; -/// -/// fn main() -> std::io::Result<()> { -/// let listener = UnixListener::bind("/path/to/my/socket")?; -/// let socket = Socket::from_unix_listener(listener, None); -/// assert!(!socket.is_connected()); -/// -/// let client_stream = UnixStream::connect("/path/to/my/socket")?; -/// let server_stream = socket.accept_unix_stream(); -/// socket.bind_unix_stream(server_stream); -/// assert!(socket.is_connected()); -/// Ok(()) -/// } -/// ``` -pub struct Socket { - /// Type for Socket - sock_type: SocketType, - /// Socket listener tuple - listener: UnixListener, - /// Socket stream with RwLock - stream: RwLock>, - /// Perform socket command - performer: Option>>, -} - -impl Socket { - /// Allocates a new `Socket` with `UnixListener`. - /// - /// # Arguments - /// - /// * `listener` - The `UnixListener` bind to `Socket`. - /// * `performer` - The `VM` to perform socket command. - pub fn from_unix_listener( - listener: UnixListener, - performer: Option>>, - ) -> Self { - Socket { - sock_type: SocketType::Unix, - listener, - stream: RwLock::new(None), - performer, - } - } - - /// Get listener's fd from `Socket`. - pub fn get_listener_fd(&self) -> RawFd { - self.listener.as_raw_fd() - } - - /// Accept stream and bind to Socket. 
- pub fn accept(&self) { - match self.sock_type { - SocketType::Unix => { - let stream = self.accept_unix_stream(); - self.bind_unix_stream(stream); - } - } - - QmpChannel::bind_writer(SocketRWHandler::new(self.get_stream_fd())); - self.send_response(true); - } - - /// Accept a new incoming connection unix stream from unix listener. - pub fn accept_unix_stream(&self) -> UnixStream { - let (stream, _) = self.listener.accept().unwrap(); - stream - } - - /// Get socket type from `Socket`. - pub fn get_socket_type(&self) -> SocketType { - self.sock_type - } - - /// Bind `Socket` with a `UnixStream`. - /// - /// # Arguments - /// - /// * `unix_stream` - The `UnixStream` bind to `Socket`. - pub fn bind_unix_stream(&self, unix_stream: UnixStream) { - let stream = SocketStream::from_unix_stream(unix_stream); - *self.stream.write().unwrap() = Some(stream); - } - - /// Unbind stream from `Socket`, reset the state. - pub fn drop_stream(&self) { - *self.stream.write().unwrap() = None; - } - - /// Confirm whether socket stream bind to `Socket` or not. - pub fn is_connected(&self) -> bool { - self.stream.read().unwrap().is_some() - } - - /// Get socket fd from `Socket`, it a private function. - pub fn get_stream_fd(&self) -> RawFd { - if self.is_connected() { - self.stream.read().unwrap().as_ref().unwrap().socket_fd - } else { - panic!("Failed to get socket fd!"); - } - } - - /// Get a `SocketHandler` from `Socket`. - pub fn get_socket_handler(&self) -> SocketHandler { - SocketHandler::new(self.get_stream_fd()) - } - - /// In qmp feature, send event to client. - /// - /// # Arguments - /// - /// * `event` - The `QmpEvent` will be sent to client. - pub fn send_event(&self, event: &QmpEvent) { - if self.is_connected() { - let mut handler = self.get_socket_handler(); - let event_str = serde_json::to_string(&event).unwrap(); - handler.send_str(&event_str).unwrap(); - info!("EVENT: --> {:?}", event); - } - } - - /// In qmp feature, send empty or greeting response to client. - /// - /// # Arguments - /// - /// * `is_greeting` - Whether sending greeting response or not. - pub fn send_response(&self, is_greeting: bool) { - if self.is_connected() { - let mut handler = self.get_socket_handler(); - let resp = if is_greeting { - serde_json::to_string(&QmpGreeting::create_greeting(1, 0, 5)).unwrap() + "\r" - } else { - serde_json::to_string(&Response::create_empty_response()).unwrap() + "\r" - }; - handler.send_str(&resp).unwrap(); - info!("QMP: --> {:?}", resp); - } - } - - /// Create socket's accepted stream to `event_notifier`. 
- fn create_event_notifier(&mut self, shared_socket: Arc>) -> Vec { - let mut notifiers = Vec::new(); - self.accept(); - - let leak_bucket = Arc::new(Mutex::new(LeakBucket::new(LEAK_BUCKET_LIMIT))); - let shared_leak_bucket = leak_bucket.clone(); - let leak_bucket_fd = leak_bucket.lock().unwrap().as_raw_fd(); - - let mut handlers = Vec::new(); - let handler: Box Option>> = - Box::new(move |event, _| { - if event == EventSet::IN { - let socket_mutexed = shared_socket.lock().unwrap(); - let stream_fd = socket_mutexed.get_stream_fd(); - - let performer = &socket_mutexed.performer.as_ref().unwrap(); - if let Err(e) = crate::qmp::handle_qmp( - stream_fd, - performer, - &mut shared_leak_bucket.lock().unwrap(), - ) { - error!("{}", e); - } - } - if event & EventSet::HANG_UP == EventSet::HANG_UP { - let socket_mutexed = shared_socket.lock().unwrap(); - let stream_fd = socket_mutexed.get_stream_fd(); - let listener_fd = socket_mutexed.get_listener_fd(); - - QmpChannel::unbind(); - - Some(vec![ - EventNotifier::new( - NotifierOperation::Delete, - stream_fd, - Some(listener_fd), - EventSet::IN | EventSet::HANG_UP, - Vec::new(), - ), - EventNotifier::new( - NotifierOperation::Delete, - leak_bucket_fd, - None, - EventSet::IN, - Vec::new(), - ), - ]) - } else { - None - } - }); - handlers.push(Arc::new(Mutex::new(handler))); - - let qmp_notifier = EventNotifier::new( - NotifierOperation::AddShared, - self.get_stream_fd(), - Some(self.get_listener_fd()), - EventSet::IN | EventSet::HANG_UP, - handlers, - ); - notifiers.push(qmp_notifier); - - let leak_bucket_notifier = EventNotifier::new( - NotifierOperation::AddShared, - leak_bucket_fd, - None, - EventSet::IN, - vec![Arc::new(Mutex::new(Box::new(move |_, fd| { - read_fd(fd); - leak_bucket.lock().unwrap().clear_timer(); - None - })))], - ); - notifiers.push(leak_bucket_notifier); - - notifiers - } -} - -impl EventNotifierHelper for Socket { - fn internal_notifiers(shared_socket: Arc>) -> Vec { - let mut notifiers = Vec::new(); - - let socket = shared_socket.clone(); - let mut handlers = Vec::new(); - let handler: Box Option>> = - Box::new(move |_, _| { - Some(socket.lock().unwrap().create_event_notifier(socket.clone())) - }); - - handlers.push(Arc::new(Mutex::new(handler))); - - let notifier = EventNotifier::new( - NotifierOperation::AddShared, - shared_socket.lock().unwrap().get_listener_fd(), - None, - EventSet::IN, - handlers, - ); - - notifiers.push(notifier); - - notifiers - } -} - -/// Type for api socket. -#[derive(Copy, Clone, Debug, PartialEq)] -pub enum SocketType { - Unix = 1, -} - -/// Wrapper over UnixSteam. -#[derive(Debug)] -struct SocketStream { - /// `RawFd` for socket - socket_fd: RawFd, - /// Make `UnixStream` persistent without `drop` - #[allow(dead_code)] - persistent: Option, -} - -impl SocketStream { - fn from_unix_stream(stream: UnixStream) -> Self { - SocketStream { - socket_fd: stream.as_raw_fd(), - persistent: Some(stream), - } - } -} +/// The max buffer length received by recvmsg. +const MAX_RECV_BUF_LEN: usize = 4096; +/// The max buffer length used by recvmsg for file descriptors. +const MAX_RECV_FDS_LEN: usize = MAX_RECV_BUF_LEN; /// Wrapper over socket file description read and write message. 
/// /// # Examples /// /// ```no_run -/// use std::os::unix::net::UnixStream; -/// use std::os::unix::io::AsRawFd; /// use std::io::prelude::*; +/// use std::os::unix::io::AsRawFd; +/// use std::os::unix::net::UnixStream; /// /// use machine_manager::socket::SocketRWHandler; /// @@ -361,7 +78,7 @@ impl SocketRWHandler { /// Get inner buf as a `String`. pub fn get_buf_string(&mut self) -> Result { if self.buf.len() > MAX_SOCKET_MSG_LENGTH { - bail!("The socket messege is too long."); + bail!("The socket message is too long."); } Ok(String::from_utf8_lossy(&self.buf).trim().to_string()) @@ -376,6 +93,35 @@ impl SocketRWHandler { } } + fn parse_fd(&mut self, mhdr: &msghdr) { + // At least it should has one RawFd. + // SAFETY: The input parameter is constant. + let min_cmsg_len = unsafe { u64::from(CMSG_LEN(size_of::() as u32)) }; + if (mhdr.msg_controllen as u64) < min_cmsg_len { + return; + } + + // SAFETY: The pointer of mhdr can be guaranteed not null. + let mut cmsg_hdr = unsafe { CMSG_FIRSTHDR(mhdr as *const msghdr).as_ref() }; + while cmsg_hdr.is_some() { + let scm = cmsg_hdr.unwrap(); + if scm.cmsg_level == SOL_SOCKET + && scm.cmsg_type == SCM_RIGHTS + && scm.cmsg_len as u64 >= min_cmsg_len + { + // SAFETY: The pointer of scm can be guaranteed not null. + let fds = unsafe { + let fd_num = (scm.cmsg_len as u64 - u64::from(CMSG_LEN(0))) as usize + / size_of::(); + std::slice::from_raw_parts(CMSG_DATA(scm) as *const RawFd, fd_num) + }; + self.scm_fd.append(&mut fds.to_vec()); + } + // SAFETY: The pointer of mhdr can be guaranteed not null. + cmsg_hdr = unsafe { CMSG_NXTHDR(mhdr as *const msghdr, scm).as_ref() }; + } + } + /// Receive bytes and scm_fd from socket file descriptor. /// /// # Notes @@ -388,90 +134,49 @@ impl SocketRWHandler { /// # Errors /// The socket file descriptor is broken. fn read_fd(&mut self) -> std::io::Result<()> { - use libc::{ - c_uint, c_void, cmsghdr, iovec, msghdr, recvmsg, CMSG_DATA, CMSG_FIRSTHDR, CMSG_SPACE, - MSG_DONTWAIT, SCM_RIGHTS, SOL_SOCKET, + let recv_buf = [0_u8; MAX_RECV_BUF_LEN]; + let mut iov = iovec { + iov_base: recv_buf.as_ptr() as *mut c_void, + iov_len: MAX_RECV_BUF_LEN, }; - + let mut cmsg_space = [0_u8; MAX_RECV_FDS_LEN]; loop { - let tmp_buf = [0_u8; 1]; - let mut iov = iovec { - iov_base: tmp_buf.as_ptr() as *mut c_void, - iov_len: 1, - }; - - let mut cmsg_space = { - let mut space = 0; - space += - unsafe { CMSG_SPACE(std::mem::size_of::<[RawFd; 2]>() as c_uint) } as usize; - Some(Vec::::with_capacity(space)) - }; - - let (msg_control, msg_controllen) = cmsg_space - .as_mut() - .map(|v| (v.as_mut_ptr(), v.capacity())) - .unwrap_or((std::ptr::null_mut(), 0)); - - // In `musl` toolchain, msghdr has private member `__pad0` and `__pad1`, it can't be - // initialized in normal way. - let mut mhdr: msghdr = unsafe { std::mem::zeroed() }; + let mut mhdr: msghdr = + // SAFETY: In `musl` toolchain, msghdr has private member `__pad0` and `__pad1`, it can't be + // initialized in normal way. + unsafe { std::mem::zeroed() }; mhdr.msg_name = std::ptr::null_mut(); mhdr.msg_namelen = 0; mhdr.msg_iov = &mut iov as *mut iovec; mhdr.msg_iovlen = 1; - mhdr.msg_control = msg_control as *mut c_void; - mhdr.msg_controllen = msg_controllen as _; + mhdr.msg_control = cmsg_space.as_mut_ptr() as *mut c_void; + mhdr.msg_controllen = cmsg_space.len() as _; mhdr.msg_flags = 0; // MSG_DONTWAIT: Enables nonblocking operation, if the operation would block the call // fails with the error EAGAIN or EWOULDBLOCK. 
When this error occurs, break loop + // SAFETY: The pointer of mhdr can been guaranteed not null. let ret = unsafe { recvmsg(self.socket_fd, &mut mhdr, MSG_DONTWAIT) }; - - if ret == -1 { + // when use tcpsocket client and exit with ctrl+c, ret value will return 0 and get + // error WouldBlock or BrokenPipe, so we should handle this 0 to break this loop. + if ret == -1 || ret == 0 { let sock_err = Error::last_os_error(); - if sock_err.kind() == ErrorKind::WouldBlock { + if sock_err.kind() == ErrorKind::WouldBlock + || sock_err.kind() == ErrorKind::BrokenPipe + { break; } else { return Err(sock_err); } - } else if ret == 0 { - break; } - - let cmsg_hdr: Option<&cmsghdr> = unsafe { - if mhdr.msg_controllen > 0 { - cmsg_space - .as_mut() - .unwrap() - .set_len(mhdr.msg_controllen as usize); - CMSG_FIRSTHDR(&mhdr as *const msghdr) + self.parse_fd(&mhdr); + if ret > 0 { + self.buf.extend(&recv_buf[..ret as usize]); + if let Some(pos) = self.pos.checked_add(ret as usize) { + self.pos = pos; } else { - std::ptr::null() + return Err(ErrorKind::InvalidInput.into()); } - .as_ref() - }; - - if let Some(scm) = cmsg_hdr { - if scm.cmsg_level == SOL_SOCKET && scm.cmsg_type == SCM_RIGHTS { - let scm_cmsg_header = unsafe { - std::slice::from_raw_parts( - CMSG_DATA(scm), - std::mem::size_of::<[RawFd; 2]>() as usize, - ) - }; - for fd in scm_cmsg_header.iter() { - if *fd != 0 { - self.scm_fd.push(i32::from(*fd)); - } - } - } - }; - - self.buf.push(tmp_buf[0]); - if let Some(pos) = self.pos.checked_add(1) { - self.pos = pos; - } else { - return Err(ErrorKind::InvalidInput.into()); } } Ok(()) @@ -491,8 +196,6 @@ impl SocketRWHandler { /// # Errors /// The socket file descriptor is broken. fn write_fd(&mut self, length: usize) -> std::io::Result<()> { - use libc::{c_void, iovec, msghdr, sendmsg}; - let mut iov = iovec { iov_base: self.buf.as_slice()[(self.pos - length)..(self.pos - 1)].as_ptr() as *mut c_void, @@ -501,6 +204,7 @@ impl SocketRWHandler { // In `musl` toolchain, msghdr has private member `__pad0` and `__pad1`, it can't be // initialized in normal way. + // SAFETY: The member variables of mhdr have been initialization later. let mut mhdr: msghdr = unsafe { std::mem::zeroed() }; mhdr.msg_name = std::ptr::null_mut(); mhdr.msg_namelen = 0; @@ -510,7 +214,8 @@ impl SocketRWHandler { mhdr.msg_controllen = 0; mhdr.msg_flags = 0; - if unsafe { sendmsg(self.socket_fd, &mhdr, 0) } == -1 { + // SAFETY: The buffer address and length recorded in mhdr are both legal. + if unsafe { sendmsg(self.socket_fd, &mhdr, MSG_NOSIGNAL) } == -1 { Err(Error::new( ErrorKind::BrokenPipe, "The socket pipe is broken!", @@ -563,9 +268,9 @@ impl Write for SocketRWHandler { /// # Examples /// /// ```no_run -/// use std::os::unix::net::UnixStream; -/// use std::os::unix::io::AsRawFd; /// use std::io::prelude::*; +/// use std::os::unix::io::AsRawFd; +/// use std::os::unix::net::UnixStream; /// /// use machine_manager::socket::SocketHandler; /// @@ -599,6 +304,20 @@ impl SocketHandler { } } + pub fn get_line(&mut self) -> Result> { + self.buffer.clear(); + self.stream.clear(); + self.stream.read_fd().unwrap(); + self.stream.get_buf_string().map(|buffer| { + self.buffer = buffer; + if self.stream.pos == 0 { + None + } else { + Some(self.buffer.clone()) + } + }) + } + /// Parse the bytes received by `SocketHandler`. /// /// # Notes @@ -646,7 +365,8 @@ impl SocketHandler { /// The socket file descriptor is broken. 
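read_fd() now receives the payload in MAX_RECV_BUF_LEN chunks and collects any descriptors passed as SCM_RIGHTS ancillary data through parse_fd(); this is how QMP commands such as getfd obtain their file descriptor. For reference, a minimal sending-side sketch using libc directly; the helper name and buffer sizing are made up for illustration and are not part of this patch:

```rust
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::os::unix::net::UnixStream;

use libc::{
    c_void, cmsghdr, iovec, msghdr, sendmsg, CMSG_DATA, CMSG_FIRSTHDR, CMSG_LEN, CMSG_SPACE,
    SCM_RIGHTS, SOL_SOCKET,
};

// Illustrative helper: send `payload` plus one file descriptor as SCM_RIGHTS
// ancillary data, the sending-side counterpart of SocketRWHandler::parse_fd().
fn send_with_fd(sock: &UnixStream, payload: &[u8], fd: RawFd) -> std::io::Result<()> {
    let mut iov = iovec {
        iov_base: payload.as_ptr() as *mut c_void,
        iov_len: payload.len(),
    };
    // Control buffer sized for exactly one RawFd.
    let mut cmsg_buf = vec![0u8; unsafe { CMSG_SPACE(size_of::<RawFd>() as u32) } as usize];

    // msghdr has private padding fields on musl, so zero-initialize it first.
    let mut mhdr: msghdr = unsafe { std::mem::zeroed() };
    mhdr.msg_iov = &mut iov as *mut iovec;
    mhdr.msg_iovlen = 1;
    mhdr.msg_control = cmsg_buf.as_mut_ptr() as *mut c_void;
    mhdr.msg_controllen = cmsg_buf.len() as _;

    // SAFETY: mhdr refers to valid, properly sized buffers built above.
    unsafe {
        let cmsg: *mut cmsghdr = CMSG_FIRSTHDR(&mhdr);
        (*cmsg).cmsg_level = SOL_SOCKET;
        (*cmsg).cmsg_type = SCM_RIGHTS;
        (*cmsg).cmsg_len = CMSG_LEN(size_of::<RawFd>() as u32) as _;
        std::ptr::copy_nonoverlapping(
            &fd as *const RawFd as *const u8,
            CMSG_DATA(cmsg),
            size_of::<RawFd>(),
        );
        if sendmsg(sock.as_raw_fd(), &mhdr, 0) == -1 {
            return Err(std::io::Error::last_os_error());
        }
    }
    Ok(())
}
```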
pub fn send_str(&mut self, s: &str) -> std::io::Result<()> { self.stream.flush().unwrap(); - match self.stream.write(s.as_bytes()) { + let msg = s.to_string() + "\r"; + match self.stream.write(msg.as_bytes()) { Ok(_) => { let _ = self.stream.write(&[b'\n'])?; Ok(()) @@ -668,7 +388,7 @@ mod tests { use serde::{Deserialize, Serialize}; - use super::{Socket, SocketHandler, SocketRWHandler, SocketType}; + use crate::socket::{SocketHandler, SocketRWHandler}; // Environment Preparation for UnixSocket fn prepare_unix_socket_environment(socket_id: &str) -> (UnixListener, UnixStream, UnixStream) { @@ -686,7 +406,7 @@ mod tests { // Environment Recovery for UnixSocket fn recover_unix_socket_environment(socket_id: &str) { let socket_name: String = format!("test_{}.sock", socket_id); - std::fs::remove_file(&socket_name).unwrap(); + std::fs::remove_file(socket_name).unwrap(); } fn socket_basic_rw(client_fd: RawFd, server_fd: RawFd) -> bool { @@ -766,7 +486,7 @@ mod tests { let length = client.read(&mut response).unwrap(); assert_eq!( String::from_utf8_lossy(&response[..length]), - "I am a test str\n".to_string() + "I am a test str\r\n".to_string() ); // 2.send String @@ -775,7 +495,7 @@ mod tests { let length = client.read(&mut response).unwrap(); assert_eq!( String::from_utf8_lossy(&response[..length]), - "I am a test String\n".to_string() + "I am a test String\r\n".to_string() ); // After test. Environment Recover @@ -840,33 +560,4 @@ mod tests { // After test. Environment Recover recover_unix_socket_environment("03"); } - - #[test] - fn test_socket_lifecycle() { - // Pre test. Environment Preparation - let (listener, _, server) = prepare_unix_socket_environment("04"); - let socket = Socket::from_unix_listener(listener, None); - - // life cycle test - // 1.Unconnected - assert_eq!(socket.is_connected(), false); - - // 2.Connected - socket.bind_unix_stream(server); - assert_eq!(socket.is_connected(), true); - assert_eq!(socket.get_socket_type(), SocketType::Unix); - - // 3.Unbind SocketStream, reset state - socket.drop_stream(); - assert_eq!(socket.is_connected(), false); - - // 4.Accept and reconnect a new UnixStream - let _new_client = UnixStream::connect("test_04.sock"); - let new_server = socket.accept_unix_stream(); - socket.bind_unix_stream(new_server); - assert_eq!(socket.is_connected(), true); - - // After test. Environment Recover - recover_unix_socket_environment("04"); - } } diff --git a/machine_manager/src/state_query.rs b/machine_manager/src/state_query.rs new file mode 100644 index 0000000000000000000000000000000000000000..e5c0dd9f4f354657430f76dde98d3af7fd8dd0b7 --- /dev/null +++ b/machine_manager/src/state_query.rs @@ -0,0 +1,69 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; + +use log::error; +use once_cell::sync::Lazy; + +static STATE_QUERY_MANAGER: Lazy> = + Lazy::new(|| RwLock::new(StateQueryManager::new())); + +pub type StateQueryCallback = dyn Fn() -> String + Send + Sync; + +struct StateQueryManager { + query_callbacks: HashMap>, +} + +impl StateQueryManager { + fn new() -> Self { + Self { + query_callbacks: HashMap::new(), + } + } + + fn register_query_callback(&mut self, key: String, callback: Arc) { + self.query_callbacks.insert(key, callback); + } + + fn unregister_query_callback(&mut self, key: &str) { + if self.query_callbacks.remove(key).is_none() { + error!("There is no query callback with key {}", key); + } + } + + fn query_workloads(&self) -> Vec<(String, String)> { + self.query_callbacks + .iter() + .map(|(module, query)| (module.clone(), query())) + .collect() + } +} + +pub fn register_state_query_callback(key: String, callback: Arc) { + STATE_QUERY_MANAGER + .write() + .unwrap() + .register_query_callback(key, callback); +} + +pub fn unregister_state_query_callback(key: &str) { + STATE_QUERY_MANAGER + .write() + .unwrap() + .unregister_query_callback(key); +} + +pub fn query_workloads() -> Vec<(String, String)> { + STATE_QUERY_MANAGER.read().unwrap().query_workloads() +} diff --git a/machine_manager/src/temp_cleaner.rs b/machine_manager/src/temp_cleaner.rs index 4a4683267587b5d20b37290a1c7bcc0f47846782..cd5b2f42ddb58c2dd69f1e3eeba51752e641ac27 100644 --- a/machine_manager/src/temp_cleaner.rs +++ b/machine_manager/src/temp_cleaner.rs @@ -10,28 +10,44 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. +use std::collections::HashMap; use std::fs; +use std::path::Path; +use std::sync::Arc; + +use log::{error, info}; static mut GLOBAL_TEMP_CLEANER: Option = None; +pub type ExitNotifier = dyn Fn() + Send + Sync; + /// This structure used to keep temporary file which was created by program, and would be deleted /// when Vm exit. pub struct TempCleaner { /// Path of files that should be removed after exiting the vm. paths: Vec, + /// Notifiers are used to release residual resources after exiting the vm. + notifiers: HashMap>, } impl TempCleaner { pub fn object_init() { + // SAFETY: This global variable is only used in single thread, + // so there is no data competition or synchronization problem. unsafe { if GLOBAL_TEMP_CLEANER.is_none() { - GLOBAL_TEMP_CLEANER = Some(TempCleaner { paths: Vec::new() }); + GLOBAL_TEMP_CLEANER = Some(TempCleaner { + paths: Vec::new(), + notifiers: HashMap::new(), + }); } } } /// Add to be removed file path pub fn add_path(path: String) { + // SAFETY: This global variable is only used in single thread, + // so there is no data competition or synchronization problem. unsafe { if let Some(tmp) = GLOBAL_TEMP_CLEANER.as_mut() { tmp.paths.push(path); @@ -39,17 +55,56 @@ impl TempCleaner { } } - /// Clean the temporary files - pub fn clean() { + /// Add exit notifier. + pub fn add_exit_notifier(id: String, exit: Arc) { + // SAFETY: This global variable is only used in single thread, + // so there is no data competition or synchronization problem. + unsafe { + if let Some(tmp) = GLOBAL_TEMP_CLEANER.as_mut() { + tmp.notifiers.insert(id, exit); + } + } + } + + /// Remove exit notifier by id. + pub fn remove_exit_notifier(id: &str) { + // SAFETY: This global variable is only used in single thread, + // so there is no data competition or synchronization problem. 
unsafe { if let Some(tmp) = GLOBAL_TEMP_CLEANER.as_mut() { - while let Some(path) = tmp.paths.pop() { - if let Err(ref e) = fs::remove_file(&path) { - error!("Failed to delete console / socket file:{} :{}", &path, e); - } else { - info!("Delete file: {} successfully.", &path); - } + tmp.notifiers.remove(id); + } + } + } + + fn clean_files(&mut self) { + while let Some(path) = self.paths.pop() { + if Path::new(&path).exists() { + if let Err(ref e) = fs::remove_file(&path) { + error!("Failed to delete console / socket file:{} :{}", &path, e); + } else { + info!("Delete file: {} successfully", &path); } + } else { + info!("file: {} has been removed", &path); + } + } + } + + fn exit_notifier(&mut self) { + for (_id, exit) in self.notifiers.iter() { + exit(); + } + } + + /// Clean the resources + pub fn clean() { + // SAFETY: This global variable is only used in single thread, + // so there is no data competition or synchronization problem. + unsafe { + if let Some(tmp) = GLOBAL_TEMP_CLEANER.as_mut() { + tmp.clean_files(); + tmp.exit_notifier(); } } } diff --git a/machine_manager/src/test_server.rs b/machine_manager/src/test_server.rs new file mode 100644 index 0000000000000000000000000000000000000000..f126ede07bc83c32d2ade0890f0607e991638827 --- /dev/null +++ b/machine_manager/src/test_server.rs @@ -0,0 +1,268 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
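Besides removing queued file paths, TempCleaner::clean() now also runs arbitrary exit hooks after the file cleanup. A short usage sketch from inside the machine_manager crate; the id string, path, and closure body are made up for illustration:

```rust
use std::sync::Arc;

use crate::temp_cleaner::TempCleaner;

fn register_cleanup_hooks() {
    TempCleaner::object_init();
    // Files queued here are unlinked by clean_files() on VM exit.
    TempCleaner::add_path("/tmp/stratovirt-console.sock".to_string());
    // Exit notifiers run after file cleanup; useful for devices holding
    // residual resources (shared memory, eventfds, child processes, ...).
    TempCleaner::add_exit_notifier(
        "demo-device".to_string(),
        Arc::new(|| println!("demo-device: released residual resources")),
    );
    // A notifier can be dropped again, e.g. when the device is hot-unplugged.
    TempCleaner::remove_exit_notifier("demo-device");
}
```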
+ +use std::os::unix::io::RawFd; +use std::os::unix::net::UnixStream; +use std::os::unix::prelude::AsRawFd; +use std::rc::Rc; +use std::sync::{Arc, Mutex}; + +use hex::FromHexError; +use vmm_sys_util::epoll::EventSet; + +use crate::event_loop::EventLoop; +use crate::machine::{MachineTestInterface, IOTHREADS}; +use crate::socket::SocketHandler; +use util::loop_context::{EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation}; +use util::test_helper::{eoi_intx, get_test_clock, has_msix_msg, query_intx, set_test_clock}; + +pub struct TestSock { + stream: UnixStream, + controller: Arc>, +} + +impl TestSock { + pub fn new(path: &str, controller: Arc>) -> Self { + let stream = match UnixStream::connect(path) { + Ok(s) => s, + Err(e) => { + panic!("Failed to connect test socket: {}", e); + } + }; + TestSock { stream, controller } + } + + pub fn get_stream_fd(&self) -> RawFd { + self.stream.as_raw_fd() + } +} + +impl EventNotifierHelper for TestSock { + fn internal_notifiers(socket: Arc>) -> Vec { + let socket_clone = socket.clone(); + let handler: Rc = Rc::new(move |_, _| { + let locked_socket = socket_clone.lock().unwrap(); + handle_test_cmd(locked_socket.get_stream_fd(), &locked_socket.controller); + None + }); + + let mut notifiers = Vec::new(); + let handlers = vec![handler]; + notifiers.push(EventNotifier::new( + NotifierOperation::AddExclusion, + socket.lock().unwrap().get_stream_fd(), + None, + EventSet::IN, + handlers, + )); + notifiers + } +} + +fn get_min_timeout() -> i64 { + let mut min_timeout = EventLoop::get_ctx(None).unwrap().timers_min_duration(); + + for thread in IOTHREADS.lock().unwrap().iter() { + let timeout = EventLoop::get_ctx(Some(&thread.id)) + .unwrap() + .timers_min_duration(); + if timeout.is_some() + && (min_timeout.is_none() + || (min_timeout.is_some() + && timeout.as_ref().unwrap() < min_timeout.as_ref().unwrap())) + { + min_timeout = timeout; + } + } + match min_timeout { + Some(d) => { + let timeout = d.as_nanos(); + if timeout >= i64::MAX as u128 { + i64::MAX + } else { + timeout as i64 + } + } + None => -1, + } +} + +fn update_clock(target: u64) { + let mut current = get_test_clock(); + while current < target { + let timeout = get_min_timeout(); + let mut step = target.checked_sub(current).unwrap(); + if timeout != -1 && step > timeout as u64 { + step = timeout as u64; + } + + set_test_clock(current.checked_add(step).unwrap()); + EventLoop::get_ctx(None).unwrap().run_timers(); + for thread in IOTHREADS.lock().unwrap().iter() { + EventLoop::get_ctx(Some(&thread.id)).unwrap().run_timers(); + } + + current = get_test_clock(); + } +} + +fn handle_test_cmd(stream_fd: RawFd, controller: &Arc>) { + let mut handler = SocketHandler::new(stream_fd); + let msg = handler.get_line().unwrap().unwrap(); + + let cmd: Vec<&str> = msg.split(' ').collect(); + assert!(!cmd.is_empty()); + match cmd[0] { + "read" => { + assert!(cmd.len() == 3); + let addr = u64::from_str_radix(cmd[1].trim_start_matches("0x"), 16).unwrap(); + let size = usize::from_str_radix(cmd[2].trim_start_matches("0x"), 16).unwrap(); + let mut data = vec![0_u8; size]; + + controller + .lock() + .unwrap() + .mmio_read(addr, data.as_mut_slice()); + handler + .send_str(format!("OK 0x{}", hex::encode(data).as_str()).as_str()) + .unwrap(); + } + "readb" | "readw" | "readl" | "readq" => { + assert!(cmd.len() == 2); + let addr = u64::from_str_radix(cmd[1].trim_start_matches("0x"), 16).unwrap(); + let size = match cmd[0] { + "readb" => 1, + "readw" => 2, + "readl" => 4, + "readq" => 8, + _ => 
unreachable!(), + }; + let mut data = vec![0_u8; 8]; + + controller + .lock() + .unwrap() + .mmio_read(addr, data[..size].as_mut()); + data.reverse(); + + handler + .send_str(format!("OK 0x{}", hex::encode(data).as_str()).as_str()) + .unwrap(); + } + "write" => { + assert!(cmd.len() == 4); + let addr = u64::from_str_radix(cmd[1].trim_start_matches("0x"), 16).unwrap(); + let size = usize::from_str_radix(cmd[2].trim_start_matches("0x"), 16).unwrap(); + let data_str = cmd[3].trim_start_matches("0x"); + let data = match hex::decode(data_str) { + Ok(d) => d, + Err(FromHexError::OddLength) => hex::decode(format!("0{}", data_str)).unwrap(), + Err(e) => panic!("Unable to decode {} to hex: {}", data_str, e), + }; + assert!(data.len() == size); + + controller.lock().unwrap().mmio_write(addr, data.as_slice()); + handler.send_str("OK").unwrap(); + } + "writeb" | "writew" | "writel" | "writeq" => { + assert!(cmd.len() == 3); + let addr = u64::from_str_radix(cmd[1].trim_start_matches("0x"), 16).unwrap(); + let input_str = cmd[2].trim_start_matches("0x"); + let input = match hex::decode(input_str) { + Ok(i) => i, + Err(FromHexError::OddLength) => hex::decode(format!("0{}", input_str)).unwrap(), + Err(e) => panic!("Unable to decode {} to hex: {}", input_str, e), + }; + let size = match cmd[0] { + "writeb" => 1, + "writew" => 2, + "writel" => 4, + "writeq" => 8, + _ => unreachable!(), + }; + let mut data = vec![0_u8; size]; + data[size - input.len()..].copy_from_slice(input.as_slice()); + data.reverse(); + + controller.lock().unwrap().mmio_write(addr, data.as_slice()); + handler.send_str("OK").unwrap(); + } + "memset" => { + assert!(cmd.len() == 4); + let addr = u64::from_str_radix(cmd[1].trim_start_matches("0x"), 16).unwrap(); + let size = usize::from_str_radix(cmd[2].trim_start_matches("0x"), 16).unwrap(); + let pat = hex::decode(cmd[3].trim_start_matches("0x")).unwrap(); + let pat_size = pat.len(); + let mut data = vec![0_u8; size]; + for index in 0..data.len() { + data[index] = pat[index % pat_size]; + } + + controller.lock().unwrap().mmio_write(addr, data.as_slice()); + handler.send_str("OK").unwrap(); + } + "clock_step" => { + assert!(cmd.len() < 3); + let value = match cmd.len() { + 1 => get_min_timeout(), + 2 => cmd[1].parse::().unwrap(), + _ => panic!("Too many arguments in clock_step command"), + }; + let ns: u64 = std::cmp::max(value, 0) as u64; + + update_clock(get_test_clock().checked_add(ns).unwrap()); + handler + .send_str(format!("OK {}", get_test_clock()).as_str()) + .unwrap(); + } + "clock_set" => { + assert!(cmd.len() == 2); + let value = cmd[1].parse::().unwrap(); + let ns: u64 = std::cmp::max(value, 0) as u64; + + update_clock(ns); + handler + .send_str(format!("OK {}", get_test_clock()).as_str()) + .unwrap(); + } + "query_msix" => { + assert!(cmd.len() == 3); + let addr = cmd[1].parse::().unwrap(); + let data = cmd[2].parse::().unwrap(); + match has_msix_msg(addr, data) { + true => handler.send_str("OK TRUE".to_string().as_str()).unwrap(), + false => handler.send_str("OK FALSE".to_string().as_str()).unwrap(), + } + } + "query_intx" => { + assert!(cmd.len() == 2); + let irq = cmd[1].parse::().unwrap(); + match query_intx(irq) { + true => handler.send_str("OK TRUE".to_string().as_str()).unwrap(), + false => handler.send_str("OK FALSE".to_string().as_str()).unwrap(), + } + } + "eoi_intx" => { + assert!(cmd.len() == 2); + let irq = cmd[1].parse::().unwrap(); + match eoi_intx(irq) { + true => handler.send_str("OK TRUE".to_string().as_str()).unwrap(), + false => handler.send_str("OK 
FALSE".to_string().as_str()).unwrap(), + } + } + _ => { + handler + .send_str(format!("Unsupported command: {}", cmd[0]).as_str()) + .unwrap(); + } + }; +} diff --git a/migration/Cargo.toml b/migration/Cargo.toml index 6991804940dd7dd850037633e8a5309626e6e9d5..f0d10681b29e2ebc82811d1e8daa7e1d9378185a 100644 --- a/migration/Cargo.toml +++ b/migration/Cargo.toml @@ -1,17 +1,19 @@ [package] name = "migration" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" [dependencies] +kvm-ioctls = "0.16.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +once_cell = "1.18.0" +log = "0.4" +thiserror = "1.0" +anyhow = "1.0" util = {path = "../util"} -error-chain = "0.12.4" -kvm-ioctls = "0.6.0" -serde = { version = ">=1.0.114", features = ["derive"] } -serde_json = "1.0.55" -once_cell = "1.9.0" -log = "0.4.8" +machine_manager = { path = "../machine_manager" } [dev-dependencies] -migration_derive = { path = "../migration_derive" } +migration_derive = { path = "migration_derive" } diff --git a/migration_derive/Cargo.toml b/migration/migration_derive/Cargo.toml similarity index 52% rename from migration_derive/Cargo.toml rename to migration/migration_derive/Cargo.toml index 4c115c12b0b478245c9fb2a6e245fce97a6ca6e9..beecc7b237cd5da1276063212c03221d182d621e 100644 --- a/migration_derive/Cargo.toml +++ b/migration/migration_derive/Cargo.toml @@ -1,18 +1,18 @@ [package] name = "migration_derive" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" license = "Mulan PSL v2" [dependencies] -syn = { version = "1.0.72", features = ["full", "extra-traits"] } -quote = "1.0.7" +syn = { version = "2.0.18", features = ["full", "extra-traits"] } +quote = "1.0" proc-macro2 = "1.0" [dev-dependencies] -migration = { path = "../migration" } -util = { path = "../util" } +migration = { path = "../../migration" } +util = { path = "../../util" } [lib] name = "migration_derive" diff --git a/migration_derive/src/attr_parser.rs b/migration/migration_derive/src/attr_parser.rs similarity index 61% rename from migration_derive/src/attr_parser.rs rename to migration/migration_derive/src/attr_parser.rs index 96b26af9d752a16b6a459e20d5ac0a0bfe62d8a4..237949842e893cd2d9943ee1984e8b150cea220d 100644 --- a/migration_derive/src/attr_parser.rs +++ b/migration/migration_derive/src/attr_parser.rs @@ -10,7 +10,7 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use syn::{MetaList, MetaNameValue}; +use syn::Lit; // Read the program version in `Cargo.toml`. 
const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION"); @@ -39,10 +39,34 @@ const FIELD_ATTRIBUTE_NAME: &str = "alias"; pub fn parse_struct_attributes(attributes: &[syn::Attribute]) -> (u32, u32) { let (mut current_version, mut compat_version) = (0, 0); for attribute in attributes { - if attribute.path.is_ident(ATTRIBUTE_NAME) { - if let Ok(syn::Meta::List(meta)) = attribute.parse_meta() { - get_attr_version(meta, &mut current_version, &mut compat_version); - } + if attribute.path().is_ident(ATTRIBUTE_NAME) { + let _ = attribute.parse_nested_meta(|meta| { + if meta.path.is_ident(CURRENT_VERSION) { + let value = meta.value()?; + let lit = value.parse::()?; + current_version = match lit { + syn::Lit::Int(lit_int) => lit_int.base10_parse().unwrap(), + syn::Lit::Str(lit_str) => version_to_u32(&lit_str.value()), + _ => panic!("Unsupported version number."), + }; + + return Ok(()); + } + + if meta.path.is_ident(COMPAT_VERSION) { + let value = meta.value()?; + let lit = value.parse::()?; + compat_version = match lit { + syn::Lit::Int(lit_int) => lit_int.base10_parse().unwrap(), + syn::Lit::Str(lit_str) => version_to_u32(&lit_str.value()), + _ => panic!("Unsupported version number."), + }; + + return Ok(()); + } + + Err(meta.error("unrecognized repr")) + }); } } @@ -55,7 +79,7 @@ pub fn parse_field_attributes(attributes: &[syn::Attribute]) -> Option { let mut field_alias = None; for attribute in attributes { - if attribute.path.is_ident(FIELD_ATTRIBUTE_NAME) { + if attribute.path().is_ident(FIELD_ATTRIBUTE_NAME) { let content: proc_macro2::TokenStream = attribute.parse_args().unwrap(); field_alias = Some(content.to_string()); } @@ -64,31 +88,6 @@ pub fn parse_field_attributes(attributes: &[syn::Attribute]) -> Option { field_alias } -fn get_attr_version(meta_list: MetaList, current_version: &mut u32, compat_version: &mut u32) { - for meta in meta_list.nested.iter() { - if let syn::NestedMeta::Meta(syn::Meta::NameValue(attr_name_value)) = meta { - if let Some(version) = meta_name_parse(attr_name_value, CURRENT_VERSION) { - *current_version = version; - } - if let Some(version) = meta_name_parse(attr_name_value, COMPAT_VERSION) { - *compat_version = version; - } - } - } -} - -fn meta_name_parse(meta_name: &MetaNameValue, name_str: &str) -> Option { - if *meta_name.path.get_ident().unwrap() == name_str { - Some(match &meta_name.lit { - syn::Lit::Int(lit_int) => lit_int.base10_parse().unwrap(), - syn::Lit::Str(lit_str) => version_to_u32(&lit_str.value()), - _ => panic!("Unsupported version number."), - }) - } else { - None - } -} - /// Check current version and compat version. 
/// /// # Check rules @@ -130,17 +129,37 @@ fn version_to_u32(version_str: &str) -> u32 { panic!("Version str is illegal."); } - (version_vec[2] as u32) + ((version_vec[1] as u32) << 8) + ((version_vec[0] as u32) << 16) + u32::from(version_vec[2]) + (u32::from(version_vec[1]) << 8) + (u32::from(version_vec[0]) << 16) } -#[test] -fn test_version_to_u32() { - let version_str_01 = "0.1.0"; - assert_eq!(version_to_u32(version_str_01), 256); +#[cfg(test)] +mod test { + use syn::{parse_quote, ItemStruct}; - let version_str_02 = "1.18.0"; - assert_eq!(version_to_u32(version_str_02), 70_144); + use super::*; - let version_str_03 = "255.255.255"; - assert_eq!(version_to_u32(version_str_03), 16_777_215); + #[test] + fn test_version_to_u32() { + let version_str_01 = "0.1.0"; + assert_eq!(version_to_u32(version_str_01), 256); + + let version_str_02 = "1.18.0"; + assert_eq!(version_to_u32(version_str_02), 70_144); + + let version_str_03 = "255.255.255"; + assert_eq!(version_to_u32(version_str_03), 16_777_215); + } + + #[test] + fn test_parse_attribute() { + let input: ItemStruct = parse_quote! { + #[desc_version(current_version = 1, compat_version = "0.1.0")] + pub struct MyStruct(u16, u32); + }; + + let (current_version, compat_version) = parse_struct_attributes(input.attrs.as_slice()); + + assert_eq!(current_version, 1); + assert_eq!(compat_version, 256); + } } diff --git a/migration_derive/src/field_parser.rs b/migration/migration_derive/src/field_parser.rs similarity index 92% rename from migration_derive/src/field_parser.rs rename to migration/migration_derive/src/field_parser.rs index 24033997f415f01889311f229193de42a1d792ea..3f754cb8c87bac8b3f1e65f784b7de1330715fd0 100644 --- a/migration_derive/src/field_parser.rs +++ b/migration/migration_derive/src/field_parser.rs @@ -41,11 +41,8 @@ fn parse_field( // parse var of field let var_ident = input.value().ident.as_ref().unwrap(); let var_name = var_ident.to_string(); - let alias_name = if let Some(alias) = parse_field_attributes(&input.value().attrs) { - alias - } else { - var_name.clone() - }; + let alias_name = + parse_field_attributes(&input.value().attrs).unwrap_or_else(|| var_name.clone()); // parse type of field let ty = input.value().ty.clone(); @@ -76,14 +73,10 @@ fn parse_field( fn parse_ty(input: syn::Type) -> (syn::TypePath, usize, bool) { match input { syn::Type::Array(array) => { - let array_type_token; - - match *array.elem.clone() { - syn::Type::Path(token) => { - array_type_token = token; - } + let array_type_token = match *array.elem.clone() { + syn::Type::Path(token) => token, _ => panic!("Unsupported array type."), - } + }; match &array.len { syn::Expr::Lit(expr_lit) => match &expr_lit.lit { diff --git a/migration_derive/src/lib.rs b/migration/migration_derive/src/lib.rs similarity index 93% rename from migration_derive/src/lib.rs rename to migration/migration_derive/src/lib.rs index a71ab54f0f436c6f4081e6f7552cd91835739ce3..11736fc1ae0c8ee437ef1b243ea247550130f06a 100644 --- a/migration_derive/src/lib.rs +++ b/migration/migration_derive/src/lib.rs @@ -40,25 +40,24 @@ //! } //! //! fn main() { -//! println!("Description of DeviceState is {:?}", DeviceState::descriptor()); +//! println!( +//! "Description of DeviceState is {:?}", +//! DeviceState::descriptor() +//! ); //! } -//! //! ``` //! //! 2. The `ByteCode` derive to auto add `ByteCode` trait and its relying trait for //! struct, such as `Default`, `Sync`, `Send`. 
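For reference, version_to_u32() in attr_parser.rs packs a "major.minor.micro" string into a single u32 as (major << 16) | (minor << 8) | micro, so "0.1.0" encodes to 256 and "1.18.0" to 1 * 65536 + 18 * 256 + 0 = 70144, which is what the unit tests above assert.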
-#[macro_use] -extern crate syn; - -use proc_macro::TokenStream; -use quote::quote; -use syn::DeriveInput; - mod attr_parser; mod field_parser; mod struct_parser; +use proc_macro::TokenStream; +use quote::quote; +use syn::{parse_macro_input, DeriveInput}; + /// Define a macro derive `Desc`. #[proc_macro_derive(Desc, attributes(desc_version, alias))] pub fn derive_desc(input: TokenStream) -> TokenStream { @@ -103,8 +102,6 @@ pub fn derive_bytecode(input: TokenStream) -> TokenStream { #struct_default } } - unsafe impl Send for #ident {} - unsafe impl Sync for #ident {} impl util::byte_code::ByteCode for #ident {} }) .into() diff --git a/migration_derive/src/struct_parser.rs b/migration/migration_derive/src/struct_parser.rs similarity index 100% rename from migration_derive/src/struct_parser.rs rename to migration/migration_derive/src/struct_parser.rs diff --git a/migration/src/error.rs b/migration/src/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..4e3e449eceaa8946a4a95396423a5a9849310022 --- /dev/null +++ b/migration/src/error.rs @@ -0,0 +1,62 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use thiserror::Error; + +use crate::protocol::MigrationStatus; + +#[derive(Error, Debug)] +pub enum MigrationError { + #[error("UtilError")] + Util { + #[from] + source: util::error::UtilError, + }, + #[error("Io")] + Io { + #[from] + source: std::io::Error, + }, + #[error("Json")] + Json { + #[from] + source: serde_json::Error, + }, + #[error("Migration compat_version {0} higher than current version {1}")] + VersionNotFit(u32, u32), + #[error("{0} for snapshot file / migration stream is not fit")] + HeaderItemNotFit(String), + #[error("Failed to transfer migration status from {0} to {1}.")] + InvalidStatusTransfer(MigrationStatus, MigrationStatus), + #[error("Can't restore structure from raw slice: {0}")] + FromBytesError(&'static str), + #[error("Failed to get GIC {0} register: {1}")] + GetGicRegsError(&'static str, String), + #[error("Failed to set GIC {0} register: {1}")] + SetGicRegsError(&'static str, String), + #[error("Failed to save vm memory: {0}")] + SaveVmMemoryErr(String), + #[error("Failed to restore vm memory: {0}")] + RestoreVmMemoryErr(String), + #[error("Failed to send vm memory: {0}")] + SendVmMemoryErr(String), + #[error("Failed to receive vm memory: {0}")] + RecvVmMemoryErr(String), + #[error("Response error")] + ResponseErr, + #[error("Migration status mismatch: source {0}, destination {1}.")] + MigrationStatusErr(String, String), + #[error("Migration config {0} mismatch: source {1}, destination {2}.")] + MigrationConfigErr(String, String, String), + #[error("Invalid snapshot path for restoring snapshot")] + InvalidSnapshotPath, +} diff --git a/migration/src/general.rs b/migration/src/general.rs new file mode 100644 index 0000000000000000000000000000000000000000..d0c7c65dc053721871fb6bdbb51a2fe24e4b4ebc --- /dev/null +++ b/migration/src/general.rs @@ -0,0 +1,311 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. 
+// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::collections::{hash_map::DefaultHasher, HashMap}; +use std::hash::{Hash, Hasher}; +use std::io::{Read, Write}; +use std::mem::size_of; +use std::thread; +use std::time::{Duration, Instant}; + +use anyhow::{anyhow, bail, Context, Result}; + +use crate::manager::{Instance, MIGRATION_MANAGER}; +use crate::protocol::{ + DeviceStateDesc, FileFormat, MigrationHeader, MigrationStatus, VersionCheck, HEADER_LENGTH, +}; +use crate::{MigrationError, MigrationManager}; +use machine_manager::machine::VmState; +use util::unix::host_page_size; + +impl MigrationManager { + /// Write `MigrationHeader` to `Write` trait object as bytes. + /// `MigrationHeader` will occupy the first 4096 bytes in snapshot file. + /// bytes 0-8: the length of the header that's in serde style from struct MigrationHeader. + /// bytes 8-4096: the header that's in serde style from struct MigrationHeader, and tailing 0s. + /// + /// # Arguments + /// + /// * `file_format` - confirm snapshot file format. + /// * `fd` - The `Write` trait object to write header message. + pub fn save_header(file_format: Option, fd: &mut dyn Write) -> Result<()> { + let mut header = MigrationHeader::default(); + if let Some(format) = file_format { + header.format = format; + header.desc_len = match format { + FileFormat::Device => Self::desc_db_len()?, + FileFormat::MemoryFull => (host_page_size() as usize) * 2 - HEADER_LENGTH, + }; + } else { + header.desc_len = Self::desc_db_len()?; + } + + let header_serde = serde_json::to_vec(&header)?; + if header_serde.len() > HEADER_LENGTH - 8 { + return Err(anyhow!(MigrationError::SaveVmMemoryErr( + "header too long".to_string() + ))); + } + let header_len = header_serde.len().to_le_bytes(); + let mut input_slice = [0u8; HEADER_LENGTH]; + input_slice[0..8].copy_from_slice(&header_len); + input_slice[8..header_serde.len() + 8].copy_from_slice(&header_serde); + + fd.write(&input_slice) + .with_context(|| "Failed to save migration header")?; + + Ok(()) + } + + /// Restore and parse `MigrationHeader` from `Read` object. + /// + /// # Arguments + /// + /// * `fd` - The `Read` trait object to read header message. + pub fn restore_header(fd: &mut dyn Read) -> Result { + // 1. reader header length + let mut header_len = [0u8; 8]; + fd.read_exact(&mut header_len)?; + let header_len = u64::from_le_bytes(header_len); + if header_len > HEADER_LENGTH as u64 - 8 { + return Err(anyhow!(MigrationError::FromBytesError( + "migration header length too large" + ))); + } + + // 2. read header according to its length + let mut header_bytes = Vec::new(); + // SAFETY: upper limit of header_len is HEADER_LENGTH - 8. + header_bytes.resize(header_len as usize, 0); + fd.read_exact(&mut header_bytes)?; + + // 3. 
change the binary format header into struct + let deserializer = serde_json::Deserializer::from_slice(&header_bytes); + let mut migration_header: Option = None; + for header in deserializer.into_iter::() { + migration_header = match header { + Ok(h) => Some(h), + Err(_) => { + return Err(anyhow!(MigrationError::FromBytesError( + "Invalid migration header" + ))) + } + }; + } + + // 4. read the extra bits + let mut place_holder = vec![0u8; HEADER_LENGTH - 8 - header_len as usize]; + fd.read_exact(&mut place_holder)?; + + // SAFETY: migration_header is Some here. + Ok(migration_header.unwrap()) + } + + /// Write all `DeviceStateDesc` in `desc_db` hashmap to `Write` trait object. + pub fn save_desc_db(fd: &mut dyn Write) -> Result<()> { + let length = Self::desc_db_len()?; + let mut buffer = Vec::new(); + // SAFETY: desc db length is under control. + buffer.resize(length, 0); + let mut start = 0; + + let desc_db = MIGRATION_MANAGER.desc_db.read().unwrap(); + for (_, desc) in desc_db.iter() { + let desc_str = serde_json::to_string(desc)?; + let desc_bytes = desc_str.as_bytes(); + buffer[start..start + desc_bytes.len()].copy_from_slice(desc_bytes); + start += desc_bytes.len(); + } + fd.write_all(&buffer) + .with_context(|| "Failed to write descriptor message.")?; + + Ok(()) + } + + /// Load and parse device state descriptor from `Read` trait object. Save as a Hashmap. + pub fn restore_desc_db( + fd: &mut dyn Read, + desc_length: usize, + ) -> Result> { + let mut desc_buffer = Vec::new(); + // SAFETY: desc_length has been checked in check_header(). + desc_buffer.resize(desc_length, 0); + fd.read_exact(&mut desc_buffer)?; + let mut snapshot_desc_db = HashMap::::new(); + + let deserializer = serde_json::Deserializer::from_slice(&desc_buffer); + for desc in deserializer.into_iter::() { + let device_desc: DeviceStateDesc = match desc { + Ok(desc) => desc, + Err(_) => break, + }; + if device_desc.size > (1 << 20) { + bail!("The size field of DeviceStateDesc is too large"); + } + snapshot_desc_db.insert(device_desc.alias, device_desc); + } + + Ok(snapshot_desc_db) + } + + /// Get vm state and check its version can be match. + /// + /// # Arguments + /// + /// * fd - The `Read` trait object. + /// * snap_desc_db - snap_desc_db - snapshot state descriptor. + pub fn check_vm_state( + fd: &mut dyn Read, + desc_db: &HashMap, + ) -> Result<(Vec, u64)> { + let mut instance = Instance::default(); + fd.read_exact( + // SAFETY: The pointer of instance can guaranteed not null. + unsafe { + std::slice::from_raw_parts_mut( + &mut instance as *mut Instance as *mut u8, + size_of::(), + ) + }, + ) + .with_context(|| "Failed to read instance of object")?; + + let locked_desc_db = MIGRATION_MANAGER.desc_db.read().unwrap(); + let snap_desc = desc_db + .get(&instance.object) + .with_context(|| "Failed to get instance object")?; + let current_desc = locked_desc_db + .get(&snap_desc.name) + .with_context(|| "Failed to get snap_desc name")?; + + let mut state_data = Vec::new(); + // SAFETY: size has been checked in restore_desc_db(). 
+ state_data.resize(snap_desc.size as usize, 0); + fd.read_exact(&mut state_data)?; + + match current_desc.check_version(snap_desc) { + VersionCheck::Same => {} + VersionCheck::Compat => { + current_desc + .add_padding(snap_desc, &mut state_data) + .with_context(|| "Failed to transform snapshot data version")?; + } + VersionCheck::Mismatch => { + return Err(anyhow!(MigrationError::VersionNotFit( + current_desc.compat_version, + snap_desc.current_version, + ))) + } + } + + Ok((state_data, instance.name)) + } + + /// Get `Device`'s alias from device type string. + /// + /// # Argument + /// + /// * `device_type` - The type string of device instance. + pub fn get_desc_alias(device_type: &str) -> Option { + Some(translate_id(device_type)) + } + + /// Return `desc_db` value len(0 restored as `serde_json`) + pub fn desc_db_len() -> Result { + let mut db_data_len = 0; + let desc_db = MIGRATION_MANAGER.desc_db.read().unwrap(); + for (_, desc) in desc_db.iter() { + let desc_str = serde_json::to_string(desc)?; + db_data_len += desc_str.as_bytes().len(); + } + + Ok(db_data_len) + } + + /// Get current migration status for migration manager. + pub fn status() -> MigrationStatus { + *MIGRATION_MANAGER.status.read().unwrap() + } + + /// Set a new migration status for migration manager. + /// + /// # Arguments + /// + /// * `new_status`: new migration status, the transform must be illegal. + pub fn set_status(new_status: MigrationStatus) -> Result<()> { + let mut status = MIGRATION_MANAGER.status.write().unwrap(); + *status = status.transfer(new_status)?; + + Ok(()) + } + + /// Check whether current migration status is active. + pub fn is_active() -> bool { + Self::status() == MigrationStatus::Active + } + + /// Check whether current migration status is cancel. + pub fn is_canceled() -> bool { + Self::status() == MigrationStatus::Canceled + } +} + +pub trait Lifecycle { + /// Pause VM during migration. + fn pause() -> Result<()> { + if let Some(locked_vm) = &MIGRATION_MANAGER.vmm.read().unwrap().vm { + let now = Instant::now(); + while !locked_vm.lock().unwrap().pause() { + thread::sleep(Duration::from_millis(5)); + if now.elapsed() > Duration::from_secs(2) { + // Not use resume() to avoid unnecessary qmp event. + locked_vm + .lock() + .unwrap() + .notify_lifecycle(VmState::Paused, VmState::Running); + bail!("Failed to pause VM"); + } + } + } + + Ok(()) + } + + /// Resume VM during migration. + fn resume() -> Result<()> { + let locked_transports = &MIGRATION_MANAGER.vmm.read().unwrap().transports; + for (_, transport) in locked_transports.iter() { + transport.lock().unwrap().resume()?; + } + + let locked_devices = &MIGRATION_MANAGER.vmm.read().unwrap().devices; + for (_, device) in locked_devices.iter() { + device.lock().unwrap().resume()?; + } + + Ok(()) + } +} + +impl Lifecycle for MigrationManager {} + +/// Converting device instance to unique ID of u64 bit. +/// Because name of String type in `Instance` does not implement Copy trait. +/// +/// # Arguments +/// +/// * `dev_id` - The device id. +pub fn translate_id(dev_id: &str) -> u64 { + let mut hash = DefaultHasher::new(); + dev_id.hash(&mut hash); + hash.finish() +} diff --git a/migration/src/header.rs b/migration/src/header.rs deleted file mode 100644 index 46833b40a1a30e9b938fe84013078dd51011222d..0000000000000000000000000000000000000000 --- a/migration/src/header.rs +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. -// -// StratoVirt is licensed under Mulan PSL v2. 
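// ----------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): save_desc_db() above writes the
// DeviceStateDesc entries as back-to-back JSON objects with no separator, and
// restore_desc_db() reads them back through serde_json's StreamDeserializer.
// The same pattern, shown with serde_json::Value so it stays self-contained
// (assumes the serde_json dependency already used by this crate):
fn split_concatenated_json(buf: &[u8]) -> Vec<serde_json::Value> {
    let mut values = Vec::new();
    // into_iter() yields one item per complete JSON value and stops at the
    // first byte that cannot begin another value (e.g. trailing zero padding).
    for item in serde_json::Deserializer::from_slice(buf).into_iter::<serde_json::Value>() {
        match item {
            Ok(value) => values.push(value),
            Err(_) => break,
        }
    }
    values
}
// ----------------------------------------------------------------------------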
-// You can use this software according to the terms and conditions of the Mulan -// PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. - -use kvm_ioctls::Kvm; - -use util::byte_code::ByteCode; - -use super::errors::{ErrorKind, Result}; - -/// Magic number for migration header. Those bytes represent "STRATOVIRT". -const MAGIC_NUMBER: [u8; 16] = [ - 0x53, 0x54, 0x52, 0x41, 0x54, 0x4f, 0x56, 0x49, 0x52, 0x54, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, -]; -const CURRENT_VERSION: u32 = 1; -const COMPAT_VERSION: u32 = 1; -#[cfg(target_arch = "x86_64")] -const EAX_VENDOR_INFO: u32 = 0x0; - -/// Format type for migration. -/// Different file format will have different file layout. -#[derive(Debug, Copy, Clone, PartialEq)] -pub enum FileFormat { - Device = 1, - MemoryFull = 2, -} - -/// The endianness of byte order. -#[derive(Debug, Copy, Clone, PartialEq)] -enum EndianType { - Little = 1, - Big = 2, -} - -impl EndianType { - fn get_endian_type() -> EndianType { - if cfg!(target_endian = "big") { - EndianType::Big - } else { - EndianType::Little - } - } -} - -/// Get host cpu model as bytes. -#[cfg(target_arch = "x86_64")] -fn cpu_model() -> [u8; 16] { - use core::arch::x86_64::__cpuid_count; - - // Safe because we only use cpuid for cpu info in x86_64. - let result = unsafe { __cpuid_count(EAX_VENDOR_INFO, 0) }; - let vendor_slice = [result.ebx, result.edx, result.ecx]; - - // Safe because we known those brand string length. - let vendor_array = unsafe { - let brand_string_start = vendor_slice.as_ptr() as *const u8; - std::slice::from_raw_parts(brand_string_start, 3 * 4) - }; - - let mut buffer = [0u8; 16]; - if vendor_array.len() > 16 { - buffer.copy_from_slice(&vendor_array[0..15]); - } else { - buffer[0..vendor_array.len()].copy_from_slice(vendor_array); - } - buffer -} - -/// Structure used to mark some message in migration. -#[derive(Copy, Clone, Debug)] -pub struct MigrationHeader { - /// Magic number for migration file/stream. - magic_num: [u8; 16], - /// Current version of migration. - #[allow(dead_code)] - current_version: u32, - /// Compatible version of migration. - compat_version: u32, - /// Arch identifier. - arch: [u8; 8], - /// Endianness of byte order. - byte_order: EndianType, - /// The type of hypervisor. - #[allow(dead_code)] - hypervisor_type: [u8; 8], - /// The version of hypervisor. - hypervisor_version: u32, - /// The type of Cpu model. - #[cfg(target_arch = "x86_64")] - cpu_model: [u8; 16], - /// Operation system type. - os_type: [u8; 8], - /// File format of migration file/stream. - pub format: FileFormat, - /// The length of `DeviceStateDesc`. 
- pub desc_len: usize, -} - -impl ByteCode for MigrationHeader {} - -impl Default for MigrationHeader { - fn default() -> Self { - MigrationHeader { - magic_num: MAGIC_NUMBER, - current_version: CURRENT_VERSION, - compat_version: COMPAT_VERSION, - format: FileFormat::Device, - byte_order: EndianType::Little, - hypervisor_type: [b'k', b'v', b'm', b'0', b'0', b'0', b'0', b'0'], - hypervisor_version: Kvm::new().unwrap().get_api_version() as u32, - #[cfg(target_arch = "x86_64")] - cpu_model: cpu_model(), - #[cfg(target_os = "linux")] - os_type: [b'l', b'i', b'n', b'u', b'x', b'0', b'0', b'0'], - #[cfg(target_arch = "x86_64")] - arch: [b'x', b'8', b'6', b'_', b'6', b'4', b'0', b'0'], - #[cfg(target_arch = "aarch64")] - arch: [b'a', b'a', b'r', b'c', b'h', b'6', b'4', b'0'], - desc_len: 0, - } - } -} - -impl MigrationHeader { - /// Check parsed `MigrationHeader` is illegal or not. - pub fn check_header(&self) -> Result<()> { - if self.magic_num != MAGIC_NUMBER { - return Err(ErrorKind::HeaderItemNotFit("Magic_number".to_string()).into()); - } - - if self.compat_version > CURRENT_VERSION { - return Err(ErrorKind::VersionNotFit(self.compat_version, CURRENT_VERSION).into()); - } - - #[cfg(target_arch = "x86_64")] - let current_arch = [b'x', b'8', b'6', b'_', b'6', b'4', b'0', b'0']; - #[cfg(target_arch = "aarch64")] - let current_arch = [b'a', b'a', b'r', b'c', b'h', b'6', b'4', b'0']; - if self.arch != current_arch { - return Err(ErrorKind::HeaderItemNotFit("Arch".to_string()).into()); - } - - if self.byte_order != EndianType::get_endian_type() { - return Err(ErrorKind::HeaderItemNotFit("Byte order".to_string()).into()); - } - - #[cfg(target_arch = "x86_64")] - if self.cpu_model != cpu_model() { - return Err(ErrorKind::HeaderItemNotFit("Cpu model".to_string()).into()); - } - - #[cfg(target_os = "linux")] - let current_os_type = [b'l', b'i', b'n', b'u', b'x', b'0', b'0', b'0']; - if self.os_type != current_os_type { - return Err(ErrorKind::HeaderItemNotFit("Os type".to_string()).into()); - } - - let current_kvm_version = Kvm::new().unwrap().get_api_version() as u32; - if current_kvm_version < self.hypervisor_version { - return Err(ErrorKind::HeaderItemNotFit("Hypervisor version".to_string()).into()); - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::{Kvm, MigrationHeader}; - - #[test] - fn test_check_header() { - if !Kvm::new().is_ok() { - return; - } - - let header = MigrationHeader::default(); - assert_eq!(header.check_header().is_ok(), true); - } -} diff --git a/migration/src/lib.rs b/migration/src/lib.rs index 9ccab5f3ff5fa33fdebb7882e36152b3a07cd4ec..39b9563b335fd4df8958f6ece1d6bbedf4d52746 100644 --- a/migration/src/lib.rs +++ b/migration/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. // // StratoVirt is licensed under Mulan PSL v2. // You can use this software according to the terms and conditions of the Mulan @@ -14,61 +14,180 @@ //! //! Offer snapshot and migration interface for VM. -#[macro_use] -extern crate error_chain; -#[cfg(test)] -#[macro_use] -extern crate migration_derive; -#[macro_use] -extern crate log; - -mod device_state; -mod header; -mod manager; -mod snapshot; -mod status; - -pub use device_state::{DeviceStateDesc, FieldDesc, StateTransfer}; -pub use manager::{MigrationHook, MigrationManager, MigrationRestoreOrder}; -pub use status::MigrationStatus; - -pub mod errors { - use super::status::MigrationStatus; - - error_chain! 
{ - foreign_links { - Io(std::io::Error); - Ioctl(kvm_ioctls::Error); - Json(serde_json::Error); +pub mod error; +pub mod general; +pub mod manager; +pub mod migration; +pub mod protocol; +pub mod snapshot; + +pub use error::MigrationError; +pub use manager::{MigrationHook, MigrationManager}; +pub use protocol::{DeviceStateDesc, FieldDesc, MemBlock, MigrationStatus, StateTransfer}; + +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; +use std::time::Duration; +use std::{net::TcpStream, os::unix::net::UnixStream, thread}; + +use anyhow::Result; +use log::error; + +use machine_manager::qmp::{qmp_response::Response, qmp_schema}; + +#[derive(Default, Copy, Clone)] +pub struct MigrateMemSlot { + /// number of a memory slot. + pub slot: u32, + /// Guest address. + pub guest_phys_addr: u64, + /// Host address. + pub userspace_addr: u64, + /// Size of memory. + /// size = 0 represents no-region use this slot. + pub memory_size: u64, +} + +/// Start to snapshot VM. +/// +/// # Arguments +/// +/// * `path` - snapshot dir path. If path dir not exists, will create it. +pub fn snapshot(path: String) -> Response { + if let Err(e) = MigrationManager::save_snapshot(&path) { + error!("Failed to migrate to path \'{:?}\': {:?}", path, e); + let _ = MigrationManager::set_status(MigrationStatus::Failed); + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + + Response::create_empty_response() +} + +/// Start to migrate VM with unix mode. +/// +/// # Arguments +/// +/// * `path` - Unix socket path, as /tmp/migration.socket. +pub fn migration_unix_mode(path: String) -> Response { + let mut socket = match UnixStream::connect(path) { + Ok(_sock) => { + // Specify the tcp receiving or send timeout. + let time_out = Some(Duration::from_secs(30)); + _sock + .set_read_timeout(time_out) + .unwrap_or_else(|e| error!("{:?}", e)); + _sock + .set_write_timeout(time_out) + .unwrap_or_else(|e| error!("{:?}", e)); + _sock } - errors { - VersionNotFit(compat_version: u32, current_version: u32) { - display("Migration compat_version {} higher than current version {}", compat_version, current_version) - } - HeaderItemNotFit(item: String) { - display("{} for snapshot file / migration stream is not fit", item) - } - InvalidStatusTransfer(status1: MigrationStatus, status2: MigrationStatus) { - display("Failed to transfer migration status from {} to {}.", status1, status2) - } - FromBytesError(name: &'static str) { - display("Can't restore structure from raw slice: {}", name) - } - GetGicRegsError(reg: &'static str, ret: String) { - display("Failed to get GIC {} register: {}", reg, ret) - } - SetGicRegsError(reg: &'static str, ret: String) { - display("Failed to set GIC {} register: {}", reg, ret) - } - SaveVmMemoryErr(e: String) { - display("Failed to save vm memory: {}", e) - } - RestoreVmMemoryErr { - display("Failed to restore vm memory.") - } - InvalidSnapshotPath { - display("Invalid snapshot path for restoring snapshot") + Err(e) => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ) + } + }; + + if let Err(e) = thread::Builder::new() + .name("unix_migrate".to_string()) + .spawn(move || { + if let Err(e) = MigrationManager::send_migration(&mut socket) { + error!("Failed to send migration: {:?}", e); + let _ = MigrationManager::recover_from_migration(); + let _ = MigrationManager::set_status(MigrationStatus::Failed) + .map_err(|e| error!("{:?}", e)); } + }) + { + return 
Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + } + + Response::create_empty_response() +} + +/// Start to migrate VM with tcp mode. +/// +/// # Arguments +/// +/// * `path` - Tcp ip and port, as 192.168.1.1:4446. +pub fn migration_tcp_mode(path: String) -> Response { + let mut socket = match TcpStream::connect(path) { + Ok(_sock) => { + // Specify the tcp receiving or send timeout. + let time_out = Some(Duration::from_secs(30)); + _sock + .set_read_timeout(time_out) + .unwrap_or_else(|e| error!("{:?}", e)); + _sock + .set_write_timeout(time_out) + .unwrap_or_else(|e| error!("{:?}", e)); + _sock + } + Err(e) => { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ) } + }; + + if let Err(e) = thread::Builder::new() + .name("tcp_migrate".to_string()) + .spawn(move || { + if let Err(e) = MigrationManager::send_migration(&mut socket) { + error!("Failed to send migration: {:?}", e); + let _ = MigrationManager::recover_from_migration(); + let _ = MigrationManager::set_status(MigrationStatus::Failed) + .map_err(|e| error!("{:?}", e)); + } + }) + { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); + }; + + Response::create_empty_response() +} + +/// Query the current migration status. +pub fn query_migrate() -> Response { + let status_str = MigrationManager::status().to_string(); + let migration_info = qmp_schema::MigrationInfo { + status: Some(status_str), + }; + + Response::create_response(serde_json::to_value(migration_info).unwrap(), None) +} + +/// Cancel the current migration. +pub fn cancel_migrate() -> Response { + if let Err(e) = MigrationManager::set_status(MigrationStatus::Canceled) { + return Response::create_error_response( + qmp_schema::QmpErrorClass::GenericError(e.to_string()), + None, + ); } + + Response::create_empty_response() +} + +pub trait MigrateOps: Send + Sync { + fn get_mem_slots(&self) -> Arc>>; + + fn get_dirty_log(&self, _slot: u32, _mem_size: u64) -> Result>; + + fn start_dirty_log(&self) -> Result<()>; + + fn stop_dirty_log(&self) -> Result<()>; + + fn register_instance(&self) -> Result<()>; } diff --git a/migration/src/manager.rs b/migration/src/manager.rs index 1ea194e266740f5531c8190d4727ff8d9f21b6c2..3b081e97161cf9425d8118255fb881bcff57c6b2 100644 --- a/migration/src/manager.rs +++ b/migration/src/manager.rs @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. // // StratoVirt is licensed under Mulan PSL v2. // You can use this software according to the terms and conditions of the Mulan @@ -10,43 +10,34 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
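// ----------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): both migration_unix_mode() and
// migration_tcp_mode() in lib.rs above apply the same 30-second read/write
// timeouts before spawning the migration thread. A stand-alone equivalent of
// the unix-socket variant, using only std (the path is a placeholder):
fn connect_migration_socket(path: &str) -> std::io::Result<std::os::unix::net::UnixStream> {
    use std::time::Duration;

    let sock = std::os::unix::net::UnixStream::connect(path)?;
    let timeout = Some(Duration::from_secs(30));
    sock.set_read_timeout(timeout)?;
    sock.set_write_timeout(timeout)?;
    Ok(sock)
}
// ----------------------------------------------------------------------------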
-use std::cmp; -use std::collections::hash_map::DefaultHasher; use std::collections::HashMap; use std::fs::File; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::io::{Read, Write}; use std::sync::{Arc, Mutex, RwLock}; +use std::time::Instant; -use super::device_state::{DeviceStateDesc, StateTransfer}; -use super::errors::{Result, ResultExt}; -use super::status::MigrationStatus; +use anyhow::{Context, Result}; +use log::info; use once_cell::sync::Lazy; + +use crate::general::translate_id; +use crate::migration::DirtyBitmap; +use crate::protocol::{DeviceStateDesc, MemBlock, MigrationStatus, StateTransfer}; +use crate::MigrateOps; +use machine_manager::config::VmConfig; +use machine_manager::machine::MachineLifecycle; use util::byte_code::ByteCode; -/// Glocal MigrationManager to manage all migration combined interface. +/// Global MigrationManager to manage all migration combined interface. pub(crate) static MIGRATION_MANAGER: Lazy = Lazy::new(|| MigrationManager { - entry: Arc::new(RwLock::new([ - Vec::<(String, MigrationEntry)>::new(), - Vec::<(String, MigrationEntry)>::new(), - Vec::<(String, MigrationEntry)>::new(), - ])), + vmm: Arc::new(RwLock::new(Vmm::default())), desc_db: Arc::new(RwLock::new(HashMap::::new())), status: Arc::new(RwLock::new(MigrationStatus::None)), + vmm_bitmaps: Arc::new(RwLock::new(HashMap::new())), + limit: Arc::new(RwLock::new(MigrationLimit::default())), }); -/// Used to map Device id from String to u64 only. -/// Because instance_id in InstanceId can't be String for it has no Copy trait. -/// -/// # Arguments -/// -/// * `dev_id` - The device id. -pub fn id_remap(dev_id: &str) -> u64 { - let mut hash = DefaultHasher::new(); - dev_id.hash(&mut hash); - hash.finish() -} - /// A hook for `Device` to save device state to `Write` object and load device /// from `[u8]` slice. /// @@ -55,135 +46,175 @@ pub fn id_remap(dev_id: &str) -> u64 { /// This trait is a symbol of device's migration capabilities. All /// migratable device must implement this trait. pub trait MigrationHook: StateTransfer { - /// Pre save device state as `[u8]` with device's `InstanceId` to a `Write` + /// Save device state as `[u8]` with device's `InstanceId` to a `Write` /// trait object. /// /// # Arguments /// - /// * `id` - This unique id to represent a single device. It can be treated - /// as `object_id` in `InstanceId`. - /// * `writer` - The `Write` trait object to store or receive data. - fn pre_save(&self, id: &str, writer: &mut dyn Write) -> Result<()> { + /// * `id` - This unique id to represent a single device. It can be treated as `object_id` in + /// `InstanceId`. + /// * `fd` - The `Write` trait object to save device data. + fn save_device(&self, id: u64, fd: &mut dyn Write) -> Result<()> { let state_data = self .get_state_vec() - .chain_err(|| "Failed to get device state")?; - - let device_alias = self.get_device_alias(); - let instance_id = InstanceId { - object_type: device_alias, - object_id: id_remap(id), - }; + .with_context(|| "Failed to get device state")?; - writer - .write_all(instance_id.as_bytes()) - .chain_err(|| "Failed to write instance id.")?; - writer - .write_all(&state_data) - .chain_err(|| "Failed to write device state")?; + fd.write_all( + Instance { + name: id, + object: self.get_device_alias(), + } + .as_bytes(), + ) + .with_context(|| "Failed to write instance id.")?; + fd.write_all(&state_data) + .with_context(|| "Failed to write device state")?; Ok(()) } - /// Pre load device state from `[u8]` to `Device`. 
+ /// Restore device state from `[u8]` to `Device`. /// /// # Arguments /// /// * `state` - The raw data which can be recovered to `DeviceState`. - /// * `memory` - The file of memory data, this parameter is optional. - fn pre_load(&self, state: &[u8], _memory: Option<&File>) -> Result<()> { + fn restore_device(&self, state: &[u8]) -> Result<()> { self.set_state(state) } - /// Pre load device state from `[u8]` to mutable `Device`. + /// Restore device state from `[u8]` to mutable `Device`. /// /// # Arguments /// /// * `state` - The raw data which can be recovered to `DeviceState`. - /// * `memory` - The file of memory data, this parameter is optional. - fn pre_load_mut(&mut self, state: &[u8], _memory: Option<&File>) -> Result<()> { + fn restore_mut_device(&mut self, state: &[u8]) -> Result<()> { self.set_state_mut(state) } + /// Save memory state to `Write` trait. + /// + /// # Arguments + /// + /// * _fd - The `Write` trait object to save memory data. + fn save_memory(&self, _fd: &mut dyn Write) -> Result<()> { + Ok(()) + } + + /// Restore memory state from memory. + /// + /// # Arguments + /// + /// * _memory - The file of memory data, this parameter is optional. + /// * _state - device state from memory. + fn restore_memory(&self, _memory: Option<&File>, _state: &[u8]) -> Result<()> { + Ok(()) + } + + /// Send memory data to `Write` trait. + /// + /// # Arguments + /// + /// * _fd - The `Write` trait object to send memory data. + /// * _range - the memory block range needs to send. + fn send_memory(&self, _fd: &mut dyn Write, _range: MemBlock) -> Result<()> { + Ok(()) + } + + /// Receive memory data from `Read`. + /// + /// # Arguments + /// + /// * _fd - The `Read` trait object to receive memory data. + /// * _range - the memory block range needs to send. + fn recv_memory(&self, _fd: &mut dyn Read, _range: MemBlock) -> Result<()> { + Ok(()) + } + /// Resume the recover device. /// /// # Notes /// - /// For some device, such as virtio-device or vhost-device, after recover - /// device state, it need a step to wake up device to running. + /// For some device, such as virtio-device or vhost-device, after restore + /// device state, it need a step to wake up device to run. fn resume(&mut self) -> Result<()> { Ok(()) } } -/// The instance id to represent a single object in VM. +/// The instance represents a single object in VM. /// /// # Notes /// -/// Instance_id contains two parts: One part is device type to describe the -/// type of a object, another is unique id for every object. -/// -/// ## object_type -/// The object_type for a object is the order which type is registered to -/// `desc_db`. It's associated with object name. -/// -/// ## object_id -/// -/// The object id should reflect the unique device or ram_region instance in -/// a VM. Is will be set delying on device create order. +/// Instance contains two parts: One part is name for every object. +/// another is the type of a object. #[derive(Ord, PartialOrd, Eq, PartialEq, Hash, Copy, Clone, Debug, Default)] -pub struct InstanceId { - /// The type of object. - pub object_type: u64, - /// The unique id of object. - pub object_id: u64, +pub struct Instance { + /// The name reflects the unique device or ram_region instance in a VM. + pub name: u64, + /// The object is the type which is registered to `desc_db`. + pub object: u64, } -impl ByteCode for InstanceId {} - -/// A entry to every migratable device to call on migration interface. -pub enum MigrationEntry { - /// Safe device instance with migration interface. 
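// ----------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): save_device() above frames each
// device record as the raw bytes of an Instance (two u64 ids: name, then
// object) followed by the device state bytes, whose length comes from the
// matching DeviceStateDesc. A minimal reader for that framing, assuming a
// little-endian host and the plain two-u64 layout that ByteCode serializes here:
fn read_device_record(
    fd: &mut dyn std::io::Read,
    state_size: usize,
) -> std::io::Result<(u64, u64, Vec<u8>)> {
    let mut ids = [0u8; 16];
    fd.read_exact(&mut ids)?;
    let name = u64::from_le_bytes(ids[0..8].try_into().unwrap());
    let object = u64::from_le_bytes(ids[8..16].try_into().unwrap());
    let mut state = vec![0u8; state_size];
    fd.read_exact(&mut state)?;
    Ok((name, object, state))
}
// ----------------------------------------------------------------------------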
- Safe(Arc), - /// Mutex device instance with migration interface. - Mutex(Arc>), - /// Safe memory instance with migration interface. - Memory(Arc), +impl ByteCode for Instance {} + +/// Including all components of a Vmm. +#[derive(Default)] +pub struct Vmm { + /// Vm config + pub config: Arc>, + /// Trait to represent a Vm. + pub vm: Option>>, + /// Trait to represent CPU devices. + pub cpus: HashMap>, + /// Trait to represent memory devices. + pub memory: Option>, + /// Trait to represent transports. + pub transports: HashMap>>, + /// Trait to represent devices. + pub devices: HashMap>>, + #[cfg(target_arch = "aarch64")] + /// Trait to represent GIC devices(GICv3, GICv3 ITS). + pub gic_group: HashMap>, + #[cfg(target_arch = "x86_64")] + /// Trait to represent kvm device. + pub kvm: Option>, + /// The vector of the object implementing MigrateOps trait. + pub mgt_object: Option>>, } -/// Ensure the recovery sequence of different devices based on priorities. -/// At present, we need to ensure that the state recovery of the gic device -/// must be after the cpu, so different priorities are defined. -#[derive(Debug)] -pub enum MigrationRestoreOrder { - Default = 0, - Gicv3 = 1, - Gicv3Its = 2, - Max = 3, +/// Limit of migration. +pub struct MigrationLimit { + /// Start time of each iteration. + pub iteration_start_time: Instant, + /// Virtual machine downtime. + pub limit_downtime: u64, + /// Max number of iterations during iteratively sending dirty memory. + pub max_dirty_iterations: u16, } -impl From for u16 { - fn from(order: MigrationRestoreOrder) -> u16 { - match order { - MigrationRestoreOrder::Default => 0, - MigrationRestoreOrder::Gicv3 => 1, - MigrationRestoreOrder::Gicv3Its => 2, - _ => 3, +impl Default for MigrationLimit { + fn default() -> Self { + Self { + iteration_start_time: Instant::now(), + limit_downtime: 50, + max_dirty_iterations: 30, } } } -/// The entry list size is the same as the MigrationRestoreOrder number -type MigrationEntryList = [Vec<(String, MigrationEntry)>; 3]; - /// This structure is to manage all resource during migration. /// It is also the only way to call on `MIGRATION_MANAGER`. pub struct MigrationManager { - /// The map offers the device_id and combined migratable device entry. - pub(crate) entry: Arc>, + /// The vmm can manage all VM related components + pub vmm: Arc>, /// The map offers the device type and its device state describe structure. - pub(crate) desc_db: Arc>>, + pub desc_db: Arc>>, /// The status of migration work. - status: Arc>, + pub status: Arc>, + /// vmm dirty bitmaps. + pub vmm_bitmaps: Arc>>, + /// Limiting elements of migration. + pub limit: Arc>, } impl MigrationManager { @@ -195,255 +226,214 @@ impl MigrationManager { fn register_device_desc(desc: DeviceStateDesc) { let mut desc_db = MIGRATION_MANAGER.desc_db.write().unwrap(); if !desc_db.contains_key(&desc.name) { + info!("Register device name: {}, desc: {:?}", desc.name, desc); desc_db.insert(desc.name.clone(), desc); } } - /// Register safe device instance to entry hashmap with instance id. + /// Register vm config to vmm. /// /// # Arguments /// - /// * `device_desc` - The `DeviceStateDesc` of device instance. - /// * `entry` - Device instance with migratable interface. - /// * `restore_order` - device restore order. - pub fn register_device_instance( - device_desc: DeviceStateDesc, - device_entry: Arc, - restore_order: MigrationRestoreOrder, - ) where + /// * `config` - The configuration from virtual machine. 
+ pub fn register_vm_config(config: Arc>) { + MIGRATION_MANAGER.vmm.write().unwrap().config = config; + } + + /// Register vm instance to vmm. + /// + /// # Arguments + /// + /// * `vm` - vm instance with MachineLifecycle trait. + pub fn register_vm_instance(vm: Arc>) + where + T: MachineLifecycle + Sync + Send + 'static, + { + MIGRATION_MANAGER.vmm.write().unwrap().vm = Some(vm); + } + + /// Register CPU instance to vmm. + /// + /// # Arguments + /// + /// * `cpu_desc` - The `DeviceStateDesc` of CPU instance. + /// * `cpu` - CPU device instance with MigrationHook trait. + /// * `id` - The unique id for CPU device. + pub fn register_cpu_instance(cpu_desc: DeviceStateDesc, cpu: Arc, id: u8) + where T: MigrationHook + Sync + Send + 'static, { - let name = device_desc.name.clone(); - Self::register_device_desc(device_desc); + let name = cpu_desc.name.clone() + "/" + &id.to_string(); + let mut copied_cpu_desc = cpu_desc.clone(); + copied_cpu_desc.name = name.clone(); + copied_cpu_desc.alias = cpu_desc.alias + u64::from(id); + Self::register_device_desc(copied_cpu_desc); + + let mut locked_vmm = MIGRATION_MANAGER.vmm.write().unwrap(); + locked_vmm.cpus.insert(translate_id(&name), cpu); + } - let entry = MigrationEntry::Safe(device_entry); - info!( - "Register device instance: id {} order {:?}", - &name, &restore_order - ); - MigrationManager::insert_entry(name, restore_order.into(), entry, true); + /// Register memory instance to vmm. + /// + /// # Arguments + /// + /// * `memory` - The memory instance with MigrationHook trait. + pub fn register_memory_instance(memory: Arc) + where + T: MigrationHook + Sync + Send + 'static, + { + let mut locked_vmm = MIGRATION_MANAGER.vmm.write().unwrap(); + locked_vmm.memory = Some(memory); } - /// Register mutex device instance to entry hashmap with instance_id. + /// Register transport instance to vmm. /// /// # Arguments /// /// * `device_desc` - The `DeviceStateDesc` of device instance. - /// * `entry` - Device instance with migratable interface. - pub fn register_device_instance_mutex( + /// * `device` - The transport instance with MigrationHook trait. + /// * `id` - The unique id for device. + pub fn register_transport_instance( device_desc: DeviceStateDesc, - device_entry: Arc>, + device: Arc>, + id: &str, ) where T: MigrationHook + Sync + Send + 'static, { - let name = device_desc.name.clone(); - let order = MigrationRestoreOrder::Default.into(); + let name = device_desc.name.clone() + "/" + id; Self::register_device_desc(device_desc); - let entry = MigrationEntry::Mutex(device_entry); - info!("Register device instance mutex: id {}", &name); - MigrationManager::insert_entry(name, order, entry, true); + let mut locked_vmm = MIGRATION_MANAGER.vmm.write().unwrap(); + locked_vmm.transports.insert(translate_id(&name), device); } - pub fn register_device_instance_mutex_with_id( + /// Register device instance to vmm. + /// + /// # Arguments + /// + /// * `device_desc` - The `DeviceStateDesc` of device instance. + /// * `device` - The device instance with MigrationHook trait. + /// * `id` - The unique id for device. 
+ pub fn register_device_instance( device_desc: DeviceStateDesc, - device_entry: Arc>, + device: Arc>, id: &str, ) where T: MigrationHook + Sync + Send + 'static, { let name = device_desc.name.clone() + "/" + id; - let order = MigrationRestoreOrder::Default.into(); Self::register_device_desc(device_desc); - let entry = MigrationEntry::Mutex(device_entry); - info!("Register device instance with id: id {}", &name); - MigrationManager::insert_entry(name, order, entry, false); - } - pub fn unregister_device_instance_mutex_by_id(device_desc: DeviceStateDesc, id: &str) { - let name = device_desc.name + "/" + id; - info!("Unregister device instance: id {}", &name); - MigrationManager::remove_entry(&name); + let mut locked_vmm = MIGRATION_MANAGER.vmm.write().unwrap(); + locked_vmm.devices.insert(translate_id(&name), device); } - /// Register memory instance. + /// Register kvm instance to vmm. /// /// # Arguments /// - /// * `entry` - Memory instance with migratable interface. - pub fn register_memory_instance(entry: Arc) + /// * `kvm_desc` - The `DeviceStateDesc` of kvm instance. + /// * `kvm` - The kvm device instance with MigrationHook trait. + #[cfg(target_arch = "x86_64")] + pub fn register_kvm_instance(kvm_desc: DeviceStateDesc, kvm: Arc) where T: MigrationHook + Sync + Send + 'static, { - let entry = MigrationEntry::Memory(entry); - info!("Register memory instance"); - MigrationManager::insert_entry(String::from("MemoryState/Memory"), 0, entry, true); + Self::register_device_desc(kvm_desc); + + let mut locked_vmm = MIGRATION_MANAGER.vmm.write().unwrap(); + locked_vmm.kvm = Some(kvm); } - /// Insert entry. If the name is duplicated, you should set gen_instance_id to true to - /// generated instance id to ensure that the id is unique. + /// Register GIC device instance to vmm. /// /// # Arguments /// - /// * `name` - Entry name. - /// * `order` - Restore order. - /// * `entry` - Instance with migratable interface. - /// * `gen_instance_id` - If auto-generated instance id. - fn insert_entry(name: String, order: u16, entry: MigrationEntry, gen_instance_id: bool) { - let mut entrys = MIGRATION_MANAGER.entry.write().unwrap(); - let mut index = 0; - if gen_instance_id { - for (key, _) in &entrys[order as usize] { - if let Some(pos) = key.rfind(':') { - let (tmp_id, num_id) = key.split_at(pos); - if tmp_id == name { - let num = num_id.strip_prefix(':').unwrap(); - index = cmp::max(index, num.parse::().unwrap() + 1); - } - } - } - } - // ID is format as "{name}:{instance_id}" - let id = format!("{}:{}", name, index); - debug!("Insert entry: id {}", &id); - entrys[order as usize].push((id, entry)); + /// * `gic_desc` - The `DeviceStateDesc` of GIC instance. + /// * `gic` - The GIC device instance with MigrationHook trait. + #[cfg(target_arch = "aarch64")] + pub fn register_gic_instance(gic_desc: DeviceStateDesc, gic: Arc, id: &str) + where + T: MigrationHook + Sync + Send + 'static, + { + Self::register_device_desc(gic_desc); + + let mut locked_vmm = MIGRATION_MANAGER.vmm.write().unwrap(); + locked_vmm.gic_group.insert(translate_id(id), gic); } - /// Remove entry by the unique name. Not support to remove the entry with instance id. + /// Register migration instance to vmm. /// /// # Arguments /// - /// * `name` - Entry name. 
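// ----------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): most of the register_*/
// unregister_* helpers above key the vmm hash maps by hashing
// "<descriptor name>/<id>" through translate_id() (the GIC variant hashes the
// bare id). A stand-alone version of that key derivation, so the same id
// string passed at register and unregister time resolves to the same entry:
fn vmm_key(desc_name: &str, id: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    // Same concatenation as register_device_instance(): "<name>/<id>".
    let name = format!("{}/{}", desc_name, id);
    let mut hasher = DefaultHasher::new();
    name.hash(&mut hasher);
    hasher.finish()
}
// ----------------------------------------------------------------------------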
- fn remove_entry(name: &str) { - let eid = format!("{}:0", name); - let mut entrys = MIGRATION_MANAGER.entry.write().unwrap(); - for (i, item) in entrys.iter().enumerate() { - let pos = item.iter().position(|(key, _)| key == &eid); - if let Some(index) = pos { - debug!("Remove entry: eid {}", &eid); - entrys[i].remove(index); - return; - } - } + /// * `mgt_object` - object with MigrateOps trait. + pub fn register_migration_instance(mgt_object: Arc>) { + let mut locked_vmm = MIGRATION_MANAGER.vmm.write().unwrap(); + locked_vmm.mgt_object = Some(mgt_object); } - /// Get `Device`'s alias from device type string. + /// Unregister transport instance from vmm. /// - /// # Argument + /// # Arguments /// - /// * `device_type` - The type string of device instance. - pub fn get_desc_alias(device_type: &str) -> Option { - Some(id_remap(device_type)) - } - - /// Return `desc_db` value len(0 restored as `serde_json`) - pub fn get_desc_db_len() -> Result { - let mut db_data_len = 0; - let desc_db = MIGRATION_MANAGER.desc_db.read().unwrap(); - for (_, desc) in desc_db.iter() { - let desc_str = serde_json::to_string(desc)?; - db_data_len += desc_str.as_bytes().len(); - } - - Ok(db_data_len) - } - - /// Write all `DeviceStateDesc` in `desc_db` hashmap to `Write` trait object. - pub fn save_descriptor_db(writer: &mut dyn Write) -> Result<()> { - let desc_length = Self::get_desc_db_len()?; - let mut desc_buffer = Vec::new(); - desc_buffer.resize(desc_length, 0); - let mut start = 0; - - let desc_db = MIGRATION_MANAGER.desc_db.read().unwrap(); - for (_, desc) in desc_db.iter() { - let desc_str = serde_json::to_string(desc)?; - let desc_bytes = desc_str.as_bytes(); - desc_buffer[start..start + desc_bytes.len()].copy_from_slice(desc_bytes); - start += desc_bytes.len(); - } - writer - .write_all(&desc_buffer) - .chain_err(|| "Failed to write descriptor message.")?; - - Ok(()) - } - - /// Load and parse device state descriptor from `Read` trait object. Save as a Hashmap. - pub fn load_descriptor_db( - reader: &mut dyn Read, - desc_length: usize, - ) -> Result> { - let mut desc_buffer = Vec::new(); - desc_buffer.resize(desc_length, 0); - reader.read_exact(&mut desc_buffer)?; - let mut snapshot_desc_db = HashMap::::new(); - - let deserializer = serde_json::Deserializer::from_slice(&desc_buffer); - for desc in deserializer.into_iter::() { - let device_desc: DeviceStateDesc = match desc { - Ok(desc) => desc, - Err(_) => break, - }; - snapshot_desc_db.insert(device_desc.alias, device_desc); - } - - Ok(snapshot_desc_db) + /// * `device_desc` - The `DeviceStateDesc` of device instance. + /// * `id` - The unique id for device. + pub fn unregister_transport_instance(device_desc: DeviceStateDesc, id: &str) { + let name = device_desc.name + "/" + id; + let mut locked_vmm = MIGRATION_MANAGER.vmm.write().unwrap(); + locked_vmm.transports.remove(&translate_id(&name)); } - /// Set a new migration status for migration manager. + /// Unregister device instance from vmm. /// /// # Arguments /// - /// * `new_status`: new migration status, the transform must be illegal. - pub fn set_status(new_status: MigrationStatus) -> Result<()> { - let mut status = MIGRATION_MANAGER.status.write().unwrap(); - *status = status.transfer(new_status)?; - - Ok(()) - } - - /// Get current migration status for migration manager. - pub fn migration_get_status() -> MigrationStatus { - *MIGRATION_MANAGER.status.read().unwrap() + /// * `device_desc` - The `DeviceStateDesc` of device instance. + /// * `id` - The unique id for device. 
+ pub fn unregister_device_instance(device_desc: DeviceStateDesc, id: &str) { + let name = device_desc.name + "/" + id; + let mut locked_vmm = MIGRATION_MANAGER.vmm.write().unwrap(); + locked_vmm.devices.remove(&translate_id(&name)); } } #[cfg(test)] mod tests { - use super::*; - use crate::device_state::tests::{DeviceV1, DeviceV1State, DeviceV2, DeviceV2State}; use std::sync::{Arc, Mutex}; + use super::*; + use crate::protocol::tests::{DeviceV1, DeviceV1State, DeviceV2, DeviceV2State}; + impl MigrationHook for DeviceV1 {} impl MigrationHook for DeviceV2 {} #[test] fn test_register_device() { - let device_v1 = Arc::new(DeviceV1::default()); - let device_v2 = Arc::new(DeviceV2::default()); + let device_v1_mutex = Arc::new(Mutex::new(DeviceV1::default())); + let device_v2_arc = Arc::new(DeviceV2::default()); let device_v2_mutex = Arc::new(Mutex::new(DeviceV2::default())); MigrationManager::register_device_instance( DeviceV1State::descriptor(), - device_v1, - MigrationRestoreOrder::Default, + device_v1_mutex, + "device_v1", ); - MigrationManager::register_memory_instance(device_v2); - MigrationManager::register_device_instance_mutex( + MigrationManager::register_memory_instance(device_v2_arc); + MigrationManager::register_device_instance( DeviceV2State::descriptor(), device_v2_mutex, + "device_v2", ); assert!(MigrationManager::get_desc_alias("DeviceV1State").is_some()); assert_eq!( MigrationManager::get_desc_alias("DeviceV1State").unwrap(), - id_remap("DeviceV1State") + translate_id("DeviceV1State") ); assert!(MigrationManager::get_desc_alias("DeviceV2State").is_some()); assert_eq!( MigrationManager::get_desc_alias("DeviceV2State").unwrap(), - id_remap("DeviceV2State") + translate_id("DeviceV2State") ); } } diff --git a/migration/src/migration.rs b/migration/src/migration.rs new file mode 100644 index 0000000000000000000000000000000000000000..2782d9cd8a144e0b865a01c48d6d5a55f3847a4c --- /dev/null +++ b/migration/src/migration.rs @@ -0,0 +1,806 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::mem::size_of; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{Duration, Instant}; + +use anyhow::{anyhow, bail, Context, Result}; +use log::{info, warn}; + +use crate::general::Lifecycle; +use crate::manager::MIGRATION_MANAGER; +use crate::protocol::{MemBlock, MigrationStatus, Request, Response, TransStatus}; +use crate::MigrateMemSlot; +use crate::{MigrationError, MigrationManager}; +use machine_manager::config::{get_pci_bdf, PciBdf, VmConfig}; +use util::unix::host_page_size; + +impl MigrationManager { + /// Start VM live migration at source VM. + /// + /// # Arguments + /// + /// * `fd` - The fd implements `Read` and `Write` trait object. it + /// will send source VM memory data and devices state to destination VM. + /// And, it will receive confirmation from destination VM. 
+ pub fn send_migration(fd: &mut T) -> Result<()> + where + T: Read + Write, + { + // Activate the migration status of source and destination virtual machine. + Self::active_migration(fd).with_context(|| "Failed to active migration")?; + + // Send source virtual machine configuration. + Self::send_vm_config(fd).with_context(|| "Failed to send vm config")?; + + // Start logging dirty pages. + Self::start_dirty_log().with_context(|| "Failed to start logging dirty page")?; + + // Send all memory of virtual machine itself to destination. + Self::send_vm_memory(fd).with_context(|| "Failed to send VM memory")?; + + // Iteratively send virtual machine dirty memory. + let iterations = MIGRATION_MANAGER.limit.read().unwrap().max_dirty_iterations; + for _ in 0..iterations { + // Check the migration is active. + if !Self::is_active() { + break; + } + + if !Self::iteration_send(fd)? { + break; + } + } + + // Check whether the migration is canceled. + if Self::is_canceled() { + // Cancel the migration of source and destination. + Self::cancel_migration(fd).with_context(|| "Failed to cancel migration")?; + return Ok(()); + } + + // Pause virtual machine. + Self::pause()?; + + // Send remaining virtual machine dirty memory. + Self::send_dirty_memory(fd).with_context(|| "Failed to send dirty memory")?; + + // Stop logging dirty pages. + Self::stop_dirty_log().with_context(|| "Failed to stop logging dirty page")?; + + // Get virtual machine state and send it to destination VM. + Self::send_vmstate(fd).with_context(|| "Failed to send vm state")?; + + // Complete the migration. + Self::complete_migration(fd).with_context(|| "Failed to completing migration")?; + + // Destroy virtual machine. + Self::clear_migration().with_context(|| "Failed to clear migration")?; + + Ok(()) + } + + /// Start VM live migration at destination VM. + /// + /// # Arguments + /// + /// * `fd` - The fd implements `Read` and `Write` trait object. it + /// will receive source VM memory data and devices state. And, + /// it will send confirmation to source VM. + pub fn recv_migration(fd: &mut T) -> Result<()> + where + T: Read + Write, + { + // Activate the migration status. + let request = Request::recv_msg(fd)?; + if request.status == TransStatus::Active { + info!("Active the migration"); + Self::set_status(MigrationStatus::Active)?; + Response::send_msg(fd, TransStatus::Ok)?; + } else { + Response::send_msg(fd, TransStatus::Error)?; + return Err(anyhow!(MigrationError::MigrationStatusErr( + (request.status as u16).to_string(), + TransStatus::Active.to_string(), + ))); + } + + // Check source and destination virtual machine configuration. 
+ let request = Request::recv_msg(fd)?; + if request.status == TransStatus::VmConfig { + info!("Receive VmConfig status"); + Self::check_vm_config(fd, request.length) + .with_context(|| "Failed to check vm config")?; + } else { + Response::send_msg(fd, TransStatus::Error)?; + return Err(anyhow!(MigrationError::MigrationStatusErr( + (request.status as u16).to_string(), + TransStatus::VmConfig.to_string(), + ))); + } + + loop { + let request = Request::recv_msg(fd)?; + match request.status { + TransStatus::Memory => { + info!("Receive Memory status"); + Self::recv_vm_memory(fd, request.length)?; + } + TransStatus::State => { + info!("Receive State status"); + Self::recv_vmstate(fd)?; + break; + } + TransStatus::Cancel => { + info!("Receive Cancel status"); + Self::set_status(MigrationStatus::Canceled)?; + Response::send_msg(fd, TransStatus::Ok)?; + + bail!("Cancel migration from source"); + } + _ => { + warn!("Unable to distinguish status"); + } + } + } + + Ok(()) + } + + /// Send Vm configuration from source virtual machine. + fn send_vm_config(fd: &mut T) -> Result<()> + where + T: Write + Read, + { + let vm_config = &MIGRATION_MANAGER + .vmm + .read() + .unwrap() + .config + .lock() + .unwrap() + .clone(); + let config_data = serde_json::to_vec(vm_config)?; + Request::send_msg(fd, TransStatus::VmConfig, config_data.len() as u64)?; + fd.write_all(&config_data)?; + + let result = Response::recv_msg(fd)?; + if result.is_err() { + return Err(anyhow!(MigrationError::ResponseErr)); + } + + Ok(()) + } + + /// Check source and destination virtual machine config. + fn check_vm_config(fd: &mut T, len: u64) -> Result<()> + where + T: Write + Read, + { + // Sanity check for len to avoid OOM. Given 1MB is enough. + if len > (1 << 20) { + bail!("Source vm_config size is too large"); + } + + let mut data: Vec = Vec::new(); + data.resize_with(len as usize, Default::default); + fd.read_exact(&mut data)?; + + let src_config: &VmConfig = &serde_json::from_slice(&data)?; + let dest_config: &VmConfig = &MIGRATION_MANAGER + .vmm + .read() + .unwrap() + .config + .lock() + .unwrap() + .clone(); + // Check vCPU number. + Self::check_vcpu(src_config, dest_config)?; + Self::check_memory(src_config, dest_config)?; + Self::check_devices(src_config, dest_config)?; + + Response::send_msg(fd, TransStatus::Ok)?; + + Ok(()) + } + + /// Check vcpu number config. + fn check_vcpu(src_config: &VmConfig, dest_config: &VmConfig) -> Result<()> { + let src_cpu = src_config.machine_config.nr_cpus; + let dest_cpu = dest_config.machine_config.nr_cpus; + if src_cpu != dest_cpu { + return Err(anyhow!(MigrationError::MigrationConfigErr( + "vCPU number".to_string(), + src_cpu.to_string(), + dest_cpu.to_string(), + ))); + } + + Ok(()) + } + + /// Check memory size config. + fn check_memory(src_config: &VmConfig, dest_config: &VmConfig) -> Result<()> { + let src_mem = src_config.machine_config.mem_config.mem_size; + let dest_mem = dest_config.machine_config.mem_config.mem_size; + if src_mem != dest_mem { + return Err(anyhow!(MigrationError::MigrationConfigErr( + "memory size".to_string(), + src_mem.to_string(), + dest_mem.to_string(), + ))); + } + + Ok(()) + } + + /// Check devices type and BDF config. 
+ fn check_devices(src_config: &VmConfig, dest_config: &VmConfig) -> Result<()> { + let mut dest_devices: HashMap = HashMap::new(); + for (dev_type, dev_info) in dest_config.devices.iter() { + if let Ok(dest_bdf) = get_pci_bdf(dev_info) { + dest_devices.insert(dest_bdf, dev_type.to_string()); + } + } + for (src_type, dev_info) in src_config.devices.iter() { + if let Ok(src_bdf) = get_pci_bdf(dev_info) { + match dest_devices.get(&src_bdf) { + Some(dest_type) => { + if !src_type.eq(dest_type) { + return Err(anyhow!(MigrationError::MigrationConfigErr( + "device type".to_string(), + src_type.to_string(), + dest_type.to_string(), + ))); + } + } + None => bail!( + "Failed to get destination device bdf {:?}, type {}", + src_bdf, + src_type + ), + } + } + } + + Ok(()) + } + + /// Start to send dirty memory page iteratively. Return true if it should + /// continue to the next iteration. Otherwise, return false. + /// + /// # Arguments + /// + /// * `fd` - The fd implements `Read` and `Write` trait object. + fn iteration_send(fd: &mut T) -> Result + where + T: Write + Read, + { + let mut state = + Self::send_dirty_memory(fd).with_context(|| "Failed to send dirty memory")?; + + // Check the virtual machine downtime. + if MIGRATION_MANAGER + .limit + .read() + .unwrap() + .iteration_start_time + .elapsed() + < Duration::from_millis(MIGRATION_MANAGER.limit.read().unwrap().limit_downtime) + { + state = false; + } + // Update iteration start time. + MIGRATION_MANAGER + .limit + .write() + .unwrap() + .iteration_start_time = Instant::now(); + + Ok(state) + } + + /// Receive memory data from source VM. + /// + /// # Arguments + /// + /// * `fd` - The fd implements `Read` and `Write` trait object. + /// * `len` - The length of Block data. + fn recv_vm_memory(fd: &mut T, len: u64) -> Result<()> + where + T: Write + Read, + { + // Sanity check for len to avoid OOM. Given 1MB is enough. + if len > (1 << 20) { + bail!("Source MemBlock config size is too large"); + } + + let mut blocks = Vec::::new(); + blocks.resize_with(len as usize / (size_of::()), Default::default); + fd.read_exact( + // SAFETY: + // 1. The pointer of blocks can be guaranteed not null. + // 2. The range of len has been limited. + unsafe { + std::slice::from_raw_parts_mut( + blocks.as_ptr() as *mut MemBlock as *mut u8, + len as usize, + ) + }, + )?; + + if let Some(locked_memory) = &MIGRATION_MANAGER.vmm.read().unwrap().memory { + for block in blocks.iter() { + locked_memory.recv_memory( + fd, + MemBlock { + gpa: block.gpa, + len: block.len, + }, + )?; + } + } + + Response::send_msg(fd, TransStatus::Ok)?; + + Ok(()) + } + + /// Send memory data to destination VM. + /// + /// # Arguments + /// + /// * `fd` - The fd implements `Read` and `Write` trait object. + /// * `blocks` - The memory blocks need to be sent. + fn send_memory(fd: &mut T, blocks: Vec) -> Result<()> + where + T: Read + Write, + { + let len = size_of::() * blocks.len(); + Request::send_msg(fd, TransStatus::Memory, len as u64)?; + fd.write_all( + // SAFETY: + // 1. The pointer of blocks can be guaranteed not null. + // 2. The len is constant. 
+ unsafe { + std::slice::from_raw_parts(blocks.as_ptr() as *const MemBlock as *const u8, len) + }, + )?; + + if let Some(locked_memory) = &MIGRATION_MANAGER.vmm.read().unwrap().memory { + for block in blocks.iter() { + locked_memory.send_memory( + fd, + MemBlock { + gpa: block.gpa, + len: block.len, + }, + )?; + } + } + + let result = Response::recv_msg(fd)?; + if result.is_err() { + return Err(anyhow!(MigrationError::ResponseErr)); + } + + Ok(()) + } + + /// Send entire VM memory data to destination VM. + /// + /// # Arguments + /// + /// * `fd` - The fd implements `Read` and `Write` trait object. + fn send_vm_memory(fd: &mut T) -> Result<()> + where + T: Read + Write, + { + let mut blocks: Vec = Vec::new(); + if let Some(mgt_object) = &MIGRATION_MANAGER.vmm.read().unwrap().mgt_object { + let slots = mgt_object.lock().unwrap().get_mem_slots(); + for (_, slot) in slots.lock().unwrap().iter() { + blocks.push(MemBlock { + gpa: slot.guest_phys_addr, + len: slot.memory_size, + }); + } + } + + Self::send_memory(fd, blocks) + } + + /// Send dirty memory data to destination VM. + /// + /// # Arguments + /// + /// * `fd` - The fd implements `Read` and `Write` trait object. + fn send_dirty_memory(fd: &mut T) -> Result + where + T: Read + Write, + { + let mut blocks: Vec = Vec::new(); + if let Some(mgt_object) = &MIGRATION_MANAGER.vmm.read().unwrap().mgt_object { + let mem_slots = mgt_object.lock().unwrap().get_mem_slots(); + for (_, slot) in mem_slots.lock().unwrap().iter() { + let sub_blocks: Vec = Self::get_dirty_log(slot)?; + blocks.extend(sub_blocks); + } + } + + if blocks.is_empty() { + return Ok(false); + } + + Self::send_memory(fd, blocks)?; + + Ok(true) + } + + /// Send VM state data to destination VM. + /// + /// # Arguments + /// + /// * `fd` - The fd implements `Read` and `Write` trait object. + fn send_vmstate(fd: &mut T) -> Result<()> + where + T: Read + Write, + { + Request::send_msg(fd, TransStatus::State, 0)?; + Self::save_vmstate(None, fd)?; + + let result = Response::recv_msg(fd)?; + if result.is_err() { + return Err(anyhow!(MigrationError::ResponseErr)); + } + + Ok(()) + } + + /// Receive VM state data from source VM. + /// + /// # Arguments + /// + /// * `fd` - The fd implements `Read` and `Write` trait object. + fn recv_vmstate(fd: &mut T) -> Result<()> + where + T: Write + Read, + { + let header = Self::restore_header(fd)?; + header.check_header()?; + let desc_db = Self::restore_desc_db(fd, header.desc_len) + .with_context(|| "Failed to load device descriptor db")?; + Self::restore_vmstate(desc_db, fd).with_context(|| "Failed to load snapshot device")?; + Self::resume()?; + + Response::send_msg(fd, TransStatus::Ok)?; + + Ok(()) + } + + /// Active migration status and synchronize the state of destination VM. + /// + /// # Arguments + /// + /// * `fd` - The fd implements `Read` and `Write` trait object. + fn active_migration(fd: &mut T) -> Result<()> + where + T: Read + Write, + { + Self::set_status(MigrationStatus::Active)?; + Request::send_msg(fd, TransStatus::Active, 0)?; + let result = Response::recv_msg(fd)?; + if result.is_err() { + return Err(anyhow!(MigrationError::ResponseErr)); + } + + Ok(()) + } + + /// Synchronize the `Completed` status of destination VM + /// + /// # Arguments + /// + /// * `fd` - The fd implements `Read` and `Write` trait object. 
+ fn complete_migration(fd: &mut T) -> Result<()> + where + T: Write + Read, + { + Self::set_status(MigrationStatus::Completed)?; + Request::send_msg(fd, TransStatus::Complete, 0)?; + let result = Response::recv_msg(fd)?; + if result.is_err() { + return Err(anyhow!(MigrationError::ResponseErr)); + } + + Ok(()) + } + + /// Finish the migration of destination VM and notify the source VM. + /// + /// # Arguments + /// + /// * `fd` - The fd implements `Read` and `Write` trait object. + pub fn finish_migration(fd: &mut T) -> Result<()> + where + T: Write + Read, + { + // Receive complete status from source vm. + let request = Request::recv_msg(fd)?; + if request.status == TransStatus::Complete { + info!("Receive Complete status"); + Self::set_status(MigrationStatus::Completed)?; + Response::send_msg(fd, TransStatus::Ok)?; + } else { + return Err(anyhow!(MigrationError::MigrationStatusErr( + (request.status as u16).to_string(), + TransStatus::Complete.to_string(), + ))); + } + + Ok(()) + } + + /// Cancel live migration. + /// + /// # Arguments + /// + /// * `fd` - The fd implements `Read` and `Write` trait object. + fn cancel_migration(fd: &mut T) -> Result<()> + where + T: Write + Read, + { + // Stop logging dirty pages. + Self::stop_dirty_log().with_context(|| "Failed to stop logging dirty page")?; + + Request::send_msg(fd, TransStatus::Cancel, 0)?; + let result = Response::recv_msg(fd)?; + if result.is_err() { + return Err(anyhow!(MigrationError::ResponseErr)); + } + + Ok(()) + } + + /// Clear live migration environment and shut down VM. + fn clear_migration() -> Result<()> { + if let Some(locked_vm) = &MIGRATION_MANAGER.vmm.read().unwrap().vm { + locked_vm.lock().unwrap().destroy(); + } + + Ok(()) + } + + /// Recover the virtual machine if migration is failed. + pub fn recover_from_migration() -> Result<()> { + if let Some(locked_vm) = &MIGRATION_MANAGER.vmm.read().unwrap().vm { + locked_vm.lock().unwrap().resume(); + } + + Ok(()) + } +} + +/// Dirty bitmap information of vmm memory slot. +pub struct DirtyBitmap { + /// Guest address. + pub gpa: u64, + /// Host address. + pub hva: u64, + /// length of memory. + pub len: u64, + /// Bitmap for vmm memory slot. + pub map: Vec, + /// Host page size. + pub page_size: u64, +} + +impl DirtyBitmap { + /// Create a new dirty bitmap for vmm. + /// + /// # Arguments + /// + /// * `gpa` - Guest physical address of memory slot. + /// * `hva` - Host virtual address of memory slot. + /// * `len` - Length of memory slot. + fn new(gpa: u64, hva: u64, len: u64) -> Self { + let page_size = host_page_size(); + + let mut num_pages = len / page_size; + // Page alignment. + if len % page_size > 0 { + num_pages += 1; + } + let size = num_pages / 64 + 1; + let map: Vec = (0..size).map(|_| AtomicU64::new(0)).collect(); + + DirtyBitmap { + gpa, + hva, + len, + map, + page_size, + } + } + + /// Mark dirty bitmap for vmm. + /// + /// # Arguments + /// + /// * `addr` - Guest physical address of memory. + /// * `len` - Length of memory slot. + fn mark_bitmap(&self, addr: u64, len: u64) { + // Just return if len is 0. + if len == 0 { + return; + } + + let offset = addr - self.gpa; + let first_bit = offset / self.page_size; + let last_bit = (offset + len - 1) / self.page_size; + for n in first_bit..=last_bit { + // Ignore bit that is out of range. + if n >= self.len { + break; + } + self.map[(n as usize) >> 6].fetch_or(1 << (n & 63), Ordering::SeqCst); + } + } + + /// Get and clear dirty bitmap for vmm. 
+ fn get_and_clear_dirty(&self) -> Vec { + self.map + .iter() + .map(|m| m.fetch_and(0, Ordering::SeqCst)) + .collect() + } +} + +pub trait Migratable { + /// Start the dirty log in the migration objects and vmm. + fn start_dirty_log() -> Result<()> { + // Create dirty bitmaps for vmm. + let mut bitmaps = HashMap::::new(); + if let Some(mgt_object) = &MIGRATION_MANAGER.vmm.read().unwrap().mgt_object { + let mem_slots = mgt_object.lock().unwrap().get_mem_slots(); + for (_, slot) in mem_slots.lock().unwrap().iter() { + let bitmap = + DirtyBitmap::new(slot.guest_phys_addr, slot.userspace_addr, slot.memory_size); + bitmaps.insert(slot.slot, bitmap); + } + + // Start logging dirty memory in migration object. + mgt_object.lock().unwrap().start_dirty_log()?; + } + + let mut vm_bitmaps = MIGRATION_MANAGER.vmm_bitmaps.write().unwrap(); + *vm_bitmaps = bitmaps; + + Ok(()) + } + + /// Stop the dirty log in the migration objects and vmm. + fn stop_dirty_log() -> Result<()> { + // Clear dirty bitmaps from vmm. + let mut vm_bitmaps = MIGRATION_MANAGER.vmm_bitmaps.write().unwrap(); + *vm_bitmaps = HashMap::new(); + + if let Some(mgt_object) = &MIGRATION_MANAGER.vmm.read().unwrap().mgt_object { + // Stop logging dirty memory in migration object. + mgt_object.lock().unwrap().stop_dirty_log()?; + } + + Ok(()) + } + + /// Collect the dirty log from migration object and vmm. + /// + /// # Arguments + /// + /// * `slot` - The memory slot. + fn get_dirty_log(slot: &MigrateMemSlot) -> Result> { + // Get dirty memory from vmm. + let mut vmm_dirty_bitmap = Vec::new(); + let bitmaps = MIGRATION_MANAGER.vmm_bitmaps.write().unwrap(); + for (_, map) in bitmaps.iter() { + if (slot.guest_phys_addr == map.gpa) && (slot.memory_size == map.len) { + vmm_dirty_bitmap = map.get_and_clear_dirty(); + } + } + + // Get dirty memory from migration objects. + let vmm = MIGRATION_MANAGER.vmm.read().unwrap(); + let mgt_object = vmm.mgt_object.as_ref().unwrap(); + let vm_dirty_bitmap = mgt_object + .lock() + .unwrap() + .get_dirty_log(slot.slot, slot.memory_size) + .unwrap(); + + // Merge dirty bitmap. + let dirty_bitmap: Vec = vm_dirty_bitmap + .iter() + .zip(vmm_dirty_bitmap.iter()) + .map(|(x, y)| x | y) + .collect(); + + // Convert dirty bitmaps to memory blocks. + Ok(Self::sync_dirty_bitmap(dirty_bitmap, slot.guest_phys_addr)) + } + + /// mark the dirty log into vmm. + /// + /// # Arguments + /// + /// * `addr` - Start address of dirty memory. + /// * `len` - Length of dirty memory. + fn mark_dirty_log(addr: u64, len: u64) { + if !MigrationManager::is_active() { + return; + } + + let bitmaps = MIGRATION_MANAGER.vmm_bitmaps.write().unwrap(); + for (_, map) in bitmaps.iter() { + if (addr >= map.hva) && ((addr + len) <= (map.hva + map.len)) { + map.mark_bitmap(addr - map.hva + map.gpa, len); + } + } + } + + /// sync the dirty log from migration object bitmaps. + /// + /// # Arguments + /// + /// * `bitmap` - dirty bitmap from migration object. + /// * `addr` - Start address of memory slot. 
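// ----------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): DirtyBitmap above keeps one bit
// per guest page in a Vec<AtomicU64>; page n lives in word n >> 6 at bit
// n & 63, and get_and_clear_dirty() drains each word with fetch_and(0). The
// same arithmetic in stand-alone form, with a fixed 4 KiB page size instead of
// host_page_size():
use std::sync::atomic::{AtomicU64, Ordering};

const PAGE_SIZE: u64 = 4096;

fn mark_pages(map: &[AtomicU64], offset: u64, len: u64) {
    if len == 0 {
        return;
    }
    let first_bit = offset / PAGE_SIZE;
    let last_bit = (offset + len - 1) / PAGE_SIZE;
    for n in first_bit..=last_bit {
        let word = (n >> 6) as usize;
        if word >= map.len() {
            break;
        }
        // Word index n >> 6, bit index n & 63, as in DirtyBitmap::mark_bitmap().
        map[word].fetch_or(1 << (n & 63), Ordering::SeqCst);
    }
}

fn take_dirty(map: &[AtomicU64]) -> Vec<u64> {
    // fetch_and(0) returns each word's previous value and clears it in place.
    map.iter().map(|w| w.fetch_and(0, Ordering::SeqCst)).collect()
}
// ----------------------------------------------------------------------------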
+    fn sync_dirty_bitmap(bitmap: Vec<u64>, addr: u64) -> Vec<MemBlock> {
+        let page_size = host_page_size();
+        let mut mem_blocks: Vec<MemBlock> = Vec::new();
+        let mut block: Option<MemBlock> = None;
+
+        for (idx, num) in bitmap.iter().enumerate() {
+            if *num == 0 {
+                continue;
+            }
+
+            for bit in 0..64 {
+                if ((num >> bit) & 1_u64) == 0 {
+                    if let Some(entry) = block.take() {
+                        mem_blocks.push(entry);
+                    }
+                    continue;
+                }
+
+                if let Some(e) = &mut block {
+                    e.len += page_size;
+                } else {
+                    let offset = ((idx * 64) + bit) as u64 * page_size;
+                    block = Some(MemBlock {
+                        gpa: addr + offset,
+                        len: page_size,
+                    });
+                }
+            }
+        }
+        if let Some(entry) = block.take() {
+            mem_blocks.push(entry);
+        }
+
+        mem_blocks
+    }
+}
+
+impl Migratable for MigrationManager {}
diff --git a/migration/src/device_state.rs b/migration/src/protocol.rs
similarity index 38%
rename from migration/src/device_state.rs
rename to migration/src/protocol.rs
index de9c16cf8e90233314fc0be23fdfd04b84f9f4dc..ca8e23c296ec325827f478286399ccd1391570b5 100644
--- a/migration/src/device_state.rs
+++ b/migration/src/protocol.rs
@@ -1,4 +1,4 @@
-// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved.
+// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved.
 //
 // StratoVirt is licensed under Mulan PSL v2.
 // You can use this software according to the terms and conditions of the Mulan
@@ -11,13 +11,459 @@
 // See the Mulan PSL v2 for more details.
 use std::cmp::Ordering;
+use std::io::{Read, Write};
+use std::mem::size_of;
+use std::slice::{from_raw_parts, from_raw_parts_mut};
+use anyhow::{anyhow, bail, Context, Result};
+use kvm_ioctls::Kvm;
 use serde::{Deserialize, Serialize};
-use super::errors::Result;
+use crate::MigrationError;
+use util::byte_code::ByteCode;
+
+/// This status for migration in migration process.
+///
+/// # Notes
+///
+/// State transfer:
+/// None -----------> Setup: set up migration resource.
+/// Setup ----------> Active: migration is ready.
+/// Active ---------> Completed: migration is successful.
+/// Completed ------> Active: make migration become ready again.
+/// Failed ---------> Setup: reset migration resource.
+/// Any ------------> Failed: something wrong in migration.
+/// Any ------------> Canceled: cancel migration.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum MigrationStatus {
+    /// Migration resource is not prepared all
+    None,
+    /// Migration resource(desc_db, device instance, ...) is setup.
+    Setup,
+    /// Migration is active.
+    Active,
+    /// Migration completed.
+    Completed,
+    /// Migration failed.
+    Failed,
+    /// Migration canceled.
+    Canceled,
+}
+
+impl std::fmt::Display for MigrationStatus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            match self {
+                MigrationStatus::None => "none",
+                MigrationStatus::Setup => "setup",
+                MigrationStatus::Active => "active",
+                MigrationStatus::Completed => "completed",
+                MigrationStatus::Failed => "failed",
+                MigrationStatus::Canceled => "canceled",
+            }
+        )
+    }
+}
+
+impl MigrationStatus {
+    // Check and transfer migration status after convert migration operations.
+    pub fn transfer(self, new_status: MigrationStatus) -> Result<MigrationStatus> {
+        match self {
+            MigrationStatus::None => match new_status {
+                MigrationStatus::Setup => Ok(new_status),
+                _ => Err(anyhow!(MigrationError::InvalidStatusTransfer(
+                    self, new_status
+                ))),
+            },
+            MigrationStatus::Setup => match new_status {
+                MigrationStatus::Active | MigrationStatus::Failed | MigrationStatus::Canceled => {
+                    Ok(new_status)
+                }
+                _ => Err(anyhow!(MigrationError::InvalidStatusTransfer(
+                    self, new_status
+                ))),
+            },
+            MigrationStatus::Active => match new_status {
+                MigrationStatus::Completed
+                | MigrationStatus::Failed
+                | MigrationStatus::Canceled => Ok(new_status),
+                _ => Err(anyhow!(MigrationError::InvalidStatusTransfer(
+                    self, new_status
+                ))),
+            },
+            MigrationStatus::Completed => match new_status {
+                MigrationStatus::Active => Ok(new_status),
+                _ => Err(anyhow!(MigrationError::InvalidStatusTransfer(
+                    self, new_status
+                ))),
+            },
+            MigrationStatus::Failed => match new_status {
+                MigrationStatus::Setup | MigrationStatus::Active => Ok(new_status),
+                _ => Err(anyhow!(MigrationError::InvalidStatusTransfer(
+                    self, new_status
+                ))),
+            },
+            MigrationStatus::Canceled => Ok(new_status),
+        }
+    }
+}
+
+/// Structure defines the transmission protocol between the source and destination VM.
+#[repr(u16)]
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum TransStatus {
+    /// Active migration.
+    Active,
+    /// Vm configuration.
+    VmConfig,
+    /// Processing memory data stage in migration.
+    Memory,
+    /// Processing device state stage in migration.
+    State,
+    /// Complete migration.
+    Complete,
+    /// Cancel migration.
+    Cancel,
+    /// Everything is ok in migration.
+    Ok,
+    /// Something error in migration.
+    Error,
+    /// Unknown status in migration.
+    Unknown,
+}
+
+impl Default for TransStatus {
+    fn default() -> Self {
+        Self::Unknown
+    }
+}
+
+impl std::fmt::Display for TransStatus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            match self {
+                TransStatus::Active => "Active",
+                TransStatus::VmConfig => "VmConfig",
+                TransStatus::Memory => "Memory",
+                TransStatus::State => "State",
+                TransStatus::Complete => "Complete",
+                TransStatus::Cancel => "Cancel",
+                TransStatus::Ok => "Ok",
+                TransStatus::Error => "Error",
+                TransStatus::Unknown => "Unknown",
+            }
+        )
+    }
+}
+
+/// Structure is used to save request protocol from source VM.
+#[repr(C)]
+#[derive(Default, Copy, Clone)]
+pub struct Request {
+    /// Length of data to be sent.
+    pub length: u64,
+    /// The status need to sync to destination.
+    pub status: TransStatus,
+}
+
+impl ByteCode for Request {}
+
+impl Request {
+    /// Send request message to socket file descriptor.
+    ///
+    /// # Arguments
+    ///
+    /// * `fd` - Socket file descriptor between source and destination.
+    /// * `status` - The transmission status of request.
+    /// * `length` - The length that data need to send.
+    ///
+    /// # Errors
+    ///
+    /// The socket file descriptor is broken.
+    pub fn send_msg(fd: &mut dyn Write, status: TransStatus, length: u64) -> Result<()> {
+        let request = Request { length, status };
+        let data =
+            // SAFETY: The pointer of request can be guaranteed not null.
+            unsafe { from_raw_parts(&request as *const Self as *const u8, size_of::<Self>()) };
+        fd.write_all(data)
+            .with_context(|| format!("Failed to write request data {:?}", data))?;
+
+        Ok(())
+    }
+
+    /// Receive request message from socket file descriptor.
+    ///
+    /// # Arguments
+    ///
+    /// * `fd` - Socket file descriptor between source and destination.
+    ///
+    /// # Errors
+    ///
+    /// The socket file descriptor is broken.
+    pub fn recv_msg(fd: &mut dyn Read) -> Result<Request> {
+        let mut request = Request::default();
+        let data =
+            // SAFETY: The pointer of request can be guaranteed not null.
+            unsafe {
+                from_raw_parts_mut(&mut request as *mut Request as *mut u8, size_of::<Request>())
+            };
+        fd.read_exact(data)
+            .with_context(|| format!("Failed to read request data {:?}", data))?;
+
+        Ok(request)
+    }
+}
+
+/// Structure is used to save response protocol from destination VM.
+#[repr(C)]
+#[derive(Default, Copy, Clone)]
+pub struct Response {
+    /// The status need to response to source.
+    pub status: TransStatus,
+}
+
+impl ByteCode for Response {}
+
+impl Response {
+    /// Send response message to socket file descriptor.
+    ///
+    /// # Arguments
+    ///
+    /// * `fd` - Socket file descriptor between source and destination.
+    /// * `status` - The transmission status of response.
+    ///
+    /// # Errors
+    ///
+    /// The socket file descriptor is broken.
+    pub fn send_msg(fd: &mut dyn Write, status: TransStatus) -> Result<()> {
+        let response = Response { status };
+        let data =
+            // SAFETY: The pointer of response can be guaranteed not null.
+            unsafe { from_raw_parts(&response as *const Self as *const u8, size_of::<Self>()) };
+        fd.write_all(data)
+            .with_context(|| format!("Failed to write response data {:?}", data))?;
+
+        Ok(())
+    }
+
+    /// Receive response message from socket file descriptor.
+    ///
+    /// # Arguments
+    ///
+    /// * `fd` - Socket file descriptor between source and destination.
+    ///
+    /// # Errors
+    ///
+    /// The socket file descriptor is broken.
+    pub fn recv_msg(fd: &mut dyn Read) -> Result<Response> {
+        let mut response = Response::default();
+        let data =
+            // SAFETY: The pointer of response can be guaranteed not null.
+            unsafe {
+                from_raw_parts_mut(&mut response as *mut Response as *mut u8, size_of::<Response>())
+            };
+        fd.read_exact(data)
+            .with_context(|| format!("Failed to read response data {:?}", data))?;
+
+        Ok(response)
+    }
+
+    /// Check the status from response is not OK.
+    pub fn is_err(&self) -> bool {
+        self.status != TransStatus::Ok
+    }
+}
+
+/// Structure is used to save guest physical address and length of
+/// memory block that needs to send.
+#[repr(C)]
+#[derive(Clone, Default)]
+pub struct MemBlock {
+    /// Guest address.
+    pub gpa: u64,
+    /// Size of memory.
+    pub len: u64,
+}
+
+/// Magic number for migration header. Those bytes represent "STRATOVIRT".
+const MAGIC_NUMBER: [u8; 16] = [
+    0x53, 0x54, 0x52, 0x41, 0x54, 0x4f, 0x56, 0x49, 0x52, 0x54, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+];
+const MAJOR_VERSION: u32 = 2;
+const MINOR_VERSION: u32 = 2;
+const CURRENT_VERSION: u32 = MAJOR_VERSION << 12 | MINOR_VERSION & 0b1111;
+const COMPAT_VERSION: u32 = CURRENT_VERSION;
+#[cfg(target_arch = "x86_64")]
+const EAX_VENDOR_INFO: u32 = 0x0;
+/// The length of `MigrationHeader` part occupies bytes in snapshot file.
+pub const HEADER_LENGTH: usize = 4096;
+
+/// Format type for migration.
+/// Different file format will have different file layout.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserialize, Serialize)]
+pub enum FileFormat {
+    Device,
+    MemoryFull,
+}
+
+/// The endianness of byte order.
+#[derive(Debug, Copy, Clone, PartialEq, Deserialize, Serialize)]
+enum EndianType {
+    Little = 1,
+    Big = 2,
+}
+
+impl EndianType {
+    fn get_endian_type() -> EndianType {
+        if cfg!(target_endian = "big") {
+            EndianType::Big
+        } else {
+            EndianType::Little
+        }
+    }
+}
+
+/// Get host cpu model as bytes.
+#[cfg(target_arch = "x86_64")] +fn cpu_model() -> [u8; 16] { + use core::arch::x86_64::__cpuid_count; + + // SAFETY: We only use cpuid for cpu info in x86_64. + let result = unsafe { __cpuid_count(EAX_VENDOR_INFO, 0) }; + let vendor_slice = [result.ebx, result.edx, result.ecx]; + + // SAFETY: We known those brand string length. + let vendor_array = unsafe { + let brand_string_start = vendor_slice.as_ptr() as *const u8; + std::slice::from_raw_parts(brand_string_start, 3 * 4) + }; + + let mut buffer = [0u8; 16]; + if vendor_array.len() > 16 { + buffer.copy_from_slice(&vendor_array[0..15]); + } else { + buffer[0..vendor_array.len()].copy_from_slice(vendor_array); + } + buffer +} + +/// Structure used to mark some message in migration. +#[derive(Copy, Clone, Debug, Deserialize, Serialize)] +pub struct MigrationHeader { + /// Magic number for migration file/stream. + magic_num: [u8; 16], + /// Compatible version of migration. + compat_version: u32, + /// Arch identifier. + arch: [u8; 8], + /// Endianness of byte order. + byte_order: EndianType, + /// The version of hypervisor. + hypervisor_version: u32, + /// The type of CPU model. + #[cfg(target_arch = "x86_64")] + cpu_model: [u8; 16], + /// Operation system type. + os_type: [u8; 8], + /// File format of migration file/stream. + pub format: FileFormat, + /// The length of `DeviceStateDesc`. + pub desc_len: usize, +} + +impl ByteCode for MigrationHeader {} + +impl Default for MigrationHeader { + fn default() -> Self { + MigrationHeader { + magic_num: MAGIC_NUMBER, + compat_version: COMPAT_VERSION, + format: FileFormat::Device, + byte_order: EndianType::Little, + hypervisor_version: Kvm::new().unwrap().get_api_version() as u32, + #[cfg(target_arch = "x86_64")] + cpu_model: cpu_model(), + #[cfg(target_os = "linux")] + os_type: [b'l', b'i', b'n', b'u', b'x', b'0', b'0', b'0'], + #[cfg(target_arch = "x86_64")] + arch: [b'x', b'8', b'6', b'_', b'6', b'4', b'0', b'0'], + #[cfg(target_arch = "aarch64")] + arch: [b'a', b'a', b'r', b'c', b'h', b'6', b'4', b'0'], + desc_len: 0, + } + } +} + +impl MigrationHeader { + /// Check parsed `MigrationHeader` is illegal or not. 
+ pub fn check_header(&self) -> Result<()> { + if self.magic_num != MAGIC_NUMBER { + return Err(anyhow!(MigrationError::HeaderItemNotFit( + "Magic_number".to_string() + ))); + } + + if self.compat_version > CURRENT_VERSION { + return Err(anyhow!(MigrationError::VersionNotFit( + self.compat_version, + CURRENT_VERSION + ))); + } + + #[cfg(target_arch = "x86_64")] + let current_arch = [b'x', b'8', b'6', b'_', b'6', b'4', b'0', b'0']; + #[cfg(target_arch = "aarch64")] + let current_arch = [b'a', b'a', b'r', b'c', b'h', b'6', b'4', b'0']; + if self.arch != current_arch { + return Err(anyhow!(MigrationError::HeaderItemNotFit( + "Arch".to_string() + ))); + } + + if self.byte_order != EndianType::get_endian_type() { + return Err(anyhow!(MigrationError::HeaderItemNotFit( + "Byte order".to_string() + ))); + } + + #[cfg(target_arch = "x86_64")] + if self.cpu_model != cpu_model() { + return Err(anyhow!(MigrationError::HeaderItemNotFit( + "Cpu model".to_string() + ))); + } + + #[cfg(target_os = "linux")] + let current_os_type = [b'l', b'i', b'n', b'u', b'x', b'0', b'0', b'0']; + if self.os_type != current_os_type { + return Err(anyhow!(MigrationError::HeaderItemNotFit( + "Os type".to_string() + ))); + } + + let current_kvm_version = Kvm::new().unwrap().get_api_version() as u32; + if current_kvm_version < self.hypervisor_version { + return Err(anyhow!(MigrationError::HeaderItemNotFit( + "Hypervisor version".to_string() + ))); + } + + if self.desc_len > (1 << 20) { + return Err(anyhow!(MigrationError::HeaderItemNotFit( + "Desc length".to_string() + ))); + } + + Ok(()) + } +} /// Version check result enum. -#[derive(PartialEq, Debug)] +#[derive(PartialEq, Eq, Debug)] pub enum VersionCheck { /// Version is completely same. Same, @@ -59,10 +505,10 @@ pub trait StateTransfer { fn get_device_alias(&self) -> u64; } -/// The structure to describe `DeviceState` structure with version messege. +/// The structure to describe `DeviceState` structure with version message. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DeviceStateDesc { - /// Device type identfy. + /// Device type identify. pub name: String, /// Alias for device type. pub alias: u64, @@ -77,7 +523,7 @@ pub struct DeviceStateDesc { } /// The structure to describe struct field in `DeviceState` structure. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct FieldDesc { /// Field var name. pub var_name: String, @@ -120,7 +566,7 @@ impl DeviceStateDesc { } /// Check padding from a device state descriptor to another version device state - /// descriptor. The padding will be added tinto current_slice for `DeviceState`. + /// descriptor. The padding will be added into current_slice for `DeviceState`. /// /// # Arguments /// @@ -129,6 +575,7 @@ impl DeviceStateDesc { pub fn add_padding(&self, desc: &DeviceStateDesc, current_slice: &mut Vec) -> Result<()> { let tmp_slice = current_slice.clone(); current_slice.clear(); + // SAFETY: size has been checked in restore_desc_db(). current_slice.resize(self.size as usize, 0); for field in self.clone().fields { if desc.contains(&field.alias) { @@ -149,7 +596,7 @@ impl DeviceStateDesc { Ok(()) } - /// Check device state version descriptor version messege. + /// Check device state version descriptor version message. /// If version is same, return enum `Same`. /// If version is not same but fit, return enum `Compat`. /// if version is not fit, return enum `Mismatch`. 
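[Editor's note] The hunks above introduce the source/destination handshake (`Request`/`Response` with `TransStatus`) and the `MigrationStatus` state machine. The sketch below is illustrative only and is not part of the patch: it assumes the items from migration/src/protocol.rs are in scope, and the helper name `demo_protocol` is hypothetical.

    use std::io::Cursor;

    fn demo_protocol() -> anyhow::Result<()> {
        // Source side: announce that 4 KiB of memory data will follow.
        let mut tx = Cursor::new(Vec::new());
        Request::send_msg(&mut tx, TransStatus::Memory, 4096)?;

        // Destination side: parse the same bytes back into a Request.
        let mut rx = Cursor::new(tx.into_inner());
        let req = Request::recv_msg(&mut rx)?;
        assert!(req.status == TransStatus::Memory && req.length == 4096);

        // Destination acknowledges; the source checks the answer with is_err().
        let mut ack_tx = Cursor::new(Vec::new());
        Response::send_msg(&mut ack_tx, TransStatus::Ok)?;
        let mut ack_rx = Cursor::new(ack_tx.into_inner());
        assert!(!Response::recv_msg(&mut ack_rx)?.is_err());

        // Status changes must follow the state machine documented on MigrationStatus.
        let status = MigrationStatus::None
            .transfer(MigrationStatus::Setup)?
            .transfer(MigrationStatus::Active)?
            .transfer(MigrationStatus::Completed)?;
        assert_eq!(status, MigrationStatus::Completed);

        Ok(())
    }

In a real migration the two `Cursor`s would be the two ends of the socket connecting source and destination; the in-memory buffers here only stand in for that stream.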
@@ -168,9 +615,127 @@ impl DeviceStateDesc { #[cfg(test)] pub mod tests { - use super::{DeviceStateDesc, FieldDesc, StateTransfer, VersionCheck}; + use super::*; + use migration_derive::{ByteCode, Desc}; use util::byte_code::ByteCode; + #[test] + fn test_normal_transfer() { + let mut status = MigrationStatus::None; + + // None to Setup. + assert!(status.transfer(MigrationStatus::Setup).is_ok()); + status = status.transfer(MigrationStatus::Setup).unwrap(); + + // Setup to Active. + assert!(status.transfer(MigrationStatus::Active).is_ok()); + status = status.transfer(MigrationStatus::Active).unwrap(); + + // Active to Completed. + assert!(status.transfer(MigrationStatus::Completed).is_ok()); + status = status.transfer(MigrationStatus::Completed).unwrap(); + + // Completed to Active. + assert!(status.transfer(MigrationStatus::Active).is_ok()); + status = status.transfer(MigrationStatus::Active).unwrap(); + + // Any to Failed. + assert!(status.transfer(MigrationStatus::Failed).is_ok()); + status = status.transfer(MigrationStatus::Failed).unwrap(); + + // Failed to Active. + assert!(status.transfer(MigrationStatus::Active).is_ok()); + status = status.transfer(MigrationStatus::Active).unwrap(); + + // Any to Failed. + assert!(status.transfer(MigrationStatus::Failed).is_ok()); + status = status.transfer(MigrationStatus::Failed).unwrap(); + + // Failed to Setup. + assert!(status.transfer(MigrationStatus::Setup).is_ok()); + status = status.transfer(MigrationStatus::Setup).unwrap(); + + assert_eq!(status, MigrationStatus::Setup); + } + + #[test] + fn test_abnormal_transfer_with_error() { + let mut status = MigrationStatus::None; + + // None to Active. + if let Err(e) = status.transfer(MigrationStatus::Active) { + assert_eq!( + e.to_string(), + format!( + "Failed to transfer migration status from {} to {}.", + MigrationStatus::None, + MigrationStatus::Active + ) + ); + } else { + assert!(false) + } + status = status.transfer(MigrationStatus::Setup).unwrap(); + + // Setup to Complete. + if let Err(e) = status.transfer(MigrationStatus::Completed) { + assert_eq!( + e.to_string(), + format!( + "Failed to transfer migration status from {} to {}.", + MigrationStatus::Setup, + MigrationStatus::Completed + ) + ); + } else { + assert!(false) + } + status = status.transfer(MigrationStatus::Active).unwrap(); + + // Active to Setup. + if let Err(e) = status.transfer(MigrationStatus::Setup) { + assert_eq!( + e.to_string(), + format!( + "Failed to transfer migration status from {} to {}.", + MigrationStatus::Active, + MigrationStatus::Setup + ) + ); + } else { + assert!(false) + } + status = status.transfer(MigrationStatus::Completed).unwrap(); + + // Completed to Setup. + if let Err(e) = status.transfer(MigrationStatus::Setup) { + assert_eq!( + e.to_string(), + format!( + "Failed to transfer migration status from {} to {}.", + MigrationStatus::Completed, + MigrationStatus::Setup + ) + ); + } else { + assert!(false) + } + + // Complete to failed. + if let Err(e) = status.transfer(MigrationStatus::Failed) { + assert_eq!( + e.to_string(), + format!( + "Failed to transfer migration status from {} to {}.", + MigrationStatus::Completed, + MigrationStatus::Failed + ) + ); + } else { + assert!(false) + } + } + #[derive(Default)] // A simple device version 1. 
     pub struct DeviceV1 {
@@ -221,11 +786,11 @@
     }
     impl StateTransfer for DeviceV1 {
-        fn get_state_vec(&self) -> super::Result<Vec<u8>> {
+        fn get_state_vec(&self) -> Result<Vec<u8>> {
             Ok(self.state.as_bytes().to_vec())
         }
-        fn set_state_mut(&mut self, state: &[u8]) -> super::Result<()> {
+        fn set_state_mut(&mut self, state: &[u8]) -> Result<()> {
             self.state = *DeviceV1State::from_bytes(state).unwrap();
             Ok(())
         }
@@ -236,11 +801,11 @@
     }
     impl StateTransfer for DeviceV2 {
-        fn get_state_vec(&self) -> super::Result<Vec<u8>> {
+        fn get_state_vec(&self) -> Result<Vec<u8>> {
             Ok(self.state.as_bytes().to_vec())
         }
-        fn set_state_mut(&mut self, state: &[u8]) -> super::Result<()> {
+        fn set_state_mut(&mut self, state: &[u8]) -> Result<()> {
             self.state = *DeviceV2State::from_bytes(state).unwrap();
             Ok(())
         }
@@ -265,11 +830,11 @@
     }
     impl StateTransfer for DeviceV3 {
-        fn get_state_vec(&self) -> super::Result<Vec<u8>> {
+        fn get_state_vec(&self) -> Result<Vec<u8>> {
             Ok(self.state.as_bytes().to_vec())
         }
-        fn set_state_mut(&mut self, state: &[u8]) -> super::Result<()> {
+        fn set_state_mut(&mut self, state: &[u8]) -> Result<()> {
             self.state = *DeviceV3State::from_bytes(state).unwrap();
             Ok(())
         }
@@ -294,11 +859,11 @@
     }
     impl StateTransfer for DeviceV4 {
-        fn get_state_vec(&self) -> super::Result<Vec<u8>> {
+        fn get_state_vec(&self) -> Result<Vec<u8>> {
             Ok(self.state.as_bytes().to_vec())
         }
-        fn set_state_mut(&mut self, state: &[u8]) -> super::Result<()> {
+        fn set_state_mut(&mut self, state: &[u8]) -> Result<()> {
             self.state = *DeviceV4State::from_bytes(state).unwrap();
             Ok(())
         }
@@ -317,11 +882,11 @@
     }
     impl StateTransfer for DeviceV5 {
-        fn get_state_vec(&self) -> super::Result<Vec<u8>> {
+        fn get_state_vec(&self) -> Result<Vec<u8>> {
             Ok(self.state.as_bytes().to_vec())
         }
-        fn set_state_mut(&mut self, state: &[u8]) -> super::Result<()> {
+        fn set_state_mut(&mut self, state: &[u8]) -> Result<()> {
             self.state = *DeviceV5State::from_bytes(state).unwrap();
             Ok(())
         }
@@ -333,13 +898,10 @@
     #[test]
     fn test_desc_basic_padding() {
-        /*
-         * This test makes two version of a device.
-         * Those devices's difference is appending a new field `mcr` in
-         * device state.
-         * Add_padding can solve this change in descriptor of device state.
-         * Test can verify this function works.
-         */
+        // This test makes two version of a device. Those devices's difference is appending a new
+        // field `mcr` in device state.
+        // Add_padding can solve this change in descriptor of device state.
+        // Test can verify this function works.
         let mut device_v1 = DeviceV1 {
             state: DeviceV1State::default(),
@@ -358,12 +920,9 @@
         );
         let mut current_slice = device_v1.get_state_vec().unwrap();
-        assert_eq!(
-            state_2_desc
-                .add_padding(&state_1_desc, &mut current_slice)
-                .is_ok(),
-            true
-        );
+        assert!(state_2_desc
+            .add_padding(&state_1_desc, &mut current_slice)
+            .is_ok());
         let mut device_v2 = DeviceV2 {
             state: DeviceV2State::default(),
@@ -380,13 +939,10 @@
     #[test]
     fn test_desc_data_type_padding() {
-        /*
-         * This test makes two version of a device.
-         * Those devices's difference is appending all fields data value changed from
-         * u8 to u64.
-         * Add_padding can solve this change in descriptor of device state.
-         * Test can verify this function works.
-         */
+        // This test makes two version of a device. Those devices's difference is appending all
+        // fields data value changed from u8 to u64.
+        // Add_padding can solve this change in descriptor of device state.
+        // Test can verify this function works.
         let mut device_v2 = DeviceV2 {
             state: DeviceV2State::default(),
         };
@@ -405,12 +961,9 @@
         );
         let mut current_slice = device_v2.get_state_vec().unwrap();
-        assert_eq!(
-            state_3_desc
-                .add_padding(&state_2_desc, &mut current_slice)
-                .is_ok(),
-            true
-        );
+        assert!(state_3_desc
+            .add_padding(&state_2_desc, &mut current_slice)
+            .is_ok());
         let mut device_v3 = DeviceV3 {
             state: DeviceV3State::default(),
@@ -418,21 +971,18 @@
         device_v3.set_state_mut(&current_slice).unwrap();
         assert!(state_3_desc.current_version > state_2_desc.current_version);
-        assert_eq!(device_v3.state.ier, device_v2.state.ier as u64);
-        assert_eq!(device_v3.state.iir, device_v2.state.iir as u64);
-        assert_eq!(device_v3.state.lcr, device_v2.state.lcr as u64);
-        assert_eq!(device_v3.state.mcr, device_v2.state.mcr as u64);
+        assert_eq!(device_v3.state.ier, u64::from(device_v2.state.ier));
+        assert_eq!(device_v3.state.iir, u64::from(device_v2.state.iir));
+        assert_eq!(device_v3.state.lcr, u64::from(device_v2.state.lcr));
+        assert_eq!(device_v3.state.mcr, u64::from(device_v2.state.mcr));
     }
     #[test]
     fn test_desc_field_name_padding() {
-        /*
-         * This test makes two version of a device.
-         * Those devices's difference is appending all fields name changed from
-         * u8 to u64.
-         * Add_padding can solve this change in descriptor of device state.
-         * Test can verify this function works.
-         */
+        // This test makes two version of a device. Those devices's difference is appending all
+        // fields name changed from u8 to u64.
+        // Add_padding can solve this change in descriptor of device state.
+        // Test can verify this function works.
         let mut device_v3 = DeviceV3 {
             state: DeviceV3State::default(),
         };
@@ -451,12 +1001,9 @@
         );
         let mut current_slice = device_v3.get_state_vec().unwrap();
-        assert_eq!(
-            state_4_desc
-                .add_padding(&state_3_desc, &mut current_slice)
-                .is_ok(),
-            true
-        );
+        assert!(state_4_desc
+            .add_padding(&state_3_desc, &mut current_slice)
+            .is_ok());
         let mut device_v4 = DeviceV4 {
             state: DeviceV4State::default(),
@@ -472,13 +1019,10 @@
     #[test]
     fn test_desc_field_delete_padding() {
-        /*
-         * This test makes two version of a device.
-         * Those devices's difference is appending all fields name changed from
-         * u8 to u64.
-         * Add_padding can solve this change in descriptor of device state.
-         * Test can verify this function works.
-         */
+        // This test makes two version of a device. Those devices's difference is appending all
+        // fields name changed from u8 to u64.
+        // Add_padding can solve this change in descriptor of device state.
+        // Test can verify this function works.
         let mut device_v4 = DeviceV4 {
             state: DeviceV4State::default(),
         };
@@ -497,12 +1041,9 @@
         );
         let mut current_slice = device_v4.get_state_vec().unwrap();
-        assert_eq!(
-            state_5_desc
-                .add_padding(&state_4_desc, &mut current_slice)
-                .is_ok(),
-            true
-        );
+        assert!(state_5_desc
+            .add_padding(&state_4_desc, &mut current_slice)
+            .is_ok());
         let mut device_v5 = DeviceV5 {
             state: DeviceV5State::default(),
@@ -515,12 +1056,9 @@
     #[test]
     fn test_desc_jump_version_padding() {
-        /*
-         * This test makes two version of a device.
-         * Those devices jump from v2 to v5 once.
-         * Add_padding can solve this change in descriptor of device state.
-         * Test can verify this function works.
-         */
+        // This test makes two version of a device. Those devices jump from v2 to v5 once.
+        // Add_padding can solve this change in descriptor of device state.
+        // Test can verify this function works.
         let mut device_v2 = DeviceV2 {
             state: DeviceV2State::default(),
         };
@@ -539,12 +1077,9 @@
         );
         let mut current_slice = device_v2.get_state_vec().unwrap();
-        assert_eq!(
-            state_5_desc
-                .add_padding(&state_2_desc, &mut current_slice)
-                .is_ok(),
-            true
-        );
+        assert!(state_5_desc
+            .add_padding(&state_2_desc, &mut current_slice)
+            .is_ok());
         let mut device_v5 = DeviceV5 {
             state: DeviceV5State::default(),
@@ -552,6 +1087,16 @@
         device_v5.set_state_mut(&current_slice).unwrap();
         assert!(state_5_desc.current_version > state_2_desc.current_version);
-        assert_eq!(device_v5.state.rii, device_v2.state.iir as u64);
+        assert_eq!(device_v5.state.rii, u64::from(device_v2.state.iir));
+    }
+
+    #[test]
+    fn test_check_header() {
+        if Kvm::new().is_err() {
+            return;
+        }
+
+        let header = MigrationHeader::default();
+        assert!(header.check_header().is_ok());
     }
 }
diff --git a/migration/src/snapshot.rs b/migration/src/snapshot.rs
index 30ee13c75b2517435dae5c50f8531593b6f40409..3a449bad8a7213da886363fe1ae260d5a77803aa 100644
--- a/migration/src/snapshot.rs
+++ b/migration/src/snapshot.rs
@@ -1,4 +1,4 @@
-// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved.
+// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved.
 //
 // StratoVirt is licensed under Mulan PSL v2.
 // You can use this software according to the terms and conditions of the Mulan
@@ -13,28 +13,30 @@
 use std::collections::HashMap;
 use std::fs::{create_dir, File};
 use std::io::{Read, Write};
-use std::mem::size_of;
 use std::path::PathBuf;
-use util::byte_code::ByteCode;
-use util::reader::BufferReader;
+use anyhow::{anyhow, bail, Context, Result};
+
+use crate::general::{translate_id, Lifecycle};
+use crate::manager::{MigrationManager, MIGRATION_MANAGER};
+use crate::protocol::{DeviceStateDesc, FileFormat, MigrationStatus, HEADER_LENGTH};
+use crate::MigrationError;
 use util::unix::host_page_size;
-use crate::device_state::{DeviceStateDesc, VersionCheck};
-use crate::errors::{ErrorKind, Result, ResultExt};
-use crate::header::{FileFormat, MigrationHeader};
-use crate::manager::{id_remap, InstanceId, MigrationEntry, MigrationManager, MIGRATION_MANAGER};
-use crate::status::MigrationStatus;
+pub const SERIAL_SNAPSHOT_ID: &str = "serial";
+pub const KVM_SNAPSHOT_ID: &str = "kvm";
+pub const GICV3_SNAPSHOT_ID: &str = "gicv3";
+pub const GICV3_ITS_SNAPSHOT_ID: &str = "gicv3_its";
+pub const PL011_SNAPSHOT_ID: &str = "pl011";
+pub const PL031_SNAPSHOT_ID: &str = "pl031";
-/// The length of `MigrationHeader` part occupies bytes in snapshot file.
-const HEADER_LENGTH: usize = 4096;
 /// The suffix used for snapshot memory storage.
 const MEMORY_PATH_SUFFIX: &str = "memory";
 /// The suffix used for snapshot device state storage.
 const DEVICE_PATH_SUFFIX: &str = "state";
 impl MigrationManager {
-    /// Do snapshot for `VM`.
+    /// Save snapshot for `VM`.
/// /// # Notes /// @@ -61,9 +63,7 @@ impl MigrationManager { vm_state_path.push(DEVICE_PATH_SUFFIX); match File::create(vm_state_path) { Ok(mut state_file) => { - Self::save_header(FileFormat::Device, &mut state_file)?; - Self::save_descriptor_db(&mut state_file)?; - Self::save_device_state(&mut state_file)?; + Self::save_vmstate(Some(FileFormat::Device), &mut state_file)?; } Err(e) => { bail!("Failed to create snapshot state file: {}", e); @@ -75,8 +75,7 @@ impl MigrationManager { vm_memory_path.push(MEMORY_PATH_SUFFIX); match File::create(vm_memory_path) { Ok(mut memory_file) => { - Self::save_header(FileFormat::MemoryFull, &mut memory_file)?; - Self::save_memory(&mut memory_file)?; + Self::save_memory(Some(FileFormat::MemoryFull), &mut memory_file)?; } Err(e) => { bail!("Failed to create snapshot memory file: {}", e); @@ -105,33 +104,33 @@ impl MigrationManager { let mut snapshot_path = PathBuf::from(path); if !snapshot_path.is_dir() { - return Err(ErrorKind::InvalidSnapshotPath.into()); + return Err(anyhow!(MigrationError::InvalidSnapshotPath)); } snapshot_path.push(MEMORY_PATH_SUFFIX); let mut memory_file = - File::open(&snapshot_path).chain_err(|| "Failed to open memory snapshot file")?; - let memory_header = Self::load_header(&mut memory_file)?; + File::open(&snapshot_path).with_context(|| "Failed to open memory snapshot file")?; + let memory_header = Self::restore_header(&mut memory_file)?; memory_header.check_header()?; if memory_header.format != FileFormat::MemoryFull { bail!("Invalid memory snapshot file"); } snapshot_path.pop(); snapshot_path.push(DEVICE_PATH_SUFFIX); - let mut device_state_file = - File::open(&snapshot_path).chain_err(|| "Failed to open device state snapshot file")?; - let device_state_header = Self::load_header(&mut device_state_file)?; + let mut device_state_file = File::open(&snapshot_path) + .with_context(|| "Failed to open device state snapshot file")?; + let device_state_header = Self::restore_header(&mut device_state_file)?; device_state_header.check_header()?; if device_state_header.format != FileFormat::Device { bail!("Invalid device state snapshot file"); } - Self::load_memory(&mut memory_file).chain_err(|| "Failed to load snapshot memory")?; + Self::restore_memory(&mut memory_file).with_context(|| "Failed to load snapshot memory")?; let snapshot_desc_db = - Self::load_descriptor_db(&mut device_state_file, device_state_header.desc_len) - .chain_err(|| "Failed to load device descriptor db")?; - Self::load_vmstate(snapshot_desc_db, &mut device_state_file) - .chain_err(|| "Failed to load snapshot device state")?; + Self::restore_desc_db(&mut device_state_file, device_state_header.desc_len) + .with_context(|| "Failed to load device descriptor db")?; + Self::restore_vmstate(snapshot_desc_db, &mut device_state_file) + .with_context(|| "Failed to load snapshot device state")?; Self::resume()?; // Set status to `Completed` @@ -140,62 +139,16 @@ impl MigrationManager { Ok(()) } - /// Write `MigrationHeader` to `Write` trait object as bytes. - /// `MigrationHeader` will occupy the first 4096 bytes in snapshot file. - /// - /// # Arguments - /// - /// * `file_format` - confirm snapshot file format. - /// * `writer` - The `Write` trait object to write header message. 
-    fn save_header(file_format: FileFormat, writer: &mut dyn Write) -> Result<()> {
-        let mut header = MigrationHeader::default();
-        header.format = file_format;
-        header.desc_len = match file_format {
-            FileFormat::Device => Self::get_desc_db_len()?,
-            FileFormat::MemoryFull => (host_page_size() as usize) * 2 - HEADER_LENGTH,
-        };
-        let header_bytes = header.as_bytes();
-        let mut input_slice = [0u8; HEADER_LENGTH];
-
-        input_slice[0..size_of::<MigrationHeader>()].copy_from_slice(header_bytes);
-        writer
-            .write(&input_slice)
-            .chain_err(|| "Failed to save migration header")?;
-
-        Ok(())
-    }
-
-    /// Load and parse `MigrationHeader` from `Read` object.
-    ///
-    /// # Arguments
-    ///
-    /// * `reader` - The `Read` trait object.
-    fn load_header(reader: &mut dyn Read) -> Result<MigrationHeader> {
-        let mut header_bytes = [0u8; size_of::<MigrationHeader>()];
-        reader.read_exact(&mut header_bytes)?;
-
-        let mut place_holder = [0u8; HEADER_LENGTH - size_of::<MigrationHeader>()];
-        reader.read_exact(&mut place_holder)?;
-
-        Ok(*MigrationHeader::from_bytes(&header_bytes)
-            .ok_or(ErrorKind::FromBytesError("HEADER"))?)
-    }
-
     /// Save memory state and data to `Write` trait object.
     ///
     /// # Arguments
     ///
-    /// * `writer` - The `Write` trait object.
-    fn save_memory(writer: &mut dyn Write) -> Result<()> {
-        let entry = MIGRATION_MANAGER.entry.read().unwrap();
-        for item in entry.iter() {
-            for (id, entry) in item.iter() {
-                if let MigrationEntry::Memory(i) = entry {
-                    i.pre_save(id, writer)
-                        .chain_err(|| "Failed to save vm memory")?;
-                }
-            }
-        }
+    /// * `fd` - The `Write` trait object to save memory data.
+    fn save_memory(file_format: Option<FileFormat>, fd: &mut dyn Write) -> Result<()> {
+        Self::save_header(file_format, fd)?;
+
+        let locked_vmm = MIGRATION_MANAGER.vmm.read().unwrap();
+        locked_vmm.memory.as_ref().unwrap().save_memory(fd)?;
         Ok(())
     }
@@ -205,115 +158,150 @@ impl MigrationManager {
     /// # Arguments
     ///
     /// * `file` - snapshot memory file.
-    fn load_memory(file: &mut File) -> Result<()> {
+    fn restore_memory(file: &mut File) -> Result<()> {
         let mut state_bytes = [0_u8].repeat((host_page_size() as usize) * 2 - HEADER_LENGTH);
         file.read_exact(&mut state_bytes)?;
-        let entry = MIGRATION_MANAGER.entry.read().unwrap();
-        for item in entry.iter() {
-            for (_, entry) in item.iter() {
-                if let MigrationEntry::Memory(i) = entry {
-                    i.pre_load(&state_bytes, Some(file))
-                        .chain_err(|| "Failed to load vm memory")?;
-                }
-            }
-        }
+        let locked_vmm = MIGRATION_MANAGER.vmm.read().unwrap();
+        locked_vmm
+            .memory
+            .as_ref()
+            .unwrap()
+            .restore_memory(Some(file), &state_bytes)?;
         Ok(())
     }
-    /// Save device state to `Write` trait object.
+    /// Save vm state to `Write` trait object as bytes.
     ///
     /// # Arguments
     ///
-    /// * `writer` - The `Write` trait object.
-    fn save_device_state(writer: &mut dyn Write) -> Result<()> {
-        let entry = MIGRATION_MANAGER.entry.read().unwrap();
-        for item in entry.iter() {
-            for (id, entry) in item.iter() {
-                match entry {
-                    MigrationEntry::Safe(i) => i.pre_save(id, writer)?,
-                    MigrationEntry::Mutex(i) => i.lock().unwrap().pre_save(id, writer)?,
-                    _ => {}
-                }
+    /// * fd - The `Write` trait object to save VM data.
+    pub fn save_vmstate(file_format: Option<FileFormat>, fd: &mut dyn Write) -> Result<()> {
+        Self::save_header(file_format, fd)?;
+        Self::save_desc_db(fd)?;
+
+        let locked_vmm = MIGRATION_MANAGER.vmm.read().unwrap();
+        // Save transports state.
+        for (id, transport) in locked_vmm.transports.iter() {
+            transport
+                .lock()
+                .unwrap()
+                .save_device(*id, fd)
+                .with_context(|| "Failed to save transport state")?;
+        }
+
+        // Save devices state.
+        for (id, device) in locked_vmm.devices.iter() {
+            device
+                .lock()
+                .unwrap()
+                .save_device(*id, fd)
+                .with_context(|| "Failed to save device state")?;
+        }
+
+        // Save CPUs state.
+        for (id, cpu) in locked_vmm.cpus.iter() {
+            cpu.save_device(*id, fd)
+                .with_context(|| "Failed to save cpu state")?;
+        }
+
+        #[cfg(target_arch = "x86_64")]
+        {
+            // Save kvm device state.
+            locked_vmm
+                .kvm
+                .as_ref()
+                .unwrap()
+                .save_device(translate_id(KVM_SNAPSHOT_ID), fd)
+                .with_context(|| "Failed to save kvm state")?;
+        }
+
+        #[cfg(target_arch = "aarch64")]
+        {
+            // Save GICv3 device state.
+            let gic_id = translate_id(GICV3_SNAPSHOT_ID);
+            if let Some(gic) = locked_vmm.gic_group.get(&gic_id) {
+                gic.save_device(gic_id, fd)
+                    .with_context(|| "Failed to save gic state")?;
+            }
+
+            // Save GICv3 ITS device state.
+            let its_id = translate_id(GICV3_ITS_SNAPSHOT_ID);
+            if let Some(its) = locked_vmm.gic_group.get(&its_id) {
+                its.save_device(its_id, fd)
+                    .with_context(|| "Failed to save gic its state")?;
            }
         }
         Ok(())
     }
-    /// Restore vm state from `Read` trait object.
+    /// Restore vm state from `Read` trait object as bytes.
     ///
     /// # Arguments
     ///
-    /// * `snap_desc_db` - The snapshot descriptor hashmap read from snapshot file.
-    /// * `reader` - The `Read` trait object.
-    fn load_vmstate(
+    /// * snap_desc_db - snapshot state descriptor.
+    /// * fd - The `Read` trait object to restore VM data.
+    pub fn restore_vmstate(
         snap_desc_db: HashMap<u64, DeviceStateDesc>,
-        reader: &mut dyn Read,
+        fd: &mut dyn Read,
     ) -> Result<()> {
-        let desc_db = MIGRATION_MANAGER.desc_db.read().unwrap();
-        let device_entry = MIGRATION_MANAGER.entry.read().unwrap();
-
-        let mut migration_file = BufferReader::new(reader);
-        migration_file.read_buffer()?;
-
-        while let Some(data) = &migration_file.read_vectored(size_of::<InstanceId>()) {
-            let instance_id = InstanceId::from_bytes(data.as_slice()).unwrap();
-            let snap_desc = snap_desc_db.get(&instance_id.object_type).unwrap();
-            let current_desc = desc_db.get(&snap_desc.name).unwrap();
-
-            let mut state_data =
-                if let Some(data) = migration_file.read_vectored(snap_desc.size as usize) {
-                    data
-                } else {
-                    bail!("Invalid snapshot device state data");
-                };
-            match current_desc.check_version(snap_desc) {
-                VersionCheck::Same => {}
-                VersionCheck::Compat => {
-                    current_desc
-                        .add_padding(snap_desc, &mut state_data)
-                        .chain_err(|| "Failed to transform snapshot data version.")?;
-                }
-                VersionCheck::Mismatch => {
-                    return Err(ErrorKind::VersionNotFit(
-                        current_desc.compat_version,
-                        snap_desc.current_version,
-                    )
-                    .into())
-                }
+        let locked_vmm = MIGRATION_MANAGER.vmm.read().unwrap();
+        // Restore transports state.
+        for _ in 0..locked_vmm.transports.len() {
+            let (transport_data, id) = Self::check_vm_state(fd, &snap_desc_db)?;
+            if let Some(transport) = locked_vmm.transports.get(&id) {
+                transport
+                    .lock()
+                    .unwrap()
+                    .restore_mut_device(&transport_data)
+                    .with_context(|| "Failed to restore transport state")?;
             }
+        }
-            for item in device_entry.iter() {
-                for (key, state) in item {
-                    if id_remap(key) == instance_id.object_id {
-                        info!("Load VM state: key {}", key);
-                        match state {
-                            MigrationEntry::Safe(i) => i.pre_load(&state_data, None)?,
-                            MigrationEntry::Mutex(i) => {
-                                i.lock().unwrap().pre_load_mut(&state_data, None)?
-                            }
-                            _ => {}
-                        }
-                    }
-                }
+        // Restore devices state.
+ for _ in 0..locked_vmm.devices.len() { + let (device_data, id) = Self::check_vm_state(fd, &snap_desc_db)?; + if let Some(device) = locked_vmm.devices.get(&id) { + device + .lock() + .unwrap() + .restore_mut_device(&device_data) + .with_context(|| "Failed to restore device state")?; } } - Ok(()) - } + // Restore CPUs state. + for _ in 0..locked_vmm.cpus.len() { + let (cpu_data, id) = Self::check_vm_state(fd, &snap_desc_db)?; + if let Some(cpu) = locked_vmm.cpus.get(&id) { + cpu.restore_device(&cpu_data) + .with_context(|| "Failed to restore cpu state")?; + } + } - /// Resume recovered device. - /// This function will be called after restore device state. - fn resume() -> Result<()> { - let entry = MIGRATION_MANAGER.entry.read().unwrap(); - for item in entry.iter() { - for (_, state) in item { - if let MigrationEntry::Mutex(i) = state { - i.lock().unwrap().resume()? + #[cfg(target_arch = "x86_64")] + { + // Restore kvm device state. + if let Some(kvm) = &locked_vmm.kvm { + let (kvm_data, _) = Self::check_vm_state(fd, &snap_desc_db)?; + kvm.restore_device(&kvm_data) + .with_context(|| "Failed to restore kvm state")?; + } + } + + #[cfg(target_arch = "aarch64")] + { + // Restore GIC group state. + for _ in 0..locked_vmm.gic_group.len() { + let (gic_data, id) = Self::check_vm_state(fd, &snap_desc_db)?; + if let Some(gic) = locked_vmm.gic_group.get(&id) { + gic.restore_device(&gic_data) + .with_context(|| "Failed to restore gic state")?; } } } + Ok(()) } } diff --git a/migration/src/status.rs b/migration/src/status.rs deleted file mode 100644 index 382622bd7833ddef5658fb610ddfa8bf3f952b60..0000000000000000000000000000000000000000 --- a/migration/src/status.rs +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. -// -// StratoVirt is licensed under Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan -// PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. - -use crate::errors::{ErrorKind, Result}; - -/// This status for migration in migration process. -/// -/// # Notes -/// -/// State transfer: -/// None -----------> Setup: set up migration resource. -/// Setup ----------> Active: start to migrate. -/// Active ---------> Completed: migrate completed successfully. -/// Completed ------> Active: start to migrate again after a successfully migration. -/// Failed ---------> Setup: reset migration resource. -/// Any ------------> Failed: Something wrong in migration. -#[derive(Copy, Clone, Debug, PartialEq)] -pub enum MigrationStatus { - /// Migration resource is not prepared all - None = 0, - /// Migration resource(desc_db, device_instance, ...) is setup. - Setup = 1, - /// In migration or incoming migrating. - Active = 2, - /// Migration finished. - Completed = 3, - /// Migration failed. 
- Failed = 4, -} - -impl std::fmt::Display for MigrationStatus { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - match self { - MigrationStatus::None => "none", - MigrationStatus::Setup => "setup", - MigrationStatus::Active => "active", - MigrationStatus::Completed => "completed", - MigrationStatus::Failed => "failed", - } - ) - } -} - -impl MigrationStatus { - pub fn transfer(self, new_status: MigrationStatus) -> Result { - match self { - MigrationStatus::None => match new_status { - MigrationStatus::Setup => Ok(new_status), - _ => Err(ErrorKind::InvalidStatusTransfer(self, new_status).into()), - }, - MigrationStatus::Setup => match new_status { - MigrationStatus::Active | MigrationStatus::Failed => Ok(new_status), - _ => Err(ErrorKind::InvalidStatusTransfer(self, new_status).into()), - }, - MigrationStatus::Active => match new_status { - MigrationStatus::Completed | MigrationStatus::Failed => Ok(new_status), - _ => Err(ErrorKind::InvalidStatusTransfer(self, new_status).into()), - }, - MigrationStatus::Completed => match new_status { - MigrationStatus::Active => Ok(new_status), - _ => Err(ErrorKind::InvalidStatusTransfer(self, new_status).into()), - }, - MigrationStatus::Failed => match new_status { - MigrationStatus::Setup | MigrationStatus::Active => Ok(new_status), - _ => Err(ErrorKind::InvalidStatusTransfer(self, new_status).into()), - }, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_normal_transfer() { - let mut status = MigrationStatus::None; - - // None to Setup. - assert!(status.transfer(MigrationStatus::Setup).is_ok()); - status = status.transfer(MigrationStatus::Setup).unwrap(); - - // Setup to Active. - assert!(status.transfer(MigrationStatus::Active).is_ok()); - status = status.transfer(MigrationStatus::Active).unwrap(); - - // Active to Completed. - assert!(status.transfer(MigrationStatus::Completed).is_ok()); - status = status.transfer(MigrationStatus::Completed).unwrap(); - - // Completed to Active. - assert!(status.transfer(MigrationStatus::Active).is_ok()); - status = status.transfer(MigrationStatus::Active).unwrap(); - - // Any to Failed. - assert!(status.transfer(MigrationStatus::Failed).is_ok()); - status = status.transfer(MigrationStatus::Failed).unwrap(); - - // Failed to Active. - assert!(status.transfer(MigrationStatus::Active).is_ok()); - status = status.transfer(MigrationStatus::Active).unwrap(); - - // Any to Failed. - assert!(status.transfer(MigrationStatus::Failed).is_ok()); - status = status.transfer(MigrationStatus::Failed).unwrap(); - - // Failed to Setup. - assert!(status.transfer(MigrationStatus::Setup).is_ok()); - status = status.transfer(MigrationStatus::Setup).unwrap(); - - assert_eq!(status, MigrationStatus::Setup); - } - - #[test] - fn test_abnormal_transfer_with_error() { - let mut status = MigrationStatus::None; - - // None to Active. - if let Err(e) = status.transfer(MigrationStatus::Active) { - assert_eq!( - e.to_string(), - format!( - "Failed to transfer migration status from {} to {}.", - MigrationStatus::None, - MigrationStatus::Active - ) - ); - } else { - assert!(false) - } - status = status.transfer(MigrationStatus::Setup).unwrap(); - - // Setup to Complete. 
- if let Err(e) = status.transfer(MigrationStatus::Completed) { - assert_eq!( - e.to_string(), - format!( - "Failed to transfer migration status from {} to {}.", - MigrationStatus::Setup, - MigrationStatus::Completed - ) - ); - } else { - assert!(false) - } - status = status.transfer(MigrationStatus::Active).unwrap(); - - // Active to Setup. - if let Err(e) = status.transfer(MigrationStatus::Setup) { - assert_eq!( - e.to_string(), - format!( - "Failed to transfer migration status from {} to {}.", - MigrationStatus::Active, - MigrationStatus::Setup - ) - ); - } else { - assert!(false) - } - status = status.transfer(MigrationStatus::Completed).unwrap(); - - // Completed to Setup. - if let Err(e) = status.transfer(MigrationStatus::Setup) { - assert_eq!( - e.to_string(), - format!( - "Failed to transfer migration status from {} to {}.", - MigrationStatus::Completed, - MigrationStatus::Setup - ) - ); - } else { - assert!(false) - } - - // Complete to failed. - if let Err(e) = status.transfer(MigrationStatus::Failed) { - assert_eq!( - e.to_string(), - format!( - "Failed to transfer migration status from {} to {}.", - MigrationStatus::Completed, - MigrationStatus::Failed - ) - ); - } else { - assert!(false) - } - } -} diff --git a/ozone/Cargo.toml b/ozone/Cargo.toml index 838d15852889df11580edb6143efc7ee84abb3a9..dafafca70a91644e1f40ddcc213d0cdb3d08e517 100644 --- a/ozone/Cargo.toml +++ b/ozone/Cargo.toml @@ -1,13 +1,15 @@ [package] name = "ozone" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" description = "Provides protection for stratovirt" license = "Mulan PSL v2" [dependencies] -error-chain = "0.12.4" -libc = ">=0.2.71" +thiserror = "1.0" +anyhow = "1.0" +libc = "0.2" +nix = "0.26.2" util = { path = "../util" } diff --git a/ozone/src/args.rs b/ozone/src/args.rs index 28da385ea9ae549d9fa879a9dc64ccfe955f14a5..85e9ee96507066a801ecc4aad2a50e6fc8c90e26 100644 --- a/ozone/src/args.rs +++ b/ozone/src/args.rs @@ -86,7 +86,7 @@ pub fn create_args_parser<'a>() -> ArgParser<'a> { .arg( Arg::with_name("cgroup") .long("cgroup") - .help("set cgroup arguments, use -cgroup = ...") + .help("set cgroup arguments, use -cgroup = ...") .required(false) .takes_values(true), ) diff --git a/ozone/src/capability.rs b/ozone/src/capability.rs index e7eae704fc00947d2982cfd8f882570f44896d64..116225cd4f88536654ab41c1fc0020c5d46e1b51 100644 --- a/ozone/src/capability.rs +++ b/ozone/src/capability.rs @@ -12,9 +12,13 @@ //! Remove all capability for ozone when uid is 0, use -capability cap_* to add capability. -use crate::{syscall, ErrorKind, Result, ResultExt}; use std::{collections::HashMap, io::Write}; +use anyhow::{bail, Context, Result}; + +use crate::syscall; +use crate::OzoneError; + const CAPS_V3: u32 = 0x20080522; const NR_ALL_CAP: u8 = 41; @@ -89,7 +93,7 @@ pub struct CapUserData { inheritable_s1: u32, } -pub fn has_cap(cap: u8) -> Result { +fn has_cap(cap: u8) -> Result { let mut hdr = CapUserHeader { version: CAPS_V3, pid: 0, @@ -108,9 +112,9 @@ pub fn has_cap(cap: u8) -> Result { // so we set Bounding to limit child process. pub fn clear_all_capabilities() -> Result<()> { for cap in 0..NR_ALL_CAP { - if has_cap(cap).chain_err(|| ErrorKind::CapsError("CAPGET"))? { + if has_cap(cap).with_context(|| OzoneError::CapsError("CAPGET"))? 
{ syscall::drop_bounding_caps(cap) - .chain_err(|| ErrorKind::CapsError("PR_CAPBSET_DROP"))?; + .with_context(|| OzoneError::CapsError("PR_CAPBSET_DROP"))?; } } @@ -129,10 +133,10 @@ pub fn set_capability_for_ozone(capability: &str) -> Result<()> { let warning = format!("Alert! Adding dangerous capability {:?} to ozone , it might cause risk of escape!\n", cap); std::io::stdout() .write(warning.as_bytes()) - .chain_err(|| "Failed to write warnings")?; + .with_context(|| "Failed to write warnings")?; std::io::stdout() .flush() - .chain_err(|| "Failed to flush stdout")?; + .with_context(|| "Failed to flush stdout")?; } } else { bail!("Invalid capability argument: {:?}", cap); @@ -143,9 +147,9 @@ pub fn set_capability_for_ozone(capability: &str) -> Result<()> { if cap_add_arr.contains(item.0) { continue; } - if has_cap(item.1 .0).chain_err(|| ErrorKind::CapsError("CAPGET"))? { + if has_cap(item.1 .0).with_context(|| OzoneError::CapsError("CAPGET"))? { syscall::drop_bounding_caps(item.1 .0) - .chain_err(|| ErrorKind::CapsError("PR_CAPBSET_DROP"))?; + .with_context(|| OzoneError::CapsError("PR_CAPBSET_DROP"))?; } } Ok(()) diff --git a/ozone/src/cgroup.rs b/ozone/src/cgroup.rs index 8ce65394a8383fb150c296b41fbdb05346115085..4bb79e6decca6f035bf59f426002a5af196d1181 100644 --- a/ozone/src/cgroup.rs +++ b/ozone/src/cgroup.rs @@ -18,7 +18,9 @@ use std::{ process, }; -use crate::{ErrorKind, Result, ResultExt}; +use anyhow::{bail, Context, Result}; + +use crate::OzoneError; const MOUNT_DIR: &str = "/proc/mounts"; const CGROUP_ALLOW_LIST: [&str; 2] = ["cpuset.cpus", "memory.limit_in_bytes"]; @@ -68,8 +70,9 @@ pub fn clean_cgroup(cmd_parser: &CgroupCfg, exec_file: String, name: String) -> let split: Vec<&str> = file.split('.').collect(); let base_path = get_base_location(split[0], &exec_file, &name)?; if base_path.exists() { - std::fs::remove_dir(&base_path) - .chain_err(|| format!("Failed to remove cgroup directory {:?}", &base_path))?; + std::fs::remove_dir(&base_path).with_context(|| { + format!("Failed to remove cgroup directory {:?}", &base_path) + })?; } } } @@ -81,7 +84,7 @@ pub fn clean_node(exec_file: String, name: String) -> Result<()> { let base_path = get_base_location("cpuset", &exec_file, &name)?; if base_path.exists() { std::fs::remove_dir(&base_path) - .chain_err(|| format!("Failed to remove cgroup directory {:?}", &base_path))?; + .with_context(|| format!("Failed to remove cgroup directory {:?}", &base_path))?; } Ok(()) @@ -93,10 +96,10 @@ fn get_base_location(controller: &str, exec_file: &str, name: &str) -> Result = dir.split(' ').collect(); target_path = PathBuf::from(split[1]); @@ -114,12 +117,12 @@ fn get_base_location(controller: &str, exec_file: &str, name: &str) -> Result Result<()> { let write_path = get_base_location("cpuset", exec_file, name)?; write_cgroup_value(&write_path, "cpuset.mems", node) - .chain_err(|| ErrorKind::WriteError("cpuset.mems".to_string(), node.to_string()))?; + .with_context(|| OzoneError::WriteError("cpuset.mems".to_string(), node.to_string()))?; let mut upper_path = write_path.clone(); upper_path.pop(); upper_path.push("cpuset.cpus"); - inherit_config(&write_path, "cpuset.cpus").chain_err(|| { + inherit_config(&write_path, "cpuset.cpus").with_context(|| { format!( "Failed to inherit configuration for path: {:?}", &write_path @@ -128,13 +131,13 @@ pub fn set_numa_node(node: &str, exec_file: &str, name: &str) -> Result<()> { let value = read_file_value(upper_path.clone()); if let Ok(val) = value { write_cgroup_value(&write_path, "cpuset.cpus", &val) - 
.chain_err(|| ErrorKind::WriteError("cpuset.cpus".to_string(), val.to_string()))?; + .with_context(|| OzoneError::WriteError("cpuset.cpus".to_string(), val.to_string()))?; } else { bail!("Can not read value from: {:?}", &upper_path); } let pid = process::id(); write_cgroup_value(&write_path, "tasks", &pid.to_string()) - .chain_err(|| "Failed to attach pid")?; + .with_context(|| "Failed to attach pid")?; Ok(()) } @@ -142,17 +145,17 @@ fn write_cgroup_value(path: &Path, file: &str, value: &str) -> Result<()> { if file != "tasks" { if !path.exists() { fs::create_dir_all(path) - .chain_err(|| format!("Failed to create directory: {:?}", path))?; + .with_context(|| format!("Failed to create directory: {:?}", path))?; } inherit_config(path, file) - .chain_err(|| format!("Failed to inherit configuration for path: {:?}", &path))?; + .with_context(|| format!("Failed to inherit configuration for path: {:?}", &path))?; } let mut path_to_write = path.to_path_buf(); - path_to_write.push(&file); - fs::write(&path_to_write, format!("{}\n", value)).chain_err(|| { - ErrorKind::WriteError( - (&path_to_write.to_string_lossy()).to_string(), + path_to_write.push(file); + fs::write(&path_to_write, format!("{}\n", value)).with_context(|| { + OzoneError::WriteError( + path_to_write.to_string_lossy().to_string(), value.to_string(), ) })?; @@ -162,21 +165,22 @@ fn write_cgroup_value(path: &Path, file: &str, value: &str) -> Result<()> { fn read_file_value(path: PathBuf) -> Result { let mut value = - fs::read_to_string(&path).chain_err(|| format!("Failed to read path: {:?}", &path))?; + fs::read_to_string(&path).with_context(|| format!("Failed to read path: {:?}", &path))?; value.pop(); Ok(value) } // Reason for inherit configuration: -// Ozone creates a new hierarchy: /sys/fs/cgroup/// in cgroup. As the value in -// current hierarchy should be a sub-aggregate of its parent hierarchy, in other words: value in "..// -// ///file" should be a sub-aggregate of that in "..///file". However, When -// creating the hierarchy "..///" values in "..///file" always -// be empty, which means that the attempts to set values in "..////file" will fail. -// In order to address this problem, Ozone inherit configuration from "..//file" to ""../ -// //file". -// IF many Ozones are launched with the same "exec_file", the first launched one will inherit configuration, other ones -// will not do that. +// Ozone creates a new hierarchy: /sys/fs/cgroup/// in cgroup. As the +// value in current hierarchy should be a sub-aggregate of its parent hierarchy, in other words: +// value in "..// ///file" should be a sub-aggregate of that in +// "..///file". However, When creating the hierarchy +// "..///" values in "..///file" always +// be empty, which means that the attempts to set values in +// "..////file" will fail. In order to address this problem, Ozone +// inherit configuration from "..//file" to ""../ //file". +// If many Ozones are launched with the same "exec_file", the first launched one will inherit +// configuration, other ones will not do that. 
fn inherit_config(path: &Path, file: &str) -> Result<()> { let upper_file = path.with_file_name(file); let value = read_file_value(upper_file.clone())?; @@ -187,9 +191,9 @@ fn inherit_config(path: &Path, file: &str) -> Result<()> { if upper_value.is_empty() { bail!("File: {:?} is empty", &grand_parent_file); } - fs::write(upper_file.clone(), format!("{}\n", upper_value)).chain_err(|| { - ErrorKind::WriteError( - (&upper_file.to_string_lossy()).to_string(), + fs::write(upper_file.clone(), format!("{}\n", upper_value)).with_context(|| { + OzoneError::WriteError( + upper_file.to_string_lossy().to_string(), upper_value.to_string(), ) })?; diff --git a/ozone/src/error.rs b/ozone/src/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..240fc12aa8a5c473c3a3dae73c70207377f81e02 --- /dev/null +++ b/ozone/src/error.rs @@ -0,0 +1,36 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use thiserror::Error; + +#[allow(clippy::upper_case_acronyms)] +#[derive(Error, Debug)] +pub enum OzoneError { + #[error("Util error")] + Util { + #[from] + source: util::error::UtilError, + }, + #[error("Io")] + Io { + #[from] + source: std::io::Error, + }, + #[error("Failed to run binary file in ozone environment: {0}")] + ExecError(std::io::Error), + #[error("Failed to parse {0} to {1}")] + DigitalParseError(&'static str, String), + #[error("Failed to execute {0}")] + CapsError(&'static str), + #[error("Failed to write {0} to {1}")] + WriteError(String, String), +} diff --git a/ozone/src/handler.rs b/ozone/src/handler.rs index 4b27dd20fec590c6bbb52f742d9b16c674c3861d..b72b09ccc4b69ffd95126a966acd64e3337f4f59 100644 --- a/ozone/src/handler.rs +++ b/ozone/src/handler.rs @@ -10,9 +10,6 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
-use crate::cgroup::{self, init_cgroup, parse_cgroup, CgroupCfg}; -use crate::{capability, namespace, syscall, ErrorKind, Result, ResultExt}; - use std::process::Command; use std::{ fs::{canonicalize, read_dir}, @@ -21,6 +18,12 @@ use std::{ process::Stdio, }; +use anyhow::{anyhow, bail, Context, Result}; +use nix::fcntl::{fcntl, FcntlArg}; + +use crate::cgroup::{self, init_cgroup, parse_cgroup, CgroupCfg}; +use crate::OzoneError; +use crate::{capability, namespace, syscall}; use util::arg_parser::ArgMatches; const BASE_OZONE_PATH: &str = "/srv/ozone"; @@ -77,18 +80,18 @@ impl OzoneHandler { handler.name = name; } if let Some(uid) = args.value_of("uid") { - let user_id = (&uid) + let user_id = (uid) .parse::() - .map_err(|_| ErrorKind::DigitalParseError("uid", uid))?; + .with_context(|| OzoneError::DigitalParseError("uid", uid))?; if user_id > MAX_ID_NUMBER { bail!("Input uid should be no more than 65535"); } handler.uid = user_id; } if let Some(gid) = args.value_of("gid") { - let group_id = (&gid) + let group_id = (gid) .parse::() - .map_err(|_| ErrorKind::DigitalParseError("gid", gid))?; + .with_context(|| OzoneError::DigitalParseError("gid", gid))?; if group_id > MAX_ID_NUMBER { bail!("Input gid should be no more than 65535"); } @@ -96,28 +99,28 @@ impl OzoneHandler { } if let Some(exec_file) = args.value_of("exec_file") { handler.exec_file_path = canonicalize(exec_file) - .chain_err(|| "Failed to parse exec file path to PathBuf")?; + .with_context(|| "Failed to parse exec file path to PathBuf")?; } if let Some(source_paths) = args.values_of("source_files") { for path in source_paths.iter() { - handler.source_file_paths.push( - canonicalize(path).chain_err(|| { + handler + .source_file_paths + .push(canonicalize(path).with_context(|| { format!("Failed to parse source path {:?} to PathBuf", &path) - })?, - ); + })?); } } if let Some(node) = args.value_of("numa") { handler.node = Some( - (&node) + (node) .parse::() - .map_err(|_| ErrorKind::DigitalParseError("numa", node))?, + .with_context(|| OzoneError::DigitalParseError("numa", node))?, ); } if let Some(config) = args.values_of("cgroup") { let mut cgroup_cfg = init_cgroup(); for cfg in config { - parse_cgroup(&mut cgroup_cfg, &cfg).chain_err(|| "Failed to parse cgroup")? + parse_cgroup(&mut cgroup_cfg, &cfg).with_context(|| "Failed to parse cgroup")? } handler.cgroup = Some(cgroup_cfg); } @@ -141,7 +144,7 @@ impl OzoneHandler { ); } std::fs::create_dir_all(&self.chroot_dir) - .chain_err(|| format!("Failed to create folder {:?}", &self.chroot_dir))?; + .with_context(|| format!("Failed to create folder {:?}", &self.chroot_dir))?; Ok(()) } @@ -151,7 +154,7 @@ impl OzoneHandler { let mut chroot_dir = self.chroot_dir.clone(); chroot_dir.push(&exec_file_name); std::fs::copy(&self.exec_file_path, chroot_dir) - .chain_err(|| format!("Failed to copy {:?} to new chroot dir", exec_file_name))?; + .with_context(|| format!("Failed to copy {:?} to new chroot dir", exec_file_name))?; Ok(()) } @@ -161,19 +164,15 @@ impl OzoneHandler { /// /// * `file_path` - args parser. 
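The `OzoneHandler::new` hunks above switch the uid/gid/numa parsing from `map_err(|_| ErrorKind::DigitalParseError(..))` to `with_context(|| OzoneError::DigitalParseError(..))`, which keeps the original `ParseIntError` as the source instead of discarding it. A small illustrative sketch (the helper `parse_uid` and its formatted message are assumptions; the bound mirrors the source's `MAX_ID_NUMBER` check of 65535):

```rust
// Sketch only: parse-with-context as done in OzoneHandler::new().
use anyhow::{ensure, Context, Result};

const MAX_ID_NUMBER: u32 = 65535;

fn parse_uid(raw: &str) -> Result<u32> {
    let uid = raw
        .parse::<u32>()
        .with_context(|| format!("Failed to parse uid to {}", raw))?;
    ensure!(uid <= MAX_ID_NUMBER, "Input uid should be no more than 65535");
    Ok(uid)
}
```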
fn bind_mount_file(&self, file_path: &Path) -> Result<()> { - let file_name = if let Some(file) = file_path.file_name() { - file - } else { - bail!("Empty file path"); - }; + let file_name = file_path.file_name().with_context(|| "Empty file path")?; let mut new_root_dir = self.chroot_dir.clone(); new_root_dir.push(file_name); if file_path.is_dir() { std::fs::create_dir_all(&new_root_dir) - .chain_err(|| format!("Failed to create directory: {:?}", &new_root_dir))?; + .with_context(|| format!("Failed to create directory: {:?}", &new_root_dir))?; } else { std::fs::File::create(&new_root_dir) - .chain_err(|| format!("Failed to create file: {:?}", &new_root_dir))?; + .with_context(|| format!("Failed to create file: {:?}", &new_root_dir))?; } // new_root_dir.to_str().unwrap() is safe, because new_root_dir is not empty. syscall::mount( @@ -181,7 +180,7 @@ impl OzoneHandler { new_root_dir.to_str().unwrap(), libc::MS_BIND | libc::MS_SLAVE, ) - .chain_err(|| format!("Failed to mount file: {:?}", &file_path))?; + .with_context(|| format!("Failed to mount file: {:?}", &file_path))?; let data = std::fs::metadata(&new_root_dir)?; if !file_path.is_dir() && data.len() == 0 { @@ -189,11 +188,11 @@ impl OzoneHandler { } syscall::chown(new_root_dir.to_str().unwrap(), self.uid, self.gid) - .chain_err(|| format!("Failed to change owner for source: {:?}", &file_path))?; + .with_context(|| format!("Failed to change owner for source: {:?}", &file_path))?; Ok(()) } - /// Get exec file name. + /// Get exec file name. fn exec_file_name(&self) -> Result { if let Some(file_name) = self.exec_file_path.file_name() { return Ok(file_name.to_string_lossy().into()); @@ -204,11 +203,11 @@ impl OzoneHandler { fn create_newroot_folder(&self, folder: &str) -> Result<()> { std::fs::create_dir_all(folder) - .chain_err(|| format!("Failed to create folder: {:?}", &folder))?; + .with_context(|| format!("Failed to create folder: {:?}", &folder))?; syscall::chmod(folder, 0o700) - .chain_err(|| format!("Failed to chmod to 0o700 for folder: {:?}", &folder))?; + .with_context(|| format!("Failed to chmod to 0o700 for folder: {:?}", &folder))?; syscall::chown(folder, self.uid, self.gid) - .chain_err(|| format!("Failed to change owner for folder: {:?}", &folder))?; + .with_context(|| format!("Failed to change owner for folder: {:?}", &folder))?; Ok(()) } @@ -221,11 +220,11 @@ impl OzoneHandler { ) -> Result<()> { let dev = syscall::makedev(dev_major, dev_minor)?; syscall::mknod(dev_path, libc::S_IFCHR | libc::S_IWUSR | libc::S_IRUSR, dev) - .chain_err(|| format!("Failed to call mknod for device: {:?}", &dev_path))?; + .with_context(|| format!("Failed to call mknod for device: {:?}", &dev_path))?; syscall::chmod(dev_path, mode) - .chain_err(|| format!("Failed to change mode for device: {:?}", &dev_path))?; + .with_context(|| format!("Failed to change mode for device: {:?}", &dev_path))?; syscall::chown(dev_path, self.uid, self.gid) - .chain_err(|| format!("Failed to change owner for device: {:?}", &dev_path))?; + .with_context(|| format!("Failed to change owner for device: {:?}", &dev_path))?; Ok(()) } @@ -233,7 +232,7 @@ impl OzoneHandler { /// Realize OzoneHandler. pub fn realize(&self) -> Result<()> { // First, disinfect the process. 
- disinfect_process().chain_err(|| "Failed to disinfect process")?; + disinfect_process().with_context(|| "Failed to disinfect process")?; self.create_chroot_dir()?; self.copy_exec_file()?; @@ -244,11 +243,11 @@ impl OzoneHandler { let exec_file = self.exec_file_name()?; if let Some(node) = self.node.clone() { cgroup::set_numa_node(&node, &exec_file, &self.name) - .chain_err(|| "Failed to set numa node")?; + .with_context(|| "Failed to set numa node")?; } if let Some(cgroup) = &self.cgroup { cgroup::realize_cgroup(cgroup, exec_file, self.name.clone()) - .chain_err(|| "Failed to realize cgroup")?; + .with_context(|| "Failed to realize cgroup")?; } namespace::set_uts_namespace("Ozone")?; @@ -259,7 +258,7 @@ impl OzoneHandler { namespace::set_mount_namespace(self.chroot_dir.to_str().unwrap())?; for folder in NEWROOT_FOLDERS.iter() { - self.create_newroot_folder(*folder)?; + self.create_newroot_folder(folder)?; } for index in 0..NEWROOT_DEVICE_NR { @@ -272,15 +271,15 @@ impl OzoneHandler { } if let Some(capability) = &self.capability { capability::set_capability_for_ozone(capability) - .chain_err(|| "Failed to set capability for ozone.")?; + .with_context(|| "Failed to set capability for ozone.")?; } else { capability::clear_all_capabilities() - .chain_err(|| "Failed to clean all capability for ozone.")?; + .with_context(|| "Failed to clean all capability for ozone.")?; } let mut chroot_exec_file = PathBuf::from("/"); chroot_exec_file.push(self.exec_file_name()?); - Err(ErrorKind::ExecError( + Err(anyhow!(OzoneError::ExecError( Command::new(chroot_exec_file) .gid(self.gid) .uid(self.uid) @@ -289,8 +288,7 @@ impl OzoneHandler { .stderr(Stdio::inherit()) .args(&self.extra_args) .exec(), - ) - .into()) + ))) } /// Clean the environment. @@ -299,28 +297,24 @@ impl OzoneHandler { for source_file_path in self.source_file_paths.clone().into_iter() { let mut chroot_path = self.chroot_dir.clone(); let source_file_name = source_file_path.file_name(); - let file_name = if let Some(file_name) = source_file_name { - file_name - } else { - bail!("Source file is empty") - }; + let file_name = source_file_name.with_context(|| "Source file is empty")?; chroot_path.push(file_name); if chroot_path.exists() { syscall::umount(chroot_path.to_str().unwrap()) - .chain_err(|| format!("Failed to umount resource: {:?}", file_name))? + .with_context(|| format!("Failed to umount resource: {:?}", file_name))? } } std::fs::remove_dir_all(&self.chroot_dir) - .chain_err(|| "Failed to remove chroot dir path")?; + .with_context(|| "Failed to remove chroot dir path")?; if self.node.is_some() { cgroup::clean_node(self.exec_file_name()?, self.name.clone()) - .chain_err(|| "Failed to clean numa node")?; + .with_context(|| "Failed to clean numa node")?; } if let Some(cgroup) = &self.cgroup { cgroup::clean_cgroup(cgroup, self.exec_file_name()?, self.name.clone()) - .chain_err(|| "Failed to remove cgroup directory")?; + .with_context(|| "Failed to remove cgroup directory")?; } Ok(()) } @@ -328,20 +322,27 @@ impl OzoneHandler { /// Disinfect the process before launching the ozone process. 
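At the end of `realize()` above, the copied binary is exec'ed inside the new root and the return value of `exec()` is wrapped in an error unconditionally. That is sound because `std::os::unix::process::CommandExt::exec` replaces the current process on success and only returns the `io::Error` from a failed exec. A hedged sketch of the same shape (`exec_in_root` and its message are illustrative):

```rust
// Sketch only: exec() never returns on success, so the returned io::Error
// can be wrapped into an error directly, as realize() does with
// anyhow!(OzoneError::ExecError(..)).
use std::os::unix::process::CommandExt;
use std::process::{Command, Stdio};

use anyhow::{anyhow, Result};

fn exec_in_root(exec_file: &str, args: &[String], uid: u32, gid: u32) -> Result<()> {
    let err = Command::new(exec_file)
        .uid(uid)
        .gid(gid)
        .stdin(Stdio::inherit())
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit())
        .args(args)
        .exec();
    Err(anyhow!("Failed to run binary file in ozone environment: {}", err))
}
```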
fn disinfect_process() -> Result<()> { - let fd_entries = read_dir(SELF_FD).chain_err(|| "Failed to open process fd proc")?; + let fd_entries = read_dir(SELF_FD).with_context(|| "Failed to open process fd proc")?; + let mut open_fds = vec![]; for entry in fd_entries { if entry.is_err() { - continue; + break; } - let entry = entry?; - let file_name = entry.file_name(); + let file_name = entry.unwrap().file_name(); let file_name = file_name.to_str().unwrap_or("0"); let fd = file_name.parse::().unwrap_or(0); if fd > 2 { - syscall::close(fd).chain_err(|| format!("Failed to close fd: {}", fd))?; + open_fds.push(fd); } } + + for fd in open_fds { + if fcntl(fd, FcntlArg::F_GETFD).is_ok() { + syscall::close(fd).with_context(|| format!("Failed to close fd: {}", fd))? + } + } + Ok(()) } diff --git a/ozone/src/main.rs b/ozone/src/main.rs index 4cb998480bfc2e28df2b13c06bf0f5fa1bf96028..e43add3f54d788566f601412951ff07eb960df61 100644 --- a/ozone/src/main.rs +++ b/ozone/src/main.rs @@ -10,11 +10,7 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -#[macro_use] -extern crate error_chain; - -use args::create_args_parser; -use handler::OzoneHandler; +pub mod error; mod args; mod capability; @@ -23,35 +19,31 @@ mod handler; mod namespace; mod syscall; -error_chain! { - links { - Util(util::errors::Error, util::errors::ErrorKind); - } - foreign_links { - Io(std::io::Error); - } - errors { - ExecError(e: std::io::Error) { - display("Failed to run binary file in ozone environment: {}", e) - } - DigitalParseError(column: &'static str, item: String) { - display("Failed to parse {} to {}", item, column) - } - CapsError(cap_option: &'static str) { - display("Failed to execute {}", cap_option) - } - WriteError(path: String, value: String) { - display("Failed to write {} to {}", value, path) +pub use error::OzoneError; + +use std::io::Write; +use std::process::{ExitCode, Termination}; + +use anyhow::{Context, Result}; + +use crate::args::create_args_parser; +use crate::handler::OzoneHandler; + +fn main() -> ExitCode { + match run() { + Ok(ret) => ret.report(), + Err(ref e) => { + write!(&mut std::io::stderr(), "{}", format_args!("{:?}", e)) + .expect("Error writing to stderr"); + + ExitCode::FAILURE } } } -quick_main!(run); - fn run() -> Result<()> { let args = create_args_parser().get_matches()?; - #[allow(unused_variables)] - let handler = OzoneHandler::new(&args).chain_err(|| "Failed to parse cmdline args")?; + let handler = OzoneHandler::new(&args).with_context(|| "Failed to parse cmdline args")?; if args.is_present("clean_resource") { handler.teardown()?; diff --git a/ozone/src/namespace.rs b/ozone/src/namespace.rs index e76379b4306225339c9ac5ab783262aa8930f572..3e5757428074f84f0ee3ea2983b6287773dc96ca 100644 --- a/ozone/src/namespace.rs +++ b/ozone/src/namespace.rs @@ -13,8 +13,9 @@ use std::fs::File; use std::os::unix::prelude::IntoRawFd; +use anyhow::{Context, Result}; + use crate::syscall; -use crate::{Result, ResultExt}; const ROOT_DIR_NAME: &str = "/"; const OLD_ROOT_DIR_NAME: &str = "old_root"; @@ -26,15 +27,16 @@ const CURRENT_DIR_NAME: &str = "."; /// /// * `hostname` - Host name. 
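The reworked `disinfect_process()` above no longer closes descriptors while iterating `/proc/self/fd`; it collects the candidates first and probes each with `fcntl(F_GETFD)` before closing, so descriptors that are no longer open by then (for example the directory iterator's own fd) are skipped. A standalone sketch of that collect-then-close approach, using `nix::unistd::close` instead of the crate's own syscall wrapper (`close_inherited_fds` is illustrative):

```rust
// Sketch only: gather fds first, then close only those still valid.
use anyhow::{Context, Result};
use nix::fcntl::{fcntl, FcntlArg};
use nix::unistd::close;

fn close_inherited_fds() -> Result<()> {
    let mut open_fds = vec![];
    let entries =
        std::fs::read_dir("/proc/self/fd").with_context(|| "Failed to open process fd proc")?;
    for entry in entries {
        let entry = match entry {
            Ok(e) => e,
            Err(_) => break,
        };
        let name = entry.file_name();
        let fd = name.to_str().unwrap_or("0").parse::<i32>().unwrap_or(0);
        // Keep stdin/stdout/stderr open.
        if fd > 2 {
            open_fds.push(fd);
        }
    }
    for fd in open_fds {
        // F_GETFD succeeds only for descriptors that are still open.
        if fcntl(fd, FcntlArg::F_GETFD).is_ok() {
            close(fd).with_context(|| format!("Failed to close fd: {}", fd))?;
        }
    }
    Ok(())
}
```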
pub fn set_uts_namespace(hostname: &str) -> Result<()> { - syscall::unshare(libc::CLONE_NEWUTS).chain_err(|| "Failed to unshare into a new namespace")?; - syscall::set_host_name(hostname).chain_err(|| "Failed to set new hostname")?; + syscall::unshare(libc::CLONE_NEWUTS) + .with_context(|| "Failed to unshare into a new namespace")?; + syscall::set_host_name(hostname).with_context(|| "Failed to set new hostname")?; Ok(()) } /// Set namespace for ipc. pub fn set_ipc_namespace() -> Result<()> { syscall::unshare(libc::CLONE_NEWIPC) - .chain_err(|| "Failed to share into a new ipc namespace")?; + .with_context(|| "Failed to share into a new ipc namespace")?; Ok(()) } @@ -45,10 +47,10 @@ pub fn set_ipc_namespace() -> Result<()> { /// * `path` - Path of network namespace. pub fn set_network_namespace(path: &str) -> Result<()> { let network_ns_fd = File::open(path) - .chain_err(|| format!("Failed to open netns path: {}", path))? + .with_context(|| format!("Failed to open netns path: {}", path))? .into_raw_fd(); syscall::setns(network_ns_fd, libc::CLONE_NEWNET) - .chain_err(|| "Failed to set network namespace")?; + .with_context(|| "Failed to set network namespace")?; syscall::close(network_ns_fd)?; Ok(()) } @@ -59,25 +61,26 @@ pub fn set_network_namespace(path: &str) -> Result<()> { /// /// * `mount_dir` - Path of mount directory . pub fn set_mount_namespace(mount_dir: &str) -> Result<()> { - syscall::unshare(libc::CLONE_NEWNS).chain_err(|| "Failed to unshare into a new namespace")?; + syscall::unshare(libc::CLONE_NEWNS) + .with_context(|| "Failed to unshare into a new namespace")?; syscall::mount(None, ROOT_DIR_NAME, libc::MS_SLAVE | libc::MS_REC) - .chain_err(|| "Failed to mount root path as slave and rec")?; + .with_context(|| "Failed to mount root path as slave and rec")?; syscall::mount(Some(mount_dir), mount_dir, libc::MS_BIND | libc::MS_REC) - .chain_err(|| "Failed to mount target path as bind and rec")?; + .with_context(|| "Failed to mount target path as bind and rec")?; std::env::set_current_dir(mount_dir) - .chain_err(|| "Failed to change current dir to mount dir path")?; + .with_context(|| "Failed to change current dir to mount dir path")?; - syscall::mkdir(OLD_ROOT_DIR_NAME).chain_err(|| "Failed to create old root dir")?; + syscall::mkdir(OLD_ROOT_DIR_NAME).with_context(|| "Failed to create old root dir")?; syscall::pivot_root(CURRENT_DIR_NAME, OLD_ROOT_DIR_NAME) - .chain_err(|| "Failed to call pivot_root")?; + .with_context(|| "Failed to call pivot_root")?; - syscall::chdir(ROOT_DIR_NAME).chain_err(|| "Failed to call chdir to change dir")?; + syscall::chdir(ROOT_DIR_NAME).with_context(|| "Failed to call chdir to change dir")?; - syscall::umount(OLD_ROOT_DIR_NAME).chain_err(|| "Failed to umount old root path dir")?; + syscall::umount(OLD_ROOT_DIR_NAME).with_context(|| "Failed to umount old root path dir")?; - std::fs::remove_dir(OLD_ROOT_DIR_NAME).chain_err(|| "Failed to remove old root path dir")?; + std::fs::remove_dir(OLD_ROOT_DIR_NAME).with_context(|| "Failed to remove old root path dir")?; Ok(()) } diff --git a/ozone/src/syscall.rs b/ozone/src/syscall.rs index 64b4ae1d2c368815aabedb9189c4a741e7cad19b..648f3fc20ea76c86fd9522183d4520789078b76d 100644 --- a/ozone/src/syscall.rs +++ b/ozone/src/syscall.rs @@ -142,7 +142,7 @@ pub fn set_host_name(host_name: &str) -> Result<()> { /// /// # Arguments /// -/// * `fd` - File descriptor referring to one of magic links in a /proc/[pid]/ns/ directory. 
+/// * `fd` - File descriptor referring to one of magic links in a /proc/`\[`pid`\]`/ns/ directory. /// * `nstype` - Namespace type. pub fn setns(fd: i32, nstype: i32) -> Result<()> { SyscallResult { @@ -215,7 +215,7 @@ pub fn chmod(file_path: &str, mode: libc::mode_t) -> Result<()> { /// * `major_id` - The major device number. /// * `minor_id` - The minor device number. pub fn makedev(major_id: u32, minor_id: u32) -> Result { - Ok(unsafe { libc::makedev(major_id, minor_id) }) + Ok(libc::makedev(major_id, minor_id)) } /// Create a special or ordinary file. diff --git a/ozonec/Cargo.lock b/ozonec/Cargo.lock new file mode 100644 index 0000000000000000000000000000000000000000..b67a79bbbdd38f05c0def30a76251a40073ad7c4 --- /dev/null +++ b/ozonec/Cargo.lock @@ -0,0 +1,906 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "caps" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "190baaad529bcfbde9e1a19022c42781bdb6ff9de25721abdb8fd98c0807730b" +dependencies = [ + "libc", + "thiserror", +] + +[[package]] +name = "cc" +version = "1.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72db2f7947ecee9b03b510377e8bb9077afa27176fdbff55c51027e976fdcc48" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "serde", + "windows-targets 0.52.6", +] + +[[package]] +name = "clap" +version = "4.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f13b9c79b5d1dd500d20ef541215a6423c75829ef43117e1b4d17fd8af0b5d76" +dependencies = [ + "bitflags 1.3.2", + "clap_derive", + "clap_lex", + "once_cell", +] + +[[package]] +name = "clap_derive" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "clap_lex" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "033f6b7a4acb1f358c742aaca805c939ee73b4c6209ae4318ec7aca81c42e646" +dependencies = [ + "os_str_bytes", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" + +[[package]] +name = "flate2" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "io-lifetimes" 
+version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "js-sys" +version = "0.3.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.146" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b" + +[[package]] +name = "libseccomp" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21c57fd8981a80019807b7b68118618d29a87177c63d704fc96e6ecd003ae5b3" +dependencies = [ + "bitflags 1.3.2", + "libc", + "libseccomp-sys", + "pkg-config", +] + +[[package]] +name = "libseccomp-sys" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7cbbd4ad467251987c6e5b47d53b11a5a05add08f2447a9e2d70aef1e0d138" + +[[package]] +name = "linux-raw-sys" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "log" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "518ef76f2f87365916b142844c16d8fefd85039bc5699050210a7778ee1cd1de" + +[[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +dependencies = [ + "adler", +] + +[[package]] +name = "nix" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", + "memoffset", + "pin-utils", + "static_assertions", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "oci_spec" +version = "0.1.0" +dependencies = [ + "anyhow", + "libc", + "nix", + "serde", + "serde_json", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "os_str_bytes" +version = 
"6.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" + +[[package]] +name = "ozonec" +version = "0.1.0" +dependencies = [ + "anyhow", + "bitflags 1.3.2", + "caps", + "chrono", + "clap", + "libc", + "libseccomp", + "log", + "nix", + "oci_spec", + "procfs", + "rlimit", + "rusty-fork", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "procfs" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1de8dacb0873f77e6aefc6d71e044761fcc68060290f5b1089fcdf84626bb69" +dependencies = [ + "bitflags 1.3.2", + "byteorder", + "chrono", + "flate2", + "hex", + "lazy_static", + "rustix 0.36.17", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "rlimit" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81a9ed03edbed449d6897c2092c71ab5f7b5fb80f6f0b1a3ed6d40a6f9fc0720" +dependencies = [ + "libc", +] + +[[package]] +name = "rustix" +version = "0.36.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "305efbd14fde4139eb501df5f136994bb520b033fa9fbdce287507dc23b8c7ed" +dependencies = [ + "bitflags 1.3.2", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys 0.1.4", + "windows-sys 0.45.0", +] + +[[package]] +name = "rustix" +version = "0.38.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac5ffa1efe7548069688cd7028f32591853cd7b5b756d41bcffd2353e4fc75b4" +dependencies = [ + "bitflags 2.6.0", + "errno", + "libc", + "linux-raw-sys 0.4.14", + "windows-sys 0.48.0", +] + +[[package]] +name 
= "rusty-fork" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "serde" +version = "1.0.163" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.163" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.74", +] + +[[package]] +name = "serde_json" +version = "1.0.96" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" +dependencies = [ + "cfg-if", + "fastrand", + "redox_syscall", + "rustix 0.38.3", + "windows-sys 0.48.0", +] + +[[package]] +name = "thiserror" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.74", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wait-timeout" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +dependencies = [ + "libc", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.74", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.74", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/ozonec/Cargo.toml b/ozonec/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..a617a89a40f8257b51a6539ae4011533ff76e296 --- /dev/null +++ b/ozonec/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "ozonec" +version = "0.1.0" +authors = ["Huawei StratoVirt Team"] +edition = "2021" +license = "Mulan PSL v2" +description = "An OCI runtime implemented by Rust" + +[dependencies] +anyhow = "= 1.0.71" +bitflags = "= 1.3.2" +caps = "0.5.5" +chrono = { version = "0.4.31", default-features = false, features = ["clock", "serde"] } +clap = { version = "= 4.1.4", default-features = false, features = ["derive", "cargo", "std", "help", "usage"] } +libc = "= 0.2.146" +libseccomp = "0.3.0" +log = { version = "= 0.4.18", features = ["std"]} +nix = "= 0.26.2" +oci_spec = { path = "oci_spec" } +procfs = "0.14.0" +rlimit = "0.5.3" +rusty-fork = "0.3.0" +serde = { version = "= 1.0.163", features = ["derive"] } +serde_json = "= 1.0.96" +thiserror = "= 1.0.40" + +[workspace] + +[profile.dev] +panic = "unwind" + +[profile.release] +lto = true +strip = true +opt-level = 'z' +codegen-units = 1 +panic = "abort" diff --git a/ozonec/oci_spec/Cargo.toml b/ozonec/oci_spec/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..e5923ad52ead9f6f2528cf0d35eae5c28a9d0405 --- /dev/null +++ b/ozonec/oci_spec/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "oci_spec" +version = "0.1.0" +authors = ["Huawei StratoVirt Team"] +edition = "2021" +license = "Mulan PSL v2" +description = "Open Container Initiative (OCI) Specifications in Rust" + +[dependencies] +anyhow = "= 1.0.71" +libc = "= 0.2.146" +nix = "= 0.26.2" +serde = { version = "= 1.0.163", features = ["derive"] } +serde_json = "= 1.0.96" + +[profile.dev] +panic = "unwind" + +[profile.release] +lto = true +strip = true +opt-level = 'z' +codegen-units = 1 +panic = "abort" \ No newline at end of file diff --git a/ozonec/oci_spec/src/lib.rs b/ozonec/oci_spec/src/lib.rs new file mode 100644 index 
0000000000000000000000000000000000000000..f0dd3fe5fcbf13d380e3b3c773f7bd29b24508fc --- /dev/null +++ b/ozonec/oci_spec/src/lib.rs @@ -0,0 +1,20 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +#[cfg(target_os = "linux")] +pub mod linux; +#[cfg(target_family = "unix")] +pub mod posix; +pub mod process; +pub mod runtime; +pub mod state; +pub mod vm; diff --git a/ozonec/oci_spec/src/linux.rs b/ozonec/oci_spec/src/linux.rs new file mode 100644 index 0000000000000000000000000000000000000000..6dafcd5a7a66001454e1ea34d42830dc06149b4a --- /dev/null +++ b/ozonec/oci_spec/src/linux.rs @@ -0,0 +1,1232 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{collections::HashMap, path::PathBuf}; + +use anyhow::{anyhow, Result}; +use nix::sched::CloneFlags; +use serde::{Deserialize, Serialize}; + +/// Available Linux namespaces. +#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize, Hash)] +#[serde(rename_all = "snake_case")] +pub enum NamespaceType { + Cgroup = 0x0200_0000, + Ipc = 0x0800_0000, + Network = 0x4000_0000, + Mount = 0x0002_0000, + Pid = 0x2000_0000, + Time = 0x0000_0080, + User = 0x1000_0000, + Uts = 0x0400_0000, +} + +impl TryInto for NamespaceType { + type Error = anyhow::Error; + + fn try_into(self) -> Result { + match self { + NamespaceType::Cgroup => Ok(CloneFlags::CLONE_NEWCGROUP), + NamespaceType::Ipc => Ok(CloneFlags::CLONE_NEWIPC), + NamespaceType::Network => Ok(CloneFlags::CLONE_NEWNET), + NamespaceType::Mount => Ok(CloneFlags::CLONE_NEWNS), + NamespaceType::Pid => Ok(CloneFlags::CLONE_NEWPID), + NamespaceType::Time => Err(anyhow!("Time namespace not supported with clone")), + NamespaceType::User => Ok(CloneFlags::CLONE_NEWUSER), + NamespaceType::Uts => Ok(CloneFlags::CLONE_NEWUTS), + } + } +} + +impl From for String { + fn from(ns_type: NamespaceType) -> Self { + match ns_type { + NamespaceType::Cgroup => String::from("cgroup"), + NamespaceType::Ipc => String::from("ipc"), + NamespaceType::Network => String::from("net"), + NamespaceType::Mount => String::from("mnt"), + NamespaceType::Pid => String::from("pid"), + NamespaceType::Time => String::from("time"), + NamespaceType::User => String::from("user"), + NamespaceType::Uts => String::from("uts"), + } + } +} + +/// Namespaces. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Namespace { + /// Namespace type. + #[serde(rename = "type")] + pub ns_type: NamespaceType, + /// Namespace file. If path is not specified, a new namespace is created. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub path: Option, +} + +/// UID/GID mapping. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct IdMapping { + /// Starting uid/gid in the container. + pub containerID: u32, + /// Starting uid/gid on the host to be mapped to containerID. + pub hostID: u32, + /// Number of ids to be mapped. + pub size: u32, +} + +/// Offset for Time Namespace. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct TimeOffsets { + /// Offset of clock (in seconds) in the container. + #[serde(skip_serializing_if = "Option::is_none")] + pub secs: Option, + /// Offset of clock (in nanoseconds) in the container. + #[serde(skip_serializing_if = "Option::is_none")] + pub nanosecs: Option, +} + +/// Devices available in the container. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Device { + /// Type of device. + #[serde(rename = "type")] + pub dev_type: String, + /// Full path to device inside container. + pub path: String, + /// Major number for the device. + #[serde(skip_serializing_if = "Option::is_none")] + pub major: Option, + /// Minor number for the device. + #[serde(skip_serializing_if = "Option::is_none")] + pub minor: Option, + /// File mode for the device. + #[serde(skip_serializing_if = "Option::is_none")] + pub fileMode: Option, + /// Id of device owner. + #[serde(skip_serializing_if = "Option::is_none")] + pub uid: Option, + /// Id of device group. + #[serde(skip_serializing_if = "Option::is_none")] + pub gid: Option, +} + +fn default_device_type() -> String { + "a".to_string() +} + +/// Allowed device in Device Cgroup. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct CgroupDevice { + /// Whether the entry is allowed or denied. + #[serde(default)] + pub allow: bool, + /// Type of device. + #[serde(default = "default_device_type", rename = "type")] + pub dev_type: String, + /// Major number for the device. + #[serde(skip_serializing_if = "Option::is_none")] + pub major: Option, + /// Minor number for the device. + #[serde(skip_serializing_if = "Option::is_none")] + pub minor: Option, + /// Cgroup permissions for device. + #[serde(skip_serializing_if = "Option::is_none")] + pub access: Option, +} + +/// Cgroup subsystem to set limits on the container's memory usage. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct MemoryCgroup { + /// Limit of memory usage. + #[serde(skip_serializing_if = "Option::is_none")] + pub limit: Option, + /// Soft limit of memory usage. + #[serde(skip_serializing_if = "Option::is_none")] + pub reservation: Option, + /// Limits of memory +Swap usage. + #[serde(skip_serializing_if = "Option::is_none")] + pub swap: Option, + /// Hard limit for kernel memory. + #[serde(skip_serializing_if = "Option::is_none")] + pub kernel: Option, + /// Hard limit for kernel TCP buffer memory. + #[serde(skip_serializing_if = "Option::is_none")] + pub kernelTCP: Option, + /// Swappiness parameter of vmscan. + #[serde(skip_serializing_if = "Option::is_none")] + pub swappiness: Option, + /// Enable or disable the OOM killer. + #[serde(skip_serializing_if = "Option::is_none")] + pub disableOOMKiller: Option, + /// Enable or disable hierarchical memory accounting. + #[serde(skip_serializing_if = "Option::is_none")] + pub useHierarchy: Option, + /// Enable container memory usage check before setting a new limit. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub checkBeforeUpdate: Option, +} + +/// Cgroup subsystems cpu and cpusets. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct CpuCgroup { + /// Relative share of CPU time available to the tasks in a cgroup. + #[serde(skip_serializing_if = "Option::is_none")] + pub shares: Option, + /// Total amount of time in microseconds for which all tasks in a + /// cgroup can run during one period. + #[serde(skip_serializing_if = "Option::is_none")] + pub quota: Option, + /// Maximum amount of accumulated time in microseconds for which + /// all tasks in a cgroup can run additionally for burst during + /// one period. + #[serde(skip_serializing_if = "Option::is_none")] + pub burst: Option, + /// Period of time in microseconds for how regularly a cgroup's access + /// to CPU resources should be reallocated (CFS scheduler only) + #[serde(skip_serializing_if = "Option::is_none")] + pub period: Option, + /// Period of time in microseconds for the longest continuous period + /// in which the tasks in a cgrouop have access to CPU resources. + #[serde(skip_serializing_if = "Option::is_none")] + pub realtimeRuntime: Option, + /// Same as period but applies to realtime scheduler only. + #[serde(skip_serializing_if = "Option::is_none")] + pub realtimePeriod: Option, + /// List of CPUs the container will run on. + #[serde(skip_serializing_if = "Option::is_none")] + pub cpus: Option, + /// List of memory nodes the container will run on. + #[serde(skip_serializing_if = "Option::is_none")] + pub mems: Option, + /// Cgroups are configured with minimum weight. + #[serde(skip_serializing_if = "Option::is_none")] + pub idle: Option, +} + +/// Per-device bandwidth weights. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct WeightDevice { + /// Major number for device. + pub major: i64, + /// Minor number for device. + pub minor: i64, + /// Bandwidth weight for the device. + #[serde(skip_serializing_if = "Option::is_none")] + pub weight: Option, + /// Bandwidth weight for the device while competing with the cgroup's + /// child cgroups (CFS scheduler only) + #[serde(skip_serializing_if = "Option::is_none")] + pub leafWeight: Option, +} + +/// Per-device bandwidth rate limits. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +pub struct ThrottleDevice { + /// Major number for device. + pub major: i64, + /// Minor number for device. + pub minor: i64, + /// Bandwidth rate limit in bytes per second or IO rate limit for + /// the device. + pub rate: u64, +} + +/// Cgroup subsystem blkio which implements the block IO controller. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct BlockIoCgroup { + /// Per-cgroup weight. + #[serde(skip_serializing_if = "Option::is_none")] + pub weight: Option, + /// Equivalents of weight for the purpose of deciding how much + /// weight tasks in the given cgroup has while competing with + /// the cgroup's child cgroups. + #[serde(skip_serializing_if = "Option::is_none")] + pub leafWeight: Option, + /// Array of per-device bandwidth weights. + #[serde(skip_serializing_if = "Option::is_none")] + pub weightDevice: Option>, + /// Array of per-device read bandwidth rate limits. + #[serde(skip_serializing_if = "Option::is_none")] + pub throttleReadBpsDevice: Option>, + /// Array of per-device write bandwidth rate limits. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub throttleWriteBpsDevice: Option>, + /// Array of per-device read IO rate limits. + #[serde(skip_serializing_if = "Option::is_none")] + pub throttleReadIOPSDevice: Option>, + /// Array of per-device write IO rate limits. + #[serde(skip_serializing_if = "Option::is_none")] + pub throttleWriteIOPSDevice: Option>, +} + +/// hugetlb controller which allows to limit the HugeTLB reservations +/// (if supported) or usage (page fault). +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct HugetlbCgroup { + /// Hugepage size + pub pageSize: String, + /// Limit in bytes of hugepagesize HugeTLB reservations + /// (if supported) or usage. + pub limit: u64, +} + +/// Priority assigned to traffic originating from processes in the +/// group and egressing the system on various interfaces. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +pub struct NetPriority { + /// Interface name. + pub name: String, + /// Priority applied to the interface. + pub priority: u32, +} + +/// Cgroup subsystems net_cls and net_prio. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct NetworkCgroup { + /// Network class identifier the cgroup's network packets will + /// be tagged with. + #[serde(skip_serializing_if = "Option::is_none")] + pub classID: Option, + /// List of objects of the priorities assigned to traffic + /// originating from processes in the group and egressing the + /// system on various interfaces. + #[serde(skip_serializing_if = "Option::is_none")] + pub priorities: Option>, +} + +/// Cgroup subsystem pids. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct PidsCgroup { + /// Maximum number of tasks in the cgroup. + pub limit: i64, +} + +/// Per-device rdma limit. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct RdmaLimit { + /// Maximum number of hca_handles in the cgroup. + pub hcaHandles: Option, + /// Maximum number of hca_objects in the cgroup. + pub hcaObjects: Option, +} + +/// Cgroup subsystem rdma. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct RdmaCgroup { + /// Rdma limit for mlx5_1. + #[serde(skip_serializing_if = "Option::is_none")] + pub mlx5_1: Option, + /// Rdma limit for mlx4_0. + #[serde(skip_serializing_if = "Option::is_none")] + pub mlx4_0: Option, + /// Rdma limit for rxe3. + #[serde(skip_serializing_if = "Option::is_none")] + pub rxe3: Option, +} + +/// Cgroups to restrict resource usage for a container and +/// handle device access. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Cgroups { + /// Device cgroup settings. + #[serde(skip_serializing_if = "Option::is_none")] + pub devices: Option>, + /// Memory cgroup settings. + #[serde(skip_serializing_if = "Option::is_none")] + pub memory: Option, + /// Cpu and Cpuset cgroup settings. + #[serde(skip_serializing_if = "Option::is_none")] + pub cpu: Option, + /// Blkio cgroup settings. + #[serde(skip_serializing_if = "Option::is_none")] + pub blockIO: Option, + /// Hugetlb cgroup settings. + #[serde(skip_serializing_if = "Option::is_none")] + pub hugepageLimits: Option>, + /// Network cgroup settings. + #[serde(skip_serializing_if = "Option::is_none")] + pub network: Option, + /// Pids cgroup settings. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub pids: Option, +} + +#[cfg(target_arch = "x86_64")] +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +/// Intel Resource Director Technology +pub struct IntelRdt { + #[serde(skip_serializing_if = "Option::is_none")] + /// Identity for RDT Class of Service (CLOS). + pub closID: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// Schema for L3 cache id and capacity bitmask (CBM). + pub l3CacheSchema: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// Schema of memory bandwidth per L3 cache id. + pub memBwSchema: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// If Intel RDT CMT should be enabled. + pub enableCMT: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// If Intel RDT MBM should be enabled. + pub enableMBM: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)] +#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +#[repr(u32)] +/// Action for seccomp rules. +pub enum SeccompAction { + ScmpActKill = 0x0000_0000, + ScmpActKillProcess = 0x8000_0000, + ScmpActTrap = 0x0003_0000, + ScmpActErrno = 0x0005_0001, + ScmpActNotify = 0x7fc0_0000, + ScmpActTrace = 0x7ff0_0001, + ScmpActLog = 0x7ffc_0000, + ScmpActAllow = 0x7fff_0000, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default)] +#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +#[repr(u32)] +/// Operator for syscall arguments in seccomp. +pub enum SeccompOp { + ScmpCmpNe = 1, + ScmpCmpLt = 2, + ScmpCmpLe = 3, + #[default] + ScmpCmpEq = 4, + ScmpCmpGe = 5, + ScmpCmpGt = 6, + ScmpCmpMaskedEq = 7, +} + +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +/// The specific syscall in seccomp. +pub struct SeccompSyscallArg { + /// Index for syscall arguments. + #[serde(default)] + pub index: usize, + /// Value for syscall arguments. + #[serde(default)] + pub value: u64, + #[serde(skip_serializing_if = "Option::is_none")] + /// Value for syscall arguments. + pub valueTwo: Option, + /// Operator for syscall arguments. + pub op: SeccompOp, +} + +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +/// Match a syscall in seccomp. +pub struct SeccompSyscall { + /// Names of the syscalls. + pub names: Vec, + /// Action for seccomp rules. + pub action: SeccompAction, + #[serde(skip_serializing_if = "Option::is_none")] + /// Errno return code to use. + pub errnoRet: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// Specific syscall in seccomp. + pub args: Option>, +} + +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +/// Seccomp provides application sandboxing mechanism in the Linux kernel. +pub struct Seccomp { + /// Default action for seccomp. + pub defaultAction: SeccompAction, + #[serde(skip_serializing_if = "Option::is_none")] + /// Errno return code to use. + pub defaultErrnoRet: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// Architecture used for system calls. + pub architectures: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + /// List of flags to use with seccomp. + pub flags: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + /// Path of UNIX domain socket over which the runtime will send the + /// container process state data structure when the SCMP_ACT_NOTIFY + /// action is used. 
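// Illustrative sketch (annotation, not patch content): with
// `rename_all = "SCREAMING_SNAKE_CASE"`, the action strings used in config.json
// map directly onto the SeccompAction variants, and `#[repr(u32)]` keeps the
// discriminants aligned with the numeric action encodings listed above.
fn demo_seccomp_action() {
    let action: SeccompAction = serde_json::from_str(r#""SCMP_ACT_ERRNO""#).unwrap();
    assert_eq!(action, SeccompAction::ScmpActErrno);
    assert_eq!(action as u32, 0x0005_0001);
}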
+ pub listennerPath: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// Seccomp file descriptor returned by the seccomp syscall. + pub seccompFd: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// Opaque data to pass to the seccomp agent. + pub listenerMetadata: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// Match a syscall in seccomp. + pub syscalls: Option>, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +/// Linux execution personality. +pub struct Personality { + /// Execution domain. + pub domain: String, + /// Additional flags to apply. + pub flags: Option>, +} + +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +/// Linux-specific configuration. +pub struct LinuxPlatform { + /// A namespace wraps a global system resource in an abstraction. + pub namespaces: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + /// User namespace uid mappings from the host to the container. + pub uidMappings: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + /// User namespace gid mappings from the host to the container. + pub gidMappings: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + /// Offset for Time Namespace. + pub timeOffsets: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// Lists devices that MUST be available in the container. + pub devices: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + /// Path to the cgroups. + pub cgroupsPath: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// Rootfs's mount propagation. + pub rootfsPropagation: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// Mask over the provided paths inside the container so + /// that they cannot be read. + pub maskedPaths: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + /// Set the provided paths as readonly inside the container. + pub readonlyPaths: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + /// Selinux context for the mounts in the container. + pub mountLabel: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// Linux execution personality. + pub personality: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// Configure a container's cgroups. + pub resources: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// The cgroup subsystem rdma. + pub rdma: Option, + #[serde(skip_serializing_if = "Option::is_none")] + /// Allows cgroup v2 parameters to be to be set and modified + /// for the container. + pub unified: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + /// Kernel parameters to be modified at runtime for the + /// container. + pub sysctl: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + /// Seccomp provides application sandboxing mechanism in + /// the Linux kernel. + pub seccomp: Option, + #[cfg(target_arch = "x86_64")] + #[serde(skip_serializing_if = "Option::is_none")] + /// Intel Resource Director Technology. + pub intelRdt: Option, +} + +/// Arrays that specifies the sets of capabilities for the process. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Capbilities { + /// Array of effective capabilities that are kept for the process. + #[serde(skip_serializing_if = "Option::is_none")] + pub effective: Option>, + /// Array of bounding capabilities that are kept for the process. + #[serde(skip_serializing_if = "Option::is_none")] + pub bounding: Option>, + /// Array of inheritable capabilities that are kept for the process. 
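// Illustrative sketch (annotation, not patch content): `unified` passes raw
// cgroup v2 keys straight through to the runtime. The map is assumed to
// deserialize as HashMap<String, String> (as in the OCI runtime spec), and the
// JSON fragment below is made up for demonstration.
fn demo_unified_cgroup_v2() {
    let json = r#"{
        "namespaces": [ { "type": "mount" } ],
        "unified": { "memory.max": "1073741824" }
    }"#;
    let linux: LinuxPlatform = serde_json::from_str(json).unwrap();
    let unified = linux.unified.unwrap();
    assert_eq!(unified.get("memory.max").map(String::as_str), Some("1073741824"));
}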
+ #[serde(skip_serializing_if = "Option::is_none")] + pub inheritable: Option>, + /// Array of permitted capabilities that are kept for the process. + #[serde(skip_serializing_if = "Option::is_none")] + pub permitted: Option>, + /// Array of ambient capabilities that are kept for the process. + #[serde(skip_serializing_if = "Option::is_none")] + pub ambient: Option>, +} + +/// Scheduling policy. +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)] +#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +pub enum SchedPolicy { + SchedOther, + SchedFifo, + SchedRr, + SchedBatch, + SchedIdle, +} + +impl From for libc::c_int { + fn from(value: SchedPolicy) -> Self { + match value { + SchedPolicy::SchedOther => libc::SCHED_OTHER, + SchedPolicy::SchedFifo => libc::SCHED_FIFO, + SchedPolicy::SchedRr => libc::SCHED_RR, + SchedPolicy::SchedBatch => libc::SCHED_BATCH, + SchedPolicy::SchedIdle => libc::SCHED_IDLE, + } + } +} + +/// Scheduler properties for the process. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Scheduler { + /// Scheduling policy. + pub policy: SchedPolicy, + /// Nice value for the process, affecting its priority. + #[serde(skip_serializing_if = "Option::is_none")] + pub nice: Option, + /// Static priority of the process. + #[serde(skip_serializing_if = "Option::is_none")] + pub priority: Option, + /// Array of strings representing scheduling flags. + #[serde(skip_serializing_if = "Option::is_none")] + pub flags: Option>, + /// Amount of time in nanoseconds during which the process is + /// allowed to run in a given period, used by the deadline + /// scheduler. + #[serde(skip_serializing_if = "Option::is_none")] + pub runtime: Option, + /// Absolute deadline for the process to complete its execution, + /// used by the deadline scheduler. + #[serde(skip_serializing_if = "Option::is_none")] + pub deadline: Option, + /// Length of the period in nanoseconds used for determining the + /// process runtime, used by the deadline scheduler. + #[serde(skip_serializing_if = "Option::is_none")] + pub period: Option, +} + +/// I/O scheduling class. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +pub enum IoPriClass { + IoprioClassRt, + IoprioClassBe, + IoprioClassIdle, +} + +/// I/O priority settings for the container's processes within the +/// process group. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct IoPriority { + /// I/O scheduling class. + pub class: IoPriClass, + /// Priority level within the class. + pub priority: i64, +} + +/// CPU affinity used to execute the process. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct ExecCpuAffinity { + /// List of CPUs a runtime parent process to be run on initially, + /// before the transition to container's cgroup. + #[serde(skip_serializing_if = "Option::is_none")] + pub initial: Option, + /// List of CPUs the process will be run on after the transition + /// to container's cgroup. 
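// Illustrative sketch (annotation, not patch content): the From impl above lets
// a policy parsed from config.json be handed straight to libc interfaces such as
// sched_setscheduler(2) that expect a c_int; the function name is hypothetical.
fn demo_sched_policy() {
    let parsed: SchedPolicy = serde_json::from_str(r#""SCHED_FIFO""#).unwrap();
    assert_eq!(parsed, SchedPolicy::SchedFifo);
    let raw: libc::c_int = parsed.into();
    assert_eq!(raw, libc::SCHED_FIFO);
}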
+ #[serde(skip_serializing_if = "Option::is_none", rename = "final")] + pub final_cpus: Option, +} + +#[cfg(test)] +mod tests { + use serde_json; + + use super::*; + + #[test] + fn test_namespaces() { + let json = r#"{ + "namespaces": [ + { + "type": "pid", + "path": "/proc/1234/ns/pid" + }, + { + "type": "network", + "path": "/var/run/netns/neta" + }, + { + "type": "mount" + }, + { + "type": "ipc" + }, + { + "type": "uts" + }, + { + "type": "user" + }, + { + "type": "cgroup" + }, + { + "type": "time" + } + ] + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + namespaces: Vec, + } + + let ns: Section = serde_json::from_str(json).unwrap(); + assert_eq!(ns.namespaces.len(), 8); + assert_eq!(ns.namespaces[0].ns_type, NamespaceType::Pid); + assert_eq!(ns.namespaces[1].ns_type, NamespaceType::Network); + assert_eq!(ns.namespaces[2].ns_type, NamespaceType::Mount); + assert_eq!(ns.namespaces[3].ns_type, NamespaceType::Ipc); + assert_eq!(ns.namespaces[4].ns_type, NamespaceType::Uts); + assert_eq!(ns.namespaces[5].ns_type, NamespaceType::User); + assert_eq!(ns.namespaces[6].ns_type, NamespaceType::Cgroup); + assert_eq!(ns.namespaces[7].ns_type, NamespaceType::Time); + } + + #[test] + fn test_ids_mapping() { + let json = r#"{ + "uidMappings": [ + { + "containerID": 0, + "hostID": 1000, + "size": 32000 + } + ], + "gidMappings": [ + { + "containerID": 0, + "hostID": 1000, + "size": 32000 + } + ] + }"#; + + #[allow(non_snake_case)] + #[derive(Serialize, Deserialize)] + struct Section { + uidMappings: Vec, + gidMappings: Vec, + } + + let ids_mapping: Section = serde_json::from_str(json).unwrap(); + assert_eq!(ids_mapping.uidMappings.len(), 1); + assert_eq!(ids_mapping.uidMappings[0].size, 32000 as u32); + assert_eq!(ids_mapping.gidMappings.len(), 1); + assert_eq!(ids_mapping.gidMappings[0].size, 32000 as u32); + } + + #[test] + fn test_time_offsets() { + let json = r#"{ + "timeOffsets": { + "secs": 100 + } + }"#; + + #[allow(non_snake_case)] + #[derive(Serialize, Deserialize)] + struct Section { + timeOffsets: TimeOffsets, + } + + let time_offsets: Section = serde_json::from_str(json).unwrap(); + assert_eq!(time_offsets.timeOffsets.secs, Some(100)); + assert_eq!(time_offsets.timeOffsets.nanosecs, None); + } + + #[test] + fn test_devices() { + let json = r#"{ + "devices": [ + { + "path": "/dev/fuse", + "type": "c", + "major": 10, + "minor": 229, + "fileMode": 438, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/sda", + "type": "b", + "major": 8, + "minor": 0 + } + ] + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + devices: Vec, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.devices.len(), 2); + assert_eq!(section.devices[1].path, "/dev/sda"); + assert_eq!(section.devices[1].dev_type, "b"); + assert_eq!(section.devices[1].major, Some(8)); + assert_eq!(section.devices[1].minor, Some(0)); + assert_eq!(section.devices[1].fileMode, None); + assert_eq!(section.devices[1].uid, None); + assert_eq!(section.devices[1].gid, None); + } + + #[test] + fn test_cgroup_devices() { + let json = r#"{ + "devices": [ + { + "allow": false + }, + { + "allow": true, + "type": "c", + "major": 10, + "minor": 229, + "access": "rw" + } + ] + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + devices: Vec, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.devices.len(), 2); + assert_eq!(section.devices[0].allow, false); + assert_eq!(section.devices[0].dev_type, "a"); + assert_eq!(section.devices[0].major, 
None); + assert_eq!(section.devices[0].minor, None); + assert_eq!(section.devices[0].access, None); + assert_eq!(section.devices[1].allow, true); + assert_eq!(section.devices[1].dev_type, "c"); + assert_eq!(section.devices[1].major, Some(10)); + assert_eq!(section.devices[1].minor, Some(229)); + assert_eq!(section.devices[1].access, Some("rw".to_string())); + } + + #[test] + fn test_cgroup_memory_01() { + let json = r#"{ + "memory": { + "limit": 536870912, + "reservation": 536870912, + "swap": 536870912, + "kernel": -1, + "kernelTCP": -1, + "swappiness": 0, + "disableOOMKiller": false + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + memory: MemoryCgroup, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.memory.limit, Some(536870912)); + assert_eq!(section.memory.reservation, Some(536870912)); + assert_eq!(section.memory.swap, Some(536870912)); + assert_eq!(section.memory.kernel, Some(-1)); + assert_eq!(section.memory.kernelTCP, Some(-1)); + assert_eq!(section.memory.swappiness, Some(0)); + assert_eq!(section.memory.disableOOMKiller, Some(false)); + assert_eq!(section.memory.useHierarchy, None); + assert_eq!(section.memory.checkBeforeUpdate, None); + } + + #[test] + fn test_cgroup_memory_02() { + let json = r#"{ + "memory": { + "useHierarchy": true, + "checkBeforeUpdate": true + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + memory: MemoryCgroup, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.memory.limit, None); + assert_eq!(section.memory.reservation, None); + assert_eq!(section.memory.swap, None); + assert_eq!(section.memory.kernel, None); + assert_eq!(section.memory.kernelTCP, None); + assert_eq!(section.memory.swappiness, None); + assert_eq!(section.memory.disableOOMKiller, None); + assert_eq!(section.memory.useHierarchy, Some(true)); + assert_eq!(section.memory.checkBeforeUpdate, Some(true)); + } + + #[test] + fn test_cgroup_cpu_01() { + let json = r#"{ + "cpu": { + "shares": 1024, + "quota": 1000000, + "burst": 1000000, + "period": 500000, + "realtimeRuntime": 950000, + "realtimePeriod": 1000000, + "cpus": "2-3", + "mems": "0-7", + "idle": 0 + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + cpu: CpuCgroup, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.cpu.shares, Some(1024)); + assert_eq!(section.cpu.quota, Some(1000000)); + assert_eq!(section.cpu.burst, Some(1000000)); + assert_eq!(section.cpu.period, Some(500000)); + assert_eq!(section.cpu.realtimeRuntime, Some(950000)); + assert_eq!(section.cpu.realtimePeriod, Some(1000000)); + assert_eq!(section.cpu.cpus, Some("2-3".to_string())); + assert_eq!(section.cpu.mems, Some("0-7".to_string())); + assert_eq!(section.cpu.idle, Some(0)); + } + + #[test] + fn test_cgroup_cpu_02() { + let json = r#"{ + "cpu": {} + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + cpu: CpuCgroup, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.cpu.shares, None); + assert_eq!(section.cpu.quota, None); + assert_eq!(section.cpu.burst, None); + assert_eq!(section.cpu.period, None); + assert_eq!(section.cpu.realtimeRuntime, None); + assert_eq!(section.cpu.realtimePeriod, None); + assert_eq!(section.cpu.cpus, None); + assert_eq!(section.cpu.mems, None); + assert_eq!(section.cpu.idle, None); + } + + #[test] + fn test_cgroup_blkio() { + let json = r#"{ + "blockIO": { + "weight": 10, + "leafWeight": 10, + "weightDevice": [ + { + 
"major": 8, + "minor": 0, + "weight": 500, + "leafWeight": 300 + }, + { + "major": 8, + "minor": 16 + } + ], + "throttleReadBpsDevice": [ + { + "major": 8, + "minor": 0, + "rate": 600 + }, + { + "major": 8, + "minor": 16, + "rate": 300 + } + ] + } + }"#; + + #[allow(non_snake_case)] + #[derive(Serialize, Deserialize)] + struct Section { + blockIO: BlockIoCgroup, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.blockIO.weight, Some(10)); + assert_eq!(section.blockIO.leafWeight, Some(10)); + assert_eq!(section.blockIO.throttleReadIOPSDevice, None); + assert_eq!(section.blockIO.throttleWriteBpsDevice, None); + assert_eq!(section.blockIO.throttleWriteIOPSDevice, None); + + let weight_device = section.blockIO.weightDevice.as_ref().unwrap(); + assert_eq!(weight_device.len(), 2); + assert_eq!(weight_device[0].major, 8); + assert_eq!(weight_device[0].minor, 0); + assert_eq!(weight_device[0].weight, Some(500)); + assert_eq!(weight_device[0].leafWeight, Some(300)); + assert_eq!(weight_device[1].major, 8); + assert_eq!(weight_device[1].minor, 16); + assert_eq!(weight_device[1].weight, None); + assert_eq!(weight_device[1].leafWeight, None); + + let throttle = section.blockIO.throttleReadBpsDevice.as_ref().unwrap(); + assert_eq!(throttle.len(), 2); + assert_eq!(throttle[1].major, 8); + assert_eq!(throttle[1].minor, 16); + assert_eq!(throttle[1].rate, 300); + } + + #[test] + fn test_cgroup_hugetlb() { + let json = r#"{ + "hugepageLimits": [ + { + "pageSize": "2MB", + "limit": 209715200 + } + ] + }"#; + + #[allow(non_snake_case)] + #[derive(Serialize, Deserialize)] + struct Section { + hugepageLimits: Vec, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.hugepageLimits[0].pageSize, "2MB"); + assert_eq!(section.hugepageLimits[0].limit, 209715200); + } + + #[test] + fn test_cgroup_network_01() { + let json = r#"{ + "network": { + "classID": 1048577, + "priorities": [ + { + "name": "eth0", + "priority": 500 + } + ] + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + network: NetworkCgroup, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.network.classID, Some(1048577)); + let priorities = section.network.priorities.as_ref().unwrap(); + assert_eq!(priorities[0].name, "eth0"); + assert_eq!(priorities[0].priority, 500); + } + + #[test] + fn test_cgroup_network_02() { + let json = r#"{ + "network": {} + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + network: NetworkCgroup, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.network.classID, None); + assert_eq!(section.network.priorities, None); + } + + #[test] + fn test_cgroup_pid() { + let json = r#"{ + "pids": { + "limit": 32771 + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + pids: PidsCgroup, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.pids.limit, 32771); + } + + #[test] + fn test_cgroup_rdma() { + let json = r#"{ + "rdma": { + "mlx5_1": { + "hcaHandles": 3, + "hcaObjects": 10000 + }, + "mlx4_0": { + "hcaObjects": 1000 + }, + "rxe3": { + "hcaHandles": 10000 + } + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + rdma: RdmaCgroup, + } + + let section: Section = serde_json::from_str(json).unwrap(); + let rdma_limit = section.rdma.mlx5_1.as_ref().unwrap(); + assert_eq!(rdma_limit.hcaHandles, Some(3)); + assert_eq!(rdma_limit.hcaObjects, Some(10000)); + let rdma_limit = 
section.rdma.mlx4_0.as_ref().unwrap(); + assert_eq!(rdma_limit.hcaHandles, None); + assert_eq!(rdma_limit.hcaObjects, Some(1000)); + let rdma_limit = section.rdma.rxe3.as_ref().unwrap(); + assert_eq!(rdma_limit.hcaHandles, Some(10000)); + assert_eq!(rdma_limit.hcaObjects, None); + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_intel_rdt() { + let json = r#"{ + "intelRdt": { + "closID": "guaranteed_group", + "l3CacheSchema": "L3:0=7f0;1=1f", + "memBwSchema": "MB:0=20;1=70", + "enableCMT": true, + "enableMBM": true + } + }"#; + + #[allow(non_snake_case)] + #[derive(Serialize, Deserialize)] + struct Section { + intelRdt: IntelRdt, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!( + section.intelRdt.closID, + Some("guaranteed_group".to_string()) + ); + assert_eq!( + section.intelRdt.l3CacheSchema, + Some("L3:0=7f0;1=1f".to_string()) + ); + assert_eq!( + section.intelRdt.memBwSchema, + Some("MB:0=20;1=70".to_string()) + ); + assert_eq!(section.intelRdt.enableCMT, Some(true)); + assert_eq!(section.intelRdt.enableMBM, Some(true)); + } + + #[test] + fn test_seccomp() { + let json = r#"{ + "seccomp": { + "defaultAction": "SCMP_ACT_ALLOW", + "architectures": [ + "SCMP_ARCH_X86", + "SCMP_ARCH_X32" + ], + "syscalls": [ + { + "names": [ + "getcwd", + "chmod" + ], + "action": "SCMP_ACT_ERRNO" + } + ] + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + seccomp: Seccomp, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.seccomp.defaultAction, SeccompAction::ScmpActAllow); + let architectures = section.seccomp.architectures.as_ref().unwrap(); + assert_eq!(architectures.len(), 2); + assert_eq!(architectures[0], "SCMP_ARCH_X86"); + assert_eq!(architectures[1], "SCMP_ARCH_X32"); + let syscall_names = section.seccomp.syscalls.as_ref().unwrap(); + assert_eq!(syscall_names[0].names.len(), 2); + assert_eq!(syscall_names[0].names[0], "getcwd"); + assert_eq!(syscall_names[0].names[1], "chmod"); + assert_eq!(syscall_names[0].action, SeccompAction::ScmpActErrno); + } + + #[test] + fn test_personality() { + let json = r#"{ + "personality": { + "domain": "LINUX" + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + personality: Personality, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.personality.domain, "LINUX"); + assert_eq!(section.personality.flags, None); + } +} diff --git a/ozonec/oci_spec/src/posix.rs b/ozonec/oci_spec/src/posix.rs new file mode 100644 index 0000000000000000000000000000000000000000..b284d5d363829f1e74baed8037c318be79fed7bc --- /dev/null +++ b/ozonec/oci_spec/src/posix.rs @@ -0,0 +1,249 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use serde::{Deserialize, Serialize}; + +/// Container's root filesystem. +#[derive(Serialize, Deserialize, Debug, Clone, Default)] +pub struct Root { + /// Path to the root filesystem for the container. 
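// Illustrative sketch (annotation, not patch content): `#[serde(default)]` on
// `readonly` means a root object that omits the key still deserializes, defaulting
// to a writable rootfs; the function name is hypothetical.
fn demo_root_default() {
    let root: Root = serde_json::from_str(r#"{ "path": "rootfs" }"#).unwrap();
    assert_eq!(root.path, "rootfs");
    assert!(!root.readonly);
}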
+ pub path: String, + #[serde(default)] + /// If true then the root filesystem MUST be read-only inside the container. + pub readonly: bool, +} + +/// Resource limits for the process. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Rlimits { + /// The platform resource being limited. + #[serde(rename = "type")] + pub rlimit_type: String, + /// Value of the limit enforced for the corresponding resource. + pub soft: u64, + /// Ceiling for the soft limit that could be set by an + /// unprivileged process. + pub hard: u64, +} + +/// The user for the process that allows specific control over which user +/// the process runs as. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct User { + /// User ID in the container namespace. + #[serde(default)] + pub uid: u32, + /// Group ID in the container namespace. + #[serde(default)] + pub gid: u32, + /// [umask][umask_2] of the user. + #[serde(skip_serializing_if = "Option::is_none")] + pub umask: Option, + /// Additional group IDs in the container namespace to be added + /// to the process. + #[serde(skip_serializing_if = "Option::is_none")] + pub additionalGids: Option>, +} + +/// Hook Entry. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct HookEntry { + /// Similar semantics to IEEE Std 1003.1-2008 execv's path. + pub path: String, + /// Same semantics as IEEE Std 1003.1-2008 execv's argv. + #[serde(skip_serializing_if = "Option::is_none")] + pub args: Option>, + /// Same semantics as IEEE Std 1003.1-2008's environ. + #[serde(skip_serializing_if = "Option::is_none")] + pub env: Option>, + /// Number of seconds before aborting the hook. + #[serde(skip_serializing_if = "Option::is_none")] + pub timeout: Option, +} + +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Hooks { + /// Array of prestart hooks. + #[serde(skip_serializing_if = "Option::is_none")] + prestart: Option>, + /// Array of createRuntime hooks. + #[serde(skip_serializing_if = "Option::is_none")] + createRuntime: Option>, + /// Array of createContainer hooks. + #[serde(skip_serializing_if = "Option::is_none")] + createContainer: Option>, + /// Array of startContainer hooks. + #[serde(skip_serializing_if = "Option::is_none")] + startContainer: Option>, + /// Array of poststart hooks. + #[serde(skip_serializing_if = "Option::is_none")] + poststart: Option>, + /// Array of poststop hooks. 
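// Illustrative sketch (annotation, not patch content): `umask` travels as a plain
// integer in config.json, so a value such as 63 is 0o077, i.e. group/other
// permission bits are masked. The u32 width is assumed from the OCI runtime spec.
fn demo_user_umask() {
    let user: User = serde_json::from_str(r#"{ "uid": 1, "gid": 1, "umask": 63 }"#).unwrap();
    assert_eq!(user.umask, Some(0o077));
}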
+ #[serde(skip_serializing_if = "Option::is_none")] + poststop: Option>, +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json; + + #[test] + fn test_root() { + let json = r#"{ + "root": { + "path": "rootfs", + "readonly": true + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + root: Root, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.root.path, "rootfs"); + assert_eq!(section.root.readonly, true); + } + + #[test] + fn test_hooks() { + let json = r#"{ + "hooks": { + "prestart": [ + { + "path": "/usr/bin/fix-mounts", + "args": ["fix-mounts", "arg1", "arg2"], + "env": [ "key1=value1"] + }, + { + "path": "/usr/bin/setup-network" + } + ], + "createRuntime": [ + { + "path": "/usr/bin/fix-mounts", + "args": ["fix-mounts", "arg1", "arg2"], + "env": [ "key1=value1"] + }, + { + "path": "/usr/bin/setup-network" + } + ], + "createContainer": [ + { + "path": "/usr/bin/mount-hook", + "args": ["-mount", "arg1", "arg2"], + "env": [ "key1=value1"] + } + ], + "startContainer": [ + { + "path": "/usr/bin/refresh-ldcache" + } + ], + "poststart": [ + { + "path": "/usr/bin/notify-start", + "timeout": 5 + } + ], + "poststop": [ + { + "path": "/usr/sbin/cleanup.sh", + "args": ["cleanup.sh", "-f"] + } + ] + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + hooks: Hooks, + } + + let section: Section = serde_json::from_str(json).unwrap(); + let prestart = section.hooks.prestart.as_ref().unwrap(); + assert_eq!(prestart.len(), 2); + assert_eq!(prestart[0].path, "/usr/bin/fix-mounts"); + let args = prestart[0].args.as_ref().unwrap(); + assert_eq!(args.len(), 3); + assert_eq!(args[0], "fix-mounts"); + assert_eq!(args[1], "arg1"); + assert_eq!(args[2], "arg2"); + let env = prestart[0].env.as_ref().unwrap(); + assert_eq!(env.len(), 1); + assert_eq!(env[0], "key1=value1"); + assert_eq!(prestart[0].timeout, None); + assert_eq!(prestart[1].path, "/usr/bin/setup-network"); + assert_eq!(prestart[1].args, None); + assert_eq!(prestart[1].env, None); + assert_eq!(prestart[1].timeout, None); + + let create_runtime = section.hooks.createRuntime.as_ref().unwrap(); + assert_eq!(create_runtime.len(), 2); + assert_eq!(create_runtime[0].path, "/usr/bin/fix-mounts"); + let args = create_runtime[0].args.as_ref().unwrap(); + assert_eq!(args.len(), 3); + assert_eq!(args[0], "fix-mounts"); + assert_eq!(args[1], "arg1"); + assert_eq!(args[2], "arg2"); + let env = create_runtime[0].env.as_ref().unwrap(); + assert_eq!(env.len(), 1); + assert_eq!(env[0], "key1=value1"); + assert_eq!(create_runtime[0].timeout, None); + assert_eq!(create_runtime[1].path, "/usr/bin/setup-network"); + assert_eq!(create_runtime[1].args, None); + assert_eq!(create_runtime[1].env, None); + assert_eq!(create_runtime[1].timeout, None); + + let create_container = section.hooks.createContainer.as_ref().unwrap(); + assert_eq!(create_container.len(), 1); + assert_eq!(create_container[0].path, "/usr/bin/mount-hook"); + let args = create_container[0].args.as_ref().unwrap(); + assert_eq!(args.len(), 3); + assert_eq!(args[0], "-mount"); + assert_eq!(args[1], "arg1"); + assert_eq!(args[2], "arg2"); + let env = create_container[0].env.as_ref().unwrap(); + assert_eq!(env.len(), 1); + assert_eq!(env[0], "key1=value1"); + assert_eq!(create_container[0].timeout, None); + + let start_container = section.hooks.startContainer.as_ref().unwrap(); + assert_eq!(start_container.len(), 1); + assert_eq!(start_container[0].path, "/usr/bin/refresh-ldcache"); + assert_eq!(start_container[0].args, None); + 
assert_eq!(start_container[0].env, None); + assert_eq!(start_container[0].timeout, None); + + let poststart = section.hooks.poststart.as_ref().unwrap(); + assert_eq!(poststart.len(), 1); + assert_eq!(poststart[0].path, "/usr/bin/notify-start"); + assert_eq!(poststart[0].args, None); + assert_eq!(poststart[0].env, None); + assert_eq!(poststart[0].timeout, Some(5)); + + let poststop = section.hooks.poststop.as_ref().unwrap(); + assert_eq!(poststop.len(), 1); + assert_eq!(poststop[0].path, "/usr/sbin/cleanup.sh"); + let args = poststop[0].args.as_ref().unwrap(); + assert_eq!(args.len(), 2); + assert_eq!(args[0], "cleanup.sh"); + assert_eq!(args[1], "-f"); + assert_eq!(poststop[0].env, None); + assert_eq!(poststop[0].timeout, None); + } +} diff --git a/ozonec/oci_spec/src/process.rs b/ozonec/oci_spec/src/process.rs new file mode 100644 index 0000000000000000000000000000000000000000..a558d78b0282e7134d81d8a46f822f055eb91656 --- /dev/null +++ b/ozonec/oci_spec/src/process.rs @@ -0,0 +1,240 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use serde::{Deserialize, Serialize}; + +#[cfg(target_os = "linux")] +use crate::linux::{Capbilities, ExecCpuAffinity, IoPriority, Scheduler}; +#[cfg(target_family = "unix")] +use crate::posix::{Rlimits, User}; + +/// Console size in characters of the terminal. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct ConsoleSize { + /// Height size in characters. + #[serde(skip_serializing_if = "Option::is_none")] + pub height: Option, + /// Width size in characters. + #[serde(skip_serializing_if = "Option::is_none")] + pub width: Option, +} + +/// Container process. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Process { + /// Working directory that will be set for the executable. + pub cwd: String, + /// Similar semantics to IEEE Std 1003.1-2008 execvp's argv. + #[serde(skip_serializing_if = "Option::is_none")] + pub args: Option>, + /// Same semantics as IEEE Std 1003.1-2008's environ. + #[serde(skip_serializing_if = "Option::is_none")] + pub env: Option>, + /// Whether a terminal is attached to the process. + #[serde(default)] + pub terminal: bool, + /// Console size in characters of the terminal. + #[serde(skip_serializing_if = "Option::is_none")] + pub consoleSize: Option, + /// Full command line to be executed on Windows. + #[cfg(target_os = "windows")] + pub commandLine: Option, + /// Resource limits for the process. + #[cfg(target_os = "linux")] + #[serde(skip_serializing_if = "Option::is_none")] + pub rlimits: Option>, + /// Name of the AppArmor profile for the process. + #[cfg(target_os = "linux")] + #[serde(skip_serializing_if = "Option::is_none")] + pub apparmorProfile: Option, + /// Arrays that specifies the sets of capabilities for the process. + #[cfg(target_os = "linux")] + #[serde(skip_serializing_if = "Option::is_none")] + pub capabilities: Option, + /// Setting noNewPrivileges to true prevents the process from + /// gaining additional privileges. 
+ #[cfg(target_os = "linux")] + #[serde(skip_serializing_if = "Option::is_none")] + pub noNewPrivileges: Option, + /// Oom-killer score in [pid]/oom_score_adj for the process's + /// [pid] in a proc pseudo-filesystem. + #[cfg(target_os = "linux")] + #[serde(skip_serializing_if = "Option::is_none")] + pub oomScoreAdj: Option, + /// Scheduler properties for the process. + #[cfg(target_os = "linux")] + #[serde(skip_serializing_if = "Option::is_none")] + pub scheduler: Option, + /// SELinux label for the process. + #[cfg(target_os = "linux")] + #[serde(skip_serializing_if = "Option::is_none")] + pub selinuxLabel: Option, + /// I/O priority settings for the container's processes within + /// the process group. + #[cfg(target_os = "linux")] + #[serde(skip_serializing_if = "Option::is_none")] + pub ioPriority: Option, + /// CPU affinity used to execute the process. + #[cfg(target_os = "linux")] + #[serde(skip_serializing_if = "Option::is_none")] + pub execCPUAffinity: Option, + /// The user for the process that allows specific control over + /// which user the process runs as. + pub user: User, +} + +#[cfg(test)] +mod tests { + use crate::linux::IoPriClass; + + use super::*; + use serde_json; + + #[test] + fn test_process() { + let json = r#"{ + "process": { + "terminal": true, + "consoleSize": { + "height": 25, + "width": 80 + }, + "user": { + "uid": 1, + "gid": 1, + "umask": 63, + "additionalGids": [5, 6] + }, + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm" + ], + "cwd": "/root", + "args": [ + "sh" + ], + "apparmorProfile": "acme_secure_profile", + "selinuxLabel": "system_u:system_r:svirt_lxc_net_t:s0:c124,c675", + "ioPriority": { + "class": "IOPRIO_CLASS_IDLE", + "priority": 4 + }, + "noNewPrivileges": true, + "capabilities": { + "bounding": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "permitted": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "inheritable": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "effective": [ + "CAP_AUDIT_WRITE", + "CAP_KILL" + ], + "ambient": [ + "CAP_NET_BIND_SERVICE" + ] + }, + "rlimits": [ + { + "type": "RLIMIT_NOFILE", + "hard": 1024, + "soft": 1024 + } + ], + "execCPUAffinity": { + "initial": "7", + "final": "0-3,7" + } + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + process: Process, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.process.terminal, true); + let console_size = section.process.consoleSize.as_ref().unwrap(); + assert_eq!(console_size.height, Some(25)); + assert_eq!(console_size.width, Some(80)); + assert_eq!(section.process.user.uid, 1); + assert_eq!(section.process.user.gid, 1); + assert_eq!(section.process.user.umask, Some(63)); + assert_eq!(section.process.user.additionalGids, Some(vec![5, 6])); + let env = section.process.env.as_ref().unwrap(); + assert_eq!(env.len(), 2); + assert_eq!( + env[0], + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + ); + assert_eq!(env[1], "TERM=xterm"); + assert_eq!(section.process.cwd, "/root"); + let args = section.process.args.as_ref().unwrap(); + assert_eq!(args.len(), 1); + assert_eq!(args[0], "sh"); + assert_eq!( + section.process.apparmorProfile, + Some("acme_secure_profile".to_string()) + ); + assert_eq!( + section.process.selinuxLabel, + Some("system_u:system_r:svirt_lxc_net_t:s0:c124,c675".to_string()) + ); + let io_pri = section.process.ioPriority.as_ref().unwrap(); + assert_eq!(io_pri.class, 
IoPriClass::IoprioClassIdle); + assert_eq!(io_pri.priority, 4); + assert_eq!(section.process.noNewPrivileges, Some(true)); + let caps = section.process.capabilities.as_ref().unwrap(); + let bonding_caps = caps.bounding.as_ref().unwrap(); + assert_eq!(bonding_caps.len(), 3); + assert_eq!(bonding_caps[0], "CAP_AUDIT_WRITE"); + assert_eq!(bonding_caps[1], "CAP_KILL"); + assert_eq!(bonding_caps[2], "CAP_NET_BIND_SERVICE"); + let permitted_caps = caps.permitted.as_ref().unwrap(); + assert_eq!(permitted_caps.len(), 3); + assert_eq!(permitted_caps[0], "CAP_AUDIT_WRITE"); + assert_eq!(permitted_caps[1], "CAP_KILL"); + assert_eq!(permitted_caps[2], "CAP_NET_BIND_SERVICE"); + let inheritable_caps = caps.inheritable.as_ref().unwrap(); + assert_eq!(inheritable_caps.len(), 3); + assert_eq!(inheritable_caps[0], "CAP_AUDIT_WRITE"); + assert_eq!(inheritable_caps[1], "CAP_KILL"); + assert_eq!(inheritable_caps[2], "CAP_NET_BIND_SERVICE"); + let effective_caps = caps.effective.as_ref().unwrap(); + assert_eq!(effective_caps.len(), 2); + assert_eq!(effective_caps[0], "CAP_AUDIT_WRITE"); + assert_eq!(effective_caps[1], "CAP_KILL"); + let ambient_caps = caps.ambient.as_ref().unwrap(); + assert_eq!(ambient_caps.len(), 1); + assert_eq!(ambient_caps[0], "CAP_NET_BIND_SERVICE"); + let rlimits = section.process.rlimits.as_ref().unwrap(); + assert_eq!(rlimits.len(), 1); + assert_eq!(rlimits[0].rlimit_type, "RLIMIT_NOFILE"); + assert_eq!(rlimits[0].hard, 1024); + assert_eq!(rlimits[0].soft, 1024); + let exec_cpu_affinity = section.process.execCPUAffinity.as_ref().unwrap(); + assert_eq!(exec_cpu_affinity.initial, Some("7".to_string())); + assert_eq!(exec_cpu_affinity.final_cpus, Some("0-3,7".to_string())); + } +} diff --git a/ozonec/oci_spec/src/runtime.rs b/ozonec/oci_spec/src/runtime.rs new file mode 100644 index 0000000000000000000000000000000000000000..d68dea0bd7bbee0e50913704401e6a95de754e00 --- /dev/null +++ b/ozonec/oci_spec/src/runtime.rs @@ -0,0 +1,140 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{collections::HashMap, fs::File, io::BufReader, path::Path}; + +use anyhow::{anyhow, Context, Result}; +use serde::{Deserialize, Serialize}; + +#[cfg(target_os = "linux")] +use crate::linux::IdMapping; +#[cfg(target_family = "unix")] +use crate::posix::Root; +use crate::{linux::LinuxPlatform, posix::Hooks, process::Process, vm::VmPlatform}; + +/// Additional mounts beyond root. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Mount { + /// Destination of mount point: path inside container. + pub destination: String, + /// A device name, but can also be a file or directory name for bind mounts + /// or a dummy. + #[serde(skip_serializing_if = "Option::is_none")] + pub source: Option, + /// Mount options of the filesystem to be used. + #[serde(skip_serializing_if = "Option::is_none")] + pub options: Option>, + /// The type of the filesystem to be mounted. 
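// Illustrative sketch (annotation, not patch content): the OCI key "type" maps
// onto the Rust field `fs_type` via serde's rename, in both directions; the
// function name is hypothetical.
fn demo_mount_type_key() {
    let json = r#"{ "destination": "/proc", "type": "proc", "source": "proc" }"#;
    let mount: Mount = serde_json::from_str(json).unwrap();
    assert_eq!(mount.fs_type.as_deref(), Some("proc"));
    assert!(serde_json::to_string(&mount).unwrap().contains(r#""type":"proc""#));
}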
+ #[serde(skip_serializing_if = "Option::is_none", rename = "type")] + pub fs_type: Option, + /// The mapping to convert UIDs from the source file system to the + /// destination mount point. + #[serde(skip_serializing_if = "Option::is_none")] + pub uidMappings: Option, + /// The mapping to convert GIDs from the source file system to the + /// destination mount point. + #[serde(skip_serializing_if = "Option::is_none")] + pub gidMappings: Option, +} + +/// Metadata necessary to implement standard operations against the container. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct RuntimeConfig { + /// Version of the Open Container Initiative Runtime Specification + /// with which the bundle complies. + pub ociVersion: String, + /// Container's root filesystem. + pub root: Root, + /// Additional mounts beyond root. + pub mounts: Vec, + /// Container process. + pub process: Process, + /// Container's hostname as seen by processes running inside the container. + #[serde(skip_serializing_if = "Option::is_none")] + pub hostname: Option, + /// Container's domainname as seen by processes running inside the + /// container. + #[serde(skip_serializing_if = "Option::is_none")] + pub domainname: Option, + /// Linux-specific section of the container configuration. + #[cfg(target_os = "linux")] + #[serde(skip_serializing_if = "Option::is_none")] + pub linux: Option, + /// Vm-specific section of the container configuration. + #[serde(skip_serializing_if = "Option::is_none")] + pub vm: Option, + /// Custom actions related to the lifecycle of the container. + #[cfg(target_family = "unix")] + #[serde(skip_serializing_if = "Option::is_none")] + pub hooks: Option, + /// Arbitrary metadata for the container. + #[serde(skip_serializing_if = "Option::is_none")] + pub annotations: Option>, +} + +impl RuntimeConfig { + pub fn from_file(path: &String) -> Result { + let file = File::open(Path::new(path)).with_context(|| "Failed to open config.json")?; + let reader = BufReader::new(file); + serde_json::from_reader(reader).map_err(|e| anyhow!("Failed to load config.json: {:?}", e)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json; + + #[test] + fn test_mounts() { + let json = r#"{ + "mounts": [ + { + "destination": "/proc", + "type": "proc", + "source": "proc" + }, + { + "destination": "/dev", + "type": "tmpfs", + "source": "tmpfs", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ] + } + ] + }"#; + + #[allow(non_snake_case)] + #[derive(Serialize, Deserialize)] + struct Section { + mounts: Vec, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.mounts.len(), 2); + assert_eq!(section.mounts[0].destination, "/proc"); + assert_eq!(section.mounts[0].fs_type, Some("proc".to_string())); + assert_eq!(section.mounts[0].source, Some("proc".to_string())); + let options = section.mounts[1].options.as_ref().unwrap(); + assert_eq!(options.len(), 4); + assert_eq!(options[0], "nosuid"); + assert_eq!(options[1], "strictatime"); + assert_eq!(options[2], "mode=755"); + assert_eq!(options[3], "size=65536k"); + } +} diff --git a/ozonec/oci_spec/src/state.rs b/ozonec/oci_spec/src/state.rs new file mode 100644 index 0000000000000000000000000000000000000000..960e3b0f1a5896285964e80eb46d238a52ab3434 --- /dev/null +++ b/ozonec/oci_spec/src/state.rs @@ -0,0 +1,110 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +/// Runtime state of the container. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Copy, Default)] +#[serde(rename_all = "lowercase")] +pub enum ContainerStatus { + Creating, + Created, + Running, + #[default] + Stopped, +} + +impl ToString for ContainerStatus { + fn to_string(&self) -> String { + match *self { + ContainerStatus::Creating => String::from("creating"), + ContainerStatus::Created => String::from("created"), + ContainerStatus::Running => String::from("running"), + ContainerStatus::Stopped => String::from("stopped"), + } + } +} + +/// The state of a container. +#[allow(non_snake_case)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +pub struct State { + /// Version of the Open Container Initiative Runtime Specification + /// with which the state complies. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub ociVersion: String, + /// Container's ID. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub id: String, + /// Runtime state of the container. + pub status: ContainerStatus, + /// ID of the container process. + #[serde(default)] + pub pid: i32, + /// Absolute path to the container's bundle directory. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub bundle: String, + /// List of annotations associated with the container. + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub annotations: HashMap, +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json; + + #[test] + fn test_state() { + let json = r#"{ + "ociVersion": "0.2.0", + "id": "oci-container1", + "status": "running", + "pid": 4422, + "bundle": "/containers/redis", + "annotations": { + "myKey": "myValue" + } + }"#; + + let state: State = serde_json::from_str(json).unwrap(); + assert_eq!(state.ociVersion, "0.2.0"); + assert_eq!(state.id, "oci-container1"); + assert_eq!(state.status, ContainerStatus::Running); + assert_eq!(state.pid, 4422); + assert_eq!(state.bundle, "/containers/redis"); + assert!(state.annotations.contains_key("myKey")); + assert_eq!(state.annotations.get("myKey"), Some(&"myValue".to_string())); + } + + #[test] + fn test_container_status_to_string() { + assert_eq!( + ContainerStatus::Creating.to_string(), + String::from("creating") + ); + assert_eq!( + ContainerStatus::Created.to_string(), + String::from("created") + ); + assert_eq!( + ContainerStatus::Running.to_string(), + String::from("running") + ); + assert_eq!( + ContainerStatus::Stopped.to_string(), + String::from("stopped") + ); + } +} diff --git a/ozonec/oci_spec/src/vm.rs b/ozonec/oci_spec/src/vm.rs new file mode 100644 index 0000000000000000000000000000000000000000..08d1a3548e08afc958bc92f27fa403026a2ebd5f --- /dev/null +++ b/ozonec/oci_spec/src/vm.rs @@ -0,0 +1,136 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use serde::{Deserialize, Serialize}; + +/// Hypervisor that manages the container virtual machine. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Hypervisor { + /// Path to the hypervisor binary that manages the container + /// virtual machine. + pub path: String, + /// Array of parameters to pass to the hypervisor. + #[serde(skip_serializing_if = "Option::is_none")] + pub parameters: Option>, +} + +/// Kernel to boot the container virtual machine with. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Kernel { + /// Path to the kernel used to boot the container virtual machine. + pub path: String, + #[serde(skip_serializing_if = "Option::is_none")] + /// Array of parameters to pass to the kernel. + pub parameters: Option>, + /// Path to an initial ramdisk to be used by the container + /// virtual machine. + #[serde(skip_serializing_if = "Option::is_none")] + pub initrd: Option, +} + +/// Image that contains the root filesystem for the container +/// virtual machine. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Image { + /// Path to the container virtual machine root image. + pub path: String, + /// Format of the container virtual machine root image. + pub format: String, +} + +/// Configuration for the hypervisor, kernel, and image. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct VmPlatform { + /// Hypervisor that manages the container virtual machine. + #[serde(skip_serializing_if = "Option::is_none")] + pub hypervisor: Option, + /// Kernel to boot the container virtual machine with. + pub kernel: Kernel, + /// Image that contains the root filesystem for the container + /// virtual machine. 
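// Illustrative sketch (annotation, not patch content): only `kernel` is mandatory
// in the "vm" section; `hypervisor` and `image` stay None when omitted. The JSON
// fragment and function name are made up for demonstration.
fn demo_vm_platform() {
    let json = r#"{ "kernel": { "path": "/path/to/vmlinuz" } }"#;
    let vm: VmPlatform = serde_json::from_str(json).unwrap();
    assert_eq!(vm.kernel.path, "/path/to/vmlinuz");
    assert!(vm.hypervisor.is_none());
    assert!(vm.image.is_none());
}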
+ #[serde(skip_serializing_if = "Option::is_none")] + pub image: Option, +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json; + + #[test] + fn test_hypervisor() { + let json = r#"{ + "hypervisor": { + "path": "/path/to/vmm", + "parameters": ["opts1=foo", "opts2=bar"] + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + hypervisor: Hypervisor, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.hypervisor.path, "/path/to/vmm"); + let parameters = section.hypervisor.parameters.as_ref().unwrap(); + assert_eq!(parameters.len(), 2); + assert_eq!(parameters[0], "opts1=foo"); + assert_eq!(parameters[1], "opts2=bar"); + } + + #[test] + fn test_kernel() { + let json = r#"{ + "kernel": { + "path": "/path/to/vmlinuz", + "parameters": ["foo=bar", "hello world"], + "initrd": "/path/to/initrd.img" + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + kernel: Kernel, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.kernel.path, "/path/to/vmlinuz"); + let parameters = section.kernel.parameters.as_ref().unwrap(); + assert_eq!(parameters.len(), 2); + assert_eq!(parameters[0], "foo=bar"); + assert_eq!(parameters[1], "hello world"); + assert_eq!( + section.kernel.initrd, + Some("/path/to/initrd.img".to_string()) + ); + } + + #[test] + fn test_image() { + let json = r#"{ + "image": { + "path": "/path/to/vm/rootfs.img", + "format": "raw" + } + }"#; + + #[derive(Serialize, Deserialize)] + struct Section { + image: Image, + } + + let section: Section = serde_json::from_str(json).unwrap(); + assert_eq!(section.image.path, "/path/to/vm/rootfs.img"); + assert_eq!(section.image.format, "raw"); + } +} diff --git a/ozonec/src/commands/create.rs b/ozonec/src/commands/create.rs new file mode 100644 index 0000000000000000000000000000000000000000..e4c802b000043df8ec061cfb514b9c00b778b1bc --- /dev/null +++ b/ozonec/src/commands/create.rs @@ -0,0 +1,79 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Ok, Result}; +use clap::{builder::NonEmptyStringValueParser, Parser}; + +use crate::container::{Action, Container, Launcher}; +use crate::linux::LinuxContainer; +use oci_spec::runtime::RuntimeConfig; + +/// Create a container from a bundle directory +#[derive(Parser, Debug)] +pub struct Create { + /// File to write the container PID to + #[arg(short, long)] + pub pid_file: Option, + /// Path to the bundle directory, defaults to the current working directory. + #[arg(short, long, default_value = ".")] + pub bundle: PathBuf, + /// Path to an AF_UNIX socket which will receive the pseudoterminal master + /// at a file descriptor. + #[arg(short, long)] + pub console_socket: Option, + /// Container ID to create. 
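// Illustrative sketch (annotation, not patch content): how the derived clap parser
// maps a typical invocation onto Create. The argv values are made up; try_parse_from
// expects the leading program/subcommand name, and this relies on the file's
// existing clap::Parser and std::path::PathBuf imports.
fn demo_create_cli() {
    let create = Create::try_parse_from(["create", "--bundle", "/mybundle", "box1"]).unwrap();
    assert_eq!(create.container_id, "box1");
    assert_eq!(create.bundle, PathBuf::from("/mybundle"));
    assert!(create.pid_file.is_none());
}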
+ #[arg(value_parser = NonEmptyStringValueParser::new(), required = true)] + pub container_id: String, +} + +impl Create { + fn launcher(&self, root: &Path, exist: &mut bool) -> Result { + let bundle_path = self + .bundle + .canonicalize() + .with_context(|| "Failed to canonicalize bundle path")?; + let config_path = bundle_path + .join("config.json") + .to_string_lossy() + .to_string(); + let mut config = RuntimeConfig::from_file(&config_path)?; + let mut rootfs_path = PathBuf::from(config.root.path); + + if !rootfs_path.is_absolute() { + rootfs_path = bundle_path.join(rootfs_path); + } + config.root.path = rootfs_path.to_string_lossy().to_string(); + + let container: Box = Box::new(LinuxContainer::new( + &self.container_id, + &root.to_string_lossy().to_string(), + &config, + &self.console_socket, + exist, + )?); + Ok(Launcher::new( + &bundle_path, + root, + true, + container, + self.pid_file.clone(), + )) + } + + pub fn run(&self, root: &Path, exist: &mut bool) -> Result<()> { + let mut launcher = self.launcher(root, exist)?; + launcher.launch(Action::Create)?; + Ok(()) + } +} diff --git a/ozonec/src/commands/delete.rs b/ozonec/src/commands/delete.rs new file mode 100644 index 0000000000000000000000000000000000000000..67f712e4dbe23dccde5d5b7433320b5ea5ff0f69 --- /dev/null +++ b/ozonec/src/commands/delete.rs @@ -0,0 +1,52 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{fs, path::Path}; + +use anyhow::{bail, Result}; +use clap::{builder::NonEmptyStringValueParser, Parser}; + +use crate::{ + container::{Container, State}, + linux::LinuxContainer, +}; + +/// Release container resources after the container process has exited +#[derive(Debug, Parser)] +pub struct Delete { + /// Specify the container id + #[arg(value_parser = NonEmptyStringValueParser::new(), required = true)] + pub container_id: String, + /// Force to delete the container (kill the container using SIGKILL) + #[arg(short, long)] + pub force: bool, +} + +impl Delete { + pub fn run(&self, root: &Path) -> Result<()> { + let state_dir = root.join(&self.container_id); + if !state_dir.exists() { + bail!("{} doesn't exist", state_dir.display()); + } + + let state = if let Ok(s) = State::load(root, &self.container_id) { + s + } else { + fs::remove_dir_all(state_dir)?; + return Ok(()); + }; + + let container = LinuxContainer::load_from_state(&state, &None)?; + container.delete(&state, self.force)?; + Ok(()) + } +} diff --git a/ozonec/src/commands/exec.rs b/ozonec/src/commands/exec.rs new file mode 100644 index 0000000000000000000000000000000000000000..ce15f572dec5c6bdafc9419a49f1b3430c0a6eab --- /dev/null +++ b/ozonec/src/commands/exec.rs @@ -0,0 +1,128 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use core::str; +use std::path::{Path, PathBuf}; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::{builder::NonEmptyStringValueParser, Parser}; +use oci_spec::state::ContainerStatus; + +use crate::{ + container::{Action, Launcher, State}, + linux::LinuxContainer, + utils::OzonecErr, +}; + +/// Execute a new process inside the container +#[derive(Debug, Parser)] +pub struct Exec { + /// Path to an AF_UNIX socket which will receive a file descriptor of the master end + /// of the console's pseudoterminal + #[arg(long)] + pub console_socket: Option, + /// Allocate a pseudio-TTY + #[arg(short, long)] + pub tty: bool, + /// Current working directory in the container + #[arg(long)] + pub cwd: Option, + /// Specify the file to write the process pid to + #[arg(long)] + pub pid_file: Option, + /// Specify environment variables + #[arg(short, long, value_parser = parse_key_val::, number_of_values = 1)] + pub env: Vec<(String, String)>, + /// Prevent the process from gaining additional privileges + #[arg(long)] + pub no_new_privs: bool, + /// Specify the container id + #[arg(value_parser = NonEmptyStringValueParser::new(), required = true)] + pub container_id: String, + /// Specify the command to execute in the container + #[arg(required = false)] + pub command: Vec, +} + +fn parse_key_val(s: &str) -> Result<(T, U)> +where + T: str::FromStr, + T::Err: std::error::Error + Send + Sync + 'static, + U: str::FromStr, + U::Err: std::error::Error + Send + Sync + 'static, +{ + let pos = s + .find('=') + .ok_or(anyhow!("Invalid KEY=value: no '=' found in '{}'", s))?; + Ok((s[..pos].parse()?, s[pos + 1..].parse()?)) +} + +impl Exec { + fn launcher(&self, root: &Path) -> Result { + let mut container_state = + State::load(root, &self.container_id).with_context(|| OzonecErr::LoadConState)?; + + if let Some(config) = container_state.config.as_mut() { + config.process.terminal = self.tty; + config.process.cwd = if let Some(cwd) = &self.cwd { + cwd.to_string_lossy().to_string() + } else { + String::from("/") + }; + + for (env_name, env_value) in &self.env { + config + .process + .env + .as_mut() + .unwrap() + .push(format!("{}={}", env_name, env_value)); + } + config.process.noNewPrivileges = Some(self.no_new_privs); + config.process.args = Some(self.command.clone()); + } + + let container = LinuxContainer::load_from_state(&container_state, &self.console_socket)?; + let status = container.status()?; + if status != ContainerStatus::Created && status != ContainerStatus::Running { + bail!("Can't exec in container with {:?} state", status); + } + + Ok(Launcher::new( + &container_state.bundle, + root, + false, + Box::new(container), + self.pid_file.clone(), + )) + } + + pub fn run(&self, root: &Path) -> Result<()> { + let mut launcher = self.launcher(root)?; + launcher.launch(Action::Exec)?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_key_val() { + let (key, value): (String, String) = parse_key_val("OZONEC_LOG_LEVEL=info").unwrap(); + assert_eq!(key, "OZONEC_LOG_LEVEL"); + assert_eq!(value, "info"); + + assert!(parse_key_val::("OZONEC_LOG_LEVEL").is_err()); + } +} diff --git a/ozonec/src/commands/kill.rs 
new file mode 100644
index 0000000000000000000000000000000000000000..e9ab6350bcbcc463e80f5e5b05e5e562f89c70ba
--- /dev/null
+++ b/ozonec/src/commands/kill.rs
@@ -0,0 +1,71 @@
+// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
+
+use std::{path::Path, str::FromStr};
+
+use anyhow::{Context, Result};
+use clap::{builder::NonEmptyStringValueParser, Parser};
+use nix::sys::signal::Signal;
+
+use crate::{
+    container::{Container, State},
+    linux::LinuxContainer,
+};
+
+/// Send a signal to the container process
+#[derive(Parser, Debug)]
+pub struct Kill {
+    /// Specify the container id
+    #[arg(value_parser = NonEmptyStringValueParser::new(), required = true)]
+    pub container_id: String,
+    /// The signal to send to the container process
+    pub signal: String,
+}
+
+impl Kill {
+    pub fn run(&self, root: &Path) -> Result<()> {
+        let container_state = State::load(root, &self.container_id)?;
+        let signal = parse_signal(&self.signal).with_context(|| "Invalid signal")?;
+        let container = LinuxContainer::load_from_state(&container_state, &None)?;
+
+        container.kill(signal)?;
+        Ok(())
+    }
+}
+
+fn parse_signal(signal: &str) -> Result<Signal> {
+    if let Ok(num) = signal.parse::<i32>() {
+        return Ok(Signal::try_from(num)?);
+    }
+
+    let mut uppercase_sig = signal.to_uppercase();
+    if !uppercase_sig.starts_with("SIG") {
+        uppercase_sig = format!("SIG{}", &uppercase_sig);
+    }
+    Ok(Signal::from_str(&uppercase_sig)?)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_signal() {
+        assert_eq!(parse_signal("9").unwrap(), Signal::SIGKILL);
+        assert_eq!(parse_signal("sigterm").unwrap(), Signal::SIGTERM);
+        assert_eq!(parse_signal("SIGBUS").unwrap(), Signal::SIGBUS);
+        assert_eq!(parse_signal("hup").unwrap(), Signal::SIGHUP);
+        assert_eq!(parse_signal("ABRT").unwrap(), Signal::SIGABRT);
+        assert!(parse_signal("100").is_err());
+        assert!(parse_signal("ERROR").is_err());
+    }
+}
diff --git a/ozonec/src/commands/mod.rs b/ozonec/src/commands/mod.rs
new file mode 100644
index 0000000000000000000000000000000000000000..f8096f3f655dda7bab90238a4124aa632f2e762c
--- /dev/null
+++ b/ozonec/src/commands/mod.rs
@@ -0,0 +1,25 @@
+// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
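+
+// Subcommands supported by the ozonec CLI; each submodule below implements one
+// OCI runtime operation and is re-exported for the command-line parser.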
+ +mod create; +mod delete; +mod exec; +mod kill; +mod start; +mod state; + +pub use create::Create; +pub use delete::Delete; +pub use exec::Exec; +pub use kill::Kill; +pub use start::Start; +pub use state::State; diff --git a/ozonec/src/commands/start.rs b/ozonec/src/commands/start.rs new file mode 100644 index 0000000000000000000000000000000000000000..33ce7dd6c8c55e7389858844d3f4449f17efb076 --- /dev/null +++ b/ozonec/src/commands/start.rs @@ -0,0 +1,56 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::path::Path; + +use anyhow::{bail, Context, Result}; +use clap::Parser; +use oci_spec::state::ContainerStatus; + +use crate::{ + container::{Action, Launcher, State}, + linux::LinuxContainer, + utils::OzonecErr, +}; + +/// Start the user-specified code from process +#[derive(Parser, Debug)] +pub struct Start { + pub container_id: String, +} + +impl Start { + fn launcher(&self, root: &Path) -> Result { + let container_state = + State::load(root, &self.container_id).with_context(|| OzonecErr::LoadConState)?; + let container = LinuxContainer::load_from_state(&container_state, &None)?; + let oci_status = container.status()?; + + if oci_status != ContainerStatus::Created { + bail!("Can't start a container with {:?} status", oci_status); + } + + Ok(Launcher::new( + &container_state.bundle, + root, + false, + Box::new(container), + None, + )) + } + + pub fn run(&self, root: &Path) -> Result<()> { + let mut launcher = self.launcher(root)?; + launcher.launch(Action::Start)?; + Ok(()) + } +} diff --git a/ozonec/src/commands/state.rs b/ozonec/src/commands/state.rs new file mode 100644 index 0000000000000000000000000000000000000000..d667694ff0d72595148bee28f5f0e7e272872962 --- /dev/null +++ b/ozonec/src/commands/state.rs @@ -0,0 +1,55 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
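+
+// `ozonec state` prints the container state as pretty-printed JSON; the status
+// field is recomputed from the live container process rather than taken from
+// the saved state file.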
+ +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result}; +use clap::{builder::NonEmptyStringValueParser, Parser}; +use serde::{Deserialize, Serialize}; + +use crate::{container::State as ContainerState, linux::LinuxContainer}; + +/// Request the container state +#[derive(Debug, Parser)] +pub struct State { + /// Specify the container id + #[arg(value_parser = NonEmptyStringValueParser::new(), required = true)] + pub container_id: String, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct RuntimeState { + pub oci_version: String, + pub id: String, + pub status: String, + pub pid: i32, + pub bundle: PathBuf, +} + +impl State { + pub fn run(&self, root: &Path) -> Result<()> { + let state = ContainerState::load(root, &self.container_id)?; + let container = LinuxContainer::load_from_state(&state, &None)?; + let runtime_state = RuntimeState { + oci_version: state.oci_version, + id: state.id, + pid: state.pid, + status: container.status()?.to_string(), + bundle: state.bundle, + }; + let json_data = &serde_json::to_string_pretty(&runtime_state) + .with_context(|| "Failed to get json data of container state")?; + + println!("{}", json_data); + Ok(()) + } +} diff --git a/ozonec/src/container/launcher.rs b/ozonec/src/container/launcher.rs new file mode 100644 index 0000000000000000000000000000000000000000..ef68c392299b2d826702ca3a5a6f2ae60f9e9de4 --- /dev/null +++ b/ozonec/src/container/launcher.rs @@ -0,0 +1,132 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +// Linux container create flow: +// ozonec create | State 1 process | Stage 2 process | ozonec start +// | | | +// -> clone3 -> | | | +// <- mapping request <- | | | +// write uid/gid mappings | | | +// -> send mapping done -> | | | +// | set uid/gid | | +// | set pid namespace | | +// <- send stage 2 pid | | -> clone3 -> | +// | exit | set rest namespaces | +// | | pivot_root/chroot | +// | | set capabilities | +// | | set seccomp | +// < send ready <- | | +// | | wait for start signal | +// update pid file | | | ozonec start $id +// exit | | | <- send start signal +// | | execvp cmd | exit + +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result}; + +use super::{state::State, Container}; +use crate::{linux::Process, utils::OzonecErr}; + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Action { + Create, + Start, + Exec, +} + +pub struct Launcher { + pub bundle: PathBuf, + pub root: PathBuf, + /// init is set to true when creating a container. 
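+    /// When false, the launcher drives an already created container
+    /// (the `start` and `exec` paths) instead of setting up a new one.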
+    pub init: bool,
+    pub runner: Box<dyn Container>,
+    pub pid_file: Option<PathBuf>,
+}
+
+impl Launcher {
+    pub fn new(
+        bundle: &Path,
+        root: &Path,
+        init: bool,
+        runner: Box<dyn Container>,
+        pid_file: Option<PathBuf>,
+    ) -> Self {
+        Self {
+            bundle: bundle.to_path_buf(),
+            root: root.to_path_buf(),
+            init,
+            runner,
+            pid_file,
+        }
+    }
+
+    pub fn launch(&mut self, action: Action) -> Result<()> {
+        if self.init {
+            self.spawn_container()?;
+        } else {
+            self.spawn_process(action)?;
+        }
+
+        if let Some(pid_file) = self.pid_file.as_ref() {
+            let pid = self.runner.get_pid();
+            std::fs::write(pid_file, format!("{}", pid)).with_context(|| "Failed to write pid")?;
+        }
+
+        Ok(())
+    }
+
+    fn spawn_container(&mut self) -> Result<()> {
+        self.spawn_process(Action::Create)?;
+
+        let mut state = self
+            .get_state()
+            .with_context(|| "Failed to get container state")?;
+        state.update();
+        state.save().with_context(|| "Failed to save state")?;
+        Ok(())
+    }
+
+    fn spawn_process(&mut self, action: Action) -> Result<()> {
+        let mut process = self.get_process();
+        match action {
+            Action::Create => self.runner.create(&mut process),
+            Action::Start => self.runner.start(),
+            Action::Exec => self.runner.exec(&mut process),
+        }
+    }
+
+    fn get_process(&self) -> Process {
+        let config = self.runner.get_config();
+        Process::new(&config.process, self.init)
+    }
+
+    fn get_state(&self) -> Result<State> {
+        let state = self.runner.get_oci_state()?;
+        let pid = self.runner.get_pid();
+        let proc =
+            procfs::process::Process::new(pid).with_context(|| OzonecErr::ReadProcPid(pid))?;
+        let start_time = proc
+            .stat()
+            .with_context(|| OzonecErr::ReadProcStat(pid))?
+            .starttime;
+
+        Ok(State::new(
+            &self.root,
+            &self.bundle,
+            state,
+            start_time,
+            *self.runner.created_time(),
+            self.runner.get_config(),
+        ))
+    }
+}
diff --git a/ozonec/src/container/mod.rs b/ozonec/src/container/mod.rs
new file mode 100644
index 0000000000000000000000000000000000000000..761e2517ec5380aa6563ffffb1c5737a28fb3324
--- /dev/null
+++ b/ozonec/src/container/mod.rs
@@ -0,0 +1,47 @@
+// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
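+
+// Platform-independent container plumbing: `Launcher` drives the
+// create/start/exec flow, `State` is the persisted container state, and the
+// `Container` trait is what platform backends such as `LinuxContainer`
+// implement.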
+ +mod launcher; +mod state; + +pub use launcher::{Action, Launcher}; +pub use state::State; + +use std::time::SystemTime; + +use anyhow::Result; +use libc::pid_t; +use nix::sys::signal::Signal; + +use oci_spec::{runtime::RuntimeConfig, state::State as OciState}; + +use crate::linux::Process; + +pub trait Container { + fn get_config(&self) -> &RuntimeConfig; + + fn get_oci_state(&self) -> Result; + + fn get_pid(&self) -> pid_t; + + fn created_time(&self) -> &SystemTime; + + fn create(&mut self, process: &mut Process) -> Result<()>; + + fn start(&mut self) -> Result<()>; + + fn exec(&mut self, process: &mut Process) -> Result<()>; + + fn kill(&self, sig: Signal) -> Result<()>; + + fn delete(&self, state: &State, force: bool) -> Result<()>; +} diff --git a/ozonec/src/container/state.rs b/ozonec/src/container/state.rs new file mode 100644 index 0000000000000000000000000000000000000000..659752a098f2441f377203380128e095c6bb3d80 --- /dev/null +++ b/ozonec/src/container/state.rs @@ -0,0 +1,204 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + fs::{self, DirBuilder, File, OpenOptions}, + os::unix::fs::DirBuilderExt, + path::{Path, PathBuf}, + time::SystemTime, +}; + +use anyhow::{bail, Context, Result}; +use chrono::{DateTime, Utc}; +use libc::pid_t; +use nix::sys::stat::Mode; +use serde::{Deserialize, Serialize}; + +use oci_spec::{runtime::RuntimeConfig, state::State as OciState}; + +use crate::utils::OzonecErr; + +#[derive(Serialize, Deserialize, Debug, Clone, Default)] +#[serde(rename_all = "camelCase")] +pub struct State { + pub oci_version: String, + pub id: String, + pub pid: pid_t, + pub root: PathBuf, + pub bundle: PathBuf, + pub rootfs: String, + pub start_time: u64, + pub created_time: DateTime, + pub config: Option, +} + +impl State { + pub fn new( + root: &Path, + bundle: &Path, + oci_state: OciState, + start_time: u64, + created_time: SystemTime, + config: &RuntimeConfig, + ) -> Self { + Self { + oci_version: oci_state.ociVersion, + id: oci_state.id, + pid: oci_state.pid, + root: root.to_path_buf(), + bundle: bundle.to_path_buf(), + rootfs: config.root.path.clone(), + start_time, + created_time: DateTime::from(created_time), + config: Some(config.clone()), + } + } + + pub fn save(&self) -> Result<()> { + if !&self.root.exists() { + DirBuilder::new() + .recursive(true) + .mode(Mode::S_IRWXU.bits()) + .create(&self.root) + .with_context(|| "Failed to create root directory")?; + } + + let path = Self::file_path(&self.root, &self.id); + let state_file = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(&path) + .with_context(|| OzonecErr::OpenFile(path.to_string_lossy().to_string()))?; + serde_json::to_writer(&state_file, self)?; + Ok(()) + } + + pub fn update(&mut self) { + let linux = self.config.as_mut().unwrap().linux.as_mut(); + if let Some(config) = linux { + for ns in &mut config.namespaces { + if ns.path.is_none() { + let ns_name: String = ns.ns_type.into(); + ns.path = Some(PathBuf::from(format!("/proc/{}/ns/{}", self.pid, ns_name))) 
+ } + } + } + } + + pub fn load(root: &Path, id: &str) -> Result { + let path = Self::file_path(root, id); + if !path.exists() { + bail!("Container {} doesn't exist", id); + } + + let state_file = File::open(&path) + .with_context(|| OzonecErr::OpenFile(path.to_string_lossy().to_string()))?; + let state = serde_json::from_reader(&state_file)?; + Ok(state) + } + + pub fn remove_dir(&self) -> Result<()> { + let state_dir = &self.root.join(&self.id); + fs::remove_dir_all(state_dir).with_context(|| "Failed to remove state directory")?; + Ok(()) + } + + fn file_path(root: &Path, id: &str) -> PathBuf { + root.join(id).join("state.json") + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use fs::{create_dir_all, remove_dir_all}; + use nix::unistd::getpid; + + use crate::linux::container::tests::init_config; + use oci_spec::{ + linux::{Namespace, NamespaceType}, + state::ContainerStatus, + }; + + use super::*; + + fn init_state(root: &Path, id: &str) -> State { + let oci_state = OciState { + ociVersion: String::from("1.2"), + id: String::from(id), + status: ContainerStatus::Created, + pid: 100, + bundle: root.to_string_lossy().to_string(), + annotations: HashMap::new(), + }; + State::new(root, root, oci_state, 0, SystemTime::now(), &init_config()) + } + + #[test] + fn test_state_update() { + let root = "/tmp/ozonec"; + remove_dir_all(root).unwrap_or_default(); + let mut state = init_state(Path::new(root), "test_state_update"); + state + .config + .as_mut() + .unwrap() + .linux + .as_mut() + .unwrap() + .namespaces + .push(Namespace { + ns_type: NamespaceType::Mount, + path: None, + }); + state.pid = getpid().as_raw(); + state.update(); + + for ns in &state + .config + .as_ref() + .unwrap() + .linux + .as_ref() + .unwrap() + .namespaces + { + assert_eq!( + ns.path.as_ref().unwrap().to_str().unwrap(), + format!( + "/proc/{}/ns/{}", + state.pid, + >::into(ns.ns_type) + ) + ); + } + } + + #[test] + fn test_state_load() { + let root = "/tmp/ozonec"; + remove_dir_all(root).unwrap_or_default(); + + let state = init_state(Path::new(root), "test_state_load"); + let dir = PathBuf::from(String::from(root)).join("test_state_load"); + create_dir_all(&dir).unwrap(); + + assert!(state.save().is_ok()); + assert!(dir.join("state.json").exists()); + let loaded_state = State::load(Path::new(root), "test_state_load").unwrap(); + assert_eq!(loaded_state.id, state.id); + assert!(state.remove_dir().is_ok()); + assert!(State::load(Path::new(root), "test_state_load").is_err()); + } +} diff --git a/ozonec/src/linux/apparmor.rs b/ozonec/src/linux/apparmor.rs new file mode 100644 index 0000000000000000000000000000000000000000..1f91c59c48317a46a109d2c8355d9fe65b294a7b --- /dev/null +++ b/ozonec/src/linux/apparmor.rs @@ -0,0 +1,44 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
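+
+// Minimal AppArmor support: detect whether AppArmor is enabled on the host and
+// write the requested profile to the kernel's procfs attr interface before the
+// container command is executed.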
+
+use std::{fs, path::Path};
+
+use anyhow::{Context, Result};
+
+const APPARMOR_ENABLED_PATH: &str = "/sys/module/apparmor/parameters/enabled";
+const APPARMOR_INTERFACE: &str = "/proc/self/attr/apparmor/exec";
+const APPARMOR_LEGACY_INTERFACE: &str = "/proc/self/attr/exec";
+
+pub fn is_enabled() -> Result<bool> {
+    let enabled = fs::read_to_string(APPARMOR_ENABLED_PATH)
+        .with_context(|| format!("Failed to read {}", APPARMOR_ENABLED_PATH))?;
+    Ok(enabled.starts_with('Y'))
+}
+
+pub fn apply_profile(profile: &str) -> Result<()> {
+    if profile.is_empty() {
+        return Ok(());
+    }
+
+    // Try the module-specific subdirectory first. This has been the recommended
+    // way to configure LSMs since Linux kernel 5.1; AppArmor has provided such a
+    // directory since Linux kernel 5.8.
+    match activate_profile(Path::new(APPARMOR_INTERFACE), profile) {
+        Ok(_) => Ok(()),
+        Err(_) => activate_profile(Path::new(APPARMOR_LEGACY_INTERFACE), profile)
+            .with_context(|| "Failed to apply apparmor profile"),
+    }
+}
+
+fn activate_profile(path: &Path, profile: &str) -> Result<()> {
+    fs::write(path, format!("exec {}", profile))?;
+    Ok(())
+}
diff --git a/ozonec/src/linux/container.rs b/ozonec/src/linux/container.rs
new file mode 100644
index 0000000000000000000000000000000000000000..0e627fb4fa6c64e3d7555db8002da7d07ca5bf38
--- /dev/null
+++ b/ozonec/src/linux/container.rs
@@ -0,0 +1,1258 @@
+// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
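+
+// Linux implementation of the `Container` trait. Container creation is split
+// across two helper processes (see the flow diagram in launcher.rs): the first
+// stage enters the user and pid namespaces, and the second stage finishes the
+// remaining namespace, rootfs and security setup before exec'ing the command.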
+ +use std::{ + collections::HashMap, + fs::{self, canonicalize, create_dir_all, OpenOptions}, + io::Write, + os::unix::net::UnixStream, + path::{Path, PathBuf}, + thread::sleep, + time::{Duration, SystemTime}, +}; + +use anyhow::{anyhow, bail, Context, Result}; +use libc::{c_char, pid_t, setdomainname}; +use log::{debug, info}; +use nix::{ + errno::Errno, + mount::MsFlags, + sys::{ + signal::{kill, Signal}, + statfs::statfs, + wait::{waitpid, WaitStatus}, + }, + unistd::{self, chown, getegid, geteuid, sethostname, unlink, Gid, Pid, Uid}, +}; +use procfs::process::ProcState; + +use super::{ + namespace::NsController, + notify_socket::{NotifySocket, NOTIFY_SOCKET}, + process::clone_process, + NotifyListener, Process, +}; +use crate::{ + container::{Container, State}, + linux::{rootfs::Rootfs, seccomp::set_seccomp}, + utils::{prctl, Channel, Message, OzonecErr}, +}; +use oci_spec::{ + linux::{Device as OciDevice, IdMapping, NamespaceType}, + runtime::RuntimeConfig, + state::{ContainerStatus, State as OciState}, +}; + +pub struct LinuxContainer { + pub id: String, + pub root: String, + pub config: RuntimeConfig, + pub pid: pid_t, + pub start_time: u64, + pub created_time: SystemTime, + pub console_socket: Option, +} + +impl LinuxContainer { + pub fn new( + id: &String, + root: &String, + config: &RuntimeConfig, + console_socket: &Option, + exist: &mut bool, + ) -> Result { + let container_dir = format!("{}/{}", root, id); + + Self::validate_config(config)?; + + if Path::new(container_dir.as_str()).exists() { + *exist = true; + bail!("Container {} already exists", id); + } + create_dir_all(container_dir.as_str()) + .with_context(|| OzonecErr::CreateDir(container_dir.clone()))?; + chown(container_dir.as_str(), Some(geteuid()), Some(getegid())) + .with_context(|| "Failed to chown container directory")?; + + Ok(Self { + id: id.clone(), + root: container_dir, + config: config.clone(), + pid: -1, + start_time: 0, + created_time: SystemTime::now(), + console_socket: console_socket.clone(), + }) + } + + pub fn load_from_state(state: &State, console_socket: &Option) -> Result { + let root_path = format!("{}/{}", state.root.to_string_lossy().to_string(), &state.id); + let config = state + .config + .clone() + .ok_or(anyhow!("Can't find config in state"))?; + + Ok(Self { + id: state.id.clone(), + root: root_path, + config, + pid: state.pid, + start_time: state.start_time, + created_time: state.created_time.into(), + console_socket: console_socket.clone(), + }) + } + + fn validate_config(config: &RuntimeConfig) -> Result<()> { + if config.linux.is_none() { + bail!("There is no linux specific configuration in config.json for Linux container"); + } + if config.process.args.is_none() { + bail!("args in process is not set in config.json."); + } + Ok(()) + } + + fn do_first_stage( + &mut self, + process: &mut Process, + parent_channel: &Channel, + fst_stage_channel: &Channel, + notify_listener: &Option, + ) -> Result<()> { + debug!("First stage process start"); + + self.set_user_namespace(parent_channel, fst_stage_channel, process)?; + + fst_stage_channel + .receiver + .close() + .with_context(|| "Failed to close receiver end of first stage channel")?; + + process + .set_rlimits() + .with_context(|| "Failed to set rlimit")?; + // New pid namespace goes intto effect in cloned child processes. + self.set_pid_namespace()?; + + // Spawn a child process to perform the second stage to initialize container. 
+ let init_pid = clone_process("ozonec:[2:INIT]", || { + self.do_second_stage(process, parent_channel, ¬ify_listener) + .with_context(|| "Second stage process encounters errors")?; + Ok(0) + })?; + + // Send the final container pid to the parent process. + parent_channel.send_init_pid(init_pid)?; + + debug!("First stage process exit"); + Ok(()) + } + + fn do_second_stage( + &mut self, + process: &mut Process, + parent_channel: &Channel, + notify_listener: &Option, + ) -> Result<()> { + debug!("Second stage process start"); + + unistd::setsid().with_context(|| "Failed to setsid")?; + process + .set_io_priority() + .with_context(|| "Failed to set io priority")?; + process + .set_scheduler() + .with_context(|| "Failed to set scheduler")?; + + let console_stream = match &self.console_socket { + Some(cs) => { + Some(UnixStream::connect(cs).with_context(|| "Failed to connect console socket")?) + } + None => None, + }; + self.set_rest_namespaces()?; + process.set_no_new_privileges()?; + + if process.init { + let propagation = self + .config + .linux + .as_ref() + .unwrap() + .rootfsPropagation + .clone(); + // Container running in a user namespace is not allowed to do mknod. + let mknod_device = !self.is_namespace_set(NamespaceType::User)?; + let mut devices: Vec = Vec::new(); + if let Some(devs) = self.config.linux.as_ref().unwrap().devices.as_ref() { + devices = devs.clone() + }; + let rootfs = Rootfs::new( + self.config.root.path.clone().into(), + propagation, + self.config.mounts.clone(), + mknod_device, + devices, + )?; + rootfs.prepare_rootfs(&self.config)?; + + // Entering into rootfs jail. If mount namespace is specified, use pivot_root. + // Otherwise use chroot. + if self.is_namespace_set(NamespaceType::Mount)? { + Rootfs::pivot_root(&rootfs.path).with_context(|| "Failed to pivot_root")?; + } else { + Rootfs::chroot(&rootfs.path).with_context(|| "Failed to chroot")?; + } + + self.set_sysctl_parameters()?; + } else if !self.is_namespace_set(NamespaceType::Mount)? { + Rootfs::chroot(&PathBuf::from(self.config.root.path.clone())) + .with_context(|| "Failed to chroot")?; + } + + process + .set_tty(console_stream, process.init) + .with_context(|| "Failed to set tty")?; + process.set_apparmor()?; + if self.config.root.readonly { + LinuxContainer::mount_rootfs_readonly()?; + } + self.set_readonly_paths()?; + self.set_masked_paths()?; + + let chdir_cwd_ret = process.chdir_cwd().is_err(); + process.set_additional_gids()?; + process.set_process_id()?; + + // Without setting no new privileges, setting seccomp is a privileged operation. + if !process.no_new_privileges() { + if let Some(seccomp) = &self.config.linux.as_ref().unwrap().seccomp { + set_seccomp(seccomp).with_context(|| "Failed to set seccomp")?; + } + } + process + .reset_capabilities() + .with_context(|| "Failed to reset capabilities")?; + process + .drop_capabilities() + .with_context(|| "Failed to drop capabilities")?; + if chdir_cwd_ret { + process.chdir_cwd()?; + } + // Ensure that the current working directory is inside the mount namespace root + // of the current container process. + Process::getcwd()?; + process.clean_envs(); + process.set_envs(); + if process.no_new_privileges() { + if let Some(seccomp) = &self.config.linux.as_ref().unwrap().seccomp { + set_seccomp(seccomp).with_context(|| "Failed to set seccomp")?; + } + } + + // Tell the parent process that the init process has been cloned. 
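+        // The parent interprets this message as "the container environment is
+        // ready" and then persists the state file before the create command
+        // returns.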
+ parent_channel.send_container_created()?; + parent_channel + .sender + .close() + .with_context(|| "Failed to close sender of parent channel")?; + + // Listening on the notify socket to start container. + if let Some(listener) = notify_listener { + listener.wait_for_start_container()?; + listener + .close() + .with_context(|| "Failed to close notify socket")?; + } + process.exec_program(); + } + + fn mount_rootfs_readonly() -> Result<()> { + let ms_flags = MsFlags::MS_RDONLY | MsFlags::MS_REMOUNT | MsFlags::MS_BIND; + let root_path = Path::new("/"); + let fs_flags = statfs(root_path) + .with_context(|| "Statfs root directory error")? + .flags() + .bits(); + + nix::mount::mount( + None::<&str>, + root_path, + None::<&str>, + ms_flags | MsFlags::from_bits_truncate(fs_flags), + None::<&str>, + ) + .with_context(|| "Failed to remount rootfs readonly")?; + Ok(()) + } + + fn get_container_status(&self) -> Result { + if self.pid == -1 { + return Ok(ContainerStatus::Creating); + } + + let proc = procfs::process::Process::new(self.pid); + // If error occurs when accessing /proc/, the process most likely has stopped. + if proc.is_err() { + return Ok(ContainerStatus::Stopped); + } + let proc_stat = proc + .unwrap() + .stat() + .with_context(|| OzonecErr::ReadProcStat(self.pid))?; + // If starttime is not the same, then pid is reused, and the original process has stopped. + if proc_stat.starttime != self.start_time { + return Ok(ContainerStatus::Stopped); + } + + match proc_stat.state()? { + ProcState::Zombie | ProcState::Dead => Ok(ContainerStatus::Stopped), + _ => { + let notify_socket = PathBuf::from(&self.root).join(NOTIFY_SOCKET); + if notify_socket.exists() { + return Ok(ContainerStatus::Created); + } + Ok(ContainerStatus::Running) + } + } + } + + pub fn status(&self) -> Result { + Ok(self + .get_oci_state() + .with_context(|| OzonecErr::GetOciState)? + .status) + } + + fn ns_controller(&self) -> Result { + Ok(self + .config + .linux + .as_ref() + .unwrap() + .namespaces + .clone() + .try_into()?) + } + + fn set_user_namespace( + &self, + parent_channel: &Channel, + fst_stage_channel: &Channel, + process: &Process, + ) -> Result<()> { + let ns_controller: NsController = self.ns_controller()?; + + if let Some(ns) = ns_controller.get(NamespaceType::User)? { + ns_controller + .set_namespace(NamespaceType::User) + .with_context(|| "Failed to set user namespace")?; + + if ns.path.is_none() { + // Child process needs to be dumpable, otherwise the parent process is not + // allowed to write the uid/gid mappings. + prctl::set_dumpable(true) + .map_err(|e| anyhow!("Failed to set process dumpable: {e}"))?; + parent_channel + .send_id_mappings() + .with_context(|| "Failed to send id mappings")?; + fst_stage_channel + .recv_id_mappings_done() + .with_context(|| "Failed to receive id mappings done")?; + prctl::set_dumpable(false) + .map_err(|e| anyhow!("Failed to set process undumpable: {e}"))?; + } + + // After UID/GID mappings are configured, ozonec wants to make sure continue as + // the root user inside the new user namespace. This is required because the + // process of configuring the container process will require root, even though + // the root in the user namespace is likely mapped to an non-privileged user. 
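+            // Note that uid 0/gid 0 here are IDs inside the new user namespace;
+            // the uid/gid mappings written by the parent decide which host IDs
+            // they actually correspond to.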
+ process.set_id(Gid::from_raw(0), Uid::from_raw(0))?; + } + Ok(()) + } + + fn is_namespace_set(&self, ns_type: NamespaceType) -> Result { + let ns_controller: NsController = self.ns_controller()?; + Ok(ns_controller.get(ns_type)?.is_some()) + } + + fn set_pid_namespace(&self) -> Result<()> { + let ns_controller = self.ns_controller()?; + + if ns_controller.get(NamespaceType::Pid)?.is_some() { + ns_controller + .set_namespace(NamespaceType::Pid) + .with_context(|| "Failed to set pid namespace")?; + } + Ok(()) + } + + fn set_readonly_paths(&self) -> Result<()> { + if let Some(readonly_paths) = self.config.linux.as_ref().unwrap().readonlyPaths.clone() { + for p in readonly_paths { + let path = Path::new(&p); + if let Err(e) = nix::mount::mount( + Some(path), + path, + None::<&str>, + MsFlags::MS_BIND | MsFlags::MS_REC, + None::<&str>, + ) { + if matches!(e, Errno::ENOENT) { + return Ok(()); + } + bail!("Failed to make {} as recursive bind mount", path.display()); + } + + nix::mount::mount( + Some(path), + path, + None::<&str>, + MsFlags::MS_NOSUID + | MsFlags::MS_NODEV + | MsFlags::MS_NOEXEC + | MsFlags::MS_BIND + | MsFlags::MS_REMOUNT + | MsFlags::MS_RDONLY, + None::<&str>, + ) + .with_context(|| format!("Failed to remount {} readonly", path.display()))?; + } + } + Ok(()) + } + + fn set_masked_paths(&self) -> Result<()> { + let linux = self.config.linux.as_ref().unwrap(); + if let Some(masked_paths) = linux.maskedPaths.clone() { + for p in masked_paths { + let path = Path::new(&p); + if let Err(e) = nix::mount::mount( + Some(Path::new("/dev/null")), + path, + None::<&str>, + MsFlags::MS_BIND, + None::<&str>, + ) { + match e { + // Ignore if path doesn't exists. + Errno::ENOENT => (), + Errno::ENOTDIR => { + let label = match linux.mountLabel.clone() { + Some(l) => format!("context=\"{}\"", l), + None => "".to_string(), + }; + nix::mount::mount( + Some(Path::new("tmpfs")), + path, + Some("tmpfs"), + MsFlags::MS_RDONLY, + Some(label.as_str()), + ) + .with_context(|| { + format!( + "Failed to make {} as masked mount by tmpfs", + path.display() + ) + })?; + } + _ => bail!( + "Failed to make {} as masked mount by /dev/null", + path.display() + ), + } + } + } + } + Ok(()) + } + + fn set_rest_namespaces(&self) -> Result<()> { + let ns_config = &self.config.linux.as_ref().unwrap().namespaces; + let ns_controller: NsController = ns_config.clone().try_into()?; + + for ns in ns_config { + match ns.ns_type { + // User namespace and pid namespace have been set in the first stage. + // Mount namespace is going to be set later to avoid failure with + // existed namespaces. + NamespaceType::User | NamespaceType::Pid | NamespaceType::Mount => (), + _ => ns_controller.set_namespace(ns.ns_type).with_context(|| { + format!( + "Failed to set {} namespace", + >::into(ns.ns_type) + ) + })?, + } + + if ns.ns_type == NamespaceType::Uts && ns.path.is_none() { + if let Some(hostname) = &self.config.hostname { + sethostname(hostname).with_context(|| "Failed to set hostname")?; + } + if let Some(domainname) = &self.config.domainname { + let errno; + + // SAFETY: FFI call with valid arguments. 
+ match unsafe { + setdomainname( + domainname.as_bytes().as_ptr() as *const c_char, + domainname.len(), + ) + } { + 0 => return Ok(()), + -1 => errno = nix::Error::last(), + _ => errno = nix::Error::UnknownErrno, + } + bail!("Failed to set domainname: {}", errno); + } + } + } + + ns_controller + .set_namespace(NamespaceType::Mount) + .with_context(|| "Failed to set mount namespace")?; + Ok(()) + } + + fn set_id_mappings( + &self, + parent_channel: &Channel, + fst_stage_channel: &Channel, + fst_stage_pid: &Pid, + ) -> Result<()> { + parent_channel + .recv_id_mappings() + .with_context(|| "Failed to receive id mappings")?; + LinuxContainer::set_groups(fst_stage_pid, false) + .with_context(|| "Failed to disable setting groups")?; + + if let Some(linux) = self.config.linux.as_ref() { + if let Some(uid_mappings) = linux.uidMappings.as_ref() { + self.write_id_mapping(uid_mappings, fst_stage_pid, "uid_map")?; + } + if let Some(gid_mappings) = linux.gidMappings.as_ref() { + self.write_id_mapping(gid_mappings, fst_stage_pid, "gid_map")?; + } + } + + fst_stage_channel + .send_id_mappings_done() + .with_context(|| "Failed to send id mapping done")?; + fst_stage_channel + .sender + .close() + .with_context(|| "Failed to close fst_stage_channel sender")?; + Ok(()) + } + + fn write_id_mapping(&self, mappings: &Vec, pid: &Pid, file: &str) -> Result<()> { + let path = format!("/proc/{}/{}", pid.as_raw().to_string(), file); + let mut opened_file = OpenOptions::new() + .write(true) + .open(&path) + .with_context(|| OzonecErr::OpenFile(path))?; + let mut id_mappings = String::from(""); + + for m in mappings { + let mapping = format!("{} {} {}\n", m.containerID, m.hostID, m.size); + id_mappings = id_mappings + &mapping; + } + opened_file + .write_all(&id_mappings.as_bytes()) + .with_context(|| "Failed to write id mappings")?; + Ok(()) + } + + fn set_groups(pid: &Pid, allow: bool) -> Result<()> { + let path = format!("/proc/{}/setgroups", pid.as_raw().to_string()); + if allow == true { + std::fs::write(&path, "allow")? + } else { + std::fs::write(&path, "deny")? + } + Ok(()) + } + + fn set_sysctl_parameters(&self) -> Result<()> { + if let Some(sysctl_params) = self.config.linux.as_ref().unwrap().sysctl.clone() { + let sys_path = PathBuf::from("/proc/sys"); + for (param, value) in sysctl_params { + let path = sys_path.join(param.replace('.', "/")); + fs::write(&path, value.as_bytes()) + .with_context(|| format!("Failed to set {} to {}", path.display(), value))?; + } + } + Ok(()) + } +} + +impl Container for LinuxContainer { + fn get_config(&self) -> &RuntimeConfig { + &self.config + } + + fn get_pid(&self) -> pid_t { + self.pid + } + + fn created_time(&self) -> &SystemTime { + &self.created_time + } + + fn get_oci_state(&self) -> Result { + let status = self.get_container_status()?; + let pid = if status != ContainerStatus::Stopped { + self.pid + } else { + 0 + }; + + let rootfs = canonicalize(&self.config.root.path.clone()) + .with_context(|| "Failed to canonicalize root path")?; + let bundle = match rootfs.parent() { + Some(p) => p + .to_str() + .ok_or(anyhow!("root path is not valid unicode"))? 
+ .to_string(), + None => bail!("Failed to get bundle directory"), + }; + let annotations = if let Some(a) = self.config.annotations.clone() { + a + } else { + HashMap::new() + }; + Ok(OciState { + ociVersion: self.config.ociVersion.clone(), + id: self.id.clone(), + status, + pid, + bundle, + annotations, + }) + } + + fn create(&mut self, process: &mut Process) -> Result<()> { + // Create notify socket to notify the container process to start. + let notify_listener = if process.init { + Some(NotifyListener::new(PathBuf::from(&self.root))?) + } else { + None + }; + + // As /proc/self/oom_score_adj is not allowed to write unless privileged, + // set oom_score_adj before setting process undumpable. + process + .set_oom_score_adj() + .with_context(|| "Failed to set oom_score_adj")?; + + // Make the process undumpable to avoid various race conditions that could cause + // processes in namespaces to join to access host resources (or execute code). + if !self.config.linux.as_ref().unwrap().namespaces.is_empty() { + prctl::set_dumpable(false) + .map_err(|e| anyhow!("Failed to set process undumpable: errno {}", e))?; + } + + // Create channels to communicate with child processes. + let parent_channel = Channel::::new() + .with_context(|| "Failed to create message channel for parent process")?; + let fst_stage_channel = Channel::::new()?; + // Set receivers timeout: 50ms. + parent_channel.receiver.set_timeout(50000)?; + fst_stage_channel.receiver.set_timeout(50000)?; + + // Spawn a child process to perform Stage 1. + let fst_stage_pid = clone_process("ozonec:[1:CHILD]", || { + self.do_first_stage( + process, + &parent_channel, + &fst_stage_channel, + ¬ify_listener, + ) + .with_context(|| "First stage process encounters errors")?; + Ok(0) + })?; + + if self.is_namespace_set(NamespaceType::User)? { + self.set_id_mappings(&parent_channel, &fst_stage_channel, &fst_stage_pid)?; + } + + let init_pid = parent_channel + .recv_init_pid() + .with_context(|| "Failed to receive init pid")?; + parent_channel.recv_container_created()?; + parent_channel + .receiver + .close() + .with_context(|| "Failed to close receiver end of parent channel")?; + + self.pid = init_pid.as_raw(); + self.start_time = procfs::process::Process::new(self.pid) + .with_context(|| OzonecErr::ReadProcPid(self.pid))? + .stat() + .with_context(|| OzonecErr::ReadProcStat(self.pid))? + .starttime; + + match waitpid(fst_stage_pid, None) { + Ok(WaitStatus::Exited(_, 0)) => (), + Ok(WaitStatus::Exited(_, s)) => { + info!("First stage process exits with status: {}", s); + } + Ok(WaitStatus::Signaled(_, sig, _)) => { + info!("First stage process killed by signal: {}", sig) + } + Ok(_) => (), + Err(Errno::ECHILD) => { + info!("First stage process has already been reaped"); + } + Err(e) => { + bail!("Failed to waitpid for first stage process: {e}"); + } + } + Ok(()) + } + + fn start(&mut self) -> Result<()> { + let path = PathBuf::from(&self.root).join(NOTIFY_SOCKET); + let mut notify_socket = NotifySocket::new(&path); + + notify_socket.notify_container_start()?; + unlink(&path).with_context(|| "Failed to delete notify.sock")?; + self.start_time = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .with_context(|| "Failed to get start time")? + .as_secs(); + Ok(()) + } + + fn exec(&mut self, process: &mut Process) -> Result<()> { + // process.init is false. 
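+        // With init set to false no notify socket is created and the full rootfs
+        // setup is skipped; the new process joins the namespaces recorded in the
+        // container state.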
+ self.create(process)?; + Ok(()) + } + + fn kill(&self, sig: Signal) -> Result<()> { + let mut status = self.status()?; + if status == ContainerStatus::Stopped { + bail!("The container is already stopped"); + } + if status == ContainerStatus::Creating { + bail!("The container has not been created"); + } + + let pid = Pid::from_raw(self.pid); + match kill(pid, None) { + Err(errno) => { + if errno != Errno::ESRCH { + bail!("Failed to kill process {}: {:?}", pid, errno); + } + } + Ok(_) => kill(pid, sig)?, + } + + let mut _retry = 0; + status = self.status()?; + while status != ContainerStatus::Stopped { + sleep(Duration::from_millis(1)); + if _retry > 3 { + bail!("The container is still not stopped."); + } + status = self.status()?; + _retry += 1; + } + Ok(()) + } + + fn delete(&self, state: &State, force: bool) -> Result<()> { + match self.status()? { + ContainerStatus::Stopped => state.remove_dir()?, + _ => { + if force { + self.kill(Signal::SIGKILL) + .with_context(|| "Failed to kill the container by force")?; + state.remove_dir()?; + } else { + bail!( + "Failed to delete container {} which is not stopped", + &state.id + ); + } + } + } + Ok(()) + } +} + +#[cfg(test)] +pub mod tests { + use std::ffi::CStr; + + use chrono::DateTime; + use fs::{read_to_string, remove_dir_all, File}; + use libc::getdomainname; + use nix::sys::stat::stat; + use rusty_fork::rusty_fork_test; + use unistd::{gethostname, getpid}; + + use crate::linux::{ + mount::Mount, namespace::tests::set_namespace, process::tests::init_oci_process, + }; + use oci_spec::{ + linux::{LinuxPlatform, Namespace}, + posix::{Root, User}, + process::Process as OciProcess, + runtime::Mount as OciMount, + }; + + use super::*; + + pub fn init_config() -> RuntimeConfig { + let root = Root { + path: String::from("/tmp/ozonec/bundle/rootfs"), + readonly: true, + }; + let user = User { + uid: 0, + gid: 0, + umask: None, + additionalGids: None, + }; + let process = OciProcess { + cwd: String::from("/"), + args: Some(vec![String::from("bash")]), + env: None, + terminal: false, + consoleSize: None, + rlimits: None, + apparmorProfile: None, + capabilities: None, + noNewPrivileges: None, + oomScoreAdj: None, + scheduler: None, + selinuxLabel: None, + ioPriority: None, + execCPUAffinity: None, + user, + }; + let linux = LinuxPlatform { + namespaces: Vec::new(), + uidMappings: None, + gidMappings: None, + timeOffsets: None, + devices: None, + cgroupsPath: None, + rootfsPropagation: None, + maskedPaths: None, + readonlyPaths: None, + mountLabel: None, + personality: None, + resources: None, + rdma: None, + unified: None, + sysctl: None, + seccomp: None, + #[cfg(target_arch = "x86_64")] + intelRdt: None, + }; + RuntimeConfig { + ociVersion: String::from("1.2"), + root, + mounts: Vec::new(), + process, + hostname: None, + domainname: None, + linux: Some(linux), + vm: None, + hooks: None, + annotations: None, + } + } + + #[test] + fn test_linux_container_new() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + let config = init_config(); + let mut exist: bool = false; + let container = LinuxContainer::new( + &String::from("LinuxContainer_new"), + &String::from("/tmp/ozonec"), + &config, + &None, + &mut exist, + ) + .unwrap(); + + let root = Path::new(&container.root); + assert!(root.exists()); + let root_stat = stat(root).unwrap(); + assert_eq!(root_stat.st_uid, geteuid().as_raw()); + assert_eq!(root_stat.st_gid, getegid().as_raw()); + + assert!(LinuxContainer::new( + &String::from("LinuxContainer_new"), + &String::from("/tmp/ozonec"), + 
&config, + &None, + &mut exist, + ) + .is_err()); + assert_eq!(exist, true); + } + + #[test] + fn test_validate_config() { + let mut config = init_config(); + config.linux = None; + assert!(LinuxContainer::validate_config(&config).is_err()); + + let linux = LinuxPlatform { + namespaces: Vec::new(), + uidMappings: None, + gidMappings: None, + timeOffsets: None, + devices: None, + cgroupsPath: None, + rootfsPropagation: None, + maskedPaths: None, + readonlyPaths: None, + mountLabel: None, + personality: None, + resources: None, + rdma: None, + unified: None, + sysctl: None, + seccomp: None, + #[cfg(target_arch = "x86_64")] + intelRdt: None, + }; + config.process.args = None; + config.linux = Some(linux); + assert!(LinuxContainer::validate_config(&config).is_err()); + } + + #[test] + fn test_load_from_state() { + let mut state = State { + oci_version: String::from("1.2"), + id: String::from("load_from_state"), + pid: 0, + root: PathBuf::from("/tmp/ozonec/root"), + bundle: PathBuf::from("/tmp/ozonec/bundle"), + rootfs: String::from("/tmp/ozonec/bundle/rootfs"), + start_time: 0, + created_time: DateTime::from(SystemTime::now()), + config: None, + }; + assert!(LinuxContainer::load_from_state(&state, &None).is_err()); + + let config = init_config(); + state.config = Some(config); + assert!(LinuxContainer::load_from_state(&state, &None).is_ok()); + } + + #[test] + fn test_status() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + let config = init_config(); + create_dir_all(&config.root.path).unwrap(); + let mut exist: bool = false; + let mut container = LinuxContainer::new( + &String::from("get_container_status"), + &String::from("/tmp/ozonec"), + &config, + &None, + &mut exist, + ) + .unwrap(); + container.pid = -1; + + assert_eq!(container.status().unwrap(), ContainerStatus::Creating); + + container.pid = 0; + assert_eq!(container.status().unwrap(), ContainerStatus::Stopped); + + container.pid = getpid().as_raw(); + assert_eq!(container.status().unwrap(), ContainerStatus::Stopped); + + let proc_stat = procfs::process::Process::new(container.pid) + .unwrap() + .stat() + .unwrap(); + container.start_time = proc_stat.starttime; + assert_eq!(container.status().unwrap(), ContainerStatus::Running); + + let notify_socket = PathBuf::from(&container.root).join(NOTIFY_SOCKET); + File::create(¬ify_socket).unwrap(); + assert_eq!(container.status().unwrap(), ContainerStatus::Created); + } + + #[test] + fn test_is_namespace_set() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + let mut config = init_config(); + config.linux.as_mut().unwrap().namespaces.push(Namespace { + ns_type: NamespaceType::Mount, + path: None, + }); + let mut exist = false; + let container = LinuxContainer::new( + &String::from("test_is_namespace_set"), + &String::from("/tmp/ozonec/test_is_namespace_set"), + &config, + &None, + &mut exist, + ) + .unwrap(); + + assert!(container.is_namespace_set(NamespaceType::Mount).unwrap()); + assert!(!container.is_namespace_set(NamespaceType::User).unwrap()); + } + + #[test] + #[ignore = "unshare may not be permitted"] + fn test_set_pid_namespace() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + let mut config = init_config(); + config.linux.as_mut().unwrap().namespaces.push(Namespace { + ns_type: NamespaceType::Pid, + path: None, + }); + let mut exist = false; + let container = LinuxContainer::new( + &String::from("test_set_pid_namespace"), + &String::from("/tmp/ozonec/test_set_pid_namespace"), + &config, + &None, + &mut exist, + ) + .unwrap(); + + 
assert!(container.set_pid_namespace().is_ok()); + } + + #[test] + #[ignore = "unshare may not be permitted"] + fn test_set_id_mappings() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + let mut config = init_config(); + let linux = config.linux.as_mut().unwrap(); + linux.namespaces = vec![Namespace { + ns_type: NamespaceType::User, + path: None, + }]; + linux.uidMappings = Some(vec![IdMapping { + containerID: 0, + hostID: 0, + size: 1000, + }]); + linux.gidMappings = Some(vec![IdMapping { + containerID: 0, + hostID: 0, + size: 1000, + }]); + let mut exist = false; + let container = LinuxContainer::new( + &String::from("test_set_id_mappings"), + &String::from("/tmp/ozonec/test_set_id_mappings"), + &config, + &None, + &mut exist, + ) + .unwrap(); + + let fst_channel = Channel::::new().unwrap(); + let sec_channel = Channel::::new().unwrap(); + let child = clone_process("test_set_id_mappings", || { + let process = Process::new(&init_oci_process(), false); + assert!(container + .set_user_namespace(&fst_channel, &sec_channel, &process) + .is_ok()); + Ok(1) + }) + .unwrap(); + + assert!(container + .set_id_mappings(&fst_channel, &sec_channel, &child) + .is_ok()); + let path = format!("/proc/{}/setgroups", child.as_raw().to_string()); + let setgroups = fs::read_to_string(path).unwrap(); + assert_eq!(setgroups.trim(), "deny"); + let path = format!("/proc/{}/uid_map", child.as_raw().to_string()); + let uid_map = fs::read_to_string(path).unwrap(); + let mut iter = uid_map.split_ascii_whitespace(); + assert_eq!(iter.next(), Some("0")); + assert_eq!(iter.next(), Some("0")); + assert_eq!(iter.next(), Some("1000")); + assert_eq!(iter.next(), None); + let path = format!("/proc/{}/gid_map", child.as_raw().to_string()); + let gid_map = fs::read_to_string(path).unwrap(); + let mut iter = gid_map.split_ascii_whitespace(); + assert_eq!(iter.next(), Some("0")); + assert_eq!(iter.next(), Some("0")); + assert_eq!(iter.next(), Some("1000")); + assert_eq!(iter.next(), None); + + match waitpid(child, None) { + Ok(WaitStatus::Exited(_, s)) => { + assert_eq!(s, 1); + } + Ok(_) => (), + Err(e) => { + panic!("Failed to waitpid for child process: {e}"); + } + } + } + + rusty_fork_test! 
{ + #[test] + #[ignore = "unshare may not be permitted"] + fn test_set_readonly_paths() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + set_namespace(NamespaceType::Mount); + let root = PathBuf::from("/tmp/ozonec/test_set_readonly_paths"); + let mut config = init_config(); + let path = root.to_string_lossy().to_string(); + config.linux.as_mut().unwrap().readonlyPaths = Some(vec![path.clone()]); + let mut exist = false; + let container = LinuxContainer::new( + &String::from("test_set_readonly_paths"), + &root.to_string_lossy().to_string(), + &config, + &None, + &mut exist, + ) + .unwrap(); + File::create(root.join("test")).unwrap(); + + assert!(container.set_readonly_paths().is_ok()); + let path = PathBuf::from(path).join("test"); + assert!(File::create(&path).is_err()); + } + + #[test] + #[ignore = "unshare may not be permitted"] + fn test_set_masked_paths() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + set_namespace(NamespaceType::Mount); + let root = PathBuf::from("/tmp/ozonec/test_set_masked_paths"); + let mut config = init_config(); + config.linux.as_mut().unwrap().maskedPaths = Some(vec![root.to_string_lossy().to_string()]); + let mut exist = false; + let container = LinuxContainer::new( + &String::from("test_set_masked_paths"), + &root.to_string_lossy().to_string(), + &config, + &None, + &mut exist, + ) + .unwrap(); + + File::create(root.join("test")).unwrap(); + assert!(container.set_masked_paths().is_ok()); + assert!(!root.join("test").exists()); + } + + #[test] + #[ignore = "unshare may not be permitted"] + fn test_set_rest_namespaces() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + let root = PathBuf::from("/tmp/ozonec/test_set_rest_namespaces"); + let mut config = init_config(); + config.linux.as_mut().unwrap().namespaces = vec![ + Namespace { + ns_type: NamespaceType::User, + path: None, + }, + Namespace { + ns_type: NamespaceType::Uts, + path: None, + }, + ]; + config.hostname = Some(String::from("test_set_rest_namespaces")); + config.domainname = Some(String::from("test_set_rest_namespaces")); + let mut exist = false; + let container = LinuxContainer::new( + &String::from("test_set_rest_namespaces"), + &root.to_string_lossy().to_string(), + &config, + &None, + &mut exist, + ) + .unwrap(); + + assert!(container.set_rest_namespaces().is_ok()); + assert_eq!( + gethostname().unwrap().to_str().unwrap(), + "test_set_rest_namespaces" + ); + let len = 100; + let mut domain: Vec = Vec::with_capacity(len); + unsafe { + getdomainname(domain.as_mut_ptr().cast(), len); + // Ensure always null-terminated. 
+ domain.as_mut_ptr().wrapping_add(len - 1).write(0); + let len = CStr::from_ptr(domain.as_ptr().cast()).to_bytes().len(); + domain.set_len(len); + } + assert_eq!(String::from_utf8_lossy(&domain), "test_set_rest_namespaces"); + } + + #[test] + #[ignore = "unshare may not be permitted"] + fn test_set_sysctl_parameters() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + set_namespace(NamespaceType::Mount); + let root = PathBuf::from("/tmp/ozonec/test_set_sysctl_parameters"); + let mut config = init_config(); + config.linux.as_mut().unwrap().sysctl = Some(HashMap::new()); + let sysctl = &mut config.linux.as_mut().unwrap().sysctl; + sysctl + .as_mut() + .unwrap() + .insert(String::from("vm.oom_dump_tasks"), String::from("0")); + + let mut exist = false; + let container = LinuxContainer::new( + &String::from("test_set_sysctl_parameters"), + &root.to_string_lossy().to_string(), + &config, + &None, + &mut exist, + ) + .unwrap(); + + let mounts = vec![OciMount { + destination: String::from("/proc"), + source: Some(String::from("proc")), + options: None, + fs_type: Some(String::from("proc")), + uidMappings: None, + gidMappings: None, + }]; + let mnt = Mount::new(&root); + mnt.do_mounts(&mounts, &None).unwrap(); + + assert!(container.set_sysctl_parameters().is_ok()); + assert_eq!(read_to_string("/proc/sys/vm/oom_dump_tasks").unwrap().trim(), "0"); + } + } +} diff --git a/ozonec/src/linux/device.rs b/ozonec/src/linux/device.rs new file mode 100644 index 0000000000000000000000000000000000000000..8ecc568b562d4dc6282da7d73edb5bc43b3180c8 --- /dev/null +++ b/ozonec/src/linux/device.rs @@ -0,0 +1,436 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
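+
+// Creation of device nodes under the container's /dev: the default device set
+// is created with mknod where permitted, falling back to bind mounting the
+// corresponding host nodes (for example when running inside a user namespace).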
+ +use std::{ + fs::{create_dir_all, remove_file, File}, + path::{Path, PathBuf}, +}; + +use anyhow::{anyhow, bail, Context, Result}; +use nix::{ + mount::MsFlags, + sys::stat::{makedev, mknod, Mode, SFlag}, + unistd::{chown, Gid, Uid}, +}; +use oci_spec::linux::Device as OciDevice; + +use crate::utils::OzonecErr; + +pub struct Device { + rootfs: PathBuf, +} + +impl Device { + pub fn new(rootfs: PathBuf) -> Self { + Self { rootfs } + } + + pub fn default_devices(&self) -> Vec { + vec![ + DeviceInfo { + path: self.rootfs.join("dev/null"), + dev_type: "c".to_string(), + major: 1, + minor: 3, + file_mode: Some(0o666u32), + uid: None, + gid: None, + }, + DeviceInfo { + path: self.rootfs.join("dev/zero"), + dev_type: "c".to_string(), + major: 1, + minor: 5, + file_mode: Some(0o666u32), + uid: None, + gid: None, + }, + DeviceInfo { + path: self.rootfs.join("dev/full"), + dev_type: "c".to_string(), + major: 1, + minor: 7, + file_mode: Some(0o666u32), + uid: None, + gid: None, + }, + DeviceInfo { + path: self.rootfs.join("dev/random"), + dev_type: "c".to_string(), + major: 1, + minor: 8, + file_mode: Some(0o666u32), + uid: None, + gid: None, + }, + DeviceInfo { + path: self.rootfs.join("dev/urandom"), + dev_type: "c".to_string(), + major: 1, + minor: 9, + file_mode: Some(0o666u32), + uid: None, + gid: None, + }, + DeviceInfo { + path: self.rootfs.join("dev/tty"), + dev_type: "c".to_string(), + major: 5, + minor: 0, + file_mode: Some(0o666u32), + uid: None, + gid: None, + }, + ] + } + + fn create_device_dir(&self, path: &PathBuf) -> Result<()> { + let dir = Path::new(path).parent().ok_or(anyhow!( + "Failed to get parent directory: {}", + path.display() + ))?; + if !dir.exists() { + create_dir_all(dir) + .with_context(|| OzonecErr::CreateDir(dir.to_string_lossy().to_string()))?; + } + Ok(()) + } + + fn get_sflag(&self, dev_type: &str) -> Result { + let sflag = match dev_type { + "c" => SFlag::S_IFCHR, + "b" => SFlag::S_IFBLK, + "u" => SFlag::S_IFCHR, + "p" => SFlag::S_IFIFO, + _ => bail!("Not supported device type: {}", dev_type), + }; + Ok(sflag) + } + + fn bind_device(&self, dev: &DeviceInfo) -> Result<()> { + self.create_device_dir(&dev.path)?; + + let binding = dev.path.to_string_lossy().to_string(); + let stripped_path = binding + .strip_prefix(&self.rootfs.to_string_lossy().to_string()) + .ok_or(anyhow!("Invalid device path"))?; + let src_path = PathBuf::from(stripped_path); + + if !dev.path.exists() { + File::create(&dev.path) + .with_context(|| format!("Failed to create {}", dev.path.display()))?; + } + nix::mount::mount( + Some(&src_path), + &dev.path, + Some("bind"), + MsFlags::MS_BIND, + None::<&str>, + ) + .with_context(|| OzonecErr::Mount(stripped_path.to_string()))?; + + Ok(()) + } + + fn mknod_device(&self, dev: &DeviceInfo) -> Result<()> { + self.create_device_dir(&dev.path)?; + + let sflag = self.get_sflag(&dev.dev_type)?; + let device = makedev(dev.major as u64, dev.minor as u64); + mknod( + &dev.path, + sflag, + Mode::from_bits_truncate(dev.file_mode.unwrap_or(0)), + device, + )?; + chown( + &dev.path, + dev.uid.map(Uid::from_raw), + dev.gid.map(Gid::from_raw), + ) + .with_context(|| "Failed to chown")?; + + Ok(()) + } + + pub fn create_default_devices(&self, mknod: bool) -> Result<()> { + let default_devs = self.default_devices(); + for dev in default_devs { + if mknod { + if self.mknod_device(&dev).is_err() { + self.bind_device(&dev).with_context(|| { + OzonecErr::BindDev(dev.path.to_string_lossy().to_string()) + })?; + } + } else { + self.bind_device(&dev) + .with_context(|| 
OzonecErr::BindDev(dev.path.to_string_lossy().to_string()))?; + } + } + Ok(()) + } + + pub fn is_default_device(&self, dev: &OciDevice) -> bool { + for d in &self.default_devices() { + let path = self.rootfs.join(&dev.path.clone()[1..]); + if path == d.path { + return true; + } + } + return false; + } + + pub fn delete_device(&self, dev: &OciDevice) -> Result<()> { + let path = self.rootfs.join(&dev.path.clone()[1..]); + remove_file(&path).with_context(|| format!("Failed to delete {}", path.display()))?; + Ok(()) + } + + pub fn create_device(&self, dev: &OciDevice, mknod: bool) -> Result<()> { + let path = self.rootfs.join(&dev.path.clone()[1..]); + let major = dev + .major + .ok_or(anyhow!("major not set for device {}", dev.path))?; + let minor = dev + .minor + .ok_or(anyhow!("minor not set for device {}", dev.path))?; + let dev_info = DeviceInfo { + path, + dev_type: dev.dev_type.clone(), + major, + minor, + file_mode: dev.fileMode, + uid: dev.uid, + gid: dev.gid, + }; + + if mknod { + if self.mknod_device(&dev_info).is_err() { + self.bind_device(&dev_info).with_context(|| { + OzonecErr::BindDev(dev_info.path.to_string_lossy().to_string()) + })?; + } + } else { + self.bind_device(&dev_info) + .with_context(|| OzonecErr::BindDev(dev_info.path.to_string_lossy().to_string()))?; + } + Ok(()) + } +} + +pub struct DeviceInfo { + pub path: PathBuf, + dev_type: String, + major: i64, + minor: i64, + file_mode: Option, + uid: Option, + gid: Option, +} + +#[cfg(test)] +mod tests { + use std::{ + fs, + os::unix::fs::{FileTypeExt, MetadataExt, PermissionsExt}, + }; + + use nix::mount::umount; + + use super::*; + + #[test] + fn test_mknod_dev() { + let rootfs = PathBuf::from("/tmp/ozonec/mknod_dev"); + create_dir_all(&rootfs).unwrap(); + let dev = Device::new(rootfs.clone()); + let path = rootfs.join("mknod_dev"); + if path.exists() { + remove_file(&path).unwrap(); + } + let dev_info = DeviceInfo { + path: path.clone(), + dev_type: "c".to_string(), + major: 1, + minor: 3, + file_mode: Some(0o644u32), + uid: Some(1000u32), + gid: Some(1000u32), + }; + + assert!(dev.mknod_device(&dev_info).is_ok()); + assert!(path.exists()); + + let metadata = fs::metadata(&path).unwrap(); + assert!(metadata.file_type().is_char_device()); + let major = (metadata.rdev() >> 8) as u32; + let minor = (metadata.rdev() & 0xff) as u32; + assert_eq!(major, 1); + assert_eq!(minor, 3); + let file_mode = metadata.permissions().mode(); + assert_eq!(file_mode & 0o777, 0o644u32); + assert_eq!(metadata.uid(), 1000); + assert_eq!(metadata.gid(), 1000); + + fs::remove_dir_all("/tmp/ozonec").unwrap(); + } + + #[test] + #[ignore = "mount may not be permitted"] + fn test_bind_dev() { + let rootfs = PathBuf::from("/tmp/ozonec/bind_dev"); + create_dir_all(&rootfs).unwrap(); + let dev_path = PathBuf::from("/mknod_dev"); + if dev_path.exists() { + remove_file(&dev_path).unwrap(); + } + let dev = makedev(1, 3); + mknod( + &dev_path, + SFlag::S_IFCHR, + Mode::from_bits_truncate(0o644u32), + dev, + ) + .unwrap(); + let dev_to_bind = Device::new(rootfs.clone()); + let binded_path = rootfs.join("mknod_dev"); + if binded_path.exists() { + umount(&binded_path).unwrap(); + remove_file(&binded_path).unwrap(); + } + let dev_info = DeviceInfo { + path: binded_path.clone(), + dev_type: "c".to_string(), + major: 1, + minor: 3, + file_mode: Some(0o644u32), + uid: Some(1000u32), + gid: Some(1000u32), + }; + + assert!(dev_to_bind.bind_device(&dev_info).is_ok()); + + let metadata = fs::metadata(&dev_path).unwrap(); + let binded_metadata = 
fs::metadata(&binded_path).unwrap(); + assert_eq!(binded_metadata.file_type(), metadata.file_type()); + assert_eq!(binded_metadata.rdev(), metadata.rdev()); + assert_eq!(binded_metadata.permissions(), metadata.permissions()); + assert_eq!(binded_metadata.uid(), metadata.uid()); + assert_eq!(binded_metadata.gid(), metadata.gid()); + + umount(&binded_path).unwrap(); + fs::remove_dir_all("/tmp/ozonec").unwrap(); + fs::remove_file(dev_path).unwrap(); + } + + #[test] + fn test_create_device() { + let oci_dev = OciDevice { + dev_type: "c".to_string(), + path: "/mknod_dev".to_string(), + major: Some(1), + minor: Some(3), + fileMode: Some(0o644u32), + uid: Some(1000), + gid: Some(1000), + }; + let rootfs = PathBuf::from("/tmp/ozonec/create_device"); + create_dir_all(&rootfs).unwrap(); + let path = rootfs.join("mknod_dev"); + if path.exists() { + remove_file(&path).unwrap(); + } + let dev = Device::new(rootfs.clone()); + + assert!(dev.create_device(&oci_dev, true).is_ok()); + assert!(path.exists()); + + let metadata = fs::metadata(&path).unwrap(); + assert!(metadata.file_type().is_char_device()); + let major = (metadata.rdev() >> 8) as u32; + let minor = (metadata.rdev() & 0xff) as u32; + assert_eq!(major, 1); + assert_eq!(minor, 3); + let file_mode = metadata.permissions().mode(); + assert_eq!(file_mode & 0o777, 0o644u32); + assert_eq!(metadata.uid(), 1000); + assert_eq!(metadata.gid(), 1000); + + fs::remove_dir_all("/tmp/ozonec").unwrap(); + } + + #[test] + fn test_delete_device() { + let oci_dev = OciDevice { + dev_type: "c".to_string(), + path: "/mknod_dev".to_string(), + major: Some(1), + minor: Some(3), + fileMode: Some(0o644u32), + uid: Some(1000), + gid: Some(1000), + }; + let rootfs = PathBuf::from("/tmp/ozonec/delete_device"); + create_dir_all(&rootfs).unwrap(); + let path = rootfs.join("mknod_dev"); + if path.exists() { + remove_file(&path).unwrap(); + } + let dev = Device::new(rootfs.clone()); + dev.create_device(&oci_dev, true).unwrap(); + + assert!(dev.delete_device(&oci_dev).is_ok()); + assert!(!path.exists()); + + fs::remove_dir_all("/tmp/ozonec").unwrap(); + } + + #[test] + fn test_default_device() { + let rootfs = PathBuf::from("/tmp/ozonec/default_device"); + let dev = Device::new(rootfs.clone()); + + let mut oci_dev = OciDevice { + dev_type: "c".to_string(), + path: "mknod_dev".to_string(), + major: Some(1), + minor: Some(3), + fileMode: Some(0o644u32), + uid: Some(1000), + gid: Some(1000), + }; + assert!(!dev.is_default_device(&oci_dev)); + oci_dev.path = "/dev/null".to_string(); + assert!(dev.is_default_device(&oci_dev)); + oci_dev.path = "/dev/zero".to_string(); + assert!(dev.is_default_device(&oci_dev)); + oci_dev.path = "/dev/full".to_string(); + assert!(dev.is_default_device(&oci_dev)); + oci_dev.path = "/dev/random".to_string(); + assert!(dev.is_default_device(&oci_dev)); + oci_dev.path = "/dev/urandom".to_string(); + assert!(dev.is_default_device(&oci_dev)); + oci_dev.path = "/dev/tty".to_string(); + assert!(dev.is_default_device(&oci_dev)); + } + + #[test] + fn test_get_sflag() { + let rootfs = PathBuf::from("/tmp/ozonec/test_get_sflag"); + let dev = Device::new(rootfs.clone()); + + assert_eq!(dev.get_sflag("c").unwrap(), SFlag::S_IFCHR); + assert_eq!(dev.get_sflag("b").unwrap(), SFlag::S_IFBLK); + assert_eq!(dev.get_sflag("p").unwrap(), SFlag::S_IFIFO); + assert_eq!(dev.get_sflag("u").unwrap(), SFlag::S_IFCHR); + } +} diff --git a/ozonec/src/linux/mod.rs b/ozonec/src/linux/mod.rs new file mode 100644 index 
0000000000000000000000000000000000000000..658f50c707d3170f476497750686f6be443f53ed --- /dev/null +++ b/ozonec/src/linux/mod.rs @@ -0,0 +1,29 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod container; + +mod apparmor; +mod device; +mod mount; +mod namespace; +mod notify_socket; +mod process; +mod rootfs; +mod seccomp; +mod terminal; + +pub use container::LinuxContainer; +pub use notify_socket::NotifyListener; +#[allow(unused_imports)] +pub use process::clone_process; +pub use process::Process; diff --git a/ozonec/src/linux/mount.rs b/ozonec/src/linux/mount.rs new file mode 100644 index 0000000000000000000000000000000000000000..af44bd3c863b739a3d38ce956c2b0f57e51e4b95 --- /dev/null +++ b/ozonec/src/linux/mount.rs @@ -0,0 +1,454 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
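The mount.rs module below turns OCI mount entries into mount(2) calls; much of the work is folding the option strings into a single MsFlags value, where options such as "rw", "dev" or "exec" clear a flag, the rest set one, and anything unrecognised is passed through as mount data. A small self-contained sketch of that folding, covering only a few options (the function name is illustrative, not part of the patch):

use nix::mount::MsFlags;

// Fold a handful of OCI-style mount options into MsFlags. Unknown options
// would become the comma-separated data string in the real implementation.
fn fold_options(options: &[&str]) -> MsFlags {
    let mut flags = MsFlags::empty();
    for opt in options {
        match *opt {
            "ro" => flags |= MsFlags::MS_RDONLY,
            "rw" => flags &= !MsFlags::MS_RDONLY,
            "nosuid" => flags |= MsFlags::MS_NOSUID,
            "nodev" => flags |= MsFlags::MS_NODEV,
            "noexec" => flags |= MsFlags::MS_NOEXEC,
            "rbind" => flags |= MsFlags::MS_BIND | MsFlags::MS_REC,
            _ => (),
        }
    }
    flags
}

fn main() {
    let flags = fold_options(&["ro", "nosuid", "nodev"]);
    assert_eq!(
        flags,
        MsFlags::MS_RDONLY | MsFlags::MS_NOSUID | MsFlags::MS_NODEV
    );
}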
+ +use std::{ + collections::HashMap, + fs::{self, canonicalize, create_dir_all, read_to_string}, + path::{Path, PathBuf}, +}; + +use anyhow::{anyhow, bail, Context, Result}; +use nix::{ + mount::MsFlags, + sys::statfs::{statfs, CGROUP2_SUPER_MAGIC}, + unistd::close, +}; +use procfs::process::{MountInfo, Process}; + +use crate::utils::{openat2_in_root, proc_fd_path, OzonecErr}; +use oci_spec::runtime::Mount as OciMount; + +#[derive(PartialEq, Debug)] +enum CgroupType { + CgroupV1, + CgroupV2, +} + +pub struct Mount { + rootfs: PathBuf, +} + +impl Mount { + pub fn new(rootfs: &PathBuf) -> Self { + Self { + rootfs: rootfs.clone(), + } + } + + fn get_mount_flag_data(&self, mount: &OciMount) -> (MsFlags, String) { + let mut ms_flags = MsFlags::empty(); + let mut data = Vec::new(); + + if let Some(options) = &mount.options { + for option in options { + if let Some((clear, flag)) = match option.as_str() { + "defaults" => Some((false, MsFlags::empty())), + "ro" => Some((false, MsFlags::MS_RDONLY)), + "rw" => Some((true, MsFlags::MS_RDONLY)), + "suid" => Some((true, MsFlags::MS_NOSUID)), + "nosuid" => Some((false, MsFlags::MS_NOSUID)), + "dev" => Some((true, MsFlags::MS_NODEV)), + "nodev" => Some((false, MsFlags::MS_NODEV)), + "exec" => Some((true, MsFlags::MS_NOEXEC)), + "noexec" => Some((false, MsFlags::MS_NOEXEC)), + "sync" => Some((false, MsFlags::MS_SYNCHRONOUS)), + "async" => Some((true, MsFlags::MS_SYNCHRONOUS)), + "dirsync" => Some((false, MsFlags::MS_DIRSYNC)), + "remount" => Some((false, MsFlags::MS_REMOUNT)), + "mand" => Some((false, MsFlags::MS_MANDLOCK)), + "nomand" => Some((true, MsFlags::MS_MANDLOCK)), + "atime" => Some((true, MsFlags::MS_NOATIME)), + "noatime" => Some((false, MsFlags::MS_NOATIME)), + "diratime" => Some((true, MsFlags::MS_NODIRATIME)), + "nodiratime" => Some((false, MsFlags::MS_NODIRATIME)), + "bind" => Some((false, MsFlags::MS_BIND)), + "rbind" => Some((false, MsFlags::MS_BIND | MsFlags::MS_REC)), + "unbindable" => Some((false, MsFlags::MS_UNBINDABLE)), + "runbindable" => Some((false, MsFlags::MS_UNBINDABLE | MsFlags::MS_REC)), + "private" => Some((false, MsFlags::MS_PRIVATE)), + "rprivate" => Some((false, MsFlags::MS_PRIVATE | MsFlags::MS_REC)), + "shared" => Some((false, MsFlags::MS_SHARED)), + "rshared" => Some((false, MsFlags::MS_SHARED | MsFlags::MS_REC)), + "slave" => Some((false, MsFlags::MS_SLAVE)), + "rslave" => Some((false, MsFlags::MS_SLAVE | MsFlags::MS_REC)), + "relatime" => Some((false, MsFlags::MS_RELATIME)), + "norelatime" => Some((true, MsFlags::MS_RELATIME)), + "strictatime" => Some((false, MsFlags::MS_STRICTATIME)), + "nostrictatime" => Some((true, MsFlags::MS_STRICTATIME)), + _ => None, + } { + if clear { + ms_flags &= !flag; + } else { + ms_flags |= flag; + } + continue; + } + data.push(option.as_str()); + } + } + (ms_flags, data.join(",")) + } + + fn do_one_mount(&self, mount: &OciMount, label: &Option) -> Result<()> { + let mut fs_type = mount.fs_type.as_deref(); + let (mnt_flags, mut data) = self.get_mount_flag_data(mount); + if let Some(label) = label { + if fs_type != Some("proc") && fs_type != Some("sysfs") { + match data.is_empty() { + true => data = format!("context=\"{}\"", label), + false => data = format!("{},context=\"{}\"", data, label), + } + } + } + + let src_binding = mount + .source + .clone() + .ok_or(anyhow!("Mount source not set"))?; + let mut source = Path::new(&src_binding); + let canonicalized; + // Strip the first "/". 
+ let target_binding = self.rootfs.join(&mount.destination[1..]); + let target = Path::new(&target_binding); + + if !(mnt_flags & MsFlags::MS_BIND).is_empty() { + canonicalized = canonicalize(source) + .with_context(|| format!("Failed to canonicalize {}", source.display()))?; + source = canonicalized.as_path(); + let dir = if source.is_file() { + target.parent().ok_or(anyhow!( + "Failed to get parent directory: {}", + target.display() + ))? + } else { + target + }; + create_dir_all(dir) + .with_context(|| OzonecErr::CreateDir(dir.to_string_lossy().to_string()))?; + // Actually when MS_BIND is set, filesystemtype is ignored by mount syscall. + fs_type = Some("bind"); + } else { + // Sysfs doesn't support duplicate mounting to one directory. + if self.is_mounted_sysfs_dir(&target.to_string_lossy().to_string()) { + nix::mount::umount(target) + .with_context(|| format!("Failed to umount {}", target.display()))?; + } + } + + let target_fd = openat2_in_root( + &Path::new(&self.rootfs), + &Path::new(&mount.destination[1..]), + !source.is_file(), + )?; + nix::mount::mount( + Some(source), + &proc_fd_path(target_fd), + fs_type, + mnt_flags, + Some(data.as_str()), + ) + .with_context(|| OzonecErr::Mount(source.to_string_lossy().to_string()))?; + close(target_fd).with_context(|| OzonecErr::CloseFd)?; + Ok(()) + } + + fn is_mounted_sysfs_dir(&self, path: &str) -> bool { + if let Ok(metadata) = fs::metadata(path) { + if metadata.file_type().is_dir() { + if let Ok(mounts) = read_to_string("/proc/mounts") { + for line in mounts.lines() { + let parts: Vec<&str> = line.split_whitespace().collect(); + if parts.len() >= 3 && parts[1] == path && parts[2] == "sysfs" { + return true; + } + } + } + } + } + false + } + + pub fn do_mounts(&self, mounts: &Vec, label: &Option) -> Result<()> { + for mount in mounts { + match mount.fs_type.as_deref() { + Some("cgroup") => match self.cgroup_type()? { + CgroupType::CgroupV1 => self + .do_cgroup_mount(mount) + .with_context(|| "Failed to do cgroup mount")?, + CgroupType::CgroupV2 => bail!("Cgroup V2 is not supported now"), + }, + _ => self.do_one_mount(mount, label)?, + } + } + Ok(()) + } + + fn do_cgroup_mount(&self, mount: &OciMount) -> Result<()> { + // Strip the first "/". + let rel_target = Path::new(&mount.destination[1..]); + let target_fd = openat2_in_root(&Path::new(&self.rootfs), rel_target, true)?; + nix::mount::mount( + Some("tmpfs"), + &proc_fd_path(target_fd), + Some("tmpfs"), + MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID | MsFlags::MS_NODEV, + None::<&str>, + ) + .with_context(|| OzonecErr::Mount(String::from("tmpfs")))?; + close(target_fd).with_context(|| OzonecErr::CloseFd)?; + + let process = Process::myself().with_context(|| OzonecErr::AccessProcSelf)?; + let mnt_info: Vec = + process.mountinfo().with_context(|| OzonecErr::GetMntInfo)?; + let proc_cgroups: HashMap = process + .cgroups() + .with_context(|| "Failed to get cgroups belong to")? + .into_iter() + .map(|cgroup| (cgroup.controllers.join(","), cgroup.pathname)) + .collect(); + // Get all of available cgroup mount points. + let host_cgroups: Vec = mnt_info + .into_iter() + .filter(|m| m.fs_type == "cgroup") + .map(|m| m.mount_point) + .collect(); + for cg_path in host_cgroups { + let cg = cg_path + .file_name() + .ok_or(anyhow!("Failed to get controller file"))? 
+ .to_str() + .ok_or(anyhow!( + "Convert {:?} to string error", + cg_path.file_name().unwrap() + ))?; + let proc_cg_key = if cg == "systemd" { + String::from("systemd") + } else { + cg.to_string() + }; + + if let Some(src) = proc_cgroups.get(&proc_cg_key) { + let source = cg_path.join(&src[1..]); + let rel_target = cg_path + .strip_prefix("/") + .with_context(|| format!("{} doesn't start with '/'", cg_path.display()))?; + let target_fd = openat2_in_root(&Path::new(&self.rootfs), rel_target, true)?; + + nix::mount::mount( + Some(&source), + &proc_fd_path(target_fd), + Some("bind"), + MsFlags::MS_BIND | MsFlags::MS_REC, + None::<&str>, + ) + .with_context(|| OzonecErr::Mount(source.to_string_lossy().to_string()))?; + close(target_fd).with_context(|| OzonecErr::CloseFd)?; + } + } + + Ok(()) + } + + fn cgroup_type(&self) -> Result { + let cgroup_path = Path::new("/sys/fs/cgroup"); + if !cgroup_path.exists() { + bail!("/sys/fs/cgroup doesn't exist."); + } + + let st = statfs(cgroup_path).with_context(|| "statfs /sys/fs/cgroup error")?; + if st.filesystem_type() == CGROUP2_SUPER_MAGIC { + return Ok(CgroupType::CgroupV2); + } + Ok(CgroupType::CgroupV1) + } +} + +#[cfg(test)] +mod tests { + use rusty_fork::rusty_fork_test; + + use crate::linux::namespace::tests::set_namespace; + use oci_spec::linux::NamespaceType; + + use super::*; + + fn init_mount(rootfs: &str) -> Mount { + let path = PathBuf::from(rootfs); + create_dir_all(&path).unwrap(); + Mount::new(&path) + } + + #[test] + fn test_is_mounted_sysfs_dir() { + let mut path = PathBuf::from("/test"); + let mut mnt = Mount::new(&path); + assert!(!mnt.is_mounted_sysfs_dir(path.to_str().unwrap())); + + path = PathBuf::from("/sys"); + mnt = Mount::new(&path); + assert!(mnt.is_mounted_sysfs_dir(path.to_str().unwrap())); + } + + #[test] + fn test_cgroup_type() { + let rootfs = PathBuf::from("/tmp/ozonec/test_cgroup_type"); + let mnt = Mount::new(&rootfs); + let cgroup_path = Path::new("/sys/fs/cgroup"); + + if !cgroup_path.exists() { + assert!(mnt.cgroup_type().is_err()); + } else { + let st = statfs(cgroup_path).unwrap(); + if st.filesystem_type() == CGROUP2_SUPER_MAGIC { + assert_eq!(mnt.cgroup_type().unwrap(), CgroupType::CgroupV2); + } else { + assert_eq!(mnt.cgroup_type().unwrap(), CgroupType::CgroupV1); + } + } + } + + #[test] + fn test_get_mount_flag_data() { + let rootfs = PathBuf::from("/test_get_mount_flag_data"); + let mnt = Mount::new(&rootfs); + let mut oci_mnt = OciMount { + destination: String::new(), + source: None, + options: Some(vec![ + String::from("defaults"), + String::from("rw"), + String::from("suid"), + String::from("dev"), + String::from("exec"), + String::from("async"), + String::from("nomand"), + String::from("atime"), + String::from("diratime"), + String::from("norelatime"), + String::from("nostrictatime"), + ]), + fs_type: None, + uidMappings: None, + gidMappings: None, + }; + + let (flags, _data) = mnt.get_mount_flag_data(&oci_mnt); + assert_eq!(flags, MsFlags::empty()); + + oci_mnt.options = Some(vec![ + String::from("ro"), + String::from("nosuid"), + String::from("nodev"), + String::from("noexec"), + String::from("sync"), + String::from("dirsync"), + String::from("remount"), + String::from("mand"), + String::from("noatime"), + String::from("nodiratime"), + String::from("bind"), + String::from("unbindable"), + String::from("private"), + String::from("shared"), + String::from("slave"), + String::from("relatime"), + String::from("strictatime"), + ]); + let (flags, _data) = mnt.get_mount_flag_data(&oci_mnt); + assert_eq!( 
+ flags, + MsFlags::MS_RDONLY + | MsFlags::MS_NOSUID + | MsFlags::MS_NODEV + | MsFlags::MS_NOEXEC + | MsFlags::MS_SYNCHRONOUS + | MsFlags::MS_DIRSYNC + | MsFlags::MS_REMOUNT + | MsFlags::MS_MANDLOCK + | MsFlags::MS_NOATIME + | MsFlags::MS_NODIRATIME + | MsFlags::MS_BIND + | MsFlags::MS_UNBINDABLE + | MsFlags::MS_PRIVATE + | MsFlags::MS_SHARED + | MsFlags::MS_SLAVE + | MsFlags::MS_RELATIME + | MsFlags::MS_STRICTATIME + ); + + oci_mnt.options = Some(vec![String::from("rbind")]); + let (flags, _data) = mnt.get_mount_flag_data(&oci_mnt); + assert_eq!(flags, MsFlags::MS_BIND | MsFlags::MS_REC); + oci_mnt.options = Some(vec![String::from("runbindable")]); + let (flags, _data) = mnt.get_mount_flag_data(&oci_mnt); + assert_eq!(flags, MsFlags::MS_UNBINDABLE | MsFlags::MS_REC); + oci_mnt.options = Some(vec![String::from("rprivate")]); + let (flags, _data) = mnt.get_mount_flag_data(&oci_mnt); + assert_eq!(flags, MsFlags::MS_PRIVATE | MsFlags::MS_REC); + oci_mnt.options = Some(vec![String::from("rshared")]); + let (flags, _data) = mnt.get_mount_flag_data(&oci_mnt); + assert_eq!(flags, MsFlags::MS_SHARED | MsFlags::MS_REC); + oci_mnt.options = Some(vec![String::from("rslave")]); + let (flags, _data) = mnt.get_mount_flag_data(&oci_mnt); + assert_eq!(flags, MsFlags::MS_SLAVE | MsFlags::MS_REC); + } + + rusty_fork_test! { + #[test] + #[ignore = "unshare may not be permitted"] + fn test_do_mounts_cgroup() { + set_namespace(NamespaceType::Mount); + + let mounts = vec![OciMount { + destination: String::from("/sys/fs/cgroup"), + source: Some(String::from("cgroup")), + options: Some(vec![ + String::from("nosuid"), + String::from("noexec"), + String::from("nodev"), + String::from("relatime"), + String::from("ro"), + ]), + fs_type: Some(String::from("cgroup")), + uidMappings: None, + gidMappings: None, + }]; + let mnt = init_mount("/tmp/ozonec/test_do_mounts_cgroup"); + + assert!(mnt.do_mounts(&mounts, &None).is_ok()); + assert!(mnt.rootfs.join("sys/fs/cgroup").exists()); + } + + #[test] + #[ignore = "unshare may not be permitted"] + fn test_do_mounts_bind() { + set_namespace(NamespaceType::Mount); + + let mounts = vec![OciMount { + destination: String::from("/dest"), + source: Some(String::from("/tmp/ozonec/test_do_mounts_bind/source")), + options: Some(vec![ + String::from("rbind") + ]), + fs_type: None, + uidMappings: None, + gidMappings: None, + }]; + let mnt = init_mount("/tmp/ozonec/test_do_mounts_bind"); + create_dir_all(&mnt.rootfs.join("source")).unwrap(); + + assert!(mnt.do_mounts(&mounts, &None).is_ok()); + assert!(mnt.rootfs.join("dest").exists()); + } + } +} diff --git a/ozonec/src/linux/namespace.rs b/ozonec/src/linux/namespace.rs new file mode 100644 index 0000000000000000000000000000000000000000..819bd99ef87ebb9ed143a0f201e07313abb9e52e --- /dev/null +++ b/ozonec/src/linux/namespace.rs @@ -0,0 +1,141 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
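The namespace.rs module below handles OCI namespace entries in two ways: when an entry carries a path (typically /proc/<pid>/ns/<type>) the process joins that namespace with setns(2), otherwise it unshares a fresh one. A hedged sketch of those two paths for the mount namespace, written against plain nix (the helper name and the path argument are hypothetical):

use nix::{
    fcntl::{open, OFlag},
    sched::{setns, unshare, CloneFlags},
    sys::stat::Mode,
    unistd::close,
};

// Join the mount namespace referred to by `path`, or create a new one when
// no path is supplied.
fn join_or_create_mount_ns(path: Option<&str>) -> nix::Result<()> {
    match path {
        None => unshare(CloneFlags::CLONE_NEWNS),
        Some(p) => {
            let fd = open(p, OFlag::O_RDONLY, Mode::empty())?;
            let ret = setns(fd, CloneFlags::CLONE_NEWNS);
            close(fd)?;
            ret
        }
    }
}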
+ +use std::collections::HashMap; + +use anyhow::{Context, Result}; +use nix::{ + fcntl::{self, OFlag}, + sched::{setns, unshare, CloneFlags}, + sys::stat::Mode, + unistd, +}; +use oci_spec::linux::{Namespace, NamespaceType}; + +pub struct NsController { + pub namespaces: HashMap, +} + +impl TryFrom> for NsController { + type Error = anyhow::Error; + + fn try_from(namespaces: Vec) -> Result { + Ok(NsController { + namespaces: namespaces + .iter() + .map(|ns| match ns.ns_type.try_into() { + Ok(flag) => Ok((flag, ns.clone())), + Err(e) => Err(e), + }) + .collect::>>()? + .into_iter() + .collect(), + }) + } +} + +impl NsController { + pub fn set_namespace(&self, ns_type: NamespaceType) -> Result<()> { + if let Some(ns) = self.get(ns_type)? { + match ns.path.clone() { + Some(path) => { + let fd = fcntl::open(&path, OFlag::empty(), Mode::empty()) + .with_context(|| format!("fcntl error at opening {}", path.display()))?; + setns(fd, ns_type.try_into()?).with_context(|| "Failed to setns")?; + unistd::close(fd).with_context(|| "Close fcntl fd error")?; + } + None => unshare(ns_type.try_into()?).with_context(|| "Failed to unshare")?, + } + } + Ok(()) + } + + pub fn get(&self, ns_type: NamespaceType) -> Result> { + let clone_flags: CloneFlags = ns_type.try_into()?; + Ok(self.namespaces.get(&clone_flags)) + } +} + +#[cfg(test)] +pub mod tests { + use std::{path::PathBuf, thread::sleep, time::Duration}; + + use nix::sys::{ + signal::{self, Signal}, + wait::{waitpid, WaitStatus}, + }; + + use crate::linux::process::clone_process; + + use super::*; + + fn init_ns_controller(ns_type: NamespaceType) -> NsController { + let mut ns_ctrl = NsController { + namespaces: HashMap::new(), + }; + let ns = Namespace { + ns_type, + path: None, + }; + ns_ctrl.namespaces.insert(ns_type.try_into().unwrap(), ns); + ns_ctrl + } + + pub fn set_namespace(ns_type: NamespaceType) { + let ns_ctrl = init_ns_controller(ns_type); + ns_ctrl.set_namespace(ns_type).unwrap(); + } + + #[test] + #[ignore = "unshare may not be permitted"] + fn test_set_namespace() { + let mut ns_ctrl = init_ns_controller(NamespaceType::Mount); + let fst_child = clone_process("test_set_namespace_with_unshare", || { + assert!(ns_ctrl.set_namespace(NamespaceType::Mount).is_ok()); + sleep(Duration::from_secs(10)); + Ok(1) + }) + .unwrap(); + + let ns_path = PathBuf::from(format!("/proc/{}/ns/mnt", fst_child.as_raw())); + ns_ctrl + .namespaces + .get_mut(&CloneFlags::CLONE_NEWNS) + .unwrap() + .path = Some(ns_path); + let sec_child = clone_process("test_set_namespace_with_setns", || { + assert!(ns_ctrl.set_namespace(NamespaceType::Mount).is_ok()); + Ok(1) + }) + .unwrap(); + + match waitpid(sec_child, None) { + Ok(WaitStatus::Exited(_, s)) => { + assert_eq!(s, 1); + } + Ok(_) => (), + Err(e) => { + panic!("Failed to waitpid for unshare process: {e}"); + } + } + signal::kill(fst_child.clone(), Signal::SIGKILL).unwrap(); + match waitpid(fst_child, None) { + Ok(WaitStatus::Exited(_, s)) => { + assert_eq!(s, 1); + } + Ok(_) => (), + Err(e) => { + panic!("Failed to waitpid for setns process: {e}"); + } + } + } +} diff --git a/ozonec/src/linux/notify_socket.rs b/ozonec/src/linux/notify_socket.rs new file mode 100644 index 0000000000000000000000000000000000000000..356be384cc446794eedf8a09f035c341e7faad27 --- /dev/null +++ b/ozonec/src/linux/notify_socket.rs @@ -0,0 +1,129 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + env, + io::{Read, Write}, + os::unix::{ + io::AsRawFd, + net::{UnixListener, UnixStream}, + }, + path::PathBuf, +}; + +use anyhow::{anyhow, bail, Context, Result}; +use nix::unistd::{self, chdir}; + +use crate::utils::OzonecErr; + +pub const NOTIFY_SOCKET: &str = "notify.sock"; + +pub struct NotifyListener { + socket: UnixListener, +} + +impl NotifyListener { + pub fn new(root: PathBuf) -> Result<Self> { + // The path of a Unix domain socket is limited to 108 bytes, which is smaller than + // the maximum file name length on Linux (255). + let cwd = env::current_dir().with_context(|| OzonecErr::GetCurDir)?; + chdir(&root).with_context(|| "Failed to chdir to root directory")?; + let listener = + UnixListener::bind(NOTIFY_SOCKET).with_context(|| "Failed to bind notify socket")?; + chdir(&cwd).with_context(|| "Failed to chdir to previous working directory")?; + Ok(Self { socket: listener }) + } + + pub fn wait_for_start_container(&self) -> Result<()> { + match self.socket.accept() { + Ok((mut socket, _)) => { + let mut response = String::new(); + socket + .read_to_string(&mut response) + .with_context(|| "Invalid response from notify socket")?; + } + Err(e) => { + bail!("Failed to accept on notify socket: {}", e); + } + } + Ok(()) + } + + pub fn close(&self) -> Result<()> { + Ok(unistd::close(self.socket.as_raw_fd())?)
+ } +} + +pub struct NotifySocket { + path: PathBuf, +} + +impl NotifySocket { + pub fn new(path: &PathBuf) -> Self { + Self { path: path.into() } + } + + pub fn notify_container_start(&mut self) -> Result<()> { + let cwd = env::current_dir().with_context(|| OzonecErr::GetCurDir)?; + let root_path = self + .path + .parent() + .ok_or(anyhow!("Invalid notify socket path"))?; + chdir(root_path).with_context(|| "Failed to chdir to root directory")?; + + let mut stream = + UnixStream::connect(NOTIFY_SOCKET).with_context(|| "Failed to connect notify.sock")?; + stream.write_all(b"start container")?; + chdir(&cwd).with_context(|| "Failed to chdir to previous working directory")?; + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use std::fs::{create_dir_all, remove_dir_all}; + + use nix::sys::wait::{waitpid, WaitStatus}; + + use crate::linux::process::clone_process; + + use super::*; + + #[test] + fn test_notify_socket() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + let root = PathBuf::from("/tmp/ozonec/notify_socket"); + create_dir_all(&root).unwrap(); + + let socket_path = root.join(NOTIFY_SOCKET); + let mut socket = NotifySocket::new(&socket_path); + let listener = NotifyListener::new(root.clone()).unwrap(); + let child = clone_process("notify_socket", || { + listener.wait_for_start_container().unwrap(); + Ok(1) + }) + .unwrap(); + socket.notify_container_start().unwrap(); + + match waitpid(child, None) { + Ok(WaitStatus::Exited(_, s)) => { + assert_eq!(s, 1); + } + Ok(_) => (), + Err(e) => { + panic!("Failed to waitpid for child process: {e}"); + } + } + } +} diff --git a/ozonec/src/linux/process.rs b/ozonec/src/linux/process.rs new file mode 100644 index 0000000000000000000000000000000000000000..2159727db7fb2d522117e616ac8e5488a645cc8f --- /dev/null +++ b/ozonec/src/linux/process.rs @@ -0,0 +1,778 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
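Among the per-process settings, process.rs below applies the OCI I/O priority through the raw ioprio_set syscall, encoding class and priority as (class << 13) | priority, which matches the kernel's IOPRIO_PRIO_VALUE layout (best-effort, class 2, at priority 4 encodes to 0x4004). A tiny worked example of that encoding; the constant and helper names are illustrative, not patch code:

// IOPRIO_CLASS_SHIFT is 13 in the kernel's ioprio ABI.
const IOPRIO_CLASS_SHIFT: u32 = 13;

fn ioprio_value(class: i64, priority: i64) -> i64 {
    (class << IOPRIO_CLASS_SHIFT) | priority
}

fn main() {
    // Class 2 (best-effort) with priority level 4.
    assert_eq!(ioprio_value(2, 4), 0x4004);
    // Class 3 (idle) takes no priority level.
    assert_eq!(ioprio_value(3, 0), 3i64 << IOPRIO_CLASS_SHIFT);
}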
+ +use std::{ + env, + ffi::CString, + fs::{self, read_to_string}, + io::{stderr, stdin, stdout}, + mem, + os::unix::{ + io::{AsRawFd, RawFd}, + net::UnixStream, + }, + path::PathBuf, + str::FromStr, +}; + +use anyhow::{anyhow, bail, Context, Result}; +use caps::{self, CapSet, Capability, CapsHashSet}; +use libc::SIGCHLD; +use nix::{ + errno::Errno, + sched::{clone, CloneFlags}, + unistd::{self, chdir, setresgid, setresuid, Gid, Pid, Uid}, +}; +use rlimit::{setrlimit, Resource, Rlim}; + +use super::{ + apparmor, + terminal::{connect_stdio, setup_console}, +}; +use crate::utils::{prctl, Clone3, OzonecErr}; +use oci_spec::{linux::IoPriClass, process::Process as OciProcess}; + +pub struct Process { + pub stdin: Option, + pub stdout: Option, + pub stderr: Option, + pub init: bool, + pub tty: bool, + pub oci: OciProcess, +} + +impl Process { + pub fn new(oci: &OciProcess, init: bool) -> Self { + let mut p = Process { + stdin: None, + stdout: None, + stderr: None, + tty: oci.terminal, + init, + oci: oci.clone(), + }; + + if !p.tty { + p.stdin = Some(stdin().as_raw_fd()); + p.stdout = Some(stdout().as_raw_fd()); + p.stderr = Some(stderr().as_raw_fd()); + } + p + } + + pub fn set_tty(&self, console_fd: Option, mount: bool) -> Result<()> { + if self.tty { + if console_fd.is_none() { + bail!("Terminal is specified, but no console socket set"); + } + setup_console(&console_fd.unwrap().as_raw_fd(), mount) + .with_context(|| "Failed to setup console")?; + } else { + connect_stdio( + self.stdin.as_ref().unwrap(), + self.stdout.as_ref().unwrap(), + self.stderr.as_ref().unwrap(), + )?; + // SAFETY: FFI call with valid arguments. + unsafe { libc::ioctl(0, libc::TIOCSCTTY) }; + } + Ok(()) + } + + pub fn set_oom_score_adj(&self) -> Result<()> { + if let Some(score) = self.oci.oomScoreAdj { + fs::write("/proc/self/oom_score_adj", score.to_string().as_bytes())?; + } + Ok(()) + } + + pub fn set_rlimits(&self) -> Result<()> { + if let Some(rlimits) = self.oci.rlimits.as_ref() { + for rlimit in rlimits { + setrlimit( + Resource::from_str(&rlimit.rlimit_type) + .with_context(|| "rlimit type is ill-formatted")?, + Rlim::from_raw(rlimit.soft), + Rlim::from_raw(rlimit.hard), + )?; + } + } + Ok(()) + } + + pub fn set_io_priority(&self) -> Result<()> { + if let Some(io_prio) = &self.oci.ioPriority { + let class = match io_prio.class { + IoPriClass::IoprioClassRt => 1i64, + IoPriClass::IoprioClassBe => 2i64, + IoPriClass::IoprioClassIdle => 3i64, + }; + // Who is a process id or thread id identifying a single process or + // thread. If who is 0, then operate on the calling process or thread. + let io_prio_who_process: libc::c_int = 1; + let io_prio_who_pid = 0; + // SAFETY: FFI call with valid arguments. + match unsafe { + libc::syscall( + libc::SYS_ioprio_set, + io_prio_who_process, + io_prio_who_pid, + (class << 13) | io_prio.priority, + ) + } { + 0 => Ok(()), + -1 => Err(nix::Error::last()), + _ => Err(nix::Error::UnknownErrno), + }?; + } + Ok(()) + } + + pub fn set_scheduler(&self) -> Result<()> { + if let Some(scheduler) = &self.oci.scheduler { + // SAFETY: FFI call with valid arguments. + let mut param: libc::sched_param = unsafe { mem::zeroed() }; + param.sched_priority = scheduler.priority.unwrap_or_default(); + // SAFETY: FFI call with valid arguments. 
+ match unsafe { libc::sched_setscheduler(0, scheduler.policy.into(), &param) } { + 0 => Ok(()), + -1 => Err(nix::Error::last()), + _ => Err(nix::Error::UnknownErrno), + }?; + } + Ok(()) + } + + pub fn no_new_privileges(&self) -> bool { + self.oci.noNewPrivileges.is_some() + } + + pub fn set_no_new_privileges(&self) -> Result<()> { + if let Some(no_new_privileges) = self.oci.noNewPrivileges { + if no_new_privileges { + prctl::set_no_new_privileges(true) + .map_err(|e| anyhow!("Failed to set no new privileges: {}", e))?; + } + } + Ok(()) + } + + pub fn chdir_cwd(&self) -> Result<()> { + if !self.oci.cwd.is_empty() { + chdir(&PathBuf::from(&self.oci.cwd)) + .with_context(|| format!("Failed to chdir to {}", &self.oci.cwd))?; + } + Ok(()) + } + + pub fn drop_capabilities(&self) -> Result<()> { + if let Some(caps) = self.oci.capabilities.as_ref() { + if let Some(bounding) = caps.bounding.as_ref() { + let all_caps = caps::read(None, CapSet::Bounding) + .with_context(|| OzonecErr::GetAllCaps("Bounding".to_string()))?; + let caps_hash_set = to_cap_set(bounding)?; + for cap in all_caps.difference(&caps_hash_set) { + caps::drop(None, CapSet::Bounding, *cap).with_context(|| { + format!("Failed to drop {} from bounding set", cap.to_string()) + })?; + } + } + if let Some(effective) = caps.effective.as_ref() { + caps::set(None, CapSet::Effective, &to_cap_set(effective)?) + .with_context(|| OzonecErr::SetCaps("Effective".to_string()))?; + } + if let Some(permitted) = caps.permitted.as_ref() { + caps::set(None, CapSet::Permitted, &to_cap_set(permitted)?) + .with_context(|| OzonecErr::SetCaps("Permitted".to_string()))?; + } + if let Some(inheritable) = caps.inheritable.as_ref() { + caps::set(None, CapSet::Inheritable, &to_cap_set(inheritable)?) + .with_context(|| OzonecErr::SetCaps("Inheritable".to_string()))?; + } + if let Some(ambient) = caps.ambient.as_ref() { + caps::set(None, CapSet::Ambient, &to_cap_set(ambient)?) + .with_context(|| OzonecErr::SetCaps("Ambient".to_string()))?; + } + } + Ok(()) + } + + pub fn set_apparmor(&self) -> Result<()> { + if let Some(profile) = &self.oci.apparmorProfile { + if !apparmor::is_enabled()?
{ + bail!("Apparmor is disabled."); + } + apparmor::apply_profile(profile)?; + } + Ok(()) + } + + pub fn reset_capabilities(&self) -> Result<()> { + let permitted = caps::read(None, CapSet::Permitted) + .with_context(|| OzonecErr::GetAllCaps("Permitted".to_string()))?; + caps::set(None, CapSet::Effective, &permitted)?; + Ok(()) + } + + pub fn set_additional_gids(&self) -> Result<()> { + if let Some(additional_gids) = &self.oci.user.additionalGids { + let setgroups = read_to_string("/proc/self/setgroups") + .with_context(|| "Failed to read setgroups")?; + if setgroups.trim() == "deny" { + bail!("Cannot set additional gids as setgroups is disabled"); + } + + let gids: Vec<Gid> = additional_gids + .iter() + .map(|gid| Gid::from_raw(*gid)) + .collect(); + unistd::setgroups(&gids).with_context(|| "Failed to set additional gids")?; + } + Ok(()) + } + + pub fn set_process_id(&self) -> Result<()> { + let gid = Gid::from(self.oci.user.gid); + let uid = Uid::from(self.oci.user.uid); + self.set_id(gid, uid)?; + Ok(()) + } + + pub fn set_id(&self, gid: Gid, uid: Uid) -> Result<()> { + prctl::set_keep_capabilities(true) + .map_err(|e| anyhow!("Failed to enable keeping capabilities: {}", e))?; + setresgid(gid, gid, gid).with_context(|| "Failed to setresgid")?; + setresuid(uid, uid, uid).with_context(|| "Failed to setresuid")?; + + let permitted = caps::read(None, CapSet::Permitted) + .with_context(|| OzonecErr::GetAllCaps("Permitted".to_string()))?; + caps::set(None, CapSet::Effective, &permitted) + .with_context(|| OzonecErr::SetCaps("Effective".to_string()))?; + prctl::set_keep_capabilities(false) + .map_err(|e| anyhow!("Failed to disable keeping capabilities: {}", e))?; + Ok(()) + } + + // Check and keep only valid environment variables. + // Invalid env vars may cause panic, refer to https://doc.rust-lang.org/std/env/fn.set_var.html#panics + // Key should not: + // * contain NULL character '\0' + // * contain ASCII character '=' + // * be empty + // Value should not: + // * contain NULL character '\0' + fn is_env_valid(env: &str) -> Option<(&str, &str)> { + // Split the env var by '=' to ensure there is no '=' in key, and there is only one '=' + // in the whole env var. + if let Some((key, value)) = env.split_once('=') { + if !key.is_empty() + && !key.as_bytes().contains(&b'\0') + && !value.as_bytes().contains(&b'\0') + { + return Some((key.trim(), value.trim())); + } + } + None + } + + pub fn set_envs(&self) { + if let Some(envs) = &self.oci.env { + for env in envs { + if let Some((key, value)) = Self::is_env_valid(env) { + env::set_var(key, value); + } + } + } + } + + pub fn clean_envs(&self) { + env::vars().for_each(|(key, _value)| env::remove_var(key)); + } + + pub fn exec_program(&self) -> ! { + // It has been made sure in validate_config() that args is not None. + let args = &self.oci.args.as_ref().unwrap(); + // args don't contain an interior 0 byte such as "hello\0world". + let exec_bin = CString::new(args[0].as_str().as_bytes()).unwrap(); + let args: Vec<CString> = args + .iter() + .map(|s| CString::new(s.as_bytes()).unwrap_or_default()) + .collect(); + + let _ = unistd::execvp(&exec_bin, &args).map_err(|e| match e { + nix::Error::UnknownErrno => std::process::exit(-2), + _ => std::process::exit(e as i32), + }); + + unreachable!() + } + + pub fn getcwd() -> Result<()> { + unistd::getcwd().map_err(|e| match e { + Errno::ENOENT => anyhow!("Current working directory is outside the container rootfs"), + _ => anyhow!("Failed to getcwd"), + })?; + Ok(()) + } +} + +// Clone a new child process.
+pub fn clone_process Result>(child_name: &str, mut cb: F) -> Result { + let mut clone3 = Clone3::default(); + clone3.exit_signal(SIGCHLD as u64); + + let mut ret = clone3.call(); + if ret.is_err() { + // clone3() may not be supported in the kernel, fallback to clone(); + let mut stack = [0; 1024 * 1024]; + ret = clone( + Box::new(|| match cb() { + Ok(r) => r as isize, + Err(e) => { + eprintln!("{}", e); + -1 + } + }), + &mut stack, + CloneFlags::empty(), + Some(SIGCHLD), + ) + .map_err(|e| anyhow!("Clone error: errno {}", e)); + } + + match ret { + Ok(pid) => { + if pid.as_raw() != 0 { + return Ok(pid); + } + + prctl::set_name(child_name) + .map_err(|e| anyhow!("Failed to set process name: errno {}", e))?; + let ret = match cb() { + Err(e) => { + eprintln!("Child process exit with errors: {:?}", e); + -1 + } + Ok(exit_code) => exit_code, + }; + std::process::exit(ret); + } + Err(e) => bail!(e), + } +} + +fn to_cap_set(caps: &Vec) -> Result { + let mut caps_hash_set = CapsHashSet::new(); + + for c in caps { + let cap = to_cap(&c)?; + caps_hash_set.insert(cap); + } + Ok(caps_hash_set) +} + +fn to_cap(value: &str) -> Result { + let binding = value.to_uppercase(); + let stripped = binding.strip_prefix("CAP_").unwrap_or(&binding); + + match stripped { + "AUDIT_CONTROL" => Ok(Capability::CAP_AUDIT_CONTROL), + "AUDIT_READ" => Ok(Capability::CAP_AUDIT_READ), + "AUDIT_WRITE" => Ok(Capability::CAP_AUDIT_WRITE), + "BLOCK_SUSPEND" => Ok(Capability::CAP_BLOCK_SUSPEND), + "BPF" => Ok(Capability::CAP_BPF), + "CHECKPOINT_RESTORE" => Ok(Capability::CAP_CHECKPOINT_RESTORE), + "CHOWN" => Ok(Capability::CAP_CHOWN), + "DAC_OVERRIDE" => Ok(Capability::CAP_DAC_OVERRIDE), + "DAC_READ_SEARCH" => Ok(Capability::CAP_DAC_READ_SEARCH), + "FOWNER" => Ok(Capability::CAP_FOWNER), + "FSETID" => Ok(Capability::CAP_FSETID), + "IPC_LOCK" => Ok(Capability::CAP_IPC_LOCK), + "IPC_OWNER" => Ok(Capability::CAP_IPC_OWNER), + "KILL" => Ok(Capability::CAP_KILL), + "LEASE" => Ok(Capability::CAP_LEASE), + "LINUX_IMMUTABLE" => Ok(Capability::CAP_LINUX_IMMUTABLE), + "MAC_ADMIN" => Ok(Capability::CAP_MAC_ADMIN), + "MAC_OVERRIDE" => Ok(Capability::CAP_MAC_OVERRIDE), + "MKNOD" => Ok(Capability::CAP_MKNOD), + "NET_ADMIN" => Ok(Capability::CAP_NET_ADMIN), + "NET_BIND_SERVICE" => Ok(Capability::CAP_NET_BIND_SERVICE), + "NET_BROADCAST" => Ok(Capability::CAP_NET_BROADCAST), + "NET_RAW" => Ok(Capability::CAP_NET_RAW), + "PERFMON" => Ok(Capability::CAP_PERFMON), + "SETGID" => Ok(Capability::CAP_SETGID), + "SETFCAP" => Ok(Capability::CAP_SETFCAP), + "SETPCAP" => Ok(Capability::CAP_SETPCAP), + "SETUID" => Ok(Capability::CAP_SETUID), + "SYS_ADMIN" => Ok(Capability::CAP_SYS_ADMIN), + "SYS_BOOT" => Ok(Capability::CAP_SYS_BOOT), + "SYS_CHROOT" => Ok(Capability::CAP_SYS_CHROOT), + "SYS_MODULE" => Ok(Capability::CAP_SYS_MODULE), + "SYS_NICE" => Ok(Capability::CAP_SYS_NICE), + "SYS_PACCT" => Ok(Capability::CAP_SYS_PACCT), + "SYS_PTRACE" => Ok(Capability::CAP_SYS_PTRACE), + "SYS_RAWIO" => Ok(Capability::CAP_SYS_RAWIO), + "SYS_RESOURCE" => Ok(Capability::CAP_SYS_RESOURCE), + "SYS_TIME" => Ok(Capability::CAP_SYS_TIME), + "SYS_TTY_CONFIG" => Ok(Capability::CAP_SYS_TTY_CONFIG), + "SYSLOG" => Ok(Capability::CAP_SYSLOG), + "WAKE_ALARM" => Ok(Capability::CAP_WAKE_ALARM), + _ => bail!("Invalid capability: {}", value), + } +} + +#[cfg(test)] +pub mod tests { + use std::path::Path; + + use nix::sys::resource::{getrlimit, Resource}; + use rusty_fork::rusty_fork_test; + use unistd::getcwd; + + use oci_spec::{ + linux::{Capbilities, IoPriority, SchedPolicy, 
Scheduler}, + posix::{Rlimits, User}, + }; + + use super::*; + + pub fn init_oci_process() -> OciProcess { + let user = User { + uid: 0, + gid: 0, + umask: None, + additionalGids: None, + }; + OciProcess { + cwd: String::from("/"), + args: Some(vec![String::from("bash")]), + env: None, + terminal: false, + consoleSize: None, + rlimits: None, + apparmorProfile: None, + capabilities: None, + noNewPrivileges: None, + oomScoreAdj: None, + scheduler: None, + selinuxLabel: None, + ioPriority: None, + execCPUAffinity: None, + user, + } + } + + #[test] + fn test_process_new() { + let mut oci_process = init_oci_process(); + + let process = Process::new(&oci_process, false); + assert_eq!(process.stdin.unwrap(), stdin().as_raw_fd()); + assert_eq!(process.stdout.unwrap(), stdout().as_raw_fd()); + assert_eq!(process.stderr.unwrap(), stderr().as_raw_fd()); + + oci_process.terminal = true; + let process = Process::new(&oci_process, false); + assert!(process.stdin.is_none()); + assert!(process.stdout.is_none()); + assert!(process.stderr.is_none()); + } + + #[test] + fn test_set_tty() { + let mut oci_process = init_oci_process(); + + let process = Process::new(&oci_process, false); + assert!(process.set_tty(None, false).is_ok()); + + oci_process.terminal = true; + let process = Process::new(&oci_process, false); + assert!(process.set_tty(None, false).is_err()); + } + + #[test] + fn test_chdir_cwd() { + let oci_process = init_oci_process(); + let process = Process::new(&oci_process, false); + + assert!(process.chdir_cwd().is_ok()); + assert_eq!(getcwd().unwrap().to_str().unwrap(), "/"); + } + + #[test] + fn test_set_envs() { + let mut oci_process = init_oci_process(); + oci_process.env = Some(vec![ + String::from("OZONEC_ENV_1=1"), + String::from("=OZONEC_ENV_2"), + String::from("OZONEC_ENV"), + ]); + let process = Process::new(&oci_process, false); + + process.set_envs(); + for (key, value) in env::vars() { + if key == "OZONEC_ENV_1" { + assert_eq!(value, "1"); + continue; + } + assert_ne!(value, "OZONEC_ENV_2"); + assert_ne!(key, "OZONEC_ENV"); + assert_ne!(value, "OZONEC_ENV"); + } + + env::remove_var("OZONEC_ENV_1"); + } + + #[test] + fn test_to_cap() { + assert_eq!( + to_cap("CAP_AUDIT_CONTROL").unwrap(), + Capability::CAP_AUDIT_CONTROL + ); + assert_eq!( + to_cap("CAP_AUDIT_READ").unwrap(), + Capability::CAP_AUDIT_READ + ); + assert_eq!( + to_cap("CAP_AUDIT_WRITE").unwrap(), + Capability::CAP_AUDIT_WRITE + ); + assert_eq!( + to_cap("CAP_BLOCK_SUSPEND").unwrap(), + Capability::CAP_BLOCK_SUSPEND + ); + assert_eq!(to_cap("CAP_BPF").unwrap(), Capability::CAP_BPF); + assert_eq!( + to_cap("CAP_CHECKPOINT_RESTORE").unwrap(), + Capability::CAP_CHECKPOINT_RESTORE + ); + assert_eq!(to_cap("CAP_CHOWN").unwrap(), Capability::CAP_CHOWN); + assert_eq!( + to_cap("CAP_DAC_OVERRIDE").unwrap(), + Capability::CAP_DAC_OVERRIDE + ); + assert_eq!( + to_cap("CAP_DAC_READ_SEARCH").unwrap(), + Capability::CAP_DAC_READ_SEARCH + ); + assert_eq!(to_cap("CAP_FOWNER").unwrap(), Capability::CAP_FOWNER); + assert_eq!(to_cap("CAP_FSETID").unwrap(), Capability::CAP_FSETID); + assert_eq!(to_cap("CAP_IPC_LOCK").unwrap(), Capability::CAP_IPC_LOCK); + assert_eq!(to_cap("CAP_IPC_OWNER").unwrap(), Capability::CAP_IPC_OWNER); + assert_eq!(to_cap("CAP_KILL").unwrap(), Capability::CAP_KILL); + assert_eq!(to_cap("CAP_LEASE").unwrap(), Capability::CAP_LEASE); + assert_eq!( + to_cap("CAP_LINUX_IMMUTABLE").unwrap(), + Capability::CAP_LINUX_IMMUTABLE + ); + assert_eq!(to_cap("CAP_MAC_ADMIN").unwrap(), Capability::CAP_MAC_ADMIN); + assert_eq!( + 
to_cap("CAP_MAC_OVERRIDE").unwrap(), + Capability::CAP_MAC_OVERRIDE + ); + assert_eq!(to_cap("CAP_MKNOD").unwrap(), Capability::CAP_MKNOD); + assert_eq!(to_cap("CAP_NET_ADMIN").unwrap(), Capability::CAP_NET_ADMIN); + assert_eq!( + to_cap("CAP_NET_BIND_SERVICE").unwrap(), + Capability::CAP_NET_BIND_SERVICE + ); + assert_eq!( + to_cap("CAP_NET_BROADCAST").unwrap(), + Capability::CAP_NET_BROADCAST + ); + assert_eq!(to_cap("CAP_NET_RAW").unwrap(), Capability::CAP_NET_RAW); + assert_eq!(to_cap("CAP_PERFMON").unwrap(), Capability::CAP_PERFMON); + assert_eq!(to_cap("CAP_SETGID").unwrap(), Capability::CAP_SETGID); + assert_eq!(to_cap("CAP_SETFCAP").unwrap(), Capability::CAP_SETFCAP); + assert_eq!(to_cap("CAP_SETPCAP").unwrap(), Capability::CAP_SETPCAP); + assert_eq!(to_cap("CAP_SETUID").unwrap(), Capability::CAP_SETUID); + assert_eq!(to_cap("CAP_SYS_ADMIN").unwrap(), Capability::CAP_SYS_ADMIN); + assert_eq!(to_cap("CAP_SYS_BOOT").unwrap(), Capability::CAP_SYS_BOOT); + assert_eq!( + to_cap("CAP_SYS_CHROOT").unwrap(), + Capability::CAP_SYS_CHROOT + ); + assert_eq!( + to_cap("CAP_SYS_MODULE").unwrap(), + Capability::CAP_SYS_MODULE + ); + assert_eq!(to_cap("CAP_SYS_NICE").unwrap(), Capability::CAP_SYS_NICE); + assert_eq!(to_cap("CAP_SYS_PACCT").unwrap(), Capability::CAP_SYS_PACCT); + assert_eq!( + to_cap("CAP_SYS_PTRACE").unwrap(), + Capability::CAP_SYS_PTRACE + ); + assert_eq!(to_cap("CAP_SYS_RAWIO").unwrap(), Capability::CAP_SYS_RAWIO); + assert_eq!( + to_cap("CAP_SYS_RESOURCE").unwrap(), + Capability::CAP_SYS_RESOURCE + ); + assert_eq!(to_cap("CAP_SYS_TIME").unwrap(), Capability::CAP_SYS_TIME); + assert_eq!( + to_cap("CAP_SYS_TTY_CONFIG").unwrap(), + Capability::CAP_SYS_TTY_CONFIG + ); + assert_eq!(to_cap("CAP_SYSLOG").unwrap(), Capability::CAP_SYSLOG); + assert_eq!( + to_cap("CAP_WAKE_ALARM").unwrap(), + Capability::CAP_WAKE_ALARM + ); + assert!(to_cap("CAP_TO_CAP").is_err()); + } + + rusty_fork_test! 
{ + #[test] + #[ignore = "oom_score_adj may not be permitted to set"] + fn test_set_oom_score_adj() { + let mut oci_process = init_oci_process(); + oci_process.oomScoreAdj = Some(100); + let process = Process::new(&oci_process, false); + + assert!(process.set_oom_score_adj().is_ok()); + assert_eq!( + read_to_string(Path::new("/proc/self/oom_score_adj")).unwrap(), + String::from("100\n") + ); + } + + #[test] + #[ignore = "setrlimit may not be permitted"] + fn test_set_rlimits() { + let mut oci_process = init_oci_process(); + let rlimits = Rlimits { + rlimit_type: String::from("RLIMIT_CORE"), + soft: 10, + hard: 20, + }; + oci_process.rlimits = Some(vec![rlimits]); + let process = Process::new(&oci_process, false); + + assert!(process.set_rlimits().is_ok()); + assert_eq!(getrlimit(Resource::RLIMIT_CORE).unwrap().0, 10); + assert_eq!(getrlimit(Resource::RLIMIT_CORE).unwrap().1, 20); + } + + #[test] + fn test_set_io_priority() { + let mut oci_process = init_oci_process(); + let io_pri = IoPriority { + class: IoPriClass::IoprioClassBe, + priority: 7, + }; + oci_process.ioPriority = Some(io_pri.clone()); + let process = Process::new(&oci_process, false); + + assert!(process.set_io_priority().is_ok()); + + let io_prio_who_process: libc::c_int = 1; + let io_prio_who_pid = 0; + let ioprio = unsafe { + libc::syscall(libc::SYS_ioprio_get, io_prio_who_process, io_prio_who_pid) + }; + assert_eq!(ioprio, (2 as i64) << 13 | io_pri.priority); + } + + #[test] + fn test_set_scheduler() { + let mut oci_process = init_oci_process(); + let scheduler = Scheduler { + policy: SchedPolicy::SchedOther, + nice: None, + priority: None, + flags: None, + runtime: None, + deadline: None, + period: None, + }; + oci_process.scheduler = Some(scheduler); + let process = Process::new(&oci_process, false); + + assert!(process.set_scheduler().is_ok()); + } + + #[test] + fn test_set_no_new_privileges() { + let mut oci_process = init_oci_process(); + oci_process.noNewPrivileges = Some(true); + let process = Process::new(&oci_process, false); + + assert!(process.set_no_new_privileges().is_ok()); + } + + #[test] + #[ignore = "capset may not be permitted"] + fn test_drop_capabilities() { + let mut oci_process = init_oci_process(); + let caps = Capbilities { + effective: Some(vec![ + String::from("CAP_DAC_OVERRIDE"), + String::from("CAP_DAC_READ_SEARCH"), + String::from("CAP_SETFCAP"), + ]), + bounding: Some(vec![ + String::from("CAP_DAC_OVERRIDE"), + String::from("CAP_DAC_READ_SEARCH"), + ]), + inheritable: Some(vec![String::from("CAP_DAC_READ_SEARCH")]), + permitted: Some(vec![ + String::from("CAP_DAC_OVERRIDE"), + String::from("CAP_DAC_READ_SEARCH"), + String::from("CAP_SETFCAP"), + ]), + ambient: Some(vec![String::from("CAP_DAC_READ_SEARCH")]), + }; + oci_process.capabilities = Some(caps); + let process = Process::new(&oci_process, false); + + assert!(process.drop_capabilities().is_ok()); + let mut caps = caps::read(None, CapSet::Bounding).unwrap(); + assert_eq!(caps.len(), 2); + assert!(caps.get(&Capability::CAP_DAC_OVERRIDE).is_some()); + assert!(caps.get(&Capability::CAP_DAC_READ_SEARCH).is_some()); + caps = caps::read(None, CapSet::Effective).unwrap(); + assert_eq!(caps.len(), 3); + assert!(caps.get(&Capability::CAP_DAC_OVERRIDE).is_some()); + assert!(caps.get(&Capability::CAP_DAC_READ_SEARCH).is_some()); + assert!(caps.get(&Capability::CAP_SETFCAP).is_some()); + caps = caps::read(None, CapSet::Inheritable).unwrap(); + assert_eq!(caps.len(), 1); + assert!(caps.get(&Capability::CAP_DAC_READ_SEARCH).is_some()); + caps = 
caps::read(None, CapSet::Permitted).unwrap(); + assert_eq!(caps.len(), 3); + assert!(caps.get(&Capability::CAP_DAC_OVERRIDE).is_some()); + assert!(caps.get(&Capability::CAP_DAC_READ_SEARCH).is_some()); + assert!(caps.get(&Capability::CAP_SETFCAP).is_some()); + caps = caps::read(None, CapSet::Ambient).unwrap(); + assert_eq!(caps.len(), 1); + assert!(caps.get(&Capability::CAP_DAC_READ_SEARCH).is_some()); + } + + #[test] + fn test_reset_capabilities() { + let oci_process = init_oci_process(); + let process = Process::new(&oci_process, false); + + assert!(process.reset_capabilities().is_ok()); + let permit_caps = caps::read(None, CapSet::Permitted).unwrap(); + let eff_caps = caps::read(None, CapSet::Effective).unwrap(); + assert_eq!(permit_caps, eff_caps); + } + + #[test] + fn test_clean_envs() { + let oci_process = init_oci_process(); + let process = Process::new(&oci_process, false); + process.clean_envs(); + assert_eq!(env::vars().count(), 0); + } + } +} diff --git a/ozonec/src/linux/rootfs.rs b/ozonec/src/linux/rootfs.rs new file mode 100644 index 0000000000000000000000000000000000000000..b7854a3f2ba956e29add078f91c05e98e9e0d230 --- /dev/null +++ b/ozonec/src/linux/rootfs.rs @@ -0,0 +1,508 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + fs::remove_file, + os::unix::fs::symlink, + path::{Path, PathBuf}, +}; + +use anyhow::{bail, Context, Result}; +use nix::{ + fcntl::{open, OFlag}, + mount::{umount2, MntFlags, MsFlags}, + sys::stat::{umask, Mode}, + unistd::{chroot, close, fchdir, pivot_root}, + NixPath, +}; +use procfs::process::Process; + +use super::{device::Device, mount::Mount}; +use crate::utils::OzonecErr; +use oci_spec::{ + linux::Device as OciDevice, + runtime::{Mount as OciMount, RuntimeConfig}, +}; + +pub struct Rootfs { + pub path: PathBuf, + propagation_flags: MsFlags, + mounts: Vec, + // Should we mknod the device or bind one. 
+ mknod_device: bool, + devices: Vec, +} + +impl Rootfs { + pub fn new( + path: PathBuf, + propagation: Option, + mounts: Vec, + mknod_device: bool, + devices: Vec, + ) -> Result { + if !path.exists() { + bail!("Rootfs directory not exist"); + } + + let propagation_flags = Self::get_mount_flags(propagation)?; + Ok(Self { + path, + propagation_flags, + mounts, + mknod_device, + devices, + }) + } + + fn get_mount_flags(propagation: Option) -> Result { + let flags = match propagation.as_deref() { + Some("shared") => MsFlags::MS_SHARED, + Some("private") => MsFlags::MS_PRIVATE, + Some("slave") => MsFlags::MS_SLAVE, + Some("unbindable") => MsFlags::MS_UNBINDABLE, + Some(_) => bail!("Invalid rootfsPropagation"), + None => MsFlags::MS_REC | MsFlags::MS_SLAVE, + }; + Ok(flags) + } + + fn set_propagation(&self) -> Result<()> { + nix::mount::mount( + None::<&str>, + Path::new("/"), + None::<&str>, + self.propagation_flags, + None::<&str>, + ) + .with_context(|| "Failed to set rootfs mount propagation")?; + Ok(()) + } + + fn mount(&self) -> Result<()> { + nix::mount::mount( + Some(&self.path), + &self.path, + None::<&str>, + MsFlags::MS_BIND | MsFlags::MS_REC, + None::<&str>, + )?; + Ok(()) + } + + fn make_parent_mount_private(&self) -> Result<()> { + let process = Process::myself().with_context(|| OzonecErr::AccessProcSelf)?; + let mount_info = process.mountinfo().with_context(|| OzonecErr::GetMntInfo)?; + + match mount_info + .into_iter() + .filter(|m| self.path.starts_with(&m.mount_point) && m.mount_point != self.path) + .map(|m| m.mount_point) + .max_by_key(|m| m.len()) + .as_ref() + { + Some(m) => { + nix::mount::mount(Some(m), m, None::<&str>, MsFlags::MS_PRIVATE, None::<&str>)? + } + None => (), + } + Ok(()) + } + + // OCI spec requires runtime MUST create the following symlinks if the source file exists after + // processing mounts: + // dev/fd -> /proc/self/fd + // dev/stdin -> /proc/self/fd/0 + // dev/stdout -> /proc/self/fd/1 + // dev/stderr -> /proc/self/fd/2 + fn set_default_symlinks(&self) -> Result<()> { + let link_pairs = vec![ + ((&self.path).join("dev/fd"), "/proc/self/fd"), + ((&self.path).join("dev/stdin"), "/proc/self/fd/0"), + ((&self.path).join("dev/stdout"), "/proc/self/fd/1"), + ((&self.path).join("dev/stderr"), "/proc/self/fd/2"), + ]; + + for pair in link_pairs { + let cloned_pair = pair.clone(); + symlink(pair.1, pair.0).with_context(|| { + format!( + "Failed to create symlink {} -> {}", + cloned_pair.0.display(), + cloned_pair.1 + ) + })?; + } + Ok(()) + } + + fn do_mounts(&self, config: &RuntimeConfig) -> Result<()> { + let mount = Mount::new(&self.path); + mount + .do_mounts(&self.mounts, &config.linux.as_ref().unwrap().mountLabel) + .with_context(|| "Failed to do mounts")?; + Ok(()) + } + + fn link_ptmx(&self) -> Result<()> { + let ptmx = self.path.clone().join("dev/ptmx"); + if ptmx.exists() { + remove_file(&ptmx).with_context(|| "Failed to delete ptmx")?; + } + symlink("pts/ptmx", &ptmx) + .with_context(|| format!("Failed to create symlink {} -> pts/ptmx", ptmx.display()))?; + Ok(()) + } + + fn create_default_devices(&self, mknod: bool) -> Result<()> { + let dev = Device::new(self.path.clone()); + dev.create_default_devices(mknod)?; + Ok(()) + } + + fn create_devices(&self, devices: &Vec, mknod: bool) -> Result<()> { + let dev = Device::new(self.path.clone()); + for d in devices { + if dev.is_default_device(d) { + dev.delete_device(d)?; + } + dev.create_device(d, mknod) + .with_context(|| format!("Failed to create device {}", d.path))?; + } + Ok(()) + } + + pub fn 
prepare_rootfs(&self, config: &RuntimeConfig) -> Result<()> { + self.set_propagation()?; + self.mount().with_context(|| "Failed to mount rootfs")?; + self.make_parent_mount_private() + .with_context(|| "Failed to make parent mount private")?; + self.do_mounts(config)?; + self.set_default_symlinks()?; + + let old_mode = umask(Mode::from_bits_truncate(0o000)); + self.create_default_devices(self.mknod_device)?; + self.create_devices(&self.devices, self.mknod_device)?; + umask(old_mode); + + self.link_ptmx()?; + Ok(()) + } + + pub fn chroot(path: &Path) -> Result<()> { + let new_root = open(path, OFlag::O_DIRECTORY | OFlag::O_RDONLY, Mode::empty()) + .with_context(|| OzonecErr::OpenFile(path.to_string_lossy().to_string()))?; + chroot(path)?; + fchdir(new_root).with_context(|| "Failed to chdir to new root directory")?; + Ok(()) + } + + pub fn pivot_root(path: &Path) -> Result<()> { + let new_root = open(path, OFlag::O_DIRECTORY | OFlag::O_RDONLY, Mode::empty()) + .with_context(|| OzonecErr::OpenFile(path.to_string_lossy().to_string()))?; + let old_root = open("/", OFlag::O_DIRECTORY | OFlag::O_RDONLY, Mode::empty()) + .with_context(|| OzonecErr::OpenFile("/".to_string()))?; + + pivot_root(path, path)?; + nix::mount::mount( + None::<&str>, + "/", + None::<&str>, + MsFlags::MS_SLAVE | MsFlags::MS_REC, + None::<&str>, + ) + .with_context(|| OzonecErr::Mount("/".to_string()))?; + + fchdir(old_root).with_context(|| "Failed to chdir to old root directory")?; + umount2(".", MntFlags::MNT_DETACH) + .with_context(|| "Failed to umount old root directory")?; + fchdir(new_root).with_context(|| "Failed to chdir to new root directory")?; + + close(old_root).with_context(|| "Failed to close old_root")?; + close(new_root).with_context(|| "Failed to close new_root")?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::{ + fs::{self, create_dir_all, read_link, remove_dir_all}, + os::unix::fs::FileTypeExt, + }; + + use nix::unistd::chdir; + use rusty_fork::rusty_fork_test; + + use crate::linux::{container::tests::init_config, namespace::tests::set_namespace}; + use oci_spec::linux::NamespaceType; + + use super::*; + + fn init_rootfs(path: &str, propagation: Option, mounts: Vec) -> Rootfs { + let path = PathBuf::from(path); + create_dir_all(&path).unwrap(); + Rootfs::new(path, propagation, mounts, true, Vec::new()).unwrap() + } + + #[test] + fn test_rootfs_new() { + let path = PathBuf::from("/test_rootfs_new"); + assert!(Rootfs::new(path, None, Vec::new(), true, Vec::new()).is_err()); + } + + #[test] + fn test_get_mount_flags() { + assert_eq!( + Rootfs::get_mount_flags(Some(String::from("shared"))).unwrap(), + MsFlags::MS_SHARED + ); + assert_eq!( + Rootfs::get_mount_flags(Some(String::from("private"))).unwrap(), + MsFlags::MS_PRIVATE + ); + assert_eq!( + Rootfs::get_mount_flags(Some(String::from("slave"))).unwrap(), + MsFlags::MS_SLAVE + ); + assert_eq!( + Rootfs::get_mount_flags(Some(String::from("unbindable"))).unwrap(), + MsFlags::MS_UNBINDABLE + ); + assert_eq!( + Rootfs::get_mount_flags(None).unwrap(), + MsFlags::MS_REC | MsFlags::MS_SLAVE + ); + assert!(Rootfs::get_mount_flags(Some(String::from("unbind"))).is_err()); + } + + rusty_fork_test! 
{ + #[test] + #[ignore = "unshare may not be permitted"] + fn test_set_propagation() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + set_namespace(NamespaceType::Mount); + let rootfs = init_rootfs( + "/tmp/ozonec/test_set_propagation", + Some(String::from("shared")), + Vec::new(), + ); + + assert!(rootfs.set_propagation().is_ok()); + } + + #[test] + #[ignore = "unshare may not be permitted"] + fn test_make_parent_mount_private() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + set_namespace(NamespaceType::Mount); + + let parent = PathBuf::from("/tmp/ozonec/test_make_parent_mount_private"); + create_dir_all(&parent).unwrap(); + nix::mount::mount( + Some(&parent), + &parent, + None::<&str>, + MsFlags::MS_BIND, + None::<&str>, + ) + .unwrap(); + let rootfs = init_rootfs( + "/tmp/ozonec/test_make_parent_mount_private/rootfs", + Some(String::from("shared")), + Vec::new(), + ); + + assert!(rootfs.make_parent_mount_private().is_ok()); + } + + #[test] + #[ignore = "unshare may not be permitted"] + fn test_set_default_symlinks() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + set_namespace(NamespaceType::Mount); + let mounts = vec![ + OciMount { + destination: String::from("/proc"), + source: Some(String::from("/proc")), + options: Some(Vec::new()), + fs_type: Some(String::from("proc")), + uidMappings: None, + gidMappings: None, + }, + OciMount { + destination: String::from("/dev"), + source: Some(String::from("tmpfs")), + options: Some(vec![ + String::from("nosuid"), + String::from("strictatime"), + String::from("mode=755"), + String::from("size=65536k"), + ]), + fs_type: Some(String::from("tmpfs")), + uidMappings: None, + gidMappings: None, + }, + ]; + let rootfs = init_rootfs( + "/tmp/ozonec/test_set_default_symlinks", + Some(String::from("shared")), + mounts, + ); + rootfs.mount().unwrap(); + + let mut config = init_config(); + config.root.path = rootfs.path.to_string_lossy().to_string(); + rootfs.do_mounts(&config).unwrap(); + + assert!(rootfs.set_default_symlinks().is_ok()); + chdir(&rootfs.path).unwrap(); + let mut path = PathBuf::from("dev/fd"); + let mut metadata = fs::symlink_metadata(&path).unwrap(); + assert!(metadata.is_symlink()); + assert_eq!(read_link(&path).unwrap(), PathBuf::from("/proc/self/fd")); + path = PathBuf::from("dev/stdin"); + metadata = fs::symlink_metadata(&path).unwrap(); + assert!(metadata.is_symlink()); + assert_eq!(read_link(&path).unwrap(), PathBuf::from("/proc/self/fd/0")); + path = PathBuf::from("dev/stdout"); + metadata = fs::symlink_metadata(&path).unwrap(); + assert!(metadata.is_symlink()); + assert_eq!(read_link(&path).unwrap(), PathBuf::from("/proc/self/fd/1")); + path = PathBuf::from("dev/stderr"); + metadata = fs::symlink_metadata(&path).unwrap(); + assert!(metadata.is_symlink()); + assert_eq!(read_link(&path).unwrap(), PathBuf::from("/proc/self/fd/2")); + } + + #[test] + #[ignore = "unshare may not be permitted"] + fn test_link_ptmx() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + set_namespace(NamespaceType::Mount); + let mounts = vec![OciMount { + destination: String::from("/dev"), + source: Some(String::from("tmpfs")), + options: Some(vec![ + String::from("nosuid"), + String::from("strictatime"), + String::from("mode=755"), + String::from("size=65536k"), + ]), + fs_type: Some(String::from("tmpfs")), + uidMappings: None, + gidMappings: None, + }]; + let rootfs = init_rootfs( + "/tmp/ozonec/test_link_ptmx", + Some(String::from("shared")), + mounts, + ); + let mut config = init_config(); + config.root.path = 
rootfs.path.to_string_lossy().to_string(); + rootfs.do_mounts(&config).unwrap(); + + assert!(rootfs.link_ptmx().is_ok()); + + chdir(&rootfs.path).unwrap(); + let path = PathBuf::from("dev/ptmx"); + let metadata = fs::symlink_metadata(&path).unwrap(); + assert!(metadata.is_symlink()); + assert_eq!(read_link(&path).unwrap(), PathBuf::from("pts/ptmx")); + } + + #[test] + #[ignore = "unshare may not be permitted"] + fn test_create_default_devices() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + set_namespace(NamespaceType::Mount); + let mounts = vec![OciMount { + destination: String::from("/dev"), + source: Some(String::from("tmpfs")), + options: Some(vec![ + String::from("nosuid"), + String::from("strictatime"), + String::from("mode=755"), + String::from("size=65536k"), + ]), + fs_type: Some(String::from("tmpfs")), + uidMappings: None, + gidMappings: None, + }]; + let rootfs = init_rootfs( + "/tmp/ozonec/test_create_default_devices", + Some(String::from("shared")), + mounts, + ); + let mut config = init_config(); + config.root.path = rootfs.path.to_string_lossy().to_string(); + rootfs.do_mounts(&config).unwrap(); + + assert!(rootfs.create_default_devices(false).is_ok()); + for dev in Device::new(rootfs.path.clone()).default_devices() { + assert!(dev.path.exists()); + let metadata = fs::metadata(&dev.path).unwrap(); + assert!(metadata.file_type().is_char_device()); + } + } + + #[test] + #[ignore = "unshare may not be permitted"] + fn test_create_devices() { + remove_dir_all("/tmp/ozonec").unwrap_or_default(); + + set_namespace(NamespaceType::Mount); + + let mounts = vec![OciMount { + destination: String::from("/dev"), + source: Some(String::from("tmpfs")), + options: Some(vec![ + String::from("nosuid"), + String::from("strictatime"), + String::from("mode=755"), + String::from("size=65536k"), + ]), + fs_type: Some(String::from("tmpfs")), + uidMappings: None, + gidMappings: None, + }]; + let rootfs = init_rootfs( + "/tmp/ozonec/test_create_devices", + Some(String::from("shared")), + mounts, + ); + let mut config = init_config(); + config.root.path = rootfs.path.to_string_lossy().to_string(); + rootfs.do_mounts(&config).unwrap(); + + let devices = vec![OciDevice { + dev_type: String::from("c"), + path: String::from("/dev/test"), + major: Some(1), + minor: Some(3), + fileMode: Some(0o666u32), + uid: None, + gid: None, + }]; + assert!(rootfs.create_devices(&devices, true).is_ok()); + let path = rootfs.path.join("dev/test"); + assert!(path.exists()); + let metadata = fs::metadata(&path).unwrap(); + assert!(metadata.file_type().is_char_device()); + } + } +} diff --git a/ozonec/src/linux/seccomp.rs b/ozonec/src/linux/seccomp.rs new file mode 100644 index 0000000000000000000000000000000000000000..3fdc35e3c16e6619475431cc73855cfdc01aed30 --- /dev/null +++ b/ozonec/src/linux/seccomp.rs @@ -0,0 +1,193 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
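+ +//! Seccomp filter setup for the container process, driven by the `linux.seccomp` section of the OCI runtime config and backed by libseccomp. +//! +//! A minimal, hypothetical usage sketch (it assumes the parsed `RuntimeConfig` exposes the OCI `Seccomp` struct as an optional `linux.seccomp` field; the real call site lives outside this module): +//! +//! ```ignore +//! if let Some(seccomp) = config.linux.as_ref().and_then(|l| l.seccomp.as_ref()) { +//!     set_seccomp(seccomp)?; +//! } +//! ```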
+ +use std::vec; + +use anyhow::{bail, Context, Result}; + +use libseccomp::{ + ScmpAction, ScmpArch, ScmpArgCompare, ScmpCompareOp, ScmpFilterContext, ScmpSyscall, +}; +use oci_spec::linux::{Seccomp, SeccompAction as OciSeccompAction, SeccompOp}; + +use crate::utils::OzonecErr; + +fn parse_action(action: OciSeccompAction, errno: Option<u32>) -> ScmpAction { + let errno = errno.unwrap_or(libc::EPERM as u32); + match action { + OciSeccompAction::ScmpActKill => ScmpAction::KillThread, + OciSeccompAction::ScmpActKillProcess => ScmpAction::KillProcess, + OciSeccompAction::ScmpActTrap => ScmpAction::Trap, + OciSeccompAction::ScmpActErrno => ScmpAction::Errno(errno as i32), + OciSeccompAction::ScmpActTrace => ScmpAction::Trace(errno as u16), + OciSeccompAction::ScmpActLog => ScmpAction::Log, + OciSeccompAction::ScmpActAllow => ScmpAction::Allow, + _ => ScmpAction::KillThread, + } +} + +fn parse_cmp(op: SeccompOp, mask: u64) -> ScmpCompareOp { + match op { + SeccompOp::ScmpCmpNe => ScmpCompareOp::NotEqual, + SeccompOp::ScmpCmpLt => ScmpCompareOp::Less, + SeccompOp::ScmpCmpLe => ScmpCompareOp::LessOrEqual, + SeccompOp::ScmpCmpEq => ScmpCompareOp::Equal, + SeccompOp::ScmpCmpGe => ScmpCompareOp::GreaterEqual, + SeccompOp::ScmpCmpGt => ScmpCompareOp::Greater, + SeccompOp::ScmpCmpMaskedEq => ScmpCompareOp::MaskedEqual(mask), + } +} + +fn check_seccomp(seccomp: &Seccomp) -> Result<()> { + // We don't support NOTIFY as the default action. When the seccomp filter + // is created with NOTIFY, the container process has to pass the returned + // fd to another process, so ozonec would need the write syscall, and the + // read and close syscalls would have to be allowed as well. + if seccomp.defaultAction == OciSeccompAction::ScmpActNotify { + bail!("SCMP_ACT_NOTIFY is not supported as the default action"); + } + if let Some(syscalls) = &seccomp.syscalls { + for syscall in syscalls { + if syscall.action == OciSeccompAction::ScmpActNotify { + for name in &syscall.names { + if name == "write" { + bail!("SCMP_ACT_NOTIFY is not supported to be used for write syscall"); + } + } + } + } + } + + Ok(()) +} + +pub fn set_seccomp(seccomp: &Seccomp) -> Result<()> { + check_seccomp(seccomp)?; + + let default_action = parse_action(seccomp.defaultAction, seccomp.defaultErrnoRet); + if let Some(syscalls) = &seccomp.syscalls { + let mut filter = ScmpFilterContext::new_filter(default_action)?; + #[cfg(target_arch = "x86_64")] + filter + .add_arch(ScmpArch::X8664) + .with_context(|| OzonecErr::AddScmpArch)?; + #[cfg(target_arch = "aarch64")] + filter + .add_arch(ScmpArch::Aarch64) + .with_context(|| OzonecErr::AddScmpArch)?; + + for syscall in syscalls { + let action = parse_action(syscall.action, syscall.errnoRet); + if action == default_action { + continue; + } + + for name in &syscall.names { + let sc = ScmpSyscall::from_name(name)?; + let mut comparators: Vec<ScmpArgCompare> = vec![]; + if let Some(args) = &syscall.args { + for arg in args { + let op = parse_cmp(arg.op, arg.value); + let cmp = match arg.op { + SeccompOp::ScmpCmpMaskedEq => { + ScmpArgCompare::new(arg.index as u32, op, arg.valueTwo.unwrap_or(0)) + } + _ => ScmpArgCompare::new(arg.index as u32, op, arg.value), + }; + comparators.push(cmp); + } + } + filter + .add_rule_conditional(action, sc, &comparators) + .with_context(|| "Failed to add conditional rule")?; + } + } + filter + .load() + .with_context(|| "Failed to load filter into the kernel")?; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use rusty_fork::rusty_fork_test; + + use
oci_spec::linux::{SeccompSyscall, SeccompSyscallArg}; + + use super::*; + + #[test] + fn test_check_seccomp() { + let mut seccomp = Seccomp { + defaultAction: OciSeccompAction::ScmpActNotify, + defaultErrnoRet: None, + architectures: None, + flags: None, + listennerPath: None, + seccompFd: None, + listenerMetadata: None, + syscalls: None, + }; + assert!(check_seccomp(&seccomp).is_err()); + + seccomp.defaultAction = OciSeccompAction::ScmpActAllow; + let syscall = SeccompSyscall { + names: vec![String::from("write")], + action: OciSeccompAction::ScmpActNotify, + errnoRet: None, + args: None, + }; + seccomp.syscalls = Some(vec![syscall]); + assert!(check_seccomp(&seccomp).is_err()); + } + + rusty_fork_test! { + #[test] + fn test_set_seccomp() { + let mut seccomp = Seccomp { + defaultAction: OciSeccompAction::ScmpActAllow, + defaultErrnoRet: None, + architectures: None, + flags: None, + listennerPath: None, + seccompFd: None, + listenerMetadata: None, + syscalls: None, + }; + let syscall = SeccompSyscall { + names: vec![String::from("write")], + action: OciSeccompAction::ScmpActKill, + errnoRet: None, + args: Some(vec![ + SeccompSyscallArg { + index: 0, + value: 0, + valueTwo: Some(0), + op: SeccompOp::ScmpCmpEq, + }, + SeccompSyscallArg { + index: 2, + value: 0, + valueTwo: Some(0), + op: SeccompOp::ScmpCmpMaskedEq, + }, + ]), + }; + seccomp.syscalls = Some(vec![syscall]); + + assert!(set_seccomp(&seccomp).is_ok()); + } + } +} diff --git a/ozonec/src/linux/terminal.rs b/ozonec/src/linux/terminal.rs new file mode 100644 index 0000000000000000000000000000000000000000..26da7376e5ad2cd5486ad22d78d0e390eec95e8b --- /dev/null +++ b/ozonec/src/linux/terminal.rs @@ -0,0 +1,111 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + fs::File, + io::IoSlice, + mem::ManuallyDrop, + os::unix::io::{AsRawFd, RawFd}, + path::PathBuf, +}; + +use anyhow::{bail, Context, Result}; +use nix::{ + errno::errno, + fcntl::{open, OFlag}, + mount::MsFlags, + pty::{posix_openpt, ptsname, unlockpt}, + sys::{ + socket::{sendmsg, ControlMessage, MsgFlags, UnixAddr}, + stat::{fchmod, Mode}, + }, + unistd::{close, dup2}, +}; + +use crate::utils::OzonecErr; + +pub enum Stdio { + Stdin = 0, + Stdout = 1, + Stderr = 2, +} + +pub fn setup_console(console_fd: &RawFd, mount: bool) -> Result<()> { + let master_fd = posix_openpt(OFlag::O_RDWR).with_context(|| "openpt error")?; + let pty_name: &[u8] = b"/dev/ptmx"; + let iov = [IoSlice::new(pty_name)]; + // Use ManuallyDrop to keep fds open. + let master = ManuallyDrop::new(master_fd.as_raw_fd()); + let fds = [master.as_raw_fd()]; + let cmsg = ControlMessage::ScmRights(&fds); + sendmsg::( + console_fd.as_raw_fd(), + &iov, + &[cmsg], + MsgFlags::empty(), + None, + ) + .with_context(|| "sendmsg error")?; + + // SAFETY: FFI call with valid arguments. + let slave_name = unsafe { ptsname(&master_fd).with_context(|| "ptsname error")? 
}; + unlockpt(&master_fd).with_context(|| "unlockpt error")?; + let slave_path = PathBuf::from(&slave_name); + if mount { + let file = File::create("/dev/console").with_context(|| "Failed to create /dev/console")?; + fchmod(file.as_raw_fd(), Mode::from_bits_truncate(0o666u32)) + .with_context(|| "chmod error")?; + nix::mount::mount( + Some(&slave_path), + "/dev/console", + Some("bind"), + MsFlags::MS_BIND, + None::<&str>, + ) + .with_context(|| OzonecErr::Mount(slave_name.clone()))?; + } + + let slave_fd = open(&slave_path, OFlag::O_RDWR, Mode::empty()) + .with_context(|| OzonecErr::OpenFile(slave_name.clone()))?; + let slave = ManuallyDrop::new(slave_fd); + // SAFETY: FFI call with valid arguments. + if unsafe { libc::ioctl(slave.as_raw_fd(), libc::TIOCSCTTY) } != 0 { + bail!("TIOCSCTTY error: {}", errno()); + } + connect_stdio(&slave_fd, &slave_fd, &slave_fd)?; + close(console_fd.as_raw_fd()).with_context(|| "Failed to close console socket")?; + Ok(()) +} + +pub fn connect_stdio(stdin: &RawFd, stdout: &RawFd, stderr: &RawFd) -> Result<()> { + dup2(*stdin, (Stdio::Stdin as i32).as_raw_fd()) + .with_context(|| OzonecErr::Dup2("stdin".to_string()))?; + dup2(*stdout, (Stdio::Stdout as i32).as_raw_fd()) + .with_context(|| OzonecErr::Dup2("stdout".to_string()))?; + dup2(*stderr, (Stdio::Stderr as i32).as_raw_fd()) + .with_context(|| OzonecErr::Dup2("stderr".to_string()))?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_connect_stdio() { + let stdin: RawFd = 0; + let stdout: RawFd = 0; + let stderr: RawFd = 0; + + assert!(connect_stdio(&stdin, &stdout, &stderr).is_ok()); + } +} diff --git a/ozonec/src/main.rs b/ozonec/src/main.rs new file mode 100644 index 0000000000000000000000000000000000000000..29c529bb9fbdcd75ef215d689b48c99ee5e9f6aa --- /dev/null +++ b/ozonec/src/main.rs @@ -0,0 +1,144 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +mod commands; +mod container; +mod linux; +mod utils; + +use std::{ + fs::remove_dir_all, + path::{Path, PathBuf}, + process::exit, +}; + +use anyhow::{anyhow, Context, Result}; +use clap::{crate_description, Args, Parser, Subcommand}; +use commands::{Delete, Exec, Kill, Start, State}; +use log::info; +use nix::unistd::geteuid; + +use crate::{commands::Create, utils::logger}; + +// Global options which are not binded to any specific command. +#[derive(Args, Debug)] +struct GlobalOpts { + /// Root directory to store container state. + #[arg(short, long)] + root: Option, + /// Path of log file. + #[arg(short, long)] + log: Option, + /// Enable debug log level. + #[arg(short, long)] + debug: bool, +} + +// Standard commands supported by [OCI runtime-spec] +// (https://github.com/opencontainers/runtime-spec/blob/master/runtime.md) +// and [OCI Command Line Interface] +// (https://github.com/opencontainers/runtime-tools/blob/master/docs/command-line-interface.md). 
+#[derive(Subcommand, Debug)] +enum StandardCmd { + Create(Create), + Start(Start), + State(State), + Kill(Kill), + Delete(Delete), +} + +// Extended commands not documented in [OCI Command Line Interface]. +#[derive(Subcommand, Debug)] +enum ExtendCmd { + Exec(Exec), +} + +#[derive(Subcommand, Debug)] +enum Command { + #[command(flatten)] + Standard(StandardCmd), + #[command(flatten)] + Extend(ExtendCmd), +} + +#[derive(Parser, Debug)] +#[command(version, author, about = crate_description!())] +#[command(propagate_version = true)] +struct Cli { + #[command(flatten)] + global: GlobalOpts, + #[command(subcommand)] + cmd: Command, +} + +fn cmd_run(command: Command, root: &Path) -> Result<()> { + match command { + Command::Standard(cmd) => match cmd { + StandardCmd::Create(create) => { + info!("Run command: {:?}", create); + + let mut root_exist = false; + create.run(root, &mut root_exist).map_err(|e| { + if !root_exist { + let _ = remove_dir_all(root); + } + anyhow!(e) + })? + } + StandardCmd::Start(start) => { + info!("Run command: {:?}", start); + start.run(root)? + } + StandardCmd::Kill(kill) => { + info!("Run command: {:?}", kill); + kill.run(root)? + } + StandardCmd::Delete(delete) => { + info!("Run command: {:?}", delete); + delete.run(root)? + } + StandardCmd::State(state) => { + info!("Run command: {:?}", state); + state.run(root)? + } + }, + Command::Extend(cmd) => match cmd { + ExtendCmd::Exec(exec) => { + info!("Run command: {:?}", exec); + exec.run(root)? + } + }, + } + Ok(()) +} + +fn real_main() -> Result<()> { + let cli = Cli::parse(); + + logger::init(&cli.global.log, cli.global.debug).with_context(|| "Failed to init logger")?; + + let root_path = if let Some(root) = cli.global.root { + root + } else { + let euid = geteuid(); + PathBuf::from(format!("/var/run/user/{}/ozonec", euid)) + }; + cmd_run(cli.cmd, &root_path) +} + +fn main() { + if let Err(e) = real_main() { + eprintln!("ERROR: {:?}", e); + exit(1); + } + exit(0); +} diff --git a/ozonec/src/utils/channel.rs b/ozonec/src/utils/channel.rs new file mode 100644 index 0000000000000000000000000000000000000000..41b2b08b882ed26e1eaa777bb3cb81fd9032d5e4 --- /dev/null +++ b/ozonec/src/utils/channel.rs @@ -0,0 +1,261 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + fmt::Debug, + io::{IoSlice, IoSliceMut}, + marker::PhantomData, + mem, + os::unix::io::RawFd, + slice, +}; + +use anyhow::{bail, Context, Result}; +use nix::{ + sys::{ + socket::{ + recvmsg, sendmsg, setsockopt, socketpair, sockopt, AddressFamily, MsgFlags, SockFlag, + SockType, UnixAddr, + }, + time::TimeVal, + }, + unistd::{self, Pid}, +}; +use serde::{de::DeserializeOwned, Deserialize, Serialize}; + +// Wrapper for messages to be sent between parent and child processes. 
+#[derive(Debug, Serialize, Deserialize)] +pub enum Message { + IdMappingStart, + IdMappingDone, + InitReady(i32), + ContainerCreated, + ExecFailed(String), +} + +pub struct Sender<T> { + fd: RawFd, + phantom: PhantomData<T>, +} + +impl<T> Sender<T> +where + T: Serialize, +{ + pub fn close(&self) -> Result<()> { + Ok(unistd::close(self.fd)?) + } + + pub fn send(&self, msg: T) -> Result<()> { + let msg_vec = serde_json::to_vec(&msg).with_context(|| "Failed to load message")?; + let msg_len = msg_vec.len() as u64; + let iov = [ + IoSlice::new(unsafe { + slice::from_raw_parts((&msg_len as *const u64) as *const u8, mem::size_of::<u64>()) + }), + IoSlice::new(&msg_vec), + ]; + + sendmsg::<UnixAddr>(self.fd, &iov, &[], MsgFlags::empty(), None)?; + Ok(()) + } +} + +pub struct Receiver<T> { + fd: RawFd, + phantom: PhantomData<T>, +} + +impl<T> Receiver<T> +where + T: DeserializeOwned, +{ + pub fn close(&self) -> Result<()> { + Ok(unistd::close(self.fd)?) + } + + pub fn set_timeout(&self, timeout: i64) -> Result<()> { + let timeval = TimeVal::new(0, timeout); + setsockopt(self.fd, sockopt::ReceiveTimeout, &timeval) + .with_context(|| "Failed to set receiver end timeout")?; + Ok(()) + } + + fn max_len_iovec(&self) -> Result<u64> { + let mut len: u64 = 0; + // SAFETY: len and type "u64" are both valid. + let mut iov = [IoSliceMut::new(unsafe { + slice::from_raw_parts_mut((&mut len as *mut u64) as *mut u8, mem::size_of::<u64>()) + })]; + + recvmsg::<UnixAddr>(self.fd, &mut iov, None, MsgFlags::MSG_PEEK)?; + match len { + 0 => bail!("Failed to get maximum length"), + _ => Ok(len), + } + } + + pub fn recv(&self) -> Result<T> { + let msg_len = self.max_len_iovec()?; + let mut received_len: u64 = 0; + let mut buf = vec![0u8; msg_len as usize]; + let bytes = { + let mut iov = [ + IoSliceMut::new(unsafe { + slice::from_raw_parts_mut( + (&mut received_len as *mut u64) as *mut u8, + mem::size_of::<u64>(), + ) + }), + IoSliceMut::new(&mut buf), + ]; + let mut cmsg = nix::cmsg_space!(T); + let msg = recvmsg::<UnixAddr>( + self.fd, + &mut iov, + Some(&mut cmsg), + MsgFlags::MSG_CMSG_CLOEXEC, + )?; + msg.bytes + }; + + match bytes { + 0 => bail!("Received zero length message"), + _ => Ok(serde_json::from_slice(&buf[..]) + .with_context(|| "Failed to read received message")?), + } + } +} + +pub struct Channel<T> { + pub sender: Sender<T>, + pub receiver: Receiver<T>, +} + +impl Channel<Message> { + pub fn new() -> Result<Channel<Message>> { + let (sender_fd, receiver_fd) = socketpair( + AddressFamily::Unix, + SockType::SeqPacket, + None, + SockFlag::SOCK_CLOEXEC, + )?; + let sender = Sender { + fd: sender_fd, + phantom: PhantomData, + }; + let receiver = Receiver { + fd: receiver_fd, + phantom: PhantomData, + }; + + Ok(Channel { sender, receiver }) + } + + pub fn recv_container_created(&self) -> Result<()> { + let msg = self.receiver.recv()?; + match msg { + Message::ContainerCreated => Ok(()), + _ => bail!("Expect receiving ContainerCreated, but got {:?}", msg), + } + } + + pub fn send_container_created(&self) -> Result<()> { + self.sender + .send(Message::ContainerCreated) + .with_context(|| "Failed to send created message to parent process") + } + + pub fn recv_id_mappings(&self) -> Result<()> { + let msg = self.receiver.recv()?; + match msg { + Message::IdMappingStart => Ok(()), + _ => bail!("Expect receiving IdMappingStart, but got {:?}", msg), + } + } + + pub fn send_id_mappings(&self) -> Result<()> { + self.sender.send(Message::IdMappingStart) + } + + pub fn recv_init_pid(&self) -> Result<Pid> { + let msg = self.receiver.recv()?; + match msg { + Message::InitReady(pid) => Ok(Pid::from_raw(pid)), + _ => bail!("Expect
receiving InitReady, but got {:?}", msg), + } + } + + pub fn recv_id_mappings_done(&self) -> Result<()> { + let msg = self.receiver.recv()?; + match msg { + Message::IdMappingDone => Ok(()), + _ => bail!("Expect receiving IdMappingDone, but got {:?}", msg), + } + } + + pub fn send_id_mappings_done(&self) -> Result<()> { + self.sender.send(Message::IdMappingDone) + } + + pub fn send_init_pid(&self, pid: Pid) -> Result<()> { + self.sender + .send(Message::InitReady(pid.as_raw())) + .with_context(|| "Failed to send container process pid") + } +} + +#[cfg(test)] +mod tests { + use nix::sys::wait::{waitpid, WaitStatus}; + use unistd::getpid; + + use crate::linux::clone_process; + + use super::*; + + #[test] + fn test_channel() { + let channel = Channel::::new().unwrap(); + let child = clone_process("test_channel", || { + channel.receiver.close().unwrap(); + + channel.send_container_created().unwrap(); + channel.send_init_pid(getpid()).unwrap(); + channel.send_id_mappings().unwrap(); + channel.send_id_mappings_done().unwrap(); + + channel.sender.close().unwrap(); + Ok(0) + }) + .unwrap(); + + channel.sender.close().unwrap(); + + channel.recv_container_created().unwrap(); + channel.recv_init_pid().unwrap(); + channel.recv_id_mappings().unwrap(); + channel.recv_id_mappings_done().unwrap(); + + channel.receiver.close().unwrap(); + + match waitpid(child, None) { + Ok(WaitStatus::Exited(_, s)) => { + assert_eq!(s, 0); + } + Ok(_) => (), + Err(e) => { + panic!("Failed to waitpid for child process: {e}"); + } + } + } +} diff --git a/ozonec/src/utils/clone.rs b/ozonec/src/utils/clone.rs new file mode 100644 index 0000000000000000000000000000000000000000..2dd99e6629b2f0dfd63b1e122d915f18fb5327de --- /dev/null +++ b/ozonec/src/utils/clone.rs @@ -0,0 +1,125 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::os::unix::io::{AsRawFd, RawFd}; + +use anyhow::{bail, Context, Result}; +use libc::pid_t; +use nix::{errno::errno, unistd::Pid}; + +bitflags::bitflags! 
{ + #[derive(Default)] + pub struct Flags: u64 { + const CHILD_CLEARTID = 0x00200000; + const CHILD_SETTID = 0x01000000; + const FILES = 0x00000400; + const FS = 0x00000200; + const INTO_CGROUP = 0x200000000; + const IO = 0x80000000; + const NEWCGROUP = 0x02000000; + const NEWIPC = 0x08000000; + const NEWNET = 0x40000000; + const NEWNS = 0x00020000; + const NEWPID = 0x20000000; + const NEWTIME = 0x00000080; + const NEWUSER = 0x10000000; + const NEWUTS = 0x04000000; + const PARENT = 0x00008000; + const PARENT_SETTID = 0x00100000; + const PIDFD = 0x00001000; + const PTRACE = 0x00002000; + const SETTLS = 0x00080000; + const SIGHAND = 0x00000800; + const SYSVSEM = 0x00040000; + const THREAD = 0x00010000; + const UNTRACED = 0x00800000; + const VFORK = 0x00004000; + const VM = 0x00000100; + } +} + +#[repr(C, align(8))] +#[derive(Debug, Default)] +pub struct CloneArgs { + pub flags: u64, + pub pid_fd: u64, + pub child_tid: u64, + pub parent_tid: u64, + pub exit_signal: u64, + pub stack: u64, + pub stack_size: u64, + pub tls: u64, + pub cgroup: u64, +} + +#[derive(Default)] +pub struct Clone3<'a> { + flags: Flags, + pidfd: Option<&'a mut RawFd>, + child_tid: Option<&'a mut libc::pid_t>, + parent_tid: Option<&'a mut libc::pid_t>, + exit_signal: u64, + stack: Option<&'a mut [u8]>, + tls: Option<u64>, + cgroup: Option<&'a dyn AsRawFd>, +} + +fn option_as_mut_ptr<T>(o: &mut Option<&mut T>) -> *mut T { + match o { + Some(inner) => *inner as *mut T, + None => std::ptr::null_mut(), + } +} + +fn option_slice_as_mut_ptr<T>(o: &mut Option<&mut [T]>) -> *mut T { + match o { + Some(inner) => inner.as_mut_ptr(), + None => std::ptr::null_mut(), + } +} + +impl<'a> Clone3<'a> { + pub fn exit_signal(&mut self, exit_signal: u64) -> &mut Self { + self.exit_signal = exit_signal; + self + } + + pub fn call(&mut self) -> Result<Pid> { + let clone_args = CloneArgs { + flags: self.flags.bits(), + pid_fd: option_as_mut_ptr(&mut self.pidfd) as u64, + child_tid: option_as_mut_ptr(&mut self.child_tid) as u64, + parent_tid: option_as_mut_ptr(&mut self.parent_tid) as u64, + exit_signal: self.exit_signal, + stack: option_slice_as_mut_ptr(&mut self.stack) as u64, + stack_size: self.stack.as_ref().map(|stack| stack.len()).unwrap_or(0) as u64, + tls: self.tls.unwrap_or(0), + cgroup: self.cgroup.map(AsRawFd::as_raw_fd).unwrap_or(0) as u64, + }; + + // SAFETY: FFI call with valid arguments. + let ret = unsafe { + libc::syscall( + libc::SYS_clone3, + &clone_args as *const CloneArgs, + core::mem::size_of::<CloneArgs>(), + ) + }; + if ret == -1 { + bail!("clone3 error: errno {}", errno()); + } + + Ok(Pid::from_raw( + pid_t::try_from(ret).with_context(|| "Invalid pid")?, + )) + } +} diff --git a/ozonec/src/utils/error.rs b/ozonec/src/utils/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..b3f93728d3a05efd55be82af0c62f8284dcd31ec --- /dev/null +++ b/ozonec/src/utils/error.rs @@ -0,0 +1,49 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details.
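+ +//! Ozonec-wide error variants, attached as `anyhow` context values across the crate, for example `open(path, flags, mode).with_context(|| OzonecErr::OpenFile(path.to_string_lossy().to_string()))?` (an illustrative call that mirrors the pattern used by the other modules).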
+ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum OzonecErr { + #[error("Failed to access /proc/{0}")] + ReadProcPid(i32), + #[error("Failed to access /proc/{0}/status")] + ReadProcStat(i32), + #[error("Failed to open {0}")] + OpenFile(String), + #[error("Failed to create directory {0}")] + CreateDir(String), + #[error("Failed to mount {0}")] + Mount(String), + #[error("Failed to access /proc/self")] + AccessProcSelf, + #[error("Failed to get mountinfo")] + GetMntInfo, + #[error("Dup2 {0} error")] + Dup2(String), + #[error("Failed to get all capabilities of {0} set")] + GetAllCaps(String), + #[error("Failed to set the capability set {0}")] + SetCaps(String), + #[error("Failed to add architecture to seccomp filter")] + AddScmpArch, + #[error("Failed to get current directory")] + GetCurDir, + #[error("Failed to load container state")] + LoadConState, + #[error("Failed to get oci state")] + GetOciState, + #[error("Failed to bind device: {0}")] + BindDev(String), + #[error("Close fd error")] + CloseFd, +} diff --git a/ozonec/src/utils/logger.rs b/ozonec/src/utils/logger.rs new file mode 100644 index 0000000000000000000000000000000000000000..1251de8a9a1026c1736f56d95947072d88cebc38 --- /dev/null +++ b/ozonec/src/utils/logger.rs @@ -0,0 +1,292 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + fs::{remove_file, rename, File, OpenOptions}, + io::{stderr, Write}, + num::Wrapping, + os::unix::fs::OpenOptionsExt, + path::{Path, PathBuf}, + sync::Mutex, + time::UNIX_EPOCH, +}; + +use anyhow::{Context, Result}; +use log::{set_boxed_logger, set_max_level, Level, LevelFilter, Log, Metadata, Record}; +use nix::unistd::{getpid, gettid}; + +use super::OzonecErr; + +// Maximum size of log file is 100MB. +const LOG_ROTATE_SIZE_MAX: usize = 100 * 1024 * 1024; +// Logs are retained for seven days at most. +const LOG_ROTATE_CNT_MAX: u8 = 7; + +struct LogRotate { + handler: Box, + path: String, + size: Wrapping, + created_day: i32, +} + +impl LogRotate { + fn rotate(&mut self, inc_size: usize) -> Result<()> { + if self.path.is_empty() { + return Ok(()); + } + + self.size += Wrapping(inc_size); + let seconds = wall_time().0; + let today = formatted_time(seconds)[2]; + if self.size < Wrapping(LOG_ROTATE_SIZE_MAX) && self.created_day == today { + return Ok(()); + } + + // Delete oldest log file. + let mut rotate_cnt = LOG_ROTATE_CNT_MAX - 1; + let olddest = format!("{}{}", self.path, rotate_cnt); + if Path::new(&olddest).exists() { + remove_file(&olddest).with_context(|| "Failed to delete olddest log")?; + } + + // Rename remaining logs. 
+ let mut new_log = olddest; + while rotate_cnt != 0 { + let mut old_log = self.path.clone(); + + rotate_cnt -= 1; + if rotate_cnt != 0 { + old_log += &rotate_cnt.to_string(); + } + + if Path::new(&old_log).exists() { + rename(&old_log, &new_log) + .with_context(|| format!("Failed to rename {} to {}", old_log, new_log))?; + } + new_log = old_log; + } + + self.handler = Box::new( + open_log_file(&PathBuf::from(self.path.clone())) + .with_context(|| format!("Failed to convert {}", self.path))?, + ); + self.size = Wrapping(0); + self.created_day = today; + Ok(()) + } +} + +fn open_log_file(path: &PathBuf) -> Result { + OpenOptions::new() + .read(false) + .write(true) + .append(true) + .create(true) + .mode(0o640) + .open(path) + .with_context(|| OzonecErr::OpenFile(path.to_string_lossy().to_string())) +} + +fn formatted_time(seconds: i64) -> [i32; 6] { + // SAFETY: an all-zero value is valid for libc::tm. + let mut ti: libc::tm = unsafe { std::mem::zeroed() }; + unsafe { + // SAFETY: seconds and ti are both local variables and valid. + libc::localtime_r(&seconds, &mut ti); + } + [ + ti.tm_year + 1900, + ti.tm_mon + 1, + ti.tm_mday, + ti.tm_hour, + ti.tm_min, + ti.tm_sec, + ] +} + +fn wall_time() -> (i64, i64) { + let mut ts = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + unsafe { + // SAFETY: ts is a local variable and valid. + libc::clock_gettime(libc::CLOCK_REALTIME, &mut ts); + } + (ts.tv_sec, ts.tv_nsec) +} + +fn formatted_now() -> String { + let (sec, nsec) = wall_time(); + let formatted_time = formatted_time(sec); + + format!( + "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}:{:09}", + formatted_time[0], + formatted_time[1], + formatted_time[2], + formatted_time[3], + formatted_time[4], + formatted_time[5], + nsec + ) +} + +struct Logger { + rotate: Mutex, + level: Level, +} + +impl Logger { + fn new(path: &Option, level: Level) -> Result { + let (log_file, log_size, created_day) = match path { + Some(p) => { + let file = Box::new(open_log_file(&p)?); + let metadata = file.metadata().with_context(|| "Failed to get metadata")?; + let mod_time = metadata + .modified() + .with_context(|| "Failed to get modify time")?; + let seconds = mod_time + .duration_since(UNIX_EPOCH) + .with_context(|| "Failed to get duration time")? 
+ .as_secs(); + let log_size = Wrapping(metadata.len() as usize); + let created_day = formatted_time(seconds as i64)[2]; + (file as Box, log_size, created_day) + } + None => (Box::new(stderr()) as Box, Wrapping(0), 0), + }; + + let rotate = Mutex::new(LogRotate { + handler: log_file, + path: path + .as_ref() + .unwrap_or(&PathBuf::new()) + .to_string_lossy() + .to_string(), + size: log_size, + created_day, + }); + Ok(Self { rotate, level }) + } +} + +impl Log for Logger { + fn enabled(&self, metadata: &Metadata) -> bool { + metadata.level() <= self.level + } + + fn log(&self, record: &Record) { + if !self.enabled(record.metadata()) { + return; + } + + let fmt_msg = format_args!( + "{:<5}: [{}][{}][{}: {}]:{}: {}\n", + formatted_now(), + getpid(), + gettid(), + record.file().unwrap_or(""), + record.line().unwrap_or(0), + record.level(), + record.args() + ) + .to_string(); + + let mut log_rotate = self.rotate.lock().unwrap(); + if let Err(e) = log_rotate.handler.write_all(fmt_msg.as_bytes()) { + eprintln!("Failed to log message: {:?}", e); + return; + } + if let Err(e) = log_rotate.rotate(fmt_msg.as_bytes().len()) { + eprintln!("Failed to rotate log files: {:?}", e); + } + } + + fn flush(&self) {} +} + +pub fn init(path: &Option, debug: bool) -> Result<()> { + let log_level = if debug { + Level::Debug + } else { + match std::env::var("OZONEC_LOG_LEVEL") { + Ok(level) => match level.to_lowercase().as_str() { + "error" => Level::Error, + "warn" => Level::Warn, + "info" => Level::Info, + "debug" => Level::Debug, + "trace" => Level::Trace, + _ => Level::Info, + }, + _ => Level::Info, + } + }; + + let logger = Box::new(Logger::new(path, log_level)?); + set_boxed_logger(logger) + .map(|_| set_max_level(LevelFilter::Trace)) + .with_context(|| "Logger has been already set")?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use std::{fs, os::unix::fs::MetadataExt}; + + use super::*; + + #[test] + fn test_logger_init() { + assert!(init(&Some(PathBuf::from("/tmp/ozonec.log")), false).is_ok()); + remove_file(Path::new("/tmp/ozonec.log")).unwrap(); + } + + #[test] + fn test_logger_rotate() { + let log_file = PathBuf::from("/tmp/ozonec.log"); + let logger = Logger::new(&Some(log_file.clone()), Level::Debug).unwrap(); + let mut locked_rotate = logger.rotate.lock().unwrap(); + // Time in metadata are not changed as the file descriptor is still opened. 
+ let inode = fs::metadata(&log_file).unwrap().ino(); + for i in 1..LOG_ROTATE_CNT_MAX { + let file = format!("{}{}", locked_rotate.path, i); + let path = Path::new(&file); + File::create(path).unwrap(); + } + + locked_rotate.size = Wrapping(0); + assert!(locked_rotate.rotate(1024).is_ok()); + let mut new_inode = fs::metadata(&log_file).unwrap().ino(); + assert_eq!(inode, new_inode); + + locked_rotate.size = Wrapping(LOG_ROTATE_SIZE_MAX); + assert!(locked_rotate.rotate(1024).is_ok()); + new_inode = fs::metadata(&log_file).unwrap().ino(); + assert_ne!(inode, new_inode); + assert_eq!(locked_rotate.size, Wrapping(0)); + + locked_rotate.size = Wrapping(0); + locked_rotate.created_day = formatted_time(wall_time().0)[2] - 1; + assert!(locked_rotate.rotate(1024).is_ok()); + new_inode = fs::metadata(&log_file).unwrap().ino(); + assert_ne!(inode, new_inode); + assert_eq!(locked_rotate.size, Wrapping(0)); + + for i in 1..LOG_ROTATE_CNT_MAX { + let file = format!("{}{}", locked_rotate.path, i); + let path = Path::new(&file); + remove_file(path).unwrap(); + } + remove_file(Path::new("/tmp/ozonec.log")).unwrap(); + } +} diff --git a/ozonec/src/utils/mod.rs b/ozonec/src/utils/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..59da672aff35e40aa258150e826563cb5af770d9 --- /dev/null +++ b/ozonec/src/utils/mod.rs @@ -0,0 +1,124 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod logger; +pub mod prctl; + +mod channel; +mod clone; +mod error; + +pub use channel::{Channel, Message}; +pub use clone::Clone3; +pub use error::OzonecErr; + +use std::{ + fs::create_dir_all, + mem, + os::unix::io::{AsRawFd, RawFd}, + path::{Path, PathBuf}, +}; + +use anyhow::{bail, Context, Result}; +use nix::{ + errno::errno, + fcntl::{open, OFlag}, + sys::stat::Mode, + NixPath, +}; + +struct OpenHow(libc::open_how); + +bitflags::bitflags! { + struct ResolveFlag: libc::c_ulonglong { + const RESOLVE_BENEATH = libc::RESOLVE_BENEATH; + const RESOLVE_IN_ROOT = libc::RESOLVE_IN_ROOT; + const RESOLVE_NO_MAGICLINKS = libc::RESOLVE_NO_MAGICLINKS; + const RESOLVE_NO_SYMLINKS = libc::RESOLVE_NO_SYMLINKS; + const RESOLVE_NO_XDEV = libc::RESOLVE_NO_XDEV; + } +} + +impl OpenHow { + fn new() -> Self { + // SAFETY: FFI call with valid arguments. + unsafe { mem::zeroed() } + } + + fn flags(mut self, flags: OFlag) -> Self { + let flags = flags.bits() as libc::c_ulonglong; + self.0.flags = flags; + self + } + + fn mode(mut self, mode: Mode) -> Self { + let mode = mode.bits() as libc::c_ulonglong; + self.0.mode = mode; + self + } + + fn resolve(mut self, resolve: ResolveFlag) -> Self { + let resolve = resolve.bits() as libc::c_ulonglong; + self.0.resolve = resolve; + self + } +} + +/// Get a file descriptor by openat2 with `root` path, relative `target` path in `root` +/// and whether is director or not. If the target directory or file doesn't exist, create +/// automatically. 
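+/// +/// A minimal sketch of the intended call pattern (the paths below are hypothetical and only for illustration): +/// +/// ```ignore +/// let fd = openat2_in_root(Path::new("/run/ozonec/rootfs"), Path::new("dev/null"), false)?; +/// let host_path = proc_fd_path(fd); +/// ```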
+pub fn openat2_in_root(root: &Path, target: &Path, is_dir: bool) -> Result { + let mut flags = OFlag::O_CLOEXEC; + let mode; + if is_dir { + flags |= OFlag::O_DIRECTORY | OFlag::O_PATH; + mode = Mode::empty(); + create_dir_all(root.join(target)) + .with_context(|| OzonecErr::CreateDir(target.to_string_lossy().to_string()))?; + } else { + flags |= OFlag::O_CREAT; + mode = Mode::S_IRWXU; + }; + + let mut open_how = OpenHow::new() + .flags(flags) + .mode(mode) + .resolve(ResolveFlag::RESOLVE_IN_ROOT); + let dirfd = open(root, flags & !OFlag::O_CREAT, Mode::empty()) + .with_context(|| OzonecErr::OpenFile(root.to_string_lossy().to_string()))?; + + // SAFETY: FFI call with valid arguments. + let fd = target + .with_nix_path(|p| unsafe { + libc::syscall( + libc::SYS_openat2, + dirfd.as_raw_fd(), + p.as_ptr(), + &mut open_how as *mut OpenHow, + mem::size_of::(), + ) + }) + .with_context(|| "with_nix_path error")?; + if fd < 0 { + bail!( + "openat2 {} error with RESOLVE_IN_ROOT: {}", + target.display(), + errno() + ); + } + Ok(RawFd::try_from(fd)?) +} + +/// Build path "/proc/self/fd/{}" with an opened file descriptor. +pub fn proc_fd_path(dirfd: RawFd) -> PathBuf { + PathBuf::from(format!("/proc/self/fd/{}", dirfd)) +} diff --git a/ozonec/src/utils/prctl.rs b/ozonec/src/utils/prctl.rs new file mode 100644 index 0000000000000000000000000000000000000000..5bc05441f7a484fa40589d3ce303d71c526fe484 --- /dev/null +++ b/ozonec/src/utils/prctl.rs @@ -0,0 +1,94 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::ffi::CString; + +use anyhow::{bail, Result}; +use libc::{c_int, c_ulong, prctl}; +use nix::errno::errno; + +#[allow(non_camel_case_types)] +enum PrctlOption { + PR_SET_DUMPABLE = 4, + PR_SET_KEEPCAPS = 8, + PR_SET_NAME = 15, + PR_SET_NO_NEW_PRIVS = 38, +} + +pub fn set_dumpable(dumpable: bool) -> Result<()> { + // SAFETY: FFI call with valid arguments. + let ret = unsafe { + prctl( + PrctlOption::PR_SET_DUMPABLE as c_int, + dumpable as c_ulong, + 0, + 0, + 0, + ) + }; + if ret != 0 { + bail!("errno {}", errno()); + } + Ok(()) +} + +pub fn set_keep_capabilities(keep_capabilities: bool) -> Result<()> { + // SAFETY: FFI call with valid arguments. + let ret = unsafe { + prctl( + PrctlOption::PR_SET_KEEPCAPS as c_int, + keep_capabilities as c_ulong, + 0, + 0, + 0, + ) + }; + if ret != 0 { + bail!("errno {}", errno()); + } + Ok(()) +} + +pub fn set_no_new_privileges(new_privileges: bool) -> Result<()> { + // SAFETY: FFI call with valid arguments. + let ret = unsafe { + prctl( + PrctlOption::PR_SET_NO_NEW_PRIVS as c_int, + new_privileges as c_ulong, + 0, + 0, + 0, + ) + }; + if ret != 0 { + bail!("errno {}", errno()); + } + Ok(()) +} + +pub fn set_name(name: &str) -> Result<()> { + let binding = CString::new(name).unwrap(); + // SAFETY: FFI call with valid arguments. 
+ let ret = unsafe { + prctl( + PrctlOption::PR_SET_NAME as c_int, + binding.as_ptr() as c_ulong, + 0, + 0, + 0, + ) + }; + if ret != 0 { + bail!("errno {}", errno()); + } + Ok(()) +} diff --git a/ozonec/tests/README.md b/ozonec/tests/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c33982d51f94803e733ca9c9f83f6a087af8ba12 --- /dev/null +++ b/ozonec/tests/README.md @@ -0,0 +1,30 @@ +# Integration Tests + +ozonec uses [bats (Bash Automated Testing System)](https://github.com/bats-core/bats-core) framework to run +integration tests written in *bash*. + +## Before running tests + +Install [bats (Bash Automated Testing System)](https://github.com/bats-core/bats-core#installing-bats-from-source) from source: +``` +$ git clone https://github.com/bats-core/bats-core.git +$ cd bats-core +$ ./install.sh /usr/local +``` + +And *jq* is may also needed to modify json file in tests. + +## Running tests + +You can run tests using bats directly. For example: +``` +bats ./ +``` +Or you can just run a single test file. For example: +``` +bats create.bats +``` + +## Writing tests + +Please refer to [bats (Writing tests)](https://bats-core.readthedocs.io/en/stable/writing-tests.html). \ No newline at end of file diff --git a/ozonec/tests/create.bats b/ozonec/tests/create.bats new file mode 100644 index 0000000000000000000000000000000000000000..02d44c8b9625ac018a710caef9b2472afec20314 --- /dev/null +++ b/ozonec/tests/create.bats @@ -0,0 +1,87 @@ +#! /usr/bin/env bats + +load helpers + +setup_file() +{ + setup_bundle +} + +teardown_file() +{ + remove_test_dir +} + +setup() +{ + CONTAINER_ID=$(uuidgen) + ROOT_DIR="$DEFAULT_ROOT_DIR" +} + +teardown() +{ + if [ "$ROOT_DIR" == "$DEFAULT_ROOT_DIR" ]; then + ozonec kill "$CONTAINER_ID" 9 + ozonec delete "$CONTAINER_ID" + else + ozonec --root "$ROOT_DIR" kill "$CONTAINER_ID" 9 + ozonec --root "$ROOT_DIR" delete "$CONTAINER_ID" + fi +} + +@test "ozonec create" { + ozonec create "$CONTAINER_ID" 3>&- + check_container_status "$CONTAINER_ID" created "" + [ -d "$ROOT_DIR/$CONTAINER_ID" ] + [ -S "$ROOT_DIR/$CONTAINER_ID/notify.sock" ] + [ -f "$ROOT_DIR/$CONTAINER_ID/state.json" ] +} + +@test "ozonec create with absolute path of rootfs" { + local rootfs_dir="$(pwd)/rootfs" + update_config '.root.path = "'$rootfs_dir'"' + ozonec create "$CONTAINER_ID" 3>&- + check_container_status "$CONTAINER_ID" created "" +} + +@test "ozonec create with pidfile" { + ozonec create --pid-file ./pidfile "$CONTAINER_ID" 3>&- + local pid=$(cat ./pidfile) + check_container_status "$CONTAINER_ID" created "" "$pid" +} + +@test "ozonec create with duplicate id" { + ozonec create "$CONTAINER_ID" 3>&- + check_container_status "$CONTAINER_ID" created "" + ! 
ozonec create "$CONTAINER_ID" 3>&- +} + +@test "ozonec create with absolute bundle path" { + local bundle_dir="$(dirname `pwd`)/bundle" + ozonec create --bundle "$bundle_dir" "$CONTAINER_ID" 3>&- + check_container_status "$CONTAINER_ID" created "" +} + +@test "ozonec create with relative bundle path" { + local bundle_dir="../bundle" + ozonec create --bundle "$bundle_dir" "$CONTAINER_ID" 3>&- + check_container_status "$CONTAINER_ID" created "" +} + +@test "ozonec create with absolute root path" { + ROOT_DIR="$(dirname `pwd`)/root" + ozonec --root "$ROOT_DIR" create "$CONTAINER_ID" 3>&- + check_container_status "$CONTAINER_ID" created "$ROOT_DIR" + [ -d "$ROOT_DIR/$CONTAINER_ID" ] + [ -S "$ROOT_DIR/$CONTAINER_ID/notify.sock" ] + [ -f "$ROOT_DIR/$CONTAINER_ID/state.json" ] +} + +@test "ozonec create with relative root path" { + ROOT_DIR="../root" + ozonec --root "$ROOT_DIR" create "$CONTAINER_ID" 3>&- + check_container_status "$CONTAINER_ID" created "$ROOT_DIR" + [ -d "$ROOT_DIR/$CONTAINER_ID" ] + [ -S "$ROOT_DIR/$CONTAINER_ID/notify.sock" ] + [ -f "$ROOT_DIR/$CONTAINER_ID/state.json" ] +} \ No newline at end of file diff --git a/ozonec/tests/exec.bats b/ozonec/tests/exec.bats new file mode 100644 index 0000000000000000000000000000000000000000..ffdf6c389518bb8c77af79d59a9022650a8bf0da --- /dev/null +++ b/ozonec/tests/exec.bats @@ -0,0 +1,33 @@ +#! /usr/bin/env bats + +load helpers + +setup_file() +{ + setup_bundle + + export ROOT_DIR="$TEST_DIR/root" + export CONTAINER_ID=$(uuidgen) + + ozonec --root "$ROOT_DIR" create "$CONTAINER_ID" 3>&- + check_container_status "$CONTAINER_ID" created "$ROOT_DIR" + ozonec --root "$ROOT_DIR" start "$CONTAINER_ID" + check_container_status "$CONTAINER_ID" running "$ROOT_DIR" +} + +teardown_file() +{ + ozonec --root "$ROOT_DIR" kill "$CONTAINER_ID" 9 + ozonec --root "$ROOT_DIR" delete "$CONTAINER_ID" + remove_test_dir +} + +@test "ozonec exec" { + ozonec --root "$ROOT_DIR" exec "$CONTAINER_ID" -- ls -alh +} + +@test "ozonec exec with pidfile" { + ozonec --root "$ROOT_DIR" exec --pid-file pidfile "$CONTAINER_ID" -- ls -alh + local pid=$(cat pidfile) + [[ "$pid" -gt 0 ]] +} \ No newline at end of file diff --git a/ozonec/tests/helpers.bash b/ozonec/tests/helpers.bash new file mode 100644 index 0000000000000000000000000000000000000000..781fa6539ffc347cf6de3dc0d2d010652abdb19d --- /dev/null +++ b/ozonec/tests/helpers.bash @@ -0,0 +1,46 @@ +#! /bin/bash + +bats_require_minimum_version 1.5.0 + +DEFAULT_ROOT_DIR="/var/run/user/$(echo $UID)/ozonec" + +# Reformat config.json file with jq command. +function update_config() +{ + jq "$@" config.json | awk 'BEGIN{RS="";getline<"-";print>ARGV[1]}' config.json +} + +function setup_bundle() +{ + # Directory for each container. 
+ TEST_DIR=$(mktemp -d "$BATS_RUN_TMPDIR/ozonec.XXXXXX") + chmod a+x "$TEST_DIR" "$BATS_RUN_TMPDIR" + + local bundle="$BATS_TEST_DIRNAME/bundle.tar.gz" + tar --exclude 'rootfs/dev/*' -C "$TEST_DIR" -xf "$bundle" + cd "$TEST_DIR/bundle" +} + +function remove_test_dir() +{ + rm -rf "$TEST_DIR" +} + +function check_container_status() { + local container_id="$1" + local state="$2" + local root="$3" + + if [ "$root" == "" ]; then + run ozonec state "$container_id" + else + run ozonec --root "$root" state "$container_id" + fi + [[ $status -eq 0 ]] + [[ "$output" == *"\"status\": \"$state\""* ]] + + if [ $# -gt 3 ]; then + local pid="$4" + [[ "$(expr match "$output" '.*"pid": \([0-9]*\).*')" == "$pid" ]] + fi +} \ No newline at end of file diff --git a/pci/Cargo.toml b/pci/Cargo.toml deleted file mode 100644 index e96649c85ac4ee36f7b82ea2590bba7ca0978f1c..0000000000000000000000000000000000000000 --- a/pci/Cargo.toml +++ /dev/null @@ -1,25 +0,0 @@ -[package] -name = "pci" -version = "2.1.0" -authors = ["Huawei StratoVirt Team"] -edition = "2018" -license = "Mulan PSL v2" -description = "PCI" - -[dependencies] -byteorder = "1.3.4" -error-chain = "0.12.4" -kvm-bindings = ">=0.3.0" -kvm-ioctls = "0.6.0" -libc = ">=0.2.71" -log = "0.4.8" -vmm-sys-util = ">=0.7.0" -once_cell = "1.9.0" -address_space = { path = "../address_space" } -hypervisor = { path = "../hypervisor" } -machine_manager = { path = "../machine_manager" } -migration = { path = "../migration" } -migration_derive = { path = "../migration_derive" } -sysbus = { path = "../sysbus" } -util = { path = "../util" } -acpi = { path = "../acpi" } diff --git a/src/main.rs b/src/main.rs index 1b974ee8288601fb33a8d478ab49828a263d596e..c2e81909ef1f6cfa922f1842b759b4f972b563e2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,64 +10,83 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -#[macro_use] -extern crate error_chain; -#[macro_use] -extern crate log; - use std::io::Write; -use std::os::unix::fs::OpenOptionsExt; +use std::process::{ExitCode, Termination}; use std::sync::{Arc, Mutex}; -use machine::{LightMachine, MachineOps, StdMachine}; +use anyhow::{bail, Context, Result}; +use log::{error, info}; +use thiserror::Error; + +use machine::{type_init, LightMachine, MachineOps, StdMachine}; use machine_manager::{ cmdline::{check_api_channel, create_args_parser, create_vmconfig}, config::MachineType, config::VmConfig, event_loop::EventLoop, - qmp::QmpChannel, - signal_handler::{exit_with_code, register_kill_signal, VM_EXIT_GENE_ERR}, - socket::Socket, + qmp::qmp_channel::QmpChannel, + qmp::qmp_socket::Socket, + signal_handler::{exit_with_code, handle_signal, register_kill_signal, VM_EXIT_GENE_ERR}, temp_cleaner::TempCleaner, + test_server::TestSock, }; use util::loop_context::EventNotifierHelper; -use util::unix::{parse_uri, UnixPath}; +use util::test_helper::{is_test_enabled, set_test_enabled}; use util::{arg_parser, daemonize::daemonize, logger, set_termi_canon_mode}; -error_chain! 
{ - links { - Manager(machine_manager::errors::Error, machine_manager::errors::ErrorKind); - Util(util::errors::Error, util::errors::ErrorKind); - Machine(machine::errors::Error, machine::errors::ErrorKind); - } - foreign_links { - Io(std::io::Error); - } +#[derive(Error, Debug)] +enum MainError { + #[error("Manager")] + Manager { + #[from] + source: machine_manager::error::MachineManagerError, + }, + #[error("Util")] + Util { + #[from] + source: util::error::UtilError, + }, + #[error("Machine")] + Machine { + #[from] + source: machine::error::MachineError, + }, + #[error("Io")] + Io { + #[from] + source: std::io::Error, + }, } -quick_main!(run); +fn main() -> ExitCode { + match run() { + Ok(ret) => ret.report(), + Err(ref e) => { + write!(&mut std::io::stderr(), "{}", format_args!("{:?}\r\n", e)) + .expect("Error writing to stderr"); + + ExitCode::FAILURE + } + } +} fn run() -> Result<()> { + type_init()?; + let cmd_args = create_args_parser().get_matches()?; - if let Some(logfile_path) = cmd_args.value_of("display log") { - if logfile_path.is_empty() { - logger::init_logger_with_env(Some(Box::new(std::io::stdout()))) - .chain_err(|| "Failed to init logger.")?; - } else { - let logfile = std::fs::OpenOptions::new() - .read(false) - .write(true) - .append(true) - .create(true) - .mode(0o640) - .open(logfile_path) - .chain_err(|| "Failed to open log file")?; - logger::init_logger_with_env(Some(Box::new(logfile))) - .chain_err(|| "Failed to init logger.")?; + if cmd_args.is_present("mod-test") { + let machine = cmd_args.value_of("machine").unwrap_or_default(); + let accel = cmd_args.value_of("accel").unwrap_or_default(); + if !machine.contains("accel=test") && accel.ne("test") { + bail!("MST can only use test accel!"); } + set_test_enabled(); } + let logfile_path = cmd_args.value_of("display log").unwrap_or_default(); + logger::init_log(logfile_path)?; + std::panic::set_hook(Box::new(|panic_msg| { set_termi_canon_mode().expect("Failed to set terminal to canonical mode."); @@ -84,7 +103,13 @@ fn run() -> Result<()> { exit_with_code(VM_EXIT_GENE_ERR); })); - let mut vm_config: VmConfig = create_vmconfig(&cmd_args)?; + let mut vm_config: VmConfig = match create_vmconfig(&cmd_args) { + Ok(vm_cfg) => vm_cfg, + Err(e) => { + error!("Failed to create vmconfig {:?}", e); + return Err(e); + } + }; info!("VmConfig is {:?}", vm_config); match real_main(&cmd_args, &mut vm_config) { @@ -92,19 +117,11 @@ fn run() -> Result<()> { info!("MainLoop over, Vm exit"); // clean temporary file TempCleaner::clean(); + handle_signal(); } Err(ref e) => { set_termi_canon_mode().expect("Failed to set terminal to canonical mode."); - if cmd_args.is_present("display log") { - error!("{}", error_chain::ChainedError::display_chain(e)); - } else { - write!( - &mut std::io::stderr(), - "{}", - error_chain::ChainedError::display_chain(e) - ) - .expect("Failed to write to stderr"); - } + error!("{}", format!("{:?}\r\n", e)); // clean temporary file TempCleaner::clean(); exit_with_code(VM_EXIT_GENE_ERR); @@ -139,50 +156,65 @@ fn real_main(cmd_args: &arg_parser::ArgMatches, vm_config: &mut VmConfig) -> Res let mut sockets = Vec::new(); let vm: Arc> = match vm_config.machine_config.mach_type { MachineType::MicroVm => { + if is_test_enabled() { + panic!("module test framework does not support microvm.") + } let vm = Arc::new(Mutex::new( - LightMachine::new(vm_config).chain_err(|| "Failed to init MicroVM")?, + LightMachine::new(vm_config).with_context(|| "Failed to init MicroVM")?, )); - MachineOps::realize(&vm, vm_config, 
cmd_args.is_present("incoming")) - .chain_err(|| "Failed to realize micro VM.")?; - EventLoop::set_manager(vm.clone(), None); + MachineOps::realize(&vm, vm_config).with_context(|| "Failed to realize micro VM.")?; + EventLoop::set_manager(vm.clone()); for listener in listeners { - sockets.push(Socket::from_unix_listener(listener, Some(vm.clone()))); + sockets.push(Socket::from_listener(listener, Some(vm.clone()))); } vm } MachineType::StandardVm => { let vm = Arc::new(Mutex::new( - StdMachine::new(vm_config).chain_err(|| "Failed to init StandardVM")?, + StdMachine::new(vm_config).with_context(|| "Failed to init StandardVM")?, )); - MachineOps::realize(&vm, vm_config, cmd_args.is_present("incoming")) - .chain_err(|| "Failed to realize standard VM.")?; - EventLoop::set_manager(vm.clone(), None); + MachineOps::realize(&vm, vm_config) + .with_context(|| "Failed to realize standard VM.")?; + EventLoop::set_manager(vm.clone()); + + if is_test_enabled() { + let sock_path = cmd_args.value_of("mod-test"); + let test_sock = TestSock::new(sock_path.unwrap().as_str(), vm.clone()); + EventLoop::update_event( + EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(test_sock))), + None, + ) + .with_context(|| "Failed to add test socket to MainLoop")?; + } for listener in listeners { - sockets.push(Socket::from_unix_listener(listener, Some(vm.clone()))); + sockets.push(Socket::from_listener(listener, Some(vm.clone()))); } vm } MachineType::None => { + if is_test_enabled() { + panic!("please specify machine type.") + } let vm = Arc::new(Mutex::new( - StdMachine::new(vm_config).chain_err(|| "Failed to init NoneVM")?, + StdMachine::new(vm_config).with_context(|| "Failed to init NoneVM")?, )); - EventLoop::set_manager(vm.clone(), None); + EventLoop::set_manager(vm.clone()); + for listener in listeners { - sockets.push(Socket::from_unix_listener(listener, Some(vm.clone()))); + sockets.push(Socket::from_listener(listener, Some(vm.clone()))); } vm } }; - if let Some(uri) = cmd_args.value_of("incoming") { - if let (UnixPath::File, path) = parse_uri(&uri)? 
{ - migration::MigrationManager::restore_snapshot(&path) - .chain_err(|| "Failed to start with incoming migration.")?; - } else { - bail!("Unsupported incoming unix path type.") - } + let balloon_switch_on = vm_config.dev_name.contains_key("balloon"); + if !cmd_args.is_present("disable-seccomp") { + vm.lock() + .unwrap() + .register_seccomp(balloon_switch_on) + .with_context(|| "Failed to register seccomp rules.")?; } for socket in sockets { @@ -190,22 +222,12 @@ fn real_main(cmd_args: &arg_parser::ArgMatches, vm_config: &mut VmConfig) -> Res EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(socket))), None, ) - .chain_err(|| "Failed to add api event to MainLoop")?; + .with_context(|| "Failed to add api event to MainLoop")?; } - vm.lock() - .unwrap() - .run(cmd_args.is_present("freeze_cpu")) - .chain_err(|| "Failed to start VM.")?; - - let balloon_switch_on = vm_config.dev_name.get("balloon").is_some(); - if !cmd_args.is_present("disable-seccomp") { - vm.lock() - .unwrap() - .register_seccomp(balloon_switch_on) - .chain_err(|| "Failed to register seccomp rules.")?; - } + machine::vm_run(&vm, cmd_args).with_context(|| "Failed to start VM.")?; - EventLoop::loop_run().chain_err(|| "MainLoop exits unexpectedly: error occurs")?; + EventLoop::loop_run().with_context(|| "MainLoop exits unexpectedly: error occurs")?; + EventLoop::loop_clean(); Ok(()) } diff --git a/sysbus/Cargo.toml b/sysbus/Cargo.toml deleted file mode 100644 index b7b798da7da200ca8a2b7a6fd11ec774ab6a3122..0000000000000000000000000000000000000000 --- a/sysbus/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "sysbus" -version = "2.1.0" -authors = ["Huawei StratoVirt Team"] -edition = "2018" -license = "Mulan PSL v2" -description = "Emulate system bus" - -[dependencies] -error-chain = "0.12.4" -kvm-ioctls = "0.6.0" -vmm-sys-util = ">=0.7.0" -acpi = { path = "../acpi" } -address_space = { path = "../address_space" } -hypervisor = { path = "../hypervisor" } diff --git a/sysbus/src/lib.rs b/sysbus/src/lib.rs deleted file mode 100644 index 9b0999012248e450cae53e6b16d96fe803282f7f..0000000000000000000000000000000000000000 --- a/sysbus/src/lib.rs +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. -// -// StratoVirt is licensed under Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan -// PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. - -#[macro_use] -extern crate error_chain; - -pub mod errors { - error_chain! 
{ - links { - AddressSpace(address_space::errors::Error, address_space::errors::ErrorKind); - Hypervisor(hypervisor::errors::Error, hypervisor::errors::ErrorKind); - } - foreign_links { - KvmIoctl(kvm_ioctls::Error); - } - } -} - -use std::sync::{Arc, Mutex}; - -use acpi::{AmlBuilder, AmlScope}; -use address_space::{AddressSpace, GuestAddress, Region, RegionIoEventFd, RegionOps}; -use hypervisor::kvm::KVM_FDS; -use vmm_sys_util::eventfd::EventFd; - -use errors::{Result, ResultExt}; - -pub struct SysBus { - #[cfg(target_arch = "x86_64")] - pub sys_io: Arc, - pub sys_mem: Arc, - pub devices: Vec>>, - pub free_irqs: (i32, i32), - pub min_free_irq: i32, - pub mmio_region: (u64, u64), - pub min_free_base: u64, -} - -impl SysBus { - pub fn new( - #[cfg(target_arch = "x86_64")] sys_io: &Arc, - sys_mem: &Arc, - free_irqs: (i32, i32), - mmio_region: (u64, u64), - ) -> Self { - Self { - #[cfg(target_arch = "x86_64")] - sys_io: sys_io.clone(), - sys_mem: sys_mem.clone(), - devices: Vec::new(), - free_irqs, - min_free_irq: free_irqs.0, - mmio_region, - min_free_base: mmio_region.0, - } - } - - pub fn build_region_ops(&self, dev: &Arc>) -> RegionOps { - let cloned_dev = dev.clone(); - let read_ops = move |data: &mut [u8], addr: GuestAddress, offset: u64| -> bool { - cloned_dev.lock().unwrap().read(data, addr, offset) - }; - - let cloned_dev = dev.clone(); - let write_ops = move |data: &[u8], addr: GuestAddress, offset: u64| -> bool { - cloned_dev.lock().unwrap().write(data, addr, offset) - }; - - RegionOps { - read: Arc::new(read_ops), - write: Arc::new(write_ops), - } - } - - pub fn attach_device( - &mut self, - dev: &Arc>, - region_base: u64, - region_size: u64, - ) -> Result<()> { - let region_ops = self.build_region_ops(dev); - let region = Region::init_io_region(region_size, region_ops); - let locked_dev = dev.lock().unwrap(); - - region.set_ioeventfds(&locked_dev.ioeventfds()); - match locked_dev.get_type() { - SysBusDevType::Serial if cfg!(target_arch = "x86_64") => { - #[cfg(target_arch = "x86_64")] - self.sys_io - .root() - .add_subregion(region, region_base) - .chain_err(|| { - format!( - "Failed to register region in I/O space: offset={},size={}", - region_base, region_size - ) - })?; - } - SysBusDevType::FwCfg if cfg!(target_arch = "x86_64") => { - #[cfg(target_arch = "x86_64")] - self.sys_io - .root() - .add_subregion(region, region_base) - .chain_err(|| { - format!( - "Failed to register region in I/O space: offset 0x{:x}, size {}", - region_base, region_size - ) - })?; - } - SysBusDevType::Rtc if cfg!(target_arch = "x86_64") => { - #[cfg(target_arch = "x86_64")] - self.sys_io - .root() - .add_subregion(region, region_base) - .chain_err(|| { - format!( - "Failed to register region in I/O space: offset 0x{:x}, size {}", - region_base, region_size - ) - })?; - } - _ => self - .sys_mem - .root() - .add_subregion(region, region_base) - .chain_err(|| { - format!( - "Failed to register region in memory space: offset={},size={}", - region_base, region_size - ) - })?, - } - - self.devices.push(dev.clone()); - Ok(()) - } -} - -#[derive(Copy, Clone)] -pub struct SysRes { - pub region_base: u64, - pub region_size: u64, - pub irq: i32, -} - -impl Default for SysRes { - fn default() -> Self { - Self { - region_base: 0, - region_size: 0, - irq: -1, - } - } -} - -#[allow(clippy::upper_case_acronyms)] -#[derive(Eq, PartialEq)] -pub enum SysBusDevType { - Serial, - Rtc, - VirtioMmio, - #[cfg(target_arch = "aarch64")] - PL011, - FwCfg, - Flash, - Others, -} - -/// Operations for sysbus devices. 
-pub trait SysBusDevOps: Send + AmlBuilder { - /// Read function of device. - /// - /// # Arguments - /// - /// * `data` - A u8-type array. - /// * `base` - Base address of this device. - /// * `offset` - Offset from base address. - fn read(&mut self, data: &mut [u8], base: GuestAddress, offset: u64) -> bool; - - /// Write function of device. - /// - /// # Arguments - /// - /// * `data` - A u8-type array. - /// * `base` - Base address of this device. - /// * `offset` - Offset from base address. - fn write(&mut self, data: &[u8], base: GuestAddress, offset: u64) -> bool; - - fn ioeventfds(&self) -> Vec { - Vec::new() - } - - fn interrupt_evt(&self) -> Option<&EventFd> { - None - } - - fn set_irq(&mut self, sysbus: &mut SysBus) -> Result { - let irq = sysbus.min_free_irq; - if irq > sysbus.free_irqs.1 { - bail!("IRQ number exhausted."); - } - - match self.interrupt_evt() { - None => Ok(-1_i32), - Some(evt) => { - KVM_FDS.load().register_irqfd(evt, irq as u32)?; - sysbus.min_free_irq = irq + 1; - Ok(irq) - } - } - } - - fn get_sys_resource(&mut self) -> Option<&mut SysRes> { - None - } - - fn set_sys_resource( - &mut self, - sysbus: &mut SysBus, - region_base: u64, - region_size: u64, - ) -> Result<()> { - let irq = self.set_irq(sysbus)?; - if let Some(res) = self.get_sys_resource() { - res.region_base = region_base; - res.region_size = region_size; - res.irq = irq; - return Ok(()); - } - bail!("Failed to get sys resource."); - } - - fn get_type(&self) -> SysBusDevType { - SysBusDevType::Others - } - - fn reset(&mut self) -> Result<()> { - Ok(()) - } -} - -impl AmlBuilder for SysBus { - fn aml_bytes(&self) -> Vec { - let mut scope = AmlScope::new("_SB"); - self.devices.iter().for_each(|dev| { - scope.append(&dev.lock().unwrap().aml_bytes()); - }); - - scope.aml_bytes() - } -} diff --git a/tests/hydropper/README.cn.md b/tests/hydropper/README.cn.md index 5e359b3c2c24b854d32be31071b57d33256dcec5..2a8d872f0fff82d61d540ae6d6c605438bc8b151 100644 --- a/tests/hydropper/README.cn.md +++ b/tests/hydropper/README.cn.md @@ -22,14 +22,15 @@ $ pip3 install -r requirements.txt ```sh $ yum install nmap $ yum install iperf3 +$ yum install bridge-utils ``` 4. 网络配置(可参考以下模板): ```sh brctl addbr strato_br0 -ifconfig strato_br0 up -ifconfig strato_br0 1.1.1.1 +ip link set strato_br0 up +ip address add 1.1.1.1 dev strato_br0 ``` 5. 构建测试镜像请参考 docs/IMAGE_BUILD.md。 @@ -113,4 +114,4 @@ def test_microvm_xxx(microvm): ### 日志 - pytest默认日志路径:/var/log/pytest.log -- stratovirt默认日志路径:/var/log/stratovirt \ No newline at end of file +- stratovirt默认日志路径:/var/log/stratovirt diff --git a/tests/hydropper/README.md b/tests/hydropper/README.md index 5475ab289ab9fcfa4d1ad50a8d2938cffae55104..47e453e73a93de002d48402dd13fa649b69e1801 100644 --- a/tests/hydropper/README.md +++ b/tests/hydropper/README.md @@ -22,14 +22,15 @@ $ pip3 install -r requirements.txt ```sh $ yum install nmap $ yum install iperf3 +$ yum install bridge-utils ``` 4. Network configuration(template) ```sh brctl addbr strato_br0 -ifconfig strato_br0 up -ifconfig strato_br0 1.1.1.1 +ip link set strato_br0 up +ip address add 1.1.1.1 dev strato_br0 ``` 5. For details about how to build a test image, see docs/IMAGE_BUILD.md. 
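The src/main.rs hunk earlier in this patch drops error_chain in favour of a thiserror-derived error enum plus anyhow for context propagation, with main() returning an ExitCode. A minimal, self-contained sketch of that pattern follows; ConfigError, load_config, and the "stratovirt.toml" path are stand-ins for illustration, not StratoVirt's real modules.

```rust
use std::process::ExitCode;

use anyhow::{Context, Result};
use thiserror::Error;

// Each source error converts automatically via `#[from]`, mirroring the
// shape of the new MainError enum in the patch.
#[derive(Error, Debug)]
enum MainError {
    #[error("Config")]
    Config {
        #[from]
        source: ConfigError,
    },
    #[error("Io")]
    Io {
        #[from]
        source: std::io::Error,
    },
}

// Hypothetical leaf error type used only for this sketch.
#[derive(Error, Debug)]
#[error("invalid configuration: {0}")]
struct ConfigError(String);

// A fake fallible step; `with_context` attaches a human-readable layer that
// shows up in the `{:?}` rendering of `anyhow::Error`.
fn load_config(path: &str) -> Result<String> {
    if path.is_empty() {
        return Err(MainError::from(ConfigError("empty path".into())).into());
    }
    std::fs::read_to_string(path).with_context(|| format!("Failed to read {}", path))
}

fn run() -> Result<()> {
    let _cfg = load_config("stratovirt.toml").with_context(|| "Failed to create vmconfig")?;
    Ok(())
}

fn main() -> ExitCode {
    match run() {
        Ok(()) => ExitCode::SUCCESS,
        Err(e) => {
            eprintln!("{:?}", e);
            ExitCode::FAILURE
        }
    }
}
```

Compared with error_chain, `?` plus `#[from]` replaces the old `links`/`foreign_links` blocks, and context strings are attached at the call site rather than baked into the error definitions.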
@@ -115,4 +116,4 @@ def test_microvm_xxx(microvm): ### Log - pytest default log path: /var/log/pytest.log -- stratovirt default log path: /var/log/stratovirt \ No newline at end of file +- stratovirt default log path: /var/log/stratovirt diff --git a/tests/hydropper/docs/IMAGE_BUILD.md b/tests/hydropper/docs/IMAGE_BUILD.md index 2547c3e64be53b5a3c387e42a8ae6ec750aa235a..57506c848ce55f50865f84b6c0fcc17ebaa26bce 100644 --- a/tests/hydropper/docs/IMAGE_BUILD.md +++ b/tests/hydropper/docs/IMAGE_BUILD.md @@ -1,6 +1,6 @@ # 构建测试镜像 -1. 请于openEuler官网,下载所需版本的stratovirt_img和vmlinux.bin。(以下以openEuler-21.03-stratovirt-x86_64.img为例) +1. 请于openEuler官网,下载所需版本的stratovirt_img。(以下以openEuler-22.03-LTS-stratovirt-x86_64.img为例) - 地址:https://openeuler.org/zh/download/ @@ -15,13 +15,13 @@ - 扩容stratovirt_img ```shell - cat extend.img >> openEuler-21.03-stratovirt-x86_64.img + cat extend.img >> openEuler-22.03-LTS-stratovirt-x86_64.img ``` - 调整文件系统大小 ```shell - e2fsck -f openEuler-21.03-stratovirt-x86_64.img && resize2fs openEuler-21.03-stratovirt-x86_64.img + e2fsck -f openEuler-22.03-LTS-stratovirt-x86_64.img && resize2fs openEuler-22.03-LTS-stratovirt-x86_64.img ``` 3. 添加依赖包 @@ -29,13 +29,13 @@ - 挂载镜像 ```shell - mount openEuler-21.03-stratovirt-x86_64.img /mnt + mount openEuler-22.03-LTS-stratovirt-x86_64.img /mnt ``` -- 配置在线yum源,请参考: [开发环境准备.md](https://gitee.com/openeuler/docs/blob/stable2-21.03/docs/zh/docs/ApplicationDev/开发环境准备.md)。由于stratovirt_img内没有vi等编辑工具,建议先在主机上创建文件openEuler.repo,并配置好yum源,完成后将openEuler.repo拷贝到镜像内。 +- 配置DNS服务配置文件(/etc/resolv.conf)。挂载镜像中的etc/resolv.conf文件为空,需要配置DNS服务才能更新yum源。 ```shell - cp ./openEuler.repo /mnt/etc/yum.repos.d + cp /etc/resolv.conf /mnt/etc/resolv.conf ``` - 进入镜像挂载目录,通过yum命令安装依赖包。 @@ -43,7 +43,10 @@ ```shell cd /mnt chroot . + echo "set enable-bracketed-paste off" > /root/.inputrc yum -y install openssh + # For PMU tests + yum -y install perf ``` - 离开当前目录后,使用umount命令卸载镜像。 diff --git a/tests/hydropper/testcases/microvm/functional/test_microvm_balloon.py b/tests/hydropper/testcases/microvm/functional/test_microvm_balloon.py index 2b6532e434441d4d82a7ad84172cbcc96c798fba..4602a8996f0e98c610ae83dffff383c8261ad134 100644 --- a/tests/hydropper/testcases/microvm/functional/test_microvm_balloon.py +++ b/tests/hydropper/testcases/microvm/functional/test_microvm_balloon.py @@ -18,6 +18,26 @@ import pytest LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s" logging.basicConfig(filename='/var/log/pytest.log', level=logging.DEBUG, format=LOG_FORMAT) +@pytest.mark.acceptance +def test_microvm_balloon_fpr(microvm): + """ + Test free page reporting of querying balloon + + steps: + 1) launch microvm with argument: "-balloon free-page-reporting=true". + 2) execute command "stress --vm 2 --vm-bytes 1G --vm-keep --timeout 20". + 3) compare rss between booted and fpr done. + """ + test_vm = microvm + test_vm.basic_config(mem_size=3072, balloon=True, free_page_reporting=True) + test_vm.launch() + + rss_booted = test_vm.get_rss_with_status_check() + test_vm.memory_stress() + rss_fpr_done = test_vm.get_rss_with_status_check() + assert rss_fpr_done - rss_booted < 20480 + test_vm.shutdown() + @pytest.mark.acceptance def test_microvm_balloon_query(microvm): """ @@ -41,7 +61,7 @@ def test_microvm_balloon(microvm): steps: 1) launch microvm with argument: "-balloon deflate-on-oom=true". 2) query memory size, and save. - 3) set memory size through balloon device to 814748368. + 3) set memory size through balloon device to 814743552. 4) wait 5 seconds for ballooning. 
5) check if the memory size is less than 2524971008. 6) set memory size through balloon device to 2524971008, and wait. @@ -49,7 +69,6 @@ def test_microvm_balloon(microvm): Note that balloon device may not inflate as many as the given argument, but it can deflate until no page left in balloon device. Therefore, memory in step 5 is less than 2524971008, while that in step 7 equals 2524971008. - """ test_vm = microvm test_vm.basic_config(balloon=True, deflate_on_oom=True) @@ -57,7 +76,7 @@ def test_microvm_balloon(microvm): resp = test_vm.query_balloon() ori = int(resp["return"]["actual"]) - resp = test_vm.balloon_set(value=814748368) + resp = test_vm.balloon_set(value=814743552) time.sleep(5) test_vm.event_wait(name='BALLOON_CHANGED', timeout=2.0) resp = test_vm.query_balloon() diff --git a/tests/hydropper/testcases/microvm/functional/test_microvm_cmdline.py b/tests/hydropper/testcases/microvm/functional/test_microvm_cmdline.py index 4eb3d0e5682a85c07d46b316cd7199db161b88a4..7eaabafb6802b993eddd615243b5c3025c3128d4 100644 --- a/tests/hydropper/testcases/microvm/functional/test_microvm_cmdline.py +++ b/tests/hydropper/testcases/microvm/functional/test_microvm_cmdline.py @@ -58,7 +58,7 @@ def test_microvm_with_unsupported_param(): 1) Launch microvm with a unsupported param. 2) Expect run with error code, but not panic. """ - _cmd = "%s --unsupport" % CONFIG.stratovirt_microvm_bin + _cmd = "%s --unsupported" % CONFIG.stratovirt_microvm_bin try: _result = run(_cmd, shell=True, capture_output=True, check=False) except TypeError: diff --git a/tests/hydropper/testcases/microvm/functional/test_microvm_cpu_features.py b/tests/hydropper/testcases/microvm/functional/test_microvm_cpu_features.py index 17573c7cb4b240abc792479a39c0e9f4a5b0fc59..b955b559a886252fee91651cef3d3fed5e0718d8 100644 --- a/tests/hydropper/testcases/microvm/functional/test_microvm_cpu_features.py +++ b/tests/hydropper/testcases/microvm/functional/test_microvm_cpu_features.py @@ -14,6 +14,7 @@ import platform import logging import re +import json from enum import Enum from enum import auto import pytest @@ -21,85 +22,42 @@ LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s" logging.basicConfig(filename='/var/log/pytest.log', level=logging.DEBUG, format=LOG_FORMAT) -class CpuVendor(Enum): - """CPU vendors enum.""" +def _parse_output(output): - AMD = auto() - INTEL = auto() + cpu_info = {} + for item in output: + cpu_info.update({item['field']: item['data']}) + cpu_info.update(_parse_output(item.get('children', []))) + return cpu_info +def _get_cpu_info(test_microvm): -def _get_cpu_vendor(): - cif = open('/proc/cpuinfo', 'r') - host_vendor_id = None - while True: - line = cif.readline() - if line == '': - break - matchoutput = re.search("^vendor_id\\s+:\\s+(.+)$", line) - if matchoutput: - host_vendor_id = matchoutput.group(1) - cif.close() - assert host_vendor_id is not None - - if host_vendor_id == "AuthenticAMD": - return CpuVendor.AMD - return CpuVendor.INTEL - - -def _check_guest_cmd_output(microvm, guest_cmd, expected_header, - expected_separator, - expected_key_value_store): - status, output = microvm.serial_cmd(guest_cmd) - - assert status == 0 - for line in output.splitlines(): - line = line.strip() - if line != '': - # all the keys have been matched. Stop. - if not expected_key_value_store: - break - - # try to match the header if needed. - if expected_header not in (None, ''): - if line.strip() == expected_header: - expected_header = None - continue - - # see if any key matches. 
- # we use a try-catch block here since line.split() may fail. - try: - [key, value] = list( - map(lambda x: x.strip(), line.split(expected_separator))) - except ValueError: - continue - - if key in expected_key_value_store.keys(): - assert value == expected_key_value_store[key], \ - "%s does not have the expected value" % key - del expected_key_value_store[key] - - else: - break - - assert not expected_key_value_store, \ - "some keys in dictionary have not been found in the output: %s" \ - % expected_key_value_store - + output = json.loads(test_microvm.ssh_session.cmd_output("lscpu -J")) + return _parse_output(output.get("lscpu", [])) def _check_cpu_topology(test_microvm, expected_cpu_count, expected_threads_per_core, expected_cores_per_socket, expected_cpus_list): + expected_cpu_topology = { - "CPU(s)": str(expected_cpu_count), - "On-line CPU(s) list": expected_cpus_list, - "Thread(s) per core": str(expected_threads_per_core), - "Core(s) per socket": str(expected_cores_per_socket), - "Socket(s)": str(int(expected_cpu_count / expected_cores_per_socket / expected_threads_per_core)), + "CPU(s):": str(expected_cpu_count), + "On-line CPU(s) list:": expected_cpus_list, + "Thread(s) per core:": str(expected_threads_per_core), + "Core(s) per socket:": str(expected_cores_per_socket), + "Socket(s):": str(int(expected_cpu_count / expected_cores_per_socket / expected_threads_per_core)), } - _check_guest_cmd_output(test_microvm, "lscpu", None, ':', - expected_cpu_topology) + cpu_info = _get_cpu_info(test_microvm) + if "Core(s) per cluster:" in cpu_info.keys(): + expected_cpu_topology["Core(s) per cluster:"] = expected_cpu_topology["Core(s) per socket:"] + del expected_cpu_topology["Core(s) per socket:"] + if "Cluster(s):" in cpu_info.keys(): + expected_cpu_topology["Cluster(s):"] = expected_cpu_topology["Socket(s):"] + del expected_cpu_topology["Socket(s):"] + + for key, expect_value in expected_cpu_topology.items(): + assert cpu_info[key] == expect_value @pytest.mark.acceptance @@ -129,19 +87,13 @@ def test_128vcpu_topo(microvm): test_vm.basic_config(vcpu_count=128) test_vm.launch() - if 'x86_64' in platform.machine(): - _check_cpu_topology(test_vm, 128, 1, 128, "0-127") - else: - _check_cpu_topology(test_vm, 128, 2, 2, "0-127") + _check_cpu_topology(test_vm, 128, 1, 128, "0-127") @pytest.mark.skipif("platform.machine().startswith('aarch64')") @pytest.mark.acceptance def test_brand_string(microvm): """Ensure the guest band string is correct. - In x86_64 platform, the guest brand string is: - - Intel(R) Xeon(R) Processor @ {host frequency} """ branch_string_format = "^model name\\s+:\\s+(.+)$" host_brand_string = None @@ -165,11 +117,27 @@ def test_brand_string(microvm): assert matchoutput guest_brand_string = matchoutput.group(1) assert guest_brand_string + assert guest_brand_string == host_brand_string + + +@pytest.mark.skipif("platform.machine().startswith('x86_64')") +@pytest.mark.acceptance +def test_pmu(microvm): + '''Test for PMU events and interrupt. + ''' + test_vm = microvm + test_vm.basic_config(vcpu_count=1,cpu_features="pmu=on") + test_vm.launch() + + #PMU events available? + guest_cmd = "perf list | grep cache-misses" + status, output = test_vm.serial_cmd(guest_cmd) + assert status == 0 + + #PMU interrupt available? 
+ guest_cmd = "cat /proc/interrupts | grep -i 'pmu' | head -1" + status, output = test_vm.serial_cmd(guest_cmd) + assert status == 0 - cpu_vendor = _get_cpu_vendor() - expected_guest_brand_string = "" - if cpu_vendor == CpuVendor.INTEL: - expected_guest_brand_string = host_brand_string - assert guest_brand_string == expected_guest_brand_string diff --git a/tests/hydropper/testcases/microvm/functional/test_microvm_isula.py b/tests/hydropper/testcases/microvm/functional/test_microvm_isula.py index 5d341a87faca60bec404403d62c4022587f3bd6b..b528f3984a7f0d5a286ecd0f8908f586e73a284b 100644 --- a/tests/hydropper/testcases/microvm/functional/test_microvm_isula.py +++ b/tests/hydropper/testcases/microvm/functional/test_microvm_isula.py @@ -162,7 +162,7 @@ def test_microvm_isula_sandbox(container): name="sandbox1-hydropper", annotation="io.kubernetes.docker.type=podsandbox") LOG.info("podsandbox container id:%s", podsandbox_id) - + podsandbox_id = podsandbox_id.strip('\n') container_id = kata_container.run_isula(options="-tid", runtime="io.containerd.kata.v2", diff --git a/tests/hydropper/testcases/microvm/functional/test_microvm_vhost_vsock.py b/tests/hydropper/testcases/microvm/functional/test_microvm_vhost_vsock.py index 96dfb0972b3007a096070fcbe678917a07fe73f2..16bf01b59a4c299add9831b66fe3e87b23b2af49 100644 --- a/tests/hydropper/testcases/microvm/functional/test_microvm_vhost_vsock.py +++ b/tests/hydropper/testcases/microvm/functional/test_microvm_vhost_vsock.py @@ -27,7 +27,7 @@ BLOB_SIZE = 2000 def _check_vsock_enable(microvm): - """Check virtio rng device in Guest""" + """Check vhost vsock device in Guest""" _cmd = "ls /dev/vsock" status, _ = microvm.serial_cmd(_cmd) if status != 0: @@ -37,7 +37,7 @@ def _check_vsock_enable(microvm): return False status, _ = microvm.serial_cmd(_cmd) - assert status == 0 + assert status == 0 return True @@ -77,13 +77,13 @@ def _get_recv_data_from_guest(microvm): ssh_session.close() @pytest.mark.acceptance -def test_microvm_virtio_vsock(microvm, nc_vsock_path, test_session_root_path): - """Test virtio-rng device""" +def test_microvm_vhost_vsock(microvm, nc_vsock_path, test_session_root_path): + """Test vhost vsock device""" test_vm = microvm test_vm.basic_config(vsocknums=1) test_vm.launch() - # check virtio-vsock device + # check vhost vsock device if not _check_vsock_enable(test_vm): pytest.skip("vhost-vsock init failed, skip this testcase") diff --git a/tests/hydropper/testcases/microvm/functional/test_microvm_virtio_blk.py b/tests/hydropper/testcases/microvm/functional/test_microvm_virtio_blk.py index 1dde7b6216242c7b18ac6000925bb13a61f6bdac..34760a1079c2aa3afa9badadb56341a26d274b64 100644 --- a/tests/hydropper/testcases/microvm/functional/test_microvm_virtio_blk.py +++ b/tests/hydropper/testcases/microvm/functional/test_microvm_virtio_blk.py @@ -11,6 +11,7 @@ # See the Mulan PSL v2 for more details. 
"""Test microvm virtio block""" +import json import os import logging from subprocess import run @@ -29,14 +30,15 @@ def _get_lsblk_info(test_vm): """ retdict = {} if test_vm.ssh_session is not None: - _output = test_vm.ssh_session.cmd_output("lsblk") - for line in _output.split("\n"): - temp = line.split() - if len(temp) == 6: - name = temp[0] - size = temp[3] - readonly = temp[4] - if name not in retdict: + _output = json.loads(test_vm.ssh_session.cmd_output("lsblk -J")) + blockdevices = _output.get("blockdevices", []) + for dic in blockdevices: + mountpoints = dic.get("mountpoints", []) + if len(mountpoints) != 0 and None in mountpoints: + name = dic.get("name", "") + size = dic.get("size", "") + readonly = dic.get("ro", None) + if size != "0B" and name not in retdict: retdict[name] = {"size": size, "readonly": readonly} return retdict @@ -117,7 +119,7 @@ def test_microvm_virtio_blk_md5(test_session_root_path, microvm): """ Test data consistency by md5sum: - 1) Generate a temp disk for test_vm and launch. + 1) Generate a temp disk for test_vm and launch. 2) Mount the temp disk 3) Touch a file and compute it md5sum. 4) Umount the temp disk @@ -140,8 +142,8 @@ def test_microvm_virtio_blk_md5(test_session_root_path, microvm): mount_cmd = "mount /dev/vdb /mnt" test_vm.serial_cmd(mount_cmd) - wirte_cmd = "touch /mnt/test_virtioblk.c" - test_vm.serial_cmd(wirte_cmd) + write_cmd = "touch /mnt/test_virtioblk.c" + test_vm.serial_cmd(write_cmd) _cmd = "md5sum /mnt/test_virtioblk.c" _, md5 = test_vm.serial_cmd(_cmd) diff --git a/tests/hydropper/testcases/microvm/functional/test_microvm_vmlife.py b/tests/hydropper/testcases/microvm/functional/test_microvm_vmlife.py index 8923af60f06961414c8b7875d283ca75119e1e8b..929839fd0d2e72cc349447c14d8b577a52d5c71c 100644 --- a/tests/hydropper/testcases/microvm/functional/test_microvm_vmlife.py +++ b/tests/hydropper/testcases/microvm/functional/test_microvm_vmlife.py @@ -78,14 +78,6 @@ def test_microvm_destroy(microvm, destroy_value): test_vm.destroy(signal=destroy_value) -@pytest.mark.system -def test_microvm_inshutdown(microvm): - """Test a normal microvm inshutdown""" - test_vm = microvm - test_vm.launch() - test_vm.inshutdown() - - @pytest.mark.acceptance def test_microvm_pause_resume(microvm): """Test a normal microvm pause""" diff --git a/tests/hydropper/testcases/standvm/functional/test_standvm_balloon.py b/tests/hydropper/testcases/standvm/functional/test_standvm_balloon.py index f007127c88cb338b325e9e3fb8a6b1d6e17127c4..a1a2b5829441c740c5e2b7f1e1361895722e0e11 100644 --- a/tests/hydropper/testcases/standvm/functional/test_standvm_balloon.py +++ b/tests/hydropper/testcases/standvm/functional/test_standvm_balloon.py @@ -18,6 +18,26 @@ import pytest LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s" logging.basicConfig(filename='/var/log/pytest.log', level=logging.DEBUG, format=LOG_FORMAT) +@pytest.mark.acceptance +def test_standvm_balloon_fpr(standvm): + """ + Test free page reporting of querying balloon + + steps: + 1) launch standvm with argument: "-balloon free-page-reporting=true". + 2) execute command "stress --vm 2 --vm-bytes 1G --vm-keep --timeout 20". + 3) compare rss between booted and fpr done. 
+ """ + test_vm = standvm + test_vm.basic_config(mem_size=3072, balloon=True, free_page_reporting=True) + test_vm.launch() + + rss_booted = test_vm.get_rss_with_status_check() + test_vm.memory_stress() + rss_fpr_done = test_vm.get_rss_with_status_check() + assert rss_fpr_done - rss_booted < 20480 + test_vm.shutdown() + @pytest.mark.acceptance def test_standvm_balloon_query(standvm): """ @@ -41,7 +61,7 @@ def test_standvm_balloon(standvm): steps: 1) launch standvm with argument: "-balloon deflate-on-oom=true". 2) query memory size, and save. - 3) set memory size through balloon device to 814748368. + 3) set memory size through balloon device to 814743552. 4) wait 5 seconds for ballooning. 5) check if the memory size is less than 2524971008. 6) set memory size through balloon device to 2524971008, and wait. @@ -49,7 +69,6 @@ def test_standvm_balloon(standvm): Note that balloon device may not inflate as many as the given argument, but it can deflate until no page left in balloon device. Therefore, memory in step 5 is less than 2524971008, while that in step 7 equals 2524971008. - """ test_vm = standvm test_vm.basic_config(balloon=True, deflate_on_oom=True) @@ -57,7 +76,7 @@ def test_standvm_balloon(standvm): resp = test_vm.query_balloon() ori = int(resp["return"]["actual"]) - resp = test_vm.balloon_set(value=814748368) + resp = test_vm.balloon_set(value=814743552) time.sleep(5) test_vm.event_wait(name='BALLOON_CHANGED', timeout=2.0) resp = test_vm.query_balloon() diff --git a/tests/hydropper/testcases/standvm/functional/test_standvm_cpu_feature.py b/tests/hydropper/testcases/standvm/functional/test_standvm_cpu_feature.py new file mode 100644 index 0000000000000000000000000000000000000000..bbff0828537335f6a32ed7684cb5dc5881f72249 --- /dev/null +++ b/tests/hydropper/testcases/standvm/functional/test_standvm_cpu_feature.py @@ -0,0 +1,42 @@ +# Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +# +# StratoVirt is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan +# PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http:#license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +"""Test standvm PMU""" +import time +import logging +import pytest + +LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s" +logging.basicConfig(filename='/var/log/pytest.log', level=logging.DEBUG, format=LOG_FORMAT) + +@pytest.mark.skipif("platform.machine().startswith('x86_64')") +@pytest.mark.acceptance +def test_standvm_pmu(standvm): + """ + Test PMU feature for standvm. + + steps: + 1) launch standvm with argument: "-cpu pmu=on". + 2) Check PMU presence. + """ + test_vm = standvm + test_vm.basic_config(cpu_features="pmu=on") + test_vm.launch() + #PMU events available? + guest_cmd = "perf list | grep cache-misses" + status, output = test_vm.serial_cmd(guest_cmd) + assert status == 0 + + #PMU interrupt available? 
+ guest_cmd = "cat /proc/interrupts | grep -i 'pmu' | head -1" + status, output = test_vm.serial_cmd(guest_cmd) + assert status == 0 diff --git a/tests/hydropper/testcases/standvm/functional/test_standvm_isula.py b/tests/hydropper/testcases/standvm/functional/test_standvm_isula.py index 5e0168586890a0be02a315a824ad2b658b3b9310..94eeee97ef5b46c6683d7a63df70ef143646f129 100644 --- a/tests/hydropper/testcases/standvm/functional/test_standvm_isula.py +++ b/tests/hydropper/testcases/standvm/functional/test_standvm_isula.py @@ -140,7 +140,7 @@ def test_standvm_isula_sandbox(container): name="sandbox1-hydropper-stand", annotation="io.kubernetes.docker.type=podsandbox") LOG.info("podsandbox container id:%s", podsandbox_id) - + podsandbox_id = podsandbox_id.strip('\n') container_id = kata_container.run_isula(options="-tid", runtime="io.containerd.kata.v2", diff --git a/tests/hydropper/testcases/standvm/functional/test_standvm_vfio.py b/tests/hydropper/testcases/standvm/functional/test_standvm_vfio.py index dc399a58a1be37b18c6abb225e571fd431fe9b29..602798469c5a19ea821eb8bd7c7eb9fd4ab119a9 100644 --- a/tests/hydropper/testcases/standvm/functional/test_standvm_vfio.py +++ b/tests/hydropper/testcases/standvm/functional/test_standvm_vfio.py @@ -89,6 +89,6 @@ def test_standvm_vfio_ssd(standvm, bdf): assert ret == 0 ret, _ = testvm.serial_cmd("rm test") assert ret == 0 - + session.close() testvm.shutdown() diff --git a/tests/hydropper/testcases/standvm/functional/test_standvm_vhost_vsock.py b/tests/hydropper/testcases/standvm/functional/test_standvm_vhost_vsock.py index d96f3ff237fdec7ba6c63ad06c66c82d8f7aa160..365144125ce3f4f81fc8b18ca055805cb9e8d96b 100644 --- a/tests/hydropper/testcases/standvm/functional/test_standvm_vhost_vsock.py +++ b/tests/hydropper/testcases/standvm/functional/test_standvm_vhost_vsock.py @@ -27,7 +27,7 @@ BLOB_SIZE = 2000 def _check_vsock_enable(standvm): - """Check virtio rng device in Guest""" + """Check vhost vsock device in Guest""" _cmd = "ls /dev/vsock" status, _ = standvm.serial_cmd(_cmd) if status != 0: @@ -37,7 +37,7 @@ def _check_vsock_enable(standvm): return False status, _ = standvm.serial_cmd(_cmd) - assert status == 0 + assert status == 0 return True @@ -77,13 +77,13 @@ def _get_recv_data_from_guest(standvm): ssh_session.close() @pytest.mark.acceptance -def test_standvm_virtio_vsock(standvm, nc_vsock_path, test_session_root_path): - """Test virtio-rng device""" +def test_standvm_vhost_vsock(standvm, nc_vsock_path, test_session_root_path): + """Test vhost vsock device""" test_vm = standvm test_vm.basic_config(vsocknums=1) test_vm.launch() - # check virtio-vsock device + # check vhost vsock device if not _check_vsock_enable(test_vm): pytest.skip("vhost-vsock init failed, skip this testcase") diff --git a/tests/hydropper/utils/resources.py b/tests/hydropper/utils/resources.py index b8e16803011faa99a5a03ebe929c3478901c583a..77f5e502a33be16033fc728e714a57ca15b46869 100644 --- a/tests/hydropper/utils/resources.py +++ b/tests/hydropper/utils/resources.py @@ -47,7 +47,7 @@ class NetworkResource(Singleton): # create bridge if it does not exist run("brctl show %s || brctl addbr %s" % (self.bridge, self.bridge), shell=True, check=True) - run("ifconfig %s up" % self.bridge, shell=True, check=True) + run("ip link set %s up" % self.bridge, shell=True, check=True) for index in range(self.nets_num): ipaddr = "%s.%s.1" % (self.ip_prefix, str(self.ip_3rd + index)) diff --git a/tests/hydropper/utils/utils_common.py b/tests/hydropper/utils/utils_common.py index 
7713bef7f084d13216b833b858187c2fdbc809c8..8618b642b168ab3ee04478006f6f49ee97001096 100644 --- a/tests/hydropper/utils/utils_common.py +++ b/tests/hydropper/utils/utils_common.py @@ -70,7 +70,7 @@ def config_host_vfio(net_type, number, bdf): def rebind_vfio_pci(bdf): """unbind old driver and bind a new one""" - run("echo %s > /sys/bus/pci/devices/%s/driver/unbind" % (bdf, bdf), shell=True, check=True) + run("echo %s > /sys/bus/pci/devices/%s/driver/unbind" % (bdf, bdf), shell=True, check=True) run("echo `lspci -ns %s | awk -F':| ' '{print $5\" \"$6}'` > /sys/bus/pci/drivers/vfio-pci/new_id"\ %bdf, shell=True, check=True) diff --git a/tests/hydropper/utils/utils_qmp.py b/tests/hydropper/utils/utils_qmp.py index 7d0527dcf540be27e0295dd853b590ef893f8f1b..aa4d2e7a650bf2674a805b6780b1d6010f59bac8 100644 --- a/tests/hydropper/utils/utils_qmp.py +++ b/tests/hydropper/utils/utils_qmp.py @@ -48,7 +48,7 @@ def assert_qmp_absent(dictionary, path): def assert_qmp(dictionary, path, value): """ Assert that the value for a specific path in a QMP dict - matches. When given a list of values, assert that any of + matches. When given a list of values, assert that any of them matches. """ result = dictpath(dictionary, path) diff --git a/tests/hydropper/virt/basevm.py b/tests/hydropper/virt/basevm.py index 8946bc130a91d2d385bfa9b1a0faaea2cfdef8e8..fb134fdbe3dda4d2eb4075dc15ed99af3a2678dd 100644 --- a/tests/hydropper/virt/basevm.py +++ b/tests/hydropper/virt/basevm.py @@ -34,6 +34,7 @@ from utils.exception import QMPCapabilitiesError from utils.exception import QMPTimeoutError from utils.exception import SSHError from utils.exception import LoginTimeoutError +from subprocess import getstatusoutput LOG = TestLog.get_global_log() LOGIN_TIMEOUT = 10 @@ -111,6 +112,7 @@ class BaseVM: self.withpid = False self.balloon = balloon self.deflate_on_oom = False + self.free_page_reporting = False self.quickstart_incoming = None def __enter__(self): @@ -135,6 +137,20 @@ class BaseVM: return None + def get_rss_with_status_check(self): + INVALID_VALUE = -1 + cmd = "ps -q %d -o rss=" % self.pid + status, output = getstatusoutput(cmd) + assert status == 0 + return int(output) + + def memory_stress(self, thread_num=2, vm_bytes='1G', timeout=20): + status, _ = self.serial_cmd("stress-ng --vm %d --vm-bytes %s --vm-keep --timeout %d" % (thread_num, vm_bytes, timeout)) + if status != 0: + logging.error("Cannot execute stress in stratovirt.") + assert status == 0 + time.sleep(20) + def _pre_shutdown(self): pass @@ -359,7 +375,7 @@ class BaseVM: self.serial_cmd("systemctl restart sshd") if 'dhcp' in model: self.serial_session.run_func("cmd_output", ("dhclient %s" % self.interfaces[index])) - _cmd = "ifconfig %s | awk '/inet/ {print $2}' | cut -f2 -d ':' | " \ + _cmd = "ip address show %s | awk '/inet/ {print $2}' | cut -f2 -d ':' | " \ "awk 'NR==1 {print $1}'" % self.interfaces[index] output = self.serial_session.run_func("cmd_output", _cmd) self.guest_ips.append(output) diff --git a/tests/hydropper/virt/microvm.py b/tests/hydropper/virt/microvm.py index 40f470478494ce1d6778ed65204fea35137513b5..f11ee3916e5e134b0bf96720abdb1acd8f093ec3 100644 --- a/tests/hydropper/virt/microvm.py +++ b/tests/hydropper/virt/microvm.py @@ -195,11 +195,16 @@ class MicroVM(BaseVM): 'guest-cid=%s' % (sockcid, sockcid)]) if self.balloon: + _temp_balloon_args = 'virtio-balloon-device' if self.deflate_on_oom: - _temp_balloon_args = 'deflate-on-oom=true' + _temp_balloon_args += ',deflate-on-oom=true' else: - _temp_balloon_args = 'deflate-on-oom=false' - 
args.extend(['-device', 'virtio-balloon-device', _temp_balloon_args]) + _temp_balloon_args += ',deflate-on-oom=false' + if self.free_page_reporting: + _temp_balloon_args += ',free-page-reporting=true' + else: + _temp_balloon_args += ',free-page-reporting=false' + args.extend(['-device', _temp_balloon_args]) if self.iothreads > 0: args = self.make_iothread_cmd(args) @@ -220,6 +225,9 @@ class MicroVM(BaseVM): if "vhost_type" in kwargs: self.vhost_type = kwargs.get("vhost_type") del kwargs["vhost_type"] + if "cpu_features" in kwargs: + self.configdict["machine-config"]["cpu_features"] = kwargs.get("cpu_features") + del kwargs["cpu_features"] for key, value in kwargs.items(): if hasattr(self, key): @@ -254,6 +262,10 @@ class MicroVM(BaseVM): self.add_args('-m', _temp_mem_args) if "mem_path" in configdict["machine-config"]: self.add_args('-mem-path', configdict["machine-config"]["mem_path"]) + # make CPU feature cmdline + if "cpu_features" in configdict["machine-config"]: + self.add_args('-cpu', configdict["machine-config"]["cpu_features"]) + # make block cmdline for block in configdict.get("block", []): diff --git a/tests/hydropper/virt/standvm.py b/tests/hydropper/virt/standvm.py index ae6cc43d7433d4f6d3d7643a83fc40d974eed9be..45f3263c9ba25fd4d7ff5108bed96f717c217e4d 100644 --- a/tests/hydropper/virt/standvm.py +++ b/tests/hydropper/virt/standvm.py @@ -285,6 +285,10 @@ class StandVM(BaseVM): _temp_balloon_args += ',deflate-on-oom=true' else: _temp_balloon_args += ',deflate-on-oom=false' + if self.free_page_reporting: + _temp_balloon_args += ',free-page-reporting=true' + else: + _temp_balloon_args += ',free-page-reporting=false' if self.multifunction["balloon"]: _temp_balloon_args += ",multifunction=on" self.add_args('-device', _temp_balloon_args) @@ -354,6 +358,9 @@ class StandVM(BaseVM): if "vhost_type" in kwargs: self.vhost_type = kwargs.get("vhost_type") del kwargs["vhost_type"] + if "cpu_features" in kwargs: + self.configdict["machine-config"]["cpu_features"] = kwargs.get("cpu_features") + del kwargs["cpu_features"] for key, value in kwargs.items(): if hasattr(self, key): @@ -388,6 +395,9 @@ class StandVM(BaseVM): self.add_args('-m', _temp_mem_args) if "mem_path" in configdict["machine-config"]: self.add_args('-mem-path', configdict["machine-config"]["mem_path"]) + # make CPU feature cmdline + if "cpu_features" in configdict["machine-config"]: + self.add_args('-cpu', configdict["machine-config"]["cpu_features"]) # make block cmdline for block in configdict.get("block", []): diff --git a/tests/mod_test/Cargo.toml b/tests/mod_test/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..9b144af1d1723862e1465ed46f7e0b67c7526537 --- /dev/null +++ b/tests/mod_test/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "mod_test" +version = "2.4.0" +authors = ["Huawei StratoVirt Team"] +edition = "2021" +license = "Mulan PSL v2" + +[dependencies] +rand = "0.8.5" +hex = "0.4.3" +vmm-sys-util = "0.12.1" +anyhow = "1.0" +serde_json = "1.0" +libc = "0.2" +byteorder = "1.4.3" +serde = { version = "1.0", features = ["derive"] } +devices = { path = "../../devices", features = ["scream", "pvpanic"]} +util = { path = "../../util" } +acpi = { path = "../../acpi" } +machine = { path = "../../machine" } +machine_manager = { path = "../../machine_manager"} +virtio = { path = "../../virtio", features = ["virtio_gpu"] } diff --git a/tests/mod_test/src/lib.rs b/tests/mod_test/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..4c8516f9aef3fd27d85e21a6eb1923a07fe569b7 
--- /dev/null +++ b/tests/mod_test/src/lib.rs @@ -0,0 +1,15 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod libdriver; +pub mod libtest; +pub mod utils; diff --git a/tests/mod_test/src/libdriver/fwcfg.rs b/tests/mod_test/src/libdriver/fwcfg.rs new file mode 100644 index 0000000000000000000000000000000000000000..3941d9588a2143f4fbb453fdf64519dd8cc47107 --- /dev/null +++ b/tests/mod_test/src/libdriver/fwcfg.rs @@ -0,0 +1,138 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::mem; + +use super::malloc::GuestAllocator; +use crate::libtest::{TestState, MACHINE_TYPE_ARG}; +use crate::utils::{swap_u16, swap_u32, swap_u64}; +use devices::legacy::FwCfgEntryType; +#[cfg(target_arch = "aarch64")] +use machine::aarch64::standard::{LayoutEntryType, MEM_LAYOUT}; + +#[cfg(target_arch = "aarch64")] +pub const FW_CFG_BASE: u64 = MEM_LAYOUT[LayoutEntryType::FwCfg as usize].0; +#[cfg(target_arch = "x86_64")] +pub const FW_CFG_BASE: u64 = 0x510; + +const FW_CFG_FNAME_SIZE: usize = 56; + +#[repr(C)] +pub struct FwCfgDmaAccess { + control: u32, + length: u32, + address: u64, +} + +pub fn bios_args(base_args: &mut Vec<&str>) { + let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + base_args.append(&mut args); + args = "-drive file=/usr/share/edk2/aarch64/QEMU_EFI-pflash.raw,if=pflash,unit=0,readonly=true" + .split(' ') + .collect(); + base_args.append(&mut args); +} + +impl TestState { + pub fn fw_cfg_read_bytes(&self, key: u16, data: &mut Vec, len: u32) { + self.writew(FW_CFG_BASE + 0x8, swap_u16(key)); + for _i in 0..len { + data.push(self.readb(FW_CFG_BASE)) + } + } + + pub fn fw_cfg_read_u16(&self, key: u16) -> u16 { + self.writew(FW_CFG_BASE + 0x8, swap_u16(key)); + self.readw(FW_CFG_BASE) + } + + pub fn fw_cfg_read_u32(&self, key: u16) -> u32 { + self.writew(FW_CFG_BASE + 0x8, swap_u16(key)); + self.readl(FW_CFG_BASE) + } + + pub fn dma_transfer_bytes(&self, access: u64, buff: u64, size: u32, ctrl: u32) { + self.writel(access, swap_u32(ctrl)); + self.writel(access + 4, swap_u32(size)); + self.writeq(access + 8, swap_u64(buff)); + + self.writeq(FW_CFG_BASE + 0x10, swap_u64(access)); + } + + pub fn fw_cfg_read_file( + &self, + allocator: &mut GuestAllocator, + file_name: &str, + data: &mut Vec, + data_len: u32, + ) -> u32 { + let file_name_len = file_name.to_string().len(); + let mut file_size = 0; + let mut name: [u8; FW_CFG_FNAME_SIZE] = [0; FW_CFG_FNAME_SIZE]; + let buff = allocator.alloc(FW_CFG_FNAME_SIZE as u64); + let access = 
allocator.alloc(mem::size_of::() as u64); + + self.writew(FW_CFG_BASE + 0x8, swap_u16(FwCfgEntryType::FileDir as u16)); + let count = swap_u32(self.readl(FW_CFG_BASE)); + for _i in 0..count { + let mut size = swap_u32(self.readl(FW_CFG_BASE)); + let select = swap_u16(self.readw(FW_CFG_BASE)); + let _reserved = swap_u16(self.readw(FW_CFG_BASE)); + // Read file name by DMA. + self.dma_transfer_bytes(access, buff, FW_CFG_FNAME_SIZE as u32, 2); + for i in 0..FW_CFG_FNAME_SIZE { + name[i] = self.readb(buff + i as u64); + } + if String::from_utf8_lossy(&name[0..file_name_len]).eq(file_name) { + file_size = size; + if size > data_len { + size = data_len; + } + self.fw_cfg_read_bytes(select, data, size); + break; + } + } + file_size + } + + pub fn fw_cfg_write_file( + &self, + allocator: &mut GuestAllocator, + file_name: &str, + data_access: u64, + data_addr: u64, + data_len: u32, + ) { + let file_name_len = file_name.to_string().len(); + let mut name: [u8; FW_CFG_FNAME_SIZE] = [0; FW_CFG_FNAME_SIZE]; + let buff = allocator.alloc(FW_CFG_FNAME_SIZE as u64); + let access = allocator.alloc(mem::size_of::() as u64); + + self.writew(FW_CFG_BASE + 0x8, swap_u16(FwCfgEntryType::FileDir as u16)); + let count = swap_u32(self.readl(FW_CFG_BASE)); + for _i in 0..count { + let _size = swap_u32(self.readl(FW_CFG_BASE)); + let select = swap_u16(self.readw(FW_CFG_BASE)); + let _reserved = swap_u16(self.readw(FW_CFG_BASE)); + // Read file name by DMA. + self.dma_transfer_bytes(access, buff, FW_CFG_FNAME_SIZE as u32, 2); + for i in 0..FW_CFG_FNAME_SIZE { + name[i] = self.readb(buff + i as u64); + } + if String::from_utf8_lossy(&name[0..file_name_len]).eq(file_name) { + self.writew(FW_CFG_BASE + 0x8, swap_u16(select)); + self.dma_transfer_bytes(data_access, data_addr, data_len, 16); + break; + } + } + } +} diff --git a/tests/mod_test/src/libdriver/ivshmem.rs b/tests/mod_test/src/libdriver/ivshmem.rs new file mode 100644 index 0000000000000000000000000000000000000000..edb5ef6206fd45c12a91747be854d191a35e9f69 --- /dev/null +++ b/tests/mod_test/src/libdriver/ivshmem.rs @@ -0,0 +1,78 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
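The fwcfg.rs driver above talks to the fw_cfg device the way firmware does: the selector is written big-endian at base + 0x8, data is read byte-wise at the base port, and bulk transfers go through a 16-byte DMA descriptor (control, length, address, all big-endian) whose guest address is written at base + 0x10. Below is a standalone sketch of building that descriptor as raw bytes; the FW_CFG_DMA_CTL_* names are illustrative, and the bit values follow the conventional fw_cfg DMA interface, matching the literals 2 (read) and 16 (write) passed to dma_transfer_bytes above.

```rust
// Build the 16-byte fw_cfg DMA access descriptor as a guest would place it
// in memory before writing its address to base + 0x10. The layout mirrors
// the `FwCfgDmaAccess { control, length, address }` struct in the driver
// above; every field is big-endian on the wire.
const FW_CFG_DMA_CTL_READ: u32 = 0x02;
const FW_CFG_DMA_CTL_WRITE: u32 = 0x10;

fn dma_descriptor(control: u32, length: u32, address: u64) -> [u8; 16] {
    let mut desc = [0u8; 16];
    desc[0..4].copy_from_slice(&control.to_be_bytes());
    desc[4..8].copy_from_slice(&length.to_be_bytes());
    desc[8..16].copy_from_slice(&address.to_be_bytes());
    desc
}

fn main() {
    // Read 56 bytes (one directory file name) into a guest buffer at 0x4000_1000.
    let desc = dma_descriptor(FW_CFG_DMA_CTL_READ, 56, 0x4000_1000);
    assert_eq!(&desc[0..4], &[0, 0, 0, 2]);
    assert_eq!(&desc[4..8], &[0, 0, 0, 56]);
    println!("{:02x?}", desc);

    let _wr = dma_descriptor(FW_CFG_DMA_CTL_WRITE, 4096, 0x4000_2000);
}
```

The swap_u16/swap_u32/swap_u64 calls in the driver serve the same purpose: the write helpers emit little-endian values, so pre-swapping yields big-endian bytes in guest memory.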
+ +use std::{cell::RefCell, rc::Rc}; + +use super::{ + pci::{PCIBarAddr, TestPciDev}, + pci_bus::TestPciBus, +}; + +pub struct TestIvshmemDev { + pub pci_dev: TestPciDev, + bar0_addr: PCIBarAddr, + bar1_addr: PCIBarAddr, + pub bar2_addr: PCIBarAddr, +} + +impl TestIvshmemDev { + pub fn new(pci_bus: Rc>) -> Self { + Self { + pci_dev: TestPciDev::new(pci_bus), + bar0_addr: 0, + bar1_addr: 0, + bar2_addr: 0, + } + } + + pub fn init(&mut self, pci_slot: u8) { + let devfn = pci_slot << 3; + assert!(self.pci_dev.find_pci_device(devfn)); + + self.pci_dev.enable(); + self.bar0_addr = self.pci_dev.io_map(0); + self.bar1_addr = self.pci_dev.io_map(1); + self.bar2_addr = self.pci_dev.io_map(2); + } + + pub fn writeb(&mut self, offset: u64, value: u8) { + self.pci_dev.io_writeb(self.bar2_addr, offset, value); + } + + pub fn writew(&mut self, offset: u64, value: u16) { + self.pci_dev.io_writew(self.bar2_addr, offset, value); + } + + pub fn writel(&mut self, offset: u64, value: u32) { + self.pci_dev.io_writel(self.bar2_addr, offset, value); + } + + pub fn writeq(&mut self, offset: u64, value: u64) { + self.pci_dev.io_writeq(self.bar2_addr, offset, value); + } + + pub fn readw(&self, offset: u64) -> u16 { + self.pci_dev.io_readw(self.bar2_addr, offset) + } + + pub fn readl(&self, offset: u64) -> u32 { + self.pci_dev.io_readl(self.bar2_addr, offset) + } + + pub fn writel_reg(&self, offset: u64, value: u32) { + self.pci_dev.io_writel(self.bar0_addr, offset, value); + } + + pub fn readl_reg(&self, offset: u64) -> u32 { + self.pci_dev.io_readl(self.bar0_addr, offset) + } +} diff --git a/tests/mod_test/src/libdriver/machine.rs b/tests/mod_test/src/libdriver/machine.rs new file mode 100644 index 0000000000000000000000000000000000000000..beba4786a20e986ca0704cde380e98a7b87cb802 --- /dev/null +++ b/tests/mod_test/src/libdriver/machine.rs @@ -0,0 +1,51 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
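TestIvshmemDev above obtains its three BAR addresses through TestPciDev::io_map (not shown in this excerpt). Conventionally, sizing a 32-bit memory BAR means saving the register, writing all-ones, reading back the mask to derive the decoded size, then restoring the original value. The sketch below shows that standard algorithm over an abstract config-space accessor; the ConfigSpace trait, ToyDev device, and size_mem_bar32 helper are hypothetical stand-ins, not the mod_test API.

```rust
// Classic 32-bit memory BAR sizing: write 0xFFFF_FFFF, read back, mask off
// the low flag bits, and the size is the two's complement of what remains.
trait ConfigSpace {
    fn read_u32(&self, offset: u8) -> u32;
    fn write_u32(&mut self, offset: u8, value: u32);
}

const BAR0_OFFSET: u8 = 0x10;
const BAR_MEM_MASK: u32 = 0xffff_fff0; // low 4 bits are type/prefetch flags

fn size_mem_bar32(cfg: &mut dyn ConfigSpace, bar_offset: u8) -> u32 {
    let saved = cfg.read_u32(bar_offset);
    cfg.write_u32(bar_offset, 0xffff_ffff);
    let mask = cfg.read_u32(bar_offset) & BAR_MEM_MASK;
    cfg.write_u32(bar_offset, saved); // restore the original programming
    if mask == 0 {
        0 // BAR not implemented
    } else {
        (!mask).wrapping_add(1)
    }
}

// A toy device whose BAR0 decodes 4 KiB of memory space.
struct ToyDev {
    bar0: u32,
}

impl ConfigSpace for ToyDev {
    fn read_u32(&self, offset: u8) -> u32 {
        assert_eq!(offset, BAR0_OFFSET);
        self.bar0 & 0xffff_f000 // device ignores address bits below its 4 KiB size
    }
    fn write_u32(&mut self, offset: u8, value: u32) {
        assert_eq!(offset, BAR0_OFFSET);
        self.bar0 = value;
    }
}

fn main() {
    let mut dev = ToyDev { bar0: 0 };
    assert_eq!(size_mem_bar32(&mut dev, BAR0_OFFSET), 4096);
    println!("BAR0 size: {} bytes", size_mem_bar32(&mut dev, BAR0_OFFSET));
}
```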
+ +use std::cell::RefCell; +use std::rc::Rc; + +use super::malloc::GuestAllocator; +use super::pci_bus::TestPciBus; +use crate::libtest::TestState; + +const ARM_VIRT_RAM_ADDR: u64 = 0x40000000; +const ARM_VIRT_RAM_SIZE: u64 = 0x20000000; +const ARM_PAGE_SIZE: u64 = 4096; + +pub struct TestStdMachine { + pub pci_bus: Rc>, + pub allocator: Rc>, +} + +impl TestStdMachine { + pub fn new(test_state: Rc>) -> Self { + Self { + pci_bus: Rc::new(RefCell::new(TestPciBus::new(test_state))), + allocator: Rc::new(RefCell::new(GuestAllocator::new( + ARM_VIRT_RAM_ADDR, + ARM_VIRT_RAM_SIZE, + ARM_PAGE_SIZE, + ))), + } + } + + pub fn new_bymem(test_state: Rc>, memsize: u64, page_size: u64) -> Self { + Self { + pci_bus: Rc::new(RefCell::new(TestPciBus::new(test_state))), + allocator: Rc::new(RefCell::new(GuestAllocator::new( + ARM_VIRT_RAM_ADDR, + memsize, + page_size, + ))), + } + } +} diff --git a/tests/mod_test/src/libdriver/malloc.rs b/tests/mod_test/src/libdriver/malloc.rs new file mode 100644 index 0000000000000000000000000000000000000000..49cb9a680346294edea9628d5f7ce7710ebadc5a --- /dev/null +++ b/tests/mod_test/src/libdriver/malloc.rs @@ -0,0 +1,150 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use util::num_ops::round_up; + +#[derive(Clone, Copy)] +struct MemBlock { + start: u64, + size: u64, +} + +impl MemBlock { + fn new(start: u64, size: u64) -> Self { + MemBlock { start, size } + } + + pub fn reduce(&mut self, size: u64) { + assert!(self.size > size); + self.start += size; + self.size -= size; + } +} + +pub struct GuestAllocator { + start: u64, + end: u64, + page_size: u64, + free: Vec, + used: Vec, +} + +impl GuestAllocator { + pub fn new(start: u64, size: u64, page_size: u64) -> Self { + Self { + start, + end: start + size, + page_size, + free: vec![MemBlock::new(start, size)], + used: Vec::new(), + } + } + + fn add_free_block(&mut self, new_mb: MemBlock) { + let mut target = self.free.len(); + for (i, mb) in self.free.iter().enumerate() { + if mb.size >= new_mb.size { + target = i; + break; + } + } + self.free.insert(target, new_mb); + } + + fn add_used_block(&mut self, new_mb: MemBlock) { + let mut target = self.used.len(); + for (i, mb) in self.used.iter().enumerate() { + if mb.start >= new_mb.start { + target = i; + break; + } + } + self.used.insert(target, new_mb); + } + + fn alloc_free_block(&mut self, index: usize, size: u64) { + let start = self.free[index].start; + let used_mb = MemBlock::new(start, size); + self.add_used_block(used_mb); + if self.free[index].size == size { + self.free.remove(index); + } else { + self.free[index].reduce(size); + } + } + + fn free_used_block(&mut self, index: usize) { + let free_mb = self.used[index]; + self.add_free_block(free_mb); + self.used.remove(index); + } + + pub fn alloc(&mut self, size: u64) -> u64 { + let alloc_size = round_up(size, self.page_size).unwrap(); + + let mut addr: Option = None; + let mut index: Option = None; + for (i, mb) in self.free.iter().enumerate() { + if mb.size >= alloc_size { + addr = Some(mb.start); + 
index = Some(i); + break; + } + } + + self.alloc_free_block(index.unwrap(), alloc_size); + addr.unwrap() + } + + pub fn free(&mut self, addr: u64) { + assert!(self.start <= addr && addr < self.end); + let mut index: Option = None; + for (i, mb) in self.used.iter().enumerate() { + if mb.start >= addr { + index = Some(i); + break; + } + } + + if let Some(i) = index { + self.free_used_block(i); + } + } +} + +#[cfg(test)] +mod test { + use super::GuestAllocator; + + const PAGE_SIZE_4K: u64 = 1 << 12; + const ADDRESS_BASE: u64 = 0x4000_0000; + const ADDRESS_SIZE: u64 = 0x2000_0000; + + #[test] + fn test_guest_allocator() { + let mut guest_allocator = GuestAllocator::new(ADDRESS_BASE, ADDRESS_SIZE, PAGE_SIZE_4K); + + let mut expect_addr = ADDRESS_BASE; + let mut addr = guest_allocator.alloc(4096 + 1); + assert_eq!(addr, expect_addr); + guest_allocator.free(addr); + + addr = guest_allocator.alloc(4096 * 10); + expect_addr += 4096 * 2; + assert_eq!(addr, expect_addr); + guest_allocator.free(addr); + + addr = guest_allocator.alloc(4096); + expect_addr = ADDRESS_BASE; + assert_eq!(addr, expect_addr); + } +} diff --git a/tests/mod_test/src/libdriver/mod.rs b/tests/mod_test/src/libdriver/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..de696bc0b0ccd27215b82a039d063e4daceeb52c --- /dev/null +++ b/tests/mod_test/src/libdriver/mod.rs @@ -0,0 +1,27 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod fwcfg; +pub mod ivshmem; +pub mod machine; +pub mod malloc; +pub mod pci; +pub mod pci_bus; +pub mod qcow2; +pub mod usb; +pub mod virtio; +pub mod virtio_block; +pub mod virtio_gpu; +pub mod virtio_pci_modern; +pub mod virtio_rng; +pub mod virtiofs; +pub mod vnc; diff --git a/tests/mod_test/src/libdriver/pci.rs b/tests/mod_test/src/libdriver/pci.rs new file mode 100644 index 0000000000000000000000000000000000000000..1fc0ad10cb4b2f25aff47fe4df7ad5118353b0b8 --- /dev/null +++ b/tests/mod_test/src/libdriver/pci.rs @@ -0,0 +1,433 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
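The GuestAllocator defined above (libdriver/malloc.rs) keeps its free list sorted by block size and allocates first-fit after rounding each request up to the page size. That ordering is why, in its unit test, the second allocation (ten pages) lands at base + 2 pages: the two-page block freed by the first allocation sorts ahead of the large remainder but is too small for ten pages, so the search falls through to the big block. The allocator relies on util::num_ops::round_up, which is not part of this patch; the following is a hypothetical stand-in with the behaviour the allocator depends on (checked round-up to a multiple of the alignment), not the crate's actual code.

```rust
// Hypothetical stand-in for util::num_ops::round_up as used by
// GuestAllocator::alloc above: round `value` up to the next multiple of
// `align`, returning None on overflow or a zero alignment.
fn round_up(value: u64, align: u64) -> Option<u64> {
    if align == 0 {
        return None;
    }
    value.checked_add(align - 1).map(|v| v / align * align)
}

fn main() {
    const PAGE_SIZE_4K: u64 = 1 << 12;
    // 4097 bytes round up to two pages, matching the allocator unit test,
    // where alloc(4096 + 1) consumes 0x2000 bytes of guest memory.
    assert_eq!(round_up(4096 + 1, PAGE_SIZE_4K), Some(2 * PAGE_SIZE_4K));
    assert_eq!(round_up(4096, PAGE_SIZE_4K), Some(PAGE_SIZE_4K));
    assert_eq!(round_up(u64::MAX, PAGE_SIZE_4K), None);
    println!("ok");
}
```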
+ +use std::cell::RefCell; +use std::rc::Rc; + +use super::pci_bus::PciBusOps; +use super::pci_bus::TestPciBus; + +const BAR_MAP: [u8; 6] = [0x10, 0x14, 0x18, 0x1c, 0x20, 0x24]; +const PCI_PIN_NUM: u8 = 4; +pub const PCI_VENDOR_ID: u8 = 0x00; +pub const PCI_DEVICE_ID: u8 = 0x02; +pub const PCI_COMMAND: u8 = 0x04; + +const PCI_COMMAND_IO: u8 = 0x1; +pub const PCI_COMMAND_MEMORY: u8 = 0x2; +const PCI_COMMAND_MASTER: u8 = 0x4; +pub const PCI_COMMAND_INTX_DISABLE: u16 = 0x400; + +pub const PCI_STATUS: u8 = 0x06; +pub const PCI_STATUS_INTERRUPT: u16 = 0x08; +pub const PCI_STATUS_CAP_LIST: u16 = 0x10; +pub const PCI_REVISION_ID: u8 = 0x08; +pub const PCI_SUB_CLASS_DEVICE: u8 = 0x0a; +pub const PCI_HEADER_TYPE: u8 = 0x0e; +pub const PCI_PRIMARY_BUS: u8 = 0x18; +pub const PCI_SECONDARY_BUS: u8 = 0x19; +pub const PCI_SUBORDINATE_BUS: u8 = 0x1a; +pub const PCI_SUBSYSTEM_VENDOR_ID: u8 = 0x2c; +pub const PCI_SUBSYSTEM_ID: u8 = 0x2e; + +pub const PCI_CAPABILITY_LIST: u8 = 0x34; +pub const PCI_INTERRUPT_PIN: u8 = 0x3d; +pub const PCI_BRIDGE_CONTROL: u8 = 0x3e; +pub const BRIDGE_CTL_SEC_BUS_RESET: u8 = 0x40; + +pub const PCI_CAP_LIST_NEXT: u8 = 1; +pub const PCI_CAP_ID_VNDR: u8 = 0x09; +pub const PCI_CAP_ID_EXP: u8 = 0x10; +pub const PCI_CAP_ID_MSIX: u8 = 0x11; + +pub const PCI_MSIX_MSG_CTL: u8 = 2; +pub const PCI_MSIX_MSG_CTL_TSIZE: u16 = 0x07FF; +pub const PCI_MSIX_MSG_CTL_MASKALL: u16 = 0x4000; +pub const PCI_MSIX_MSG_CTL_ENABLE: u16 = 0x8000; +pub const PCI_MSIX_TABLE: u8 = 4; +pub const PCI_MSIX_TABLE_BIR: u32 = 0x00000007; +pub const PCI_MSIX_PBA: u8 = 8; +pub const PCI_MSIX_PBA_BIR: u32 = 0x00000007; + +pub const PCI_MSIX_ENTRY_SIZE: u16 = 16; +pub const PCI_MSIX_ENTRY_LOWER_ADDR: u64 = 0x0; +pub const PCI_MSIX_ENTRY_UPPER_ADDR: u64 = 0x4; +pub const PCI_MSIX_ENTRY_DATA: u64 = 0x8; +pub const PCI_MSIX_ENTRY_VECTOR_CTRL: u64 = 0xc; +pub const PCI_MSIX_ENTRY_CTRL_MASKBIT: u32 = 0x00000001; + +pub const PCI_EXP_LNKSTA: u8 = 0x12; +pub const PCI_EXP_LNKSTA_CLS: u16 = 0x000f; +pub const PCI_EXP_LNKSTA_NLW: u16 = 0x03f0; +pub const PCI_EXP_LNKSTA_DLLLA: u16 = 0x2000; + +pub const PCI_EXP_SLTSTA: u8 = 0x1a; +pub const PCI_EXP_SLTSTA_ABP: u16 = 0x0001; +pub const PCI_EXP_SLTSTA_PDC: u16 = 0x0008; +pub const PCI_EXP_SLTSTA_CC: u16 = 0x0010; +pub const PCI_EXP_SLTSTA_PDS: u16 = 0x0040; + +pub const PCI_EXP_SLTCTL: u8 = 0x18; +pub const PCI_EXP_SLTCTL_ABPE: u16 = 0x0001; +pub const PCI_EXP_SLTCTL_PDCE: u16 = 0x0008; +pub const PCI_EXP_SLTCTL_CCIE: u16 = 0x0010; +pub const PCI_EXP_SLTCTL_HPIE: u16 = 0x0020; +pub const PCI_EXP_SLTCTL_PIC: u16 = 0x0300; +pub const PCI_EXP_SLTCTL_PWR_IND_ON: u16 = 0x0100; +pub const PCI_EXP_SLTCTL_PWR_IND_BLINK: u16 = 0x0200; +pub const PCI_EXP_SLTCTL_PWR_IND_OFF: u16 = 0x0300; +pub const PCI_EXP_SLTCTL_PCC: u16 = 0x0400; +pub const PCI_EXP_SLTCTL_PWR_ON: u16 = 0x0000; +pub const PCI_EXP_SLTCTL_PWR_OFF: u16 = 0x0400; +pub type PCIBarAddr = u64; +pub const INVALID_BAR_ADDR: u64 = u64::MAX; + +const PCI_LINK_GSI: [u32; 4] = [48, 49, 50, 51]; + +pub trait PciMsixOps { + fn set_msix_vector(&self, msix_entry: u16, msix_addr: u64, msix_data: u32); +} + +#[derive(Clone)] +pub struct TestPciDev { + pub pci_bus: Rc>, + pub bus_num: u8, + pub devfn: u8, + pub msix_enabled: bool, + pub msix_table_bar: PCIBarAddr, + pub msix_pba_bar: PCIBarAddr, + pub msix_table_off: u64, + pub msix_pba_off: u64, + pub msix_used_vectors: u32, + pub irq_num: u32, +} + +impl TestPciDev { + pub fn new(pci_bus: Rc>) -> Self { + Self { + pci_bus, + bus_num: 0, + devfn: 0, + msix_enabled: false, + msix_table_bar: 
0, + msix_pba_bar: 0, + msix_table_off: 0, + msix_pba_off: 0, + msix_used_vectors: 0, + irq_num: std::u32::MAX, + } + } + + pub fn set_bus_num(&mut self, bus_num: u8) { + self.bus_num = bus_num; + } + + pub fn enable(&self) { + let mut cmd = self.config_readw(PCI_COMMAND); + cmd |= u16::from(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + self.config_writew(PCI_COMMAND, cmd); + + cmd = self.config_readw(PCI_COMMAND); + assert!(cmd & u16::from(PCI_COMMAND_IO) == u16::from(PCI_COMMAND_IO)); + assert!(cmd & u16::from(PCI_COMMAND_MEMORY) == u16::from(PCI_COMMAND_MEMORY)); + assert!(cmd & u16::from(PCI_COMMAND_MASTER) == u16::from(PCI_COMMAND_MASTER)); + } + + pub fn find_capability(&self, id: u8, start_addr: u8) -> u8 { + let mut addr = if start_addr != 0 { + self.config_readb(start_addr + PCI_CAP_LIST_NEXT) + } else { + self.config_readb(start_addr + PCI_CAPABILITY_LIST) + }; + + loop { + let cap = self.config_readb(addr); + if cap != id { + addr = self.config_readb(addr + PCI_CAP_LIST_NEXT); + } + if cap == id || addr == 0 { + break; + } + } + addr + } + + pub fn set_intx_irq_num(&mut self, slot: u8) { + let pin = ((self.config_readb(PCI_INTERRUPT_PIN) - 1 + slot) % PCI_PIN_NUM) as usize; + self.irq_num = PCI_LINK_GSI[pin]; + } + + pub fn has_intx(&self) -> bool { + self.pci_bus + .borrow() + .test_state + .borrow() + .query_intx(self.irq_num) + } + + pub fn eoi_intx(&self) { + self.pci_bus + .borrow() + .test_state + .borrow() + .eoi_intx(self.irq_num); + } + + /// Enable MSI-X. + /// + /// # Arguments + /// + /// `bar_addr` - Address of the bar where the MSI-X is located. Address allocated by Default. + pub fn enable_msix(&mut self, bar_addr: Option) { + let addr = self.find_capability(PCI_CAP_ID_MSIX, 0); + assert!(addr != 0); + let value = self.config_readw(addr + PCI_MSIX_MSG_CTL); + self.config_writew(addr + PCI_MSIX_MSG_CTL, value | PCI_MSIX_MSG_CTL_ENABLE); + + let table = self.config_readl(addr + PCI_MSIX_TABLE); + let bar_table = table & PCI_MSIX_TABLE_BIR; + self.msix_table_bar = if let Some(addr) = bar_addr { + addr + } else { + self.io_map(bar_table as u8) + }; + self.msix_table_off = u64::from(table & !PCI_MSIX_TABLE_BIR); + + let table = self.config_readl(addr + PCI_MSIX_PBA); + let bar_pba = table & PCI_MSIX_TABLE_BIR; + if bar_pba != bar_table { + self.msix_pba_bar = self.io_map(bar_pba as u8); + } else { + self.msix_pba_bar = self.msix_table_bar; + } + self.msix_pba_off = u64::from(table & !PCI_MSIX_TABLE_BIR); + self.msix_enabled = true; + } + + pub fn disable_msix(&mut self) { + let addr = self.find_capability(PCI_CAP_ID_MSIX, 0); + assert!(addr != 0); + let value = self.config_readw(addr + PCI_MSIX_MSG_CTL); + self.config_writew(addr + PCI_MSIX_MSG_CTL, value & !PCI_MSIX_MSG_CTL_ENABLE); + + self.msix_enabled = false; + self.msix_table_off = 0; + self.msix_pba_off = 0; + } + + pub fn has_msix(&self, msix_addr: u64, msix_data: u32) -> bool { + self.pci_bus + .borrow() + .test_state + .borrow() + .query_msix(msix_addr, msix_data) + } + + pub fn get_msix_table_size(&self) -> u16 { + let addr = self.find_capability(PCI_CAP_ID_MSIX, 0); + assert!(addr != 0); + + let value = self.config_readw(addr + PCI_MSIX_MSG_CTL); + (value & PCI_MSIX_MSG_CTL_TSIZE) + 1 + } + + pub fn init_notification(&self) { + let cap_exp_addr = self.find_capability(PCI_CAP_ID_EXP, 0); + let mut cmd = self.pci_bus.borrow().config_readw( + self.bus_num, + self.devfn, + cap_exp_addr + PCI_EXP_SLTCTL, + ); + cmd |= + PCI_EXP_SLTCTL_ABPE | PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_CCIE | 
PCI_EXP_SLTCTL_HPIE; + + self.pci_bus.borrow().config_writew( + self.bus_num, + self.devfn, + cap_exp_addr + PCI_EXP_SLTCTL, + cmd, + ); + } + + pub fn clear_slot_event(&self) { + let cap_exp_addr = self.find_capability(PCI_CAP_ID_EXP, 0); + let mut status = self.pci_bus.borrow().config_readw( + self.bus_num, + self.devfn, + cap_exp_addr + PCI_EXP_SLTSTA, + ); + + status &= PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC; + self.pci_bus.borrow().config_writew( + self.bus_num, + self.devfn, + cap_exp_addr + PCI_EXP_SLTSTA, + status, + ); + } + + pub fn io_readb(&self, bar_addr: PCIBarAddr, offset: u64) -> u8 { + let pci_bus = self.pci_bus.borrow_mut(); + let value_bytes = pci_bus.memread((bar_addr + offset) as u32, 1); + assert!(!value_bytes.is_empty()); + u8::from_le_bytes(value_bytes[0..1].try_into().unwrap()) + } + + pub fn io_readw(&self, bar_addr: PCIBarAddr, offset: u64) -> u16 { + let pci_bus = self.pci_bus.borrow_mut(); + let value_bytes = pci_bus.memread((bar_addr + offset) as u32, 2); + assert!(value_bytes.len() >= 2); + u16::from_le_bytes(value_bytes[0..2].try_into().unwrap()) + } + + pub fn io_readl(&self, bar_addr: PCIBarAddr, offset: u64) -> u32 { + let pci_bus = self.pci_bus.borrow_mut(); + let value_bytes = pci_bus.memread((bar_addr + offset) as u32, 4); + assert!(value_bytes.len() >= 4); + u32::from_le_bytes(value_bytes[0..4].try_into().unwrap()) + } + + pub fn io_readq(&self, bar_addr: PCIBarAddr, offset: u64) -> u64 { + let pci_bus = self.pci_bus.borrow_mut(); + let value_bytes = pci_bus.memread((bar_addr + offset) as u32, 8); + assert!(value_bytes.len() >= 8); + u64::from_le_bytes(value_bytes[0..8].try_into().unwrap()) + } + + pub fn io_writeb(&self, bar_addr: PCIBarAddr, offset: u64, value: u8) { + let value_buf = value.to_le_bytes().to_vec(); + let pci_bus = self.pci_bus.borrow_mut(); + pci_bus.memwrite((bar_addr + offset) as u32, &value_buf); + } + + pub fn io_writew(&self, bar_addr: PCIBarAddr, offset: u64, value: u16) { + let value_buf = value.to_le_bytes().to_vec(); + let pci_bus = self.pci_bus.borrow_mut(); + pci_bus.memwrite((bar_addr + offset) as u32, &value_buf); + } + + pub fn io_writel(&self, bar_addr: PCIBarAddr, offset: u64, value: u32) { + let value_buf = value.to_le_bytes().to_vec(); + let pci_bus = self.pci_bus.borrow_mut(); + pci_bus.memwrite((bar_addr + offset) as u32, &value_buf); + } + + #[allow(unused)] + pub fn io_writeq(&self, bar_addr: PCIBarAddr, offset: u64, value: u64) { + let value_buf = value.to_le_bytes().to_vec(); + let pci_bus = self.pci_bus.borrow_mut(); + pci_bus.memwrite((bar_addr + offset) as u32, &value_buf); + } + + pub fn find_pci_device(&mut self, devfn: u8) -> bool { + self.devfn = devfn; + self.config_readw(PCI_VENDOR_ID) != 0xFFFF + } + + pub fn io_map(&self, barnum: u8) -> u64 { + assert!(barnum <= 5); + let bar_offset: u8 = BAR_MAP[barnum as usize]; + + self.config_writel(bar_offset, 0xFFFFFFFF); + let addr: u32 = self.config_readl(bar_offset) & !(0x0F_u32); + assert!(addr != 0); + + let mut pci_bus = self.pci_bus.borrow_mut(); + let size: u64 = 1 << addr.trailing_zeros(); + let location: u64 = (pci_bus.mmio_alloc_ptr + size - 1) / size * size; + if location < pci_bus.mmio_alloc_ptr || location + size > pci_bus.mmio_limit { + return INVALID_BAR_ADDR; + } + + pci_bus.mmio_alloc_ptr = location + size; + drop(pci_bus); + self.config_writel(bar_offset, location as u32); + let bar_addr: PCIBarAddr = location; + bar_addr + } + + pub fn config_readb(&self, offset: u8) -> u8 { + self.pci_bus + .borrow() + 
.config_readb(self.bus_num, self.devfn, offset) + } + + pub fn config_readw(&self, offset: u8) -> u16 { + self.pci_bus + .borrow() + .config_readw(self.bus_num, self.devfn, offset) + } + + pub fn config_readl(&self, offset: u8) -> u32 { + self.pci_bus + .borrow() + .config_readl(self.bus_num, self.devfn, offset) + } + + pub fn config_readq(&self, offset: u8) -> u64 { + self.pci_bus + .borrow() + .config_readq(self.bus_num, self.devfn, offset) + } + + #[allow(unused)] + pub fn config_writeb(&self, offset: u8, value: u8) { + self.pci_bus + .borrow() + .config_writeb(self.bus_num, self.devfn, offset, value); + } + + pub fn config_writew(&self, offset: u8, value: u16) { + self.pci_bus + .borrow() + .config_writew(self.bus_num, self.devfn, offset, value); + } + + pub fn config_writel(&self, offset: u8, value: u32) { + self.pci_bus + .borrow() + .config_writel(self.bus_num, self.devfn, offset, value); + } + + pub fn config_writeq(&self, offset: u8, value: u64) { + self.pci_bus + .borrow() + .config_writeq(self.bus_num, self.devfn, offset, value); + } +} + +impl PciMsixOps for TestPciDev { + fn set_msix_vector(&self, msix_entry: u16, msix_addr: u64, msix_data: u32) { + assert!(self.msix_enabled); + let offset = self.msix_table_off + u64::from(msix_entry * 16); + + let msix_table_bar = self.msix_table_bar; + self.io_writel( + msix_table_bar, + offset + PCI_MSIX_ENTRY_LOWER_ADDR, + msix_addr as u32, + ); + self.io_writel( + msix_table_bar, + offset + PCI_MSIX_ENTRY_UPPER_ADDR, + (msix_addr >> 32) as u32, + ); + self.io_writel(msix_table_bar, offset + PCI_MSIX_ENTRY_DATA, msix_data); + + let ctl = self.io_readl(msix_table_bar, offset + PCI_MSIX_ENTRY_VECTOR_CTRL); + self.io_writel( + msix_table_bar, + offset + PCI_MSIX_ENTRY_VECTOR_CTRL, + ctl & !PCI_MSIX_ENTRY_CTRL_MASKBIT, + ); + } +} diff --git a/tests/mod_test/src/libdriver/pci_bus.rs b/tests/mod_test/src/libdriver/pci_bus.rs new file mode 100644 index 0000000000000000000000000000000000000000..db0889db6f304af5eb26b8fdf5c3c347c4dbb991 --- /dev/null +++ b/tests/mod_test/src/libdriver/pci_bus.rs @@ -0,0 +1,165 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
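+
+// Config-space accesses are issued through the PCIe ECAM window: a register
+// lives at the ECAM base (ecam_alloc_ptr) plus (bus << 20 | devfn << 12 |
+// offset), see get_addr() below. BAR MMIO ranges are handed out linearly from
+// the PcieMmio layout entry via mmio_alloc_ptr/mmio_limit.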
+ +use std::cell::RefCell; +use std::rc::Rc; + +use crate::libdriver::pci::*; +use crate::libtest::TestState; +use crate::utils::{read_le_u16, read_le_u32, read_le_u64}; +#[cfg(target_arch = "aarch64")] +use machine::aarch64::standard::{LayoutEntryType, MEM_LAYOUT}; +#[cfg(target_arch = "x86_64")] +use machine::x86_64::standard::{LayoutEntryType, MEM_LAYOUT}; + +const PCIE_MMIO_BASE: u64 = MEM_LAYOUT[LayoutEntryType::PcieMmio as usize].0; +const PCIE_MMIO_SIZE: u64 = MEM_LAYOUT[LayoutEntryType::PcieMmio as usize].1; +#[cfg(target_arch = "aarch64")] +const PCIE_ECAM_BASE: u64 = MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].0; +#[cfg(target_arch = "x86_64")] +const PCIE_ECAM_BASE: u64 = MEM_LAYOUT[LayoutEntryType::PcieEcam as usize].0; + +pub trait PciBusOps { + fn memread(&self, addr: u32, len: usize) -> Vec; + fn memwrite(&self, addr: u32, buf: &[u8]); + + fn config_readb(&self, bus_num: u8, devfn: u8, offset: u8) -> u8; + fn config_readw(&self, bus_num: u8, devfn: u8, offset: u8) -> u16; + fn config_readl(&self, bus_num: u8, devfn: u8, offset: u8) -> u32; + fn config_readq(&self, bus_num: u8, devfn: u8, offset: u8) -> u64; + + fn config_writeb(&self, bus_num: u8, devfn: u8, offset: u8, value: u8); + fn config_writew(&self, bus_num: u8, devfn: u8, offset: u8, value: u16); + fn config_writel(&self, bus_num: u8, devfn: u8, offset: u8, value: u32); + fn config_writeq(&self, bus_num: u8, devfn: u8, offset: u8, value: u64); +} + +#[allow(unused)] +pub struct TestPciBus { + pub mmio_alloc_ptr: u64, + pub mmio_limit: u64, + pub ecam_alloc_ptr: u64, + not_hotpluggable: bool, + pub test_state: Rc>, +} + +impl TestPciBus { + pub fn new(test_state: Rc>) -> Self { + Self { + mmio_alloc_ptr: PCIE_MMIO_BASE, + mmio_limit: PCIE_MMIO_SIZE, + ecam_alloc_ptr: PCIE_ECAM_BASE, + not_hotpluggable: false, + test_state, + } + } + + fn get_addr(&self, bus_num: u8, devfn: u8, offset: u8) -> u64 { + self.ecam_alloc_ptr + + u64::from(u32::from(bus_num) << 20 | u32::from(devfn) << 12 | u32::from(offset)) + } + + pub fn pci_auto_bus_scan(&self, root_port_num: u8) { + let current_bus = 0; + let mut sub_bus = 0; + + for addr in 0..=root_port_num { + let devfn = addr << 3; + if current_bus == 0 && devfn == 0 { + continue; + } + + if devfn & 0x7 != 0 { + continue; + } + + if self.config_readb(current_bus, devfn, PCI_HEADER_TYPE) == 0 { + continue; + } + + let vendor_id = self.config_readw(current_bus, devfn, PCI_VENDOR_ID); + if vendor_id == 0xffff || vendor_id == 0x0000 { + continue; + } + + if self.config_readw(current_bus, devfn, PCI_SUB_CLASS_DEVICE) == 0x0604 { + self.pciauto_scan_setup_bridge(current_bus, devfn, sub_bus); + sub_bus += 1 + } + } + } + + fn pciauto_scan_setup_bridge(&self, current_bus: u8, devfn: u8, sub_bus: u8) { + self.config_writeb(current_bus, devfn, PCI_PRIMARY_BUS, 0); + self.config_writeb(current_bus, devfn, PCI_SECONDARY_BUS, sub_bus + 1); + self.config_writeb(current_bus, devfn, PCI_SUBORDINATE_BUS, sub_bus + 1); + } +} + +impl PciBusOps for TestPciBus { + fn memread(&self, addr: u32, len: usize) -> Vec { + self.test_state + .borrow() + .memread(u64::from(addr), len as u64) + } + + fn memwrite(&self, addr: u32, buf: &[u8]) { + self.test_state.borrow().memwrite(u64::from(addr), buf); + } + + fn config_readb(&self, bus_num: u8, devfn: u8, offset: u8) -> u8 { + let addr = self.get_addr(bus_num, devfn, offset); + self.test_state.borrow().memread(addr, 1)[0] + } + + fn config_readw(&self, bus_num: u8, devfn: u8, offset: u8) -> u16 { + let addr = self.get_addr(bus_num, devfn, offset); + let 
mut buf: &[u8] = &self.test_state.borrow().memread(addr, 2)[0..2]; + read_le_u16(&mut buf) + } + + fn config_readl(&self, bus_num: u8, devfn: u8, offset: u8) -> u32 { + let addr = self.get_addr(bus_num, devfn, offset); + let mut buf: &[u8] = &self.test_state.borrow().memread(addr, 4)[0..4]; + read_le_u32(&mut buf) + } + + fn config_readq(&self, bus_num: u8, devfn: u8, offset: u8) -> u64 { + let addr = self.get_addr(bus_num, devfn, offset); + let mut buf: &[u8] = &self.test_state.borrow().memread(addr, 8)[0..8]; + read_le_u64(&mut buf) + } + + fn config_writeb(&self, bus_num: u8, devfn: u8, offset: u8, value: u8) { + let addr = self.get_addr(bus_num, devfn, offset); + let buf = value.to_le_bytes(); + self.test_state.borrow().memwrite(addr, &buf); + } + + fn config_writew(&self, bus_num: u8, devfn: u8, offset: u8, value: u16) { + let addr = self.get_addr(bus_num, devfn, offset); + let buf = value.to_le_bytes(); + self.test_state.borrow().memwrite(addr, &buf); + } + + fn config_writel(&self, bus_num: u8, devfn: u8, offset: u8, value: u32) { + let addr = self.get_addr(bus_num, devfn, offset); + let buf = value.to_le_bytes(); + self.test_state.borrow().memwrite(addr, &buf); + } + + fn config_writeq(&self, bus_num: u8, devfn: u8, offset: u8, value: u64) { + let addr = self.get_addr(bus_num, devfn, offset); + let buf = value.to_le_bytes(); + self.test_state.borrow().memwrite(addr, &buf); + } +} diff --git a/tests/mod_test/src/libdriver/qcow2.rs b/tests/mod_test/src/libdriver/qcow2.rs new file mode 100644 index 0000000000000000000000000000000000000000..fb6ba7685358ef4f5e3c4d0259332bb1c41eead6 --- /dev/null +++ b/tests/mod_test/src/libdriver/qcow2.rs @@ -0,0 +1,335 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
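+
+// Test-only qcow2 helpers. create_qcow2_img() writes a minimal image with
+// 64 KiB clusters (CLUSTER_BITS = 16): cluster 0 holds the header, cluster 1
+// the refcount table, cluster 2 the refcount block and cluster 3 the L1 table.
+// write_full_disk() then places an L2 table in cluster 4 and data clusters
+// from cluster 5 onwards so that every guest cluster is allocated.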
+ +use std::cell::RefCell; +use std::rc::Rc; +use std::{ + fs::File, + io::{Seek, SeekFrom, Write}, + os::unix::prelude::{AsRawFd, OpenOptionsExt}, +}; + +use anyhow::{bail, Result}; +use byteorder::{BigEndian, ByteOrder}; +use libc::{c_int, iovec, off_t, preadv}; +use serde_json::Value; + +use crate::libtest::TestState; +use util::aio::Iovec; + +const QCOW_MAGIC: u32 = 0x514649fb; +const ENTRY_SIZE: u64 = 8; +const QCOW_VERSION_2_MIN_LEN: usize = 72; +const QCOW_VERSION_3_MIN_LEN: usize = 104; +const QCOW2_OFFSET_COPIED: u64 = 1 << 63; +const CLUSTER_BITS: u64 = 16; +pub const CLUSTER_SIZE: u64 = 1 << CLUSTER_BITS; + +#[derive(Debug)] +pub struct Qcow2Driver { + header: QcowHeader, + file: File, +} + +impl Qcow2Driver { + fn new(image_path: String) -> Self { + let file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(image_path) + .unwrap(); + + let mut qcow2 = Qcow2Driver { + header: QcowHeader::default(), + file, + }; + qcow2.load_header(); + qcow2 + } + + fn load_header(&mut self) { + let mut buf = vec![0; QcowHeader::len()]; + let ret = self.raw_read(0, &mut buf); + assert_eq!(ret, buf.len() as i64); + self.header = QcowHeader::from_vec(&buf).unwrap(); + } + + fn raw_read(&self, offset: u64, buf: &mut [u8]) -> i64 { + let ptr = buf.as_mut_ptr() as u64; + let cnt = buf.len() as u64; + let iovec = [Iovec::new(ptr, cnt)]; + + unsafe { + preadv( + self.file.as_raw_fd() as c_int, + iovec.as_ptr() as *const iovec, + iovec.len() as c_int, + offset as off_t, + ) as i64 + } + } + + fn raw_write(&mut self, offset: u64, buf: &mut [u8]) { + self.file.seek(SeekFrom::Start(offset)).unwrap(); + self.file.write_all(buf).unwrap(); + } +} + +#[repr(C)] +#[derive(Clone, Debug, Default)] +pub struct QcowHeader { + pub magic: u32, + pub version: u32, + pub backing_file_offset: u64, + pub backing_file_size: u32, + pub cluster_bits: u32, + pub size: u64, + pub crypt_method: u32, + pub l1_size: u32, + pub l1_table_offset: u64, + pub refcount_table_offset: u64, + pub refcount_table_clusters: u32, + pub nb_snapshots: u32, + pub snapshots_offset: u64, + // version >= v3 + pub incompatible_features: u64, + pub compatible_features: u64, + pub autoclear_features: u64, + pub refcount_order: u32, + pub header_length: u32, +} + +impl QcowHeader { + pub fn from_vec(buf: &[u8]) -> Result { + if buf.len() < QCOW_VERSION_2_MIN_LEN { + bail!( + "Invalid header len {}, the min len {}", + buf.len(), + QCOW_VERSION_2_MIN_LEN + ); + } + let mut header = QcowHeader { + magic: BigEndian::read_u32(&buf[0..4]), + version: BigEndian::read_u32(&buf[4..8]), + backing_file_offset: BigEndian::read_u64(&buf[8..16]), + backing_file_size: BigEndian::read_u32(&buf[16..20]), + cluster_bits: BigEndian::read_u32(&buf[20..24]), + size: BigEndian::read_u64(&buf[24..32]), + crypt_method: BigEndian::read_u32(&buf[32..36]), + l1_size: BigEndian::read_u32(&buf[36..40]), + l1_table_offset: BigEndian::read_u64(&buf[40..48]), + refcount_table_offset: BigEndian::read_u64(&buf[48..56]), + refcount_table_clusters: BigEndian::read_u32(&buf[56..60]), + nb_snapshots: BigEndian::read_u32(&buf[60..64]), + snapshots_offset: BigEndian::read_u64(&buf[64..72]), + ..Default::default() + }; + if header.magic != QCOW_MAGIC { + bail!("Invalid format {}", header.magic); + } + if header.version == 2 { + header.refcount_order = 4; + header.header_length = QCOW_VERSION_2_MIN_LEN as u32; + } else if header.version == 3 { + if buf.len() < QCOW_VERSION_3_MIN_LEN { + bail!("Invalid header len for version 3 {}", buf.len()); + } + 
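+            // The version-3 header extends v2 with the feature bitmaps,
+            // refcount_order and header_length at byte offsets 72..104.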
header.incompatible_features = BigEndian::read_u64(&buf[72..80]); + header.compatible_features = BigEndian::read_u64(&buf[80..88]); + header.autoclear_features = BigEndian::read_u64(&buf[88..96]); + header.refcount_order = BigEndian::read_u32(&buf[96..100]); + header.header_length = BigEndian::read_u32(&buf[100..104]); + } else { + bail!("Invalid version {}", header.version); + } + Ok(header) + } + + pub fn to_vec(&self) -> Vec { + let sz = if self.version == 2 { + QCOW_VERSION_2_MIN_LEN + } else { + QcowHeader::len() + }; + let mut buf = vec![0; sz]; + BigEndian::write_u32(&mut buf[0..4], self.magic); + BigEndian::write_u32(&mut buf[4..8], self.version); + BigEndian::write_u64(&mut buf[8..16], self.backing_file_offset); + BigEndian::write_u32(&mut buf[16..20], self.backing_file_size); + BigEndian::write_u32(&mut buf[20..24], self.cluster_bits); + BigEndian::write_u64(&mut buf[24..32], self.size); + BigEndian::write_u32(&mut buf[32..36], self.crypt_method); + BigEndian::write_u32(&mut buf[36..40], self.l1_size); + BigEndian::write_u64(&mut buf[40..48], self.l1_table_offset); + BigEndian::write_u64(&mut buf[48..56], self.refcount_table_offset); + BigEndian::write_u32(&mut buf[56..60], self.refcount_table_clusters); + BigEndian::write_u32(&mut buf[60..64], self.nb_snapshots); + BigEndian::write_u64(&mut buf[64..72], self.snapshots_offset); + if self.version >= 3 { + BigEndian::write_u64(&mut buf[72..80], self.incompatible_features); + BigEndian::write_u64(&mut buf[80..88], self.compatible_features); + BigEndian::write_u64(&mut buf[88..96], self.autoclear_features); + BigEndian::write_u32(&mut buf[96..100], self.refcount_order); + BigEndian::write_u32(&mut buf[100..104], self.header_length); + } + buf + } + + #[inline] + pub fn len() -> usize { + std::mem::size_of::() + } +} + +// From size to bits. +fn size_to_bits(size: u64) -> Option { + (0..63).find(|&i| size >> i == 1) +} + +/// Create a qcow2 format image for test. +pub fn create_qcow2_img(image_path: String, image_size: u64) { + let img_bits = size_to_bits(image_size).unwrap(); + let img_size = image_size; + let cluster_bits = CLUSTER_BITS; + let cluster_sz = 1 << cluster_bits; + + let l1_entry_size: u64 = 1 << (cluster_bits * 2 - 3); + let l1_size = (img_size + l1_entry_size - 1) / l1_entry_size; + let header = QcowHeader { + magic: QCOW_MAGIC, + version: 3, + backing_file_offset: 0, + backing_file_size: 0, + cluster_bits: cluster_bits as u32, + size: 1 << img_bits, + crypt_method: 0, + l1_size: l1_size as u32, + l1_table_offset: 3 * cluster_sz, + refcount_table_offset: cluster_sz, + refcount_table_clusters: 1, + nb_snapshots: 0, + snapshots_offset: 0, + incompatible_features: 0, + compatible_features: 0, + autoclear_features: 0, + refcount_order: 4, + header_length: std::mem::size_of::() as u32, + }; + + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .custom_flags(libc::O_CREAT | libc::O_TRUNC) + .open(image_path.clone()) + .unwrap(); + file.set_len(cluster_sz * 3 + u64::from(header.l1_size) * ENTRY_SIZE) + .unwrap(); + file.write_all(&header.to_vec()).unwrap(); + + // Cluster 1 is the refcount table. + assert_eq!(header.refcount_table_offset, cluster_sz); + let mut refcount_table = [0_u8; ENTRY_SIZE as usize]; + BigEndian::write_u64(&mut refcount_table, cluster_sz * 2); + file.seek(SeekFrom::Start(cluster_sz)).unwrap(); + file.write_all(&refcount_table).unwrap(); + + // Clusters which has been allocated. 
+    assert_eq!(header.refcount_order, 4);
+    let clusters =
+        3 + ((header.l1_size * ENTRY_SIZE as u32 + cluster_sz as u32 - 1) >> cluster_bits);
+    let mut refcount_block = Vec::new();
+    for _ in 0..clusters {
+        refcount_block.push(0x00);
+        refcount_block.push(0x01);
+    }
+    file.seek(SeekFrom::Start(cluster_sz * 2)).unwrap();
+    file.write_all(&refcount_block).unwrap();
+
+    // Fill the disk.
+    write_full_disk(image_path);
+}
+
+/// Fill the whole disk with allocated clusters (only used for tests).
+/// By default, the data occupied by the l2 table and refcount table should not
+/// exceed one cluster, so a disk that is defined too large would produce an
+/// incorrect on-disk layout. For example, with cluster size = 1 << 16, the max
+/// disk size cannot exceed 1 << (16 * 2 - 3) = 512M.
+fn write_full_disk(image_path: String) {
+    let mut qcow2 = Qcow2Driver::new(image_path);
+    let cluster_bits = qcow2.header.cluster_bits;
+    let cluster_size = 1 << cluster_bits;
+    let image_size = qcow2.header.size;
+
+    let n_cluster = image_size / cluster_size;
+    // Header + refcount table + refcount block + l1 table + l2 table = 5 clusters.
+    qcow2.file.set_len((5 + n_cluster) * cluster_size).unwrap();
+    // Write l2 table.
+    let mut refcount_block: Vec<u8> = Vec::new();
+    let mut l1_table = [0_u8; ENTRY_SIZE as usize];
+    BigEndian::write_u64(&mut l1_table, (cluster_size * 4) | QCOW2_OFFSET_COPIED);
+    let mut l2_table: Vec<u8> = Vec::new();
+    for _ in 0..5 {
+        refcount_block.push(0x00);
+        refcount_block.push(0x01);
+    }
+    let offset_start = 5 * cluster_size;
+    for i in 0..n_cluster {
+        let addr = offset_start + i * cluster_size;
+        let l2_table_value = addr | QCOW2_OFFSET_COPIED;
+
+        let mut tmp_buf = vec![0_u8; ENTRY_SIZE as usize];
+        BigEndian::write_u64(&mut tmp_buf, l2_table_value);
+        l2_table.append(&mut tmp_buf);
+        refcount_block.push(0x00);
+        refcount_block.push(0x01);
+
+        let mut cluster_buff = vec![0_u8; cluster_size as usize];
+        qcow2.raw_write(addr, &mut cluster_buff);
+    }
+    qcow2.raw_write(cluster_size * 2, &mut refcount_block);
+    qcow2.raw_write(cluster_size * 3, &mut l1_table);
+    qcow2.raw_write(cluster_size * 4, &mut l2_table);
+}
+
+pub fn create_snapshot(state: Rc<RefCell<TestState>>, device: &str, snap: &str) {
+    let qmp_str = format!("{{\"execute\":\"blockdev-snapshot-internal-sync\",\"arguments\":{{\"device\":\"{}\",\"name\":\"{}\"}}}}", device, snap);
+    state.borrow_mut().qmp(&qmp_str);
+}
+
+pub fn delete_snapshot(state: Rc<RefCell<TestState>>, device: &str, snap: &str) {
+    let qmp_str = format!("{{\"execute\":\"blockdev-snapshot-delete-internal-sync\",\"arguments\":{{\"device\":\"{}\",\"name\":\"{}\"}}}}", device, snap);
+    state.borrow_mut().qmp(&qmp_str);
+}
+
+pub fn query_snapshot(state: Rc<RefCell<TestState>>) -> Value {
+    let qmp_str =
+        "{\"execute\":\"human-monitor-command\",\"arguments\":{\"command-line\":\"info snapshots\"}}".to_string();
+    let value = state.borrow_mut().qmp(&qmp_str);

+    value
+}
+
+// Check if there exists a snapshot with the specified name.
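+// The "info snapshots" reply is plain HMP text: it is split into lines on
+// "\r\n" and the snapshot name is expected in the second whitespace-separated
+// column of each row.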
+pub fn check_snapshot(state: Rc>, snap: &str) -> bool { + let value = query_snapshot(state); + let str = (*value.get("return").unwrap()).as_str().unwrap(); + let lines: Vec<&str> = str.split("\r\n").collect(); + for line in lines { + let buf: Vec<&str> = line.split_whitespace().collect(); + if buf.len() > 2 && buf[1] == snap { + return true; + } + } + + false +} diff --git a/tests/mod_test/src/libdriver/usb.rs b/tests/mod_test/src/libdriver/usb.rs new file mode 100644 index 0000000000000000000000000000000000000000..c3633b48be864f6818f6051a6c8c1fbfdf118355 --- /dev/null +++ b/tests/mod_test/src/libdriver/usb.rs @@ -0,0 +1,2531 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + cell::{RefCell, RefMut}, + collections::{HashMap, LinkedList}, + mem::size_of, + rc::Rc, + time::Duration, +}; + +use byteorder::{ByteOrder, LittleEndian}; +use serde_json::Value; + +use super::{ + machine::TestStdMachine, + malloc::GuestAllocator, + pci::{PCIBarAddr, TestPciDev, PCI_VENDOR_ID}, + pci_bus::TestPciBus, +}; +use crate::libdriver::pci::{PciMsixOps, PCI_DEVICE_ID}; +use crate::libtest::{test_init, TestState, MACHINE_TYPE_ARG}; +use devices::usb::{ + config::*, + hid::{ + HID_GET_IDLE, HID_GET_PROTOCOL, HID_GET_REPORT, HID_SET_IDLE, HID_SET_PROTOCOL, + HID_SET_REPORT, + }, + xhci::{ + xhci_controller::{ + DwordOrder, XhciEpCtx, XhciInputCtrlCtx, XhciSlotCtx, EP_RUNNING, SLOT_DEFAULT, + }, + xhci_regs::{ + XHCI_INTR_REG_ERDP_LO, XHCI_INTR_REG_ERSTBA_LO, XHCI_INTR_REG_ERSTSZ, + XHCI_INTR_REG_IMAN, XHCI_INTR_REG_IMOD, XHCI_INTR_REG_SIZE, XHCI_OPER_REG_CONFIG, + XHCI_OPER_REG_PAGESIZE, XHCI_OPER_REG_USBCMD, XHCI_OPER_REG_USBSTS, + }, + xhci_trb::{TRBCCode, TRBType, TRB_SIZE}, + }, + UsbDeviceRequest, +}; +use util::byte_code::ByteCode; + +pub const PCI_VENDOR_ID_REDHAT: u16 = 0x1b36; +pub const PCI_DEVICE_ID_REDHAT_XHCI: u16 = 0x000d; +pub const PCI_CLASS_PI: u8 = 0x9; +pub const SUB_CLASS_CODE: u8 = 0xa; +pub const PCI_CLASS_SERIAL_USB: u16 = 0x0c03; + +pub const XHCI_PCI_CAP_OFFSET: u32 = 0; +pub const XHCI_PCI_OPER_OFFSET: u32 = 0x40; +pub const XHCI_PCI_PORT_OFFSET: u32 = 0x440; +pub const XHCI_PCI_PORT_LENGTH: u32 = 0x10; +pub const XHCI_PCI_RUNTIME_OFFSET: u32 = 0x1000; +pub const XHCI_PCI_DOORBELL_OFFSET: u32 = 0x2000; +pub const XHCI_PORTSC_OFFSET: u64 = 0x0; +pub const XHCI_OPER_REG_DCBAAP: u64 = 0x30; +pub const XHCI_OPER_REG_CMD_RING_CTRL: u64 = 0x18; + +pub const USB_CONFIG_MAX_SLOTS_EN_MASK: u32 = 0xff; +const DEVICE_CONTEXT_ENTRY_SIZE: u32 = 0x8; +const EVENT_RING_SEGMENT_TABLE_ENTRY_SIZE: u32 = 0x10; +const RUNTIME_REGS_INTERRUPT_OFFSET: u64 = 0x20; +const PORT_EVENT_ID_SHIFT: u32 = 24; +const PORT_EVENT_ID_MASK: u32 = 0xff; +// TRB +const TRB_INTR_SHIFT: u32 = 22; +const TRB_INTR_MASK: u32 = 0x3ff; +const TRB_C: u32 = 1; +const TRB_TYPE_SHIFT: u32 = 10; +const TRB_TYPE_MASK: u32 = 0x3f; +const TRB_SLOT_ID_SHIFT: u32 = 24; +const TRB_SLOT_ID_MASK: u32 = 0xff; +const TRB_EP_ID_SHIFT: u32 = 16; +const TRB_EP_ID_MASK: u32 = 0x1f; +const TRB_BSR_SHIFT: u32 = 9; +const 
TRB_BSR_MASK: u32 = 0x1; +const TRB_TD_SIZE_SHIFT: u32 = 9; +const TRB_TD_SIZE_MASK: u32 = 0x1; +const TRB_TRANSFER_LENGTH_SHIFT: u32 = 0; +const TRB_TRANSFER_LENGTH_MASK: u32 = 0x1ffff; +const TRB_IOC_SHIFT: u32 = 5; +const TRB_IOC_MASK: u32 = 0x1; +const TRB_CH_SHIFT: u32 = 4; +const TRB_CH_MASK: u32 = 0x1; +const TRB_IDT_SHIFT: u32 = 6; +const TRB_IDT_MASK: u32 = 0x1; +const TRB_ISP_SHIFT: u32 = 2; +const TRB_ISP_MASK: u32 = 0x1; +const TRB_DIR_SHIFT: u32 = 16; +const TRB_DIR_MASK: u32 = 0x1; +const TRB_TRT_SHIFT: u32 = 16; +const TRB_TRT_MASK: u32 = 0x1; +const TRB_TC_SHIFT: u32 = 1; +const TRB_TC_MASK: u32 = 0x1; +const TRB_DC_SHIFT: u32 = 9; +const TRB_DC_MASK: u32 = 0x1; +const DEVICE_CONTEXT_SIZE: u64 = 0x400; +const INPUT_CONTEXT_SIZE: u64 = 0x420; +pub const CONTROL_ENDPOINT_ID: u32 = 1; +pub const HID_KEYBOARD_LEN: u64 = 8; +pub const HID_POINTER_LEN: u64 = 7; +pub const KEYCODE_SPACE: u32 = 57; +pub const KEYCODE_NUM1: u32 = 2; +const HID_POINTER_REPORT_LEN: u8 = 89; +// Descriptor type +pub const USB_DESCRIPTOR_TYPE_DEVICE: u8 = 1; +pub const USB_DESCRIPTOR_TYPE_CONFIG: u8 = 2; +pub const USB_DESCRIPTOR_TYPE_STRING: u8 = 3; +pub const USB_DESCRIPTOR_TYPE_INTERFACE: u8 = 4; +pub const USB_DESCRIPTOR_TYPE_ENDPOINT: u8 = 5; +// Test config +pub const USB_CONFIG_MAX_SLOTS_ENABLED: u32 = 4; +pub const USB_CONFIG_MAX_INTERRUPTER: u32 = 4; +pub const COMMAND_RING_LEN: u64 = 256; +pub const EVENT_RING_SEGMENT_TABLE_LEN: u32 = 1; +pub const EVENT_RING_LEN: u64 = 64; +pub const TRANSFER_RING_LEN: u64 = 256; +// Max TRB limit in one TD. +pub const TD_TRB_LIMIT: u64 = 0x20000 + 10; +// The USB keyboard and tablet intr endpoint id. +pub const HID_DEVICE_ENDPOINT_ID: u32 = 3; +pub const STORAGE_DEVICE_IN_ENDPOINT_ID: u32 = 3; +pub const STORAGE_DEVICE_OUT_ENDPOINT_ID: u32 = 4; +// Primary Interrupter +pub const PRIMARY_INTERRUPTER_ID: usize = 0; +pub const XHCI_PCI_SLOT_NUM: u8 = 0x5; +pub const XHCI_PCI_FUN_NUM: u8 = 0; +const INTERFACE_ID_CONTROL: u8 = 0; +// According to UVC specification 1.5 +// A.2. 
Video Interface Subclass Codes +const SC_VIDEOCONTROL: u8 = 0x01; +const SC_VIDEOSTREAMING: u8 = 0x02; +const SC_VIDEO_INTERFACE_COLLECTION: u8 = 0x03; + +#[derive(Eq, PartialEq)] +enum UsbDeviceType { + Tablet, + Keyboard, + Storage, + Camera, + Other, +} + +#[derive(Debug, Default, Copy, Clone)] +pub struct TestNormalTRB { + parameter: u64, + status: u32, + control: u32, + // Force mismatch cycle + pub force_cycle: bool, +} + +impl TestNormalTRB { + pub fn generate_normal_td(target: u32, len: u32) -> TestNormalTRB { + let mut trb = TestNormalTRB::default(); + trb.set_ioc_flag(true); + trb.set_isp_flag(true); + trb.set_idt_flag(true); + trb.set_interrupter_target(target); + trb.set_trb_type(TRBType::TrNormal as u32); + trb.set_trb_transfer_length(len); + trb + } + + pub fn generate_setup_td(device_req: &UsbDeviceRequest) -> TestNormalTRB { + let mut setup_trb = TestNormalTRB::default(); + setup_trb.parameter = u64::from(device_req.length) << 48 + | u64::from(device_req.index) << 32 + | u64::from(device_req.value) << 16 + | u64::from(device_req.request) << 8 + | u64::from(device_req.request_type); + setup_trb.set_idt_flag(true); + setup_trb.set_ch_flag(true); + setup_trb.set_trb_type(TRBType::TrSetup as u32); + setup_trb.set_trb_transfer_length(8); + setup_trb.set_transfer_type(3); + setup_trb + } + + pub fn generate_data_td(ptr: u64, len: u16, in_dir: bool) -> TestNormalTRB { + let mut data_trb = TestNormalTRB::default(); + data_trb.set_pointer(ptr); + data_trb.set_ch_flag(true); + data_trb.set_dir_flag(in_dir); + data_trb.set_trb_type(TRBType::TrData as u32); + data_trb.set_trb_transfer_length(u32::from(len)); + data_trb + } + + pub fn generate_status_td(dir: bool) -> TestNormalTRB { + let mut status_trb = TestNormalTRB::default(); + status_trb.set_ch_flag(false); + status_trb.set_ioc_flag(true); + status_trb.set_dir_flag(dir); + status_trb.set_trb_type(TRBType::TrStatus as u32); + status_trb + } + + pub fn generate_event_data_trb(ptr: u64) -> TestNormalTRB { + let mut ev_data_trb = TestNormalTRB::default(); + ev_data_trb.set_pointer(ptr); + ev_data_trb.set_ioc_flag(true); + ev_data_trb.set_trb_type(TRBType::TrEvdata as u32); + ev_data_trb + } + + pub fn set_interrupter_target(&mut self, v: u32) { + self.status &= !(TRB_INTR_MASK << TRB_INTR_SHIFT); + self.status |= (v & TRB_INTR_MASK) << TRB_INTR_SHIFT; + } + + pub fn set_cycle_bit(&mut self, v: bool) { + if v { + self.control |= TRB_C; + } else { + self.control &= !TRB_C; + } + } + + pub fn set_trb_type(&mut self, v: u32) { + self.control &= !(TRB_TYPE_MASK << TRB_TYPE_SHIFT); + self.control |= (v & TRB_TYPE_MASK) << TRB_TYPE_SHIFT; + } + + pub fn set_slot_id(&mut self, v: u32) { + self.control &= !(TRB_SLOT_ID_MASK << TRB_SLOT_ID_SHIFT); + self.control |= (v & TRB_SLOT_ID_MASK) << TRB_SLOT_ID_SHIFT; + } + + pub fn set_ep_id(&mut self, v: u32) { + self.control &= !(TRB_EP_ID_MASK << TRB_EP_ID_SHIFT); + self.control |= (v & TRB_EP_ID_MASK) << TRB_EP_ID_SHIFT; + } + + pub fn set_bsr(&mut self, v: bool) { + self.control &= !(TRB_BSR_MASK << TRB_BSR_SHIFT); + self.control |= (if v { 1 } else { 0 } & TRB_BSR_MASK) << TRB_BSR_SHIFT; + } + + fn to_xhci_event(&self) -> TestXhciEvent { + let mut evt = TestXhciEvent::default(); + evt.ptr = self.parameter; + evt.ccode = (self.status >> 24) & 0xff; + evt.length = self.status & 0xffffff; + evt.flags = self.control; + evt + } + + pub fn set_pointer(&mut self, dequeue: u64) { + self.parameter = dequeue; + } + + pub fn set_td_size(&mut self, sz: u32) { + self.status &= !(TRB_TD_SIZE_MASK << 
TRB_TD_SIZE_SHIFT); + self.status |= (sz & TRB_TD_SIZE_MASK) << TRB_TD_SIZE_SHIFT; + } + + pub fn set_trb_transfer_length(&mut self, len: u32) { + self.status &= !(TRB_TRANSFER_LENGTH_MASK << TRB_TRANSFER_LENGTH_SHIFT); + self.status |= (len & TRB_TRANSFER_LENGTH_MASK) << TRB_TRANSFER_LENGTH_SHIFT; + } + + pub fn set_ioc_flag(&mut self, v: bool) { + if v { + self.control |= TRB_IOC_MASK << TRB_IOC_SHIFT; + } else { + self.control &= !(TRB_IOC_MASK << TRB_IOC_SHIFT); + } + } + + pub fn set_ch_flag(&mut self, v: bool) { + if v { + self.control |= TRB_CH_MASK << TRB_CH_SHIFT; + } else { + self.control &= !(TRB_CH_MASK << TRB_CH_SHIFT); + } + } + + pub fn set_idt_flag(&mut self, v: bool) { + if v { + self.control |= TRB_IDT_MASK << TRB_IDT_SHIFT; + } else { + self.control &= !(TRB_IDT_MASK << TRB_IDT_SHIFT); + } + } + + pub fn set_isp_flag(&mut self, v: bool) { + if v { + self.control |= TRB_ISP_MASK << TRB_ISP_SHIFT; + } else { + self.control &= !(TRB_ISP_MASK << TRB_ISP_SHIFT); + } + } + + pub fn set_dir_flag(&mut self, v: bool) { + if v { + self.control |= TRB_DIR_MASK << TRB_DIR_SHIFT; + } else { + self.control &= !(TRB_DIR_MASK << TRB_DIR_SHIFT); + } + } + + pub fn set_transfer_type(&mut self, v: u32) { + self.control &= !(TRB_TRT_MASK << TRB_TRT_SHIFT); + self.control |= (v & TRB_TRT_MASK) << TRB_TRT_SHIFT; + } + + pub fn set_toggle_cycle(&mut self, v: bool) { + if v { + self.control |= TRB_TC_MASK << TRB_TC_SHIFT; + } else { + self.control &= !(TRB_TC_MASK << TRB_TC_SHIFT); + } + } + + pub fn set_dc_flag(&mut self, v: bool) { + self.control &= !(TRB_DC_MASK << TRB_DC_SHIFT); + self.control |= (if v { 1 } else { 0 } & TRB_DC_MASK) << TRB_DC_SHIFT; + } +} + +#[derive(Default)] +pub struct TestXhciEvent { + pub ccode: u32, + pub ptr: u64, + pub length: u32, + flags: u32, +} + +impl TestXhciEvent { + pub fn get_port_id(&self) -> u32 { + (self.ptr as u32) >> PORT_EVENT_ID_SHIFT & PORT_EVENT_ID_MASK + } + + pub fn get_slot_id(&self) -> u32 { + (self.flags >> TRB_SLOT_ID_SHIFT) & TRB_SLOT_ID_MASK + } + + pub fn get_ep_id(&self) -> u32 { + (self.flags >> TRB_EP_ID_SHIFT) & TRB_EP_ID_MASK + } + + pub fn get_trb_type(&self) -> u32 { + (self.flags >> TRB_TYPE_SHIFT) & TRB_TYPE_MASK + } +} + +#[derive(Default, Debug, Copy, Clone)] +struct TestXhciRing { + pointer: u64, + start: u64, + size: u64, + cycle_bit: bool, +} + +impl TestXhciRing { + fn new() -> Self { + Self { + pointer: 0, + start: 0, + size: 0, + cycle_bit: true, + } + } + + fn init(&mut self, addr: u64, sz: u64) { + self.pointer = addr; + self.start = addr; + self.size = sz; + self.cycle_bit = true; + } + + fn update_pointer(&mut self, addr: u64) { + self.pointer = addr; + } + + fn increase_pointer(&mut self, sz: u64) { + self.pointer += sz; + } +} + +pub struct TestEventRingSegment { + pub addr: u64, + pub size: u32, + pub reserved: u32, +} + +impl Default for TestEventRingSegment { + fn default() -> Self { + Self::new() + } +} + +impl TestEventRingSegment { + pub fn new() -> Self { + Self { + addr: 0, + size: 0, + reserved: 0, + } + } + + pub fn init(&mut self, addr: u64, sz: u32) { + self.addr = addr; + self.size = sz; + } +} + +#[derive(Default, Clone, Copy)] +struct TestXhciInterrupter { + erstsz: u32, + erstba: u64, + segment_index: u32, + cycle_bit: bool, + er_pointer: u64, + trb_count: u32, +} + +#[derive(Default, Clone, Copy)] +struct DeviceSlot { + endpoints: [EndpointContext; 31], +} + +#[derive(Default, Clone, Copy)] +struct EndpointContext { + transfer_ring: TestXhciRing, +} + +// Iovec for test transfer. 
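+// Each entry records one segment of a TD in guest memory (io_base/io_len),
+// together with the `direct` flag consumed by the transfer helpers and
+// `event_data`, which marks segments described by an Event Data TRB.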
+#[derive(Default, Clone, Copy)] +pub struct TestIovec { + pub io_base: u64, + pub io_len: usize, + pub direct: bool, + // Whether the trb is event data trb. + pub event_data: bool, +} + +impl TestIovec { + pub fn new(base: u64, len: usize, direct: bool) -> Self { + Self { + io_base: base, + io_len: len, + direct, + event_data: false, + } + } +} + +struct TestXhciDevice { + cmd_ring: TestXhciRing, + dcbaap: u64, + device_slot: Vec, + interrupter: Vec, +} + +impl TestXhciDevice { + fn new() -> Self { + Self { + cmd_ring: TestXhciRing::new(), + dcbaap: 0, + device_slot: vec![DeviceSlot::default(); (USB_CONFIG_MAX_SLOTS_ENABLED + 1) as usize], + interrupter: vec![TestXhciInterrupter::default(); USB_CONFIG_MAX_INTERRUPTER as usize], + } + } +} + +pub struct TestXhciPciDevice { + pub pci_dev: TestPciDev, + pub bar_addr: PCIBarAddr, + bar_idx: u8, + allocator: Rc>, + xhci: TestXhciDevice, + pub device_config: HashMap, + // Event list to save all ready event when has msix. + event_list: LinkedList, + // msix config + config_msix_entry: u16, + config_msix_addr: u64, + config_msix_data: u32, +} + +impl TestXhciPciDevice { + pub fn new(pci_bus: Rc>, allocator: Rc>) -> Self { + Self { + pci_dev: TestPciDev::new(pci_bus), + bar_addr: 0, + bar_idx: 0, + allocator, + xhci: TestXhciDevice::new(), + device_config: HashMap::new(), + event_list: LinkedList::new(), + config_msix_entry: 0, + config_msix_addr: 0, + config_msix_data: 0, + } + } + + pub fn run(&mut self) { + let status = self.pci_dev.io_readl( + self.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_USBSTS, + ); + assert!(status & USB_STS_HCH == USB_STS_HCH); + let cmd = self.pci_dev.io_readl( + self.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_USBCMD, + ); + self.pci_dev.io_writel( + self.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_USBCMD, + cmd | USB_CMD_RUN, + ); + let status = self.pci_dev.io_readl( + self.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_USBSTS, + ); + assert!(status & USB_STS_HCH != USB_STS_HCH); + } + + /// Reference 4.2 Host Controller initialization. 
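+    ///
+    /// The sequence below mirrors the spec: map the PCI device and its BAR,
+    /// sanity-check the capability registers, allocate the guest memory the
+    /// controller needs, then program MaxSlotsEn (CONFIG), the DCBAAP, the
+    /// command ring dequeue pointer and the primary interrupter (event ring
+    /// plus MSI-X) before the controller is started with run().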
+ pub fn init_host_controller(&mut self, pci_slot: u8, pci_fn: u8) { + self.init_pci_device(pci_slot, pci_fn); + self.read_pci_config(); + self.read_capability(); + self.init_memory(); + self.init_max_device_slot_enabled(); + self.init_device_context_base_address_array_pointer(); + self.init_command_ring_dequeue_pointer(); + self.init_interrupter(); + } + + pub fn init_device(&mut self, port_id: u32) -> u32 { + let usb_device_type = self.get_usb_device_type(); + // reset usb port + self.reset_port(port_id); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // enable slot + self.enable_slot(); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + if *self + .device_config + .get("address_device_bsr") + .unwrap_or(&false) + { + // address device bsr = 1 + let slot_id = evt.get_slot_id(); + self.address_device(slot_id, true, port_id); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_ctx = self.get_slot_context(slot_id); + assert_eq!(slot_ctx.get_slot_state(), SLOT_DEFAULT); + let ep0_ctx = self.get_endpoint_context(slot_id, CONTROL_ENDPOINT_ID); + assert_eq!(ep0_ctx.get_ep_state(), EP_RUNNING); + // reset device + self.reset_device(slot_id); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_ctx = self.get_slot_context(slot_id); + assert_eq!(slot_ctx.get_slot_state(), SLOT_DEFAULT); + assert_eq!(slot_ctx.get_usb_device_address(), 0); + } + // address device bsr = 0 + let slot_id = evt.get_slot_id(); + self.address_device(slot_id, false, port_id); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // get descriptor + self.get_usb_descriptor(slot_id); + // get hid report descriptor + self.check_hid_report_descriptor(slot_id); + // evaluate context + self.evaluate_context(slot_id, 0x1234, 0, 64); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_ctx = self.get_slot_context(slot_id); + assert_eq!(slot_ctx.get_max_exit_latency(), 0x1234); + assert_eq!(slot_ctx.get_interrupter_target(), 0); + let ep0_ctx = self.get_endpoint_context(slot_id, CONTROL_ENDPOINT_ID); + assert_eq!(ep0_ctx.get_max_packet_size(), 64); + // get configuration + self.get_configuration(slot_id); + self.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + let buf = self.get_transfer_data_indirect(evt.ptr - u64::from(TRB_SIZE), 1); + assert_eq!(buf[0], 0); + // configure endpoint + self.configure_endpoint(slot_id, false); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // set feature + self.set_feature(slot_id, USB_DEVICE_REMOTE_WAKEUP as u16); + self.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // get status + self.get_status(slot_id); + self.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + let buf = self.get_transfer_data_indirect(evt.ptr - 16, 2); + + match usb_device_type { + UsbDeviceType::Tablet | 
UsbDeviceType::Keyboard => { + assert_eq!(buf, [2, 0]); + } + UsbDeviceType::Storage => { + assert_eq!(buf, [3, 0]); + } + UsbDeviceType::Camera => { + assert_eq!(buf, [2, 0]); + } + _ => {} + } + + slot_id + } + + /// Rest host controller. + pub fn reset_controller(&mut self, auto_run: bool) { + // reset xhci + self.oper_regs_write(0, USB_CMD_HCRST); + let status = self.oper_regs_read(XHCI_OPER_REG_USBSTS); + assert!(status & USB_STS_HCE != USB_STS_HCE); + if auto_run { + self.init_host_controller(XHCI_PCI_SLOT_NUM, XHCI_PCI_FUN_NUM); + self.run(); + } else { + self.init_pci_device(XHCI_PCI_SLOT_NUM, XHCI_PCI_FUN_NUM); + } + // clean up the event list. + self.event_list.clear(); + } + + // Low Level API + + pub fn oper_regs_read(&self, offset: u64) -> u32 { + self.pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_OPER_OFFSET) + offset) + } + + pub fn oper_regs_write(&mut self, offset: u64, value: u32) { + self.pci_dev.io_writel( + self.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + offset, + value, + ); + } + + pub fn interrupter_regs_read(&self, intr_idx: u64, offset: u64) -> u32 { + self.pci_dev.io_readl( + self.bar_addr, + u64::from(XHCI_PCI_RUNTIME_OFFSET) + + XHCI_INTR_REG_SIZE + + intr_idx * XHCI_INTR_REG_SIZE + + offset, + ) + } + + pub fn interrupter_regs_write(&mut self, intr_idx: u64, offset: u64, value: u32) { + self.pci_dev.io_writel( + self.bar_addr, + u64::from(XHCI_PCI_RUNTIME_OFFSET) + + RUNTIME_REGS_INTERRUPT_OFFSET + + intr_idx * XHCI_INTR_REG_SIZE + + offset, + value, + ); + } + + pub fn interrupter_regs_readq(&self, intr_idx: u64, offset: u64) -> u64 { + self.pci_dev.io_readq( + self.bar_addr, + u64::from(XHCI_PCI_RUNTIME_OFFSET) + + XHCI_INTR_REG_SIZE + + intr_idx * XHCI_INTR_REG_SIZE + + offset, + ) + } + + pub fn interrupter_regs_writeq(&mut self, intr_idx: u64, offset: u64, value: u64) { + self.pci_dev.io_writeq( + self.bar_addr, + u64::from(XHCI_PCI_RUNTIME_OFFSET) + + RUNTIME_REGS_INTERRUPT_OFFSET + + intr_idx * XHCI_INTR_REG_SIZE + + offset, + value, + ); + } + + pub fn port_regs_read(&self, port_id: u32, offset: u64) -> u32 { + self.pci_dev.io_readl( + self.bar_addr, + u64::from(XHCI_PCI_PORT_OFFSET + XHCI_PCI_PORT_LENGTH * (port_id - 1)) + offset, + ) + } + + pub fn port_regs_write(&mut self, port_id: u32, offset: u64, value: u32) { + self.pci_dev.io_writel( + self.bar_addr, + u64::from(XHCI_PCI_PORT_OFFSET + XHCI_PCI_PORT_LENGTH * (port_id - 1)) + offset, + value, + ); + } + + pub fn doorbell_write(&mut self, slot_id: u32, target: u32) { + self.pci_dev.io_writel( + self.bar_addr, + u64::from(XHCI_PCI_DOORBELL_OFFSET) + u64::from(slot_id << 2), + target, + ); + } + + pub fn init_pci_device(&mut self, pci_slot: u8, pci_fn: u8) { + let devfn = pci_slot << 3 | pci_fn; + assert!(self.pci_dev.find_pci_device(devfn)); + + self.pci_dev.enable(); + self.bar_addr = self.pci_dev.io_map(self.bar_idx); + } + + pub fn read_pci_config(&self) { + let vendor_id = self.pci_dev.config_readw(PCI_VENDOR_ID); + assert_eq!(vendor_id, PCI_VENDOR_ID_REDHAT); + // device id + let device_id = self.pci_dev.config_readw(PCI_DEVICE_ID); + assert_eq!(device_id, PCI_DEVICE_ID_REDHAT_XHCI); + // class code + let pi = self.pci_dev.config_readb(PCI_CLASS_PI); + assert_eq!(pi, 0x30); + let class_code = self.pci_dev.config_readw(SUB_CLASS_CODE); + assert_eq!(class_code, PCI_CLASS_SERIAL_USB); + } + + pub fn read_capability(&self) { + // Interface Version Number + let cap = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET)); + assert!(cap & 0x01000000 == 0x01000000); + 
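+        // The first capability dword packs CAPLENGTH and HCIVERSION
+        // (bits 16..31); the check above expects interface version 0x0100.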
// HCSPARAMS1 + let hcsparams1 = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x4)); + assert_eq!(hcsparams1 & 0xffffff, 0x000140); + // HCSPARAMS2 + let hcsparams2 = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x8)); + assert_eq!(hcsparams2, 0xf); + // HCSPARAMS3 + let hcsparams3 = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0xc)); + assert_eq!(hcsparams3, 0); + // HCCPARAMS1 + let hccparams1 = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x10)); + // AC64 = 1 + assert_eq!(hccparams1 & 1, 1); + // doorbell offset + let db_offset = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x14)); + assert_eq!(db_offset, 0x2000); + // runtime offset + let runtime_offset = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x18)); + assert_eq!(runtime_offset, 0x1000); + // HCCPARAMS2 + let hccparams2 = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x1c)); + assert_eq!(hccparams2, 0); + // USB 2.0 + let usb2_version = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x20)); + assert!(usb2_version & 0x02000000 == 0x02000000); + let usb2_name = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x24)); + assert_eq!(usb2_name, 0x20425355); + let usb2_port = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x28)); + let usb2_port_num = (usb2_port >> 8) & 0xff; + // extend capability end + let end = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x2c)); + assert_eq!(end, 0); + // USB 3.0 + let usb3_version = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x30)); + assert!(usb3_version & 0x03000000 == 0x03000000); + let usb3_name = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x34)); + assert_eq!(usb3_name, 0x20425355); + let usb3_port = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x38)); + let usb3_port_num = (usb3_port >> 8) & 0xff; + // extend capability end + let end = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x3c)); + assert_eq!(end, 0); + // Max ports + let hcsparams1 = self + .pci_dev + .io_readl(self.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET + 0x4)); + assert_eq!(hcsparams1 >> 24, usb2_port_num + usb3_port_num); + } + + pub fn init_max_device_slot_enabled(&mut self) { + // NOTE: not implement yet. use a fake value. 
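+        // Program MaxSlotsEn in the operational CONFIG register, then read it
+        // back below to confirm the controller accepted the value.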
+ let enabled_slot = USB_CONFIG_MAX_SLOTS_ENABLED & USB_CONFIG_MAX_SLOTS_EN_MASK; + self.pci_dev.io_writel( + self.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_CONFIG, + enabled_slot, + ); + + let config = self.pci_dev.io_readl( + self.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_CONFIG, + ); + assert_eq!(config, enabled_slot); + } + + pub fn init_device_context_base_address_array_pointer(&mut self) { + let dcba = DEVICE_CONTEXT_ENTRY_SIZE * (USB_CONFIG_MAX_SLOTS_ENABLED + 1); + let dcbaap = self.allocator.borrow_mut().alloc(u64::from(dcba)); + self.pci_dev.io_writeq( + self.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_DCBAAP, + dcbaap, + ); + + let value = self.pci_dev.io_readq( + self.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_DCBAAP, + ); + assert_eq!(value, dcbaap); + self.xhci.dcbaap = value; + } + + pub fn init_command_ring_dequeue_pointer(&mut self) { + let cmd_ring_sz = u64::from(TRB_SIZE) * COMMAND_RING_LEN; + let cmd_ring = self.allocator.borrow_mut().alloc(cmd_ring_sz); + self.pci_dev + .pci_bus + .borrow() + .test_state + .borrow_mut() + .memset(cmd_ring, cmd_ring_sz, &[0]); + self.xhci.cmd_ring.init(cmd_ring, cmd_ring_sz); + self.pci_dev.io_writeq( + self.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_CMD_RING_CTRL, + cmd_ring, + ); + // Read dequeue pointer return 0. + let cmd_ring = self.pci_dev.io_readq( + self.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_CMD_RING_CTRL, + ); + assert_eq!(cmd_ring, 0); + } + + pub fn init_interrupter(&mut self) { + // init event ring. + self.init_event_ring( + PRIMARY_INTERRUPTER_ID, + EVENT_RING_SEGMENT_TABLE_LEN, + EVENT_RING_LEN as u32, + ); + self.init_msix(); + } + + pub fn init_msix(&mut self) { + self.pci_dev.enable_msix(Some(self.bar_addr)); + self.config_msix_entry = 0; + // Random data, which is used only to check whether read and write data are consistent. + self.config_msix_data = 0x12345678; + self.config_msix_addr = self.allocator.borrow_mut().alloc(4); + self.pci_dev.set_msix_vector( + self.config_msix_entry, + self.config_msix_addr, + self.config_msix_data, + ); + } + + pub fn reset_port(&mut self, port_id: u32) { + assert!(port_id > 0); + let port_offset = u64::from(XHCI_PCI_PORT_OFFSET + XHCI_PCI_PORT_LENGTH * (port_id - 1)); + self.pci_dev + .io_writel(self.bar_addr, port_offset + XHCI_PORTSC_OFFSET, PORTSC_PR); + self.oper_regs_write(XHCI_OPER_REG_USBSTS, USB_STS_PCD); + let status = self.oper_regs_read(XHCI_OPER_REG_USBSTS); + assert!(status & USB_STS_PCD != USB_STS_PCD); + } + + pub fn no_op(&mut self) { + let mut trb = TestNormalTRB::default(); + trb.set_interrupter_target(0); + trb.set_trb_type(TRBType::CrNoop as u32); + self.queue_command(&mut trb); + } + + pub fn enable_slot(&mut self) { + let mut trb = TestNormalTRB::default(); + trb.set_slot_id(0); + trb.set_trb_type(TRBType::CrEnableSlot as u32); + self.queue_command(&mut trb); + } + + pub fn disable_slot(&mut self, slot_id: u32) { + let mut trb = TestNormalTRB::default(); + trb.set_slot_id(slot_id); + trb.set_trb_type(TRBType::CrDisableSlot as u32); + self.queue_command(&mut trb); + } + + // Return the address of input context to allow outside modify. 
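+    //
+    // The input context is laid out as: input control context at +0x0
+    // (add_flags enables the slot context and EP0), slot context at +0x20 and
+    // the EP0 endpoint context at +0x40. EP0 also gets a freshly allocated
+    // transfer ring whose dequeue pointer is stored with the cycle bit set.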
+ pub fn address_device(&mut self, slot_id: u32, bsr: bool, port_number: u32) -> u64 { + let output_ctx_addr = self.alloc_device_context(); + self.set_device_context_address(slot_id, output_ctx_addr); + let input_ctx_addr = self.alloc_input_context(); + let mut input_ctx = XhciInputCtrlCtx::default(); + input_ctx.add_flags |= 0x3; // slot and ep0. + self.mem_write_u32(input_ctx_addr, input_ctx.as_dwords()); + // Slot context + let mut slot_ctx = XhciSlotCtx::default(); + slot_ctx.set_context_entry(1); + slot_ctx.set_port_number(port_number); + self.mem_write_u32(input_ctx_addr + 0x20, slot_ctx.as_dwords()); + // Endpoint 0 context + let mut ep0_ctx = XhciEpCtx::default(); + let ep0_tr_ring = self + .allocator + .borrow_mut() + .alloc(u64::from(TRB_SIZE) * TRANSFER_RING_LEN); + ep0_ctx.set_tr_dequeue_pointer(ep0_tr_ring | 1); + ep0_ctx.set_ep_state(0); + ep0_ctx.set_ep_type(4); + self.mem_write_u32(input_ctx_addr + 0x40, ep0_ctx.as_dwords()); + self.xhci.device_slot[slot_id as usize].endpoints[(CONTROL_ENDPOINT_ID - 1) as usize] + .transfer_ring + .init(ep0_tr_ring, u64::from(TRB_SIZE) * TRANSFER_RING_LEN); + + let mut trb = TestNormalTRB::default(); + trb.parameter = input_ctx_addr; + trb.set_trb_type(TRBType::CrAddressDevice as u32); + trb.set_slot_id(slot_id); + trb.set_bsr(bsr); + self.queue_command(&mut trb); + input_ctx_addr + } + + // Return the address of input context to allow outside modify. + pub fn configure_endpoint(&mut self, slot_id: u32, dc: bool) -> u64 { + let usb_device_type = self.get_usb_device_type(); + let mut endpoint_id: Vec = Vec::new(); + let mut endpoint_type: Vec = Vec::new(); + let mut endpoint_offset: Vec = Vec::new(); + + match usb_device_type { + UsbDeviceType::Keyboard | UsbDeviceType::Tablet => { + endpoint_id.push(HID_DEVICE_ENDPOINT_ID); + endpoint_type.push(7); + endpoint_offset.push(0x80); + } + UsbDeviceType::Storage => { + endpoint_id.push(STORAGE_DEVICE_IN_ENDPOINT_ID); + endpoint_type.push(6); + endpoint_offset.push(0x80); + endpoint_id.push(STORAGE_DEVICE_OUT_ENDPOINT_ID); + endpoint_type.push(2); + endpoint_offset.push(0xa0); + } + UsbDeviceType::Camera => { + endpoint_id.push(3); + endpoint_type.push(6); + endpoint_offset.push(0x80); + } + _ => { + endpoint_id.push(3); + endpoint_type.push(2); + endpoint_offset.push(0x60); + } + } + + let output_ctx_addr = self.get_device_context_address(slot_id); + // Input context. + let input_ctx_addr = self.alloc_input_context(); + let mut input_ctx = XhciInputCtrlCtx::default(); + + for i in 0..endpoint_id.len() { + input_ctx.add_flags |= 0x1 | 1 << endpoint_id[i]; + input_ctx.drop_flags |= 1 << endpoint_id[i]; + } + + self.mem_write_u32(input_ctx_addr, input_ctx.as_dwords()); + // Slot context. + let mut slot_ctx = XhciSlotCtx::default(); + self.mem_read_u32(output_ctx_addr, slot_ctx.as_mut_dwords()); + slot_ctx.set_context_entry(4); + self.mem_write_u32(input_ctx_addr + 0x20, slot_ctx.as_dwords()); + // Endpoint context. 
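+        // Each configured endpoint gets its own transfer ring; the
+        // "over_transfer_ring" knob below replaces the normal ring size with
+        // TD_TRB_LIMIT so tests can queue TDs with a very large TRB count.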
+ let mut ep_ctx = XhciEpCtx::default(); + let tr_ring_size = if *self + .device_config + .get("over_transfer_ring") + .unwrap_or(&false) + { + TD_TRB_LIMIT + } else { + u64::from(TRB_SIZE) * TRANSFER_RING_LEN + }; + + for i in 0..endpoint_id.len() { + let ep_tr_ring = self.allocator.borrow_mut().alloc(tr_ring_size); + ep_ctx.set_tr_dequeue_pointer(ep_tr_ring | 1); + ep_ctx.set_interval(10); + ep_ctx.set_ep_state(0); + ep_ctx.set_ep_type(endpoint_type[i]); + self.mem_write_u32(input_ctx_addr + endpoint_offset[i], ep_ctx.as_dwords()); + self.xhci.device_slot[slot_id as usize].endpoints[(endpoint_id[i] - 1) as usize] + .transfer_ring + .init(ep_tr_ring, tr_ring_size); + } + + let mut trb = TestNormalTRB::default(); + trb.parameter = input_ctx_addr; + trb.set_trb_type(TRBType::CrConfigureEndpoint as u32); + trb.set_slot_id(slot_id); + trb.set_dc_flag(dc); + self.queue_command(&mut trb); + input_ctx_addr + } + + pub fn evaluate_context( + &mut self, + slot_id: u32, + max_exit_latency: u32, + intr_target: u32, + max_pkt_sz: u32, + ) -> u64 { + let input_ctx_addr = self.alloc_input_context(); + let mut input_ctx = XhciInputCtrlCtx::default(); + input_ctx.add_flags = 0x1 | 1 << CONTROL_ENDPOINT_ID; + self.mem_write_u32(input_ctx_addr, input_ctx.as_dwords()); + // Slot context. + let mut slot_ctx = XhciSlotCtx::default(); + slot_ctx.set_max_exit_latency(max_exit_latency); + slot_ctx.set_interrupter_target(intr_target); + self.mem_write_u32(input_ctx_addr + 0x20, slot_ctx.as_dwords()); + // Endpoint 0 context. + let mut ep0_ctx = XhciEpCtx::default(); + ep0_ctx.set_max_packet_size(max_pkt_sz); + self.mem_write_u32(input_ctx_addr + 0x40, ep0_ctx.as_dwords()); + + let mut trb = TestNormalTRB::default(); + trb.set_pointer(input_ctx_addr); + trb.set_slot_id(slot_id); + trb.set_trb_type(TRBType::CrEvaluateContext as u32); + self.queue_command(&mut trb); + input_ctx_addr + } + + pub fn stop_endpoint(&mut self, slot_id: u32, ep_id: u32) { + let mut trb = TestNormalTRB::default(); + trb.set_slot_id(slot_id); + trb.set_ep_id(ep_id); + // NOTE: Suspend flag not supported. + trb.set_trb_type(TRBType::CrStopEndpoint as u32); + self.queue_command(&mut trb); + } + + pub fn reset_endpoint(&mut self, slot_id: u32, ep_id: u32) { + let mut trb = TestNormalTRB::default(); + trb.set_slot_id(slot_id); + trb.set_ep_id(ep_id); + // NOTE: TSP flag not supported. + trb.set_trb_type(TRBType::CrResetEndpoint as u32); + self.queue_command(&mut trb); + } + + pub fn set_tr_dequeue(&mut self, ptr: u64, slot_id: u32, ep_id: u32) { + let mut trb = TestNormalTRB::default(); + if self.get_cycle_bit(slot_id, ep_id) { + trb.set_pointer(ptr | 1); + } else { + trb.set_pointer(ptr); + } + trb.set_slot_id(slot_id); + trb.set_ep_id(ep_id); + trb.set_trb_type(TRBType::CrSetTrDequeue as u32); + self.queue_command(&mut trb); + // update transfer dequeue pointer in the ring together. 
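+ // This keeps the test's shadow ring state consistent with the Set TR Dequeue Pointer command that was just queued.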
+ self.set_transfer_pointer(ptr, slot_id, ep_id); + } + + pub fn reset_device(&mut self, slot_id: u32) { + let mut trb = TestNormalTRB::default(); + trb.set_slot_id(slot_id); + trb.set_trb_type(TRBType::CrResetDevice as u32); + self.queue_command(&mut trb); + } + + pub fn fetch_event(&mut self, intr_idx: usize) -> Option { + const MSIX_LIMIT: u32 = 4; + for _ in 0..MSIX_LIMIT { + if self.has_msix(self.config_msix_addr, self.config_msix_data) { + for _ in 0..EVENT_RING_LEN { + let ptr = self.xhci.interrupter[intr_idx].er_pointer; + let trb = self.read_event(ptr); + let event = trb.to_xhci_event(); + if (event.flags & TRB_C == TRB_C) == self.xhci.interrupter[intr_idx].cycle_bit { + let event = trb.to_xhci_event(); + self.increase_event_ring(intr_idx); + self.interrupter_regs_writeq( + intr_idx as u64, + XHCI_INTR_REG_ERDP_LO, + self.xhci.interrupter[intr_idx].er_pointer | u64::from(ERDP_EHB), + ); + self.event_list.push_back(event); + } else { + break; + } + } + break; + } + std::thread::sleep(Duration::from_millis(50)); + } + self.event_list.pop_front() + } + + pub fn queue_device_request(&mut self, slot_id: u32, device_req: &UsbDeviceRequest) -> u64 { + // Setup Stage. + let mut setup_trb = TestNormalTRB::generate_setup_td(device_req); + self.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut setup_trb); + // Data Stage. + let ptr = self + .allocator + .borrow_mut() + .alloc(u64::from(device_req.length)); + let in_dir = + device_req.request_type & USB_DIRECTION_DEVICE_TO_HOST == USB_DIRECTION_DEVICE_TO_HOST; + let mut data_trb = TestNormalTRB::generate_data_td(ptr, device_req.length, in_dir); + self.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut data_trb); + // Status Stage. + let mut status_trb = TestNormalTRB::generate_status_td(false); + self.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut status_trb); + ptr + } + + // Queue TD with multi-TRB. 
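+ // Every TRB except the last carries the Chain (CH) flag so the controller treats the list as one TD; the final TRB sets IOC to request a Transfer Event.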
+ pub fn queue_td_by_iovec( + &mut self, + slot_id: u32, + ep_id: u32, + iovecs: &mut Vec, + dir: bool, + ) { + for i in 0..iovecs.len() { + let iovec = &mut iovecs[i]; + let mut trb = TestNormalTRB::default(); + if iovec.event_data { + trb.set_pointer(iovec.io_base); + trb.set_trb_type(TRBType::TrEvdata as u32); + } else { + if iovec.direct { + trb.set_idt_flag(true); + iovec.io_base = self.get_transfer_pointer(slot_id, ep_id); + } + trb.set_pointer(iovec.io_base); + trb.set_trb_transfer_length(iovec.io_len as u32); + trb.set_dir_flag(dir); + trb.set_trb_type(TRBType::TrNormal as u32); + } + if i != iovecs.len() - 1 { + trb.set_ch_flag(true); + } else { + trb.set_ioc_flag(true); + } + trb.set_cycle_bit(self.get_cycle_bit(slot_id, ep_id)); + let en_ptr = self.get_transfer_pointer(slot_id, ep_id); + self.write_trb(en_ptr, &trb); + self.increase_transfer_ring(slot_id, ep_id, 1); + } + } + + // Queue TD (single TRB) with IDT=1 + pub fn queue_direct_td(&mut self, slot_id: u32, ep_id: u32, len: u64) { + let mut trb = TestNormalTRB::generate_normal_td(0, len as u32); + self.queue_trb(slot_id, ep_id, &mut trb); + } + + // Queue multi-TD with IDT=1 + pub fn queue_multi_direct_td(&mut self, slot_id: u32, ep_id: u32, sz: u64, num: usize) { + for _ in 0..num { + self.queue_direct_td(slot_id, ep_id, sz); + } + } + + // Queue TD (single TRB) + pub fn queue_indirect_td(&mut self, slot_id: u32, ep_id: u32, sz: u64) -> u64 { + let mut trb = TestNormalTRB::generate_normal_td(0, sz as u32); + let ptr = self.allocator.borrow_mut().alloc(sz); + self.pci_dev + .pci_bus + .borrow() + .test_state + .borrow_mut() + .memset(ptr, sz, &[0]); + trb.set_pointer(ptr); + trb.set_idt_flag(false); + self.queue_trb(slot_id, ep_id, &mut trb); + ptr + } + + // Queue multi-TD + pub fn queue_multi_indirect_td(&mut self, slot_id: u32, ep_id: u32, sz: u64, num: usize) { + for _ in 0..num { + self.queue_indirect_td(slot_id, ep_id, sz); + } + } + + pub fn get_transfer_data_by_iovec(&self, iovecs: &Vec) -> Vec { + let mut buf = Vec::new(); + for iov in iovecs.iter() { + let tmp = self.mem_read(iov.io_base, iov.io_len); + for e in tmp.iter() { + buf.push(*e); + } + } + buf + } + + // Read data from parameter directly. + pub fn get_transfer_data_direct(&self, addr: u64, len: u64) -> Vec { + self.mem_read(addr, len as usize) + } + + // Read data from parameter as address. 
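+ // The 8 bytes at `addr` hold a guest address; that address is dereferenced and `len` bytes are read from it.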
+ pub fn get_transfer_data_indirect(&self, addr: u64, len: u64) -> Vec<u8> { + let buf = self.mem_read(addr, 8); + let mem = LittleEndian::read_u64(&buf); + + self.mem_read(mem, len as usize) + } + + pub fn get_transfer_data_indirect_with_offset( + &self, + addr: u64, + len: usize, + offset: u64, + ) -> Vec<u8> { + let buf = self.mem_read(addr, 8); + let mem = LittleEndian::read_u64(&buf); + + self.mem_read(mem + offset, len) + } + + pub fn get_command_pointer(&self) -> u64 { + self.xhci.cmd_ring.pointer + } + + pub fn get_transfer_pointer(&self, slot_id: u32, ep_id: u32) -> u64 { + self.xhci.device_slot[slot_id as usize].endpoints[(ep_id - 1) as usize] + .transfer_ring + .pointer + & !0xf + } + + pub fn get_event_pointer(&self, intr_idx: usize) -> u64 { + self.xhci.interrupter[intr_idx].er_pointer + } + + pub fn set_transfer_pointer(&mut self, ptr: u64, slot_id: u32, ep_id: u32) { + if self.check_slot_ep_invalid(slot_id, ep_id) { + return; + } + self.xhci.device_slot[slot_id as usize].endpoints[(ep_id - 1) as usize] + .transfer_ring + .pointer = ptr; + } + + pub fn get_slot_context(&self, slot_id: u32) -> XhciSlotCtx { + let output_ctx_addr = self.get_device_context_address(slot_id); + let mut slot_ctx = XhciSlotCtx::default(); + self.mem_read_u32(output_ctx_addr, slot_ctx.as_mut_dwords()); + slot_ctx + } + + pub fn get_endpoint_context(&self, slot_id: u32, ep_id: u32) -> XhciEpCtx { + let output_ctx_addr = self.get_device_context_address(slot_id); + let mut ep_ctx = XhciEpCtx::default(); + self.mem_read_u32( + output_ctx_addr + 0x20 * u64::from(ep_id), + ep_ctx.as_mut_dwords(), + ); + ep_ctx + } + + /// Queue one TRB to command ring. + pub fn queue_command(&mut self, trb: &mut TestNormalTRB) { + trb.set_cycle_bit(self.xhci.cmd_ring.cycle_bit); + self.write_trb(self.xhci.cmd_ring.pointer, trb); + self.increase_command_ring(); + if *self + .device_config + .get("command_auto_doorbell") + .unwrap_or(&false) + { + self.doorbell_write(0, 0); + } + } + + /// Queue one TRB to transfer ring.
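+ /// The ring's current producer cycle bit is written into the TRB (inverted when force_cycle is set) and the enqueue pointer advances by one TRB.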
+ pub fn queue_trb(&mut self, slot_id: u32, ep_id: u32, trb: &mut TestNormalTRB) { + if trb.force_cycle { + trb.set_cycle_bit(!self.get_cycle_bit(slot_id, ep_id)); + } else { + trb.set_cycle_bit(self.get_cycle_bit(slot_id, ep_id)); + } + let en_ptr = self.get_transfer_pointer(slot_id, ep_id); + self.write_trb(en_ptr, trb); + self.increase_transfer_ring(slot_id, ep_id, 1); + } + + pub fn queue_link_trb(&mut self, slot_id: u32, ep_id: u32, ptr: u64, tc: bool) { + let mut trb = TestNormalTRB::default(); + trb.parameter = ptr & !0xf; + trb.set_trb_type(TRBType::TrLink as u32); + trb.set_toggle_cycle(tc); + // Command ring + if slot_id == 0 { + trb.set_cycle_bit(self.xhci.cmd_ring.cycle_bit); + self.write_trb(self.xhci.cmd_ring.pointer, &trb); + if tc { + self.xhci.cmd_ring.cycle_bit = !self.xhci.cmd_ring.cycle_bit; + } + self.xhci.cmd_ring.update_pointer(trb.parameter); + } else { + trb.set_cycle_bit( + self.xhci.device_slot[slot_id as usize].endpoints[(ep_id - 1) as usize] + .transfer_ring + .cycle_bit, + ); + self.write_trb(self.get_transfer_pointer(slot_id, ep_id), &trb); + if tc { + self.xhci.device_slot[slot_id as usize].endpoints[(ep_id - 1) as usize] + .transfer_ring + .cycle_bit = !self.xhci.device_slot[slot_id as usize].endpoints + [(ep_id - 1) as usize] + .transfer_ring + .cycle_bit; + } + self.set_transfer_pointer(ptr, slot_id, ep_id); + } + } + + pub fn init_event_ring(&mut self, intr_idx: usize, erstsz: u32, ersz: u32) { + // ERSTSZ + self.interrupter_regs_write(intr_idx as u64, XHCI_INTR_REG_ERSTSZ, erstsz); + self.xhci.interrupter[intr_idx].erstsz = erstsz; + let data = self.interrupter_regs_read(intr_idx as u64, XHCI_INTR_REG_ERSTSZ); + assert_eq!(data, erstsz); + // ERSTBA + let table_size = EVENT_RING_SEGMENT_TABLE_ENTRY_SIZE * erstsz; + let evt_ring_seg_table = self.allocator.borrow_mut().alloc(u64::from(table_size)); + self.xhci.interrupter[intr_idx].erstba = evt_ring_seg_table; + // NOTE: Only support one Segment now. + let mut seg = TestEventRingSegment::new(); + let evt_ring_sz = u64::from(TRB_SIZE * ersz); + let evt_ring = self.allocator.borrow_mut().alloc(evt_ring_sz); + seg.init(evt_ring, ersz); + self.pci_dev + .pci_bus + .borrow() + .test_state + .borrow_mut() + .memset(evt_ring, evt_ring_sz, &[0]); + + let mut buf = [0_u8; TRB_SIZE as usize]; + LittleEndian::write_u64(&mut buf, seg.addr); + LittleEndian::write_u32(&mut buf[8..], seg.size); + LittleEndian::write_u32(&mut buf[12..], seg.reserved); + self.mem_write(self.xhci.interrupter[intr_idx].erstba, &buf); + // init event ring + self.load_event_segment(intr_idx); + self.xhci.interrupter[intr_idx].cycle_bit = true; + + // Write ERSTBA last, because write it will trigger reset event ring. 
+ self.interrupter_regs_writeq(intr_idx as u64, XHCI_INTR_REG_ERSTBA_LO, evt_ring_seg_table); + let data = self.interrupter_regs_readq(intr_idx as u64, XHCI_INTR_REG_ERSTBA_LO); + assert_eq!(data, evt_ring_seg_table & !0x3f); + // Write ERDP + self.interrupter_regs_writeq( + intr_idx as u64, + XHCI_INTR_REG_ERDP_LO, + self.get_event_pointer(intr_idx), + ); + let data = self.interrupter_regs_readq(intr_idx as u64, XHCI_INTR_REG_ERDP_LO); + assert_eq!(data, self.get_event_pointer(intr_idx)); + + // enable USB_CMD_INTE + let value = self.oper_regs_read(XHCI_OPER_REG_USBCMD); + self.oper_regs_write(XHCI_OPER_REG_USBCMD, value | USB_CMD_INTE); + // enable INTE + let value = self.interrupter_regs_read(intr_idx as u64, XHCI_INTR_REG_IMAN); + self.interrupter_regs_write(intr_idx as u64, XHCI_INTR_REG_IMAN, value | IMAN_IE); + // set IMOD + self.interrupter_regs_write(intr_idx as u64, XHCI_INTR_REG_IMOD, 8); + let value = self.interrupter_regs_read(intr_idx as u64, XHCI_INTR_REG_IMOD); + assert_eq!(value, 8); + } + + pub fn recovery_endpoint(&mut self, slot_id: u32, ep_id: u32) { + self.reset_endpoint(slot_id, ep_id); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + } + + pub fn test_invalie_device_request( + &mut self, + slot_id: u32, + request_type: u8, + request: u8, + value: u16, + ) { + let device_req = UsbDeviceRequest { + request_type, + request, + value, + index: 0, + length: 64, + }; + self.queue_device_request(slot_id, &device_req); + self.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::StallError as u32); + self.recovery_endpoint(slot_id, CONTROL_ENDPOINT_ID); + } + + // Fake init memory. 
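+ // Nothing is really allocated here; it only checks that the PAGESIZE register reports 4 KiB pages (bit 0 set).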
+ fn init_memory(&mut self) { + let page_size = self.oper_regs_read(XHCI_OPER_REG_PAGESIZE); + assert_eq!(page_size, 1); + } + + fn get_cycle_bit(&self, slot_id: u32, ep_id: u32) -> bool { + if self.check_slot_ep_invalid(slot_id, ep_id) { + return false; + } + self.xhci.device_slot[slot_id as usize].endpoints[(ep_id - 1) as usize] + .transfer_ring + .cycle_bit + } + + fn check_slot_ep_invalid(&self, slot_id: u32, ep_id: u32) -> bool { + slot_id as usize >= self.xhci.device_slot.len() + || ep_id as usize > self.xhci.device_slot[slot_id as usize].endpoints.len() + } + + fn increase_event_ring(&mut self, intr_idx: usize) { + self.xhci.interrupter[intr_idx].trb_count -= 1; + self.xhci.interrupter[intr_idx].er_pointer += u64::from(TRB_SIZE); + if self.xhci.interrupter[intr_idx].trb_count == 0 { + self.xhci.interrupter[intr_idx].segment_index += 1; + if self.xhci.interrupter[intr_idx].segment_index + == self.xhci.interrupter[intr_idx].erstsz + { + self.xhci.interrupter[intr_idx].cycle_bit = + !self.xhci.interrupter[intr_idx].cycle_bit; + self.xhci.interrupter[intr_idx].segment_index = 0; + } + self.load_event_segment(intr_idx); + } + } + + fn load_event_segment(&mut self, intr_idx: usize) { + let idx = self.xhci.interrupter[intr_idx].segment_index; + let evt_seg = self.read_segment_entry(intr_idx, idx); + self.xhci.interrupter[intr_idx].er_pointer = evt_seg.addr; + self.xhci.interrupter[intr_idx].trb_count = evt_seg.size; + } + + fn read_segment_entry(&self, intr_idx: usize, index: u32) -> TestEventRingSegment { + assert!(index <= self.xhci.interrupter[intr_idx].erstsz); + let addr = self.xhci.interrupter[intr_idx].erstba + u64::from(TRB_SIZE * index); + let evt_seg_buf = self.mem_read(addr, TRB_SIZE as usize); + let mut evt_seg = TestEventRingSegment::new(); + evt_seg.addr = LittleEndian::read_u64(&evt_seg_buf); + evt_seg.size = LittleEndian::read_u32(&evt_seg_buf[8..]); + evt_seg.reserved = LittleEndian::read_u32(&evt_seg_buf[12..]); + evt_seg + } + + fn set_device_context_address(&mut self, slot_id: u32, addr: u64) { + let device_ctx_addr = self.xhci.dcbaap + u64::from(slot_id * DEVICE_CONTEXT_ENTRY_SIZE); + let mut buf = [0_u8; 8]; + LittleEndian::write_u64(&mut buf, addr); + self.mem_write(device_ctx_addr, &buf); + } + + fn get_device_context_address(&self, slot_id: u32) -> u64 { + let device_ctx_addr = self.xhci.dcbaap + u64::from(slot_id * DEVICE_CONTEXT_ENTRY_SIZE); + let mut buf = self.mem_read(device_ctx_addr, 8); + + LittleEndian::read_u64(&mut buf) + } + + fn has_msix(&mut self, msix_addr: u64, msix_data: u32) -> bool { + self.pci_dev + .pci_bus + .borrow() + .test_state + .borrow() + .query_msix(msix_addr, msix_data) + } + + fn increase_command_ring(&mut self) { + let cmd_ring = self.xhci.cmd_ring; + if cmd_ring.pointer + u64::from(TRB_SIZE) + >= cmd_ring.start + cmd_ring.size * u64::from(TRB_SIZE) + { + self.queue_link_trb(0, 0, cmd_ring.start, true); + } + self.xhci.cmd_ring.pointer += u64::from(TRB_SIZE); + } + + fn increase_transfer_ring(&mut self, slot_id: u32, ep_id: u32, len: u64) { + let tr_ring = + self.xhci.device_slot[slot_id as usize].endpoints[(ep_id - 1) as usize].transfer_ring; + if tr_ring.pointer + u64::from(TRB_SIZE) + >= tr_ring.start + tr_ring.size * u64::from(TRB_SIZE) + { + self.queue_link_trb(slot_id, ep_id, tr_ring.start, true); + } + self.xhci.device_slot[slot_id as usize].endpoints[(ep_id - 1) as usize] + .transfer_ring + .increase_pointer(u64::from(TRB_SIZE) * len); + } + + fn write_trb(&mut self, addr: u64, trb: &TestNormalTRB) { + let mut buf = [0_u8; 
TRB_SIZE as usize]; + LittleEndian::write_u64(&mut buf, trb.parameter); + LittleEndian::write_u32(&mut buf[8..], trb.status); + LittleEndian::write_u32(&mut buf[12..], trb.control); + self.mem_write(addr, &buf); + } + + fn read_event(&self, addr: u64) -> TestNormalTRB { + let buf = self.mem_read(addr, 16); + let mut trb = TestNormalTRB::default(); + trb.parameter = LittleEndian::read_u64(&buf); + trb.status = LittleEndian::read_u32(&buf[8..]); + trb.control = LittleEndian::read_u32(&buf[12..]); + trb + } + + fn alloc_input_context(&mut self) -> u64 { + let input_ctx_addr = self.allocator.borrow_mut().alloc(INPUT_CONTEXT_SIZE); + input_ctx_addr + } + + fn alloc_device_context(&mut self) -> u64 { + let output_ctx_addr = self.allocator.borrow_mut().alloc(DEVICE_CONTEXT_SIZE); + output_ctx_addr + } +} + +// Descriptor +impl TestXhciPciDevice { + fn get_usb_device_type(&mut self) -> UsbDeviceType { + if *self.device_config.get("tablet").unwrap_or(&false) { + UsbDeviceType::Tablet + } else if *self.device_config.get("keyboard").unwrap_or(&false) { + UsbDeviceType::Keyboard + } else if *self.device_config.get("storage").unwrap_or(&false) { + UsbDeviceType::Storage + } else if *self.device_config.get("camera").unwrap_or(&false) { + UsbDeviceType::Camera + } else { + UsbDeviceType::Other + } + } + + fn get_iad_desc(&mut self, offset: &mut u64, addr: u64) { + let usb_device_type = self.get_usb_device_type(); + if usb_device_type != UsbDeviceType::Camera { + return; + } + + // 1. IAD header descriptor + *offset += u64::from(USB_DT_CONFIG_SIZE); + let buf = self.get_transfer_data_indirect_with_offset(addr, 8_usize, *offset); + + // descriptor type + assert_eq!(buf[1], USB_DT_INTERFACE_ASSOCIATION); + // class + assert_eq!(buf[4], USB_CLASS_VIDEO); + // subclass + assert_eq!(buf[5], SC_VIDEO_INTERFACE_COLLECTION); + + // 2. VC interface + *offset += 8; + let buf = self.get_transfer_data_indirect_with_offset( + addr, + USB_DT_INTERFACE_SIZE as usize, + *offset, + ); + + assert_eq!(buf[1], USB_DT_INTERFACE); + assert_eq!(buf[2], INTERFACE_ID_CONTROL); + assert_eq!(buf[5], USB_CLASS_VIDEO); + assert_eq!(buf[6], SC_VIDEOCONTROL); + + // get total vc length from its header descriptor + *offset += u64::from(USB_DT_INTERFACE_SIZE); + let buf = self.get_transfer_data_indirect_with_offset(addr, 0xd_usize, *offset); + + let total = u16::from_le_bytes(buf[5..7].try_into().unwrap()); + let remained = total - 0xd; + + *offset += 0xd; + let _buf = self.get_transfer_data_indirect_with_offset(addr, remained as usize, *offset); + + // 3. 
VS interface + *offset += u64::from(remained); + let buf = self.get_transfer_data_indirect_with_offset( + addr, + USB_DT_INTERFACE_SIZE as usize, + *offset, + ); + + assert_eq!(buf[1], USB_DT_INTERFACE); + assert_eq!(buf[5], USB_CLASS_VIDEO); + assert_eq!(buf[6], SC_VIDEOSTREAMING); + + // get total vs length from its header descriptor + *offset += u64::from(USB_DT_INTERFACE_SIZE); + let buf = self.get_transfer_data_indirect_with_offset(addr, 0xf_usize, *offset); + let total = u16::from_le_bytes(buf[4..6].try_into().unwrap()); + let remained = total - 0xf; + + *offset += 0xf; + let _buf = self.get_transfer_data_indirect_with_offset(addr, remained as usize, *offset); + } + + fn get_interfaces(&mut self, offset: &mut u64, addr: u64) { + let usb_device_type = self.get_usb_device_type(); + if usb_device_type == UsbDeviceType::Camera { + return; + } + + *offset += u64::from(USB_DT_CONFIG_SIZE); + let buf = self.get_transfer_data_indirect_with_offset( + addr, + USB_DT_INTERFACE_SIZE as usize, + *offset, + ); + // descriptor type + assert_eq!(buf[1], USB_DESCRIPTOR_TYPE_INTERFACE); + // USB class + match usb_device_type { + UsbDeviceType::Tablet | UsbDeviceType::Keyboard => { + assert_eq!(buf[5], USB_CLASS_HID); + } + UsbDeviceType::Storage => { + assert_eq!(buf[5], USB_CLASS_MASS_STORAGE); + assert_eq!(buf[6], 0x06); + assert_eq!(buf[7], 0x50); + } + _ => {} + } + + match usb_device_type { + UsbDeviceType::Tablet => { + // hid descriptor + *offset += u64::from(USB_DT_INTERFACE_SIZE); + let buf = self.get_transfer_data_indirect_with_offset(addr, 9, *offset); + assert_eq!( + buf, + [ + 0x9, + 0x21, + 0x01, + 0x0, + 0x0, + 0x01, + 0x22, + HID_POINTER_REPORT_LEN, + 0x0 + ] + ); + } + UsbDeviceType::Keyboard => { + // hid descriptor + *offset += u64::from(USB_DT_INTERFACE_SIZE); + let buf = self.get_transfer_data_indirect_with_offset(addr, 9, *offset); + assert_eq!(buf, [0x09, 0x21, 0x11, 0x01, 0x00, 0x01, 0x22, 0x3f, 0]); + } + _ => {} + } + + *offset += u64::from(USB_DT_INTERFACE_SIZE); + // endpoint descriptor + let buf = self.get_transfer_data_indirect_with_offset( + addr, + USB_DT_ENDPOINT_SIZE as usize, + *offset, + ); + + match usb_device_type { + UsbDeviceType::Tablet | UsbDeviceType::Keyboard => { + // descriptor type + assert_eq!(buf[1], USB_DESCRIPTOR_TYPE_ENDPOINT); + // endpoint address + assert_eq!(buf[2], USB_DIRECTION_DEVICE_TO_HOST | 0x1); + } + UsbDeviceType::Storage => { + // descriptor type + assert_eq!(buf[1], USB_DESCRIPTOR_TYPE_ENDPOINT); + // endpoint address + assert_eq!(buf[2], USB_DIRECTION_DEVICE_TO_HOST | 0x01); + *offset += u64::from(USB_DT_ENDPOINT_SIZE); + // endpoint descriptor + let buf = self.get_transfer_data_indirect_with_offset( + addr, + USB_DT_ENDPOINT_SIZE as usize, + *offset, + ); + // descriptor type + assert_eq!(buf[1], USB_DESCRIPTOR_TYPE_ENDPOINT); + // endpoint address + assert_eq!(buf[2], USB_DIRECTION_HOST_TO_DEVICE | 0x02); + } + _ => {} + } + } + + fn check_string_descriptor(&mut self, slot_id: u32, name_idx: u16, name: &str) { + self.get_string_descriptor(slot_id, name_idx); + self.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + + let len = name.len() * 2 + 2; + let buf = self.get_transfer_data_indirect(evt.ptr - u64::from(TRB_SIZE), len as u64); + for i in 0..name.len() { + assert_eq!(buf[2 * i + 2], name.as_bytes()[i]); + } + } + + pub fn get_usb_descriptor(&mut self, slot_id: u32) { + let usb_device_type = 
self.get_usb_device_type(); + // device descriptor + self.get_device_descriptor(slot_id); + self.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + let buf = self.get_transfer_data_indirect( + evt.ptr - u64::from(TRB_SIZE), + u64::from(USB_DT_DEVICE_SIZE), + ); + // descriptor type + assert_eq!(buf[1], USB_DESCRIPTOR_TYPE_DEVICE); + // bcdUSB + match usb_device_type { + UsbDeviceType::Tablet | UsbDeviceType::Keyboard => { + assert_eq!(buf[2..4], [0, 1]); + } + UsbDeviceType::Storage => { + assert_eq!(buf[2..4], [0, 2]); + } + UsbDeviceType::Camera => { + assert_eq!(buf[2..4], [0, 3]); + } + _ => {} + } + // config descriptor + self.get_config_descriptor(slot_id); + self.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + let addr = evt.ptr - u64::from(TRB_SIZE); + let mut offset = 0; + let buf = + self.get_transfer_data_indirect_with_offset(addr, USB_DT_CONFIG_SIZE as usize, offset); + // descriptor type + assert_eq!(buf[1], USB_DESCRIPTOR_TYPE_CONFIG); + // configure value + assert_eq!(buf[5], 1); + + self.get_iad_desc(&mut offset, addr); + self.get_interfaces(&mut offset, addr); + + // string descriptor + self.get_string_descriptor(slot_id, 0); + self.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + let buf = self.get_transfer_data_indirect(evt.ptr - 16, 4); + // Language ID + assert_eq!(buf, [4, 3, 9, 4]); + + match usb_device_type { + UsbDeviceType::Tablet => { + self.check_string_descriptor(slot_id, 3, "HID Tablet"); + } + UsbDeviceType::Keyboard => { + self.check_string_descriptor(slot_id, 3, "HID Keyboard"); + } + UsbDeviceType::Storage => { + self.check_string_descriptor(slot_id, 2, "StratoVirt USB Storage"); + } + UsbDeviceType::Camera => { + self.check_string_descriptor(slot_id, 2, "USB Camera"); + } + _ => {} + } + } + + pub fn check_hid_report_descriptor(&mut self, slot_id: u32) { + let usb_device_type = self.get_usb_device_type(); + match usb_device_type { + UsbDeviceType::Keyboard => { + self.get_hid_report_descriptor(slot_id, 63); + self.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = self.get_transfer_data_indirect(evt.ptr - u64::from(TRB_SIZE), 63); + assert_eq!( + buf, + [ + 0x05, 0x01, 0x09, 0x06, 0xa1, 0x01, 0x75, 0x01, 0x95, 0x08, 0x05, 0x07, + 0x19, 0xe0, 0x29, 0xe7, 0x15, 0x00, 0x25, 0x01, 0x81, 0x02, 0x95, 0x01, + 0x75, 0x08, 0x81, 0x01, 0x95, 0x05, 0x75, 0x01, 0x05, 0x08, 0x19, 0x01, + 0x29, 0x05, 0x91, 0x02, 0x95, 0x01, 0x75, 0x03, 0x91, 0x01, 0x95, 0x06, + 0x75, 0x08, 0x15, 0x00, 0x25, 0xff, 0x05, 0x07, 0x19, 0x00, 0x29, 0xff, + 0x81, 0x00, 0xc0 + ] + ); + } + UsbDeviceType::Tablet => { + self.get_hid_report_descriptor(slot_id, u16::from(HID_POINTER_REPORT_LEN)); + self.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = self.get_transfer_data_indirect( + evt.ptr - u64::from(TRB_SIZE), + u64::from(HID_POINTER_REPORT_LEN), + ); + assert_eq!( + buf, + [ + 0x05, 0x01, 0x09, 0x02, 0xa1, 0x01, 0x09, 0x01, 0xa1, 0x00, 0x05, 0x09, + 0x19, 0x01, 0x29, 0x05, 0x15, 0x00, 0x25, 0x01, 0x95, 0x05, 
0x75, 0x01, + 0x81, 0x02, 0x95, 0x01, 0x75, 0x03, 0x81, 0x01, 0x05, 0x01, 0x09, 0x30, + 0x09, 0x31, 0x15, 0x00, 0x26, 0xff, 0x7f, 0x35, 0x00, 0x46, 0xff, 0x7f, + 0x75, 0x10, 0x95, 0x02, 0x81, 0x02, 0x05, 0x01, 0x09, 0x38, 0x15, 0x81, + 0x25, 0x7f, 0x35, 0x00, 0x45, 0x00, 0x75, 0x08, 0x95, 0x01, 0x81, 0x06, + 0x05, 0x0c, 0x0a, 0x38, 0x02, 0x15, 0x81, 0x25, 0x7f, 0x75, 0x08, 0x95, + 0x01, 0x81, 0x06, 0xc0, 0xc0, + ] + ); + } + _ => {} + } + } + + pub fn get_device_descriptor(&mut self, slot_id: u32) { + let buf_len = 64; + let device_req = UsbDeviceRequest { + request_type: USB_DEVICE_IN_REQUEST, + request: USB_REQUEST_GET_DESCRIPTOR, + value: u16::from(USB_DT_DEVICE) << 8, + index: 0, + length: buf_len, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn get_config_descriptor(&mut self, slot_id: u32) { + let buf_len = 4096; + let device_req = UsbDeviceRequest { + request_type: USB_DEVICE_IN_REQUEST, + request: USB_REQUEST_GET_DESCRIPTOR, + value: u16::from(USB_DT_CONFIGURATION) << 8, + index: 0, + length: buf_len, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn get_string_descriptor(&mut self, slot_id: u32, index: u16) { + let buf_len = 128; + let device_req = UsbDeviceRequest { + request_type: USB_DEVICE_IN_REQUEST, + request: USB_REQUEST_GET_DESCRIPTOR, + value: u16::from(USB_DT_STRING) << 8 | index, + index: 0, + length: buf_len, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn get_status(&mut self, slot_id: u32) { + let buf_len = 64; + let device_req = UsbDeviceRequest { + request_type: USB_DEVICE_IN_REQUEST, + request: USB_REQUEST_GET_STATUS, + value: 0, + index: 0, + length: buf_len, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn get_configuration(&mut self, slot_id: u32) { + let buf_len = 64; + let device_req = UsbDeviceRequest { + request_type: USB_DEVICE_IN_REQUEST, + request: USB_REQUEST_GET_CONFIGURATION, + value: 0, + index: 0, + length: buf_len, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn set_configuration(&mut self, slot_id: u32, v: u16) { + let buf_len = 64; + let device_req = UsbDeviceRequest { + request_type: USB_DEVICE_OUT_REQUEST, + request: USB_REQUEST_SET_CONFIGURATION, + value: v, + index: 0, + length: buf_len, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn clear_feature(&mut self, slot_id: u32, v: u16) { + let buf_len = 64; + let device_req = UsbDeviceRequest { + request_type: USB_DEVICE_OUT_REQUEST, + request: USB_REQUEST_CLEAR_FEATURE, + value: v, + index: 0, + length: buf_len, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn set_feature(&mut self, slot_id: u32, v: u16) { + let buf_len = 64; + let device_req = UsbDeviceRequest { + request_type: USB_DEVICE_OUT_REQUEST, + request: USB_REQUEST_SET_FEATURE, + value: v, + index: 0, + length: buf_len, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn get_interface(&mut self, slot_id: u32, index: u16) { + let buf_len = 64; + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_IN_REQUEST, + request: USB_REQUEST_GET_INTERFACE, + value: 0, + index, + length: buf_len, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn set_interface(&mut self, slot_id: u32, v: u16, index: u16) { + let buf_len = 64; + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_OUT_REQUEST, + request: USB_REQUEST_SET_INTERFACE, + value: v, + index, + length: buf_len, + }; + self.queue_device_request(slot_id, &device_req); + } + 
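+ // Class-specific GET_DESCRIPTOR for the HID report descriptor (descriptor type 0x22) on interface 0.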
+ pub fn get_hid_report_descriptor(&mut self, slot_id: u32, len: u16) { + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_IN_REQUEST, + request: USB_REQUEST_GET_DESCRIPTOR, + value: 0x22 << 8, + index: 0, + length: len, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn get_report(&mut self, slot_id: u32) { + let buf_len = 8; + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_CLASS_IN_REQUEST, + request: HID_GET_REPORT, + value: 0, + index: 0, + length: buf_len, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn set_report(&mut self, slot_id: u32, v: u16) { + // NOTE: set with data, and keyboard not implement yet. + let buf_len = 64; + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_CLASS_OUT_REQUEST, + request: HID_SET_REPORT, + value: v, + index: 0, + length: buf_len, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn get_protocol(&mut self, slot_id: u32) { + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_CLASS_IN_REQUEST, + request: HID_GET_PROTOCOL, + value: 0, + index: 0, + length: 1, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn set_protocol(&mut self, slot_id: u32, v: u16) { + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_CLASS_OUT_REQUEST, + request: HID_SET_PROTOCOL, + value: v, + index: 0, + length: 0, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn get_idle(&mut self, slot_id: u32) { + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_CLASS_IN_REQUEST, + request: HID_GET_IDLE, + value: 0, + index: 0, + length: 1, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn set_idle(&mut self, slot_id: u32, v: u16) { + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_CLASS_OUT_REQUEST, + request: HID_SET_IDLE, + value: v, + index: 0, + length: 0, + }; + self.queue_device_request(slot_id, &device_req); + } +} + +// Device operation +impl TestXhciPciDevice { + pub fn test_keyboard_event(&mut self, slot_id: u32, test_state: Rc>) { + qmp_send_key_event(test_state.borrow_mut(), 57, true); + qmp_send_key_event(test_state.borrow_mut(), 57, false); + self.queue_multi_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN, 2); + self.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = self.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 44, 0, 0, 0, 0, 0]); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = self.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 0, 0, 0, 0, 0, 0]); + } + + pub fn test_pointer_event(&mut self, slot_id: u32, test_state: Rc>) { + qmp_send_pointer_event(test_state.borrow_mut(), 100, 200, 0, true); + qmp_send_pointer_event(test_state.borrow_mut(), 200, 100, 1, true); + self.queue_multi_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN, 2); + self.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = self.get_transfer_data_indirect(evt.ptr, HID_POINTER_LEN); + assert_eq!(buf, [0, 100, 0, 200, 0, 0, 0]); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = 
self.get_transfer_data_indirect(evt.ptr, HID_POINTER_LEN); + assert_eq!(buf, [1, 200, 0, 100, 0, 0, 0]); + } +} + +// Memory operation +impl TestXhciPciDevice { + pub fn mem_read_u32(&self, addr: u64, buf: &mut [u32]) { + let vec_len = std::mem::size_of_val(buf); + let tmp = self.mem_read(addr, vec_len); + for i in 0..buf.len() { + buf[i] = LittleEndian::read_u32(&tmp[(size_of::() * i)..]); + } + } + + pub fn mem_write_u32(&self, addr: u64, buf: &[u32]) { + let vec_len = std::mem::size_of_val(buf); + let mut vec = vec![0_u8; vec_len]; + let tmp = vec.as_mut_slice(); + for i in 0..buf.len() { + LittleEndian::write_u32(&mut tmp[(size_of::() * i)..], buf[i]); + } + self.mem_write(addr, tmp); + } + + pub fn mem_read(&self, addr: u64, len: usize) -> Vec { + self.pci_dev + .pci_bus + .borrow() + .test_state + .borrow_mut() + .memread(addr, len as u64) + } + + pub fn mem_write(&self, addr: u64, buf: &[u8]) { + self.pci_dev + .pci_bus + .borrow() + .test_state + .borrow_mut() + .memwrite(addr, buf); + } +} + +#[allow(non_snake_case)] +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +pub struct VideoStreamingControl { + pub bmHint: u16, + pub bFormatIndex: u8, + pub bFrameIndex: u8, + pub dwFrameInterval: u32, + pub wKeyFrameRate: u16, + pub wPFrameRate: u16, + pub wCompQuality: u16, + pub wCompWindowSize: u16, + pub wDelay: u16, + pub dwMaxVideoFrameSize: u32, + pub dwMaxPayloadTransferSize: u32, +} + +impl ByteCode for VideoStreamingControl {} +const SET_CUR: u8 = 0x1; +const VS_PROBE_CONTROL: u8 = 1; +const VS_COMMIT_CONTROL: u8 = 2; +const VS_INTERFACE_NUM: u16 = 1; +const GET_CUR: u8 = 0x81; +const GET_INFO: u8 = 0x86; + +const TRB_MAX_LEN: u32 = 64 * 1024; +const FRAME_WAIT_MS: u64 = 20; + +// USB Camera +impl TestXhciPciDevice { + pub fn vs_get_info(&mut self, slot_id: u32) -> u8 { + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_CLASS_IN_REQUEST, + request: GET_INFO, + value: u16::from(VS_PROBE_CONTROL) << 8, + index: VS_INTERFACE_NUM, + length: 1, + }; + self.queue_device_request(slot_id, &device_req); + self.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = self.get_transfer_data_indirect(evt.ptr - u64::from(TRB_SIZE), 1); + buf[0] + } + + pub fn vs_get_cur(&mut self, slot_id: u32) -> VideoStreamingControl { + let len = std::mem::size_of::() as u16; + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_CLASS_IN_REQUEST, + request: GET_CUR, + value: u16::from(VS_PROBE_CONTROL) << 8, + index: VS_INTERFACE_NUM, + length: len, + }; + self.queue_device_request(slot_id, &device_req); + self.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = self.get_transfer_data_indirect(evt.ptr - u64::from(TRB_SIZE), u64::from(len)); + let mut vs_control = VideoStreamingControl::default(); + vs_control.as_mut_bytes().copy_from_slice(&buf); + vs_control + } + + pub fn vs_probe_control(&mut self, slot_id: u32, fmt_idx: u8, frm_idx: u8) { + let mut vs: VideoStreamingControl = VideoStreamingControl::default(); + let len = std::mem::size_of::() as u16; + vs.bFormatIndex = fmt_idx; + vs.bFrameIndex = frm_idx; + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_CLASS_OUT_REQUEST, + request: SET_CUR, + value: u16::from(VS_PROBE_CONTROL) << 8, + index: VS_INTERFACE_NUM, + length: len, + }; + let data_ptr = 
self.queue_device_request(slot_id, &device_req); + self.mem_write(data_ptr, vs.as_bytes()); + } + + pub fn vs_commit_control(&mut self, slot_id: u32, fmt_idx: u8, frm_idx: u8) { + let mut vs = VideoStreamingControl::default(); + vs.bFormatIndex = fmt_idx; + vs.bFrameIndex = frm_idx; + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_CLASS_OUT_REQUEST, + request: SET_CUR, + value: u16::from(VS_COMMIT_CONTROL) << 8, + index: VS_INTERFACE_NUM, + length: 0, + }; + let data_ptr = self.queue_device_request(slot_id, &device_req); + self.mem_write(data_ptr, vs.as_bytes()); + } + + pub fn vs_clear_feature(&mut self, slot_id: u32) { + let device_req = UsbDeviceRequest { + request_type: USB_ENDPOINT_OUT_REQUEST, + request: USB_REQUEST_CLEAR_FEATURE, + value: 0, + index: 0, + length: 0, + }; + self.queue_device_request(slot_id, &device_req); + } + + pub fn get_payload( + &mut self, + slot_id: u32, + ep_id: u32, + frame_len: u32, + header_len: u32, + max_payload: u32, + ) -> Vec<Vec<u8>> { + let sz = max_payload - header_len; + let payload_cnt = (frame_len + sz - 1) / sz; + let mut image = Vec::new(); + for _ in 0..payload_cnt { + let (done, buf) = self.do_payload_transfer(slot_id, ep_id, max_payload); + image.push(buf); + if done { + break; + } + } + image + } + + fn do_payload_transfer(&mut self, slot_id: u32, ep_id: u32, total: u32) -> (bool, Vec<u8>) { + let cnt = (total + TRB_MAX_LEN - 1) / TRB_MAX_LEN; + let mut data = Vec::new(); + for _ in 0..cnt { + self.queue_indirect_td(slot_id, ep_id, u64::from(TRB_MAX_LEN)); + self.doorbell_write(slot_id, ep_id); + // wait for frame done. + std::thread::sleep(std::time::Duration::from_millis(FRAME_WAIT_MS)); + let evt = self.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + if evt.ccode == TRBCCode::Success as u32 { + let mut buf = self.get_transfer_data_indirect(evt.ptr, u64::from(TRB_MAX_LEN)); + data.append(&mut buf); + } else if evt.ccode == TRBCCode::ShortPacket as u32 { + let copied = u64::from(TRB_MAX_LEN - evt.length); + let mut buf = self.get_transfer_data_indirect(evt.ptr, copied); + data.append(&mut buf); + if total == data.len() as u32 { + return (false, data); + } else { + return (true, data); + } + } else { + assert_eq!(evt.ccode, 0); + return (false, Vec::new()); + } + } + (false, data) + } +} + +pub struct TestUsbBuilder { + args: Vec<String>, + config: HashMap<String, bool>, +} + +impl Default for TestUsbBuilder { + fn default() -> Self { + Self::new() + } +} + +impl TestUsbBuilder { + pub fn new() -> Self { + let mut args = Vec::new(); + let machine: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + let mut arg = machine.into_iter().map(|s| s.to_string()).collect(); + args.append(&mut arg); + + let machine: Vec<&str> = "-D /var/log/mst.log".split(' ').collect(); + let mut arg = machine.into_iter().map(|s| s.to_string()).collect(); + args.append(&mut arg); + Self { + args, + config: HashMap::new(), + } + } + + pub fn with_xhci_config(mut self, id: &str, port2: u32, port3: u32) -> Self { + let mut args = format!( + "-device nec-usb-xhci,id={},bus=pcie.0,addr={}", + id, XHCI_PCI_SLOT_NUM + ); + if port2 != 0 { + args = format!("{},p2={}", args, port2); + } + if port3 != 0 { + args = format!("{},p3={}", args, port3); + } + let args: Vec<&str> = args[..].split(' ').collect(); + let mut args = args.into_iter().map(|s| s.to_string()).collect(); + self.args.append(&mut args); + self + } + + pub fn with_xhci(self, id: &str) -> Self { + self.with_xhci_config(id, 0, 0) + } + + pub fn with_usb_keyboard(mut self, id: &str) -> Self { + let args = format!("-device 
usb-kbd,id={}", id); + let args: Vec<&str> = args[..].split(' ').collect(); + let mut args = args.into_iter().map(|s| s.to_string()).collect(); + self.args.append(&mut args); + self.config.insert(String::from("keyboard"), true); + self + } + + pub fn with_usb_tablet(mut self, id: &str) -> Self { + let args = format!("-device usb-tablet,id={}", id); + let args: Vec<&str> = args[..].split(' ').collect(); + let mut args = args.into_iter().map(|s| s.to_string()).collect(); + self.args.append(&mut args); + self.config.insert(String::from("tablet"), true); + self + } + + pub fn with_usb_storage(mut self, image_path: &str, media: &str) -> Self { + let args = "-device usb-storage,drive=drive0,id=storage0".to_string(); + let args: Vec<&str> = args[..].split(' ').collect(); + let mut args = args.into_iter().map(|s| s.to_string()).collect(); + self.args.append(&mut args); + + let args = format!( + "-drive if=none,id=drive0,format=raw,media={},aio=off,direct=false,file={}", + media, image_path + ); + let args: Vec<&str> = args[..].split(' ').collect(); + let mut args = args.into_iter().map(|s| s.to_string()).collect(); + self.args.append(&mut args); + + self.config.insert(String::from("storage"), true); + self + } + + pub fn with_usb_camera(mut self, id: &str, path: &str) -> Self { + let args = format!("-cameradev demo,id=camdev0,path={}", path); + let args: Vec<&str> = args[..].split(' ').collect(); + let mut args = args.into_iter().map(|s| s.to_string()).collect(); + self.args.append(&mut args); + let args = format!("-device usb-camera,id={},cameradev=camdev0", id); + let args: Vec<&str> = args[..].split(' ').collect(); + let mut args = args.into_iter().map(|s| s.to_string()).collect(); + self.args.append(&mut args); + self.config.insert(String::from("camera"), true); + self + } + + pub fn with_config(mut self, k: &str, v: bool) -> Self { + self.config.insert(k.to_string(), v); + self + } + + pub fn build( + &mut self, + ) -> ( + Rc>, + Rc>, + Rc>, + ) { + let args = self.args.iter().map(AsRef::as_ref).collect(); + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator.clone(); + + let xhci = Rc::new(RefCell::new(TestXhciPciDevice::new( + machine.pci_bus, + allocator.clone(), + ))); + + for (k, v) in self.config.iter() { + xhci.borrow_mut().device_config.insert(k.clone(), *v); + } + + if *self.config.get("auto_run").unwrap_or(&false) { + // init host controller. + let mut xhci = xhci.borrow_mut(); + xhci.init_host_controller(XHCI_PCI_SLOT_NUM, XHCI_PCI_FUN_NUM); + xhci.run(); + } else { + // only init pci, let testcase init controller. 
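+ // (the test case is then expected to call init_host_controller() and run() itself).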
+ let mut xhci = xhci.borrow_mut(); + xhci.init_pci_device(XHCI_PCI_SLOT_NUM, XHCI_PCI_FUN_NUM); + } + + (xhci, test_state, allocator) + } +} + +// Helper +pub fn qmp_send_key_event(test_state: RefMut, v: u32, down: bool) { + let value_str = format!("{},{}", v, if down { 1 } else { 0 }); + let mut str = + "{\"execute\": \"input_event\", \"arguments\": { \"key\": \"keyboard\", \"value\":\"" + .to_string(); + str += &value_str; + str += "\" }}"; + test_state.qmp(&str); +} + +pub fn qmp_send_multi_key_event(test_state: Rc>, key_list: &[u32], down: bool) { + for item in key_list { + qmp_send_key_event(test_state.borrow_mut(), *item, down); + } +} + +pub fn qmp_send_pointer_event(test_state: RefMut, x: i32, y: i32, btn: i32, down: bool) { + let value_str = format!("{},{},{},{}", x, y, btn, if down { 1 } else { 0 }); + let mut str = + "{\"execute\": \"input_event\", \"arguments\": { \"key\": \"pointer\", \"value\":\"" + .to_string(); + str += &value_str; + str += "\" }}"; + test_state.qmp(&str); +} + +pub fn qmp_plug_keyboard_event(test_state: RefMut, num: u32) -> Value { + let num_str = format!("{}", num); + let mut str = + "{\"execute\":\"device_add\",\"arguments\":{\"driver\":\"usb-kbd\",\"id\":\"input" + .to_string(); + str += &num_str; + str += "\",\"bus\":\"usb.0\",\"port\":\"1\"}}"; + + test_state.qmp(&str) +} + +pub fn qmp_plug_tablet_event(test_state: RefMut, num: u32) -> Value { + let num_str = format!("{}", num); + let mut str = + "{\"execute\":\"device_add\",\"arguments\":{\"driver\":\"usb-tablet\",\"id\":\"input" + .to_string(); + str += &num_str; + str += "\",\"bus\":\"usb.0\",\"port\":\"2\"}}"; + + test_state.qmp(&str) +} + +pub fn qmp_unplug_usb_event(test_state: RefMut, num: u32) -> Value { + let num_str = format!("{}", num); + let mut str = "{\"execute\":\"device_del\",\"arguments\":{\"id\":\"input".to_string(); + str += &num_str; + str += "\"}}"; + + test_state.qmp(&str) +} + +pub fn qmp_event_read(test_state: RefMut) { + test_state.qmp_read(); +} + +pub fn clear_iovec(test_state: RefMut, iovecs: &Vec) { + for iov in iovecs.iter() { + test_state.memwrite(iov.io_base, &vec![0; iov.io_len]); + } +} diff --git a/tests/mod_test/src/libdriver/virtio.rs b/tests/mod_test/src/libdriver/virtio.rs new file mode 100644 index 0000000000000000000000000000000000000000..c5d357e3b376fef1c5039816052f5b945626be34 --- /dev/null +++ b/tests/mod_test/src/libdriver/virtio.rs @@ -0,0 +1,597 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
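+// Common virtio test helpers: vring layout, descriptor/avail/used ring manipulation and the VirtioDeviceOps trait implemented by the transport-specific test drivers (e.g. virtio_pci_modern).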
+ +use std::cell::RefCell; +use std::collections::HashMap; +use std::mem::size_of; +use std::rc::Rc; +use std::time; + +use super::malloc::GuestAllocator; +use crate::libtest::TestState; +use util::byte_code::ByteCode; +use util::num_ops::round_up; +use util::offset_of; + +pub const VIRTIO_F_BAD_FEATURE: u64 = 0x40000000; +pub const VIRTIO_F_VERSION_1: u64 = 32; +pub const VIRTIO_CONFIG_S_ACKNOWLEDGE: u8 = 1; +pub const VIRTIO_CONFIG_S_DRIVER: u8 = 2; +pub const VIRTIO_CONFIG_S_DRIVER_OK: u8 = 4; +pub const VIRTIO_CONFIG_S_FEATURES_OK: u8 = 8; +pub const VIRTIO_CONFIG_S_NEEDS_RESET: u8 = 0x40; +pub const VIRTIO_CONFIG_S_FAILED: u8 = 0x80; +pub const VRING_DESC_F_NEXT: u16 = 1; +pub const VRING_DESC_F_WRITE: u16 = 2; +pub const VRING_DESC_F_INDIRECT: u16 = 4; +pub const VRING_USED_F_NO_NOTIFY: u16 = 1; +pub const VIRTIO_PCI_VRING_ALIGN: u32 = 4096; +pub const VIRTIO_F_NOTIFY_ON_EMPTY: u64 = 24; +pub const VIRTIO_RING_F_INDIRECT_DESC: u64 = 28; +pub const VIRTIO_RING_F_EVENT_IDX: u64 = 29; +/// When host consumes a buffer, don't interrupt the guest. +pub const VRING_AVAIL_F_NO_INTERRUPT: u16 = 1; + +pub trait VirtioDeviceOps { + fn config_readb(&self, addr: u64) -> u8; + fn config_readw(&self, addr: u64) -> u16; + fn config_readl(&self, addr: u64) -> u32; + fn config_readq(&self, addr: u64) -> u64; + fn config_writeb(&self, addr: u64, value: u8); + fn config_writew(&self, addr: u64, value: u16); + fn config_writel(&self, addr: u64, value: u32); + fn config_writeq(&self, addr: u64, value: u64); + fn isr_readb(&self) -> u8; + fn enable_interrupt(&mut self); + fn disable_interrupt(&mut self); + fn get_device_features(&self) -> u64; + fn set_guest_features(&self, features: u64); + fn get_guest_features(&self) -> u64; + fn get_status(&self) -> u8; + fn set_status(&self, status: u8); + fn get_queue_nums(&self) -> u16; + fn get_generation(&self) -> u8; + fn queue_select(&self, index: u16); + fn get_queue_select(&self) -> u16; + fn set_queue_size(&self, size: u16); + fn get_queue_size(&self) -> u16; + fn activate_queue(&self, desc: u64, avail: u64, used: u64); + fn queue_was_notified(&self, virtqueue: Rc<RefCell<TestVirtQueue>>) -> bool; + fn setup_virtqueue( + &self, + test_state: Rc<RefCell<TestState>>, + alloc: Rc<RefCell<GuestAllocator>>, + index: u16, + ) -> Rc<RefCell<TestVirtQueue>>; + fn cleanup_virtqueue(&self, alloc: Rc<RefCell<GuestAllocator>>, desc_addr: u64); + fn init_virtqueue( + &mut self, + test_state: Rc<RefCell<TestState>>, + alloc: Rc<RefCell<GuestAllocator>>, + num_queues: usize, + ) -> Vec<Rc<RefCell<TestVirtQueue>>>; + fn virtqueue_notify(&self, virtqueue: Rc<RefCell<TestVirtQueue>>); + fn kick_virtqueue( + &self, + test_state: Rc<RefCell<TestState>>, + virtqueue: Rc<RefCell<TestVirtQueue>>, + ); + fn poll_used_elem( + &self, + test_state: Rc<RefCell<TestState>>, + virtqueue: Rc<RefCell<TestVirtQueue>>, + desc_idx: u32, + timeout_us: u64, + len: &mut Option<u32>, + wait_notified: bool, + ); + fn init_device( + &mut self, + test_state: Rc<RefCell<TestState>>, + alloc: Rc<RefCell<GuestAllocator>>, + features: u64, + num_queues: usize, + ) -> Vec<Rc<RefCell<TestVirtQueue>>>; + fn destroy_device( + &mut self, + alloc: Rc<RefCell<GuestAllocator>>, + vqs: Vec<Rc<RefCell<TestVirtQueue>>>, + ); + fn reset(&mut self); + fn negotiate_features(&mut self, features: u64); + fn set_features_ok(&mut self); + fn set_driver_ok(&self); + + fn start(&mut self) { + self.reset(); + self.set_acknowledge(); + self.set_driver(); + } + + fn set_acknowledge(&self) { + let status = self.get_status() | VIRTIO_CONFIG_S_ACKNOWLEDGE; + self.set_status(status); + + assert_eq!(self.get_status(), status); + } + + fn set_driver(&self) { + let status = self.get_status() | VIRTIO_CONFIG_S_DRIVER; + self.set_status(status); + + assert_eq!(self.get_status(), status); + } + + fn req_result(&self, test_state: Rc<RefCell<TestState>>, addr: u64, timeout_us: u64) -> u8 { + let start_time = time::Instant::now(); + let timeout_us = 
time::Duration::from_micros(timeout_us); + let mut value = test_state.borrow().readb(addr); + + while value == 0xFF { + test_state.borrow().clock_step_ns(100); + assert!( + time::Instant::now() - start_time <= timeout_us, + "The device failed to process the request!" + ); + value = test_state.borrow().readb(addr); + } + + value + } +} + +#[repr(C, packed(16))] +#[derive(Default, Copy, Clone, Debug)] +pub struct VringDesc { + addr: u64, + pub len: u32, + pub flags: u16, + next: u16, +} + +impl ByteCode for VringDesc {} +pub static VRING_DESC_SIZE: u64 = size_of::() as u64; + +#[allow(unused)] +#[repr(C, packed(2))] +pub struct VringAvail { + flags: u16, + idx: u16, + pub ring: Vec, +} + +#[repr(C, packed(4))] +pub struct VringUsedElem { + id: u32, + pub len: u32, +} + +#[repr(C, packed(4))] +pub struct VringUsed { + flags: u16, + pub idx: u16, + pub ring: Vec, +} + +#[allow(unused)] +struct Vring { + num: u32, + desc: VringDesc, + avail: VringAvail, + used: VringUsed, +} + +#[derive(Default)] +pub struct TestVringIndirectDesc { + pub desc: u64, + pub index: u16, + pub elem: u16, +} + +#[derive(Default)] +pub struct TestVringDescEntry { + pub data: u64, + pub len: u32, + pub write: bool, +} + +impl TestVringIndirectDesc { + pub fn new() -> Self { + Self { + ..Default::default() + } + } + + pub fn setup( + &mut self, + alloc: Rc>, + test_state: Rc>, + elem: u16, + ) { + self.elem = elem; + self.desc = alloc + .borrow_mut() + .alloc((size_of::() * elem as usize) as u64); + + for i in 0..elem - 1 { + test_state + .borrow() + .writeq(self.desc + (size_of::() * i as usize) as u64, 0); + + test_state.borrow().writel( + self.desc + + (size_of::() * i as usize + offset_of!(VringDesc, len)) as u64, + 0, + ); + + test_state.borrow().writew( + self.desc + + (size_of::() * i as usize + offset_of!(VringDesc, flags)) as u64, + VRING_DESC_F_NEXT, + ); + + test_state.borrow().writew( + self.desc + + (size_of::() * i as usize + offset_of!(VringDesc, next)) as u64, + i + 1, + ); + } + } + + pub fn add_desc( + &mut self, + test_state: Rc>, + data: u64, + len: u32, + write: bool, + ) { + assert!(self.index < self.elem); + + let mut flags = test_state.borrow().readw( + self.desc + + (size_of::() as u64 * u64::from(self.index)) + + offset_of!(VringDesc, flags) as u64, + ); + + if write { + flags |= VRING_DESC_F_WRITE; + } + + test_state.borrow().writeq( + self.desc + (size_of::() * self.index as usize) as u64, + data, + ); + test_state.borrow().writel( + self.desc + + (size_of::() * self.index as usize + offset_of!(VringDesc, len)) + as u64, + len, + ); + test_state.borrow().writew( + self.desc + + (size_of::() * self.index as usize + offset_of!(VringDesc, flags)) + as u64, + flags, + ); + + self.index += 1; + } + + pub fn set_desc_flag(&mut self, test_state: Rc>, idx: u64, flag: u16) { + test_state.borrow().writew( + self.desc + + (size_of::() * idx as usize + offset_of!(VringDesc, flags)) as u64, + flag, + ); + } +} + +#[derive(Default)] +pub struct TestVirtQueue { + pub desc: u64, + pub avail: u64, + pub used: u64, + pub index: u16, + pub size: u32, + pub free_head: u32, + pub num_free: u32, + pub align: u32, + last_used_idx: u16, + pub indirect: bool, + pub event: bool, + pub msix_entry: u16, + pub msix_addr: u64, + pub msix_data: u32, + pub queue_notify_off: u64, + pub desc_len: HashMap, +} + +impl TestVirtQueue { + pub fn new() -> Self { + Self { + ..Default::default() + } + } + + pub fn vring_init(&self, test_state: Rc>) { + // desc[i]->addr = 0, desc[i]->next = i + 1; + for i in 0..self.size - 1 { + 
test_state + .borrow() + .writeq(self.desc + (size_of::() * i as usize) as u64, 0); + test_state.borrow().writew( + self.desc + + (size_of::() * i as usize + offset_of!(VringDesc, next)) as u64, + (i + 1) as u16, + ); + } + + // virtqueue.avail.flags + test_state.borrow().writew(self.avail, 0); + // virtqueue.avail.idx + test_state + .borrow() + .writew(self.avail + offset_of!(VringAvail, idx) as u64, 0); + // virtqueue.avail.used_event + test_state.borrow().writew( + self.avail + + offset_of!(VringAvail, ring) as u64 + + (size_of::() * self.size as usize) as u64, + 0, + ); + + // virtqueue.used.flags + test_state.borrow().writew(self.used, 0); + // virtqueue.used.idx + test_state + .borrow() + .writew(self.used + offset_of!(VringUsed, idx) as u64, 0); + // virtqueue.used.avail_event + test_state.borrow().writew( + self.used + + offset_of!(VringUsed, ring) as u64 + + (size_of::() as u64 * u64::from(self.size)), + 0, + ); + } + + pub fn setup( + &mut self, + virtio_dev: &dyn VirtioDeviceOps, + alloc: Rc>, + index: u16, + ) { + let num_queues = virtio_dev.get_queue_nums(); + assert!(index <= num_queues); + + let features = virtio_dev.get_guest_features(); + virtio_dev.queue_select(index); + + let queue_size = u32::from(virtio_dev.get_queue_size()); + assert!(queue_size != 0); + assert!(queue_size & (queue_size - 1) == 0); + + self.index = index; + self.size = queue_size; + self.free_head = 0; + self.num_free = self.size; + self.align = VIRTIO_PCI_VRING_ALIGN; + self.indirect = (features & (1 << VIRTIO_RING_F_INDIRECT_DESC)) != 0; + self.event = (features & (1 << VIRTIO_RING_F_EVENT_IDX)) != 0; + + let addr = alloc + .borrow_mut() + .alloc(u64::from(get_vring_size(self.size, self.align))); + self.desc = addr; + self.avail = self.desc + u64::from(self.size * size_of::() as u32); + self.used = round_up( + self.avail + u64::from(size_of::() as u32 * (3 + self.size)), + u64::from(self.align), + ) + .unwrap(); + } + + /// Get used elements from used ring and save to self.desc_len + pub fn get_buf(&mut self, test_state: Rc>) -> bool { + let mut ret = false; + loop { + let index = test_state + .borrow() + .readw(self.used + offset_of!(VringUsed, idx) as u64); + if index == self.last_used_idx { + break; + } + + let elem_addr = self.used + + offset_of!(VringUsed, ring) as u64 + + u64::from(u32::from(self.last_used_idx) % self.size) + * size_of::() as u64; + + let id_addr = elem_addr + offset_of!(VringUsedElem, id) as u64; + let id_val = test_state.borrow().readl(id_addr); + let len_addr = elem_addr + offset_of!(VringUsedElem, len) as u64; + let len_val = test_state.borrow().readl(len_addr); + self.desc_len.insert(id_val, len_val); + + self.last_used_idx += 1; + ret = true; + } + ret + } + + pub fn get_avail_event(&self, test_state: Rc>) -> u16 { + assert!(self.event); + + test_state.borrow().readw( + self.used + + offset_of!(VringUsed, ring) as u64 + + (size_of::() as u64 * u64::from(self.size)), + ) + } + + pub fn set_used_event(&self, test_state: Rc>, index: u16) { + test_state.borrow().writew( + self.avail + + offset_of!(VringAvail, ring) as u64 + + (size_of::() as u64 * u64::from(self.size)), + index, + ); + } + + pub fn set_avail_flags(&self, test_state: Rc>, flags: u16) { + test_state + .borrow() + .writew(self.avail + offset_of!(VringAvail, flags) as u64, flags); + } + + fn set_avail_idx(&self, test_state: Rc>, index: u16) { + test_state + .borrow() + .writew(self.avail + offset_of!(VringAvail, idx) as u64, index); + } + + fn set_avail_ring(&self, test_state: Rc>, desc_idx: u16) { + let idx: 
u16 = test_state + .borrow() + .readw(self.avail + offset_of!(VringAvail, idx) as u64); + test_state.borrow().writew( + self.avail + + offset_of!(VringAvail, ring) as u64 + + (size_of::() * (u32::from(idx) % self.size) as usize) as u64, + desc_idx, + ); + } + + pub fn update_avail(&self, test_state: Rc>, desc_idx: u32) { + let idx: u16 = test_state + .borrow() + .readw(self.avail + offset_of!(VringAvail, idx) as u64); + // Update avail.used_event. + if self.event { + self.set_used_event(test_state.clone(), idx); + } + // avail.ring[idx] = desc_idx. + self.set_avail_ring(test_state.clone(), desc_idx as u16); + // Update avail.idx. + self.set_avail_idx(test_state, idx + 1); + } + + pub fn add( + &mut self, + test_state: Rc>, + data: u64, + len: u32, + write: bool, + ) -> u32 { + let free_head = self.free_head; + let mut flags: u16 = 0; + if write { + flags |= VRING_DESC_F_WRITE; + } + + let desc_elem = VringDesc { + addr: data, + len, + flags, + next: 0, + }; + self.add_elem_to_desc(test_state.clone(), desc_elem); + self.update_avail(test_state, free_head); + + free_head + } + + pub fn add_chained( + &mut self, + test_state: Rc>, + data_entries: Vec, + ) -> u32 { + let free_head = self.free_head; + + for (i, entry) in data_entries.iter().enumerate() { + let mut flags: u16 = 0; + let mut next_desc = 0; + if entry.write { + flags |= VRING_DESC_F_WRITE; + } + if i < data_entries.len() - 1 { + flags |= VRING_DESC_F_NEXT; + next_desc = self.free_head + 1; + } + + let desc_elem = VringDesc { + addr: entry.data, + len: entry.len, + flags, + next: next_desc as u16, + }; + self.add_elem_to_desc(test_state.clone(), desc_elem); + } + self.update_avail(test_state, free_head); + free_head + } + + pub fn add_indirect( + &mut self, + test_state: Rc>, + indirect: TestVringIndirectDesc, + mixed: bool, + ) -> u32 { + assert!(indirect.index >= indirect.elem); + + let free_head = self.free_head; + let desc_elem = VringDesc { + addr: indirect.desc, + len: size_of::() as u32 * u32::from(indirect.elem), + flags: VRING_DESC_F_INDIRECT, + next: 0, + }; + self.add_elem_to_desc(test_state.clone(), desc_elem); + if !mixed { + self.update_avail(test_state, free_head); + } + free_head + } + + // Add a vring desc elem to desc table. + fn add_elem_to_desc(&mut self, test_state: Rc>, elem: VringDesc) { + self.num_free -= 1; + let desc_elem_addr = self.desc + VRING_DESC_SIZE * u64::from(self.free_head); + test_state + .borrow() + .memwrite(desc_elem_addr, elem.as_bytes()); + self.free_head += 1; + } +} + +#[derive(Default)] +pub struct TestVirtioDev { + pub features: u64, + pub device_type: u16, + pub feature_negotiated: bool, +} + +impl TestVirtioDev { + pub fn new() -> Self { + Self { + ..Default::default() + } + } +} + +#[inline] +pub fn get_vring_size(num: u32, align: u32) -> u32 { + let desc_avail = + u64::from(size_of::() as u32 * num + size_of::() as u32 * (3 + num)); + let desc_avail_align = round_up(desc_avail, u64::from(align)).unwrap() as u32; + desc_avail_align + size_of::() as u32 * 3 + size_of::() as u32 * num +} diff --git a/tests/mod_test/src/libdriver/virtio_block.rs b/tests/mod_test/src/libdriver/virtio_block.rs new file mode 100644 index 0000000000000000000000000000000000000000..5cc5121c974bd0922f3f9bfafbe2b8c7c5d00595 --- /dev/null +++ b/tests/mod_test/src/libdriver/virtio_block.rs @@ -0,0 +1,465 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::cell::RefCell; +use std::mem::size_of; +use std::rc::Rc; + +use super::machine::TestStdMachine; +use super::malloc::GuestAllocator; +use super::virtio::TestVirtQueue; +use super::virtio::VirtioDeviceOps; +use super::virtio_pci_modern::TestVirtioPciDev; +use crate::libdriver::virtio::{ + TestVringDescEntry, VIRTIO_F_BAD_FEATURE, VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC, +}; +use crate::libtest::{test_init, TestState, MACHINE_TYPE_ARG}; +use crate::utils::ImageType; +use crate::utils::{cleanup_img, create_img, TEST_IMAGE_SIZE}; +use util::byte_code::ByteCode; +use util::num_ops::round_up; + +pub const VIRTIO_BLK_F_BARRIER: u64 = 0; +pub const VIRTIO_BLK_F_SIZE_MAX: u64 = 1; +pub const VIRTIO_BLK_F_SEG_MAX: u64 = 2; +pub const VIRTIO_BLK_F_GEOMETRY: u64 = 4; +pub const VIRTIO_BLK_F_RO: u64 = 5; +pub const VIRTIO_BLK_F_BLK_SIZE: u64 = 6; +pub const VIRTIO_BLK_F_SCSI: u64 = 7; +pub const VIRTIO_BLK_F_FLUSH: u64 = 9; +pub const VIRTIO_BLK_F_TOPOLOGY: u64 = 10; +pub const VIRTIO_BLK_F_CONFIG_WCE: u64 = 11; +pub const VIRTIO_BLK_F_MQ: u64 = 12; +pub const VIRTIO_BLK_F_DISCARD: u64 = 13; +pub const VIRTIO_BLK_F_WRITE_ZEROES: u64 = 14; +pub const VIRTIO_BLK_F_LIFETIME: u64 = 15; +pub const VIRTIO_BLK_F_SECURE_ERASE: u64 = 16; + +pub const VIRTIO_BLK_T_IN: u32 = 0; +pub const VIRTIO_BLK_T_OUT: u32 = 1; +pub const VIRTIO_BLK_T_FLUSH: u32 = 4; +pub const VIRTIO_BLK_T_GET_ID: u32 = 8; +pub const VIRTIO_BLK_T_DISCARD: u32 = 11; +pub const VIRTIO_BLK_T_WRITE_ZEROES: u32 = 13; +pub const VIRTIO_BLK_T_ILLGEAL: u32 = 32; +/// Success +pub const VIRTIO_BLK_S_OK: u8 = 0; +/// IO error. +pub const VIRTIO_BLK_S_IOERR: u8 = 1; +/// Unsupported request. +pub const VIRTIO_BLK_S_UNSUPP: u8 = 2; + +pub const TIMEOUT_US: u64 = 15 * 1000 * 1000; +pub const DEFAULT_IO_REQS: u64 = 5; +pub const REQ_ADDR_LEN: u32 = 16; +pub const REQ_DATA_LEN: u32 = 512; +pub const REQ_STATUS_LEN: u32 = 1; +pub const REQ_DATA_OFFSET: u64 = REQ_ADDR_LEN as u64; +pub const REQ_STATUS_OFFSET: u64 = (REQ_ADDR_LEN + REQ_DATA_LEN) as u64; + +/// Used to compute the number of sectors. +pub const SECTOR_SHIFT: u8 = 9; +/// Max number sectors of per request. 
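+/// Computed as `u32::MAX >> SECTOR_SHIFT`, so a request's total byte length still fits in a `u32`.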
+pub const MAX_REQUEST_SECTORS: u32 = u32::MAX >> SECTOR_SHIFT; + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtBlkDiscardWriteZeroes { + pub sector: u64, + pub num_sectors: u32, + pub flags: u32, +} + +impl ByteCode for VirtBlkDiscardWriteZeroes {} + +#[allow(unused)] +pub struct TestVirtBlkReq { + req_type: u32, + io_priority: u32, + sector: u64, + pub data: String, + status: u8, +} + +impl TestVirtBlkReq { + pub fn new(req_type: u32, io_priority: u32, sector: u64, data_size: usize) -> Self { + Self { + req_type, + io_priority, + sector, + data: String::with_capacity(data_size), + status: 0, + } + } + + pub fn as_bytes(&self) -> Vec { + let mut bytes_buf = Vec::new(); + bytes_buf.append(&mut self.req_type.to_le_bytes().to_vec()); + bytes_buf.append(&mut self.io_priority.to_le_bytes().to_vec()); + bytes_buf.append(&mut self.sector.to_le_bytes().to_vec()); + bytes_buf + } +} + +pub fn create_blk( + image_type: &ImageType, + image_path: Rc, + device_args: Rc, + drive_args: Rc, + other_args: Rc, +) -> ( + Rc>, + Rc>, + Rc>, +) { + let pci_slot: u8 = 0x4; + let pci_fn: u8 = 0x0; + let mut extra_args: Vec<&str> = Vec::new(); + let img_type = match image_type { + &ImageType::Raw => "raw", + &ImageType::Qcow2 => "qcow2", + }; + + let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + extra_args.append(&mut args); + + let blk_pci_args = format!( + "-device {},id=drv0,drive=drive0,bus=pcie.0,addr={}.0{}", + "virtio-blk-pci", pci_slot, device_args, + ); + args = blk_pci_args[..].split(' ').collect(); + extra_args.append(&mut args); + let blk_args = format!( + "-drive if=none,id=drive0,file={},format={}{}", + image_path, img_type, drive_args, + ); + args = blk_args.split(' ').collect(); + extra_args.append(&mut args); + + if !other_args.is_empty() { + args = other_args.split(' ').collect(); + extra_args.append(&mut args); + } + + let test_state = Rc::new(RefCell::new(test_init(extra_args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator.clone(); + + let virtio_blk = Rc::new(RefCell::new(TestVirtioPciDev::new(machine.pci_bus))); + + virtio_blk.borrow_mut().init(pci_slot, pci_fn); + + (virtio_blk, test_state, allocator) +} + +pub fn virtio_blk_request( + test_state: Rc>, + alloc: Rc>, + req: TestVirtBlkReq, + align: bool, +) -> u64 { + let status: u8 = 0xFF; + let data_size = req.data.capacity(); + + match req.req_type { + VIRTIO_BLK_T_IN | VIRTIO_BLK_T_OUT => { + assert_eq!(data_size % (REQ_DATA_LEN as usize), 0) + } + VIRTIO_BLK_T_FLUSH => {} + VIRTIO_BLK_T_GET_ID => {} + VIRTIO_BLK_T_DISCARD | VIRTIO_BLK_T_WRITE_ZEROES => {} + VIRTIO_BLK_T_ILLGEAL => {} + _ => { + assert_eq!(data_size, 0) + } + } + + let addr = alloc + .borrow_mut() + .alloc((size_of::() + data_size + 512) as u64); + + let data_addr = if align { + round_up(addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + } else { + addr + REQ_DATA_OFFSET + }; + + let req_bytes = req.as_bytes(); + test_state.borrow().memwrite(addr, req_bytes.as_slice()); + let mut data_bytes = req.data.as_bytes().to_vec(); + data_bytes.resize(data_size, 0); + + // Write data to memory. If the data length is bigger than 4096, the memwrite() + // will return error. So, split it to 512 bytes. 
+ let size = data_bytes.len(); + let mut offset = 0; + while offset < size { + let len = std::cmp::min(512, size - offset); + test_state.borrow().memwrite( + data_addr + offset as u64, + &data_bytes.as_slice()[offset..offset + len], + ); + offset += len; + } + test_state + .borrow() + .memwrite(data_addr + data_size as u64, &status.to_le_bytes()); + + addr +} + +pub fn add_blk_request( + test_state: Rc>, + alloc: Rc>, + vq: Rc>, + req_type: u32, + sector: u64, + align: bool, +) -> (u32, u64) { + let mut read = true; + // [req_type, io_priority, sector, data_size] + let mut blk_req = TestVirtBlkReq::new(req_type, 1, sector, REQ_DATA_LEN as usize); + if req_type == VIRTIO_BLK_T_OUT { + blk_req.data.push_str("TEST"); + read = false; + } + // Get addr and write to Stratovirt. + let req_addr = virtio_blk_request(test_state.clone(), alloc, blk_req, align); + // Desc elem: [addr, len, flags, next]. + + let data_addr = if align { + round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + } else { + req_addr + REQ_DATA_OFFSET + }; + + let mut data_entries: Vec = Vec::with_capacity(3); + data_entries.push(TestVringDescEntry { + data: req_addr, + len: REQ_ADDR_LEN, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: data_addr, + len: REQ_DATA_LEN, + write: read, + }); + data_entries.push(TestVringDescEntry { + data: data_addr + u64::from(REQ_DATA_LEN), + len: REQ_STATUS_LEN, + write: true, + }); + + let free_head = vq.borrow_mut().add_chained(test_state, data_entries); + + (free_head, req_addr) +} + +/// Write DEFAULT_IO_REQS requests to disk. +pub fn virtio_blk_write( + blk: Rc>, + test_state: Rc>, + alloc: Rc>, + virtqueue: Rc>, + sector: u64, + align: bool, +) { + let mut free_head = 0_u32; + let mut req_addr = 0_u64; + for i in sector..sector + DEFAULT_IO_REQS { + (free_head, req_addr) = add_blk_request( + test_state.clone(), + alloc.clone(), + virtqueue.clone(), + VIRTIO_BLK_T_OUT, + i, + align, + ); + } + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueue.clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueue, + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let status_addr = if align { + round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + u64::from(REQ_DATA_LEN) + } else { + req_addr + REQ_STATUS_OFFSET + }; + + let status = test_state.borrow().readb(status_addr); + assert_eq!(status, VIRTIO_BLK_S_OK); +} + +/// Read 512 byte from disk. 
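+/// The data read back is expected to begin with the "TEST" pattern written by
+/// `virtio_blk_write`, and the trailing status byte must equal `VIRTIO_BLK_S_OK`.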
+pub fn virtio_blk_read( + blk: Rc>, + test_state: Rc>, + alloc: Rc>, + virtqueue: Rc>, + sector: u64, + align: bool, +) { + let (free_head, req_addr) = add_blk_request( + test_state.clone(), + alloc, + virtqueue.clone(), + VIRTIO_BLK_T_IN, + sector, + align, + ); + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueue.clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueue, + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let data_addr = if align { + round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + } else { + req_addr + u64::from(REQ_ADDR_LEN) + }; + + let status_addr = if align { + round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + u64::from(REQ_DATA_LEN) + } else { + req_addr + REQ_STATUS_OFFSET + }; + + let status = test_state.borrow().readb(status_addr); + assert_eq!(status, VIRTIO_BLK_S_OK); + + assert_eq!( + String::from_utf8(test_state.borrow().memread(data_addr, 4)).unwrap(), + "TEST" + ); +} + +pub fn virtio_blk_read_write_zeroes( + blk: Rc>, + test_state: Rc>, + alloc: Rc>, + vq: Rc>, + req_type: u32, + sector: u64, + data_len: usize, +) { + let mut read = true; + let mut blk_req = TestVirtBlkReq::new(req_type, 1, sector, data_len); + if req_type == VIRTIO_BLK_T_OUT { + unsafe { + blk_req.data.as_mut_vec().append(&mut vec![0; data_len]); + } + read = false; + } + let req_addr = virtio_blk_request(test_state.clone(), alloc, blk_req, false); + let data_addr = req_addr + u64::from(REQ_ADDR_LEN); + let data_entries: Vec = vec![ + TestVringDescEntry { + data: req_addr, + len: REQ_ADDR_LEN, + write: false, + }, + TestVringDescEntry { + data: data_addr, + len: data_len as u32, + write: read, + }, + TestVringDescEntry { + data: data_addr + data_len as u64, + len: REQ_STATUS_LEN, + write: true, + }, + ]; + let free_head = vq + .borrow_mut() + .add_chained(test_state.clone(), data_entries); + blk.borrow().kick_virtqueue(test_state.clone(), vq.clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + vq, + free_head, + TIMEOUT_US, + &mut None, + true, + ); + let status_addr = req_addr + u64::from(REQ_ADDR_LEN) + data_len as u64; + let status = test_state.borrow().readb(status_addr); + assert_eq!(status, VIRTIO_BLK_S_OK); + + if read { + assert_eq!( + test_state.borrow().memread(data_addr, data_len as u64), + vec![0; data_len], + ); + } +} + +pub fn virtio_blk_default_feature(blk: Rc>) -> u64 { + let mut features = blk.borrow().get_device_features(); + features &= !(VIRTIO_F_BAD_FEATURE + | 1 << VIRTIO_RING_F_INDIRECT_DESC + | 1 << VIRTIO_RING_F_EVENT_IDX + | 1 << VIRTIO_BLK_F_SCSI); + + features +} + +pub fn set_up( + image_type: &ImageType, +) -> ( + Rc>, + Rc>, + Rc>, + Rc, +) { + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE, 0, image_type)); + let device_args = Rc::new(String::from("")); + let drive_args = Rc::new(String::from(",direct=false")); + let other_args = Rc::new(String::from("")); + let (blk, test_state, alloc) = create_blk( + image_type, + image_path.clone(), + device_args, + drive_args, + other_args, + ); + + (blk, test_state, alloc, image_path) +} + +pub fn tear_down( + blk: Rc>, + test_state: Rc>, + alloc: Rc>, + vqs: Vec>>, + image_path: Rc, +) { + blk.borrow_mut().destroy_device(alloc, vqs); + test_state.borrow_mut().stop(); + if !image_path.is_empty() { + cleanup_img(image_path.to_string()); + } +} diff --git a/tests/mod_test/src/libdriver/virtio_gpu.rs b/tests/mod_test/src/libdriver/virtio_gpu.rs new file mode 100644 index 
0000000000000000000000000000000000000000..ac5236d8991f44500b51037115e3bad248b8462c --- /dev/null +++ b/tests/mod_test/src/libdriver/virtio_gpu.rs @@ -0,0 +1,928 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{cell::RefCell, mem::size_of, rc::Rc, slice::from_raw_parts, vec}; + +use super::{ + machine::TestStdMachine, + malloc::GuestAllocator, + pci::{PCIBarAddr, TestPciDev}, + pci_bus::TestPciBus, +}; +use crate::libdriver::virtio::{TestVirtQueue, TestVringDescEntry, VirtioDeviceOps}; +use crate::libdriver::virtio_pci_modern::TestVirtioPciDev; +use crate::libtest::{test_init, TestState, MACHINE_TYPE_ARG}; +use util::byte_code::ByteCode; +use virtio::{ + VIRTIO_GPU_CMD_GET_DISPLAY_INFO, VIRTIO_GPU_CMD_GET_EDID, + VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING, VIRTIO_GPU_CMD_RESOURCE_CREATE_2D, + VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING, VIRTIO_GPU_CMD_RESOURCE_FLUSH, + VIRTIO_GPU_CMD_RESOURCE_UNREF, VIRTIO_GPU_CMD_SET_SCANOUT, VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D, + VIRTIO_GPU_CMD_UPDATE_CURSOR, +}; + +const TIMEOUT_US: u64 = 15 * 1000 * 1000; + +pub const VIRTIO_GPU_MAX_SCANOUTS: usize = 16; + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuCtrlHdr { + pub hdr_type: u32, + pub flags: u32, + pub fence_id: u64, + pub ctx_id: u32, + pub padding: u32, +} + +impl ByteCode for VirtioGpuCtrlHdr {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuRect { + pub x_coord: u32, + pub y_coord: u32, + pub width: u32, + pub height: u32, +} + +impl VirtioGpuRect { + pub fn new(x_coord: u32, y_coord: u32, width: u32, height: u32) -> Self { + Self { + x_coord, + y_coord, + width, + height, + } + } +} + +impl ByteCode for VirtioGpuRect {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuDisplayOne { + pub rect: VirtioGpuRect, + pub enabled: u32, + pub flags: u32, +} + +impl ByteCode for VirtioGpuDisplayOne {} + +#[repr(C)] +#[allow(unused)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuDisplayInfo { + pub header: VirtioGpuCtrlHdr, + pmodes: [VirtioGpuDisplayOne; VIRTIO_GPU_MAX_SCANOUTS], +} +impl ByteCode for VirtioGpuDisplayInfo {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuGetEdid { + pub scanouts: u32, + pub padding: u32, +} + +impl VirtioGpuGetEdid { + pub fn new(scanouts: u32) -> Self { + Self { + scanouts, + padding: 0, + } + } +} + +impl ByteCode for VirtioGpuGetEdid {} + +#[repr(C)] +#[allow(unused)] +#[derive(Clone, Copy)] +pub struct VirtioGpuRespEdid { + pub header: VirtioGpuCtrlHdr, + pub size: u32, + pub padding: u32, + pub edid: [u8; 1024], +} + +impl Default for VirtioGpuRespEdid { + fn default() -> Self { + VirtioGpuRespEdid { + header: VirtioGpuCtrlHdr::default(), + size: 0, + padding: 0, + edid: [0; 1024], + } + } +} + +impl ByteCode for VirtioGpuRespEdid {} + +#[repr(C)] +#[derive(Default, Clone, Copy, Debug)] +pub struct VirtioGpuResourceCreate2d { + pub resource_id: u32, + pub format: u32, + pub width: u32, + pub height: u32, +} + +impl VirtioGpuResourceCreate2d { + pub fn new(resource_id: 
u32, format: u32, width: u32, height: u32) -> Self { + Self { + resource_id, + format, + width, + height, + } + } +} + +impl ByteCode for VirtioGpuResourceCreate2d {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuResourceUnref { + pub resource_id: u32, + pub padding: u32, +} + +impl VirtioGpuResourceUnref { + pub fn new(resource_id: u32) -> Self { + Self { + resource_id, + padding: 0, + } + } +} + +impl ByteCode for VirtioGpuResourceUnref {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuSetScanout { + pub rect: VirtioGpuRect, + pub scanout_id: u32, + pub resource_id: u32, +} + +impl VirtioGpuSetScanout { + pub fn new(rect: VirtioGpuRect, scanout_id: u32, resource_id: u32) -> Self { + Self { + rect, + scanout_id, + resource_id, + } + } +} + +impl ByteCode for VirtioGpuSetScanout {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuResourceFlush { + pub rect: VirtioGpuRect, + pub resource_id: u32, + pub padding: u32, +} + +impl VirtioGpuResourceFlush { + pub fn new(rect: VirtioGpuRect, resource_id: u32) -> Self { + Self { + rect, + resource_id, + padding: 0, + } + } +} + +impl ByteCode for VirtioGpuResourceFlush {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuTransferToHost2d { + pub rect: VirtioGpuRect, + pub offset: u64, + pub resource_id: u32, + pub padding: u32, +} + +impl VirtioGpuTransferToHost2d { + pub fn new(rect: VirtioGpuRect, offset: u64, resource_id: u32) -> Self { + Self { + rect, + offset, + resource_id, + padding: 0, + } + } +} + +impl ByteCode for VirtioGpuTransferToHost2d {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuResourceAttachBacking { + pub resource_id: u32, + pub nr_entries: u32, +} + +impl VirtioGpuResourceAttachBacking { + pub fn new(resource_id: u32, nr_entries: u32) -> Self { + Self { + resource_id, + nr_entries, + } + } +} + +impl ByteCode for VirtioGpuResourceAttachBacking {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuMemEntry { + pub addr: u64, + pub length: u32, + pub padding: u32, +} + +impl VirtioGpuMemEntry { + pub fn new(addr: u64, length: u32) -> Self { + Self { + addr, + length, + padding: 0, + } + } +} + +impl ByteCode for VirtioGpuMemEntry {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuResourceDetachBacking { + pub resource_id: u32, + pub padding: u32, +} + +impl VirtioGpuResourceDetachBacking { + pub fn new(resource_id: u32) -> Self { + Self { + resource_id, + padding: 0, + } + } +} + +impl ByteCode for VirtioGpuResourceDetachBacking {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuCursorPos { + pub scanout_id: u32, + pub x_coord: u32, + pub y_coord: u32, + pub padding: u32, +} + +impl ByteCode for VirtioGpuCursorPos {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +pub struct VirtioGpuUpdateCursor { + pub pos: VirtioGpuCursorPos, + pub resource_id: u32, + pub hot_x: u32, + pub hot_y: u32, + pub padding: u32, +} + +impl ByteCode for VirtioGpuUpdateCursor {} + +#[derive(Debug, Clone, Copy)] +pub enum DpyEvent { + QuerySurface = 0, + QueryCursor = 1, + GetSurface = 2, + GetCursor = 3, + Deactive = 4, +} + +pub struct TestDemoDpyDevice { + pub pci_dev: TestPciDev, + pub bar_addr: PCIBarAddr, + bar_idx: u8, + allocator: Rc>, +} + +pub struct TestVirtioGpu { + pub device: Rc>, + pub allocator: Rc>, + pub state: Rc>, + pub ctrl_q: Rc>, + pub cursor_q: Rc>, +} + +impl TestDemoDpyDevice { + pub fn new(pci_bus: Rc>, allocator: Rc>) -> Self { + Self { + pci_dev: 
TestPciDev::new(pci_bus), + bar_addr: 0, + bar_idx: 0, + allocator, + } + } + + pub fn deactive(&mut self) { + let addr = self.allocator.borrow_mut().alloc(1); + self.pci_dev + .io_writeq(self.bar_addr, DpyEvent::Deactive as u64, addr); + } + + pub fn query_surface(&mut self) -> u16 { + let addr = self.allocator.borrow_mut().alloc(size_of::() as u64); + let test_state = self.pci_dev.pci_bus.borrow_mut().test_state.clone(); + self.pci_dev + .io_writeq(self.bar_addr, DpyEvent::QuerySurface as u64, addr); + return test_state.borrow_mut().readw(addr); + } + + pub fn query_cursor(&mut self) -> u32 { + let addr = self.allocator.borrow_mut().alloc(size_of::() as u64); + let test_state = self.pci_dev.pci_bus.borrow_mut().test_state.clone(); + self.pci_dev + .io_writeq(self.bar_addr, DpyEvent::QueryCursor as u64, addr); + return test_state.borrow_mut().readl(addr); + } + + pub fn get_surface(&mut self, size: u64) -> Vec { + let addr = self.allocator.borrow_mut().alloc(size_of::() as u64); + let test_state = self.pci_dev.pci_bus.borrow_mut().test_state.clone(); + self.pci_dev + .io_writeq(self.bar_addr, DpyEvent::GetSurface as u64, addr); + return test_state.borrow_mut().memread(addr, size); + } + + pub fn get_cursor(&mut self, size: u64) -> Vec { + let addr = self.allocator.borrow_mut().alloc(size_of::() as u64); + let test_state = self.pci_dev.pci_bus.borrow_mut().test_state.clone(); + self.pci_dev + .io_writeq(self.bar_addr, DpyEvent::GetCursor as u64, addr); + return test_state.borrow_mut().memread(addr, size); + } + + pub fn init(&mut self, pci_slot: u8) { + let devfn = pci_slot << 3; + assert!(self.pci_dev.find_pci_device(devfn)); + + self.pci_dev.enable(); + self.bar_addr = self.pci_dev.io_map(self.bar_idx); + } +} + +impl TestVirtioGpu { + pub fn new( + pci_bus: Rc>, + allocator: Rc>, + state: Rc>, + ) -> Self { + Self { + device: Rc::new(RefCell::new(TestVirtioPciDev::new(pci_bus))), + allocator, + state, + ctrl_q: Rc::new(RefCell::new(TestVirtQueue::new())), + cursor_q: Rc::new(RefCell::new(TestVirtQueue::new())), + } + } + + pub fn init(&mut self, pci_slot: u8, pci_fn: u8) { + self.device.borrow_mut().init(pci_slot, pci_fn); + self.device.borrow_mut().pci_dev.enable_msix(None); + self.device + .borrow_mut() + .setup_msix_configuration_vector(self.allocator.clone(), 0); + let features = self.device.borrow_mut().get_device_features(); + self.device.borrow_mut().negotiate_features(features); + self.device.borrow_mut().set_features_ok(); + + let ctrl_q = + self.device + .borrow_mut() + .setup_virtqueue(self.state.clone(), self.allocator.clone(), 0); + self.device + .borrow_mut() + .setup_virtqueue_intr(1, self.allocator.clone(), ctrl_q.clone()); + let cursor_q = + self.device + .borrow_mut() + .setup_virtqueue(self.state.clone(), self.allocator.clone(), 1); + self.device + .borrow_mut() + .setup_virtqueue_intr(2, self.allocator.clone(), cursor_q.clone()); + + self.ctrl_q = ctrl_q; + self.cursor_q = cursor_q; + + self.device.borrow_mut().set_driver_ok(); + } + + pub fn submit_request( + &mut self, + ctrl_q: bool, + hdr: &[u8], + hdr_ctx: Option<&[u8]>, + ctx: Option<&[u8]>, + resp: Option<&mut T>, + wait_resp: bool, + ) { + let mut offset = 0; + let mut vec = Vec::new(); + let hdr_len = hdr.len() as u64; + let mut hdr_ctx_len = 0; + if hdr_ctx.is_some() { + hdr_ctx_len = hdr_ctx.as_ref().unwrap().len() as u64; + } + let mut ctx_len = 0; + if ctx.is_some() { + ctx_len = ctx.as_ref().unwrap().len() as u64; + } + let mut resp_len = 0; + if resp.is_some() { + resp_len = size_of::() as u64; + } + + 
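+        // The request is laid out in one contiguous guest allocation as
+        // [hdr | hdr_ctx | ctx | resp]; the descriptor entries pushed below
+        // reference slices of this allocation at the matching offsets.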
let addr = self + .allocator + .borrow_mut() + .alloc(hdr_len + hdr_ctx_len + ctx_len + resp_len); + + // write first + self.state.borrow().memwrite(addr, hdr); + + let mut tmp = TestVringDescEntry::default(); + + if hdr_ctx.is_some() { + self.state + .borrow() + .memwrite(addr + hdr_len, hdr_ctx.unwrap()); + let mut tmp = TestVringDescEntry::default(); + tmp.data = addr; + tmp.len = (hdr_len + hdr_ctx_len) as u32; + tmp.write = false; + vec.push(tmp); + offset += hdr_len + hdr_ctx_len; + } else { + tmp.data = addr; + tmp.len = hdr_len as u32; + tmp.write = false; + vec.push(tmp); + offset += hdr_len; + } + + if ctx.is_some() { + self.state.borrow().memwrite(addr + offset, ctx.unwrap()); + let mut tmp = TestVringDescEntry::default(); + tmp.data = addr + offset; + tmp.len = ctx_len as u32; + tmp.write = false; + vec.push(tmp); + offset += ctx_len; + } + + if resp.is_some() { + self.state + .borrow() + .memwrite(addr + offset, resp.as_ref().unwrap().as_bytes()); + let mut tmp = TestVringDescEntry::default(); + tmp.data = addr + offset; + tmp.len = resp_len as u32; + tmp.write = true; + vec.push(tmp); + } + + if ctrl_q { + let free_head = self + .ctrl_q + .borrow_mut() + .add_chained(self.state.clone(), vec); + + self.device + .borrow_mut() + .kick_virtqueue(self.state.clone(), self.ctrl_q.clone()); + + if wait_resp { + self.device.borrow_mut().poll_used_elem( + self.state.clone(), + self.ctrl_q.clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + } + } else { + let free_head = self + .cursor_q + .borrow_mut() + .add_chained(self.state.clone(), vec); + + self.device + .borrow_mut() + .kick_virtqueue(self.state.clone(), self.cursor_q.clone()); + + if wait_resp { + self.device.borrow_mut().poll_used_elem( + self.state.clone(), + self.cursor_q.clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + } + } + + if resp.is_some() { + let resp_bytes_new = self + .state + .borrow_mut() + .memread(addr + hdr_len + hdr_ctx_len + ctx_len, resp_len); + + let slice = + unsafe { from_raw_parts(resp_bytes_new.as_ptr() as *const T, size_of::()) }; + + *resp.unwrap() = slice[0].clone(); + } + } + + pub fn request_complete( + &mut self, + ctrl_q: bool, + hdr: &[u8], + hdr_ctx: Option<&[u8]>, + ctx: Option<&[u8]>, + resp: Option<&mut T>, + ) { + self.submit_request(ctrl_q, hdr, hdr_ctx, ctx, resp, true); + } +} + +#[derive(Clone, Debug)] +pub struct GpuDevConfig { + pub id: String, + pub max_outputs: u32, + pub edid: bool, + pub xres: u32, + pub yres: u32, + pub max_hostmem: u64, +} + +impl Default for GpuDevConfig { + fn default() -> Self { + Self { + id: String::from("gpu"), + max_outputs: 1, + edid: true, + xres: 1024, + yres: 768, + max_hostmem: 1024 * 1024 * 4, + } + } +} + +pub fn set_up( + gpu_conf: &GpuDevConfig, +) -> (Rc>, Rc>) { + let gpu_pci_slot: u8 = 0x4; + let gpu_pci_fn: u8 = 0x0; + let dpy_pci_slot: u8 = 0x3; + let dpy_pci_fn: u8 = 0x0; + + let mut args: Vec = Vec::new(); + // vm args + let vm_args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + let mut vm_args = vm_args.into_iter().map(|s| s.to_string()).collect(); + args.append(&mut vm_args); + // log args + let log_args = String::from("-D /tmp/virtio_gpu_test_log"); + let log_args: Vec<&str> = log_args[..].split(' ').collect(); + let mut log_args = log_args.into_iter().map(|s| s.to_string()).collect(); + args.append(&mut log_args); + // virtio-gpu args + let gpu_args = format!( + "-device virtio-gpu-pci,id=drv0,bus=pcie.{},addr={}.0,max_hostmem={}", + gpu_pci_fn, gpu_pci_slot, gpu_conf.max_hostmem + ); + let gpu_args: 
Vec<&str> = gpu_args[..].split(' ').collect(); + let mut gpu_args = gpu_args.into_iter().map(|s| s.to_string()).collect(); + args.append(&mut gpu_args); + // demo dpy device args + let dpy_args = format!( + "-device pcie-demo-dev,bus=pcie.{},addr={}.0,id=1,\ + bar_num=3,device_type=demo-display,bar_size=4096", + dpy_pci_fn, dpy_pci_slot + ); + let dpy_args: Vec<&str> = dpy_args[..].split(' ').collect(); + let mut dpy_args = dpy_args.into_iter().map(|s| s.to_string()).collect(); + args.append(&mut dpy_args); + let args = args.iter().map(AsRef::as_ref).collect(); + + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new_bymem(test_state.clone(), 1280 * 1024 * 1024, 4096); + let allocator = machine.allocator.clone(); + + let demo_dpy = Rc::new(RefCell::new(TestDemoDpyDevice::new( + machine.pci_bus.clone(), + allocator.clone(), + ))); + demo_dpy.borrow_mut().init(dpy_pci_slot); + + let virtgpu = Rc::new(RefCell::new(TestVirtioGpu::new( + machine.pci_bus, + allocator, + test_state, + ))); + virtgpu.borrow_mut().init(gpu_pci_slot, gpu_pci_fn); + + (demo_dpy, virtgpu) +} + +pub fn tear_down(dpy: Rc>, gpu: Rc>) { + dpy.borrow_mut().deactive(); + gpu.borrow_mut().state.borrow_mut().stop(); +} + +// VIRTIO_GPU_CMD_GET_DISPLAY_INFO +pub fn get_display_info(gpu: &Rc>) -> VirtioGpuDisplayInfo { + let mut hdr = VirtioGpuCtrlHdr::default(); + hdr.hdr_type = VIRTIO_GPU_CMD_GET_DISPLAY_INFO; + + let mut resp = VirtioGpuDisplayInfo::default(); + + gpu.borrow_mut() + .request_complete(true, hdr.as_bytes(), None, None, Some(&mut resp)); + resp +} + +// VIRTIO_GPU_CMD_GET_EDID +pub fn get_edid(gpu: &Rc>, hdr_ctx: VirtioGpuGetEdid) -> VirtioGpuRespEdid { + let mut hdr = VirtioGpuCtrlHdr::default(); + + hdr.hdr_type = VIRTIO_GPU_CMD_GET_EDID; + + let mut resp = VirtioGpuRespEdid::default(); + + gpu.borrow_mut().request_complete( + true, + hdr.as_bytes(), + Some(hdr_ctx.as_bytes()), + None, + Some(&mut resp), + ); + resp +} + +pub fn current_curosr_check(dpy: &Rc>, local: &Vec) -> bool { + let size = dpy.borrow_mut().query_cursor(); + if size as usize != local.len() { + return false; + } + let remote = dpy.borrow_mut().get_cursor(u64::from(size)); + + for (i, v) in remote.iter().enumerate() { + if v != local.get(i).unwrap() { + return false; + } + } + true +} + +pub fn current_surface_check(dpy: &Rc>, local: &Vec) -> bool { + let size = dpy.borrow_mut().query_surface(); + if size as usize != local.len() { + return false; + } + let remote = dpy.borrow_mut().get_surface(u64::from(size)); + + for (i, v) in remote.iter().enumerate() { + if v != local.get(i).unwrap() { + return false; + } + } + true +} + +// VIRTIO_GPU_CMD_RESOURCE_CREATE_2D +pub fn resource_create( + gpu: &Rc>, + hdr_ctx: VirtioGpuResourceCreate2d, +) -> VirtioGpuCtrlHdr { + let mut hdr = VirtioGpuCtrlHdr::default(); + + hdr.hdr_type = VIRTIO_GPU_CMD_RESOURCE_CREATE_2D; + + let mut resp = VirtioGpuCtrlHdr::default(); + + gpu.borrow_mut().request_complete( + true, + hdr.as_bytes(), + Some(hdr_ctx.as_bytes()), + None, + Some(&mut resp), + ); + resp +} + +// VIRTIO_GPU_CMD_RESOURCE_UNREF +pub fn resource_unref( + gpu: &Rc>, + hdr_ctx: VirtioGpuResourceUnref, +) -> VirtioGpuCtrlHdr { + let mut hdr = VirtioGpuCtrlHdr::default(); + + hdr.hdr_type = VIRTIO_GPU_CMD_RESOURCE_UNREF; + + let mut resp = VirtioGpuCtrlHdr::default(); + + gpu.borrow_mut().request_complete( + true, + hdr.as_bytes(), + Some(hdr_ctx.as_bytes()), + None, + Some(&mut resp), + ); + resp +} + +// VIRTIO_GPU_CMD_RESOURCE_FLUSH +pub fn resource_flush( 
+ gpu: &Rc>, + hdr_ctx: VirtioGpuResourceFlush, +) -> VirtioGpuCtrlHdr { + let mut hdr = VirtioGpuCtrlHdr::default(); + + hdr.hdr_type = VIRTIO_GPU_CMD_RESOURCE_FLUSH; + + let mut resp = VirtioGpuCtrlHdr::default(); + + gpu.borrow_mut().request_complete( + true, + hdr.as_bytes(), + Some(hdr_ctx.as_bytes()), + None, + Some(&mut resp), + ); + resp +} + +// VIRTIO_GPU_CMD_UPDATE_CURSOR +pub fn update_cursor(gpu: &Rc>, resource_id: u32, scanout_id: u32) { + let mut hdr = VirtioGpuCtrlHdr::default(); + + hdr.hdr_type = VIRTIO_GPU_CMD_UPDATE_CURSOR; + + let mut hdr_ctx = VirtioGpuUpdateCursor::default(); + + hdr_ctx.pos.scanout_id = scanout_id; + hdr_ctx.resource_id = resource_id; + + gpu.borrow_mut().request_complete::( + false, + hdr.as_bytes(), + Some(hdr_ctx.as_bytes()), + None, + None, + ); +} + +// VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING +pub fn resource_attach_backing( + gpu: &Rc>, + hdr_ctx: VirtioGpuResourceAttachBacking, + ctxs: Vec, +) -> VirtioGpuCtrlHdr { + let mut hdr = VirtioGpuCtrlHdr::default(); + + hdr.hdr_type = VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING; + + let mut ctx: Vec = vec![]; + for i in ctxs { + // let tmp = &i; + // let mut tmp = tmp.as_bytes().to_vec(); + ctx.append(&mut i.as_bytes().to_vec()); + } + + let mut resp = VirtioGpuCtrlHdr::default(); + + gpu.borrow_mut().request_complete( + true, + hdr.as_bytes(), + Some(hdr_ctx.as_bytes()), + Some(&ctx), + Some(&mut resp), + ); + resp +} + +pub fn resource_attach_backing_with_invalid_ctx_len( + gpu: &Rc>, + hdr_ctx: VirtioGpuResourceAttachBacking, +) -> VirtioGpuCtrlHdr { + let mut hdr = VirtioGpuCtrlHdr::default(); + + hdr.hdr_type = VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING; + + let mut resp = VirtioGpuCtrlHdr::default(); + + gpu.borrow_mut().request_complete( + true, + hdr.as_bytes(), + Some(hdr_ctx.as_bytes()), + None, + Some(&mut resp), + ); + resp +} + +// VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING +pub fn resource_detach_backing( + gpu: &Rc>, + hdr_ctx: VirtioGpuResourceDetachBacking, +) -> VirtioGpuCtrlHdr { + let mut hdr = VirtioGpuCtrlHdr::default(); + + hdr.hdr_type = VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING; + + let mut resp = VirtioGpuCtrlHdr::default(); + + gpu.borrow_mut().request_complete( + true, + hdr.as_bytes(), + Some(hdr_ctx.as_bytes()), + None, + Some(&mut resp), + ); + resp +} + +// VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D +pub fn transfer_to_host( + gpu: &Rc>, + hdr_ctx: VirtioGpuTransferToHost2d, +) -> VirtioGpuCtrlHdr { + let mut hdr = VirtioGpuCtrlHdr::default(); + hdr.hdr_type = VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D; + + let mut resp = VirtioGpuCtrlHdr::default(); + + gpu.borrow_mut().request_complete( + true, + hdr.as_bytes(), + Some(hdr_ctx.as_bytes()), + None, + Some(&mut resp), + ); + resp +} + +// VIRTIO_GPU_CMD_SET_SCANOUT +pub fn set_scanout( + gpu: &Rc>, + hdr_ctx: VirtioGpuSetScanout, +) -> VirtioGpuCtrlHdr { + let mut hdr = VirtioGpuCtrlHdr::default(); + + hdr.hdr_type = VIRTIO_GPU_CMD_SET_SCANOUT; + + let mut resp = VirtioGpuCtrlHdr::default(); + + gpu.borrow_mut().request_complete( + true, + hdr.as_bytes(), + Some(hdr_ctx.as_bytes()), + None, + Some(&mut resp), + ); + resp +} + +pub fn invalid_cmd_test(gpu: &Rc>) -> VirtioGpuCtrlHdr { + let mut hdr = VirtioGpuCtrlHdr::default(); + hdr.hdr_type = VIRTIO_GPU_CMD_GET_DISPLAY_INFO - 1; + + let mut resp = VirtioGpuCtrlHdr::default(); + + gpu.borrow_mut() + .request_complete(true, hdr.as_bytes(), None, None, Some(&mut resp)); + resp +} diff --git a/tests/mod_test/src/libdriver/virtio_pci_modern.rs b/tests/mod_test/src/libdriver/virtio_pci_modern.rs new file 
mode 100644 index 0000000000000000000000000000000000000000..dfc0f2cee8fd6ca34354da37bcb4dbae8af4b562 --- /dev/null +++ b/tests/mod_test/src/libdriver/virtio_pci_modern.rs @@ -0,0 +1,719 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::cell::RefCell; +use std::rc::Rc; +use std::time::{Duration, Instant}; + +use super::malloc::GuestAllocator; +use super::pci::{ + PCIBarAddr, PciMsixOps, TestPciDev, PCI_CAP_ID_VNDR, PCI_DEVICE_ID, PCI_SUBSYSTEM_ID, + PCI_VENDOR_ID, +}; +use super::pci_bus::TestPciBus; +use super::virtio::{ + TestVirtQueue, TestVirtioDev, VirtioDeviceOps, VIRTIO_CONFIG_S_DRIVER_OK, + VIRTIO_CONFIG_S_FEATURES_OK, VIRTIO_F_VERSION_1, +}; +use crate::libtest::TestState; +use util::offset_of; + +const VIRTIO_PCI_CAP_COMMON_CFG: u8 = 1; +const VIRTIO_PCI_CAP_NOTIFY_CFG: u8 = 2; +const VIRTIO_PCI_CAP_ISR_CFG: u8 = 3; +const VIRTIO_PCI_CAP_DEVICE_CFG: u8 = 4; + +#[repr(C, packed)] +pub struct VirtioPciCap { + cap_vndr: u8, + cap_next: u8, + cap_len: u8, + cfg_type: u8, + bar: u8, + id: u8, + padding: [u8; 2], + offset: u32, + length: u32, +} + +#[repr(C, packed)] +pub struct VirtioPciNotifyCap { + cap: VirtioPciCap, + notify_off_multiplier: u32, +} + +#[repr(C, packed)] +pub struct VirtioPciCommonCfg { + device_feature_select: u32, + device_feature: u32, + guest_feature_select: u32, + guest_feature: u32, + msix_config: u16, + num_queues: u16, + device_status: u8, + config_generation: u8, + + queue_select: u16, + queue_size: u16, + queue_msix_vector: u16, + pub queue_enable: u16, + pub queue_notify_off: u16, + pub queue_desc_lo: u32, + pub queue_desc_hi: u32, + pub queue_avail_lo: u32, + pub queue_avail_hi: u32, + pub queue_used_lo: u32, + pub queue_used_hi: u32, +} + +pub trait VirtioPCIMSIXOps { + fn set_config_vector(&self, entry: u16); + fn set_queue_vector(&self, vq_idx: u16, vector: u16); +} + +pub struct TestVirtioPciDev { + pub pci_dev: TestPciDev, + pub bar: PCIBarAddr, + pub bar_idx: u8, + pub config_msix_entry: u16, + pub config_msix_addr: u64, + pub config_msix_data: u32, + pub common_base: u32, + isr_base: u32, + pub notify_base: u32, + pub notify_off_multiplier: u32, + device_base: u32, + pub virtio_dev: TestVirtioDev, +} + +impl TestVirtioPciDev { + pub fn new(pci_bus: Rc>) -> Self { + Self { + pci_dev: TestPciDev::new(pci_bus), + bar: 0, + bar_idx: 0, + config_msix_entry: 0, + config_msix_addr: 0, + config_msix_data: 0, + common_base: 0, + isr_base: 0, + notify_base: 0, + notify_off_multiplier: 0, + device_base: 0, + virtio_dev: TestVirtioDev::new(), + } + } + + pub fn init(&mut self, pci_slot: u8, pci_fn: u8) { + let devfn = pci_slot << 3 | pci_fn; + assert!(self.pci_dev.find_pci_device(devfn)); + + let device_type = self.pci_device_type_probe().unwrap_or(0); + self.virtio_dev.device_type = device_type; + self.enable(); + self.start(); + } + + fn enable(&mut self) { + self.pci_dev.enable(); + self.bar = self.pci_dev.io_map(self.bar_idx); + } + + fn find_structure( + &self, + cfg_type: u8, + bar: &mut u8, + offset: &mut u32, + cfg_addr: Rc>>, + ) -> 
bool { + let mut addr: u8 = 0; + loop { + addr = self.pci_dev.find_capability(PCI_CAP_ID_VNDR, addr); + if addr == 0 { + break; + } + + let config_type = self + .pci_dev + .config_readb(addr + offset_of!(VirtioPciCap, cfg_type) as u8); + if config_type != cfg_type { + continue; + } + + *bar = self + .pci_dev + .config_readb(addr + offset_of!(VirtioPciCap, bar) as u8); + *offset = self + .pci_dev + .config_readl(addr + offset_of!(VirtioPciCap, offset) as u8); + + if cfg_addr.borrow().is_some() { + cfg_addr.borrow_mut().replace(addr); + } + + return true; + } + + false + } + + fn pci_device_type_probe(&mut self) -> Option { + let device_type; + let vendor_id = self.pci_dev.config_readw(PCI_VENDOR_ID); + if vendor_id != 0x1af4 { + return None; + } + + let device_id = self.pci_dev.config_readw(PCI_DEVICE_ID); + if !(0x1000..=0x107f).contains(&device_id) { + return None; + } + if device_id < 0x1040 { + device_type = self.pci_dev.config_readw(PCI_SUBSYSTEM_ID); + } else { + device_type = device_id - 0x1040; + } + + self.pci_layout_probe(); + Some(device_type) + } + + fn pci_layout_probe(&mut self) -> bool { + let mut bar: u8 = 0; + let notify_cfg_addr: Rc>> = Rc::new(RefCell::new(Some(0))); + + let mut offset: u32 = 0; + + if !self.find_structure( + VIRTIO_PCI_CAP_COMMON_CFG, + &mut bar, + &mut offset, + Rc::new(RefCell::new(None)), + ) { + return false; + } + self.common_base = offset; + self.bar_idx = bar; + + if !self.find_structure( + VIRTIO_PCI_CAP_ISR_CFG, + &mut bar, + &mut offset, + Rc::new(RefCell::new(None)), + ) { + return false; + } + self.isr_base = offset; + assert!(bar == self.bar_idx); + + if !self.find_structure( + VIRTIO_PCI_CAP_NOTIFY_CFG, + &mut bar, + &mut offset, + notify_cfg_addr.clone(), + ) { + return false; + } + self.notify_base = offset; + assert!(bar == self.bar_idx); + + self.notify_off_multiplier = self.pci_dev.config_readl( + notify_cfg_addr.borrow().unwrap() + + offset_of!(VirtioPciNotifyCap, notify_off_multiplier) as u8, + ); + + if !self.find_structure( + VIRTIO_PCI_CAP_DEVICE_CFG, + &mut bar, + &mut offset, + Rc::new(RefCell::new(None)), + ) { + return false; + } + self.device_base = offset; + assert!(bar == self.bar_idx); + + true + } + + pub fn setup_msix_configuration_vector( + &mut self, + alloc: Rc>, + entry: u16, + ) { + self.config_msix_entry = entry; + self.config_msix_data = 0x12345678; + self.config_msix_addr = alloc.borrow_mut().alloc(4); + + self.pci_dev.set_msix_vector( + self.config_msix_entry, + self.config_msix_addr, + self.config_msix_data, + ); + self.set_config_vector(self.config_msix_entry); + } + + fn has_msix(&self, msix_addr: u64, msix_data: u32) -> bool { + self.pci_dev.has_msix(msix_addr, msix_data) + } + + pub fn setup_virtqueue_intr( + &self, + entry: u16, + alloc: Rc>, + virtqueue: Rc>, + ) { + virtqueue.borrow_mut().msix_entry = entry; + virtqueue.borrow_mut().msix_addr = alloc.borrow_mut().alloc(4); + virtqueue.borrow_mut().msix_data = 0x12345678; + + self.pci_dev.set_msix_vector( + virtqueue.borrow().msix_entry, + virtqueue.borrow().msix_addr, + virtqueue.borrow().msix_data, + ); + self.set_queue_vector(virtqueue.borrow().index, entry); + } +} + +impl VirtioDeviceOps for TestVirtioPciDev { + fn config_readb(&self, addr: u64) -> u8 { + self.pci_dev + .io_readb(self.bar, u64::from(self.device_base) + addr) + } + + fn config_readw(&self, addr: u64) -> u16 { + self.pci_dev + .io_readw(self.bar, u64::from(self.device_base) + addr) + } + + fn config_readl(&self, addr: u64) -> u32 { + self.pci_dev + .io_readl(self.bar, 
u64::from(self.device_base) + addr) + } + + fn config_readq(&self, addr: u64) -> u64 { + self.pci_dev + .io_readq(self.bar, u64::from(self.device_base) + addr) + } + + #[allow(unused)] + fn config_writeb(&self, addr: u64, value: u8) { + self.pci_dev + .io_writeb(self.bar, u64::from(self.device_base) + addr, value) + } + + #[allow(unused)] + fn config_writew(&self, addr: u64, value: u16) { + self.pci_dev + .io_writew(self.bar, u64::from(self.device_base) + addr, value) + } + + #[allow(unused)] + fn config_writel(&self, addr: u64, value: u32) { + self.pci_dev + .io_writel(self.bar, u64::from(self.device_base) + addr, value) + } + + #[allow(unused)] + fn config_writeq(&self, addr: u64, value: u64) { + self.pci_dev + .io_writeq(self.bar, u64::from(self.device_base) + addr, value) + } + + fn isr_readb(&self) -> u8 { + self.pci_dev.io_readb(self.bar, u64::from(self.isr_base)) + } + + fn enable_interrupt(&mut self) { + self.pci_dev.enable_msix(None); + } + + fn disable_interrupt(&mut self) { + self.pci_dev.disable_msix(); + } + + fn get_device_features(&self) -> u64 { + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + + offset_of!(VirtioPciCommonCfg, device_feature_select) as u64, + 0, + ); + let lo: u64 = u64::from(self.pci_dev.io_readl( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, device_feature) as u64, + )); + + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + + offset_of!(VirtioPciCommonCfg, device_feature_select) as u64, + 1, + ); + let hi: u64 = u64::from(self.pci_dev.io_readl( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, device_feature) as u64, + )); + (hi << 32) | lo + } + + fn set_guest_features(&self, features: u64) { + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + + offset_of!(VirtioPciCommonCfg, guest_feature_select) as u64, + 0, + ); + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, guest_feature) as u64, + features as u32, + ); + + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + + offset_of!(VirtioPciCommonCfg, guest_feature_select) as u64, + 1, + ); + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, guest_feature) as u64, + (features >> 32) as u32, + ); + } + + fn get_guest_features(&self) -> u64 { + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + + offset_of!(VirtioPciCommonCfg, guest_feature_select) as u64, + 0, + ); + let lo: u64 = u64::from(self.pci_dev.io_readl( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, guest_feature) as u64, + )); + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + + offset_of!(VirtioPciCommonCfg, guest_feature_select) as u64, + 1, + ); + let hi: u64 = u64::from(self.pci_dev.io_readl( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, guest_feature) as u64, + )); + (hi << 32) | lo + } + + fn get_status(&self) -> u8 { + self.pci_dev.io_readb( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, device_status) as u64, + ) + } + + fn set_status(&self, status: u8) { + self.pci_dev.io_writeb( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, device_status) as u64, + status, + ) + } + + fn get_generation(&self) -> u8 { + self.pci_dev.io_readb( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, config_generation) as u64, + ) + } + + fn get_queue_nums(&self) -> u16 { 
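+        // num_queues is read from the common configuration structure
+        // (VirtioPciCommonCfg) mapped at common_base in the device BAR.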
+ self.pci_dev.io_readw( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, num_queues) as u64, + ) + } + + fn queue_select(&self, index: u16) { + self.pci_dev.io_writew( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_select) as u64, + index, + ); + } + + fn get_queue_select(&self) -> u16 { + self.pci_dev.io_readw( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_select) as u64, + ) + } + + fn set_queue_size(&self, size: u16) { + self.pci_dev.io_writew( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_size) as u64, + size, + ) + } + + fn get_queue_size(&self) -> u16 { + self.pci_dev.io_readw( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_size) as u64, + ) + } + + fn activate_queue(&self, desc: u64, avail: u64, used: u64) { + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_desc_lo) as u64, + desc as u32, + ); + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_desc_hi) as u64, + (desc >> 32) as u32, + ); + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_avail_lo) as u64, + avail as u32, + ); + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_avail_hi) as u64, + (avail >> 32) as u32, + ); + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_used_lo) as u64, + used as u32, + ); + self.pci_dev.io_writel( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_used_hi) as u64, + (used >> 32) as u32, + ); + } + + fn poll_used_elem( + &self, + test_state: Rc>, + virtqueue: Rc>, + desc_idx: u32, + timeout_us: u64, + len: &mut Option, + wait_notified: bool, + ) { + let start_time = Instant::now(); + let timeout_us = Duration::from_micros(timeout_us); + + loop { + if (!wait_notified || self.queue_was_notified(virtqueue.clone())) + && virtqueue.borrow_mut().get_buf(test_state.clone()) + { + if let Some(got_len) = virtqueue.borrow().desc_len.get(&desc_idx) { + if let Some(len) = len { + *len = *got_len; + } + break; + } + } + assert!(Instant::now() - start_time < timeout_us); + } + } + + fn queue_was_notified(&self, virtqueue: Rc>) -> bool { + if self.pci_dev.msix_enabled { + return self.has_msix(virtqueue.borrow().msix_addr, virtqueue.borrow().msix_data); + } + + self.pci_dev.has_intx() + } + + fn setup_virtqueue( + &self, + test_state: Rc>, + alloc: Rc>, + index: u16, + ) -> Rc> { + let virtqueue = Rc::new(RefCell::new(TestVirtQueue::new())); + virtqueue.borrow_mut().setup(self, alloc, index); + virtqueue.borrow().vring_init(test_state); + + let desc = virtqueue.borrow().desc; + let avail = virtqueue.borrow().avail; + let used = virtqueue.borrow().used; + self.activate_queue(desc, avail, used); + + let notify_off = self.pci_dev.io_readw( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_notify_off) as u64, + ); + + virtqueue.borrow_mut().queue_notify_off = u64::from(self.notify_base) + + u64::from(notify_off) * u64::from(self.notify_off_multiplier); + + self.pci_dev.io_writew( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_enable) as u64, + 1, + ); + + virtqueue + } + + fn cleanup_virtqueue(&self, alloc: Rc>, desc_addr: u64) { + alloc.borrow_mut().free(desc_addr); + } + + fn 
virtqueue_notify(&self, virtqueue: Rc>) { + let index = virtqueue.borrow().index; + let notify_offset = virtqueue.borrow().queue_notify_off; + self.pci_dev.io_writew(self.bar, notify_offset, index); + } + + /// Notify the virtio device to process req. free_head is Head + /// of free buffer list of descriptor table. num_add is the number + /// of the io request added to the virtqueue. + fn kick_virtqueue( + &self, + test_state: Rc>, + virtqueue: Rc>, + ) { + let vq = virtqueue.borrow(); + let idx: u16 = test_state.borrow().readw(vq.avail + 2); + + if (!vq.event) || (idx > vq.get_avail_event(test_state)) { + self.virtqueue_notify(virtqueue.clone()); + } + } + + fn reset(&mut self) { + self.set_status(0); + assert_eq!(self.get_status(), 0); + self.virtio_dev.feature_negotiated = false; + } + + fn negotiate_features(&mut self, features: u64) { + self.virtio_dev.features = features; + self.set_guest_features(features); + } + + fn set_features_ok(&mut self) { + if (self.get_guest_features() & (1 << VIRTIO_F_VERSION_1)) != 0 { + let status: u8 = self.get_status() | VIRTIO_CONFIG_S_FEATURES_OK; + self.set_status(status); + assert_eq!(self.get_status(), status); + } + + self.virtio_dev.feature_negotiated = true; + } + + fn set_driver_ok(&self) { + let status = self.get_status() | VIRTIO_CONFIG_S_DRIVER_OK; + self.set_status(status); + assert_eq!(self.get_status(), status); + } + + fn init_virtqueue( + &mut self, + test_state: Rc>, + alloc: Rc>, + num_queues: usize, + ) -> Vec>> { + assert!(num_queues < (1 << 15)); + let mut virtqueues = Vec::new(); + for i in 0..num_queues { + let virtqueue = self.setup_virtqueue(test_state.clone(), alloc.clone(), i as u16); + self.setup_virtqueue_intr((i + 1) as u16, alloc.clone(), virtqueue.clone()); + virtqueues.push(virtqueue); + } + + virtqueues + } + + fn init_device( + &mut self, + test_state: Rc>, + alloc: Rc>, + features: u64, + num_queues: usize, + ) -> Vec>> { + // Reset device by write 0 to device status. + self.reset(); + self.set_acknowledge(); + self.set_driver(); + self.negotiate_features(features); + assert_eq!(self.get_guest_features(), features); + self.set_features_ok(); + // FIXME: add handling the specific device features as needed. 
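+        // (Illustrative note, not required by this patch: callers can also pre-mask
+        // device-specific bits before negotiation, as virtio_blk_default_feature() in
+        // virtio_block.rs does by clearing VIRTIO_BLK_F_SCSI and the ring
+        // indirect/event-idx feature bits.)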
+ + self.pci_dev.enable_msix(None); + self.setup_msix_configuration_vector(alloc.clone(), 0); + let vqs = self.init_virtqueue(test_state, alloc, num_queues); + + self.set_driver_ok(); + vqs + } + + fn destroy_device( + &mut self, + alloc: Rc>, + vqs: Vec>>, + ) { + self.reset(); + self.pci_dev.disable_msix(); + for vq in vqs.iter() { + self.cleanup_virtqueue(alloc.clone(), vq.borrow().desc); + } + } +} + +impl VirtioPCIMSIXOps for TestVirtioPciDev { + fn set_config_vector(&self, vector: u16) { + self.pci_dev.io_writew( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, msix_config) as u64, + vector, + ); + let vector_get: u16 = self.pci_dev.io_readw( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, msix_config) as u64, + ); + assert_eq!( + vector, vector_get, + "WARN: set config vector {}, get vector {}", + vector, vector_get + ); + } + + fn set_queue_vector(&self, vq_idx: u16, vector: u16) { + self.queue_select(vq_idx); + self.pci_dev.io_writew( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_msix_vector) as u64, + vector, + ); + let vector_get: u16 = self.pci_dev.io_readw( + self.bar, + u64::from(self.common_base) + offset_of!(VirtioPciCommonCfg, queue_msix_vector) as u64, + ); + if vector_get != vector { + println!("WARN: set vector {}, get vector {}", vector, vector_get); + } + } +} diff --git a/tests/mod_test/src/libdriver/virtio_rng.rs b/tests/mod_test/src/libdriver/virtio_rng.rs new file mode 100644 index 0000000000000000000000000000000000000000..082d714a096633b6408add03e7876aa35431dd9c --- /dev/null +++ b/tests/mod_test/src/libdriver/virtio_rng.rs @@ -0,0 +1,57 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
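+//! Test helper that boots a StratoVirt VM with a `virtio-rng-pci` device whose entropy
+//! source is an `rng-random` object backed by the given `random_file`.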
+ +use std::cell::RefCell; +use std::rc::Rc; + +use super::machine::TestStdMachine; +use super::malloc::GuestAllocator; +use super::virtio_pci_modern::TestVirtioPciDev; +use crate::libtest::{test_init, TestState, MACHINE_TYPE_ARG}; + +pub fn create_rng( + random_file: String, + max_bytes: u64, + period: u64, +) -> ( + Rc>, + Rc>, + Rc>, +) { + let pci_slot: u8 = 0x4; + let pci_fn: u8 = 0x0; + let mut extra_args: Vec<&str> = Vec::new(); + + let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + extra_args.append(&mut args); + + let rng_pci_args = format!( + "-device {},rng=objrng0,max-bytes={},period={},bus=pcie.0,addr={}.0x0,id=rng-id", + "virtio-rng-pci", max_bytes, period, pci_slot + ); + args = rng_pci_args[..].split(' ').collect(); + extra_args.append(&mut args); + + let rng_args = format!("-object rng-random,id=objrng0,filename={}", random_file); + args = rng_args.split(' ').collect(); + extra_args.append(&mut args); + + let test_state = Rc::new(RefCell::new(test_init(extra_args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator.clone(); + + let rng = Rc::new(RefCell::new(TestVirtioPciDev::new(machine.pci_bus))); + + rng.borrow_mut().init(pci_slot, pci_fn); + + (rng, test_state, allocator) +} diff --git a/tests/mod_test/src/libdriver/virtiofs.rs b/tests/mod_test/src/libdriver/virtiofs.rs new file mode 100644 index 0000000000000000000000000000000000000000..2dca34fde169bbd8782d373be5ad82518faa4c74 --- /dev/null +++ b/tests/mod_test/src/libdriver/virtiofs.rs @@ -0,0 +1,857 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
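+//! FUSE protocol constants and request/response structures used by the virtio-fs module tests.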
+ +use std::mem::size_of; + +use util::byte_code::ByteCode; + +pub const FUSE_LOOKUP: u32 = 1; +pub const FUSE_FORGET: u32 = 2; +pub const FUSE_GETATTR: u32 = 3; +pub const FUSE_SETATTR: u32 = 4; +pub const FUSE_READLINK: u32 = 5; +pub const FUSE_SYMLINK: u32 = 6; +pub const FUSE_MKNOD: u32 = 8; +pub const FUSE_MKDIR: u32 = 9; +pub const FUSE_UNLINK: u32 = 10; +pub const FUSE_RMDIR: u32 = 11; +pub const FUSE_RENAME: u32 = 12; +pub const FUSE_LINK: u32 = 13; +pub const FUSE_OPEN: u32 = 14; +pub const FUSE_READ: u32 = 15; +pub const FUSE_WRITE: u32 = 16; +pub const FUSE_STATFS: u32 = 17; +pub const FUSE_RELEASE: u32 = 18; +pub const FUSE_FSYNC: u32 = 20; +pub const FUSE_SETXATTR: u32 = 21; +pub const FUSE_GETXATTR: u32 = 22; +pub const FUSE_LISTXATTR: u32 = 23; +pub const FUSE_REMOVEXATTR: u32 = 24; +pub const FUSE_FLUSH: u32 = 25; +pub const FUSE_INIT: u32 = 26; +pub const FUSE_OPENDIR: u32 = 27; +pub const FUSE_READDIR: u32 = 28; +pub const FUSE_RELEASEDIR: u32 = 29; +pub const FUSE_FSYNCDIR: u32 = 30; +pub const FUSE_GETLK: u32 = 31; +pub const FUSE_SETLK: u32 = 32; +pub const FUSE_SETLKW: u32 = 33; +pub const FUSE_ACCESS: u32 = 34; +pub const FUSE_CREATE: u32 = 35; +pub const FUSE_INTERRUPT: u32 = 36; +pub const FUSE_BMAP: u32 = 37; +pub const FUSE_DESTROY: u32 = 38; +pub const FUSE_IOCTL: u32 = 39; +pub const FUSE_POLL: u32 = 40; +pub const FUSE_NOTIFY_REPLY: u32 = 41; +pub const FUSE_BATCH_FORGET: u32 = 42; +pub const FUSE_FALLOCATE: u32 = 43; +pub const FUSE_READDIRPLUS: u32 = 44; +pub const FUSE_RENAME2: u32 = 45; +pub const FUSE_LSEEK: u32 = 46; +pub const FUSE_COPY_FILE_RANGE: u32 = 47; +pub const FUSE_SETUPMAPPING: u32 = 48; +pub const FUSE_REMOVEMAPPING: u32 = 49; + +/// The kernel version which is supported by fuse messages. +pub const FUSE_KERNEL_VERSION: u32 = 7; +/// The minor version which is supported by fuse messages. +pub const FUSE_KERNEL_MINOR_VERSION: u32 = 32; + +/// The supported bit that supports asynchronous read requests. +pub const FUSE_ASYNC_READ: u32 = 1 << 0; +/// The supported bit that supports posix file locks. +pub const FUSE_POSIX_LOCKS: u32 = 1 << 1; +/// The supported bit that supports the O_TRUNC open flag. +pub const FUSE_ATOMIC_O_TRUNC: u32 = 1 << 3; +/// The supported bit that supports lookups of "." and "..". +pub const FUSE_EXPORT_SUPPORT: u32 = 1 << 4; +/// The supported bit that don't apply umask to file mode on create operation. +pub const FUSE_DONT_MASK: u32 = 1 << 6; +/// The supported bit that supports BSD file locks. +pub const FUSE_FLOCK_LOCKS: u32 = 1 << 10; +/// The supported bit that automatically checks invalid cached file. +pub const FUSE_AUTO_INVAL_DATA: u32 = 1 << 12; +/// The supported bit that supports readdirplus. +pub const FUSE_DO_READDIRPLUS: u32 = 1 << 13; +/// The supported bit that supports adaptive readdirplus. +pub const FUSE_READDIRPLUS_AUTO: u32 = 1 << 14; +/// The supported bit that supports asynchronous direct I/O submission. +pub const FUSE_ASYNC_DIO: u32 = 1 << 15; +/// The supported bit that supports for parallel directory operations. +pub const FUSE_PARALLEL_DIROPS: u32 = 1 << 18; +/// The supported bit that supports POSIX ACLs. +pub const FUSE_POSIX_ACL: u32 = 1 << 20; +/// The supported bit that needs to reply the max number of pages in init fuse message. 
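/// (When negotiated, the reply carries the limit in the max_pages field of FuseInitOut below.)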
+pub const FUSE_MAX_PAGES: u32 = 1 << 22; + +pub const FATTR_MODE: u32 = 1 << 0; +pub const FATTR_UID: u32 = 1 << 1; +pub const FATTR_GID: u32 = 1 << 2; +pub const FATTR_SIZE: u32 = 1 << 3; +pub const FATTR_ATIME: u32 = 1 << 4; +pub const FATTR_MTIME: u32 = 1 << 5; +pub const FATTR_FH: u32 = 1 << 6; +pub const FATTR_ATIME_NOW: u32 = 1 << 7; +pub const FATTR_MTIME_NOW: u32 = 1 << 8; +pub const FATTR_LOCKOWNER: u32 = 1 << 9; +pub const FATTR_CTIME: u32 = 1 << 10; + +/// Successfully process the fuse message. +pub const FUSE_OK: i32 = 0; +pub const FUSE_SET_ATTR_MODE: u32 = 1 << 0; +pub const FUSE_SET_ATTR_UID: u32 = 1 << 1; +pub const FUSE_SET_ATTR_GID: u32 = 1 << 2; +pub const FUSE_SET_ATTR_SIZE: u32 = 1 << 3; +pub const FUSE_SET_ATTR_ATIME: u32 = 1 << 4; +pub const FUSE_SET_ATTR_MTIME: u32 = 1 << 5; +pub const FUSE_SET_ATTR_ATIME_NOW: u32 = 1 << 7; +pub const FUSE_SET_ATTR_MTIME_NOW: u32 = 1 << 8; +pub const FUSE_SET_ATTR_CTIME: u32 = 1 << 10; + +pub const XATTR_CREATE: u32 = 0x1; // set value, fail if attr already exists +pub const XATTR_REPLACE: u32 = 0x2; // set value, fail if attr does not exist + +pub const TEST_MAX_READAHEAD: u32 = 1048576; + +pub const TEST_FLAG: u32 = FUSE_ASYNC_READ + | FUSE_POSIX_LOCKS + | FUSE_ATOMIC_O_TRUNC + | FUSE_EXPORT_SUPPORT + | FUSE_DONT_MASK + | FUSE_FLOCK_LOCKS + | FUSE_AUTO_INVAL_DATA + | FUSE_DO_READDIRPLUS + | FUSE_READDIRPLUS_AUTO + | FUSE_ASYNC_DIO + | FUSE_PARALLEL_DIROPS + | FUSE_POSIX_ACL + | FUSE_MAX_PAGES; + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseInHeader { + pub len: u32, + pub opcode: u32, + pub unique: u64, + pub nodeid: u64, + pub uid: u32, + pub gid: u32, + pub pid: u32, + pub padding: u32, +} + +impl FuseInHeader { + pub fn new( + len: u32, + opcode: u32, + unique: u64, + nodeid: u64, + uid: u32, + gid: u32, + pid: u32, + padding: u32, + ) -> FuseInHeader { + FuseInHeader { + len, + opcode, + unique, + nodeid, + uid, + gid, + pid, + padding, + } + } +} + +impl ByteCode for FuseInHeader {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseOutHeader { + pub len: u32, + pub error: i32, + pub unique: u64, +} + +impl ByteCode for FuseOutHeader {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseAttr { + pub ino: u64, + pub size: u64, + pub blocks: u64, + pub atime: u64, + pub mtime: u64, + pub ctime: u64, + pub atimensec: u32, + pub mtimensec: u32, + pub ctimensec: u32, + pub mode: u32, + pub nlink: u32, + pub uid: u32, + pub gid: u32, + pub rdev: u32, + pub blksize: u32, + pub flags: u32, +} + +impl ByteCode for FuseAttr {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseEntryOut { + pub nodeid: u64, + pub generation: u64, + pub entry_valid: u64, + pub attr_valid: u64, + pub entry_valid_nsec: u32, + pub attr_valid_nsec: u32, + pub attr: FuseAttr, +} + +impl ByteCode for FuseEntryOut {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseForgetIn { + pub nlookup: u64, +} + +impl ByteCode for FuseForgetIn {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseForgetOut { + pub dummy: u64, +} + +impl ByteCode for FuseForgetOut {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseAttrOut { + pub attr_valid: u64, + pub attr_valid_nsec: u32, + pub dummy: u32, + pub attr: FuseAttr, +} + +impl ByteCode for FuseAttrOut {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseGetAttrIn { + pub getattr_flags: u32, + pub dummy: u32, + pub fh: u64, +} + +impl ByteCode for FuseGetAttrIn {} + 
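// Illustrative sketch, not part of this patch: the FATTR_*/FUSE_SET_ATTR_* bits
// above select which fields of FuseSetattrIn (just below) the server should
// honour. Truncating an already-open file to zero length, for instance, would
// set only the size and file-handle bits (the helper name is hypothetical):
#[allow(dead_code)]
fn example_truncate_setattr(fh: u64) -> FuseSetattrIn {
    FuseSetattrIn {
        valid: FATTR_SIZE | FATTR_FH,
        fh,
        size: 0,
        ..Default::default()
    }
}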
+#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseSetattrIn { + pub valid: u32, + pub padding: u32, + pub fh: u64, + pub size: u64, + pub lock_owner: u64, + pub atime: u64, + pub mtime: u64, + pub ctime: u64, + pub atimensec: u32, + pub mtimensec: u32, + pub ctimensec: u32, + pub mode: u32, + pub unused4: u32, + pub uid: u32, + pub gid: u32, + pub unused5: u32, +} + +impl ByteCode for FuseSetattrIn {} + +#[repr(C)] +#[derive(Debug, Default, Clone)] +pub struct FuseMknodIn { + pub mode: u32, + pub rdev: u32, + pub umask: u32, + pub padding: u32, + pub name: String, +} + +impl FuseMknodIn { + pub fn as_bytes(&self) -> Vec { + let mut bytes = Vec::new(); + bytes.append(&mut self.mode.as_bytes().to_vec()); + bytes.append(&mut self.rdev.as_bytes().to_vec()); + bytes.append(&mut self.umask.as_bytes().to_vec()); + bytes.append(&mut self.padding.as_bytes().to_vec()); + bytes.append(&mut self.name.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes + } + + pub fn len(&self) -> usize { + size_of::() * 4 + self.name.len() + 1 + } +} + +#[repr(C)] +#[derive(Debug, Default, Clone)] +pub struct FuseRenameIn { + pub newdir: u64, + pub oldname: String, + pub newname: String, +} + +impl FuseRenameIn { + pub fn as_bytes(&self) -> Vec { + let mut bytes = Vec::new(); + bytes.append(&mut self.newdir.as_bytes().to_vec()); + bytes.append(&mut self.oldname.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes.append(&mut self.newname.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes + } + + pub fn len(&self) -> usize { + size_of::() + self.oldname.len() + self.newname.len() + 2 + } +} + +#[repr(C)] +#[derive(Debug, Default, Clone)] +pub struct FuseLinkIn { + pub oldnodeid: u64, + pub newname: String, +} + +impl FuseLinkIn { + pub fn as_bytes(&self) -> Vec { + let mut bytes = Vec::new(); + bytes.append(&mut self.oldnodeid.as_bytes().to_vec()); + bytes.append(&mut self.newname.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes + } + + pub fn len(&self) -> usize { + size_of::() + self.newname.len() + 1 + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseOpenIn { + pub flags: u32, + pub unused: u32, +} + +impl ByteCode for FuseOpenIn {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseOpenOut { + pub fh: u64, + pub open_flags: u32, + pub padding: u32, +} + +impl ByteCode for FuseOpenOut {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseReadIn { + pub fh: u64, + pub offset: u64, + pub size: u32, + pub read_flags: u32, + pub lock_owner: u64, + pub flags: u32, + pub padding: u32, +} + +impl ByteCode for FuseReadIn {} + +#[repr(C)] +#[derive(Debug, Default, Clone)] +pub struct FuseWriteIn { + pub fh: u64, + pub offset: u64, + pub size: u32, + pub write_flags: u32, + pub lock_owner: u64, + pub flags: u32, + pub padding: u32, + pub write_buf: String, +} + +impl FuseWriteIn { + pub fn new(fh: u64, offset: u64, write_buf: String) -> Self { + FuseWriteIn { + fh, + offset, + size: (write_buf.len() + 1) as u32, + write_flags: 0_u32, + lock_owner: 0_u64, + flags: O_WRONLY, + padding: 0, + write_buf, + } + } + + pub fn as_bytes(&self) -> Vec { + let mut bytes = Vec::new(); + bytes.append(&mut self.fh.as_bytes().to_vec()); + bytes.append(&mut self.offset.as_bytes().to_vec()); + bytes.append(&mut self.size.as_bytes().to_vec()); + bytes.append(&mut self.write_flags.as_bytes().to_vec()); + bytes.append(&mut self.lock_owner.as_bytes().to_vec()); + bytes.append(&mut self.flags.as_bytes().to_vec()); + bytes.append(&mut 
self.padding.as_bytes().to_vec()); + bytes.append(&mut self.write_buf.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseWriteOut { + pub size: u32, + pub padding: u32, +} + +impl ByteCode for FuseWriteOut {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseKstatfs { + pub blocks: u64, + pub bfree: u64, + pub bavail: u64, + pub files: u64, + pub ffree: u64, + pub bsize: u32, + pub namelen: u32, + pub frsize: u32, + pub padding: u32, + pub spare: [u32; 6], +} + +impl ByteCode for FuseKstatfs {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseStatfsOut { + pub st: FuseKstatfs, +} + +impl ByteCode for FuseStatfsOut {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseReleaseIn { + pub fh: u64, + pub flags: u32, + pub release_flags: u32, + pub lock_owner: u64, +} + +impl ByteCode for FuseReleaseIn {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseFsyncIn { + pub fh: u64, + pub fsync_flags: u32, + pub padding: u32, +} + +impl ByteCode for FuseFsyncIn {} + +#[repr(C)] +#[derive(Debug, Default, Clone)] +pub struct FuseSetxattrIn { + pub size: u32, + pub flags: u32, + pub name: String, + pub value: String, +} + +impl FuseSetxattrIn { + pub fn as_bytes(&self) -> Vec { + let mut bytes = Vec::new(); + bytes.append(&mut self.size.as_bytes().to_vec()); + bytes.append(&mut self.flags.as_bytes().to_vec()); + bytes.append(&mut self.name.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes.append(&mut self.value.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes + } + + pub fn len(&self) -> usize { + size_of::() * 2 + self.name.len() + self.value.len() + 2 + } +} + +#[repr(C)] +#[derive(Debug, Default, Clone)] +pub struct FuseGetxattrIn { + pub size: u32, + pub padding: u32, + pub name: String, +} + +impl FuseGetxattrIn { + pub fn as_bytes(&self) -> Vec { + let mut bytes = Vec::new(); + bytes.append(&mut self.size.as_bytes().to_vec()); + bytes.append(&mut self.padding.as_bytes().to_vec()); + bytes.append(&mut self.name.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes + } + + pub fn len(&self) -> usize { + size_of::() * 2 + self.name.len() + 1 + } +} + +pub struct FuseRemoveXattrIn { + pub name: String, +} + +impl FuseRemoveXattrIn { + pub fn as_bytes(&self) -> Vec { + let mut bytes = Vec::new(); + bytes.append(&mut self.name.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseInitIn { + pub major: u32, + pub minor: u32, + pub max_readahead: u32, + pub flags: u32, +} + +impl ByteCode for FuseInitIn {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseInitOut { + pub major: u32, + pub minor: u32, + pub max_readahead: u32, + pub flags: u32, + pub max_background: u16, + pub congestion_threshold: u16, + pub max_write: u32, + pub time_gran: u32, + pub max_pages: u16, + pub map_alignment: u16, + pub unused: [u32; 8], +} + +impl ByteCode for FuseInitOut {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseDirent { + pub ino: u64, + pub off: u64, + pub namelen: u32, + pub type_: u32, + pub name: [u8; 0], +} + +impl ByteCode for FuseDirent {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseDirentplus { + pub entry_out: FuseEntryOut, + pub dirent: FuseDirent, +} + +impl ByteCode for FuseDirentplus {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseFlushIn 
{ + pub fh: u64, + pub unused: u32, + pub padding: u32, + pub lock_owner: u64, +} + +impl ByteCode for FuseFlushIn {} + +pub const F_RDLCK: u32 = 0; +pub const F_WRLCK: u32 = 1; +pub const F_UNLCK: u32 = 2; + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseFileLock { + pub start: u64, + pub end: u64, + pub lock_type: u32, + pub pid: u32, +} + +impl ByteCode for FuseFileLock {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseLkIn { + pub fh: u64, + pub owner: u64, + pub lk: FuseFileLock, + pub lk_flags: u32, + pub padding: u32, +} + +impl ByteCode for FuseLkIn {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseLkOut { + pub lk: FuseFileLock, +} + +impl ByteCode for FuseLkOut {} + +#[repr(C)] +#[derive(Debug, Default, Clone)] +pub struct FuseCreateIn { + pub flags: u32, + pub mode: u32, + pub umask: u32, + pub padding: u32, + pub name: String, +} + +impl FuseCreateIn { + pub fn as_bytes(&self) -> Vec { + let mut bytes = Vec::new(); + bytes.append(&mut self.flags.as_bytes().to_vec()); + bytes.append(&mut self.mode.as_bytes().to_vec()); + bytes.append(&mut self.umask.as_bytes().to_vec()); + bytes.append(&mut self.padding.as_bytes().to_vec()); + bytes.append(&mut self.name.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseCreateOut { + pub create_out: FuseEntryOut, + pub open_out: FuseOpenOut, +} + +impl ByteCode for FuseCreateOut {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseBatchForgetIn { + pub count: u32, + pub dummy: u32, +} + +impl ByteCode for FuseBatchForgetIn {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseForgetDataIn { + pub ino: u64, + pub nlookup: u64, +} + +impl ByteCode for FuseForgetDataIn {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseFallocateIn { + pub fh: u64, + pub offset: u64, + pub length: u64, + pub mode: u32, + pub padding: u32, +} + +impl ByteCode for FuseFallocateIn {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseLseekIn { + pub fh: u64, + pub offset: u64, + pub whence: u32, + pub padding: u32, +} + +impl ByteCode for FuseLseekIn {} + +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FuseLseekOut { + pub offset: u64, +} + +impl ByteCode for FuseLseekOut {} + +#[repr(C)] +#[derive(Debug, Default, Clone)] +pub struct FuseLookupIn { + pub name: String, +} + +impl FuseLookupIn { + pub fn as_bytes(&self) -> Vec { + let mut bytes = Vec::new(); + bytes.append(&mut self.name.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes + } +} + +pub struct FuseUnlinkrIn { + pub name: String, +} + +impl FuseUnlinkrIn { + pub fn as_bytes(&self) -> Vec { + let mut bytes = Vec::new(); + bytes.append(&mut self.name.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes + } + + pub fn len(&self) -> usize { + self.name.len() + 1 + } +} + +#[repr(C)] +#[derive(Debug)] +pub struct FusesysmlinkIn { + pub name: String, + pub linkname: String, +} + +impl FusesysmlinkIn { + pub fn as_bytes(&self) -> Vec { + let mut bytes = Vec::new(); + bytes.append(&mut self.name.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes.append(&mut self.linkname.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes + } +} + +#[repr(C)] +#[derive(Debug, Default, Clone)] +pub struct FuseMkdirIn { + pub mode: u32, + pub umask: u32, + pub name: String, +} + +impl FuseMkdirIn { + pub fn as_bytes(&self) -> Vec { + let mut bytes = 
Vec::new(); + bytes.append(&mut self.mode.as_bytes().to_vec()); + bytes.append(&mut self.umask.as_bytes().to_vec()); + bytes.append(&mut self.name.as_bytes().to_vec()); + bytes.append(&mut vec![0]); + bytes + } + + pub fn len(&self) -> usize { + size_of::() * 2 + self.name.len() + 1 + } +} + +pub enum SeccompAction { + None, + Kill, + Log, + Trap, +} + +impl std::fmt::Display for SeccompAction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + SeccompAction::None => "none", + SeccompAction::Kill => "kill", + SeccompAction::Log => "log", + SeccompAction::Trap => "trap", + } + ) + } +} + +pub enum SandBoxMechanism { + Chroot, + Namespace, +} + +impl std::fmt::Display for SandBoxMechanism { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + SandBoxMechanism::Chroot => "chroot", + SandBoxMechanism::Namespace => "namespace", + } + ) + } +} + +// Read only. +pub const O_RDONLY: u32 = 0o000000; +// Write only. +pub const O_WRONLY: u32 = 0o000001; +// Read-Write. +pub const O_RDWR: u32 = 0o000002; +pub const O_CREAT: u32 = 0o000100; +pub const O_TRUNC: u32 = 0o001000; +pub const O_NONBLOCK: u32 = 0o004000; +// Direct disk access hint. +pub const O_DIRECT: u32 = 0o040000; +// Don't follow links. +pub const O_NOFOLLOW: u32 = 0o400000; + +// lseek. +pub const SEEK_SET: u32 = 0; +pub const SEEK_CUR: u32 = 1; +pub const SEEK_END: u32 = 2; diff --git a/tests/mod_test/src/libdriver/vnc.rs b/tests/mod_test/src/libdriver/vnc.rs new file mode 100644 index 0000000000000000000000000000000000000000..bf7034702bedc06b0804dd133c3b573e8e65eeb1 --- /dev/null +++ b/tests/mod_test/src/libdriver/vnc.rs @@ -0,0 +1,1463 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + cell::RefCell, + cmp, + io::{self, Read, Write}, + net::{Shutdown, SocketAddr, TcpStream}, + os::unix::prelude::AsRawFd, + rc::Rc, + thread::sleep, + time::Duration, +}; + +use anyhow::{bail, Result}; +use core::time; +use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet}; + +use super::{ + machine::TestStdMachine, + malloc::GuestAllocator, + pci::{PCIBarAddr, TestPciDev, PCI_VENDOR_ID}, + pci_bus::TestPciBus, +}; +use crate::{ + libdriver::vnc::EncodingType::*, + libtest::{test_init, TestState, MACHINE_TYPE_ARG}, +}; + +const EPOLL_DEFAULT_TIMEOUT: i32 = 1000; +pub const MAX_RECVBUF_LEN: usize = 1024; +pub const READ_TIME_OUT: u64 = 30; +pub const RFB_PORT_OFFSET: u16 = 5900; +/// Size of subrectangle. +const HEXTILE_BLOCK_SIZE: usize = 16; +/// SubEncoding type of hextile. +const RAW: u8 = 0x01; +const BACKGROUND_SPECIFIC: u8 = 0x02; +const FOREGROUND_SPECIFIC: u8 = 0x04; +const ANY_SUBRECTS: u8 = 0x08; +const SUBRECTS_COLOURED: u8 = 0x10; + +pub const PIXMAN_A8B8G8R8: u32 = 0; +pub const PIXMAN_X2R10G10B10: u32 = 1; +pub const PIXMAN_R8G8B8: u32 = 2; +pub const PIXMAN_A1: u32 = 3; +pub const PIXMAN_YUY2: u32 = 4; +pub const REFRESH_TIME_INTERVAL: u64 = 3000 * 1000 * 1000; + +/// Input event. 
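/// The discriminant is the first byte of the record that the demo input device
/// writes into guest memory; read_input_event() below decodes the full layout as
/// type at offset 0, keycode at 1, down at 3, button at 4, x at 8 and y at 12.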
+#[derive(Debug, Clone, Copy, Default)] +pub enum InputEvent { + KbdEvent = 0, + MouseEvent = 1, + #[default] + InvalidEvent = 255, +} + +impl From for InputEvent { + fn from(v: u8) -> Self { + match v { + 0 => InputEvent::KbdEvent, + 1 => InputEvent::MouseEvent, + _ => InputEvent::InvalidEvent, + } + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct InputMessage { + pub event_type: InputEvent, + pub keycode: u16, + pub down: u8, + pub button: u32, + pub x: u32, + pub y: u32, +} + +/// GPU device Event. +#[derive(Debug, Clone, Copy, Default)] +pub enum GpuEvent { + ReplaceSurface = 0, + ReplaceCursor = 1, + GraphicUpdateArea = 2, + GraphicUpdateDirty = 3, + #[default] + Deactive = 4, +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct TestGpuCmd { + pub event_type: GpuEvent, + pub x: u32, + pub y: u32, + pub w: u32, + pub h: u32, + pub data_len: u32, +} + +// Encodings Type +#[repr(u32)] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum EncodingType { + EncodingRaw = 0x00000000, + EncodingCopyrect = 0x00000001, + EncodingHextile = 0x00000005, + EncodingZlib = 0x00000006, + EncodingTight = 0x00000007, + EncodingZrle = 0x00000010, + EncodingZywrle = 0x00000011, + EncodingCompresslevel0 = 0xFFFFFF00, + EncodingQualitylevel0 = 0xFFFFFFE0, + EncodingRichCursor = 0xFFFFFF11, + EncodingAlphaCursor = 0xFFFFFEC6, + EncodingDesktopresize = 0xFFFFFF21, + EncodingPointerTypeChange = 0xFFFFFEFF, + EncodingExtKeyEvent = 0xFFFFFEFE, + EncodingAudio = 0xFFFFFEFD, + EncodingTightPng = 0xFFFFFEFC, + EncodingLedState = 0xFFFFFEFB, + EncodingWmvi = 0x574D5669, + EncodingInvalid = 0xFFFFFFFF, +} + +impl EncodingType { + pub const ENCODINGTYPE: [Self; 18] = [ + EncodingRaw, + EncodingCopyrect, + EncodingHextile, + EncodingZlib, + EncodingTight, + EncodingZrle, + EncodingZywrle, + EncodingCompresslevel0, + EncodingQualitylevel0, + EncodingRichCursor, + EncodingAlphaCursor, + EncodingDesktopresize, + EncodingPointerTypeChange, + EncodingExtKeyEvent, + EncodingAudio, + EncodingTightPng, + EncodingLedState, + EncodingWmvi, + ]; +} + +impl From for EncodingType { + fn from(v: u32) -> Self { + match v { + 0x00000000 => EncodingType::EncodingRaw, + 0x00000001 => EncodingType::EncodingCopyrect, + 0x00000005 => EncodingType::EncodingHextile, + 0x00000006 => EncodingType::EncodingZlib, + 0x00000007 => EncodingType::EncodingTight, + 0x00000010 => EncodingType::EncodingZrle, + 0x00000011 => EncodingType::EncodingZywrle, + 0xFFFFFF00 => EncodingType::EncodingCompresslevel0, + 0xFFFFFFE0 => EncodingType::EncodingQualitylevel0, + 0xFFFFFF11 => EncodingType::EncodingRichCursor, + 0xFFFFFEC6 => EncodingType::EncodingAlphaCursor, + 0xFFFFFF21 => EncodingType::EncodingDesktopresize, + 0xFFFFFEFF => EncodingType::EncodingPointerTypeChange, + 0xFFFFFEFE => EncodingType::EncodingExtKeyEvent, + 0xFFFFFEFD => EncodingType::EncodingAudio, + 0xFFFFFEFC => EncodingType::EncodingTightPng, + 0xFFFFFEFB => EncodingType::EncodingLedState, + 0x574D5669 => EncodingType::EncodingWmvi, + _ => EncodingType::EncodingInvalid, + } + } +} + +#[derive(Debug, PartialEq, Eq)] +pub enum RfbServerMsg { + FramebufferUpdate = 0, + SetColourMapEntries = 1, + InvalidMsg, +} + +impl From for RfbServerMsg { + fn from(v: u8) -> Self { + match v { + 0 => RfbServerMsg::FramebufferUpdate, + 1 => RfbServerMsg::SetColourMapEntries, + _ => RfbServerMsg::InvalidMsg, + } + } +} + +#[derive(Clone, Copy)] +pub enum UpdateState { + NotIncremental = 0, + Incremental = 1, +} + +#[derive(Clone, Copy)] +pub enum TestAuthType { + Invalid = 0, + VncAuthNone = 
1, +} + +#[derive(Clone, Copy, Default, Debug)] +pub struct RfbPixelFormat { + bit_per_pixel: u8, + depth: u8, + big_endian: u8, + true_color_flag: u8, + red_max: u16, + green_max: u16, + blue_max: u16, + red_shift: u8, + green_shift: u8, + blue_shift: u8, + pad1: u8, + pad2: u16, +} + +impl RfbPixelFormat { + pub fn new( + bit_per_pixel: u8, + depth: u8, + big_endian: u8, + true_color_flag: u8, + red_max: u16, + green_max: u16, + blue_max: u16, + red_shift: u8, + green_shift: u8, + blue_shift: u8, + ) -> Self { + Self { + bit_per_pixel, + depth, + big_endian, + true_color_flag, + red_max, + green_max, + blue_max, + red_shift, + green_shift, + blue_shift, + pad1: 0_u8, + pad2: 0_u16, + } + } + + fn from_bytes(&mut self, buf: &Vec) { + self.bit_per_pixel = buf[0]; + self.depth = buf[1]; + self.big_endian = buf[2]; + self.true_color_flag = buf[3]; + self.red_max = u16::from_be_bytes([buf[4], buf[5]]); + self.green_max = u16::from_be_bytes([buf[6], buf[7]]); + self.blue_max = u16::from_be_bytes([buf[8], buf[9]]); + self.red_shift = buf[10]; + self.green_shift = buf[11]; + self.blue_shift = buf[12]; + self.pad1 = buf[13]; + self.pad2 = u16::from_be_bytes([buf[14], buf[15]]); + } + + fn to_be_bytes(&self) -> Vec { + let mut buf: Vec = Vec::new(); + buf.append(&mut self.bit_per_pixel.to_be_bytes().to_vec()); + buf.append(&mut self.depth.to_be_bytes().to_vec()); + buf.append(&mut self.big_endian.to_be_bytes().to_vec()); + buf.append(&mut self.true_color_flag.to_be_bytes().to_vec()); + buf.append(&mut self.red_max.to_be_bytes().to_vec()); + buf.append(&mut self.green_max.to_be_bytes().to_vec()); + buf.append(&mut self.blue_max.to_be_bytes().to_vec()); + buf.append(&mut self.red_shift.to_be_bytes().to_vec()); + buf.append(&mut self.green_shift.to_be_bytes().to_vec()); + buf.append(&mut self.blue_shift.to_be_bytes().to_vec()); + buf.append(&mut self.pad1.to_be_bytes().to_vec()); + buf.append(&mut self.pad2.to_be_bytes().to_vec()); + buf + } +} + +pub struct RfbFrameBuffHead { + pub flag: RfbServerMsg, + pub pad0: u8, + pub num_rects: u16, +} + +impl RfbFrameBuffHead { + fn new(buf: &Vec) -> Self { + assert!(buf.len() >= 4); + Self { + flag: RfbServerMsg::from(buf[0]), + pad0: buf[1], + num_rects: u16::from_be_bytes([buf[2], buf[3]]), + } + } +} + +#[derive(Debug)] +pub struct RfbFrameBuff { + pub x: u16, + pub y: u16, + pub w: u16, + pub h: u16, + pub enc: u32, +} + +impl RfbFrameBuff { + fn new(buf: &Vec) -> Self { + assert!(buf.len() >= 12); + Self { + x: u16::from_be_bytes([buf[0], buf[1]]), + y: u16::from_be_bytes([buf[2], buf[3]]), + w: u16::from_be_bytes([buf[4], buf[5]]), + h: u16::from_be_bytes([buf[6], buf[7]]), + enc: u32::from_be_bytes([buf[8], buf[9], buf[10], buf[11]]), + } + } +} + +pub struct RfbSetColourMap { + pub flag: RfbServerMsg, + pub pad0: u8, + pub first_color: u16, + pub num_of_colurs: u16, +} + +impl RfbSetColourMap { + fn new(buf: &Vec) -> Self { + assert!(buf.len() >= 6); + Self { + flag: RfbServerMsg::from(buf[0]), + pad0: buf[1], + first_color: u16::from_be_bytes([buf[2], buf[3]]), + num_of_colurs: u16::from_be_bytes([buf[4], buf[5]]), + } + } +} + +pub trait TestEventOperation { + fn to_be_bytes(&self) -> Vec; +} + +#[derive(Clone, Copy)] +pub enum RfbClientMessage { + RrbSetPixelFormat = 0, + RfbSetEncoding = 2, + RfbUpdateRequest = 3, + RfbKeyEvent = 4, + RfbPointerEvent = 5, + RfbClientCutText = 6, +} + +pub struct TestPointEvent { + pub event_type: RfbClientMessage, + pub button_mask: u8, + pub x: u16, + pub y: u16, +} + +impl TestPointEvent { + fn new(button_mask: 
u8, x: u16, y: u16) -> Self { + Self { + event_type: RfbClientMessage::RfbPointerEvent, + button_mask, + x, + y, + } + } +} + +impl TestEventOperation for TestPointEvent { + fn to_be_bytes(&self) -> Vec { + let mut buf: Vec = Vec::new(); + buf.append(&mut (self.event_type as u8).to_be_bytes().to_vec()); + buf.append(&mut self.button_mask.to_be_bytes().to_vec()); + buf.append(&mut self.x.to_be_bytes().to_vec()); + buf.append(&mut self.y.to_be_bytes().to_vec()); + buf + } +} + +pub struct TestKeyEvent { + pub event_type: RfbClientMessage, + pub down: u8, + pub pad: u16, + pub keysym: u32, +} + +impl TestKeyEvent { + fn new(down: u8, keysym: u32) -> Self { + Self { + event_type: RfbClientMessage::RfbKeyEvent, + down, + pad: 0_u16, + keysym, + } + } +} + +impl TestEventOperation for TestKeyEvent { + fn to_be_bytes(&self) -> Vec { + let mut buf: Vec = Vec::new(); + buf.append(&mut (self.event_type as u8).to_be_bytes().to_vec()); + buf.append(&mut self.down.to_be_bytes().to_vec()); + buf.append(&mut self.pad.to_be_bytes().to_vec()); + buf.append(&mut self.keysym.to_be_bytes().to_vec()); + buf + } +} + +pub struct TestSetupEncoding { + pub event_type: RfbClientMessage, + pub pad: u8, + pub num_encodings: u16, + pub encs: Vec, +} + +impl TestSetupEncoding { + fn new() -> Self { + Self { + event_type: RfbClientMessage::RfbSetEncoding, + pad: 0_u8, + num_encodings: 0_u16, + encs: Vec::new(), + } + } +} + +impl TestEventOperation for TestSetupEncoding { + fn to_be_bytes(&self) -> Vec { + let mut buf: Vec = Vec::new(); + buf.append(&mut (self.event_type as u8).to_be_bytes().to_vec()); + buf.append(&mut self.pad.to_be_bytes().to_vec()); + buf.append(&mut self.num_encodings.to_be_bytes().to_vec()); + for enc in self.encs.iter() { + buf.append(&mut (*enc as u32).to_be_bytes().to_vec()); + } + buf + } +} + +pub struct TestUpdateFrameBuffer { + pub event_type: RfbClientMessage, + pub incremental: UpdateState, + pub x: u16, + pub y: u16, + pub w: u16, + pub h: u16, +} + +impl TestUpdateFrameBuffer { + fn new(incremental: UpdateState, x: u16, y: u16, w: u16, h: u16) -> Self { + Self { + event_type: RfbClientMessage::RfbUpdateRequest, + incremental, + x, + y, + w, + h, + } + } +} + +impl TestEventOperation for TestUpdateFrameBuffer { + fn to_be_bytes(&self) -> Vec { + let mut buf: Vec = Vec::new(); + buf.append(&mut (self.event_type as u8).to_be_bytes().to_vec()); + buf.append(&mut (self.incremental as u8).to_be_bytes().to_vec()); + buf.append(&mut self.x.to_be_bytes().to_vec()); + buf.append(&mut self.y.to_be_bytes().to_vec()); + buf.append(&mut self.w.to_be_bytes().to_vec()); + buf.append(&mut self.h.to_be_bytes().to_vec()); + buf + } +} + +pub struct TestSetPixelFormat { + pub event_type: RfbClientMessage, + pub pad0: u8, + pub pad1: u16, + pub pf: RfbPixelFormat, +} + +impl TestSetPixelFormat { + fn new(pf: RfbPixelFormat) -> Self { + Self { + event_type: RfbClientMessage::RrbSetPixelFormat, + pad0: 0_u8, + pad1: 0_u16, + pf, + } + } +} + +impl TestEventOperation for TestSetPixelFormat { + fn to_be_bytes(&self) -> Vec { + let mut buf: Vec = Vec::new(); + buf.append(&mut (self.event_type as u8).to_be_bytes().to_vec()); + buf.append(&mut self.pad0.to_be_bytes().to_vec()); + buf.append(&mut self.pad1.to_be_bytes().to_vec()); + buf.append(&mut self.pf.to_be_bytes()); + buf + } +} + +pub struct TestClientCut { + pub event_type: RfbClientMessage, + pub pad0: u8, + pub pad1: u16, + pub length: u32, + pub text: String, +} + +impl TestEventOperation for TestClientCut { + fn to_be_bytes(&self) -> Vec { + let mut 
buf: Vec = Vec::new(); + buf.append( + &mut (RfbClientMessage::RfbClientCutText as u8) + .to_be_bytes() + .to_vec(), + ); + buf.append(&mut self.pad0.to_be_bytes().to_vec()); + buf.append(&mut self.pad1.to_be_bytes().to_vec()); + buf.append(&mut (self.text.len()).to_be_bytes().to_vec()); + buf.append(&mut self.text.as_bytes().to_vec()); + buf + } +} + +/// Display mode information. +#[derive(Default)] +pub struct DisplayMode { + pub width: u16, + pub height: u16, + pub test_pf: RfbPixelFormat, + pub app_name: String, +} + +impl DisplayMode { + pub fn from_bytes(&mut self, buf: &mut Vec) { + self.width = u16::from_be_bytes([buf[0], buf[1]]); + self.height = u16::from_be_bytes([buf[2], buf[3]]); + buf.drain(..4); + + // Pixel format message. + self.test_pf.from_bytes(&buf[..16].to_vec()); + buf.drain(..16); + + // Application name + len. + let name_len = u32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]]); + buf.drain(..4); + self.app_name = String::from_utf8(buf[..name_len as usize].to_vec()).unwrap(); + buf.drain(..name_len as usize); + + println!( + "Display information set by server:\n \ + application name: {:?} Image size: width: {:?}, height: {:?}\n \ + big endian: {:?}, true color flag: {:?} red max {:?} red shift {:?}\n \ + green max {:?} green shift {:?} blue max {:?} blue shift {:?}\n", + self.app_name, + self.width, + self.height, + self.test_pf.big_endian, + self.test_pf.true_color_flag, + self.test_pf.red_max, + self.test_pf.red_shift, + self.test_pf.green_max, + self.test_pf.green_shift, + self.test_pf.blue_max, + self.test_pf.blue_shift + ); + } + + pub fn check(&mut self) { + assert!(0 < self.width && self.width <= 2560); + assert!(0 < self.height && self.height <= 2048); + assert!(self.app_name.len() <= 100); + } +} + +pub trait IoOperations { + fn channel_write_full(&mut self, buf: &[u8]) -> Result; + fn channel_read_full(&mut self, buf: &mut Vec) -> Result; +} + +pub struct IoChannel { + stream: TcpStream, +} + +impl IoChannel { + pub fn new(stream: TcpStream) -> Self { + Self { stream } + } +} + +impl IoOperations for IoChannel { + fn channel_write_full(&mut self, buf: &[u8]) -> Result { + let buf_size = buf.len(); + let mut offset = 0; + while offset < buf_size { + let tmp_buf = &buf[offset..]; + match self.stream.write(tmp_buf) { + Ok(ret) => { + offset += ret; + } + Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => { + return Ok(offset); + } + Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => { + continue; + } + Err(e) => { + bail!("Unable to write msg on socket: {:?}", e); + } + } + } + + Ok(buf_size) + } + + fn channel_read_full(&mut self, buf: &mut Vec) -> Result { + let mut len = 0_usize; + loop { + let mut bytes = vec![0_u8; MAX_RECVBUF_LEN]; + match self.stream.read(&mut bytes) { + Ok(ret) => { + buf.append(&mut bytes[..ret].to_vec()); + len += ret; + } + Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => { + return Ok(len); + } + Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => { + continue; + } + Err(e) => { + bail!("Unable to read msg from socket: {:?}", e); + } + } + break; + } + + Ok(len) + } +} + +pub struct VncClient { + pub stream: TcpStream, + pub io_channel: Rc>, + test_state: Rc>, + pub display_mod: DisplayMode, + epoll: Epoll, + pub ready_events: Vec, +} + +impl VncClient { + pub fn new( + stream: TcpStream, + io_channel: Rc>, + test_state: Rc>, + ) -> Self { + let epoll = Epoll::new().unwrap(); + Self { + stream, + io_channel, + test_state, + display_mod: DisplayMode::default(), + epoll, + ready_events: 
vec![EpollEvent::default(); 1], + } + } + + pub fn epoll_ctl(&mut self, event: EpollEvent) -> io::Result<()> { + self.epoll + .ctl(ControlOperation::Add, self.stream.as_raw_fd(), event) + } + + /// Wait for events on the epoll. + /// or wait util the timeout. + /// Step: + /// 1. Switch listening event. + /// 2. Return if event happen or time out. + pub fn epoll_wait(&mut self, event_set: EventSet) -> io::Result { + let event = EpollEvent::new(event_set, self.stream.as_raw_fd() as u64); + self.epoll + .ctl(ControlOperation::Modify, self.stream.as_raw_fd(), event)?; + self.epoll + .wait(EPOLL_DEFAULT_TIMEOUT, &mut self.ready_events[..]) + } + + /// Read the data in the Stream util empty. + pub fn stream_read_to_end(&mut self) -> Result<()> { + let mut buf: Vec = Vec::new(); + let event = EpollEvent::new(EventSet::IN, self.stream.as_raw_fd() as u64); + self.epoll + .ctl(ControlOperation::Modify, self.stream.as_raw_fd(), event)?; + + match self + .epoll + .wait(EPOLL_DEFAULT_TIMEOUT, &mut self.ready_events[..]) + { + Ok(event_counts) if event_counts > 0 => { + self.io_channel.borrow_mut().channel_read_full(&mut buf)?; + buf.clear(); + } + _ => return Ok(()), + } + + Ok(()) + } + + /// Read message until the total number of bytes is exceed the expect. + pub fn read_msg(&mut self, buf: &mut Vec, expect: usize) -> Result { + let mut total_received: usize = 0; + loop { + if buf.len() >= expect { + break; + } + // Wait event. + match self.epoll_wait(EventSet::IN) { + Ok(n) if n > 0 => {} + _ => bail!("Io Channel is broken"), + } + let mut tmp_buf: Vec = Vec::new(); + let len = match self.io_channel.borrow_mut().channel_read_full(&mut tmp_buf) { + Ok(n) => { + total_received += n; + n + } + Err(e) => return Err(e), + }; + buf.append(&mut tmp_buf[..len].to_vec()); + } + + Ok(total_received) + } + + /// Write message. + pub fn write_msg(&mut self, buf: &[u8]) -> Result { + let total_byte = buf.len(); + loop { + let mut send_bytes: usize = 0; + match self.io_channel.borrow_mut().channel_write_full(buf) { + Ok(len) => send_bytes += len, + Err(e) => return Err(e), + } + + if send_bytes >= total_byte { + break; + } + } + Ok(total_byte) + } + + pub fn connect(&mut self, sec_type: TestAuthType) -> Result<()> { + let mut buf: Vec = Vec::new(); + println!("Connect to server."); + // Step 1: Exchange RFB Protocol: RFB 003.008. + self.read_msg(&mut buf, 12)?; + if "RFB 003.008\n".as_bytes().to_vec() != buf[..12].to_vec() { + bail!("Unsupported RFB version"); + } + self.write_msg("RFB 003.008\n".as_bytes())?; + buf.drain(..12); + + // Step 2: Auth num is 1. + self.read_msg(&mut buf, 1)?; + let auth_num = buf[0]; + assert!(auth_num > 0); + buf.drain(..1); + self.read_msg(&mut buf, auth_num as usize)?; + if sec_type as u8 != buf[0] { + bail!("Unsupported security type!"); + } + buf.drain(..auth_num as usize); + self.write_msg((sec_type as u8).to_be_bytes().as_ref())?; + + if let TestAuthType::VncAuthNone = sec_type { + // Step 3. Handle_auth: Authstate::No, Server accept auth and client send share + // mode. + self.read_msg(&mut buf, 4)?; + if buf[..4].to_vec() != [0_u8; 4].to_vec() { + bail!("Reject by vnc server"); + } + self.write_msg(0_u8.to_be_bytes().as_ref())?; + buf.drain(..4); + + // Step 4. display mode information init: width + height + pixelformat + app_name. + self.read_msg(&mut buf, 24)?; + self.display_mod.from_bytes(&mut buf); + self.display_mod.check(); + } + self.stream_read_to_end()?; + println!("Connection established!"); + Ok(()) + } + + /// Send point event to VncServer. 
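    /// The message follows the RFB PointerEvent layout built by
    /// TestPointEvent::to_be_bytes(): one byte message type
    /// (RfbPointerEvent = 5), one byte button mask, then x and y as
    /// big-endian u16 values.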
+ pub fn test_point_event(&mut self, button_mask: u8, x: u16, y: u16) -> Result<()> { + println!("Test point event."); + let test_event = TestPointEvent::new(button_mask, x, y); + self.write_msg(&mut test_event.to_be_bytes())?; + Ok(()) + } + + /// Send point event to VncServer. + pub fn test_key_event(&mut self, down: u8, keysym: u32) -> Result<()> { + println!("Test key event."); + let test_event = TestKeyEvent::new(down, keysym); + self.write_msg(&mut test_event.to_be_bytes())?; + Ok(()) + } + + /// Send set encodings to VncServer. + /// + /// # Arguments. + /// + /// * `enc_num` - total number of feature support by VncClient. + /// * `enc` - features supported by VncClient. + pub fn test_setup_encodings( + &mut self, + enc_num: Option, + enc: Option, + ) -> Result<()> { + println!("Test setup encodings"); + let mut test_event = TestSetupEncoding::new(); + if let Some(encoding) = enc { + test_event.encs.push(encoding); + test_event.num_encodings = enc_num.unwrap_or(1_u16); + } else { + for encoding in EncodingType::ENCODINGTYPE { + test_event.encs.push(encoding); + } + test_event.num_encodings = match enc_num { + Some(num) => num, + None => EncodingType::ENCODINGTYPE.len() as u16, + }; + } + self.write_msg(&mut test_event.to_be_bytes())?; + Ok(()) + } + + /// Sent update framebuffer request to VncServer. + pub fn test_update_request( + &mut self, + incremental: UpdateState, + x: u16, + y: u16, + w: u16, + h: u16, + ) -> Result<()> { + println!("Test update frambuff request."); + let test_event = TestUpdateFrameBuffer::new(incremental, x, y, w, h); + self.write_msg(&mut test_event.to_be_bytes())?; + Ok(()) + } + + /// Send set pixel format to VncClient. + pub fn test_set_pixel_format(&mut self, pf: RfbPixelFormat) -> Result<()> { + println!("Test set pixel format."); + let test_event = TestSetPixelFormat::new(pf); + self.write_msg(&mut test_event.to_be_bytes())?; + Ok(()) + } + + /// Send client cut event to VncServer. + pub fn test_send_client_cut(&mut self, client_cut: TestClientCut) -> Result<()> { + println!("Test send client cut event."); + self.write_msg(&mut client_cut.to_be_bytes())?; + Ok(()) + } + + /// Receive the framebuferr data, and verify the format. + /// + /// # Arguments + /// * `pf` - Pixel format set to server. + /// * `enc` - Image encoding type. + pub fn test_recv_server_data( + &mut self, + pf: RfbPixelFormat, + ) -> Result> { + let mut buf: Vec = Vec::new(); + let mut rfb_event: Vec<(RfbServerMsg, EncodingType)> = Vec::new(); + sleep(Duration::from_millis(50)); + self.test_state + .borrow_mut() + .clock_step_ns(REFRESH_TIME_INTERVAL); + loop { + // Wait event. 
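            // epoll_wait() re-arms EventSet::IN on every pass; once it reports
            // no ready events, the server has nothing more to send and the
            // `_ => break` arm ends the receive loop.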
+ match self.epoll_wait(EventSet::IN) { + Ok(n) if n > 0 => {} + _ => break, + } + + self.read_msg(&mut buf, 1)?; + match RfbServerMsg::from(buf[0]) { + RfbServerMsg::FramebufferUpdate => { + self.read_msg(&mut buf, 4)?; + let frame_head = RfbFrameBuffHead::new(&mut buf); + buf.drain(..4); + println!("Total number of rects : {:?}", frame_head.num_rects); + for i in 0..frame_head.num_rects { + println!("Rect: {:?}", i + 1); + self.read_msg(&mut buf, 12)?; + let frame_buff = RfbFrameBuff::new(&mut buf); + buf.drain(..12); + rfb_event.push(( + RfbServerMsg::FramebufferUpdate, + EncodingType::from(frame_buff.enc), + )); + self.handle_server_msg(pf, frame_buff, &mut buf)?; + } + } + RfbServerMsg::SetColourMapEntries => { + rfb_event.push(( + RfbServerMsg::SetColourMapEntries, + EncodingType::EncodingInvalid, + )); + self.read_msg(&mut buf, 6)?; + let colour_map = RfbSetColourMap::new(&buf); + buf.drain(..6); + let message_len = colour_map.num_of_colurs * 6; + self.read_msg(&mut buf, message_len as usize)?; + buf.drain(..message_len as usize); + assert_eq!(buf.len(), 0_usize); + println!( + "Set Color Map Entries, total num of colours:{:?}", + colour_map.num_of_colurs + ); + } + _ => { + assert!(false); + } + } + } + Ok(rfb_event) + } + + /// Handle messages from Vnc Server. + fn handle_server_msg( + &mut self, + pf: RfbPixelFormat, + frame_buff: RfbFrameBuff, + buf: &mut Vec, + ) -> Result<()> { + match EncodingType::from(frame_buff.enc) { + EncodingType::EncodingRaw => { + self.parse_raw_image_data(pf, frame_buff, buf)?; + } + EncodingType::EncodingHextile => { + self.parse_hextile_image_data(pf, frame_buff, buf)?; + } + EncodingType::EncodingDesktopresize => { + self.display_mod.width = frame_buff.w; + self.display_mod.height = frame_buff.h; + } + EncodingType::EncodingRichCursor => { + let data_len = frame_buff.w * frame_buff.h * 4; + // cursor.data + mask + let mask_len = (frame_buff.w + 8 - 1) / 8 * frame_buff.h; + self.read_msg(buf, (data_len + mask_len) as usize)?; + buf.drain(..(data_len + mask_len) as usize); + } + EncodingType::EncodingAlphaCursor => { + let data_len = frame_buff.w * frame_buff.h * 4; + // EncodingType + cursor.data + self.read_msg(buf, (4 + data_len) as usize)?; + buf.drain(..(4 + data_len) as usize); + } + _ => { + assert!( + false, + "unsupported event type from client: {}", + frame_buff.enc + ); + } + } + Ok(()) + } + + fn parse_raw_image_data( + &mut self, + pf: RfbPixelFormat, + frame_buff: RfbFrameBuff, + buf: &mut Vec, + ) -> Result<()> { + let message_len: usize = + frame_buff.w as usize * frame_buff.h as usize * (pf.bit_per_pixel as usize / 8); + println!("Total bytes of image data: {:?}", message_len); + self.read_msg(buf, message_len)?; + buf.drain(..message_len); + Ok(()) + } + + fn parse_hextile_image_data( + &mut self, + pf: RfbPixelFormat, + frame_buff: RfbFrameBuff, + buf: &mut Vec, + ) -> Result<()> { + let bytes_per_pixel: usize = (pf.bit_per_pixel / 8) as usize; + let mut total_received: usize = 0; + for j in (0..frame_buff.h).step_by(HEXTILE_BLOCK_SIZE) { + for i in (0..frame_buff.w).step_by(HEXTILE_BLOCK_SIZE) { + self.read_msg(buf, 1)?; + let flag = buf[0]; + buf.drain(..1); + total_received += 1; + if flag & RAW != 0 { + let w = cmp::min(HEXTILE_BLOCK_SIZE as u16, frame_buff.w - i); + let h = cmp::min(HEXTILE_BLOCK_SIZE as u16, frame_buff.h - j); + let expect = w as usize * h as usize * bytes_per_pixel; + self.read_msg(buf, expect)?; + total_received += expect; + buf.drain(..expect); + } else { + // Background colour. 
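                    // (Hextile tiles without the RAW bit may carry, in order: an
                    // optional background pixel, an optional foreground pixel,
                    // and an optional run of subrectangles, as parsed below.)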
+ if flag & BACKGROUND_SPECIFIC != 0 { + self.read_msg(buf, bytes_per_pixel)?; + total_received += bytes_per_pixel; + buf.drain(..bytes_per_pixel); + } + // Foreground colour. + if flag & FOREGROUND_SPECIFIC != 0 { + self.read_msg(buf, bytes_per_pixel)?; + total_received += bytes_per_pixel; + buf.drain(..bytes_per_pixel); + } + + if flag & ANY_SUBRECTS != 0 { + self.read_msg(buf, 1)?; + total_received += 1; + let num_tiles = buf[0] as usize; + buf.drain(..1); + let expect = match flag & SUBRECTS_COLOURED == 0 { + true => num_tiles * 2, + false => num_tiles * (bytes_per_pixel + 2), + }; + self.read_msg(buf, expect)?; + total_received += expect; + buf.drain(..expect); + } + } + } + } + println!("Total bytes encoded by Hextile: {:?}", total_received); + Ok(()) + } + + pub fn disconnect(&mut self) -> Result<()> { + self.stream.shutdown(Shutdown::Both)?; + Ok(()) + } +} + +/// Create a new vnc client. +/// +/// # Arguments +/// +/// * `port` - Local port listened by vnc server. +pub fn create_new_client(test_state: Rc>, port: u16) -> Result { + let port = port + RFB_PORT_OFFSET; + let addrs = [SocketAddr::from(([127, 0, 0, 1], port))]; + let stream = TcpStream::connect(&addrs[..]).unwrap(); + stream + .set_nonblocking(true) + .expect("set nonblocking failed"); + stream + .set_read_timeout(Some(time::Duration::from_millis(READ_TIME_OUT))) + .unwrap(); + let stream_clone = stream.try_clone().expect("clone failed..."); + let io_channel = Rc::new(RefCell::new(IoChannel::new(stream_clone))); + let mut vnc_client = VncClient::new(stream, io_channel, test_state); + // Register epoll event. + let event = EpollEvent::new( + EventSet::READ_HANG_UP | EventSet::IN, + vnc_client.stream.as_raw_fd() as u64, + ); + vnc_client.epoll_ctl(event)?; + Ok(vnc_client) +} + +pub struct TestDemoGpuDevice { + pub pci_dev: TestPciDev, + pub bar_addr: PCIBarAddr, + bar_idx: u8, + allocator: Rc>, +} + +impl TestDemoGpuDevice { + pub fn new(pci_bus: Rc>, allocator: Rc>) -> Self { + Self { + pci_dev: TestPciDev::new(pci_bus), + bar_addr: 0, + bar_idx: 0, + allocator, + } + } + + /// Send the deactive event to demo gpu. + pub fn deactive(&mut self) { + let cmd = TestGpuCmd { + event_type: GpuEvent::Deactive, + ..Default::default() + }; + self.do_gpu_event(&cmd); + } + + /// Replace the surface of the display. + /// The width and height corresponding the width and height of the surface. + pub fn replace_surface(&mut self, width: u32, height: u32, pixman_format: u32) { + let cmd = TestGpuCmd { + event_type: GpuEvent::ReplaceSurface, + w: width, + h: height, + data_len: pixman_format, + ..Default::default() + }; + self.do_gpu_event(&cmd); + } + + /// Update the cursor image for display. + pub fn replace_cursor( + &mut self, + width: u32, + height: u32, + hot_x: u32, + hot_y: u32, + mouse_data: u32, + ) { + let cmd = TestGpuCmd { + event_type: GpuEvent::ReplaceCursor, + x: hot_x, + y: hot_y, + w: width, + h: height, + data_len: mouse_data, + }; + self.do_gpu_event(&cmd); + } + + /// Change the pixel data of the specified area, + /// (x, y, w, h) represents the specific area on the image. + pub fn update_image_area(&mut self, x: u32, y: u32, w: u32, h: u32) { + let cmd = TestGpuCmd { + event_type: GpuEvent::GraphicUpdateArea, + x, + y, + w, + h, + ..Default::default() + }; + self.do_gpu_event(&cmd); + } + + /// Notify VNC that the specific area of pixel has been updated. 
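    /// Only the dirty rectangle is reported here; the pixel data itself is
    /// expected to have been written beforehand via update_image_area(), and
    /// the VNC server picks the change up on its next refresh interval.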
+ pub fn set_area_dirty(&mut self, x: u32, y: u32, w: u32, h: u32) { + let cmd = TestGpuCmd { + event_type: GpuEvent::GraphicUpdateDirty, + x, + y, + w, + h, + ..Default::default() + }; + self.do_gpu_event(&cmd); + } + + /// Send a gpu cmd. + pub fn do_gpu_event(&mut self, cmd: &TestGpuCmd) { + // Malloc a memory, and write the data in this addr. + let addr = self.allocator.borrow_mut().alloc(21); + let test_state = self.pci_dev.pci_bus.borrow_mut().test_state.clone(); + test_state.borrow_mut().writeb(addr, cmd.event_type as u8); + test_state.borrow_mut().writel(addr + 1, cmd.x); + test_state.borrow_mut().writel(addr + 5, cmd.y); + test_state.borrow_mut().writel(addr + 9, cmd.w); + test_state.borrow_mut().writel(addr + 13, cmd.h); + test_state.borrow_mut().writel(addr + 17, cmd.data_len); + // Write to specific address. + self.pci_dev.io_writeq(self.bar_addr, 0_u64, addr); + test_state.borrow().clock_step_ns(REFRESH_TIME_INTERVAL); + println!("cmd : {:?}", cmd); + } + + pub fn set_devfn(&mut self, devfn: u8) { + self.pci_dev.devfn = devfn; + } + + pub fn find_pci_device(&mut self, devfn: u8) -> bool { + self.set_devfn(devfn); + if self.pci_dev.config_readw(PCI_VENDOR_ID) == 0xFFFF { + return false; + } + true + } + + pub fn init(&mut self, pci_slot: u8) { + let devfn = pci_slot << 3; + assert!(self.find_pci_device(devfn)); + + self.pci_dev.enable(); + self.bar_addr = self.pci_dev.io_map(self.bar_idx); + } +} + +pub struct TestDemoInputDevice { + pub pci_dev: TestPciDev, + pub bar_addr: PCIBarAddr, + bar_idx: u8, + mem_addr: u64, + allocator: Rc>, +} + +impl TestDemoInputDevice { + pub fn new(pci_bus: Rc>, allocator: Rc>) -> Self { + Self { + pci_dev: TestPciDev::new(pci_bus), + bar_addr: 0, + bar_idx: 0, + mem_addr: 0, + allocator, + } + } + + /// Alloc a memory space, and write the address to the input device configuration space. + pub fn activate(&mut self) { + let addr = self.allocator.borrow_mut().alloc(100); + self.mem_addr = addr; + self.pci_dev.io_writeq(self.bar_addr, 0, addr) + } + + /// Read an input event from a memory. + pub fn read_input_event(&mut self) -> InputMessage { + sleep(Duration::from_millis(50)); + let addr = self.mem_addr; + let test_state = self.pci_dev.pci_bus.borrow_mut().test_state.clone(); + + let mut msg = InputMessage::default(); + msg.event_type = InputEvent::from(test_state.borrow_mut().readb(addr)); + msg.keycode = test_state.borrow_mut().readw(addr + 1); + msg.down = test_state.borrow_mut().readb(addr + 3); + msg.button = test_state.borrow_mut().readl(addr + 4); + msg.x = test_state.borrow_mut().readl(addr + 8); + msg.y = test_state.borrow_mut().readl(addr + 12); + println!("kbd mouse event: {:?}", msg); + msg + } + + pub fn set_devfn(&mut self, devfn: u8) { + self.pci_dev.devfn = devfn; + } + + pub fn find_pci_device(&mut self, devfn: u8) -> bool { + self.set_devfn(devfn); + if self.pci_dev.config_readw(PCI_VENDOR_ID) == 0xFFFF { + return false; + } + true + } + + pub fn init(&mut self, pci_slot: u8) { + let devfn = pci_slot << 3; + assert!(self.find_pci_device(devfn)); + + self.pci_dev.enable(); + self.bar_addr = self.pci_dev.io_map(self.bar_idx); + self.activate(); + } +} + +pub struct DemoGpuConfig { + pub pci_slot: u8, + pub id: String, +} + +pub struct InputConfig { + pub pci_slot: u8, + pub id: String, +} + +/// Environment Setup. +pub fn set_up( + gpu_conf: Vec, + input_conf: InputConfig, + port: u16, +) -> ( + Vec>>, + Rc>, + Rc>, +) { + let mut args: Vec = Vec::new(); + // vm args. 
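    // All fragments below are accumulated as owned Strings and only borrowed
    // as &str right before test_init(), so every device section can append its
    // own arguments independently.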
+ let vm_args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + let mut vm_args = vm_args.into_iter().map(|s| s.to_string()).collect(); + args.append(&mut vm_args); + // Log. + let vm_args = String::from("-D /tmp/vnc_test.log"); + let vm_args: Vec<&str> = vm_args[..].split(' ').collect(); + let mut vm_args = vm_args.into_iter().map(|s| s.to_string()).collect(); + args.append(&mut vm_args); + // Demo GPU Device. + for conf in &gpu_conf { + let gpu_args = format!( + "-device {},bus=pcie.0,addr={}.0,id={},bar_num=3,device_type=demo-gpu,bar_size=4096", + "pcie-demo-dev", conf.pci_slot, conf.id, + ); + let gpu_args: Vec<&str> = gpu_args[..].split(' ').collect(); + let mut gpu_args = gpu_args.into_iter().map(|s| s.to_string()).collect(); + args.append(&mut gpu_args); + } + // Demo Input Device. + let input_args = format!( + "-device {},bus=pcie.0,addr={}.0,id={},bar_num=3,device_type=demo-input,bar_size=4096", + "pcie-demo-dev", input_conf.pci_slot, input_conf.id, + ); + let input_args: Vec<&str> = input_args[..].split(' ').collect(); + let mut input_args = input_args.into_iter().map(|s| s.to_string()).collect(); + args.append(&mut input_args); + + // VNC server + let vnc_args = format!("-vnc 0.0.0.0:{}", port); + let vnc_args: Vec<&str> = vnc_args[..].split(' ').collect(); + let mut vnc_args = vnc_args.into_iter().map(|s| s.to_string()).collect(); + args.append(&mut vnc_args); + let args = args.iter().map(AsRef::as_ref).collect(); + + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator.clone(); + + let mut gpu_lists: Vec>> = Vec::new(); + for conf in gpu_conf { + let demo_gpu = Rc::new(RefCell::new(TestDemoGpuDevice::new( + machine.pci_bus.clone(), + allocator.clone(), + ))); + demo_gpu.borrow_mut().init(conf.pci_slot); + demo_gpu + .borrow_mut() + .replace_surface(640, 480, PIXMAN_A8B8G8R8); + gpu_lists.push(demo_gpu); + } + + let input = Rc::new(RefCell::new(TestDemoInputDevice::new( + machine.pci_bus, + allocator, + ))); + input.borrow_mut().init(input_conf.pci_slot); + test_state.borrow().clock_step_ns(REFRESH_TIME_INTERVAL); + (gpu_lists, input, test_state) +} + +pub fn tear_down( + gpu_list: Vec>>, + _input: Rc>, + test_state: Rc>, +) { + for demo_gpu in gpu_list { + demo_gpu.borrow_mut().deactive(); + } + test_state.borrow_mut().stop(); +} + +/// Key mapping. +/// Vnc client send keysym -> vnc server +/// Vnc server send keycode -> usb. 
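/// Each entry below is (key name, X11 keysym the client sends, keycode the test
/// expects the input device to receive).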
+pub const KEYEVENTLIST: [(&str, u16, u16); 41] = [ + ("space", 0x0020, 0x0039), + ("0", 0x0030, 0x000b), + ("1", 0x0031, 0x0002), + ("2", 0x0032, 0x0003), + ("3", 0x0033, 0x0004), + ("4", 0x0034, 0x0005), + ("5", 0x0035, 0x0006), + ("6", 0x0036, 0x0007), + ("7", 0x0037, 0x0008), + ("8", 0x0038, 0x0009), + ("9", 0x0039, 0x000a), + ("a", 0x0061, 0x001e), + ("b", 0x0062, 0x0030), + ("c", 0x0063, 0x002e), + ("d", 0x0064, 0x0020), + ("e", 0x0065, 0x0012), + ("f", 0x0066, 0x0021), + ("g", 0x0067, 0x0022), + ("h", 0x0068, 0x0023), + ("i", 0x0069, 0x0017), + ("j", 0x006a, 0x0024), + ("k", 0x006b, 0x0025), + ("l", 0x006c, 0x0026), + ("m", 0x006d, 0x0032), + ("n", 0x006e, 0x0031), + ("o", 0x006f, 0x0018), + ("p", 0x0070, 0x0019), + ("q", 0x0071, 0x0010), + ("r", 0x0072, 0x0013), + ("s", 0x0073, 0x001f), + ("t", 0x0074, 0x0014), + ("u", 0x0075, 0x0016), + ("v", 0x0076, 0x002f), + ("w", 0x0077, 0x0011), + ("x", 0x0078, 0x002d), + ("y", 0x0079, 0x0015), + ("z", 0x007a, 0x002c), + ("ctl", 0xFFE3, 0x001d), + ("alt", 0xFFE9, 0x0038), + ("caps_lock", 0xFFE5, 0x003a), + ("num_lock", 0xFF7F, 0x0045), +]; + +// Event type of Point. +pub const INPUT_POINT_LEFT: u8 = 0x01; +pub const INPUT_POINT_MIDDLE: u8 = 0x02; +pub const INPUT_POINT_RIGHT: u8 = 0x04; + +// Coordinates of pointer movement on the desktop. +pub const POINTEVENTLIST: [(u8, u16, u16); 16] = [ + (INPUT_POINT_LEFT, 0x0070, 0x0002), + (INPUT_POINT_RIGHT, 0x0000, 0x0005), + (INPUT_POINT_LEFT, 0x0005, 0x0005), + (INPUT_POINT_RIGHT, 0x0005, 0x0136), + (INPUT_POINT_MIDDLE, 0x0005, 0x0011), + (INPUT_POINT_LEFT, 0x0005, 0x0017), + (INPUT_POINT_RIGHT, 0x00aa, 0x0016), + (INPUT_POINT_LEFT, 0x0013, 0x0018), + (INPUT_POINT_RIGHT, 0x000b, 0x001b), + (INPUT_POINT_MIDDLE, 0x0078, 0x001b), + (INPUT_POINT_LEFT, 0x0011, 0x001b), + (INPUT_POINT_LEFT, 0x0011, 0x00c8), + (INPUT_POINT_MIDDLE, 0x0043, 0x00d2), + (INPUT_POINT_LEFT, 0x006d, 0x00c8), + (INPUT_POINT_MIDDLE, 0x00be, 0x00c8), + (INPUT_POINT_RIGHT, 0x00be, 0x0122), +]; + +pub const TEST_CLIENT_RAND_MSG: [u8; 256] = [ + 0x67, 0xa5, 0x3a, 0xeb, 0x4e, 0x30, 0xb0, 0x8d, 0xd7, 0x5e, 0x63, 0x3a, 0xdb, 0xb5, 0xd6, 0x51, + 0x54, 0x66, 0xb7, 0x38, 0xe3, 0xea, 0x89, 0x3b, 0xfa, 0x64, 0xfd, 0xed, 0xc7, 0xe5, 0xbb, 0x4d, + 0x60, 0x0e, 0x8c, 0xc8, 0x6d, 0x97, 0x1b, 0x17, 0xe8, 0x4c, 0x9a, 0xfa, 0x28, 0x03, 0xdb, 0x03, + 0xb5, 0x7f, 0xf1, 0x45, 0x5c, 0xb8, 0x8b, 0xe9, 0x1b, 0x62, 0xe3, 0xb6, 0x7c, 0x94, 0x96, 0xa1, + 0xbf, 0xd0, 0xc9, 0xde, 0x12, 0x3e, 0x21, 0x8a, 0x14, 0x0b, 0x3e, 0x4f, 0x9e, 0xc6, 0x92, 0xb3, + 0xed, 0x5b, 0x71, 0xa3, 0x88, 0x8e, 0x0b, 0x63, 0x66, 0x66, 0xd9, 0xf6, 0xfb, 0xa9, 0x2d, 0x98, + 0xea, 0x6b, 0x05, 0xe3, 0x21, 0xcf, 0x4a, 0xc9, 0x76, 0x1e, 0x6d, 0x00, 0xde, 0x0b, 0x9d, 0xa5, + 0xd0, 0xd1, 0xe4, 0x24, 0x92, 0x19, 0xb8, 0x66, 0xde, 0x6d, 0x1d, 0x98, 0x91, 0x63, 0xa7, 0x03, + 0xdf, 0xbc, 0x98, 0x56, 0x04, 0x8f, 0xf6, 0x92, 0xfe, 0xe5, 0x3b, 0xaf, 0x2e, 0x10, 0x85, 0x94, + 0xa9, 0xc1, 0xed, 0x0a, 0x39, 0x4a, 0xe9, 0x8a, 0x52, 0xa9, 0x8d, 0x13, 0x40, 0x28, 0x21, 0x43, + 0x8b, 0x75, 0x01, 0xf1, 0xf9, 0xde, 0x6e, 0xc6, 0x2c, 0xb0, 0x42, 0x78, 0x2b, 0xf8, 0x34, 0x24, + 0x7a, 0x71, 0xc7, 0x94, 0xac, 0xa8, 0x7d, 0x9b, 0x85, 0xfe, 0x47, 0xc9, 0xd4, 0x70, 0x07, 0x7a, + 0x63, 0x07, 0xb8, 0x83, 0xcb, 0xee, 0x1a, 0x24, 0x58, 0xb3, 0xc3, 0x48, 0xb8, 0xa2, 0x01, 0x8c, + 0x20, 0x3a, 0xe0, 0xe6, 0xa7, 0xf8, 0x5b, 0x1a, 0xd8, 0xfe, 0x7f, 0x4b, 0x50, 0x14, 0x4d, 0xe5, + 0x6f, 0x6f, 0x2f, 0xfa, 0xbb, 0x95, 0x85, 0xfc, 0x33, 0xe7, 0xcf, 0x0d, 0xe1, 0x28, 0x0e, 0xc0, + 0xba, 0xe8, 0xbd, 0x23, 0xc3, 0x7b, 0x25, 0x11, 
0xf5, 0x30, 0x30, 0x5f, 0xb8, 0x57, 0xfe, 0xd5,
+];
diff --git a/tests/mod_test/src/libtest.rs b/tests/mod_test/src/libtest.rs
new file mode 100644
index 0000000000000000000000000000000000000000..c98c32244b51528f4661583fa3cf56f2a66e1f78
--- /dev/null
+++ b/tests/mod_test/src/libtest.rs
@@ -0,0 +1,383 @@
+// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
+
+use std::cell::RefCell;
+use std::io::Read;
+use std::io::Write;
+use std::os::unix::net::{UnixListener, UnixStream};
+use std::path::Path;
+use std::process::{Child, Command};
+use std::thread::sleep;
+use std::time::Duration;
+use std::time::Instant;
+use std::{env, fs};
+
+use hex;
+use serde_json::Value;
+
+use crate::utils::get_tmp_dir;
+
+const MAX_SOCKET_MSG_LENGTH: usize = 8192;
+#[cfg(target_arch = "x86_64")]
+pub const MACHINE_TYPE_ARG: &str = "-machine q35";
+#[cfg(target_arch = "aarch64")]
+pub const MACHINE_TYPE_ARG: &str = "-machine virt";
+
+pub struct StreamHandler {
+ stream: UnixStream,
+ read_buffer: RefCell<String>,
+}
+
+impl StreamHandler {
+ fn new(stream: UnixStream) -> Self {
+ StreamHandler {
+ stream,
+ read_buffer: RefCell::new(String::new()),
+ }
+ }
+
+ fn write_line(&self, cmd: &str) {
+ assert!(cmd.len() <= MAX_SOCKET_MSG_LENGTH);
+ self.stream
+ .try_clone()
+ .unwrap()
+ .write_all(cmd.as_bytes())
+ .unwrap();
+ }
+
+ fn clear_stream(&self) {
+ let mut stream = self.stream.try_clone().unwrap();
+ stream.set_nonblocking(true).unwrap();
+ let _ = stream.read(&mut [0_u8; 1024]);
+ }
+
+ fn read_line(&self, timeout: Duration) -> String {
+ let start = Instant::now();
+ let mut resp = self.read_buffer.borrow_mut();
+ let mut stream = self.stream.try_clone().unwrap();
+ stream.set_nonblocking(true).unwrap();
+
+ let pos = loop {
+ if start + timeout < Instant::now() || resp.find('\n').is_some() {
+ break resp.find('\n');
+ }
+
+ let mut buff = [0u8; 1024];
+ if let Ok(size) = stream.read(&mut buff) {
+ resp.push_str(String::from_utf8(buff[..size].to_vec()).unwrap().as_str());
+ }
+ };
+
+ let (line, left) = resp.split_at(pos.unwrap());
+ let line = line.trim().to_string();
+ // Save the remaining strings to the buffer, except the prefix '\n'.
+ *resp = left[1..].to_string();
+ line
+ }
+}
+
+pub struct TestState {
+ process: Child,
+ test_sock: StreamHandler,
+ qmp_sock: StreamHandler,
+ pub resource_path: String,
+}
+
+impl Drop for TestState {
+ fn drop(&mut self) {
+ if let Ok(None) = self.process.try_wait() {
+ self.process.kill().unwrap()
+ }
+
+ if Path::new(&self.resource_path).exists() {
+ fs::remove_dir_all(&self.resource_path).unwrap();
+ }
+ }
+}
+
+impl TestState {
+ pub fn new(
+ process: Child,
+ test_sock: StreamHandler,
+ qmp_sock: StreamHandler,
+ resource_path: String,
+ ) -> Self {
+ let ts = Self {
+ process,
+ test_sock,
+ qmp_sock,
+ resource_path,
+ };
+ ts.check_qmp_greet();
+ ts
+ }
+
+ pub fn stop(&mut self) {
+ self.qmp("{\"execute\": \"quit\"}");
+ self.process.wait().unwrap();
+ }
+
+ fn check_qmp_greet(&self) {
+ let timeout = Duration::from_secs(10);
+ let resp: Value =
+ serde_json::from_slice(self.qmp_sock.read_line(timeout).as_bytes()).unwrap();
+ assert!(resp.get("QMP").is_some());
+ }
+
+ pub fn wait_qmp_event(&self) -> Value {
+ let timeout = Duration::from_secs(10);
+ let resp: Value =
+ serde_json::from_slice(self.qmp_sock.read_line(timeout).as_bytes()).unwrap();
+ assert!(resp.get("event").is_some());
+ resp
+ }
+
+ pub fn qmp(&self, cmd: &str) -> Value {
+ let timeout = Duration::from_secs(10);
+ self.qmp_sock.clear_stream();
+ self.qmp_sock.write_line(cmd);
+ serde_json::from_slice(self.qmp_sock.read_line(timeout).as_bytes()).unwrap()
+ }
+
+ pub fn qmp_read(&self) -> Value {
+ let timeout = Duration::from_secs(10);
+ serde_json::from_slice(self.qmp_sock.read_line(timeout).as_bytes()).unwrap()
+ }
+
+ fn send_test_cmd(&self, cmd: &str) -> String {
+ let timeout = Duration::from_secs(10);
+ self.test_sock.write_line(cmd);
+ self.test_sock.read_line(timeout)
+ }
+
+ fn send_read_cmd(&self, cmd: &str) -> u64 {
+ let buf = self.send_test_cmd(cmd);
+ let resp: Vec<&str> = buf.split(' ').collect();
+ assert_eq!(resp.len(), 2);
+ match resp[0] {
+ "OK" => u64::from_str_radix(resp[1].replace("0x", "").as_str(), 16).unwrap(),
+ _ => panic!("Failed to execute {}.", cmd),
+ }
+ }
+
+ fn send_write_cmd(&self, cmd: &str) {
+ let buf = self.send_test_cmd(cmd);
+ let resp: Vec<&str> = buf.split(' ').collect();
+ match resp[0] {
+ "OK" => (),
+ _ => panic!("Failed to execute {}", cmd),
+ }
+ }
+
+ fn send_clock_cmd(&self, cmd: &str) -> u64 {
+ let buf = self.send_test_cmd(cmd);
+ let resp: Vec<&str> = buf.split(' ').collect();
+ assert_eq!(resp.len(), 2);
+ match resp[0] {
+ "OK" => resp[1].parse::<u64>().unwrap(),
+ _ => panic!("Failed to execute {}.", cmd),
+ }
+ }
+
+ pub fn readb(&self, addr: u64) -> u8 {
+ let cmd = format!("readb 0x{:x}", addr);
+ self.send_read_cmd(&cmd) as u8
+ }
+
+ pub fn readw(&self, addr: u64) -> u16 {
+ let cmd = format!("readw 0x{:x}", addr);
+ self.send_read_cmd(&cmd) as u16
+ }
+
+ pub fn readl(&self, addr: u64) -> u32 {
+ let cmd = format!("readl 0x{:x}", addr);
+ self.send_read_cmd(&cmd) as u32
+ }
+
+ pub fn readq(&self, addr: u64) -> u64 {
+ let cmd = format!("readq 0x{:x}", addr);
+ self.send_read_cmd(&cmd)
+ }
+
+ pub fn memread(&self, addr: u64, size: u64) -> Vec<u8> {
+ let cmd = format!("read 0x{:x} 0x{:x}", addr, size);
+ let buf = self.send_test_cmd(&cmd);
+ let resp: Vec<&str> = buf.split(' ').collect();
+ assert_eq!(resp.len(), 2);
+ match resp[0] {
+ "OK" => {
+ let data = resp[1].replace("0x", "");
+ hex::decode(data).unwrap()
+ }
+ _ => panic!("Failed to execute {}", cmd),
+ }
+ }
+
+ pub fn writeb(&self, addr: u64, data: u8) {
+ let cmd = format!("writeb 0x{:x}
0x{:x}", addr, data); + self.send_write_cmd(&cmd); + } + + pub fn writew(&self, addr: u64, data: u16) { + let cmd = format!("writew 0x{:x} 0x{:x}", addr, data); + self.send_write_cmd(&cmd); + } + + pub fn writel(&self, addr: u64, data: u32) { + let cmd = format!("writel 0x{:x} 0x{:x}", addr, data); + self.send_write_cmd(&cmd); + } + + pub fn writeq(&self, addr: u64, data: u64) { + let cmd = format!("writeq 0x{:x} 0x{:x}", addr, data); + self.send_write_cmd(&cmd); + } + + pub fn memwrite(&self, addr: u64, data: &[u8]) { + let cmd = format!( + "write 0x{:x} 0x{:x} 0x{}", + addr, + data.len(), + hex::encode(data) + ); + let buf = self.send_test_cmd(&cmd); + let resp: Vec<&str> = buf.split(' ').collect(); + match resp[0] { + "OK" => (), + _ => panic!("Failed to execute {}", cmd), + } + } + + pub fn memset(&self, addr: u64, size: u64, pat: &[u8]) { + let cmd = format!("memset 0x{:x} 0x{:x} 0x{}", addr, size, hex::encode(pat)); + let buf = self.send_test_cmd(&cmd); + let resp: Vec<&str> = buf.split(' ').collect(); + match resp[0] { + "OK" => (), + _ => panic!("Failed to execute {}", cmd), + } + } + + pub fn clock_step(&self) -> u64 { + let cmd = "clock_step".to_string(); + self.send_clock_cmd(&cmd) + } + + pub fn clock_step_ns(&self, ns: u64) -> u64 { + let cmd = format!("clock_step {}", ns); + self.send_clock_cmd(&cmd) + } + + pub fn clock_set(&self, tgt: u64) -> u64 { + let cmd = format!("clock_set {}", tgt); + self.send_clock_cmd(&cmd) + } + + pub fn query_msix(&self, addr: u64, data: u32) -> bool { + let cmd = format!("query_msix {} {}", addr, data); + let buf = self.send_test_cmd(&cmd); + let resp: Vec<&str> = buf.split(' ').collect(); + assert_eq!(resp.len(), 2); + match resp[0] { + "OK" => match resp[1] { + "TRUE" => true, + "FALSE" => false, + _ => panic!("Failed to execute {}.", cmd), + }, + _ => panic!("Failed to execute {}.", cmd), + } + } + + pub fn query_intx(&self, irq: u32) -> bool { + let cmd = format!("query_intx {}", irq); + let buf = self.send_test_cmd(&cmd); + let resp: Vec<&str> = buf.split(' ').collect(); + assert_eq!(resp.len(), 2); + + match resp[0] { + "OK" => match resp[1] { + "TRUE" => true, + "FALSE" => false, + _ => panic!("Failed to execute {}.", cmd), + }, + _ => panic!("Failed to execute {}.", cmd), + } + } + + pub fn eoi_intx(&self, irq: u32) -> bool { + let cmd = format!("eoi_intx {}", irq); + let buf = self.send_test_cmd(&cmd); + let resp: Vec<&str> = buf.split(' ').collect(); + assert_eq!(resp.len(), 2); + match resp[0] { + "OK" => match resp[1] { + "TRUE" => true, + "FALSE" => false, + _ => panic!("Failed to execute {}.", cmd), + }, + _ => panic!("Failed to execute {}.", cmd), + } + } +} + +fn init_socket(path: &str) -> UnixListener { + let socket = Path::new(path); + if socket.exists() { + fs::remove_file(socket).unwrap(); + } + UnixListener::bind(socket).unwrap() +} + +fn connect_socket(path: &str) -> UnixStream { + UnixStream::connect(path).unwrap() +} + +fn socket_accept_wait(listener: UnixListener, timeout: Duration) -> Option { + let start = Instant::now(); + listener.set_nonblocking(true).unwrap(); + + while start + timeout > Instant::now() { + match listener.accept() { + Ok((stream, _addr)) => return Some(stream), + Err(_) => sleep(Duration::from_millis(100)), + }; + } + None +} + +pub fn test_init(extra_arg: Vec<&str>) -> TestState { + let binary_path = env::var("STRATOVIRT_BINARY").unwrap(); + let tmp_dir = get_tmp_dir(); + let test_socket = format!("{}/test.socket", tmp_dir); + let qmp_socket = format!("{}/qmp.socket", tmp_dir); + + let listener = 
init_socket(&test_socket);
+
+ let mut cmd = Command::new(binary_path);
+
+ #[cfg(target_env = "ohos")]
+ cmd.args(["-disable-seccomp"]);
+
+ let child = cmd
+ .args(["-accel", "test"])
+ .args(["-qmp", &format!("unix:{},server,nowait", qmp_socket)])
+ .args(["-mod-test", &test_socket])
+ .args(extra_arg)
+ .spawn()
+ .unwrap();
+
+ let test_sock =
+ StreamHandler::new(socket_accept_wait(listener, Duration::from_secs(10)).unwrap());
+ let qmp_sock = StreamHandler::new(connect_socket(&qmp_socket));
+
+ TestState::new(child, test_sock, qmp_sock, tmp_dir)
+}
diff --git a/tests/mod_test/src/utils.rs b/tests/mod_test/src/utils.rs
new file mode 100644
index 0000000000000000000000000000000000000000..06a2118a2fa30bbd8ba6d6ac0bbf33d8f0aef6da
--- /dev/null
+++ b/tests/mod_test/src/utils.rs
@@ -0,0 +1,153 @@
+// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
+
+use std::fs;
+use std::path::Path;
+use std::process::Command;
+
+use rand::distributions::Alphanumeric;
+use rand::{thread_rng, Rng};
+
+use crate::libdriver::qcow2::create_qcow2_img;
+
+pub fn get_rand_str(size: usize) -> String {
+ thread_rng()
+ .sample_iter(&Alphanumeric)
+ .take(size)
+ .map(char::from)
+ .collect()
+}
+
+pub fn create_dir(dir_path: &str) {
+ fs::create_dir(dir_path).unwrap();
+}
+
+pub fn get_tmp_dir() -> String {
+ let dir_name = format!("/tmp/test-{}", get_rand_str(10));
+ create_dir(&dir_name);
+ dir_name
+}
+
+pub fn read_le_u16(input: &mut &[u8]) -> u16 {
+ let (int_bytes, rest) = input.split_at(std::mem::size_of::<u16>());
+ *input = rest;
+ u16::from_le_bytes(int_bytes.try_into().unwrap())
+}
+
+pub fn read_le_u32(input: &mut &[u8]) -> u32 {
+ let (int_bytes, rest) = input.split_at(std::mem::size_of::<u32>());
+ *input = rest;
+ u32::from_le_bytes(int_bytes.try_into().unwrap())
+}
+
+pub fn read_le_u64(input: &mut &[u8]) -> u64 {
+ let (int_bytes, rest) = input.split_at(std::mem::size_of::<u64>());
+ *input = rest;
+ u64::from_le_bytes(int_bytes.try_into().unwrap())
+}
+
+pub fn swap_u16(value: u16) -> u16 {
+ value << 8 | value >> 8
+}
+
+pub fn swap_u32(value: u32) -> u32 {
+ let lower_u16 = u32::from(swap_u16(value as u16));
+ let higher_u16 = u32::from(swap_u16((value >> 16) as u16));
+ lower_u16 << 16 | higher_u16
+}
+
+pub fn swap_u64(value: u64) -> u64 {
+ let lower_u32 = u64::from(swap_u32(value as u32));
+ let higher_u32 = u64::from(swap_u32((value >> 32) as u32));
+ lower_u32 << 32 | higher_u32
+}
+
+pub const TEST_IMAGE_BITS: u64 = 26;
+pub const TEST_IMAGE_SIZE: u64 = 1 << TEST_IMAGE_BITS;
+
+#[derive(Debug, PartialEq, Eq)]
+pub enum ImageType {
+ Raw,
+ Qcow2,
+}
+
+impl ImageType {
+ pub const IMAGE_TYPE: [Self; 2] = [ImageType::Raw, ImageType::Qcow2];
+}
+
+/// Create image file.
+pub fn create_img(image_size: u64, flag: u8, image_type: &ImageType) -> String { + let rng_name: String = get_rand_str(8); + + assert!(cfg!(target_os = "linux")); + + let mut image_path = format!("/tmp/stratovirt-{}.img", rng_name); + if flag == 1 { + image_path = format!("/var/log/stratovirt-{}.img", rng_name); + } + + match image_type { + &ImageType::Raw => create_raw_img(image_path.clone(), image_size), + &ImageType::Qcow2 => create_qcow2_img(image_path.clone(), image_size), + } + + image_path +} + +fn create_raw_img(image_path: String, size: u64) { + let image_path_of = format!("of={}", &image_path); + let image_size_of = format!("bs={}", size); + let output = Command::new("dd") + .arg("if=/dev/zero") + .arg(&image_path_of) + .arg(&image_size_of) + .arg("count=1") + .output() + .expect("failed to create image"); + assert!(output.status.success()); +} + +/// Delete image file. +pub fn cleanup_img(image_path: String) { + let img_path = Path::new(&image_path); + assert!(img_path.exists()); + + let metadata = fs::metadata(img_path).expect("can not get file metadata"); + let file_type = metadata.file_type(); + assert!(file_type.is_file()); + + fs::remove_file(img_path).expect("lack permissions to remove the file"); +} + +pub fn support_numa() -> bool { + let numa_nodes_path = "/sys/devices/system/node/"; + + if Path::new(numa_nodes_path).exists() { + match fs::read_dir(numa_nodes_path) { + Ok(entries) => { + let mut has_nodes = false; + for entry in entries { + if let Ok(entry) = entry { + if entry.file_name().to_str().unwrap_or("").starts_with("node") { + has_nodes = true; + break; + } + } + } + has_nodes + } + Err(_) => false, + } + } else { + false + } +} diff --git a/tests/mod_test/tests/aarch64/acpi_test.rs b/tests/mod_test/tests/aarch64/acpi_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..563d5ebc9b6af7201ffa3fd70b62a27cf467fad4 --- /dev/null +++ b/tests/mod_test/tests/aarch64/acpi_test.rs @@ -0,0 +1,545 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{cell::RefCell, mem, rc::Rc}; + +use byteorder::{ByteOrder, LittleEndian}; + +use acpi::{ + AcpiGicCpu, AcpiGicDistributor, AcpiGicRedistributor, AcpiRsdp, AcpiSratGiccAffinity, + AcpiSratMemoryAffinity, AcpiTableHeader, CacheHierarchyNode, ProcessorHierarchyNode, +}; +use machine::aarch64::standard::{LayoutEntryType, MEM_LAYOUT}; +use mod_test::libdriver::fwcfg::bios_args; +use mod_test::libdriver::machine::TestStdMachine; +use mod_test::libdriver::malloc::GuestAllocator; +use mod_test::libtest::{test_init, TestState}; + +// Now facs table data length is 64. +const FACS_TABLE_DATA_LENGTH: u32 = 64; +// Now dsdt table data length is 3488. +const DSDT_TABLE_DATA_LENGTH: u32 = 3488; +// Now fadt table data length is 276. +const FADT_TABLE_DATA_LENGTH: u32 = 276; +// Now madt table data length is 744. +const MADT_TABLE_DATA_LENGTH: u32 = 744; +// Now gtdt table data length is 96. +const GTDT_TABLE_DATA_LENGTH: u32 = 96; +// Now dbg2 table data length is 87. 
+const DBG2_TABLE_DATA_LENGTH: u32 = 87; +// Now iort table data length is 128. +const IORT_TABLE_DATA_LENGTH: u32 = 128; +// Now spcr table data length is 80. +const SPCR_TABLE_DATA_LENGTH: u32 = 80; +// Now mcfg table data length is 60. +const MCFG_TABLE_DATA_LENGTH: u32 = 60; +// Now acpi tables data length is 6133(cpu number is 8). +const ACPI_TABLES_DATA_LENGTH_8: usize = 6139; +// Now acpi tables data length is 40574(cpu number is 200). +const ACPI_TABLES_DATA_LENGTH_200: usize = 40580; + +enum TABLE { + Fadt, + Madt, + Gtdt, + Dbg2, + Iort, + Spcr, + Mcfg, + Srat, + Slit, + Pptt, +} + +fn test_rsdp(test_state: &TestState, alloc: &mut GuestAllocator) -> u64 { + let file_name = "etc/acpi/rsdp"; + let mut read_data: Vec = Vec::with_capacity(mem::size_of::()); + + // Select FileDir entry and read it. + let file_size = test_state.fw_cfg_read_file( + alloc, + file_name, + &mut read_data, + mem::size_of::() as u32, + ); + + assert_eq!(file_size, mem::size_of::() as u32); + // Check RSDP signature: "RSD PTR". + assert_eq!(String::from_utf8_lossy(&read_data[..8]), "RSD PTR "); + // Check RSDP revision: 2. + assert_eq!(read_data[15], 2); + + // Check 32-bit address of RSDT table: 0 + let rsdt_addr = LittleEndian::read_u32(&read_data[16..]); + assert_eq!(rsdt_addr, 0); + + // Check 64-bit address of XSDT table. + let xsdt_addr = LittleEndian::read_u64(&read_data[24..]); + assert_ne!(xsdt_addr, 0); + + xsdt_addr +} + +fn check_dsdt(data: &[u8]) { + assert_eq!(String::from_utf8_lossy(&data[..4]), "DSDT"); + assert_eq!(LittleEndian::read_u32(&data[4..]), DSDT_TABLE_DATA_LENGTH); // Check length +} + +fn check_facs(data: &[u8]) { + assert_eq!(String::from_utf8_lossy(&data[..4]), "FACS"); + assert_eq!(LittleEndian::read_u32(&data[4..]), FACS_TABLE_DATA_LENGTH); // Check length +} + +fn check_fadt(data: &[u8]) -> (u32, u64) { + assert_eq!(String::from_utf8_lossy(&data[..4]), "FACP"); + assert_eq!(LittleEndian::read_u32(&data[4..]), FADT_TABLE_DATA_LENGTH); // Check length + + // Enable HW_REDUCED_ACPI bit + assert_eq!(LittleEndian::read_i32(&data[112..]), 0x10_0500); + assert_eq!(LittleEndian::read_u16(&data[129..]), 0x3); // ARM Boot Architecture Flags + assert_eq!(LittleEndian::read_i32(&data[131..]), 3); // FADT minor revision + + // Check 32-bit address of FACS table. + let facs_addr = LittleEndian::read_u32(&data[36..]); + assert_eq!(facs_addr, 0); + + // Check 64-bit address of DSDT table. 
+ let dsdt_addr = LittleEndian::read_u64(&data[140..]); + assert_ne!(dsdt_addr, 0); + + (facs_addr, dsdt_addr) +} + +fn check_madt(data: &[u8], cpu: u8) { + assert_eq!(String::from_utf8_lossy(&data[..4]), "APIC"); + assert_eq!(LittleEndian::read_u32(&data[4..]), MADT_TABLE_DATA_LENGTH); // Check length + + let mut offset = 44; + + // Check GIC Distributor + assert_eq!( + data[offset + 1] as usize, + mem::size_of::() + ); + let gicd_addr = LittleEndian::read_u64(&data[(offset + 8)..]); + assert_eq!(gicd_addr, MEM_LAYOUT[LayoutEntryType::GicDist as usize].0); + + // Check GIC version + assert_eq!(data[offset + 20], 3); + + // Check GIC CPU + offset += mem::size_of::(); + for i in 0..cpu { + assert_eq!(data[offset + 1], 80); // The length of this structure + assert_eq!(LittleEndian::read_u32(&data[(offset + 4)..]), u32::from(i)); // CPU interface number + assert_eq!(LittleEndian::read_u32(&data[(offset + 8)..]), u32::from(i)); // ACPI processor UID + assert_eq!(LittleEndian::read_u32(&data[(offset + 12)..]), 5); // Flags + assert_eq!(LittleEndian::read_u32(&data[(offset + 20)..]), 23); // Performance monitoring interrupts + assert_eq!(LittleEndian::read_u64(&data[(offset + 56)..]), 25); // Virtual GIC maintenance interrupt + assert_eq!(LittleEndian::read_u64(&data[(offset + 68)..]), u64::from(i)); // MPIDR + offset += mem::size_of::(); + } + + // Check GIC Redistributor + let mut addr = LittleEndian::read_u64(&data[(offset + 4)..]); + assert_eq!(MEM_LAYOUT[LayoutEntryType::GicRedist as usize].0, addr); + + // Check GIC Its + offset += mem::size_of::(); + addr = LittleEndian::read_u64(&data[(offset + 8)..]); + assert_eq!(MEM_LAYOUT[LayoutEntryType::GicIts as usize].0, addr); +} + +fn check_gtdt(data: &[u8]) { + assert_eq!(String::from_utf8_lossy(&data[..4]), "GTDT"); + assert_eq!(LittleEndian::read_u32(&data[4..]), GTDT_TABLE_DATA_LENGTH); // Check length + + assert_eq!(LittleEndian::read_u64(&data[36..]), 0xFFFF_FFFF_FFFF_FFFF); // Counter control block physical address + assert_eq!(LittleEndian::read_u32(&data[48..]), 29); // Secure EL1 interrupt + assert_eq!(LittleEndian::read_u32(&data[52..]), 0); // Secure EL1 flags + assert_eq!(LittleEndian::read_u32(&data[56..]), 30); // Non secure EL1 interrupt + assert_eq!(LittleEndian::read_u32(&data[60..]), 4); // Non secure EL1 flags + assert_eq!(LittleEndian::read_u32(&data[64..]), 27); // Virtual timer interrupt + assert_eq!(LittleEndian::read_u32(&data[68..]), 0); // Virtual timer flags + assert_eq!(LittleEndian::read_u32(&data[72..]), 26); // Non secure EL2 interrupt + assert_eq!(LittleEndian::read_u32(&data[76..]), 0); // Non secure EL2 flags + assert_eq!(LittleEndian::read_u64(&data[80..]), 0xFFFF_FFFF_FFFF_FFFF); // Counter base block physical address +} + +fn check_dbg2(data: &[u8]) { + assert_eq!(String::from_utf8_lossy(&data[..4]), "DBG2"); + assert_eq!(LittleEndian::read_u32(&data[4..]), DBG2_TABLE_DATA_LENGTH); // Check length +} + +fn check_iort(data: &[u8]) { + assert_eq!(String::from_utf8_lossy(&data[..4]), "IORT"); + assert_eq!(LittleEndian::read_u32(&data[4..]), IORT_TABLE_DATA_LENGTH); // Check length + + // Check IORT nodes is 2: ITS group node and Root Complex Node. 
+ assert_eq!(LittleEndian::read_u32(&data[36..]), 2); + assert_eq!(LittleEndian::read_u32(&data[40..]), 48); // Node offset + assert_eq!(data[48], 0); // ITS group node + assert_eq!(LittleEndian::read_u16(&data[49..]), 24); // ITS node length + assert_eq!(data[51], 1); // ITS node revision + assert_eq!(LittleEndian::read_u32(&data[64..]), 1); // ITS count + assert_eq!(data[72], 2); // Root Complex Node + assert_eq!(LittleEndian::read_u16(&data[73..]), 56); // Length of Root Complex Node + assert_eq!(data[75], 3); // Revision of Root Complex Node + assert_eq!(LittleEndian::read_u32(&data[76..]), 1); // Identifier of Root Complex Node + assert_eq!(LittleEndian::read_u32(&data[80..]), 1); // Mapping counts of Root Complex Node + assert_eq!(LittleEndian::read_u32(&data[84..]), 36); // Mapping offset of Root Complex Node + assert_eq!(LittleEndian::read_u32(&data[88..]), 1); // Cache of coherent device + assert_eq!(data[95], 3); // Memory flags of coherent device + assert_eq!(data[104], 0x40); // Memory address size limit + assert_eq!(LittleEndian::read_u32(&data[112..]), 0xffff); // Identity RID mapping + + // Without SMMU, id mapping is the first node in ITS group node + assert_eq!(LittleEndian::read_u32(&data[120..]), 48); +} + +fn check_spcr(data: &[u8]) { + assert_eq!(String::from_utf8_lossy(&data[..4]), "SPCR"); + assert_eq!(LittleEndian::read_u32(&data[4..]), SPCR_TABLE_DATA_LENGTH); // Check length + + assert_eq!(data[36], 3); // Interface type: ARM PL011 UART + assert_eq!(data[41], 8); // Bit width of AcpiGenericAddress + assert_eq!(data[43], 1); // Access width of AcpiGenericAddress + assert_eq!( + LittleEndian::read_u64(&data[44..]), + MEM_LAYOUT[LayoutEntryType::Uart as usize].0 + ); + assert_eq!(data[52], 1_u8 << 3); // Interrupt Type: Arm GIC interrupu + assert_eq!(LittleEndian::read_u32(&data[54..]), 39); // Irq number used by the UART + assert_eq!(data[58], 3); // Set baud rate: 3 = 9600 + assert_eq!(data[60], 1); // Stop bit + assert_eq!(data[61], 2); // Hardware flow control + + // PCI Device ID: it is not a PCI device + assert_eq!(LittleEndian::read_u16(&data[64..]), 0xffff); + // PCI Vendor ID: it is not a PCI device + assert_eq!(LittleEndian::read_u16(&data[66..]), 0xffff); +} + +fn check_mcfg(data: &[u8]) { + assert_eq!(String::from_utf8_lossy(&data[..4]), "MCFG"); + assert_eq!(LittleEndian::read_u32(&data[4..]), MCFG_TABLE_DATA_LENGTH); // Check length + + assert_eq!( + LittleEndian::read_u64(&data[44..]), + MEM_LAYOUT[LayoutEntryType::HighPcieEcam as usize].0 + ); + assert_eq!(LittleEndian::read_u16(&data[52..]), 0); // PCI Segment Group Number + assert_eq!(data[54], 0); // Start Bus Number + assert_eq!(data[55], 255); // End Bus Number +} + +fn check_srat(data: &[u8]) { + assert_eq!(String::from_utf8_lossy(&data[..4]), "SRAT"); + + // offset = AcpiTable.len = 36 + reserved.len = 12 + let mut offset = 36 + 12; + let mut base_addr = MEM_LAYOUT[LayoutEntryType::Mem as usize].0; + // Check Numa Node: + // -object memory-backend-ram,size=2G,id=mem0,host-nodes=0-1,policy=bind + // -object memory-backend-ram,size=2G,id=mem1,host-nodes=0-1,policy=bind + // -numa node,nodeid=0,cpus=0-3,memdev=mem0 + // -numa node,nodeid=1,cpus=4-7,memdev=mem1 + for i in 0..2 { + for j in 0..4 { + let proximity_domain = LittleEndian::read_u32(&data[(offset + 2)..]); + assert_eq!(proximity_domain, i); + let process_uid = LittleEndian::read_u32(&data[(offset + 6)..]); + assert_eq!(process_uid, (i * 4) + j); + offset += mem::size_of::(); + } + assert_eq!(LittleEndian::read_u64(&data[(offset + 8)..]), 
base_addr); + let size = LittleEndian::read_u64(&data[(offset + 16)..]); + assert_eq!(size, 0x8000_0000); + base_addr = base_addr + size; + offset += mem::size_of::(); + } +} + +fn check_slit(data: &[u8]) { + assert_eq!(String::from_utf8_lossy(&data[..4]), "SLIT"); + + // offset = AcpiTable.len + NumaNode.len + let mut offset = 44; + // -numa dist,src=0,dst=1,val=30 + // -numa dist,src=1,dst=0,val=30 + for i in 0..2 { + for j in 0..2 { + if i == j { + assert_eq!(data[offset], 10); + } else { + assert_eq!(data[offset], 30); + } + offset += 1; + } + } +} + +fn check_pptt(data: &[u8]) { + assert_eq!(String::from_utf8_lossy(&data[..4]), "PPTT"); + + // offset = AcpiTable.len = 36 + let mut offset = 36; + // sockets = 1, clusters = 1, cores = 4, threads = 2 + // Check L3 cache type, next_level and attributes. + assert_eq!(data[offset], 1); + assert_eq!(LittleEndian::read_u32(&data[(offset + 8)..]), 0); + assert_eq!(data[offset + 21], 10); + + // Check sockets flags and processor_id. + offset += mem::size_of::(); + assert_eq!(LittleEndian::read_u32(&data[(offset + 4)..]), 1); + assert_eq!(LittleEndian::read_u32(&data[(offset + 12)..]), 0); + + // Check clusters flags and processor_id. + // Sockets have an L3 cache, so it's offset to add 4. + offset += mem::size_of::() + 4; + assert_eq!(LittleEndian::read_u32(&data[(offset + 4)..]), 0); + assert_eq!(LittleEndian::read_u32(&data[(offset + 12)..]), 0); + + for i in 0..4 { + // Check L2 cache type, next_level and attributes. + offset += mem::size_of::(); + assert_eq!(data[offset], 1); + assert_eq!(LittleEndian::read_u32(&data[(offset + 8)..]), 0); + assert_eq!(data[offset + 21], 10); + + // Check L1D cache type, next_level and attributes. + let next_level = offset as u32; + offset += mem::size_of::(); + assert_eq!(data[offset], 1); + assert_eq!(LittleEndian::read_u32(&data[(offset + 8)..]), next_level); + assert_eq!(data[offset + 21], 2); + + // Check L1I cache type, next_level and attributes. + offset += mem::size_of::(); + assert_eq!(data[offset], 1); + assert_eq!(LittleEndian::read_u32(&data[(offset + 8)..]), next_level); + assert_eq!(data[offset + 21], 4); + + // Check cores flags and processor_id. + offset += mem::size_of::(); + assert_eq!(LittleEndian::read_u32(&data[(offset + 4)..]), 0); + assert_eq!(LittleEndian::read_u32(&data[(offset + 12)..]), i); + + // Cores have L2, L1D, L1I cache, so it'3 offset to add 3 * 4; + offset += 3 * 4; + for j in 0..2 { + // Check threads flags and processor_id. + offset += mem::size_of::(); + assert_eq!(LittleEndian::read_u32(&data[(offset + 4)..]), 0xE); + assert_eq!(LittleEndian::read_u32(&data[(offset + 12)..]), i * 2 + j); + } + } +} + +fn test_tables(test_state: &TestState, alloc: &mut GuestAllocator, xsdt_addr: usize, cpu: u8) { + let file_name = "etc/acpi/tables"; + let mut read_data: Vec = Vec::with_capacity(ACPI_TABLES_DATA_LENGTH_8); + + // Select FileDir entry and read it. + let file_size = test_state.fw_cfg_read_file( + alloc, + file_name, + &mut read_data, + ACPI_TABLES_DATA_LENGTH_8 as u32, + ); + assert_eq!(file_size, ACPI_TABLES_DATA_LENGTH_8 as u32); + + // Check XSDT + assert_eq!( + String::from_utf8_lossy(&read_data[xsdt_addr..(xsdt_addr + 4)]), + "XSDT" + ); + + // XSDT entry: An array of 64-bit physical addresses that point to other DESCRIPTION_HEADERs. 
+ // DESCRIPTION_HEADERs: FADT, MADT, GTDT, IORT, SPCR, MCFG, SRAT, SLIT, PPTT + let entry_addr = xsdt_addr + mem::size_of::(); + + // Check FADT + let mut offset = entry_addr + TABLE::Fadt as usize * 8; + let fadt_addr = LittleEndian::read_u64(&read_data[offset..]); + let (facs_addr, dsdt_addr) = check_fadt(&read_data[(fadt_addr as usize)..]); + + // Check FACS (FACS table is pointed to by the FADT table) + check_facs(&read_data[(facs_addr as usize)..]); + + // Check DSDT (DSDT table is pointed to by the FADT table) + check_dsdt(&read_data[(dsdt_addr as usize)..]); + + // Check MADT + offset = entry_addr + TABLE::Madt as usize * 8; + let madt_addr = LittleEndian::read_u64(&read_data[offset..]); + check_madt(&read_data[(madt_addr as usize)..], cpu); + + // Check GTDT + offset = entry_addr + TABLE::Gtdt as usize * 8; + let gtdt_addr = LittleEndian::read_u64(&read_data[offset..]); + check_gtdt(&read_data[(gtdt_addr as usize)..]); + + // Check DBG2 + offset = entry_addr + TABLE::Dbg2 as usize * 8; + let gtdt_addr = LittleEndian::read_u64(&read_data[offset..]); + check_dbg2(&read_data[(gtdt_addr as usize)..]); + + // Check IORT + offset = entry_addr + TABLE::Iort as usize * 8; + let iort_addr = LittleEndian::read_u64(&read_data[offset..]); + check_iort(&read_data[(iort_addr as usize)..]); + + // Check SPCR + offset = entry_addr + TABLE::Spcr as usize * 8; + let spcr_addr = LittleEndian::read_u64(&read_data[offset..]); + check_spcr(&read_data[(spcr_addr as usize)..]); + + // Check MCFG + offset = entry_addr + TABLE::Mcfg as usize * 8; + let mcfg_addr = LittleEndian::read_u64(&read_data[offset..]); + check_mcfg(&read_data[(mcfg_addr as usize)..]); + + // Check SRAT + offset = entry_addr + TABLE::Srat as usize * 8; + let srat_addr = LittleEndian::read_u64(&read_data[offset..]); + check_srat(&read_data[(srat_addr as usize)..]); + + // Check SLIT + offset = entry_addr + TABLE::Slit as usize * 8; + let slit_addr = LittleEndian::read_u64(&read_data[offset..]); + check_slit(&read_data[(slit_addr as usize)..]); + + // Check PPTT + offset = entry_addr + TABLE::Pptt as usize * 8; + let pptt_addr = LittleEndian::read_u64(&read_data[offset..]); + check_pptt(&read_data[(pptt_addr as usize)..]); +} + +fn check_madt_of_two_gicr( + test_state: &TestState, + alloc: &mut GuestAllocator, + xsdt_addr: usize, + cpus: usize, +) { + let file_name = "etc/acpi/tables"; + let mut read_data: Vec = Vec::with_capacity(ACPI_TABLES_DATA_LENGTH_200); + + // Select FileDir entry and read it. + test_state.fw_cfg_read_file( + alloc, + file_name, + &mut read_data, + ACPI_TABLES_DATA_LENGTH_200 as u32, + ); + + // XSDT entry: An array of 64-bit physical addresses that point to other DESCRIPTION_HEADERs. 
+ // DESCRIPTION_HEADERs: FADT, MADT, GTDT, IORT, SPCR, MCFG, SRAT, SLIT, PPTT + let entry_addr = xsdt_addr + mem::size_of::(); + + // MADT offset base on XSDT + let mut offset = entry_addr + TABLE::Madt as usize * 8; + let madt_addr = LittleEndian::read_u64(&read_data[offset..]) as usize; + + // Check second GIC Redistributor + // Second GIC Redistributor addr offset base on MADT: header len = 44 + offset = 44 + + mem::size_of::() + + mem::size_of::() * cpus + + mem::size_of::(); + let addr = LittleEndian::read_u64(&read_data[(madt_addr + offset + 4)..]); + assert_eq!(MEM_LAYOUT[LayoutEntryType::HighGicRedist as usize].0, addr); + let len = LittleEndian::read_u32(&read_data[(madt_addr + offset + 12)..]); + assert_eq!( + MEM_LAYOUT[LayoutEntryType::HighGicRedist as usize].1, + u64::from(len) + ); +} + +#[test] +fn test_acpi_virt() { + let mut args = Vec::new(); + bios_args(&mut args); + + let cpu = 8; + let cpu_args = format!( + "-smp {},sockets=1,cores=4,threads=2 -cpu host,pmu=on -m 4G", + cpu + ); + let mut extra_args = cpu_args.split(' ').collect(); + args.append(&mut extra_args); + extra_args = "-object memory-backend-ram,size=2G,id=mem0,host-nodes=0-1,policy=bind" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-object memory-backend-ram,size=2G,id=mem1,host-nodes=0-1,policy=bind" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-numa node,nodeid=0,cpus=0-3,memdev=mem0" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-numa node,nodeid=1,cpus=4-7,memdev=mem1" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-numa dist,src=0,dst=1,val=30".split(' ').collect(); + args.append(&mut extra_args); + extra_args = "-numa dist,src=1,dst=0,val=30".split(' ').collect(); + args.append(&mut extra_args); + extra_args = "-serial pty".split(' ').collect(); + args.append(&mut extra_args); + + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let alloc = machine.allocator.clone(); + + let xsdt_addr = test_rsdp(&test_state.borrow(), &mut alloc.borrow_mut()); + test_tables( + &test_state.borrow(), + &mut alloc.borrow_mut(), + xsdt_addr as usize, + cpu, + ); + + test_state.borrow_mut().stop(); +} + +#[test] +fn test_acpi_two_gicr() { + let mut args = Vec::new(); + bios_args(&mut args); + + let cpus = 200; + let cpu_args = format!( + "-smp {},sockets=2,cores=50,threads=2 -cpu host,pmu=on -m 4G", + cpus + ); + let mut extra_args = cpu_args.split(' ').collect(); + args.append(&mut extra_args); + + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let alloc = machine.allocator.clone(); + + let xsdt_addr = test_rsdp(&test_state.borrow(), &mut alloc.borrow_mut()); + check_madt_of_two_gicr( + &test_state.borrow(), + &mut alloc.borrow_mut(), + xsdt_addr as usize, + cpus, + ); + + test_state.borrow_mut().stop(); +} diff --git a/tests/mod_test/tests/aarch64/ged_test.rs b/tests/mod_test/tests/aarch64/ged_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..21895717401ea96e516674d29dced1d0a3b3cb98 --- /dev/null +++ b/tests/mod_test/tests/aarch64/ged_test.rs @@ -0,0 +1,98 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
+
+use machine::aarch64::standard::{LayoutEntryType, MEM_LAYOUT};
+use mod_test::libtest::{test_init, TestState, MACHINE_TYPE_ARG};
+
+pub const GED_ADDR_BASE: u64 = MEM_LAYOUT[LayoutEntryType::Ged as usize].0;
+const ADD_ADDRESS: u64 = 1;
+
+fn ged_read_evt(ts: &TestState) -> u32 {
+ ts.readl(GED_ADDR_BASE)
+}
+
+fn ged_read_abnormal(ts: &TestState) -> u32 {
+ ts.readl(GED_ADDR_BASE + ADD_ADDRESS)
+}
+
+fn ged_write_evt(ts: &TestState, val: u32) {
+ ts.writel(GED_ADDR_BASE, val);
+}
+
+fn ged_args(base_args: &mut Vec<&str>) {
+ let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect();
+ base_args.append(&mut args);
+ args = "-no-shutdown".split(' ').collect();
+ base_args.append(&mut args);
+ args = "-drive file=/usr/share/edk2/aarch64/QEMU_EFI-pflash.raw,if=pflash,unit=0,readonly=true"
+ .split(' ')
+ .collect();
+ base_args.append(&mut args);
+}
+
+fn set_up() -> TestState {
+ let mut args: Vec<&str> = Vec::new();
+ ged_args(&mut args);
+ test_init(args)
+}
+
+/// Test the read and write functions of a GED device.
+///
+/// Steps
+/// 1. Send qmp command "system_powerdown".
+/// 2. Read ged event.
+/// 3. Read abnormal address, expect 0.
+/// 4. Write event and read it back, expect 0 because the ged register cannot be written.
+#[test]
+fn test_shutdown() {
+ let mut ts = set_up();
+
+ ts.qmp("{\"execute\": \"system_powerdown\"}");
+
+ let event = ged_read_evt(&ts);
+ assert_eq!(event, 1);
+
+ let addr = ged_read_abnormal(&ts);
+ assert_eq!(addr, 0);
+
+ ged_write_evt(&ts, 1);
+ let event = ged_read_evt(&ts);
+ assert_eq!(event, 0);
+
+ ts.stop();
+}
+
+/// Verify that the reboot function works normally.
+///
+/// Steps
+/// 1. Send qmp command "system_powerdown" and "system_reset" to achieve "reboot".
+/// 2. Read ged event.
+/// 3. Send qmp command "query-status" to get the status of vm, expect "running".
+#[test]
+fn test_reboot() {
+ let mut ts = set_up();
+
+ ts.qmp("{\"execute\": \"system_powerdown\"}");
+ ts.qmp_read();
+
+ let event = ged_read_evt(&ts);
+ assert_eq!(event, 1);
+
+ ts.qmp("{\"execute\": \"system_reset\"}");
+ ts.qmp_read();
+
+ let value = ts.qmp("{\"execute\": \"query-status\"}");
+ let status = value["return"]["status"].as_str().unwrap().to_string();
+ assert_eq!(status, "running".to_string());
+
+ ts.stop();
+}
diff --git a/tests/mod_test/tests/aarch64/mod.rs b/tests/mod_test/tests/aarch64/mod.rs
new file mode 100644
index 0000000000000000000000000000000000000000..846da10ccd6f1cbb277aa1220234b38fad37ad02
--- /dev/null
+++ b/tests/mod_test/tests/aarch64/mod.rs
@@ -0,0 +1,16 @@
+// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
+ +mod acpi_test; +mod ged_test; +mod pl031_test; +mod ramfb_test; diff --git a/tests/mod_test/tests/aarch64/pl031_test.rs b/tests/mod_test/tests/aarch64/pl031_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..a0b0a4340e2a02368283125cc245ee8785df63a1 --- /dev/null +++ b/tests/mod_test/tests/aarch64/pl031_test.rs @@ -0,0 +1,120 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::thread::sleep; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use rand::{thread_rng, Rng}; + +use devices::legacy::{RTC_CR, RTC_DR, RTC_IMSC, RTC_LR}; +use mod_test::libtest::{test_init, TestState, MACHINE_TYPE_ARG}; + +const RTC_ADDR_BASE: u64 = 0x0901_0000; + +fn pl031_read_time(ts: &TestState) -> u32 { + ts.readl(RTC_ADDR_BASE + RTC_DR) +} + +fn pl031_set_time(ts: &TestState, time: u32) { + ts.writel(RTC_ADDR_BASE + RTC_LR, time); +} + +fn get_wall_time() -> u32 { + match SystemTime::now().duration_since(UNIX_EPOCH) { + Ok(tick) => tick.as_secs() as u32, + _ => panic!("Failed to get wall time."), + } +} + +fn pl031_read_reg(ts: &TestState, reg: u64) -> u32 { + ts.readl(RTC_ADDR_BASE + reg) +} + +fn pl031_write_reg(ts: &TestState, reg: u64, val: u32) { + ts.writel(RTC_ADDR_BASE + reg, val); +} + +fn set_up() -> TestState { + let extra_args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + test_init(extra_args) +} + +#[test] +fn check_time() { + let mut ts = set_up(); + + let time1 = pl031_read_time(&ts); + let time2 = pl031_read_time(&ts); + + sleep(Duration::from_millis(2000)); + let time3 = pl031_read_time(&ts); + let time4 = pl031_read_time(&ts); + let wall_time = get_wall_time(); + + assert!((time2 - time1) <= 1); + assert!((time3 - time2) <= 3); + assert!((time3 - time2) >= 2); + assert!((time4 - time3) <= 1); + assert!((wall_time - time4) <= 1); + + ts.stop(); +} + +#[test] +fn set_time() { + let mut ts = set_up(); + let time1 = pl031_read_time(&ts); + + // Time passes about 5 years. 
+ let time_lapse = 1_5768_0000; + pl031_set_time(&ts, time1 + time_lapse); + + let time2 = pl031_read_time(&ts); + + assert!((time2 - time1) >= time_lapse); + assert!((time2 - time1) <= time_lapse + 1); + + ts.stop(); +} + +#[test] +fn rtc_enable() { + let mut ts = set_up(); + + assert_eq!(pl031_read_reg(&ts, RTC_CR), 1); + ts.stop(); +} + +#[test] +fn set_mask() { + let mut ts = set_up(); + + pl031_write_reg(&ts, RTC_IMSC, 1); + + assert_eq!(pl031_read_reg(&ts, RTC_IMSC), 1); + ts.stop(); +} + +#[test] +fn reg_fuzz() { + let mut ts = set_up(); + let mut rng = thread_rng(); + + for _ in 0..1000 { + let reg = rng.gen_range(0..=32); + let val = rng.gen_range(0..=1024); + pl031_read_reg(&ts, reg); + pl031_write_reg(&ts, reg, val); + } + + ts.stop(); +} diff --git a/tests/mod_test/tests/aarch64/ramfb_test.rs b/tests/mod_test/tests/aarch64/ramfb_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..b147eb6550d8a690abe3e9790d4f837117fb45b2 --- /dev/null +++ b/tests/mod_test/tests/aarch64/ramfb_test.rs @@ -0,0 +1,260 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::cell::RefCell; +use std::fs; +use std::mem; +use std::path::Path; +use std::rc::Rc; + +use mod_test::libdriver::fwcfg::{bios_args, FwCfgDmaAccess}; +use mod_test::libdriver::machine::TestStdMachine; +use mod_test::libdriver::malloc::GuestAllocator; +use mod_test::libtest::{test_init, TestState}; +use mod_test::utils::{swap_u32, swap_u64}; + +const FRAMEBUFFER_SIZE: u64 = 3 * 1024 * 1024; +const RAMFB_FORMAT: u32 = 0x34325258; +const HORIZONTAL_RESOLUTION: u32 = 800; +const VERTICAL_RESOLUTION: u32 = 600; +const RAMFB_BPP: u32 = 4; +const ABNORMAL_FB_BASE: u64 = 0x60000001; + +#[repr(C, packed(1))] +#[derive(Default)] +struct RamfbConfig { + address: u64, + fourcc: u32, + flags: u32, + width: u32, + height: u32, + stride: u32, +} + +impl RamfbConfig { + fn new(base: u64) -> Self { + RamfbConfig { + address: base, + fourcc: RAMFB_FORMAT, + flags: 0, + width: HORIZONTAL_RESOLUTION, + height: VERTICAL_RESOLUTION, + stride: RAMFB_BPP * HORIZONTAL_RESOLUTION, + } + } + + fn write_to_file( + &self, + allocator: &mut GuestAllocator, + test_state: &TestState, + file_name: &str, + ) { + let ramfb_config_addr = allocator.alloc(mem::size_of::() as u64); + test_state.writeq(ramfb_config_addr, swap_u64(self.address)); + test_state.writel(ramfb_config_addr + 8, swap_u32(self.fourcc)); + test_state.writel(ramfb_config_addr + 12, swap_u32(self.flags)); + test_state.writel(ramfb_config_addr + 16, swap_u32(self.width)); + test_state.writel(ramfb_config_addr + 20, swap_u32(self.height)); + test_state.writel(ramfb_config_addr + 24, swap_u32(self.stride)); + + let access = allocator.alloc(mem::size_of::() as u64); + test_state.fw_cfg_write_file( + allocator, + file_name, + access, + ramfb_config_addr, + mem::size_of::() as u32, + ); + } +} + +#[test] +fn test_basic() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + let ramfb_args = String::from("-device ramfb,id=ramfb1"); + args.append(&mut 
ramfb_args[..].split(' ').collect()); + let log_path = "/tmp/ramfb_test_basic.log"; + let log_args = format!("-D {}", log_path); + args.append(&mut log_args[..].split(' ').collect()); + + if Path::new(log_path).exists() { + fs::remove_file(log_path).unwrap(); + } + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator.clone(); + + let file_name = "etc/ramfb"; + let framebuffer_base = allocator.borrow_mut().alloc(FRAMEBUFFER_SIZE); + + let mut file_contents = String::from(""); + match fs::File::create(log_path) { + Ok(_) => {} + Err(e) => assert!(false, "{}", e), + } + + let ramfb_config = RamfbConfig::new(framebuffer_base); + ramfb_config.write_to_file(&mut allocator.borrow_mut(), &test_state.borrow(), file_name); + + match fs::read_to_string(log_path) { + Ok(contents) => file_contents = contents, + Err(e) => assert!(false, "{}", e), + } + assert!( + file_contents + .find("ERROR: Failed to create image of ramfb!") + .is_none(), + "Failed to create image!" + ); + assert!(file_contents.find("ERROR").is_none(), "Unexpected error!"); + + match fs::remove_file(log_path) { + Ok(_) => {} + Err(e) => assert!(false, "{}", e), + } + test_state.borrow_mut().stop(); +} + +#[test] +fn test_abnormal_param() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + let ramfb_args = String::from("-device ramfb,id=ramfb1"); + args.append(&mut ramfb_args[..].split(' ').collect()); + let log_path = "/tmp/ramfb_test_abnormal.log"; + let log_args = format!("-D {}", log_path); + args.append(&mut log_args[..].split(' ').collect()); + + if Path::new(log_path).exists() { + fs::remove_file(log_path).unwrap(); + } + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator.clone(); + + let file_name = "etc/ramfb"; + let framebuffer_base = allocator.borrow_mut().alloc(FRAMEBUFFER_SIZE); + let mut file_contents = String::from(""); + + match fs::File::create(log_path) { + Ok(_) => {} + Err(e) => assert!(false, "{}", e), + } + + // Set framebuffer address is abnormal. + let mut ramfb_config = RamfbConfig::new(ABNORMAL_FB_BASE); + ramfb_config.write_to_file(&mut allocator.borrow_mut(), &test_state.borrow(), file_name); + + match fs::read_to_string(log_path) { + Ok(contents) => file_contents = contents, + Err(e) => assert!(false, "{}", e), + } + assert!( + file_contents + .find("ERROR: Failed to get the host address of the framebuffer") + .is_some(), + "Failed to check framebuffer address!" + ); + + // Set drm format is unsupported. + ramfb_config.address = framebuffer_base; + ramfb_config.fourcc = 0; + ramfb_config.write_to_file(&mut allocator.borrow_mut(), &test_state.borrow(), file_name); + + match fs::read_to_string(log_path) { + Ok(contents) => file_contents = contents, + Err(e) => assert!(false, "{}", e), + } + assert!( + file_contents + .find("ERROR: Unsupported drm format") + .is_some(), + "Failed to check Fourcc!" + ); + + // Set width = 15, which is less than the minimum. 
+ ramfb_config.fourcc = RAMFB_FORMAT; + ramfb_config.width = 15; + ramfb_config.write_to_file(&mut allocator.borrow_mut(), &test_state.borrow(), file_name); + + match fs::read_to_string(log_path) { + Ok(contents) => file_contents = contents, + Err(e) => assert!(false, "{}", e), + } + let mut err_msg = format!( + "ERROR: The resolution: {}x{} is unsupported", + 15, VERTICAL_RESOLUTION + ); + assert!( + file_contents.find(&err_msg).is_some(), + "Failed to check min width!" + ); + + // Set width = 16001, which is exceeded the maximum. + ramfb_config.width = 16001; + ramfb_config.write_to_file(&mut allocator.borrow_mut(), &test_state.borrow(), file_name); + + match fs::read_to_string(log_path) { + Ok(contents) => file_contents = contents, + Err(e) => assert!(false, "{}", e), + } + err_msg = format!( + "ERROR: The resolution: {}x{} is unsupported", + 16001, VERTICAL_RESOLUTION + ); + assert!( + file_contents.find(&err_msg).is_some(), + "Failed to check max width!" + ); + + // Set height = 15, which is less than the minimum. + ramfb_config.width = HORIZONTAL_RESOLUTION; + ramfb_config.height = 15; + ramfb_config.write_to_file(&mut allocator.borrow_mut(), &test_state.borrow(), file_name); + + match fs::read_to_string(log_path) { + Ok(contents) => file_contents = contents, + Err(e) => assert!(false, "{}", e), + } + err_msg = format!( + "ERROR: The resolution: {}x{} is unsupported", + HORIZONTAL_RESOLUTION, 15 + ); + assert!( + file_contents.find(&err_msg).is_some(), + "Failed to check min height!" + ); + + // Set height = 12001, which is exceeded the maximum. + ramfb_config.height = 12001; + ramfb_config.write_to_file(&mut allocator.borrow_mut(), &test_state.borrow(), file_name); + + match fs::read_to_string(log_path) { + Ok(contents) => file_contents = contents, + Err(e) => assert!(false, "{}", e), + } + err_msg = format!( + "ERROR: The resolution: {}x{} is unsupported", + HORIZONTAL_RESOLUTION, 12001 + ); + assert!( + file_contents.find(&err_msg).is_some(), + "Failed to check max height!" + ); + + match fs::remove_file(log_path) { + Ok(_) => {} + Err(e) => assert!(false, "{}", e), + } + test_state.borrow_mut().stop(); +} diff --git a/tests/mod_test/tests/balloon_test.rs b/tests/mod_test/tests/balloon_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..8fbc400285b29b0934f8960354fdc88089c6970a --- /dev/null +++ b/tests/mod_test/tests/balloon_test.rs @@ -0,0 +1,1133 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use std::cell::RefCell; +use std::fs::{remove_file, File}; +use std::io::{self, BufRead, BufReader}; +use std::process::Command; +use std::rc::Rc; +use std::{thread, time}; + +use mod_test::utils::support_numa; +use serde_json::json; + +use mod_test::libdriver::machine::TestStdMachine; +use mod_test::libdriver::malloc::GuestAllocator; +use mod_test::libdriver::virtio::{TestVirtQueue, TestVringDescEntry, VirtioDeviceOps}; +use mod_test::libdriver::virtio_pci_modern::{TestVirtioPciDev, VirtioPciCommonCfg}; +use mod_test::libtest::{test_init, TestState, MACHINE_TYPE_ARG}; +use util::{byte_code::ByteCode, offset_of}; + +const BALLOON_F_DEFLATE_ON_OOM_TEST: u32 = 2; +const BALLOON_F_PRPORTING_TEST: u32 = 5; +const BALLOON_F_VERSION1_TEST: u64 = 32; +const PAGE_SIZE_UNIT: u64 = 4096; +const TIMEOUT_US: u64 = 15 * 1000 * 1000; +const MBSIZE: u64 = 1024 * 1024; +const MEM_BUFFER_PERCENT_DEFAULT: u32 = 50; +const MONITOR_INTERVAL_SECOND_DEFAULT: u32 = 10; +const ADDRESS_BASE: u64 = 0x4000_0000; + +fn read_lines(filename: String) -> io::Lines> { + let file = File::open(filename).unwrap(); + io::BufReader::new(file).lines() +} + +fn get_hugesize() -> u64 { + let mut free_page = 0_u64; + let lines = read_lines("/proc/meminfo".to_string()); + for line in lines { + if let Ok(info) = line { + if info.starts_with("HugePages_Free:") { + let free: Vec<&str> = info.split(':').collect(); + free_page = free[1].trim().parse::().unwrap(); + } + if info.starts_with("Hugepagesize:") { + let huges: Vec<&str> = info.split(':').collect(); + let sizes: Vec<&str> = huges[1].trim().split(' ').collect(); + let size = sizes[0].trim().parse::().unwrap(); + return free_page * size; + } + } + } + 0_u64 +} + +pub struct VirtioBalloonTest { + pub device: Rc>, + pub state: Rc>, + pub allocator: Rc>, + pub inf_queue: Rc>, + pub def_queue: Rc>, + pub fpr_queue: Option>>, + pub auto_queue: Option>>, +} + +pub struct BalloonTestCfg { + pub fpr: bool, + pub auto_balloon: bool, + pub percent: u32, + pub interval: u32, +} + +impl VirtioBalloonTest { + pub fn new( + memsize: u64, + page_size: u64, + shared: bool, + huge: bool, + cfg: BalloonTestCfg, + ) -> Self { + let pci_slot: u8 = 0x4; + let mut extra_args: Vec<&str> = Vec::new(); + let mut fpr_switch = String::from("false"); + let mut auto_switch = String::from("false"); + let mem_path = "-mem-path /tmp/stratovirt/hugepages".to_string(); + + let mut machine_args = MACHINE_TYPE_ARG.to_string(); + if shared { + machine_args.push_str(",mem-share=on"); + } + let mut args: Vec<&str> = machine_args.split(' ').collect(); + extra_args.append(&mut args); + + let mem_args = format!("-m {}", memsize); + args = mem_args[..].split(' ').collect(); + extra_args.append(&mut args); + + if huge { + args = mem_path[..].split(' ').collect(); + extra_args.append(&mut args); + } + + if cfg.fpr { + fpr_switch = String::from("true"); + } + if cfg.auto_balloon { + auto_switch = String::from("true"); + } + let dev_args = format!( + "-device virtio-balloon-pci,id=drv0,bus=pcie.0,addr={}.0,free-page-reporting={},auto-balloon={},membuf-percent={},monitor-interval={}", + pci_slot, fpr_switch, auto_switch, cfg.percent, cfg.interval + ); + args = dev_args[..].split(' ').collect(); + extra_args.append(&mut args); + + let test_state = Rc::new(RefCell::new(test_init(extra_args))); + let machine = TestStdMachine::new_bymem(test_state.clone(), memsize * MBSIZE, page_size); + let allocator = machine.allocator.clone(); + + let dev = Rc::new(RefCell::new(TestVirtioPciDev::new(machine.pci_bus))); + 
dev.borrow_mut().init(pci_slot, 0); + + let features = dev.borrow_mut().get_device_features(); + + let mut fpr_queue = None; + let mut auto_queue = None; + let mut que_num = 2_usize; + let mut idx = 2_usize; + if cfg.fpr { + que_num += 1; + } + if cfg.auto_balloon { + que_num += 1; + } + let ques = + dev.borrow_mut() + .init_device(test_state.clone(), allocator.clone(), features, que_num); + let inf_queue = ques[0].clone(); + let def_queue = ques[1].clone(); + if cfg.fpr { + fpr_queue = Some(ques[idx].clone()); + idx += 1; + } + if cfg.auto_balloon { + auto_queue = Some(ques[idx].clone()); + } + + VirtioBalloonTest { + device: dev, + state: test_state, + allocator, + inf_queue, + def_queue, + fpr_queue, + auto_queue, + } + } + + pub fn numa_node_new() -> Self { + let mut args: Vec<&str> = Vec::new(); + let mut extra_args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + args.append(&mut extra_args); + + let cpu = 8; + let cpu_args = format!( + "-smp {},sockets=1,cores=4,threads=2 -cpu host,pmu=on -m 2G", + cpu + ); + let mut extra_args = cpu_args.split(' ').collect(); + args.append(&mut extra_args); + extra_args = "-object memory-backend-file,size=1G,id=mem0,host-nodes=0-1,policy=bind,share=on,mem-path=test.fd" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = + "-object memory-backend-memfd,size=1G,id=mem1,host-nodes=0-1,policy=bind,mem-prealloc=true" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-numa node,nodeid=0,cpus=0-3,memdev=mem0" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-numa node,nodeid=1,cpus=4-7,memdev=mem1" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-numa dist,src=0,dst=1,val=30".split(' ').collect(); + args.append(&mut extra_args); + extra_args = "-numa dist,src=1,dst=0,val=30".split(' ').collect(); + args.append(&mut extra_args); + + extra_args = "-device virtio-balloon-pci,id=drv0,bus=pcie.0,addr=0x4.0" + .split(' ') + .collect(); + args.append(&mut extra_args); + + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new_bymem(test_state.clone(), 2 * MBSIZE, 4096); + let allocator = machine.allocator.clone(); + + let dev = Rc::new(RefCell::new(TestVirtioPciDev::new(machine.pci_bus))); + dev.borrow_mut().init(4, 0); + + let features = dev.borrow_mut().get_device_features(); + + let ques = dev + .borrow_mut() + .init_device(test_state.clone(), allocator.clone(), features, 2); + let inf_queue = ques[0].clone(); + let def_queue = ques[1].clone(); + + VirtioBalloonTest { + device: dev, + state: test_state, + allocator, + inf_queue, + def_queue, + fpr_queue: None, + auto_queue: None, + } + } +} + +fn inflate_fun(shared: bool) { + let page_num = 255_i32; + let mut idx = 0_i32; + let cfg = BalloonTestCfg { + fpr: false, + auto_balloon: false, + percent: 0, + interval: 0, + }; + let balloon = VirtioBalloonTest::new(1024, PAGE_SIZE_UNIT, shared, false, cfg); + + let free_page = balloon + .allocator + .borrow_mut() + .alloc(page_num as u64 * PAGE_SIZE_UNIT); + let pfn = (free_page >> 12) as u32; + let pfn_addr = balloon.allocator.borrow_mut().alloc(PAGE_SIZE_UNIT); + while idx < page_num { + balloon + .state + .borrow_mut() + .writeb(free_page + PAGE_SIZE_UNIT * idx as u64, 1); + idx += 1; + } + // balloon Illegal addresses + balloon.state.borrow_mut().writel(pfn_addr, pfn - 1024); + let free_head = balloon + .inf_queue + .borrow_mut() + .add(balloon.state.clone(), pfn_addr, 4, false); + balloon + .device + .borrow_mut() + 
.kick_virtqueue(balloon.state.clone(), balloon.inf_queue.clone()); + balloon.device.borrow_mut().poll_used_elem( + balloon.state.clone(), + balloon.inf_queue.clone(), + free_head, + TIMEOUT_US, + &mut None, + false, + ); + + // begin balloon addresses + let mut loop_num = page_num - 1; + let mut msg = Vec::new(); + + while loop_num >= 0 { + balloon + .state + .borrow_mut() + .writel(pfn_addr + 4 * loop_num as u64, pfn + loop_num as u32); + let entry = TestVringDescEntry { + data: pfn_addr + (loop_num as u64 * 4), + len: 4, + write: false, + }; + msg.push(entry); + loop_num -= 1; + } + let free_head = balloon + .inf_queue + .borrow_mut() + .add_chained(balloon.state.clone(), msg); + balloon + .device + .borrow_mut() + .kick_virtqueue(balloon.state.clone(), balloon.inf_queue.clone()); + balloon.device.borrow_mut().poll_used_elem( + balloon.state.clone(), + balloon.inf_queue.clone(), + free_head, + TIMEOUT_US, + &mut None, + false, + ); + balloon.state.borrow_mut().stop(); +} + +fn balloon_fun(shared: bool, huge: bool) { + let page_num = 255_u32; + let mut idx = 0_u32; + let cfg = BalloonTestCfg { + fpr: false, + auto_balloon: false, + percent: 0, + interval: 0, + }; + let balloon = VirtioBalloonTest::new(1024, PAGE_SIZE_UNIT, shared, huge, cfg); + + let free_page = balloon + .allocator + .borrow_mut() + .alloc(u64::from(page_num) * PAGE_SIZE_UNIT); + let pfn = (free_page >> 12) as u32; + let pfn_addr = balloon.allocator.borrow_mut().alloc(PAGE_SIZE_UNIT); + while idx < page_num { + balloon + .state + .borrow_mut() + .writel(pfn_addr + 4 * u64::from(idx), pfn + idx); + balloon + .state + .borrow_mut() + .writeb(free_page + PAGE_SIZE_UNIT * u64::from(idx), 1); + idx += 1; + } + + // begin inflate addresses + let mut loop_num = 0_u32; + let mut msg = Vec::new(); + + while loop_num < page_num { + let entry = TestVringDescEntry { + data: pfn_addr + (u64::from(loop_num) * 4), + len: 4, + write: false, + }; + msg.push(entry); + loop_num += 1; + } + let free_head = balloon + .inf_queue + .borrow_mut() + .add_chained(balloon.state.clone(), msg); + balloon + .device + .borrow_mut() + .kick_virtqueue(balloon.state.clone(), balloon.inf_queue.clone()); + balloon.device.borrow_mut().poll_used_elem( + balloon.state.clone(), + balloon.inf_queue.clone(), + free_head, + TIMEOUT_US, + &mut None, + false, + ); + // begin deflate addresses + let mut loop_num = 0_u32; + let mut msg = Vec::new(); + + while loop_num < page_num { + let entry = TestVringDescEntry { + data: pfn_addr + (u64::from(loop_num) * 4), + len: 4, + write: false, + }; + msg.push(entry); + loop_num += 1; + } + let free_head = balloon + .def_queue + .borrow_mut() + .add_chained(balloon.state.clone(), msg); + balloon + .device + .borrow_mut() + .kick_virtqueue(balloon.state.clone(), balloon.def_queue.clone()); + balloon.device.borrow_mut().poll_used_elem( + balloon.state.clone(), + balloon.def_queue.clone(), + free_head, + TIMEOUT_US, + &mut None, + false, + ); + + // begin deflate Illegal addresses + balloon.state.borrow_mut().writel(pfn_addr, pfn - 1024); + let free_head = balloon + .def_queue + .borrow_mut() + .add(balloon.state.clone(), pfn_addr, 4, false); + balloon + .device + .borrow_mut() + .kick_virtqueue(balloon.state.clone(), balloon.def_queue.clone()); + balloon.device.borrow_mut().poll_used_elem( + balloon.state.clone(), + balloon.def_queue.clone(), + free_head, + TIMEOUT_US, + &mut None, + false, + ); + + balloon.state.borrow_mut().stop(); +} + +/// balloon device inflate test +/// TestStep: +/// 1.Init device +/// 2.Populate the 
inflate queue with illegal addresses +/// 3.Populate the inflate queue with legal addresses +/// Expect: +/// 1.Success +/// 2.There are no exceptions in the process +/// 3.Memory need by addr +#[test] +fn balloon_inflate_001() { + inflate_fun(false); +} +/// balloon device inflate test +/// TestStep: +/// 1.Init device +/// 2.Populate the inflate queue with illegal addresses +/// 3.Populate the inflate queue with legal addresses +/// Expect: +/// 1.Success +/// 2.There are no exceptions in the process +/// 3.memory released by addr +#[test] +fn balloon_inflate_002() { + inflate_fun(true); +} + +fn create_huge_mem_path() { + let _output = Command::new("rm") + .arg("-rf") + .arg("/tmp/stratovirt/hugepages") + .output() + .expect("Failed to rm dir"); + + let _output = Command::new("mkdir") + .arg("-p") + .arg("/tmp/stratovirt/hugepages") + .output() + .expect("Failed to mkdir dir"); + + let _output = Command::new("mount") + .arg("-t") + .arg("hugetlbfs") + .arg("hugetlbfs") + .arg("/tmp/stratovirt/hugepages") + .output() + .expect("Failed to mount dir"); + + let _output = Command::new("sysctl") + .arg("vm.nr_hugepages=1024") + .output() + .expect("Failed to set count hugepages"); +} + +fn clean_huge_mem_path() { + let _output = Command::new("umount") + .arg("/tmp/stratovirt/hugepages") + .output() + .expect("Failed to mount dir"); +} + +/// balloon device deflate and inflate test +/// TestStep: +/// 1.Init device +/// 2.Populate the inflate queue with illegal addresses +/// 3.Populate the inflate queue with legal addresses +/// 4.Populate the deflate queue with legal addresses +/// Expect: +/// 1.Success +/// 2.There are no exceptions in the process +/// 3.Memory re by addr +/// 4.Free memory +#[test] +fn balloon_fun_001() { + balloon_fun(false, false); +} + +/// balloon device deflate and inflate test +/// TestStep: +/// 1.Init device +/// 2.Populate the inflate queue with illegal addresses +/// 3.Populate the inflate queue with legal addresses +/// 4.Populate the deflate queue with legal addresses +/// Expect: +/// 1.Success +/// 2.There are no exceptions in the process +/// 3.Memory reallocte by addr +/// 4.Free memory +#[test] +fn balloon_fun_002() { + balloon_fun(true, false); +} + +/// TestStep: +/// 1.Init device +/// 2.Populate the inflate queue with illegal addresses +/// 3.Populate the inflate queue with legal addresses +/// 4.Populate the deflate queue with legal addresses +/// Expect: +/// 1.Success +/// 2.There are no exceptions in the process +/// 3.Memory reallocte by addr +/// 4.Free memory +#[test] +fn balloon_huge_fun_001() { + create_huge_mem_path(); + let size_kb = get_hugesize(); + if size_kb < 1024 * 1024 { + clean_huge_mem_path(); + return; + } + balloon_fun(false, true); + balloon_fun(true, true); + clean_huge_mem_path(); +} + +/// balloon device features config test +/// TestStep: +/// 1.Init device +/// 2.set guest feature 0xFFFFFFFFFFFFFFFF +/// Expect: +/// 1.Success +/// 2.guest feature equal device feature +#[test] +fn balloon_feature_001() { + let pci_slot: u8 = 0x4; + let pci_fn: u8 = 0x0; + let mut extra_args: Vec<&str> = Vec::new(); + + let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + extra_args.append(&mut args); + + let mem_args = format!("-m {}", 128); + args = mem_args[..].split(' ').collect(); + extra_args.append(&mut args); + + let dev_args = format!( + "-device {},id=drv0,bus=pcie.{},addr={}.0", + "virtio-balloon-pci", pci_fn, pci_slot + ); + args = dev_args[..].split(' ').collect(); + extra_args.append(&mut args); + + let test_state 
= Rc::new(RefCell::new(test_init(extra_args))); + let machine = TestStdMachine::new_bymem(test_state.clone(), 128 * MBSIZE, PAGE_SIZE_UNIT); + let allocator = machine.allocator.clone(); + + let dev = Rc::new(RefCell::new(TestVirtioPciDev::new(machine.pci_bus))); + dev.borrow_mut().init(pci_slot, pci_fn); + + dev.borrow_mut().pci_dev.enable_msix(None); + dev.borrow_mut() + .setup_msix_configuration_vector(allocator, 0); + + let features = dev.borrow_mut().get_device_features(); + + dev.borrow_mut().set_guest_features(0xFFFFFFFFFFFFFFFF); + let features_guest = dev.borrow_mut().get_guest_features(); + assert_eq!(features, features_guest); + + test_state.borrow_mut().stop(); +} + +/// balloon device features config test +/// TestStep: +/// 1.Init device +/// 2.get device feature +/// Expect: +/// 1.Success +/// 2.feature OK +#[test] +fn balloon_feature_002() { + let pci_slot: u8 = 0x4; + let pci_fn: u8 = 0x0; + let mut extra_args: Vec<&str> = Vec::new(); + + let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + extra_args.append(&mut args); + + let mem_args = format!("-m {}", 128); + args = mem_args[..].split(' ').collect(); + extra_args.append(&mut args); + + let pci_args = format!( + "-device {},id=drv0,bus=pcie.{},addr={}.0,deflate-on-oom=true,free-page-reporting=true", + "virtio-balloon-pci", pci_fn, pci_slot + ); + args = pci_args[..].split(' ').collect(); + extra_args.append(&mut args); + + let test_state = Rc::new(RefCell::new(test_init(extra_args))); + let machine = TestStdMachine::new_bymem(test_state.clone(), 128 * MBSIZE, PAGE_SIZE_UNIT); + let allocator = machine.allocator.clone(); + + let dev = Rc::new(RefCell::new(TestVirtioPciDev::new(machine.pci_bus))); + dev.borrow_mut().init(pci_slot, pci_fn); + + dev.borrow_mut().pci_dev.enable_msix(None); + dev.borrow_mut() + .setup_msix_configuration_vector(allocator, 0); + + let features = dev.borrow_mut().get_device_features(); + + assert_eq!( + features, + 1u64 << BALLOON_F_VERSION1_TEST + | 1u64 << BALLOON_F_PRPORTING_TEST + | 1u64 << BALLOON_F_DEFLATE_ON_OOM_TEST + ); + + dev.borrow_mut() + .set_guest_features(1u64 << BALLOON_F_VERSION1_TEST); + let features_guest = dev.borrow_mut().get_guest_features(); + assert_eq!(1u64 << BALLOON_F_VERSION1_TEST, features_guest); + + test_state.borrow_mut().stop(); +} + +fn balloon_fpr_fun(shared: bool) { + let page_num = 255_u32; + let mut idx = 0_u32; + let cfg = BalloonTestCfg { + fpr: true, + auto_balloon: false, + percent: 0, + interval: 0, + }; + let balloon = VirtioBalloonTest::new(1024, PAGE_SIZE_UNIT, shared, false, cfg); + + let free_page = balloon + .allocator + .borrow_mut() + .alloc(u64::from(page_num) * PAGE_SIZE_UNIT); + let pfn = (free_page >> 12) as u32; + let pfn_addr = balloon.allocator.borrow_mut().alloc(PAGE_SIZE_UNIT); + while idx < page_num { + balloon + .state + .borrow_mut() + .writel(pfn_addr + 4 * u64::from(idx), pfn + idx); + balloon + .state + .borrow_mut() + .writeb(free_page + PAGE_SIZE_UNIT * u64::from(idx), 1); + idx += 1; + } + // balloon Illegal addresses + balloon.state.borrow_mut().writel(pfn_addr, pfn - 1024); + let fpr = balloon.fpr_queue.unwrap(); + let free_head = fpr + .borrow_mut() + .add(balloon.state.clone(), pfn_addr, 4, true); + balloon + .device + .borrow_mut() + .kick_virtqueue(balloon.state.clone(), fpr.clone()); + balloon.device.borrow_mut().poll_used_elem( + balloon.state.clone(), + fpr.clone(), + free_head, + TIMEOUT_US, + &mut None, + false, + ); + + // begin fpr addresses + let mut loop_num = 0_u32; + let mut msg = Vec::new(); + + 
while loop_num < page_num { + let entry = TestVringDescEntry { + data: pfn_addr + (u64::from(loop_num) * 4), + len: 4, + write: true, + }; + msg.push(entry); + loop_num += 1; + } + let free_head = fpr.borrow_mut().add_chained(balloon.state.clone(), msg); + balloon + .device + .borrow_mut() + .kick_virtqueue(balloon.state.clone(), fpr.clone()); + balloon.device.borrow_mut().poll_used_elem( + balloon.state.clone(), + fpr, + free_head, + TIMEOUT_US, + &mut None, + false, + ); + + balloon.state.borrow_mut().stop(); +} + +/// balloon device fpr features test +/// TestStep: +/// 1.Init device +/// 2.Populate the fpr queue with illegal addresses +/// 3.Populate the fpr queue with legal addresses +/// Expect: +/// 1.Success +/// 2.There are no exceptions in the process +/// 3.Free memory +#[test] +fn balloon_fpr_001() { + balloon_fpr_fun(true); +} + +/// balloon device fpr features test +/// TestStep: +/// 1.Init device +/// 2.Populate the fpr queue with illegal addresses +/// 3.Populate the fpr queue with legal addresses +/// Expect: +/// 1.Success +/// 2.There are no exceptions in the process +/// 3.Free memory +#[test] +fn balloon_fpr_002() { + balloon_fpr_fun(false); +} + +struct VirtioBalloonConfig { + /// The target page numbers of balloon device. + pub num_pages: u32, + /// Number of pages we've actually got in balloon device. + pub actual: u32, + pub _reserved: u32, + pub _reserved1: u32, + /// Buffer percent is a percentage of memory actually needed by + /// the applications and services running inside the virtual machine. + /// This parameter takes effect only when VIRTIO_BALLOON_F_MESSAGE_VQ is supported. + /// Recommended value range: [20, 80] and default is 50. + pub membuf_percent: u32, + /// Monitor interval host wants to adjust VM memory size. + /// Recommended value range: [5, 300] and default is 10. 
+ pub monitor_interval: u32, +} + +#[test] +fn query() { + let cfg = BalloonTestCfg { + fpr: false, + auto_balloon: false, + percent: 0, + interval: 0, + }; + let balloon = VirtioBalloonTest::new(2048, PAGE_SIZE_UNIT, false, false, cfg); + let ret = balloon + .state + .borrow_mut() + .qmp("{\"execute\": \"query-balloon\"}"); + + assert_eq!( + *ret.get("return").unwrap(), + json!({"actual": 2147483648_u64}) + ); + + balloon.state.borrow_mut().stop(); +} + +/// balloon device qmp config test +/// TestStep: +/// 1.Init device +/// 2.qmp config page 512M +/// 3.qmp query result 512M +/// Expect: +/// 1/2/3.Success +#[test] +fn balloon_config_001() { + let cfg = BalloonTestCfg { + fpr: false, + auto_balloon: false, + percent: 0, + interval: 0, + }; + let balloon = VirtioBalloonTest::new(1024, PAGE_SIZE_UNIT, false, false, cfg); + + balloon + .state + .borrow_mut() + .qmp("{\"execute\": \"balloon\", \"arguments\": {\"value\": 536870912}}"); + let ret = balloon.state.borrow_mut().qmp_read(); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + let num_pages = balloon + .device + .borrow_mut() + .config_readl(offset_of!(VirtioBalloonConfig, num_pages) as u64); + assert_eq!(num_pages, 131072); + let actual = balloon + .device + .borrow_mut() + .config_readl(offset_of!(VirtioBalloonConfig, actual) as u64); + assert_eq!(actual, 0); + balloon + .device + .borrow_mut() + .config_writel(offset_of!(VirtioBalloonConfig, actual) as u64, 131072); + let actual = balloon + .device + .borrow_mut() + .config_readl(offset_of!(VirtioBalloonConfig, actual) as u64); + assert_eq!(actual, 131072); + let _actual = balloon + .device + .borrow_mut() + .config_readl((offset_of!(VirtioBalloonConfig, actual) + 8) as u64); + let ten_millis = time::Duration::from_millis(10); + thread::sleep(ten_millis); + let ret = balloon.state.borrow_mut().qmp_read(); + assert_eq!(*ret.get("data").unwrap(), json!({"actual": 536870912_u64})); + + balloon + .state + .borrow_mut() + .qmp("{\"execute\": \"balloon\", \"arguments\": {\"value\": 1610612736}}"); + let num_pages = balloon + .device + .borrow_mut() + .config_readl(offset_of!(VirtioBalloonConfig, num_pages) as u64); + assert_eq!(num_pages, 0); + balloon.state.borrow_mut().stop(); +} + +/// balloon device qmp config test +/// TestStep: +/// 1.Init device +/// 2.qmp config page 512M +/// 3.qmp query result 512M +/// Expect: +/// 1/2/3.Success +#[test] +fn balloon_config_002() { + let size_kb = get_hugesize(); + if size_kb < 1024 * 1024 { + return; + } + let cfg = BalloonTestCfg { + fpr: false, + auto_balloon: false, + percent: 0, + interval: 0, + }; + let balloon = VirtioBalloonTest::new(1024, PAGE_SIZE_UNIT, false, true, cfg); + + balloon + .state + .borrow_mut() + .qmp("{\"execute\": \"balloon\", \"arguments\": {\"value\": 536870912}}"); + let ret = balloon.state.borrow_mut().qmp_read(); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + let num_pages = balloon + .device + .borrow_mut() + .config_readl(offset_of!(VirtioBalloonConfig, num_pages) as u64); + assert_eq!(num_pages, 131072); + let actual = balloon + .device + .borrow_mut() + .config_readl(offset_of!(VirtioBalloonConfig, actual) as u64); + assert_eq!(actual, 0); + balloon + .device + .borrow_mut() + .config_writel(offset_of!(VirtioBalloonConfig, actual) as u64, 131072); + let actual = balloon + .device + .borrow_mut() + .config_readl(offset_of!(VirtioBalloonConfig, actual) as u64); + assert_eq!(actual, 131072); + let _actual = balloon + .device + .borrow_mut() + .config_readl((offset_of!(VirtioBalloonConfig, 
actual) + 8) as u64);
+    let ten_millis = time::Duration::from_millis(10);
+    thread::sleep(ten_millis);
+    let ret = balloon.state.borrow_mut().qmp_read();
+    assert_eq!(*ret.get("data").unwrap(), json!({"actual": 536870912_u64}));
+
+    balloon
+        .state
+        .borrow_mut()
+        .qmp("{\"execute\": \"balloon\", \"arguments\": {\"value\": 1610612736}}");
+    let num_pages = balloon
+        .device
+        .borrow_mut()
+        .config_readl(offset_of!(VirtioBalloonConfig, num_pages) as u64);
+    assert_eq!(num_pages, 0);
+    balloon.state.borrow_mut().stop();
+}
+
+/// balloon device deactivate test
+/// TestStep:
+/// 1.Init device
+/// 2.Guest disables the virtqueue by writing 0 to queue_enable
+/// Expect:
+/// 1/2.Success
+#[test]
+fn balloon_deactive_001() {
+    let cfg = BalloonTestCfg {
+        fpr: false,
+        auto_balloon: false,
+        percent: 0,
+        interval: 0,
+    };
+    let balloon = VirtioBalloonTest::new(1024, PAGE_SIZE_UNIT, false, false, cfg);
+
+    let bar = balloon.device.borrow().bar;
+    let common_base = u64::from(balloon.device.borrow().common_base);
+
+    balloon.device.borrow().pci_dev.io_writel(
+        bar,
+        common_base + offset_of!(VirtioPciCommonCfg, queue_enable) as u64,
+        0,
+    );
+
+    let ten_millis = time::Duration::from_millis(10);
+    thread::sleep(ten_millis);
+
+    let ret = balloon
+        .state
+        .borrow_mut()
+        .qmp("{\"execute\": \"query-balloon\"}");
+    assert_eq!(
+        *ret.get("return").unwrap(),
+        json!({"actual": 1073741824_u64})
+    );
+    balloon.state.borrow_mut().stop();
+}
+
+#[derive(Clone, Copy, Default)]
+#[repr(packed(1))]
+struct BalloonStat {
+    _tag: u16,
+    _val: u64,
+}
+impl ByteCode for BalloonStat {}
+/// balloon device auto-balloon message queue test
+/// TestStep:
+/// 1.Init device
+/// 2.Guest sends a message to the host through the auto-balloon queue
+/// Expect:
+/// 1/2.Success
+#[test]
+fn auto_balloon_test_001() {
+    let cfg = BalloonTestCfg {
+        fpr: false,
+        auto_balloon: true,
+        percent: MEM_BUFFER_PERCENT_DEFAULT,
+        interval: MONITOR_INTERVAL_SECOND_DEFAULT,
+    };
+    let balloon = VirtioBalloonTest::new(1024, PAGE_SIZE_UNIT, false, false, cfg);
+
+    let num_pages = balloon
+        .device
+        .borrow_mut()
+        .config_readl(offset_of!(VirtioBalloonConfig, num_pages) as u64);
+    assert_eq!(num_pages, 0);
+    let percent = balloon
+        .device
+        .borrow_mut()
+        .config_readl(offset_of!(VirtioBalloonConfig, membuf_percent) as u64);
+    assert_eq!(percent, MEM_BUFFER_PERCENT_DEFAULT);
+    let interval = balloon
+        .device
+        .borrow_mut()
+        .config_readl(offset_of!(VirtioBalloonConfig, monitor_interval) as u64);
+    assert_eq!(interval, MONITOR_INTERVAL_SECOND_DEFAULT);
+
+    let stat = BalloonStat {
+        _tag: 0,
+        _val: 131070,
+    };
+    let msg_addr = balloon.allocator.borrow_mut().alloc(PAGE_SIZE_UNIT);
+    balloon
+        .state
+        .borrow_mut()
+        .memwrite(msg_addr, stat.as_bytes());
+
+    let auto_queue = balloon.auto_queue.unwrap();
+
+    let free_head = auto_queue.borrow_mut().add(
+        balloon.state.clone(),
+        msg_addr,
+        std::mem::size_of::<BalloonStat>() as u32,
+        false,
+    );
+    balloon
+        .device
+        .borrow_mut()
+        .kick_virtqueue(balloon.state.clone(), auto_queue.clone());
+    balloon.device.borrow_mut().poll_used_elem(
+        balloon.state.clone(),
+        auto_queue,
+        free_head,
+        TIMEOUT_US,
+        &mut None,
+        false,
+    );
+    let num_pages = balloon
+        .device
+        .borrow_mut()
+        .config_readl(offset_of!(VirtioBalloonConfig, num_pages) as u64);
+    assert_eq!(num_pages, 131070);
+    balloon
+        .device
+        .borrow_mut()
+        .config_writel(offset_of!(VirtioBalloonConfig, actual) as u64, 131070);
+    let actual = balloon
+        .device
+        .borrow_mut()
+        .config_readl(offset_of!(VirtioBalloonConfig, actual) as u64);
+    assert_eq!(actual, 131070);
+}
+
+#[test]
+/// balloon device inflate and deflate test with NUMA nodes
+/// TestStep:
+/// 1.Init device with two NUMA nodes
+/// 2.Populate the inflate queue with legal addresses
+/// 3.Populate the deflate queue with legal addresses
+/// Expect:
+/// 1/2/3.Success
+fn balloon_numa1() {
+    if !support_numa() {
+        return;
+    }
+
+    let page_num = 255_u32;
+    let mut idx = 0_u32;
+    let balloon = VirtioBalloonTest::numa_node_new();
+
+    let free_page = 0x4000_0000 + ADDRESS_BASE - 100 * PAGE_SIZE_UNIT;
+    let pfn = (free_page >> 12) as u32;
+    let pfn_addr = balloon.allocator.borrow_mut().alloc(PAGE_SIZE_UNIT);
+    while idx < page_num {
+        balloon
+            .state
+            .borrow_mut()
+            .writel(pfn_addr + 4 * u64::from(idx), pfn + idx);
+        balloon
+            .state
+            .borrow_mut()
+            .writeb(free_page + PAGE_SIZE_UNIT * u64::from(idx), 1);
+        idx += 1;
+    }
+
+    // begin inflate addresses
+    let mut loop_num = 0_u32;
+    let mut msg = Vec::new();
+
+    while loop_num < page_num {
+        let entry = TestVringDescEntry {
+            data: pfn_addr + (u64::from(loop_num) * 4),
+            len: 4,
+            write: false,
+        };
+        msg.push(entry);
+        loop_num += 1;
+    }
+    let free_head = balloon
+        .inf_queue
+        .borrow_mut()
+        .add_chained(balloon.state.clone(), msg);
+    balloon
+        .device
+        .borrow_mut()
+        .kick_virtqueue(balloon.state.clone(), balloon.inf_queue.clone());
+    balloon.device.borrow_mut().poll_used_elem(
+        balloon.state.clone(),
+        balloon.inf_queue.clone(),
+        free_head,
+        TIMEOUT_US,
+        &mut None,
+        false,
+    );
+    // begin deflate addresses
+    let mut loop_num = 0_u32;
+    let mut msg = Vec::new();
+
+    while loop_num < page_num {
+        let entry = TestVringDescEntry {
+            data: pfn_addr + (u64::from(loop_num) * 4),
+            len: 4,
+            write: false,
+        };
+        msg.push(entry);
+        loop_num += 1;
+    }
+    let free_head = balloon
+        .def_queue
+        .borrow_mut()
+        .add_chained(balloon.state.clone(), msg);
+    balloon
+        .device
+        .borrow_mut()
+        .kick_virtqueue(balloon.state.clone(), balloon.def_queue.clone());
+    balloon.device.borrow_mut().poll_used_elem(
+        balloon.state.clone(),
+        balloon.def_queue.clone(),
+        free_head,
+        TIMEOUT_US,
+        &mut None,
+        false,
+    );
+
+    balloon.state.borrow_mut().stop();
+    remove_file("test.fd").unwrap();
+}
diff --git a/tests/mod_test/tests/block_test.rs b/tests/mod_test/tests/block_test.rs
new file mode 100644
index 0000000000000000000000000000000000000000..1f0a513c6735250ae7af920669c9a8c5a0dc9571
--- /dev/null
+++ b/tests/mod_test/tests/block_test.rs
@@ -0,0 +1,2108 @@
+// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
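+
+// Integration tests for the virtio-block device: feature negotiation, basic
+// and multi-queue I/O, request throttling, and abnormal request handling.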
+
+use std::cell::RefCell;
+use std::mem::size_of;
+use std::os::linux::fs::MetadataExt;
+use std::rc::Rc;
+use std::time::{Duration, Instant};
+use std::{thread, time};
+
+use mod_test::libdriver::malloc::GuestAllocator;
+use mod_test::libdriver::qcow2::CLUSTER_SIZE;
+use mod_test::libdriver::qcow2::{check_snapshot, create_snapshot, delete_snapshot};
+use mod_test::libdriver::virtio::TestVringDescEntry;
+use mod_test::libdriver::virtio::{TestVirtQueue, VirtioDeviceOps};
+use mod_test::libdriver::virtio_block::{
+    add_blk_request, create_blk, set_up, tear_down, virtio_blk_default_feature, virtio_blk_read,
+    virtio_blk_read_write_zeroes, virtio_blk_request, virtio_blk_write, TestVirtBlkReq,
+    VirtBlkDiscardWriteZeroes, DEFAULT_IO_REQS, MAX_REQUEST_SECTORS, REQ_ADDR_LEN, REQ_DATA_LEN,
+    REQ_DATA_OFFSET, REQ_STATUS_LEN, TIMEOUT_US, VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_BLK_SIZE,
+    VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_GEOMETRY,
+    VIRTIO_BLK_F_LIFETIME, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_SECURE_ERASE,
+    VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_WRITE_ZEROES,
+    VIRTIO_BLK_S_IOERR, VIRTIO_BLK_S_OK, VIRTIO_BLK_S_UNSUPP, VIRTIO_BLK_T_DISCARD,
+    VIRTIO_BLK_T_FLUSH, VIRTIO_BLK_T_GET_ID, VIRTIO_BLK_T_ILLGEAL, VIRTIO_BLK_T_IN,
+    VIRTIO_BLK_T_OUT, VIRTIO_BLK_T_WRITE_ZEROES,
+};
+use mod_test::libdriver::virtio_pci_modern::TestVirtioPciDev;
+use mod_test::libtest::TestState;
+use mod_test::utils::{create_img, ImageType, TEST_IMAGE_SIZE};
+use util::aio::{aio_probe, AioEngine};
+use util::byte_code::ByteCode;
+use util::num_ops::round_up;
+use util::offset_of;
+use virtio::device::block::VirtioBlkConfig;
+
+const TEST_IMAGE_SIZE_1M: u64 = 1024 * 1024;
+const DEFAULT_SECTOR_SIZE: u64 = 512;
+
+fn virtio_blk_discard_and_write_zeroes(
+    blk: Rc<RefCell<TestVirtioPciDev>>,
+    test_state: Rc<RefCell<TestState>>,
+    alloc: Rc<RefCell<GuestAllocator>>,
+    virtqueue: Rc<RefCell<TestVirtQueue>>,
+    req_data: &[u8],
+    status: u8,
+    need_poll_elem: bool,
+    discard: bool,
+) {
+    let req_len = req_data.len();
+    let mut blk_req = if discard {
+        TestVirtBlkReq::new(VIRTIO_BLK_T_DISCARD, 1, 0, req_len)
+    } else {
+        TestVirtBlkReq::new(VIRTIO_BLK_T_WRITE_ZEROES, 1, 0, req_len)
+    };
+    blk_req.data = unsafe { String::from_utf8_unchecked(req_data.to_vec()) };
+    let req_addr = virtio_blk_request(test_state.clone(), alloc, blk_req, false);
+
+    let mut data_entries: Vec<TestVringDescEntry> = Vec::with_capacity(3);
+    data_entries.push(TestVringDescEntry {
+        data: req_addr,
+        len: REQ_ADDR_LEN,
+        write: false,
+    });
+    data_entries.push(TestVringDescEntry {
+        data: req_addr + REQ_DATA_OFFSET,
+        len: req_len as u32,
+        write: false,
+    });
+    data_entries.push(TestVringDescEntry {
+        data: req_addr + REQ_DATA_OFFSET + req_len as u64,
+        len: REQ_STATUS_LEN,
+        write: true,
+    });
+    let free_head = virtqueue
+        .borrow_mut()
+        .add_chained(test_state.clone(), data_entries);
+    blk.borrow()
+        .kick_virtqueue(test_state.clone(), virtqueue.clone());
+    if need_poll_elem {
+        blk.borrow().poll_used_elem(
+            test_state.clone(),
+            virtqueue,
+            free_head,
+            TIMEOUT_US,
+            &mut None,
+            true,
+        );
+    }
+    let status_addr = req_addr + REQ_DATA_OFFSET + req_len as u64;
+    let read_status = test_state.borrow().readb(status_addr);
+    assert_eq!(read_status, status);
+}
+
+fn get_disk_size(img_path: Rc<String>) -> u64 {
+    let file = std::fs::OpenOptions::new()
+        .read(true)
+        .write(true)
+        .create(true)
+        .open(img_path.as_ref())
+        .unwrap();
+    let meta_data = file.metadata().unwrap();
+    let blk_size = meta_data.st_blocks() * DEFAULT_SECTOR_SIZE;
+    blk_size >> 10
+}
+
+fn
virtio_blk_check_discard_config(blk: Rc<RefCell<TestVirtioPciDev>>) {
+    // (offset, expected_value).
+    let reqs = [
+        (
+            offset_of!(VirtioBlkConfig, max_discard_sectors),
+            MAX_REQUEST_SECTORS,
+        ),
+        (offset_of!(VirtioBlkConfig, max_discard_seg), 1),
+        (offset_of!(VirtioBlkConfig, discard_sector_alignment), 1),
+    ];
+    for (offset, expected_value) in reqs {
+        assert_eq!(blk.borrow().config_readl(offset as u64), expected_value);
+    }
+}
+
+fn virtio_blk_check_write_zeroes_config(blk: Rc<RefCell<TestVirtioPciDev>>) {
+    // (offset, expected_value).
+    let reqs = [
+        (
+            offset_of!(VirtioBlkConfig, max_write_zeroes_sectors),
+            MAX_REQUEST_SECTORS,
+        ),
+        (offset_of!(VirtioBlkConfig, max_write_zeroes_seg), 1),
+    ];
+    for (offset, expected_value) in reqs {
+        assert_eq!(blk.borrow().config_readl(offset as u64), expected_value);
+    }
+    let offset = offset_of!(VirtioBlkConfig, write_zeroes_may_unmap);
+    assert_eq!(blk.borrow().config_readb(offset as u64), 1);
+}
+
+fn virtio_blk_get_id(
+    blk: Rc<RefCell<TestVirtioPciDev>>,
+    test_state: Rc<RefCell<TestState>>,
+    alloc: Rc<RefCell<GuestAllocator>>,
+    virtqueue: Rc<RefCell<TestVirtQueue>>,
+    serial_num: String,
+) {
+    let (free_head, req_addr) = add_blk_request(
+        test_state.clone(),
+        alloc,
+        virtqueue.clone(),
+        VIRTIO_BLK_T_GET_ID,
+        0,
+        true,
+    );
+    blk.borrow().virtqueue_notify(virtqueue.clone());
+    blk.borrow().poll_used_elem(
+        test_state.clone(),
+        virtqueue,
+        free_head,
+        TIMEOUT_US,
+        &mut None,
+        true,
+    );
+
+    let status_addr =
+        round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + u64::from(REQ_DATA_LEN);
+    let status = test_state.borrow().readb(status_addr);
+    assert_eq!(status, VIRTIO_BLK_S_OK);
+
+    let data_addr = round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap();
+    assert_eq!(
+        String::from_utf8(
+            test_state
+                .borrow()
+                .memread(data_addr, serial_num.len() as u64)
+        )
+        .unwrap(),
+        serial_num
+    );
+}
+
+fn virtio_blk_flush(
+    blk: Rc<RefCell<TestVirtioPciDev>>,
+    test_state: Rc<RefCell<TestState>>,
+    alloc: Rc<RefCell<GuestAllocator>>,
+    virtqueue: Rc<RefCell<TestVirtQueue>>,
+    sector: u64,
+) {
+    let (free_head, req_addr) = add_blk_request(
+        test_state.clone(),
+        alloc,
+        virtqueue.clone(),
+        VIRTIO_BLK_T_FLUSH,
+        sector,
+        true,
+    );
+    blk.borrow().virtqueue_notify(virtqueue.clone());
+    blk.borrow().poll_used_elem(
+        test_state.clone(),
+        virtqueue,
+        free_head,
+        TIMEOUT_US,
+        &mut None,
+        true,
+    );
+
+    let status_addr =
+        round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + u64::from(REQ_DATA_LEN);
+    let status = test_state.borrow().readb(status_addr);
+    assert_eq!(status, VIRTIO_BLK_S_OK);
+}
+
+fn virtio_blk_illegal_req(
+    blk: Rc<RefCell<TestVirtioPciDev>>,
+    test_state: Rc<RefCell<TestState>>,
+    alloc: Rc<RefCell<GuestAllocator>>,
+    virtqueue: Rc<RefCell<TestVirtQueue>>,
+    req_type: u32,
+) {
+    let (free_head, req_addr) = add_blk_request(
+        test_state.clone(),
+        alloc,
+        virtqueue.clone(),
+        req_type,
+        0,
+        true,
+    );
+    blk.borrow().virtqueue_notify(virtqueue.clone());
+    blk.borrow().poll_used_elem(
+        test_state.clone(),
+        virtqueue,
+        free_head,
+        TIMEOUT_US,
+        &mut None,
+        true,
+    );
+
+    let status_addr =
+        round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + u64::from(REQ_DATA_LEN);
+    let status = test_state.borrow().readb(status_addr);
+    assert_eq!(status, VIRTIO_BLK_S_UNSUPP);
+}
+
+/// Block device sends I/O request.
+/// TestStep:
+/// 1. Init block device.
+/// 2. Do the I/O request.
+/// 3. Destroy device.
+/// Expect:
+/// 1/2/3: success.
+#[test] +fn blk_basic() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let (blk, test_state, alloc, image_path) = set_up(&image_type); + + let features = virtio_blk_default_feature(blk.clone()); + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + let capacity = blk.borrow().config_readq(0); + assert_eq!(capacity, TEST_IMAGE_SIZE / u64::from(REQ_DATA_LEN)); + + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + virtio_blk_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device negotiate different features. +/// TestStep: +/// 1. Init block device. +/// 2. Negotiate supported features. +/// 3. Negotiate unsupported features. +/// 4. Destroy device. +/// Expect: +/// 1/2/4: success, 3: failed. +#[test] +fn blk_features_negotiate() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE, 0, &image_type)); + let device_args = Rc::new(String::from(",num-queues=4")); + let drive_args = Rc::new(String::from(",direct=false,readonly=on")); + let other_args = Rc::new(String::from("")); + let (blk, test_state, alloc) = create_blk( + &image_type, + image_path.clone(), + device_args, + drive_args, + other_args, + ); + + blk.borrow_mut().pci_dev.enable_msix(None); + blk.borrow_mut() + .setup_msix_configuration_vector(alloc.clone(), 0); + + let mut features = blk.borrow().get_device_features(); + features |= 1 << VIRTIO_BLK_F_SEG_MAX + | 1 << VIRTIO_BLK_F_RO + | 1 << VIRTIO_BLK_F_FLUSH + | 1 << VIRTIO_BLK_F_MQ; + blk.borrow_mut().negotiate_features(features); + blk.borrow_mut().set_features_ok(); + assert_eq!(features, blk.borrow_mut().get_guest_features()); + + let unsupported_features = 1 << VIRTIO_BLK_F_BARRIER + | 1 << VIRTIO_BLK_F_SIZE_MAX + | 1 << VIRTIO_BLK_F_GEOMETRY + | 1 << VIRTIO_BLK_F_BLK_SIZE + | 1 << VIRTIO_BLK_F_TOPOLOGY + | 1 << VIRTIO_BLK_F_CONFIG_WCE + | 1 << VIRTIO_BLK_F_DISCARD + | 1 << VIRTIO_BLK_F_WRITE_ZEROES + | 1 << VIRTIO_BLK_F_LIFETIME + | 1 << VIRTIO_BLK_F_SECURE_ERASE; + features |= unsupported_features; + blk.borrow_mut().negotiate_features(features); + blk.borrow_mut().set_features_ok(); + assert_ne!(features, blk.borrow_mut().get_guest_features()); + assert_eq!( + unsupported_features & blk.borrow_mut().get_guest_features(), + 0 + ); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + Vec::new(), + image_path.clone(), + ); + } +} + +/// Block device sends I/O request with feature 'VIRTIO_BLK_F_SEG_MAX'. +/// TestStep: +/// 1. Init block device with feature 'VIRTIO_BLK_F_SEG_MAX'. +/// 2. Do the I/O request, check seg_max. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. 
+#[test] +fn blk_feature_seg_max() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let (blk, test_state, alloc, image_path) = set_up(&image_type); + + let mut features = virtio_blk_default_feature(blk.clone()); + features |= 1 << VIRTIO_BLK_F_SEG_MAX; + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + let seg_max = blk + .borrow() + .config_readl(offset_of!(VirtioBlkConfig, seg_max) as u64); + let queue_size = virtqueues[0].borrow_mut().size; + assert_eq!(seg_max, (queue_size - 2)); + + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + virtio_blk_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device sends I/O request with feature 'VIRTIO_BLK_F_RO'. +/// TestStep: +/// 1. Init block device with feature 'VIRTIO_BLK_F_RO'. +/// 2. Do the 'read' I/O request. +/// 3. Do the 'write' I/O request. +/// 4. Destroy device. +/// Expect: +/// 1/2/4: success, failed: 3. +#[test] +fn blk_feature_ro() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let (blk, test_state, alloc, image_path) = set_up(&image_type); + + let mut features = virtio_blk_default_feature(blk.clone()); + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + let capacity = blk.borrow().config_readq(0); + assert_eq!(capacity, TEST_IMAGE_SIZE / u64::from(REQ_DATA_LEN)); + + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + Rc::new("".to_string()), + ); + + let device_args = Rc::new(String::from("")); + let drive_args = Rc::new(String::from(",direct=false,readonly=on")); + let other_args = Rc::new(String::from("")); + let (blk, test_state, alloc) = create_blk( + &image_type, + image_path.clone(), + device_args, + drive_args, + other_args, + ); + + features |= 1 << VIRTIO_BLK_F_RO; + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + let capacity = blk.borrow().config_readq(0); + assert_eq!(capacity, TEST_IMAGE_SIZE / u64::from(REQ_DATA_LEN)); + + virtio_blk_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + let (free_head, req_addr) = add_blk_request( + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + VIRTIO_BLK_T_OUT, + 0, + true, + ); + blk.borrow().virtqueue_notify(virtqueues[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueues[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let status_addr = + round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + u64::from(REQ_DATA_LEN); + let status = test_state.borrow().readb(status_addr); + assert_eq!(status, VIRTIO_BLK_S_IOERR); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device sends I/O request with feature 'VIRTIO_BLK_F_FLUSH'. +/// TestStep: +/// 1. Init block device with feature 'VIRTIO_BLK_F_FLUSH'. +/// 2. Do the I/O request. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. 
+#[test] +fn blk_feature_flush() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let (blk, test_state, alloc, image_path) = set_up(&image_type); + + let mut features = virtio_blk_default_feature(blk.clone()); + features |= 1 << VIRTIO_BLK_F_FLUSH; + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + virtio_blk_flush( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 10, + ); + + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + virtio_blk_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device sends I/O request with feature 'VIRTIO_BLK_F_MQ'. +/// TestStep: +/// 1. Init block device with feature 'VIRTIO_BLK_F_MQ'. +/// 2. Do the I/O multiple queue requests. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn blk_feature_mq() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE, 0, &image_type)); + let device_args = Rc::new(String::from(",num-queues=4")); + let drive_args = Rc::new(String::from(",direct=false")); + let other_args = Rc::new(String::from("")); + let (blk, test_state, alloc) = create_blk( + &image_type, + image_path.clone(), + device_args, + drive_args, + other_args, + ); + + let mut features = virtio_blk_default_feature(blk.clone()); + features |= 1 << VIRTIO_BLK_F_MQ; + + let num_queues = 4; + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, num_queues); + + let cfg_num_queues = blk + .borrow() + .config_readw(offset_of!(VirtioBlkConfig, num_queues) as u64); + assert_eq!(num_queues as u16, cfg_num_queues); + + let mut free_head: Vec = Vec::with_capacity(num_queues); + let mut req_addr: Vec = Vec::with_capacity(num_queues); + for i in 0..num_queues { + let mut blk_req = + TestVirtBlkReq::new(VIRTIO_BLK_T_OUT, 1, i as u64, REQ_DATA_LEN as usize); + blk_req.data.push_str("TEST"); + + req_addr.push(virtio_blk_request( + test_state.clone(), + alloc.clone(), + blk_req, + true, + )); + + let data_addr = round_up(req_addr[i] + u64::from(REQ_ADDR_LEN), 512).unwrap(); + + let mut data_entries: Vec = Vec::with_capacity(3); + data_entries.push(TestVringDescEntry { + data: req_addr[i], + len: REQ_ADDR_LEN, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: data_addr, + len: REQ_DATA_LEN, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: data_addr + u64::from(REQ_DATA_LEN), + len: REQ_STATUS_LEN, + write: true, + }); + + free_head.push( + virtqueues[i] + .borrow_mut() + .add_chained(test_state.clone(), data_entries), + ); + } + + for i in 0..num_queues { + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[i].clone()); + } + + for i in 0..num_queues { + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueues[i].clone(), + free_head[i], + TIMEOUT_US, + &mut None, + true, + ); + } + + for i in 0..num_queues { + let status_addr = round_up(req_addr[i] + u64::from(REQ_ADDR_LEN), 512).unwrap() + + u64::from(REQ_DATA_LEN); + let status = test_state.borrow().readb(status_addr); + assert_eq!(status, VIRTIO_BLK_S_OK); + } + + for i in 0..num_queues { + virtio_blk_read( + blk.clone(), + test_state.clone(), + 
alloc.clone(), + virtqueues[i].clone(), + i as u64, + true, + ); + } + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device sends I/O request, configure all parameters. +/// TestStep: +/// 1. Init block device, configure all parameters. +/// 2. Do the I/O request. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn blk_all_features() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE_1M, 1, &image_type)); + let device_args = Rc::new(String::from( + ",multifunction=on,serial=111111,num-queues=4,bootindex=1,iothread=iothread1", + )); + #[cfg(target_env = "ohos")] + let drive_args = Rc::new(String::from( + ",direct=false,readonly=off,throttling.iops-total=1024", + )); + #[cfg(not(target_env = "ohos"))] + let drive_args = if aio_probe(AioEngine::IoUring).is_ok() { + Rc::new(String::from( + ",direct=on,aio=io_uring,readonly=off,throttling.iops-total=1024", + )) + } else { + Rc::new(String::from( + ",direct=false,readonly=off,throttling.iops-total=1024", + )) + }; + let other_args = Rc::new(String::from("-object iothread,id=iothread1")); + let (blk, test_state, alloc) = create_blk( + &image_type, + image_path.clone(), + device_args, + drive_args, + other_args, + ); + + let mut features = virtio_blk_default_feature(blk.clone()); + features |= 1 << VIRTIO_BLK_F_SEG_MAX | 1 << VIRTIO_BLK_F_FLUSH | 1 << VIRTIO_BLK_F_MQ; + + let num_queues = 4; + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, num_queues); + + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + virtio_blk_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device sends I/O request to a file with a size of 511b. +/// TestStep: +/// 1. Init block device with a 511b file. +/// 2. Do the I/O request. +/// 3. Destroy device. +/// Expect: +/// 1/3: success, 2: failed. 
+#[test] +fn blk_small_file_511b() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let size = 511; + let image_path = Rc::new(create_img(size, 1, &image_type)); + let device_args = Rc::new(String::from("")); + let drive_args = Rc::new(String::from(",direct=false")); + let other_args = Rc::new(String::from("")); + let (blk, test_state, alloc) = create_blk( + &image_type, + image_path.clone(), + device_args, + drive_args, + other_args, + ); + + let features = virtio_blk_default_feature(blk.clone()); + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + let capacity = blk.borrow().config_readq(0); + assert_eq!(capacity, size / u64::from(REQ_DATA_LEN)); + + let mut blk_req = TestVirtBlkReq::new(VIRTIO_BLK_T_OUT, 1, 0, REQ_DATA_LEN as usize); + blk_req.data.push_str("TEST"); + + let req_addr = virtio_blk_request(test_state.clone(), alloc.clone(), blk_req, true); + let data_addr = round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap(); + + let mut data_entries: Vec = Vec::with_capacity(3); + data_entries.push(TestVringDescEntry { + data: req_addr, + len: REQ_ADDR_LEN, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: data_addr, + len: REQ_DATA_LEN, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: data_addr + u64::from(REQ_DATA_LEN), + len: REQ_STATUS_LEN, + write: true, + }); + let free_head = virtqueues[0] + .borrow_mut() + .add_chained(test_state.clone(), data_entries); + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueues[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let status_addr = + round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + u64::from(REQ_DATA_LEN); + let status = test_state.borrow().readb(status_addr); + assert_eq!(status, VIRTIO_BLK_S_IOERR); + + let (free_head, req_addr) = add_blk_request( + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + VIRTIO_BLK_T_IN, + 0, + true, + ); + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueues[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let status_addr = + round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + u64::from(REQ_DATA_LEN); + let status = test_state.borrow().readb(status_addr); + assert_eq!(status, VIRTIO_BLK_S_IOERR); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device sends I/O request, configured as 'serial=11111111111111111111'. +/// TestStep: +/// 1. Init block device, configured as 'serial=11111111111111111111'. +/// 2. Do the I/O request, check serial number. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. 
+#[test] +fn blk_serial() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let serial_num = String::from("11111111111111111111"); + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE, 0, &image_type)); + let device_args = Rc::new(format!(",serial={}", serial_num)); + let drive_args = Rc::new(String::from(",direct=false")); + let other_args = Rc::new(String::from("")); + let (blk, test_state, alloc) = create_blk( + &image_type, + image_path.clone(), + device_args, + drive_args, + other_args, + ); + + let features = virtio_blk_default_feature(blk.clone()); + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + virtio_blk_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + virtio_blk_get_id( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + serial_num, + ); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device sends I/O request, configured as 'throttling.iops-total=1'. +/// TestStep: +/// 1. Init block device, configured as 'throttling.iops-total=1'. +/// 2. Do the I/O request, check iops. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn blk_iops() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE, 0, &image_type)); + let device_args = Rc::new(String::from("")); + let drive_args = Rc::new(String::from(",direct=false,throttling.iops-total=1")); + let other_args = Rc::new(String::from("")); + let (blk, test_state, alloc) = create_blk( + &image_type, + image_path.clone(), + device_args, + drive_args, + other_args, + ); + + let features = virtio_blk_default_feature(blk.clone()); + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + let mut free_head = 0_u32; + let mut req_addr = 0_u64; + + for i in 0..DEFAULT_IO_REQS { + (free_head, req_addr) = add_blk_request( + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + VIRTIO_BLK_T_OUT, + i, + true, + ); + } + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[0].clone()); + + loop { + test_state.borrow().clock_step_ns(100); + + if blk.borrow().queue_was_notified(virtqueues[0].clone()) + && virtqueues[0].borrow_mut().get_buf(test_state.clone()) + { + assert!(!virtqueues[0].borrow().desc_len.contains_key(&free_head)); + break; + } + } + + let status_addr = + round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + u64::from(REQ_DATA_LEN); + let status = test_state.borrow().readb(status_addr); + assert_ne!(status, VIRTIO_BLK_S_OK); + + let time_out = Instant::now() + Duration::from_micros(TIMEOUT_US); + loop { + test_state.borrow().clock_step(); + + if blk.borrow().queue_was_notified(virtqueues[0].clone()) + && virtqueues[0].borrow_mut().get_buf(test_state.clone()) + && virtqueues[0].borrow().desc_len.contains_key(&free_head) + { + break; + } + assert!(Instant::now() <= time_out); + } + + let status_addr = + round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + u64::from(REQ_DATA_LEN); + let status = test_state.borrow().readb(status_addr); + assert_eq!(status, VIRTIO_BLK_S_OK); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + 
image_path.clone(), + ); + } +} + +/// Block device sends I/O request, configured as 'aio=[off|threads|io_uring|native]'. +/// TestStep: +/// 1. Init block device, configured with different aio type. +/// 2. Do the I/O request. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn blk_with_different_aio() { + let block_driver_cfg: Vec<(ImageType, &str, AioEngine)> = vec![ + (ImageType::Raw, "off", AioEngine::Off), + (ImageType::Qcow2, "off", AioEngine::Off), + (ImageType::Raw, "off", AioEngine::Threads), + (ImageType::Qcow2, "off", AioEngine::Threads), + #[cfg(not(target_env = "ohos"))] + (ImageType::Raw, "on", AioEngine::Native), + #[cfg(not(target_env = "ohos"))] + (ImageType::Raw, "on", AioEngine::IoUring), + ]; + + for (image_type, direct, aio_engine) in block_driver_cfg { + println!("Image type: {:?}", image_type); + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE_1M, 1, &image_type)); + let device_args = Rc::new(String::from("")); + let drive_args = if aio_probe(aio_engine).is_ok() { + Rc::new(format!(",direct={},aio={}", direct, aio_engine)) + } else { + continue; + }; + let other_args = Rc::new(String::from("")); + let (blk, test_state, alloc) = create_blk( + &image_type, + image_path.clone(), + device_args, + drive_args, + other_args, + ); + + let features = virtio_blk_default_feature(blk.clone()); + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + virtio_blk_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device sends I/O request, configured as 'aio=io_uring'. +/// TestStep: +/// 1. Init block device, configured as 'aio=io_uring'. +/// 2. Do the I/O request. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[cfg(not(target_env = "ohos"))] +#[test] +fn blk_aio_io_uring() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE_1M, 1, &image_type)); + let device_args = Rc::new(String::from("")); + let drive_args = if aio_probe(AioEngine::IoUring).is_ok() { + Rc::new(String::from(",direct=on,aio=io_uring")) + } else { + Rc::new(String::from(",direct=false")) + }; + let other_args = Rc::new(String::from("")); + let (blk, test_state, alloc) = create_blk( + &image_type, + image_path.clone(), + device_args, + drive_args, + other_args, + ); + + let features = virtio_blk_default_feature(blk.clone()); + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + false, + ); + + virtio_blk_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + false, + ); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device sends an illegal type of I/O request. +/// TestStep: +/// 1. Init block device. +/// 2. Do the I/O request of illegal type. +/// 3. Destroy device. +/// Expect: +/// 1/3: success, 2: failed. 
+#[test] +fn blk_illegal_req_type() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let (blk, test_state, alloc, image_path) = set_up(&image_type); + + let features = virtio_blk_default_feature(blk.clone()); + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + virtio_blk_illegal_req( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + VIRTIO_BLK_T_ILLGEAL, + ); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device configuration space read and write. +/// TestStep: +/// 1. Init block device. +/// 2. Read block device configuration space. +/// 3. Write block device configuration space. +/// 4. Destroy device. +/// Expect: +/// 1/2/4: success, 3: failed. +#[test] +fn blk_rw_config() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let (blk, test_state, alloc, image_path) = set_up(&image_type); + + let features = virtio_blk_default_feature(blk.clone()); + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + let capacity = blk.borrow().config_readq(0); + assert_eq!(capacity, TEST_IMAGE_SIZE / u64::from(REQ_DATA_LEN)); + + blk.borrow().config_writeq(0, 1024); + let capacity = blk.borrow().config_readq(0); + assert_ne!(capacity, 1024); + + let discard_sector_alignment = blk.borrow().config_readl(40); + blk.borrow().config_writel(40, 1024); + assert_eq!(blk.borrow().config_readl(40), discard_sector_alignment); + assert_ne!(blk.borrow().config_readl(40), 1024); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device send I/O requests in an abnormal format. +/// TestStep: +/// 1. Init block device. +/// 2. Do the I/O requests in an abnormal format. +/// 3. Destroy device. +/// Expect: +/// 1/3: success, 2: failed. +#[test] +fn blk_abnormal_req() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let (blk, test_state, alloc, image_path) = set_up(&image_type); + + let features = virtio_blk_default_feature(blk.clone()); + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + let mut blk_req = TestVirtBlkReq::new(VIRTIO_BLK_T_OUT, 1, 0, REQ_DATA_LEN as usize); + blk_req.data.push_str("TEST"); + + let req_addr = virtio_blk_request(test_state.clone(), alloc.clone(), blk_req, false); + + // Desc: req_hdr length 8, data length 256. + let mut data_entries: Vec = Vec::with_capacity(3); + data_entries.push(TestVringDescEntry { + data: req_addr, + len: 8, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: req_addr + 8, + len: 256, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: req_addr + 264, + len: 1, + write: true, + }); + let free_head = virtqueues[0] + .borrow_mut() + .add_chained(test_state.clone(), data_entries); + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueues[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let status = test_state.borrow().readb(req_addr + 264); + assert_ne!(status, VIRTIO_BLK_S_OK); + + // Desc: req_hdr length 32. 
+ let mut data_entries: Vec = Vec::with_capacity(3); + data_entries.push(TestVringDescEntry { + data: req_addr, + len: 32, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: req_addr + 32, + len: 512, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: req_addr + 544, + len: 1, + write: true, + }); + let free_head = virtqueues[0] + .borrow_mut() + .add_chained(test_state.clone(), data_entries); + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueues[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let status = test_state.borrow().readb(req_addr + 544); + assert_ne!(status, VIRTIO_BLK_S_OK); + + // Desc: data length 256. + let mut data_entries: Vec = Vec::with_capacity(3); + data_entries.push(TestVringDescEntry { + data: req_addr, + len: 16, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: req_addr + 16, + len: 256, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: req_addr + 272, + len: 1, + write: true, + }); + let free_head = virtqueues[0] + .borrow_mut() + .add_chained(test_state.clone(), data_entries); + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueues[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let status = test_state.borrow().readb(req_addr + 272); + assert_ne!(status, VIRTIO_BLK_S_OK); + + // Desc: data length 4, small size desc. + let mut data_entries: Vec = Vec::with_capacity(3); + data_entries.push(TestVringDescEntry { + data: req_addr, + len: 16, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: req_addr + 16, + len: 4, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: req_addr + 20, + len: 1, + write: true, + }); + let free_head = virtqueues[0] + .borrow_mut() + .add_chained(test_state.clone(), data_entries); + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueues[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let status = test_state.borrow().readb(req_addr + 20); + assert_ne!(status, VIRTIO_BLK_S_OK); + + // Desc: miss data. + let mut data_entries: Vec = Vec::with_capacity(3); + data_entries.push(TestVringDescEntry { + data: req_addr, + len: 16, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: req_addr + 16, + len: 1, + write: true, + }); + let _free_head = virtqueues[0] + .borrow_mut() + .add_chained(test_state.clone(), data_entries); + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[0].clone()); + thread::sleep(time::Duration::from_secs(1)); + + let status = test_state.borrow().readb(req_addr + 16); + assert_ne!(status, VIRTIO_BLK_S_OK); + + // Desc: all 'out' desc. 
+ let mut data_entries: Vec = Vec::with_capacity(3); + data_entries.push(TestVringDescEntry { + data: req_addr, + len: 16, + write: true, + }); + data_entries.push(TestVringDescEntry { + data: req_addr + 16, + len: 512, + write: true, + }); + data_entries.push(TestVringDescEntry { + data: req_addr + 528, + len: 1, + write: true, + }); + let _free_head = virtqueues[0] + .borrow_mut() + .add_chained(test_state.clone(), data_entries); + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[0].clone()); + thread::sleep(time::Duration::from_secs(1)); + + let status = test_state.borrow().readb(req_addr + 528); + assert_ne!(status, VIRTIO_BLK_S_OK); + + // Desc: data length 0. + let mut data_entries: Vec = Vec::with_capacity(3); + data_entries.push(TestVringDescEntry { + data: req_addr, + len: 16, + write: false, + }); + data_entries.push(TestVringDescEntry { + data: req_addr + 16, + len: 0, + write: true, + }); + data_entries.push(TestVringDescEntry { + data: req_addr + 20, + len: 1, + write: true, + }); + let _free_head = virtqueues[0] + .borrow_mut() + .add_chained(test_state.clone(), data_entries); + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[0].clone()); + thread::sleep(time::Duration::from_secs(1)); + + let status = test_state.borrow().readb(req_addr + 20); + assert_ne!(status, VIRTIO_BLK_S_OK); + + // Desc: only status desc. + let mut data_entries: Vec = Vec::with_capacity(3); + data_entries.push(TestVringDescEntry { + data: req_addr, + len: 1, + write: true, + }); + let _free_head = virtqueues[0] + .borrow_mut() + .add_chained(test_state.clone(), data_entries); + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[0].clone()); + thread::sleep(time::Duration::from_secs(1)); + + let status = test_state.borrow().readb(req_addr); + assert_ne!(status, VIRTIO_BLK_S_OK); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device send different types of I/O requests in parallel. +/// TestStep: +/// 1. Init block device. +/// 2. Do the different types I/O requests in parallel. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. 
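+/// Note: the four request types (OUT, IN, FLUSH, GET_ID) are queued on the same
+/// virtqueue before a single kick; only the last used element is polled, after which
+/// every status byte is checked.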
+#[test] +fn blk_parallel_req() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let (blk, test_state, alloc, image_path) = set_up(&image_type); + + let mut features = virtio_blk_default_feature(blk.clone()); + features |= 1 << VIRTIO_BLK_F_FLUSH; + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + let mut free_head_vec: Vec = Vec::with_capacity(4); + let mut req_addr_vec: Vec = Vec::with_capacity(4); + + let (free_head, req_addr) = add_blk_request( + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + VIRTIO_BLK_T_OUT, + 0, + true, + ); + free_head_vec.push(free_head); + req_addr_vec.push(req_addr); + + let (free_head, req_addr) = add_blk_request( + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + VIRTIO_BLK_T_IN, + 0, + true, + ); + free_head_vec.push(free_head); + req_addr_vec.push(req_addr); + + let (free_head, req_addr) = add_blk_request( + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + VIRTIO_BLK_T_FLUSH, + 0, + true, + ); + free_head_vec.push(free_head); + req_addr_vec.push(req_addr); + + let (free_head, req_addr) = add_blk_request( + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + VIRTIO_BLK_T_GET_ID, + 0, + true, + ); + free_head_vec.push(free_head); + req_addr_vec.push(req_addr); + + blk.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueues[0].clone(), + free_head_vec[3], + TIMEOUT_US, + &mut None, + true, + ); + + for i in 0..4 { + let status_addr = round_up(req_addr_vec[i] + u64::from(REQ_ADDR_LEN), 512).unwrap() + + u64::from(REQ_DATA_LEN); + let status = test_state.borrow().readb(status_addr); + assert_eq!(status, VIRTIO_BLK_S_OK); + } + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device sends an I/O request that exceeds the capacity range. +/// TestStep: +/// 1. Init block device. +/// 2. Do the I/O request that exceeds the capacity range. +/// 3. Destroy device. +/// Expect: +/// 1/3: success, 2: failed. +#[test] +fn blk_exceed_capacity() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let (blk, test_state, alloc, image_path) = set_up(&image_type); + + let features = virtio_blk_default_feature(blk.clone()); + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + let capacity = blk.borrow().config_readq(0); + assert_eq!(capacity, TEST_IMAGE_SIZE / u64::from(REQ_DATA_LEN)); + + let (free_head, req_addr) = add_blk_request( + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + VIRTIO_BLK_T_OUT, + capacity + 1, + true, + ); + + blk.borrow().virtqueue_notify(virtqueues[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueues[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let status_addr = + round_up(req_addr + u64::from(REQ_ADDR_LEN), 512).unwrap() + u64::from(REQ_DATA_LEN); + let status = test_state.borrow().readb(status_addr); + assert_eq!(status, VIRTIO_BLK_S_IOERR); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } +} + +/// Block device sends I/O request with feature 'VIRTIO_BLK_F_DISCARD'. +/// TestStep: +/// 1. Init block device with feature 'VIRTIO_BLK_F_DISCARD'. +/// 2. Do the discard request with different arguments. +/// 3. 
Destroy device. +/// Expect: +/// 1/3: success. +/// 2: success or failure, stratovirt process is normal. +#[test] +fn blk_feature_discard() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let req_len = std::mem::size_of::(); + // (sector, num_sectors, flags, req_len, enable_feature, discard, status) + let reqs = [ + (0, 2048, 0, req_len, true, "unmap", VIRTIO_BLK_S_OK), + (0, 2048, 0, req_len, false, "unmap", VIRTIO_BLK_S_OK), + (0, 2048, 0, 8, true, "unmap", 0xff), + (0, 2048, 0, 32, true, "unmap", VIRTIO_BLK_S_UNSUPP), + (0, 2048, 1, req_len, true, "unmap", VIRTIO_BLK_S_UNSUPP), + (0, 2048, 0xff, req_len, true, "unmap", VIRTIO_BLK_S_UNSUPP), + ( + 0, + (TEST_IMAGE_SIZE >> 9) as u32 + 1, + 0, + req_len, + true, + "unmap", + VIRTIO_BLK_S_IOERR, + ), + ( + 0, + MAX_REQUEST_SECTORS + 1, + 0, + req_len, + true, + "unmap", + VIRTIO_BLK_S_IOERR, + ), + (0, 2048, 0, req_len, false, "ignore", VIRTIO_BLK_S_UNSUPP), + ]; + let mut i = 1; + for (sector, num_sectors, flags, len, enabled, discard, status) in reqs { + println!("blk_feature_discard: request {}", i); + i += 1; + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE, 0, &image_type)); + let full_disk_size = get_disk_size(image_path.clone()); + let device_args = Rc::new(String::from("")); + let drive_args = Rc::new(format!(",discard={},direct=false", discard)); + let other_args = Rc::new(String::from("")); + let (blk, test_state, alloc) = create_blk( + &image_type, + image_path.clone(), + device_args, + drive_args, + other_args, + ); + + let mut features = virtio_blk_default_feature(blk.clone()); + if enabled { + features |= 1 << VIRTIO_BLK_F_DISCARD; + } else { + features &= !(1 << VIRTIO_BLK_F_DISCARD); + } + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + if discard != "ignore" { + virtio_blk_check_discard_config(blk.clone()); + } + + let mut need_poll_elem = true; + let req_data = if len == req_len { + let req = VirtBlkDiscardWriteZeroes { + sector, + num_sectors, + flags, + }; + req.as_bytes().to_vec() + } else { + if len < req_len { + need_poll_elem = false; + } + vec![0; len] + }; + virtio_blk_discard_and_write_zeroes( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + &req_data, + status, + need_poll_elem, + true, + ); + if image_type == ImageType::Raw && status == VIRTIO_BLK_S_OK { + let image_size = get_disk_size(image_path.clone()); + assert_eq!(image_size, full_disk_size - u64::from(num_sectors) / 2); + } else if image_type == ImageType::Qcow2 + && status == VIRTIO_BLK_S_OK + && ((u64::from(num_sectors) * 512) & (CLUSTER_SIZE - 1)) == 0 + { + // If the disk format is equal to Qcow2. + // the length of the num sectors needs to be aligned with the cluster size, + // otherwise the calculated file size is not accurate. + let image_size = get_disk_size(image_path.clone()); + let delete_num = (u64::from(num_sectors) * 512) >> 10; + assert_eq!(image_size, full_disk_size - delete_num); + } + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } + } +} + +/// Block device sends I/O request with feature 'VIRTIO_BLK_F_WRITE_ZEROES'. +/// TestStep: +/// 1. Init block device with feature 'VIRTIO_BLK_F_WRITE_ZEROES'. +/// 2. Do the write-zeroes request with different arguments. +/// 3. Destroy device. +/// Expect: +/// 1/3: success. +/// 2: success or failure, stratovirt process is normal. 
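+/// Note: each tuple below combines the guest feature bits with the drive's
+/// 'detect-zeroes' and 'discard' options, so the write-zeroes config space and the
+/// resulting image size are checked for each combination.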
+#[test] +fn blk_feature_write_zeroes() { + for image_type in ImageType::IMAGE_TYPE { + println!("Image type: {:?}", image_type); + let wz_len = size_of::(); + let req_len = size_of::(); + // (sector, num_sectors, flags, req_len, enable_feature, write_zeroes, discard, status) + let reqs = [ + (0, 2048, 0, wz_len, true, "on", "ignore", VIRTIO_BLK_S_OK), + (0, 2048, 0, wz_len, true, "on", "unmap", VIRTIO_BLK_S_OK), + (0, 2048, 0, wz_len, false, "on", "ignore", VIRTIO_BLK_S_OK), + (0, 2048, 0, wz_len, false, "on", "unmap", VIRTIO_BLK_S_OK), + (0, 2048, 0, wz_len, true, "unmap", "ignore", VIRTIO_BLK_S_OK), + ( + 0, + 2048, + 0, + wz_len, + false, + "unmap", + "ignore", + VIRTIO_BLK_S_OK, + ), + ( + 0, + 2048, + 0, + wz_len, + false, + "off", + "ignore", + VIRTIO_BLK_S_UNSUPP, + ), + ( + 0, + 2048, + 0, + wz_len, + false, + "off", + "unmap", + VIRTIO_BLK_S_UNSUPP, + ), + (0, 2048, 1, wz_len, true, "unmap", "unmap", VIRTIO_BLK_S_OK), + (0, 8, 0, req_len, true, "unmap", "unmap", VIRTIO_BLK_S_OK), + (0, 0, 0, req_len, true, "on", "unmap", VIRTIO_BLK_S_OK), + ]; + let mut i = 1; + for (sector, num_sectors, flags, len, enabled, write_zeroes, discard, status) in reqs { + println!("blk_feature_write_zeroes: request {}", i); + i += 1; + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE, 1, &image_type)); + let full_disk_size = get_disk_size(image_path.clone()); + let device_args = Rc::new(String::from("")); + let drive_args = Rc::new(format!( + ",detect-zeroes={},discard={},direct=false", + write_zeroes, discard + )); + let other_args = Rc::new(String::from("")); + let (blk, test_state, alloc) = create_blk( + &image_type, + image_path.clone(), + device_args, + drive_args, + other_args, + ); + + let mut features = virtio_blk_default_feature(blk.clone()); + if discard == "unmap" { + features |= 1 << VIRTIO_BLK_F_DISCARD; + } + if enabled { + features |= 1 << VIRTIO_BLK_F_WRITE_ZEROES; + } else { + features &= !(1 << VIRTIO_BLK_F_WRITE_ZEROES); + } + + let virtqueues = + blk.borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + if enabled { + virtio_blk_check_write_zeroes_config(blk.clone()); + } + + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + if len == wz_len { + let req_data = VirtBlkDiscardWriteZeroes { + sector, + num_sectors, + flags, + }; + virtio_blk_discard_and_write_zeroes( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + req_data.as_bytes(), + status, + true, + false, + ); + } else { + virtio_blk_read_write_zeroes( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + VIRTIO_BLK_T_OUT, + 0, + 4096, + ); + } + + if write_zeroes != "off" { + virtio_blk_read_write_zeroes( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + VIRTIO_BLK_T_IN, + 0, + 512, + ); + } + + if image_type == ImageType::Raw + && status == VIRTIO_BLK_S_OK + && (write_zeroes == "unmap" && discard == "unmap" && flags == 1 || len != wz_len) + { + let image_size = get_disk_size(image_path.clone()); + assert_eq!(image_size, full_disk_size - u64::from(num_sectors) / 2); + } else if image_type == ImageType::Qcow2 + && status == VIRTIO_BLK_S_OK + && (write_zeroes == "unmap" && discard == "unmap" && flags == 1 || len != wz_len) + && ((u64::from(num_sectors) * 512) & (CLUSTER_SIZE - 1)) == 0 + { + // If the disk format is equal to Qcow2. 
+ // the length of the num sectors needs to be aligned with the cluster size, + // otherwise the calculated file size is not accurate. + let image_size = get_disk_size(image_path.clone()); + let delete_num = (u64::from(num_sectors) * 512) >> 10; + assert_eq!(image_size, full_disk_size - delete_num); + } + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues, + image_path.clone(), + ); + } + } +} + +/// Block device using snapshot sends I/O request. +/// TestStep: +/// 1. Init block device. Create internal snapshot. +/// 2. Do the I/O request. +/// 3. Delete internal snapshot. +/// 4. Do the I/O request. +/// 5. Destroy device. +/// Expect: +/// 1/2/3/4/5: success. +#[test] +fn blk_snapshot_basic() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Qcow2); + let features = virtio_blk_default_feature(blk.clone()); + let virtqueues = blk + .borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + create_snapshot(test_state.clone(), "drive0", "snap0"); + assert!(check_snapshot(test_state.clone(), "snap0")); + + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + virtio_blk_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + delete_snapshot(test_state.clone(), "drive0", "snap0"); + assert!(!check_snapshot(test_state.clone(), "snap0")); + + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + virtio_blk_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + tear_down(blk, test_state, alloc, virtqueues, image_path); +} + +/// Block device whose backend file has snapshot sends I/O request. +/// TestStep: +/// 1. Create snapshot snap0 in qcow2 backend file. +/// 2. Init device. +/// 3. Do the I/O request. +/// 4. Create internal snapshot snap1. Delete internal snapshot snap0. +/// 5. Do the I/O request. +/// 6. Destroy device. +/// Expect: +/// 1/2/3/4/5/6: success. +#[test] +fn blk_snapshot_basic2() { + // Note: We can not use stratovirt-img to create snapshot now. + // So, we use qmp to create snapshot in existed qcow2 file. + // TODO: use stratovirt-img instead of qmp in the future. 
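+    // The flow below boots once to create "snap0" through QMP, tears that VM down while
+    // passing an empty image path (so the qcow2 file is left in place), and then boots a
+    // second VM from the same image, which therefore already contains an internal snapshot.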
+ let (blk, test_state, alloc, image_path) = set_up(&ImageType::Qcow2); + let features = virtio_blk_default_feature(blk.clone()); + let virtqueues = blk + .borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + create_snapshot(test_state.clone(), "drive0", "snap0"); + assert!(check_snapshot(test_state.clone(), "snap0")); + tear_down(blk, test_state, alloc, virtqueues, Rc::new("".to_string())); + + let device_args = Rc::new(String::from("")); + let drive_args = Rc::new(String::from(",direct=false")); + let other_args = Rc::new(String::from("")); + let (blk, test_state, alloc) = create_blk( + &ImageType::Qcow2, + image_path.clone(), + device_args, + drive_args, + other_args, + ); + + let features = virtio_blk_default_feature(blk.clone()); + let virtqueues = blk + .borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + virtio_blk_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + create_snapshot(test_state.clone(), "drive0", "snap1"); + assert!(check_snapshot(test_state.clone(), "snap1")); + + delete_snapshot(test_state.clone(), "drive0", "snap0"); + assert!(!check_snapshot(test_state.clone(), "snap0")); + + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + virtio_blk_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + true, + ); + + tear_down(blk, test_state, alloc, virtqueues, image_path); +} diff --git a/tests/mod_test/tests/fwcfg_test.rs b/tests/mod_test/tests/fwcfg_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..c51535a3cff57a55db3adda2571f77ed5b3f98f6 --- /dev/null +++ b/tests/mod_test/tests/fwcfg_test.rs @@ -0,0 +1,750 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::cell::RefCell; +use std::process::Command; +use std::rc::Rc; +use std::{fs, mem}; + +use byteorder::{ByteOrder, LittleEndian}; + +use devices::legacy::FwCfgEntryType; +use mod_test::libdriver::fwcfg::{bios_args, FW_CFG_BASE}; +use mod_test::libdriver::machine::TestStdMachine; +use mod_test::libtest::test_init; +use mod_test::utils::{cleanup_img, create_img, ImageType, TEST_IMAGE_SIZE}; +use mod_test::utils::{swap_u16, swap_u32}; + +// FwCfg Signature +const FW_CFG_DMA_SIGNATURE: u128 = 0x51454d5520434647; + +#[test] +fn test_signature() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + let mut test_state = test_init(args); + + let mut read_data: Vec = Vec::with_capacity(4); + let target_data: [u8; 4] = [b'Q', b'E', b'M', b'U']; + + // Select Signature entry and read it. 
+ test_state.fw_cfg_read_bytes(FwCfgEntryType::Signature as u16, &mut read_data, 4); + assert_eq!(read_data.as_slice(), target_data); + + test_state.stop(); +} + +#[test] +fn test_id() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + let mut test_state = test_init(args); + + // Select Id entry and read it. + let read_data = test_state.fw_cfg_read_u32(FwCfgEntryType::Id as u16); + assert_eq!(read_data, 3); + + test_state.stop(); +} + +#[test] +fn test_nographic() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + let mut test_state = test_init(args); + + // Select NoGraphic entry and read it. + let read_data = test_state.fw_cfg_read_u32(FwCfgEntryType::NoGraphic as u16); + assert_eq!(read_data, 0); + + test_state.stop(); +} + +#[test] +fn test_nbcpus() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + let mut extra_args: Vec<&str> = "-smp 10".split(' ').collect(); + args.append(&mut extra_args); + let mut test_state = test_init(args); + + // Select NbCpus entry and read it. + let read_data = test_state.fw_cfg_read_u16(FwCfgEntryType::NbCpus as u16); + assert_eq!(read_data, 10); + + test_state.stop(); +} + +#[test] +fn test_kernel_initrd_cmdlint() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + + assert!(cfg!(target_os = "linux")); + let kernel_path = "/tmp/kernel"; + let initrd_path = "/tmp/initrd"; + let kernel_of = format!("of={}", kernel_path); + let initrd_of = format!("of={}", initrd_path); + let mut output = Command::new("dd") + .arg("if=/dev/zero") + .arg(&kernel_of) + .arg("bs=1M") + .arg("count=10") + .output() + .expect("Failed to create tmp kernel"); + assert!(output.status.success()); + + output = Command::new("dd") + .arg("if=/dev/zero") + .arg(&initrd_of) + .arg("bs=1M") + .arg("count=1") + .output() + .expect("Failed to create tmp initrd"); + assert!(output.status.success()); + + let kernel_para = format!("-kernel {}", kernel_path); + let initrd_para = format!("-initrd {}", initrd_path); + let mut extra_args: Vec<&str> = kernel_para.split(' ').collect(); + args.append(&mut extra_args); + extra_args = initrd_para.split(' ').collect(); + args.append(&mut extra_args); + extra_args = "-m 1G".split(' ').collect(); + args.append(&mut extra_args); + + // set cmdlint + let cmdline = "-append console=ttyS0 root=/dev/vda reboot=k panic=1"; + extra_args = cmdline.split(' ').collect(); + args.append(&mut extra_args); + let mut test_state = test_init(args); + + // Select KernelSize entry and read it. + let read_data = test_state.fw_cfg_read_u32(FwCfgEntryType::KernelSize as u16); + assert_eq!(read_data, 10 * 1024 * 1024); + + // Select InitrdAddr entry and read it. + let read_data = test_state.fw_cfg_read_u32(FwCfgEntryType::InitrdAddr as u16); + // Initrd addr = (mem start) + (mem end) - (initrd size) + let initrd_addr = 0x4000_0000 + 0x4000_0000 - 0x10_0000; + assert_eq!(read_data, initrd_addr); + + // Select CmdlineSize entry and read it. + let read_data = test_state.fw_cfg_read_u32(FwCfgEntryType::CmdlineSize as u16); + // cmdline size = cmdline - "-append". + let cmdline_size = cmdline.to_string().len() as u32 - 8; + assert_eq!(read_data, cmdline_size + 1); + + // Select CmdlineData entry and read it. 
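+    // The "-append " prefix is 8 characters, so the command line handed to fw_cfg is
+    // cmdline[8..]; CmdlineSize above is one byte larger (presumably a trailing NUL),
+    // while only cmdline_size bytes are read back and compared here.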
+ let mut read_data: Vec = Vec::with_capacity(cmdline_size as usize); + test_state.fw_cfg_read_bytes( + FwCfgEntryType::CmdlineData as u16, + &mut read_data, + cmdline_size, + ); + assert_eq!(String::from_utf8_lossy(&read_data), cmdline[8..]); + + fs::remove_file(kernel_path).expect("Failed to remove the kernel file"); + fs::remove_file(initrd_path).expect("Failed to remove the initrd file"); + test_state.stop(); +} + +#[test] +fn test_filedir_by_dma() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator; + + let file_name = "etc/boot-fail-wait"; + let mut read_data: Vec = Vec::with_capacity(mem::size_of::()); + + // Select FileDir entry and read it. + let file_size = test_state.borrow().fw_cfg_read_file( + &mut allocator.borrow_mut(), + file_name, + &mut read_data, + mem::size_of::() as u32, + ); + assert_eq!(file_size, mem::size_of::() as u32); + + let time_out = LittleEndian::read_u32(&read_data); + assert_eq!(time_out, 5); + + test_state.borrow_mut().stop(); +} + +#[test] +fn test_boot_index() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + + let dev_path = "/pci@ffffffffffffffff/scsi@1/disk@0,0\n\0".to_string(); + + let mut extra_args = + "-device virtio-blk-pci,id=drv0,drive=drive0,bus=pcie.0,addr=0x1.0,bootindex=0" + .split(' ') + .collect(); + args.append(&mut extra_args); + + let image_para = format!( + "-drive if=none,id=drive0,file={},format=raw,direct=false", + image_path + ); + extra_args = image_para.split(' ').collect(); + args.append(&mut extra_args); + + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator; + + let file_name = "bootorder"; + let mut read_data: Vec = Vec::with_capacity(dev_path.len()); + + // Select FileDir entry and read it. + let file_size = test_state.borrow().fw_cfg_read_file( + &mut allocator.borrow_mut(), + file_name, + &mut read_data, + dev_path.len() as u32, + ); + assert_eq!(file_size, dev_path.len() as u32); + assert_eq!(&read_data, dev_path.as_bytes()); + + test_state.borrow_mut().stop(); + if !image_path.is_empty() { + cleanup_img(image_path) + } +} + +#[test] +fn test_smbios_type0() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + + let mut extra_args = "-smbios type=0,vendor=vendor0,version=version0,date=date0" + .split(' ') + .collect(); + args.append(&mut extra_args); + + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator; + + let anchor_file = "etc/smbios/smbios-anchor"; + let tables_file = "etc/smbios/smbios-tables"; + let mut read_data: Vec = Vec::with_capacity(24); + + // Select FileDir entry and read it. 
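+    // Layout of the 24-byte SMBIOS 3.0 entry point checked below: bytes 0..5 hold the
+    // "_SM3_" anchor string, byte 6 the entry point length (24), and bytes 12..16 the
+    // little-endian size of the structure table area.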
+    let anchor_size = test_state.borrow().fw_cfg_read_file(
+        &mut allocator.borrow_mut(),
+        anchor_file,
+        &mut read_data,
+        24_u32,
+    );
+
+    assert_eq!(anchor_size, 24_u32);
+    assert_eq!(String::from_utf8_lossy(&read_data[..5]), "_SM3_");
+    assert_eq!(read_data[6], 24_u8);
+    let table_len = LittleEndian::read_u32(&read_data[12..]);
+    assert_eq!(table_len, 372);
+
+    let mut read_table_data: Vec<u8> = Vec::with_capacity(table_len as usize);
+    let tables_size = test_state.borrow().fw_cfg_read_file(
+        &mut allocator.borrow_mut(),
+        tables_file,
+        &mut read_table_data,
+        table_len,
+    );
+    assert_eq!(tables_size, table_len);
+    let table_type0_len = 24;
+    assert_eq!(
+        String::from_utf8_lossy(&read_table_data[table_type0_len..table_type0_len + 7]),
+        "vendor0"
+    );
+    assert_eq!(
+        String::from_utf8_lossy(&read_table_data[table_type0_len + 8..table_type0_len + 16]),
+        "version0"
+    );
+    assert_eq!(
+        String::from_utf8_lossy(&read_table_data[table_type0_len + 17..table_type0_len + 22]),
+        "date0"
+    );
+
+    test_state.borrow_mut().stop();
+}
+
+#[test]
+fn test_smbios_type1() {
+    let mut args: Vec<&str> = Vec::new();
+    bios_args(&mut args);
+
+    let mut extra_args = "-smbios type=0,vendor=vendor0,version=version0,date=date0"
+        .split(' ')
+        .collect();
+    args.append(&mut extra_args);
+
+    let mut extra_args = "-smbios type=1,manufacturer=manufacturer1,product=product1,\
+        version=12.2.2,serial=181a6bdf-ff98-4c5e-97ec-bff35fe41f6c,uuid=181a6bdf-ff98-4c5e-97ec-bff35fe41f6c,\
+        family=Virtual,sku=sku1"
+        .split(' ')
+        .collect();
+    args.append(&mut extra_args);
+
+    let test_state = Rc::new(RefCell::new(test_init(args)));
+    let machine = TestStdMachine::new(test_state.clone());
+    let allocator = machine.allocator;
+
+    let anchor_file = "etc/smbios/smbios-anchor";
+    let tables_file = "etc/smbios/smbios-tables";
+    let mut read_data: Vec<u8> = Vec::with_capacity(24);
+
+    // Select FileDir entry and read it.
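+    // Note: the UUID assertions further below expect the first three UUID fields to be
+    // stored little-endian (as SMBIOS encodes time_low/time_mid/time_hi_and_version),
+    // which is why their byte order is reversed compared with the textual serial string.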
+ let anchor_size = test_state.borrow().fw_cfg_read_file( + &mut allocator.borrow_mut(), + anchor_file, + &mut read_data, + 24_u32, + ); + + assert_eq!(anchor_size, 24_u32); + assert_eq!(String::from_utf8_lossy(&read_data[..5]), "_SM3_"); + assert_eq!(read_data[6], 24_u8); + let talble_len = LittleEndian::read_u32(&read_data[12..]); + assert_eq!(talble_len, 414); + + let mut read_table_date: Vec = Vec::with_capacity(talble_len as usize); + let talbles_size = test_state.borrow().fw_cfg_read_file( + &mut allocator.borrow_mut(), + tables_file, + &mut read_table_date, + talble_len, + ); + assert_eq!(talbles_size, talble_len); + let table_type0_len = 24; + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type0_len..table_type0_len + 7]), + "vendor0" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type0_len + 8..table_type0_len + 16]), + "version0" + ); + assert_eq!(read_table_date[48], 1); + assert_eq!(read_table_date[49], 27_u8); + let handle1 = LittleEndian::read_u16(&read_table_date[50..]); + assert_eq!(handle1, 0x100); + + assert_eq!( + String::from_utf8_lossy(&read_table_date[75..88]), + "manufacturer1" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[89..97]), + "product1" + ); + assert_eq!(String::from_utf8_lossy(&read_table_date[98..104]), "12.2.2"); + assert_eq!( + String::from_utf8_lossy(&read_table_date[105..141]), + "181a6bdf-ff98-4c5e-97ec-bff35fe41f6c" + ); + assert_eq!(String::from_utf8_lossy(&read_table_date[142..146]), "sku1"); + assert_eq!( + String::from_utf8_lossy(&read_table_date[147..154]), + "Virtual" + ); + // check uuid + assert_eq!(read_table_date[56], 0xdf); + assert_eq!(read_table_date[57], 0x6b); + assert_eq!(read_table_date[58], 0x1a); + assert_eq!(read_table_date[59], 0x18); + + assert_eq!(read_table_date[60], 0x98); + assert_eq!(read_table_date[61], 0xff); + + assert_eq!(read_table_date[62], 0x5e); + assert_eq!(read_table_date[63], 0x4c); + + assert_eq!(read_table_date[64], 0x97); + assert_eq!(read_table_date[65], 0xec); + + assert_eq!(read_table_date[66], 0xbf); + assert_eq!(read_table_date[67], 0xf3); + assert_eq!(read_table_date[68], 0x5f); + assert_eq!(read_table_date[69], 0xe4); + assert_eq!(read_table_date[70], 0x1f); + assert_eq!(read_table_date[71], 0x6c); + + test_state.borrow_mut().stop(); +} + +/// smbios table2 test +/// TestStep: +/// 1.Init device +/// 2.config type2 message +/// Expect: +/// 1.Success +/// 2.Verify that the data in the table is as expected +#[test] +fn test_smbios_type2() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + + let mut extra_args = "-smbios type=2,manufacturer=manufacturer2,product=product2,\ + version=version2,serial=serial2,asset=asset2,location=location2" + .split(' ') + .collect(); + args.append(&mut extra_args); + + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator; + + let anchor_file = "etc/smbios/smbios-anchor"; + let tables_file = "etc/smbios/smbios-tables"; + let mut read_data: Vec = Vec::with_capacity(24); + + // Select FileDir entry and read it. 
+ let anchor_size = test_state.borrow().fw_cfg_read_file( + &mut allocator.borrow_mut(), + anchor_file, + &mut read_data, + 24_u32, + ); + + assert_eq!(anchor_size, 24_u32); + assert_eq!(String::from_utf8_lossy(&read_data[..5]), "_SM3_"); + assert_eq!(read_data[6], 24_u8); + let talble_len = LittleEndian::read_u32(&read_data[12..]); + + let mut read_table_date: Vec = Vec::with_capacity(talble_len as usize); + let talbles_size = test_state.borrow().fw_cfg_read_file( + &mut allocator.borrow_mut(), + tables_file, + &mut read_table_date, + talble_len, + ); + assert_eq!(talbles_size, talble_len); + let table_type2_len = 107; + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type2_len..table_type2_len + 13]), + "manufacturer2" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type2_len + 14..table_type2_len + 22]), + "product2" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type2_len + 23..table_type2_len + 31]), + "version2" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type2_len + 32..table_type2_len + 39]), + "serial2" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type2_len + 40..table_type2_len + 49]), + "location2" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type2_len + 50..table_type2_len + 56]), + "asset2" + ); + + test_state.borrow_mut().stop(); +} + +/// TestStep: +/// 1.Init device +/// 2.config type3 message +/// Expect: +/// 1.Success +/// 2.Verify that the data in the table is as expected +#[test] +fn test_smbios_type3() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + + let mut extra_args = "-smbios type=3,manufacturer=manufacturer3,version=version3,\ + serial=serial3,asset=asset3,sku=sku3" + .split(' ') + .collect(); + args.append(&mut extra_args); + + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator; + + let anchor_file = "etc/smbios/smbios-anchor"; + let tables_file = "etc/smbios/smbios-tables"; + let mut read_data: Vec = Vec::with_capacity(24); + + // Select FileDir entry and read it. 
+ let anchor_size = test_state.borrow().fw_cfg_read_file( + &mut allocator.borrow_mut(), + anchor_file, + &mut read_data, + 24_u32, + ); + + assert_eq!(anchor_size, 24_u32); + assert_eq!(String::from_utf8_lossy(&read_data[..5]), "_SM3_"); + assert_eq!(read_data[6], 24_u8); + let talble_len = LittleEndian::read_u32(&read_data[12..]); + + let mut read_table_date: Vec = Vec::with_capacity(talble_len as usize); + let talbles_size = test_state.borrow().fw_cfg_read_file( + &mut allocator.borrow_mut(), + tables_file, + &mut read_table_date, + talble_len, + ); + assert_eq!(talbles_size, talble_len); + let table_type3_len = 114; + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type3_len..table_type3_len + 13]), + "manufacturer3" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type3_len + 14..table_type3_len + 22]), + "version3" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type3_len + 23..table_type3_len + 30]), + "serial3" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type3_len + 31..table_type3_len + 35]), + "sku3" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type3_len + 36..table_type3_len + 42]), + "asset3" + ); + + test_state.borrow_mut().stop(); +} + +/// TestStep: +/// 1.Init device +/// 2.config type4 message +/// Expect: +/// 1.Success +/// 2.Verify that the data in the table is as expected +#[test] +fn test_smbios_type4() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + + let cpu_args = "-smp 8,maxcpus=8,sockets=2,cores=2,threads=2".to_string(); + let mut extra_args = cpu_args.split(' ').collect(); + args.append(&mut extra_args); + + let mut extra_args = "-smbios type=4,sock_pfx=sock_pfx4,manufacturer=manufacturer4,\ + version=version4,serial=serial4,asset=asset4,part=part4,max-speed=65534,current-speed=65534" + .split(' ') + .collect(); + args.append(&mut extra_args); + + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator; + + let anchor_file = "etc/smbios/smbios-anchor"; + let tables_file = "etc/smbios/smbios-tables"; + let mut read_data: Vec = Vec::with_capacity(24); + + // Select FileDir entry and read it. 
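+    // Both max-speed and current-speed are 65534 (0xFFFE) and are stored little-endian in
+    // the type 4 structure, hence the 0xFE/0xFF byte pairs asserted below.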
+ let anchor_size = test_state.borrow().fw_cfg_read_file( + &mut allocator.borrow_mut(), + anchor_file, + &mut read_data, + 24_u32, + ); + + assert_eq!(anchor_size, 24_u32); + assert_eq!(String::from_utf8_lossy(&read_data[..5]), "_SM3_"); + assert_eq!(read_data[6], 24_u8); + let talble_len = LittleEndian::read_u32(&read_data[12..]); + + let mut read_table_date: Vec = Vec::with_capacity(talble_len as usize); + let talbles_size = test_state.borrow().fw_cfg_read_file( + &mut allocator.borrow_mut(), + tables_file, + &mut read_table_date, + talble_len, + ); + assert_eq!(talbles_size, talble_len); + // check speed + assert_eq!(read_table_date[157], 0xFE); + assert_eq!(read_table_date[158], 0xFF); + assert_eq!(read_table_date[159], 0xFE); + assert_eq!(read_table_date[160], 0xFF); + + let table_type4_len = 185; + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type4_len..table_type4_len + 11]), + "sock_pfx4 0" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type4_len + 12..table_type4_len + 25]), + "manufacturer4" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type4_len + 26..table_type4_len + 34]), + "version4" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type4_len + 35..table_type4_len + 42]), + "serial4" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type4_len + 43..table_type4_len + 49]), + "asset4" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type4_len + 50..table_type4_len + 55]), + "part4" + ); + test_state.borrow_mut().stop(); +} + +/// TestStep: +/// 1.Init device +/// 2.config type17 message +/// Expect: +/// 1.Success +/// 2.Verify that the data in the table is as expected +#[test] +fn test_smbios_type17() { + let mut args: Vec<&str> = Vec::new(); + bios_args(&mut args); + + let cpu_args = "-smp 8,maxcpus=8,sockets=2,cores=2,threads=2".to_string(); + let mut extra_args = cpu_args.split(' ').collect(); + args.append(&mut extra_args); + + let mut extra_args = + "-smbios type=17,loc_pfx=loc_pfx17,bank=bank17,manufacturer=manufacturer17,\ + serial=serial17,asset=asset17,part=part17,speed=65534" + .split(' ') + .collect(); + args.append(&mut extra_args); + + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator; + + let anchor_file = "etc/smbios/smbios-anchor"; + let tables_file = "etc/smbios/smbios-tables"; + let mut read_data: Vec = Vec::with_capacity(24); + + // Select FileDir entry and read it. 
+ let anchor_size = test_state.borrow().fw_cfg_read_file( + &mut allocator.borrow_mut(), + anchor_file, + &mut read_data, + 24_u32, + ); + + assert_eq!(anchor_size, 24_u32); + assert_eq!(String::from_utf8_lossy(&read_data[..5]), "_SM3_"); + assert_eq!(read_data[6], 24_u8); + let talble_len = LittleEndian::read_u32(&read_data[12..]); + assert_eq!(talble_len, 467); + + let mut read_table_date: Vec = Vec::with_capacity(talble_len as usize); + let talbles_size = test_state.borrow().fw_cfg_read_file( + &mut allocator.borrow_mut(), + tables_file, + &mut read_table_date, + talble_len, + ); + assert_eq!(talbles_size, talble_len); + // check speed + assert_eq!(read_table_date[337], 0xFE); + assert_eq!(read_table_date[338], 0xFF); + + let table_type2_len = 356; + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type2_len..table_type2_len + 14]), + "manufacturer17" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type2_len + 15..table_type2_len + 26]), + "loc_pfx17 0" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type2_len + 27..table_type2_len + 33]), + "bank17" + ); + + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type2_len + 34..table_type2_len + 42]), + "serial17" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type2_len + 43..table_type2_len + 49]), + "part17" + ); + assert_eq!( + String::from_utf8_lossy(&read_table_date[table_type2_len + 50..table_type2_len + 57]), + "asset17" + ); + + test_state.borrow_mut().stop(); +} + +#[test] +fn test_exception_by_ctrl_reg() { + let mut args = Vec::new(); + bios_args(&mut args); + let mut test_state = test_init(args); + + // Select Signature entry and read it by control register. + test_state.writew(FW_CFG_BASE, swap_u16(FwCfgEntryType::Signature as u16)); + let read_data = test_state.readw(FW_CFG_BASE + 0x8); + + // Read data by control register always return 0. + assert_eq!(read_data, 0); + + test_state.stop(); +} + +#[test] +fn test_exception_scenarios() { + let mut args = Vec::new(); + bios_args(&mut args); + let mut test_state = test_init(args); + + // Select entry which is not exit and read it. + let read_data = test_state.fw_cfg_read_u32(0xffff); + assert_eq!(read_data, 0); + + // Read data exceeds the original size. + let read_data = test_state.fw_cfg_read_u32(FwCfgEntryType::Id as u16); + assert_eq!(read_data, 3); + assert_eq!(test_state.readl(FW_CFG_BASE), 0); + + // Read data offset: 0x17 + size: 4 > 0x18, which is overflow + assert_eq!(test_state.readl(FW_CFG_BASE + 0x17), 0); + + // Read FW_CFG_DMA_SIGNATURE high 32bit + assert_eq!( + swap_u32(test_state.readl(FW_CFG_BASE + 0x10)), + (FW_CFG_DMA_SIGNATURE >> 32) as u32 + ); + + test_state.stop(); +} diff --git a/tests/mod_test/tests/memory_test.rs b/tests/mod_test/tests/memory_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..9c553949f0e390d66100bae4cc744fd65c4574a3 --- /dev/null +++ b/tests/mod_test/tests/memory_test.rs @@ -0,0 +1,740 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+// See the Mulan PSL v2 for more details. + +use std::cell::RefCell; +use std::fs::{remove_file, File}; +use std::process::Command; +use std::rc::Rc; +use std::string::String; + +use mod_test::utils::support_numa; +use serde_json::{json, Value::String as JsonString}; + +use mod_test::{ + libdriver::{machine::TestStdMachine, malloc::GuestAllocator}, + libtest::{test_init, TestState, MACHINE_TYPE_ARG}, +}; + +pub struct MemoryTest { + pub state: Rc>, + pub allocator: Rc>, +} + +const MEM_SIZE: u64 = 2048; // 2GB +const PAGE_SIZE: u64 = 4096; +const ADDRESS_BASE: u64 = 0x4000_0000; +const ROM_DEV_PATH: &str = "rom_dev_file.fd"; +const RAM_DEV_PATH: &str = "ram_dev_file.fd"; + +impl MemoryTest { + pub fn new( + memsize: u64, + page_size: u64, + shared: bool, + prealloc: bool, + hugepage_path: Option, + ram_file: Option, + ) -> Self { + let mut extra_args: Vec<&str> = Vec::new(); + let mut args: Vec<&str> = "-machine".split(' ').collect(); + if shared { + args.push("virt,mem-share=on"); + } else { + args.push("virt"); + } + if prealloc { + args.push("-mem-prealloc"); + } + extra_args.append(&mut args); + + let mem_args = format!("-m {}", memsize); + args = mem_args[..].split(' ').collect(); + extra_args.append(&mut args); + + let mem_args; + if let Some(file) = hugepage_path { + mem_args = format!("-mem-path {:?}", file); + args = mem_args[..].split(' ').collect(); + extra_args.append(&mut args); + } + + let mem_args_path; + if let Some(file) = ram_file { + mem_args_path = format!("-mem-path {:?}", file); + args = mem_args_path[..].split(' ').collect(); + extra_args.append(&mut args); + } + + let test_state = Rc::new(RefCell::new(test_init(extra_args))); + let machine = + TestStdMachine::new_bymem(test_state.clone(), memsize * 1024 * 1024, page_size); + let allocator = machine.allocator; + + MemoryTest { + state: test_state, + allocator, + } + } +} + +fn ram_read_write(memory_test: &MemoryTest) { + let str = "test memory read write"; + let addr = memory_test.allocator.borrow_mut().alloc(PAGE_SIZE); + + memory_test + .state + .borrow_mut() + .memwrite(addr, str.as_bytes()); + let ret = memory_test + .state + .borrow_mut() + .memread(addr, str.len() as u64); + assert_eq!(str, String::from_utf8(ret).unwrap()); + + memory_test.state.borrow_mut().stop(); +} + +/// Ram read and write Test. +/// TestStep: +/// 1. Start device. +/// 2. Write some data("test memory read write") to the address. +/// 3. Read data from the address and check it. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn normal_ram_read_write() { + ram_read_write(&MemoryTest::new( + MEM_SIZE, PAGE_SIZE, false, false, None, None, + )); +} + +/// Io region read and write Test. +/// TestStep: +/// 1. Add an io region. +/// 2. Write some data([0x1u8; 8]) to the address. +/// 3. Read data from the address and check it. +/// 4. Write overflow. +/// 5. Read overflow. +/// 6. Destroy device. +/// Expect: +/// 1/2/6: Success. +/// 4/5: Failed +/// 3: Got [0x2u8; 8]. The function of the device is to multiply the written value by 2. +#[test] +fn io_region_read_write() { + let memory_test = MemoryTest::new(MEM_SIZE, PAGE_SIZE, false, false, None, None); + let addr = 0x100_0000_0000; // 1TB + + // Add a dummy device by qmp. The function of the device is to multiply the written value by 2 + // through the write interface and save it, and read the saved value through the read interface. 
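+    // Roughly (a sketch of the behaviour described above, not code from this repository):
+    //     write(offset, data) => saved = u64::from_le_bytes(data) * 2;
+    //     read(offset)        => saved.to_le_bytes()
+    // so writing [0x01u8; 8] below is expected to read back as [0x02u8; 8].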
+ memory_test + .state + .borrow_mut() + .qmp("{ \"execute\": \"update_region\", \"arguments\": { \"update_type\": \"add\", \"region_type\": \"io_region\", \"offset\": 1099511627776, \"size\": 4096, \"priority\": 99 }}"); + let data = [0x01u8; 8]; + memory_test.state.borrow_mut().memwrite(addr, &data); + let ret = memory_test + .state + .borrow_mut() + .memread(addr, std::mem::size_of::() as u64); + assert_eq!(ret, [0x02u8; 8]); + + memory_test.state.borrow_mut().stop(); +} + +/// Read and write the overlapping region Test. +/// TestStep: +/// 1. Write some data[0x1u8; 8] to the ram. +/// 2. Read the data([0x1u8; 8]) from the address. +/// 3. Add a region that overlaps the ram region. +/// 4. Write some data[0x1u8; 8] the overlaps region. +/// 5. Read data from the overlaps region. +/// Expect: +/// 1/3/4: success. +/// 2: Got [0x1u8; 8]. +/// 5: Got [0x2u8; 8]. We read the io region data witch has a higher priority. +#[test] +fn region_priority() { + let memory_test = MemoryTest::new(MEM_SIZE, PAGE_SIZE, false, false, None, None); + let addr = memory_test.allocator.borrow_mut().alloc(PAGE_SIZE); + let data = [0x01u8; 8]; + + // Ram write and read. + memory_test.state.borrow_mut().memwrite(addr, &data); + let ret = memory_test + .state + .borrow_mut() + .memread(addr, std::mem::size_of::() as u64); + assert_eq!(ret, [0x01u8; 8]); + + // Add an overlapping region to write and read again. + let qmp_cmd = format!("{{ \"execute\": \"update_region\", \"arguments\": {{ \"update_type\": \"add\", \"region_type\": \"io_region\", \"offset\": {}, \"size\": 4096, \"priority\": 99 }} }}", addr); + memory_test.state.borrow_mut().qmp(&qmp_cmd); + memory_test.state.borrow_mut().memwrite(addr, &data); + let ret = memory_test + .state + .borrow_mut() + .memread(addr, std::mem::size_of::() as u64); + assert_eq!(ret, [0x02u8; 8]); +} + +/// Some region update exception operations. +/// TestStep: +/// 1. Add the wrong attribute.(add read only for ram_device) +/// 2. Repeat adding region. +/// 3. Delete a non-existent region +/// 4. Add a region that extends beyond its father +/// Expect: +/// 1: Success. +/// 2/3: Failed. +#[test] +fn region_update_exception() { + let memory_test = MemoryTest::new(MEM_SIZE, PAGE_SIZE, false, false, None, None); + + // Add read only attribute for io region. + let ret = memory_test + .state + .borrow_mut() + .qmp("{ \"execute\": \"update_region\", \"arguments\": { \"update_type\": \"add\", \"region_type\": \"io_region\", \"offset\": 2199023255552, \"size\": 4096, \"priority\": 100, \"read_only_mode\": true }}"); + assert_eq!( + *ret.get("error").unwrap(), + json!({"class": JsonString("GenericError".to_string()), "desc": JsonString("set_rom_device_romd failed".to_string())}) + ); + + // Repeat adding region. 
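+    // Adding the same io_region twice is currently accepted: both QMP calls below return
+    // an empty object rather than an error, unlike the delete and out-of-range cases that
+    // follow.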
+ let ret = memory_test + .state + .borrow_mut() + .qmp("{ \"execute\": \"update_region\", \"arguments\": { \"update_type\": \"add\", \"region_type\": \"io_region\", \"offset\": 1099511627776, \"size\": 4096, \"priority\": 99 }}"); + assert_eq!(*ret.get("return").unwrap(), json!({})); + let ret = memory_test + .state + .borrow_mut() + .qmp("{ \"execute\": \"update_region\", \"arguments\": { \"update_type\": \"add\", \"region_type\": \"io_region\", \"offset\": 1099511627776, \"size\": 4096, \"priority\": 99 }}"); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + // Delete a non-existent region + let ret = memory_test + .state + .borrow_mut() + .qmp("{ \"execute\": \"update_region\", \"arguments\": { \"update_type\": \"delete\", \"region_type\": \"io_region\", \"offset\": 2199023255552, \"size\": 4096, \"priority\": 100 }}"); + assert_eq!( + *ret.get("error").unwrap(), + json!({"class": JsonString("GenericError".to_string()), "desc": JsonString("delete subregion failed".to_string())}) + ); + + // Add a region that extends beyond its father + let ret = memory_test + .state + .borrow_mut() + .qmp("{ \"execute\": \"update_region\", \"arguments\": { \"update_type\": \"add\", \"region_type\": \"io_region\", \"offset\": 18446744073709551615, \"size\": 4096, \"priority\": 99 }}"); + assert_eq!( + *ret.get("error").unwrap(), + json!({"class": JsonString("GenericError".to_string()), "desc": JsonString("add subregion failed".to_string())}) + ); + + memory_test.state.borrow_mut().stop(); +} + +/// Rom device region write Test. +/// TestStep: +/// 1. Add a rom_device region with read_only_mode equals false. +/// 2. Write some data([0x01u8; 8]) to the rom device. +/// 3. Read data from the rom device and check it. +/// 4. Write overflow test. +/// 5. Read overflow test. +/// 6. Add a rom_device region with read_only_mode equals true. +/// 7. Write some data([0x01u8; 8]) to the rom device. +/// 8. Read data from the rom device and check it. +/// Expect: +/// 1/2/6/7: Success. +/// 4/5: Failed. +/// 3: Got [0x02u8; 8] from the device. The read and write behavior is the same as io region. +/// 8: Got [0x00u8; 8] fro the device. The write operation does nothing, and read the original +/// data. +#[test] +fn rom_device_region_readwrite() { + let memory_test = MemoryTest::new(MEM_SIZE, PAGE_SIZE, false, false, None, None); + let addr = 0x1_0000_0000; // 4GB + + // Add a dummy rom device by qmp. The function of the device is to multiply the written value by + // 2 through the write interface and save it, and read the saved value through the read + // interface. 
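+    // In other words (a hedged reading of the expectations below): with read_only_mode
+    // set, the write of [0x01u8; 8] is not applied, so reading the region back still
+    // returns the original zero-filled content, [0x00u8; 8].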
+ let file = File::create(ROM_DEV_PATH).unwrap(); + file.set_len(PAGE_SIZE).unwrap(); + let qmp_str = format!( + "{{ \"execute\": \"update_region\", + \"arguments\": {{ \"update_type\": \"add\", + \"region_type\": \"rom_device_region\", + \"offset\": 4294967296, + \"size\": 4096, + \"priority\": 99, + \"read_only_mode\": false, + \"device_fd_path\": {:?} }} }}", + ROM_DEV_PATH + ); + memory_test.state.borrow_mut().qmp(&qmp_str); + let data = [0x01u8; 8]; + memory_test.state.borrow_mut().memwrite(addr, &data); + let ret = memory_test + .state + .borrow_mut() + .memread(addr, std::mem::size_of::() as u64); + assert_eq!(ret, [0x02u8; 8]); + remove_file(ROM_DEV_PATH).unwrap(); + + // Write overflow + memory_test + .state + .borrow_mut() + .memwrite(addr + PAGE_SIZE - 1, &data); + // Read overflow + let ret = memory_test + .state + .borrow_mut() + .memread(addr + PAGE_SIZE - 1, std::mem::size_of::() as u64); + assert_eq!(ret, [0x00u8; 8]); + + // Add a dummy rom device by qmp. And set read only mode. The write operation is sent to the + // device. The device can set the write mode to writable according to the device status during + // the write operation, or directly return an error indicating that the write is not allowed. + // The read operation is the same as that of IO region. + let file = File::create(ROM_DEV_PATH).unwrap(); + file.set_len(PAGE_SIZE).unwrap(); + let qmp_str = format!( + "{{ \"execute\": \"update_region\", + \"arguments\": {{ \"update_type\": \"add\", + \"region_type\": \"rom_device_region\", + \"offset\": 4294967296, + \"size\": 4096, + \"priority\": 99, + \"read_only_mode\": true, + \"device_fd_path\": {:?} }} }}", + ROM_DEV_PATH + ); + memory_test.state.borrow_mut().qmp(&qmp_str); + let data = [0x01u8; 8]; + memory_test.state.borrow_mut().memwrite(addr, &data); + let ret = memory_test + .state + .borrow_mut() + .memread(addr, std::mem::size_of::() as u64); + assert_eq!(ret, [0x00u8; 8]); + remove_file(ROM_DEV_PATH).unwrap(); + + memory_test.state.borrow_mut().stop(); +} + +/// Ram device region write Test. +/// TestStep: +/// 1. Start device. +/// 2. Write some data([0x01u8; 8]) to the ram device. +/// 3. Read data from the ram device and check it. +/// 4. Write overflow. +/// 5. Read overflow. +/// 6. Destroy device. +/// Expect: +/// 1/2/6: Success. +/// 4/5: Failed. +/// 3: Got [0x01u8; 8] from the device. The read and write behavior is the same as ram. 
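+/// Note: for the overflow accesses (steps 4/5), it looks like only the first byte of the
+/// write at addr + PAGE_SIZE - 1 is expected to land inside the region, so the overflow
+/// read returns 0x01 followed by zeros.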
+#[test] +fn ram_device_region_readwrite() { + let memory_test = MemoryTest::new(MEM_SIZE, PAGE_SIZE, false, false, None, None); + let addr = 0x1_0000_0000; // 4GB + + let file = File::create(RAM_DEV_PATH).unwrap(); + file.set_len(PAGE_SIZE).unwrap(); + let qmp_str = format!( + "{{ \"execute\": \"update_region\", + \"arguments\": {{ \"update_type\": \"add\", + \"region_type\": \"ram_device_region\", + \"offset\": 4294967296, + \"size\": 4096, + \"priority\": 99, + \"device_fd_path\": {:?} }} }}", + RAM_DEV_PATH + ); + memory_test.state.borrow_mut().qmp(&qmp_str); + let data = [0x01u8; 8]; + memory_test.state.borrow_mut().memwrite(addr, &data); + let ret = memory_test + .state + .borrow_mut() + .memread(addr, std::mem::size_of::() as u64); + assert_eq!(ret, [0x01u8; 8]); + + // Write overflow + memory_test + .state + .borrow_mut() + .memwrite(addr + PAGE_SIZE - 1, &data); + // Read overflow + let ret = memory_test + .state + .borrow_mut() + .memread(addr + PAGE_SIZE - 1, std::mem::size_of::() as u64); + let mut data_err = [0x00u8; 8]; + data_err[0] = 0x01u8; + assert_eq!(ret, data_err); + + memory_test + .state + .borrow_mut() + .qmp("{ \"execute\": \"update_region\", \"arguments\": { \"update_type\": \"delete\", \"region_type\": \"ram_device_region\", \"offset\": 4294967296, \"size\": 4096, \"priority\": 99 }}"); + + remove_file(RAM_DEV_PATH).unwrap(); + + memory_test.state.borrow_mut().stop(); +} + +/// Io region ioeventfd read and write Test. +/// TestStep: +/// 1. Add an io region with ioeventfd(data: 1, size 8). +/// 2. Write 1 to the ioeventfd. +/// 3. Read data from the address and check it. +/// 4. Write 2 to the ioeventfd. +/// 5. Read data from the address and check it. +/// Expect: +/// 1/2/4: success. +/// 3: Got value 0. +/// 5: Got value 4. +#[test] +fn io_region_ioeventfd() { + let memory_test = MemoryTest::new(MEM_SIZE, PAGE_SIZE, false, false, None, None); + let addr = 0x100_0000_0000; // 1TB + + memory_test + .state + .borrow_mut() + .qmp("{ \"execute\": \"update_region\", \"arguments\": { \"update_type\": \"add\", \"region_type\": \"io_region\", \"offset\": 1099511627776, \"size\": 4096, \"priority\": 99, \"ioeventfd\": true, \"ioeventfd_data\": 1, \"ioeventfd_size\": 8 }}"); + memory_test.state.borrow_mut().writeq(addr, 1); + let ret = memory_test + .state + .borrow_mut() + .memread(addr, std::mem::size_of::() as u64); + let cmp = [0x0u8; 8]; + assert_eq!(ret, cmp); + + memory_test.state.borrow_mut().writeq(addr, 2); + let ret = memory_test + .state + .borrow_mut() + .memread(addr, std::mem::size_of::() as u64); + let mut cmp = [0x0u8; 8]; + cmp[0] = 4; + assert_eq!(ret, cmp); + + memory_test.state.borrow_mut().stop(); +} + +/// Shared ram read and write Test. +/// TestStep: +/// 1. Start device. +/// 2. Write some data("test memory read write") to the address. +/// 3. Read data from the address and check it. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn shared_ram_read_write() { + ram_read_write(&MemoryTest::new( + MEM_SIZE, PAGE_SIZE, true, false, None, None, + )); +} + +/// Prealloc ram read and write Test. +/// TestStep: +/// 1. Start device. +/// 2. Write some data("test memory read write") to the address. +/// 3. Read data from the address and check it. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn prealloc_ram_read_write() { + ram_read_write(&MemoryTest::new( + MEM_SIZE, PAGE_SIZE, false, true, None, None, + )); +} + +/// Hugepage ram read and write Test. +/// TestStep: +/// 1. Start device. +/// 2. 
Write some data("test memory read write") to the address. +/// 3. Read data from the address and check it. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. +#[cfg(not(target_env = "ohos"))] +#[test] +fn hugepage_ram_read_write() { + // crate hugetlbfs directory + let _output = Command::new("rm") + .arg("-rf") + .arg("/tmp/stratovirt/hugepages") + .output() + .expect("Failed to rm directory"); + let _output = Command::new("mkdir") + .arg("-p") + .arg("/tmp/stratovirt/hugepages") + .output() + .expect("Failed to create directory"); + + // mount hugetlbfs on a directory on host + let output = Command::new("mount") + .arg("-t") + .arg("hugetlbfs") + .arg("hugetlbfs") + .arg("/tmp/stratovirt/hugepages") + .output() + .expect("Failed to mount hugetlbfs"); + assert!(output.status.success()); + + // set the count of hugepages + let output = Command::new("sysctl") + .arg("vm.nr_hugepages=1024") + .output() + .expect("Failed to set the count of hugepages"); + assert!(output.status.success()); + + ram_read_write(&MemoryTest::new( + MEM_SIZE, + PAGE_SIZE, + false, + false, + Some("/tmp/stratovirt/hugepages".to_string()), + None, + )); + + // remove hugetlbfs + let _output = Command::new("umount") + .arg("/tmp/stratovirt/hugepages") + .output() + .expect("Failed to mount hugetlbfs"); + let _output = Command::new("rm") + .arg("-rf") + .arg("/tmp/stratovirt/hugepages") + .output() + .expect("Failed to rm directory"); +} + +/// File backend ram read and write Test. +/// TestStep: +/// 1. Start device. +/// 2. Write some data("test memory read write") to the address. +/// 3. Read data from the address and check it. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn filebackend_ram_read_write() { + // crate hugetlbfs directory + let _output = Command::new("rm") + .arg("-rf") + .arg("/tmp/stratovirt/dir") + .output() + .expect("Failed to rm directory"); + let _output = Command::new("mkdir") + .arg("-p") + .arg("/tmp/stratovirt/dir") + .output() + .expect("Failed to create directory"); + let _output = Command::new("touch") + .arg("/tmp/stratovirt/dir/ram-file") + .output() + .expect("Failed to create directory"); + + ram_read_write(&MemoryTest::new( + MEM_SIZE, + PAGE_SIZE, + false, + false, + None, + Some("/tmp/stratovirt/dir/ram-file".to_string()), + )); +} + +/// Ram read and write Test. +/// TestStep: +/// 1. Start device. +/// 2. Write some data("test memory read write") to the address. +/// 3. Read data from the address and check it. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn ram_readwrite_exception() { + let str = "test memory read write"; + const SIZE: u64 = 22; + let str_overflow = "test memory read write overflow"; + const SIZE_OVERFLOW: u64 = 31; + let memory_test = MemoryTest::new(MEM_SIZE, PAGE_SIZE, false, false, None, None); + let addr = 0x100_0000_0000; // 1TB + + // The start address is out of range. + memory_test + .state + .borrow_mut() + .memwrite(addr, str.as_bytes()); + let ret = memory_test.state.borrow_mut().memread(addr, SIZE); + assert_eq!(ret, [0u8; SIZE as usize]); + + // The start address is in range, but the size is out of bounds. + memory_test.state.borrow_mut().memwrite( + MEM_SIZE * 1024 * 1024 - SIZE + ADDRESS_BASE, + str_overflow.as_bytes(), + ); + let ret = memory_test.state.borrow_mut().memread(addr, SIZE_OVERFLOW); + assert_eq!(ret, [0u8; SIZE_OVERFLOW as usize]); + + memory_test.state.borrow_mut().stop(); +} + +/// Ram read and write Test. +/// TestStep: +/// 1. Start device. +/// 2. 
Write some data("test memory read write") to the address. And the read/write will across +/// numa. +/// 3. Read data from the address and check it. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn ram_readwrite_numa() { + if !support_numa() { + return; + } + + let mut args: Vec<&str> = Vec::new(); + let mut extra_args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + args.append(&mut extra_args); + + let cpu = 8; + let cpu_args = format!( + "-smp {},sockets=1,cores=4,threads=2 -cpu host,pmu=on -m 2G", + cpu + ); + let mut extra_args = cpu_args.split(' ').collect(); + args.append(&mut extra_args); + extra_args = "-object memory-backend-ram,size=1G,id=mem0,host-nodes=0-1,policy=bind" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-object memory-backend-ram,size=1G,id=mem1,host-nodes=0-1,policy=bind" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-numa node,nodeid=0,cpus=0-3,memdev=mem0" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-numa node,nodeid=1,cpus=4-7,memdev=mem1" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-numa dist,src=0,dst=1,val=30".split(' ').collect(); + args.append(&mut extra_args); + extra_args = "-numa dist,src=1,dst=0,val=30".split(' ').collect(); + args.append(&mut extra_args); + + let test_state = Rc::new(RefCell::new(test_init(args))); + + let str = "test memory read write"; + let start_base = ADDRESS_BASE + MEM_SIZE * 1024 * 1024 / 2 - 4; + test_state.borrow_mut().memwrite(start_base, str.as_bytes()); + let ret = test_state + .borrow_mut() + .memread(start_base, str.len() as u64); + assert_eq!(str, String::from_utf8(ret).unwrap()); + + test_state.borrow_mut().stop(); +} + +/// Ram read and write Test. +/// TestStep: +/// 1. Start device. +/// 2. Write some data("test memory read write") to the address. And the read/write will across +/// numa. +/// 3. Read data from the address and check it. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. 
+#[test] +fn ram_readwrite_numa1() { + if !support_numa() { + return; + } + + let mut args: Vec<&str> = Vec::new(); + let mut extra_args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + args.append(&mut extra_args); + + let cpu = 8; + let cpu_args = format!( + "-smp {},sockets=1,cores=4,threads=2 -cpu host,pmu=on -m 2G", + cpu + ); + let mut extra_args = cpu_args.split(' ').collect(); + args.append(&mut extra_args); + extra_args = "-object memory-backend-file,size=1G,id=mem0,host-nodes=0-1,policy=bind,share=on,mem-path=test.fd" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = + "-object memory-backend-memfd,size=1G,id=mem1,host-nodes=0-1,policy=bind,mem-prealloc=true" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-numa node,nodeid=0,cpus=0-3,memdev=mem0" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-numa node,nodeid=1,cpus=4-7,memdev=mem1" + .split(' ') + .collect(); + args.append(&mut extra_args); + extra_args = "-numa dist,src=0,dst=1,val=30".split(' ').collect(); + args.append(&mut extra_args); + extra_args = "-numa dist,src=1,dst=0,val=30".split(' ').collect(); + args.append(&mut extra_args); + + let test_state = Rc::new(RefCell::new(test_init(args))); + + let str = "test memory read write"; + let start_base = ADDRESS_BASE + MEM_SIZE * 1024 * 1024 / 2 - 4; + test_state.borrow_mut().memwrite(start_base, str.as_bytes()); + let ret = test_state + .borrow_mut() + .memread(start_base, str.len() as u64); + assert_eq!(str, String::from_utf8(ret).unwrap()); + test_state.borrow_mut().qmp("{\"execute\": \"query-mem\"}"); + + let file = File::create(RAM_DEV_PATH).unwrap(); + file.set_len(PAGE_SIZE).unwrap(); + let qmp_str = format!( + "{{ \"execute\": \"update_region\", + \"arguments\": {{ \"update_type\": \"add\", + \"region_type\": \"ram_device_region\", + \"offset\": 1099511627776, + \"size\": 4096, + \"priority\": 99, + \"device_fd_path\": {:?} }} }}", + RAM_DEV_PATH + ); + test_state.borrow_mut().qmp(&qmp_str); + + test_state.borrow_mut().qmp("{\"execute\": \"query-mem\"}"); + remove_file(RAM_DEV_PATH).unwrap(); + test_state.borrow_mut().stop(); + remove_file("test.fd").unwrap(); +} diff --git a/tests/mod_test/tests/mod.rs b/tests/mod_test/tests/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..6c134f721b4596f83634125e1efdf11535bc7209 --- /dev/null +++ b/tests/mod_test/tests/mod.rs @@ -0,0 +1,33 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +#[cfg(target_arch = "aarch64")] +mod aarch64; +mod balloon_test; +mod block_test; +mod fwcfg_test; +mod memory_test; +mod net_test; +mod pci_test; +mod rng_test; +mod scream_test; +mod scsi_test; +mod serial_test; +mod usb_camera_test; +mod usb_storage_test; +mod usb_test; +mod virtio_gpu_test; +mod virtio_test; +mod virtiofs_test; +mod vnc_test; +#[cfg(target_arch = "x86_64")] +mod x86_64; diff --git a/tests/mod_test/tests/net_test.rs b/tests/mod_test/tests/net_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..34ffe1d36a17282ffee1aae3934b2a35a77d2a33 --- /dev/null +++ b/tests/mod_test/tests/net_test.rs @@ -0,0 +1,2018 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::cell::RefCell; +use std::mem::size_of; +use std::process::Command; +use std::rc::Rc; +use std::thread::sleep; +use std::time; + +use rand::Rng; +use serde_json::json; + +use mod_test::libdriver::machine::TestStdMachine; +use mod_test::libdriver::malloc::GuestAllocator; +use mod_test::libdriver::virtio::{ + TestVirtQueue, TestVringDescEntry, VirtioDeviceOps, VringAvail, VringDesc, VringUsed, + VringUsedElem, VIRTIO_CONFIG_S_DRIVER_OK, VIRTIO_CONFIG_S_NEEDS_RESET, VIRTIO_F_VERSION_1, + VIRTIO_RING_F_EVENT_IDX, VRING_DESC_F_WRITE, VRING_DESC_SIZE, +}; +use mod_test::libdriver::virtio_pci_modern::{TestVirtioPciDev, VirtioPciCommonCfg}; +use mod_test::libtest::{test_init, TestState, MACHINE_TYPE_ARG}; +use util::byte_code::ByteCode; +use util::offset_of; + +/// Device handles packets with partial checksum. +const VIRTIO_NET_F_CSUM: u32 = 0; +/// Driver handles packets with partial checksum. +const VIRTIO_NET_F_GUEST_CSUM: u32 = 1; +/// Driver can receive TSOv4. +const VIRTIO_NET_F_GUEST_TSO4: u32 = 7; +/// Driver can receive TSOv6. +const VIRTIO_NET_F_GUEST_TSO6: u32 = 8; +/// Driver can receive UFO. +const VIRTIO_NET_F_GUEST_UFO: u32 = 10; +/// Device can receive TSOv4. +const VIRTIO_NET_F_HOST_TSO4: u32 = 11; +/// Device can receive TSOv6. +const VIRTIO_NET_F_HOST_TSO6: u32 = 12; +/// Device can receive UFO. +const VIRTIO_NET_F_HOST_UFO: u32 = 14; +/// Control channel is available. +const VIRTIO_NET_F_CTRL_VQ: u32 = 17; +/// Control channel RX mode support. +const VIRTIO_NET_F_CTRL_RX: u32 = 18; +/// Control channel VLAN filtering. +const VIRTIO_NET_F_CTRL_VLAN: u32 = 19; +/// Extra RX mode control support. +const VIRTIO_NET_F_CTRL_RX_EXTRA: u32 = 20; +/// Set Mac Address through control channel. +const VIRTIO_NET_F_CTRL_MAC_ADDR: u32 = 23; + +/// The device sets control ok status to driver. +pub const VIRTIO_NET_OK: u8 = 0; +/// The device sets control err status to driver. +pub const VIRTIO_NET_ERR: u8 = 1; + +/// Driver can send control commands. +pub const VIRTIO_NET_CTRL_RX: u8 = 0; +/// Control commands for promiscuous mode. +pub const VIRTIO_NET_CTRL_RX_PROMISC: u8 = 0; +/// Control commands for all-multicast receive. +pub const VIRTIO_NET_CTRL_RX_ALLMULTI: u8 = 1; +/// Control commands for all-unicast receive. 
+pub const VIRTIO_NET_CTRL_RX_ALLUNI: u8 = 2;
+/// Control commands for suppressing multicast receive.
+pub const VIRTIO_NET_CTRL_RX_NOMULTI: u8 = 3;
+/// Control commands for suppressing unicast receive.
+pub const VIRTIO_NET_CTRL_RX_NOUNI: u8 = 4;
+/// Control commands for suppressing broadcast receive.
+pub const VIRTIO_NET_CTRL_RX_NOBCAST: u8 = 5;
+
+/// The driver can send control commands for MAC address filtering.
+pub const VIRTIO_NET_CTRL_MAC: u8 = 1;
+/// The driver sets the unicast/multicast address table.
+pub const VIRTIO_NET_CTRL_MAC_TABLE_SET: u8 = 0;
+/// The driver sets the default MAC address which rx filtering accepts.
+pub const VIRTIO_NET_CTRL_MAC_ADDR_SET: u8 = 1;
+
+/// The driver can send control commands for vlan filtering.
+pub const VIRTIO_NET_CTRL_VLAN: u8 = 2;
+/// The driver adds a vlan id to the vlan filtering table.
+pub const VIRTIO_NET_CTRL_VLAN_ADD: u8 = 0;
+/// The driver removes a vlan id from the vlan filtering table.
+pub const VIRTIO_NET_CTRL_VLAN_DEL: u8 = 1;
+
+/// Control class for multiqueue configuration.
+pub const VIRTIO_NET_CTRL_MQ: u8 = 4;
+/// Command to set the number of active virtqueue pairs.
+pub const VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: u16 = 0;
+
+const QUEUE_SIZE_NET: u16 = 256;
+
+const DEFAULT_NET_FEATURES: u64 = 1 << VIRTIO_F_VERSION_1
+    | 1 << VIRTIO_NET_F_CSUM
+    | 1 << VIRTIO_NET_F_GUEST_CSUM
+    | 1 << VIRTIO_NET_F_GUEST_TSO4
+    | 1 << VIRTIO_NET_F_GUEST_TSO6
+    | 1 << VIRTIO_NET_F_GUEST_UFO
+    | 1 << VIRTIO_NET_F_HOST_TSO4
+    | 1 << VIRTIO_NET_F_HOST_TSO6
+    | 1 << VIRTIO_NET_F_HOST_UFO
+    | 1 << VIRTIO_NET_F_CTRL_RX
+    | 1 << VIRTIO_NET_F_CTRL_VLAN
+    | 1 << VIRTIO_NET_F_CTRL_RX_EXTRA
+    | 1 << VIRTIO_NET_F_CTRL_MAC_ADDR
+    | 1 << VIRTIO_NET_F_CTRL_VQ
+    | 1 << VIRTIO_RING_F_EVENT_IDX;
+
+/// Timeout used when polling the device, in microseconds.
+const TIMEOUT_US: u64 = 15 * 1000 * 1000;
+
+const VIRTIO_NET_HDR_SIZE: usize = size_of::<VirtioNetHdr>();
+/// dest_mac(6), source_mac(6), ether_type(2).
+const ETHERNET_HDR_SIZE: usize = 14;
+/// Arp header length before the address fields (htype, ptype, hlen, plen, op).
+const ARP_HDR_SIZE: usize = 8;
+
+/// The maximum incoming packet (tcp/udp): 65536 bytes,
+/// plus ethernet header: 14 bytes,
+/// plus virtio_net_hdr: 12 bytes.
+const MAX_PACKET_LEN: u64 = 65562;
+
+/// The MAC address length.
+const MAC_ADDR_LEN: usize = 6;
+/// The source MAC address used in ARP packets.
+const ARP_SOURCE_MAC: [u8; MAC_ADDR_LEN] = [0x52, 0x54, 0x00, 0x12, 0x34, 0x56];
+/// The MAC address configured on the command line when `with_mac` is set.
+const CMD_LINE_MAC: [u8; MAC_ADDR_LEN] = [0x52, 0x54, 0x00, 0x12, 0x34, 0x57];
+const MAX_MAC_TABLE_LEN: usize = 64;
+const TEST_MAC_ADDR_NUMS: u8 = 2;
+
+static USED_ELEM_SIZE: u64 = size_of::<VringUsedElem>() as u64;
+
+#[repr(C, packed)]
+pub struct VirtioNetConfig {
+    /// Mac Address.
+    pub mac: [u8; MAC_ADDR_LEN],
+    /// Device status.
+    pub status: u16,
+    /// Maximum number of each of transmit and receive queues.
+    pub max_virtqueue_pairs: u16,
+    /// Maximum Transmission Unit.
+    pub mtu: u16,
+    /// Speed, in units of 1Mb.
+ pub speed: u32, + /// 0x00 - half duplex + /// 0x01 - full duplex + pub duplex: u8, +} + +#[repr(C)] +#[allow(unused)] +#[derive(Clone, Copy, Default)] +struct CtrlHdr { + class: u8, + cmd: u8, +} +impl ByteCode for CtrlHdr {} + +#[repr(C)] +#[allow(unused)] +#[derive(Clone, Copy, Default)] +struct CtrlMacAddr { + ctrl_hdr: CtrlHdr, + mac: [u8; MAC_ADDR_LEN], + ack: u8, +} +impl ByteCode for CtrlMacAddr {} + +#[repr(C)] +#[allow(unused)] +#[derive(Clone, Copy, Default)] +struct CtrlRxInfo { + ctrl_hdr: CtrlHdr, + switch: u8, + ack: u8, +} +impl CtrlRxInfo { + pub fn new(class: u8, cmd: u8, switch: u8) -> Self { + CtrlRxInfo { + ctrl_hdr: CtrlHdr { class, cmd }, + switch, + ack: 0xff, + } + } +} +impl ByteCode for CtrlRxInfo {} + +#[repr(C, packed)] +#[allow(unused)] +#[derive(Clone, Copy, Default)] +struct CtrlVlanInfo { + ctrl_hdr: CtrlHdr, + vid: u16, + ack: u8, +} +impl CtrlVlanInfo { + pub fn new(class: u8, cmd: u8, vid: u16) -> Self { + CtrlVlanInfo { + ctrl_hdr: CtrlHdr { class, cmd }, + vid, + ack: 0xff, + } + } +} +impl ByteCode for CtrlVlanInfo {} + +#[repr(C)] +#[allow(unused)] +#[derive(Clone, Copy, Default)] +struct EthernetHdr { + dst_mac: [u8; MAC_ADDR_LEN], + src_mac: [u8; MAC_ADDR_LEN], + // 0x0800: IP + // 0x0806: ARP + // 0x86dd: IPV6 + // 0x0810: 802.1Q Tag, it has vlan id + e_type: [u8; 2], +} + +#[repr(C)] +#[allow(unused)] +#[derive(Clone, Copy, Default)] +struct EthernetHdrVlan { + dst_mac: [u8; MAC_ADDR_LEN], + src_mac: [u8; MAC_ADDR_LEN], + tpid: [u8; 2], + vlan_id: [u8; 2], + // 0x0800: IP + // 0x0806: ARP + // 0x86dd: IPV6 + // 0x0810: 802.1Q Tag, it has vlan id + e_type: [u8; 2], +} + +#[repr(C)] +#[allow(unused)] +#[derive(Clone, Copy, Default)] +struct ArpPacket { + h_type: [u8; 2], + p_type: [u8; 2], + h_len: u8, + p_len: u8, + op: [u8; 2], + src_mac: [u8; 6], + src_ip: [u8; 4], + dst_mac: [u8; 6], + dst_ip: [u8; 4], +} + +#[repr(C)] +#[allow(unused)] +#[derive(Clone, Copy, Default)] +struct ArpRequestPacket { + net_hdr: VirtioNetHdr, + eth_hdr: EthernetHdr, + arp_packet: ArpPacket, +} +impl ByteCode for ArpRequestPacket {} + +#[repr(C)] +#[allow(unused)] +#[derive(Clone, Copy, Default)] +struct ArpRequestPacketVlan { + net_hdr: VirtioNetHdr, + eth_hdr: EthernetHdrVlan, + arp_packet: ArpPacket, +} +impl ByteCode for ArpRequestPacketVlan {} + +#[repr(C)] +#[allow(unused)] +#[derive(Clone, Copy)] +struct MacAddress { + address: [u8; MAC_ADDR_LEN], +} +impl ByteCode for MacAddress {} +impl Default for MacAddress { + fn default() -> Self { + MacAddress { + address: [0; MAC_ADDR_LEN], + } + } +} + +#[repr(C, packed(2))] +#[allow(unused)] +#[derive(Clone, Copy)] +struct CtrlMacTableReq { + ctrl_hdr: CtrlHdr, + uni_entries: u32, + uni_macs: [MacAddress; MAX_MAC_TABLE_LEN + 1], + mul_entries: u32, + mul_macs: [MacAddress; MAX_MAC_TABLE_LEN + 1], + ack: u8, +} +impl ByteCode for CtrlMacTableReq {} +impl Default for CtrlMacTableReq { + fn default() -> Self { + CtrlMacTableReq { + ctrl_hdr: CtrlHdr::default(), + uni_entries: 0, + uni_macs: [MacAddress::default(); MAX_MAC_TABLE_LEN + 1], + mul_entries: 0, + mul_macs: [MacAddress::default(); MAX_MAC_TABLE_LEN + 1], + ack: 0xff, + } + } +} + +/// Packet header. 
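+// Layout of the virtio-net header that prefixes every frame on the rx/tx queues; its size
+// matches VIRTIO_NET_HDR_SIZE (12 bytes).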
+#[repr(C)] +#[derive(Debug, Copy, Clone, Default)] +struct VirtioNetHdr { + flags: u8, + gso_type: u8, + hdr_len: u16, + gso_size: u16, + csum_start: u16, + csum_offset: u16, + num_buffers: u16, +} +impl ByteCode for VirtioNetHdr {} + +fn execute_cmd(cmd: String, check: bool) { + let args = cmd.split(' ').collect::>(); + if args.is_empty() { + return; + } + + let mut cmd_exe = Command::new(args[0]); + for i in 1..args.len() { + cmd_exe.arg(args[i]); + } + + let output = cmd_exe + .output() + .unwrap_or_else(|_| panic!("Failed to execute {}", cmd)); + println!("{:?}", args); + if check { + assert!(output.status.success()); + } +} + +fn execute_cmd_unchecked(cmd: String) { + execute_cmd(cmd, false); +} + +fn execute_cmd_checked(cmd: String) { + execute_cmd(cmd, true); +} + +fn create_tap(id: u8, mq: bool) { + let br_name = "mst_net_qbr".to_string() + &id.to_string(); + let tap_name = "mst_net_qtap".to_string() + &id.to_string(); + execute_cmd_checked("ip link add name ".to_string() + &br_name + " type bridge"); + if mq { + execute_cmd_checked("ip tuntap add ".to_string() + &tap_name + " mode tap multi_queue"); + } else { + execute_cmd_checked("ip tuntap add ".to_string() + &tap_name + " mode tap"); + } + execute_cmd_checked("ip link set ".to_string() + &tap_name + " master " + &br_name); + execute_cmd_checked("ip link set ".to_string() + &br_name + " up"); + execute_cmd_checked("ip link set ".to_string() + &tap_name + " up"); + execute_cmd_checked( + "ip address add ".to_string() + + &id.to_string() + + ".1.1." + + &id.to_string() + + "/24 dev " + + &br_name, + ); +} + +fn clear_tap(id: u8, mq: bool) { + let br_name = "mst_net_qbr".to_string() + &id.to_string(); + let tap_name = "mst_net_qtap".to_string() + &id.to_string(); + execute_cmd_unchecked("ip link set ".to_string() + &tap_name + " down"); + execute_cmd_unchecked("ip link set ".to_string() + &br_name + " down"); + if mq { + execute_cmd_unchecked("ip tuntap del ".to_string() + &tap_name + " mode tap multi_queue"); + } else { + execute_cmd_unchecked("ip tuntap del ".to_string() + &tap_name + " mode tap"); + } + execute_cmd_unchecked("ip link delete ".to_string() + &br_name + " type bridge"); +} + +#[allow(unused)] +pub fn create_net( + id: u8, + mq: bool, + num_queues: u16, + with_mac: bool, + iothread: bool, +) -> ( + Rc>, + Rc>, + Rc>, +) { + let pci_slot: u8 = 0x4; + let pci_fn: u8 = 0x0; + let mut extra_args: Vec<&str> = Vec::new(); + + let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + extra_args.append(&mut args); + + let mut iothread_arg = ""; + if iothread { + let mut args: Vec<&str> = "-object iothread,id=iothread1".split(' ').collect(); + extra_args.append(&mut args); + iothread_arg = ",iothread=iothread1"; + } + // Multi-queue command line. + let mut mq_flag = ""; + let mut mq_queues = "".to_string(); + if mq { + mq_flag = ",mq=on"; + mq_queues = ",queues=".to_string() + &num_queues.to_string(); + } + let mut mac_address = ""; + if with_mac { + // Same as CMD_LINE_MAC. 
+ mac_address = ",mac=52:54:00:12:34:57"; + } + let net_pci_args = format!( + "-device {},id=net0,netdev=netdev0,bus=pcie.{},addr={}.0{}{}{}", + "virtio-net-pci", pci_fn, pci_slot, mq_flag, mac_address, iothread_arg, + ); + args = net_pci_args[..].split(' ').collect(); + extra_args.append(&mut args); + + let net_args = + String::from("-netdev tap,id=netdev0,ifname=mst_net_qtap") + &id.to_string() + &mq_queues; + args = net_args.split(' ').collect(); + extra_args.append(&mut args); + + let test_state = Rc::new(RefCell::new(test_init(extra_args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator.clone(); + let virtio_net = Rc::new(RefCell::new(TestVirtioPciDev::new(machine.pci_bus))); + virtio_net.borrow_mut().init(pci_slot, pci_fn); + + (virtio_net, test_state, allocator) +} + +fn set_up( + id: u8, + mq: bool, + num_queues: u16, + with_mac: bool, +) -> ( + Rc>, + Rc>, + Rc>, +) { + clear_tap(id, mq); + create_tap(id, mq); + create_net(id, mq, num_queues, with_mac, false) +} + +// Set the iothread argument in command line. +fn set_up_iothread( + id: u8, + mq: bool, + num_queues: u16, + with_mac: bool, +) -> ( + Rc>, + Rc>, + Rc>, +) { + clear_tap(id, mq); + create_tap(id, mq); + create_net(id, mq, num_queues, with_mac, true) +} + +fn tear_down( + net: Rc>, + test_state: Rc>, + alloc: Rc>, + vqs: Vec>>, + id: u8, + mq: bool, +) { + net.borrow_mut().destroy_device(alloc, vqs); + test_state.borrow_mut().stop(); + clear_tap(id, mq); +} + +/// Alloc space for rx virtqueue. +fn fill_rx_vq( + test_state: Rc>, + alloc: Rc>, + vq: Rc>, +) { + let size = vq.borrow().size; + for _ in 0..size { + let addr = alloc.borrow_mut().alloc(MAX_PACKET_LEN); + vq.borrow_mut() + .add(test_state.clone(), addr, MAX_PACKET_LEN as u32, true); + } + vq.borrow().set_used_event(test_state, 0); +} + +fn init_net_device( + net: Rc>, + test_state: Rc>, + alloc: Rc>, + features: u64, + num_queues: usize, +) -> Vec>> { + net.borrow_mut().reset(); + net.borrow_mut().set_acknowledge(); + net.borrow_mut().set_driver(); + net.borrow_mut().negotiate_features(features); + net.borrow_mut().set_features_ok(); + net.borrow_mut().pci_dev.enable_msix(None); + net.borrow_mut() + .setup_msix_configuration_vector(alloc.clone(), 0); + let vqs = net + .borrow_mut() + .init_virtqueue(test_state.clone(), alloc.clone(), num_queues); + for i in 0..num_queues / 2 { + fill_rx_vq(test_state.clone(), alloc.clone(), vqs[i * 2].clone()); + } + net.borrow().set_driver_ok(); + + vqs +} + +fn poll_used_ring( + test_state: Rc>, + vq: Rc>, + arp_request: &[u8], + need_reply: bool, +) -> bool { + let mut start = 0_u64; + let mut idx = test_state + .borrow() + .readw(vq.borrow().used + offset_of!(VringUsed, idx) as u64); + while start < u64::from(idx) { + for i in start..u64::from(idx) { + let len = test_state.borrow().readw( + vq.borrow().used + + offset_of!(VringUsed, ring) as u64 + + i * USED_ELEM_SIZE + + offset_of!(VringUsedElem, len) as u64, + ); + if len == arp_request.len() as u16 { + let id = test_state.borrow().readw( + vq.borrow().used + offset_of!(VringUsed, ring) as u64 + i * USED_ELEM_SIZE, + ); + + let addr = test_state + .borrow() + .readq(vq.borrow().desc + u64::from(id) * VRING_DESC_SIZE); + let packets = test_state.borrow().memread(addr, u64::from(len)); + let src_mac_pos = VIRTIO_NET_HDR_SIZE + ETHERNET_HDR_SIZE + ARP_HDR_SIZE; + let dst_mac_pos = src_mac_pos + 10; + if arp_request[src_mac_pos..src_mac_pos + MAC_ADDR_LEN] + == packets[dst_mac_pos..dst_mac_pos + MAC_ADDR_LEN] + { + if need_reply 
{ + return true; + } else { + assert!(false); + } + } + } + } + start = u64::from(idx); + vq.borrow().set_used_event(test_state.clone(), start as u16); + idx = test_state + .borrow() + .readw(vq.borrow().used + offset_of!(VringUsed, idx) as u64); + } + false +} + +fn check_arp_mac( + net: Rc>, + test_state: Rc>, + vq: Rc>, + arp_request: &[u8], + need_reply: bool, +) { + let start_time = time::Instant::now(); + let timeout_us = time::Duration::from_micros(TIMEOUT_US); + let timeout_us_no_reply = time::Duration::from_micros(TIMEOUT_US / 5); + loop { + if need_reply { + assert!(time::Instant::now() - start_time < timeout_us); + if !net.borrow().queue_was_notified(vq.clone()) { + continue; + } + } else if time::Instant::now() - start_time > timeout_us_no_reply { + return; + } + + if poll_used_ring(test_state.clone(), vq.clone(), arp_request, need_reply) { + return; + } + } +} + +fn get_arp_request(id: u8) -> ArpRequestPacket { + ArpRequestPacket { + net_hdr: VirtioNetHdr::default(), + eth_hdr: EthernetHdr { + dst_mac: [0xff; MAC_ADDR_LEN], + src_mac: ARP_SOURCE_MAC, + e_type: [0x08, 0x06], + }, + arp_packet: ArpPacket { + h_type: [0x00, 0x01], + p_type: [0x08, 0x00], + h_len: 0x06, + p_len: 0x04, + op: [0x00, 0x01], + src_mac: ARP_SOURCE_MAC, + src_ip: [id, 0x01, 0x01, id + 1], + dst_mac: [0x00; MAC_ADDR_LEN], + dst_ip: [id, 0x01, 0x01, id], + }, + } +} + +fn get_arp_request_vlan(id: u8) -> ArpRequestPacketVlan { + ArpRequestPacketVlan { + net_hdr: VirtioNetHdr::default(), + eth_hdr: EthernetHdrVlan { + dst_mac: [0xff; MAC_ADDR_LEN], + src_mac: ARP_SOURCE_MAC, + tpid: [0x81, 0x00], + vlan_id: [0x08, 0x01], + e_type: [0x08, 0x06], + }, + arp_packet: ArpPacket { + h_type: [0x00, 0x01], + p_type: [0x08, 0x00], + h_len: 0x06, + p_len: 0x04, + op: [0x00, 0x01], + src_mac: ARP_SOURCE_MAC, + src_ip: [id, 0x01, 0x01, id + 1], + dst_mac: [0x00; MAC_ADDR_LEN], + dst_ip: [id, 0x01, 0x01, id], + }, + } +} + +fn send_request( + net: Rc>, + test_state: Rc>, + alloc: Rc>, + vq: Rc>, + request: &[u8], +) { + let length = request.len() as u64; + let addr = alloc.borrow_mut().alloc(length); + + let k_bytes = 1024; + let num_k = length / k_bytes; + let mut offset; + // write 1024 bytes once. + for i in 0..num_k { + offset = i * k_bytes; + test_state.borrow().memwrite( + addr + offset, + &request[offset as usize..(offset + k_bytes) as usize], + ); + } + let res = length % k_bytes; + if res > 0 { + offset = num_k * k_bytes; + test_state + .borrow() + .memwrite(addr + offset, &request[offset as usize..]); + } + let free_head = vq + .borrow_mut() + .add(test_state.clone(), addr, request.len() as u32, false); + net.borrow().virtqueue_notify(vq.clone()); + net.borrow() + .poll_used_elem(test_state, vq, free_head, TIMEOUT_US, &mut None, true); +} + +fn send_arp_request( + net: Rc>, + test_state: Rc>, + alloc: Rc>, + vqs: Vec>>, + arp_request: &[u8], + need_reply: bool, +) { + send_request( + net.clone(), + test_state.clone(), + alloc, + vqs[1].clone(), + arp_request, + ); + check_arp_mac(net, test_state, vqs[0].clone(), arp_request, need_reply); +} + +fn check_device_status(net: Rc>, status: u8) { + let start_time = time::Instant::now(); + let timeout_us = time::Duration::from_micros(TIMEOUT_US); + loop { + if net.borrow().get_status() & status > 0 { + break; + } + sleep(time::Duration::from_millis(50)); + assert!(time::Instant::now() - start_time < timeout_us); + } +} + +/// Send and receive packet test. +/// TestStep: +/// 1. Init device. +/// 2. Send ARP packet and check the reply. +/// 3. Destroy device. 
+/// Expect: +/// 1/2/3: success. +#[test] +fn virtio_net_rx_tx_test() { + let id = TEST_MAC_ADDR_NUMS; + let (net, test_state, alloc) = set_up(id, false, 0, false); + + // Three virtqueues: tx/rx/ctrl. + let vqs = init_net_device( + net.clone(), + test_state.clone(), + alloc.clone(), + DEFAULT_NET_FEATURES, + 3, + ); + + let arp_request = get_arp_request(id); + send_arp_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + arp_request.as_bytes(), + true, + ); + + tear_down(net, test_state, alloc, vqs, id, false); +} + +/// Send and receive packet test with iothread. +/// TestStep: +/// 1. Init device. +/// 2. Send ARP packet and check the reply. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn virtio_net_rx_tx_test_iothread() { + let id = 2 * TEST_MAC_ADDR_NUMS; + let (net, test_state, alloc) = set_up_iothread(id, false, 0, false); + + // Three virtqueues: tx/rx/ctrl. + let vqs = init_net_device( + net.clone(), + test_state.clone(), + alloc.clone(), + DEFAULT_NET_FEATURES, + 3, + ); + + let arp_request = get_arp_request(id); + send_arp_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + arp_request.as_bytes(), + true, + ); + + tear_down(net, test_state, alloc, vqs, id, false); +} + +/// Test the control mq command. +/// TestStep: +/// 1. Init device: enable multi-queue and VIRTIO_NET_CTRL_MQ. +/// 2. Send VIRTIO_NET_CTRL_MQ to set vq pairs: +/// 1) set normal vq pairs; +/// 2) set invalid request length; +/// 3) set invalid request cmd; +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn virtio_net_ctrl_mq_test() { + let id = 3 * TEST_MAC_ADDR_NUMS; + let queue_pairs: u16 = 4; + let queues: usize = 2 * queue_pairs as usize + 1; + let (net, test_state, alloc) = set_up(id, true, queue_pairs, false); + + // Three virtqueues: tx/rx/ctrl. + let vqs = init_net_device( + net.clone(), + test_state.clone(), + alloc.clone(), + DEFAULT_NET_FEATURES, + queues, + ); + + // (test_type, queue_pairs, ack) + // test_type: + // 0 - normal request + // 1 - invalid request length + // 2 - invalid cmd + let reqs = [ + (0, queue_pairs, VIRTIO_NET_OK), + (0, u16::MAX, VIRTIO_NET_ERR), + (0, 0, VIRTIO_NET_ERR), + (1, queue_pairs, VIRTIO_NET_ERR), + (2, queue_pairs, VIRTIO_NET_ERR), + ]; + + for (test_type, vq_pairs, status) in reqs { + let ack: u8 = 0xff; + // The message: CtrlHdr, vq_pairs, ack. + let addr = alloc + .borrow_mut() + .alloc(size_of::() as u64 + 2 + 1); + + let mut cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET as u8; + if test_type == 2 { + cmd = u8::MAX; + } + let ctrl_hdr = CtrlHdr { + class: VIRTIO_NET_CTRL_MQ, + cmd, + }; + test_state.borrow().memwrite(addr, ctrl_hdr.as_bytes()); + test_state + .borrow() + .writew(addr + size_of::() as u64, vq_pairs); + test_state + .borrow() + .writeb(addr + size_of::() as u64 + 2, ack); + + let ctrl_vq = &vqs[queues - 1]; + // CtrlHdr + vq_pairs. 
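+        // For the invalid-length case (test_type == 1), the readable part of the request is
+        // shortened by one byte below to make it malformed.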
+        let mut len = size_of::<CtrlHdr>() as u32 + 2;
+        if test_type == 1 {
+            len -= 1;
+        }
+
+        let data_entries: Vec<TestVringDescEntry> = vec![
+            TestVringDescEntry {
+                data: addr,
+                len,
+                write: false,
+            },
+            TestVringDescEntry {
+                data: addr + size_of::<CtrlHdr>() as u64 + 2,
+                len: 1,
+                write: true,
+            },
+        ];
+        let free_head = ctrl_vq
+            .borrow_mut()
+            .add_chained(test_state.clone(), data_entries);
+        net.borrow()
+            .kick_virtqueue(test_state.clone(), ctrl_vq.clone());
+
+        net.borrow().poll_used_elem(
+            test_state.clone(),
+            ctrl_vq.clone(),
+            free_head,
+            TIMEOUT_US,
+            &mut None,
+            true,
+        );
+
+        let ack = test_state
+            .borrow()
+            .readb(addr + size_of::<CtrlHdr>() as u64 + 2);
+        assert_eq!(ack, status);
+    }
+
+    tear_down(net, test_state, alloc, vqs, id, true);
+}
+
+/// Write or read the MAC address in the device config space.
+fn net_config_mac_rw(
+    net: Rc<RefCell<TestVirtioPciDev>>,
+    mac: Option<&[u8; MAC_ADDR_LEN]>,
+) -> [u8; MAC_ADDR_LEN] {
+    if let Some(mac) = mac {
+        for i in 0..MAC_ADDR_LEN {
+            net.borrow()
+                .config_writeb((offset_of!(VirtioNetConfig, mac) + i) as u64, mac[i]);
+        }
+    }
+
+    let mut mac_read = [0_u8; MAC_ADDR_LEN];
+    for i in 0..MAC_ADDR_LEN {
+        mac_read[i] = net
+            .borrow()
+            .config_readb((offset_of!(VirtioNetConfig, mac) + i) as u64);
+    }
+    mac_read
+}
+
+/// Fields of the virtio-net config space other than mac must stay read-only.
+fn write_net_config_check(net: Rc<RefCell<TestVirtioPciDev>>, offset: u64, value: u64, size: u8) {
+    let origin_value = u64::from(net.borrow().config_readw(offset));
+    assert_ne!(origin_value, value);
+    match size {
+        1 => net.borrow().config_writeb(offset, value as u8),
+        2 => net.borrow().config_writew(offset, value as u16),
+        4 => net.borrow().config_writel(offset, value as u32),
+        _ => (),
+    };
+    let value = u64::from(net.borrow().config_readw(offset));
+    assert_eq!(origin_value, value);
+}
+
+/// Write values to the virtio-net config space, and check the write result.
+/// TestStep:
+/// 1. Init device.
+/// 2. Write values to config fields that must stay unchanged; the mac field is only writable
+/// when VIRTIO_F_VERSION_1 is not negotiated.
+/// 3. Destroy device.
+/// Expect:
+/// 1/2/3: success.
+#[test]
+fn virtio_net_write_and_check_config() {
+    let id = 4 * TEST_MAC_ADDR_NUMS;
+    let queue_pairs: u16 = 1;
+    let queues: usize = 2 * queue_pairs as usize + 1;
+
+    let reqs = [
+        DEFAULT_NET_FEATURES & !(1 << VIRTIO_F_VERSION_1 | 1 << VIRTIO_NET_F_CTRL_MAC_ADDR),
+        DEFAULT_NET_FEATURES,
+    ];
+    for features in reqs {
+        let (net, test_state, alloc) = set_up(id, false, queue_pairs, true);
+
+        // Three virtqueues: tx/rx/ctrl.
+        let vqs = init_net_device(
+            net.clone(),
+            test_state.clone(),
+            alloc.clone(),
+            features,
+            queues,
+        );
+
+        // Get the mac address in the device configure space.
+        let mac_origin = net_config_mac_rw(net.clone(), None);
+        assert_eq!(mac_origin, CMD_LINE_MAC);
+
+        // Write ff:ff:ff:ff:ff:ff to virtio_net_config->mac.
+        let mac = net_config_mac_rw(net.clone(), Some(&[0xff; MAC_ADDR_LEN]));
+        if features & (1 << VIRTIO_F_VERSION_1) != 0 {
+            assert_eq!(mac, mac_origin);
+        } else {
+            assert_eq!(mac, [0xff; MAC_ADDR_LEN]);
+        }
+
+        // Write abnormal values to the other virtio_net_config fields.
+ write_net_config_check( + net.clone(), + offset_of!(VirtioNetConfig, status) as u64, + u64::from(u16::MAX), + 2, + ); + write_net_config_check( + net.clone(), + offset_of!(VirtioNetConfig, max_virtqueue_pairs) as u64, + u64::from(u16::MAX), + 2, + ); + write_net_config_check( + net.clone(), + offset_of!(VirtioNetConfig, mtu) as u64, + u64::from(u16::MAX), + 2, + ); + write_net_config_check( + net.clone(), + offset_of!(VirtioNetConfig, speed) as u64, + u64::from(u32::MAX), + 4, + ); + write_net_config_check( + net.clone(), + offset_of!(VirtioNetConfig, duplex) as u64, + u64::from(u8::MAX), + 1, + ); + + write_net_config_check( + net.clone(), + size_of:: as u64 + 1, + u64::from(u8::MAX), + 1, + ); + + tear_down( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs, + id, + false, + ); + } +} + +// Send request with control virtqueue. +fn send_ctrl_vq_request( + net: Rc>, + test_state: Rc>, + alloc: Rc>, + vqs: Vec>>, + ctrl_data: &[u8], + ack: u8, +) { + let ctrl_vq = &vqs[2]; + let addr = alloc.borrow_mut().alloc(ctrl_data.len() as u64); + test_state.borrow().memwrite(addr, ctrl_data); + let data_entries: Vec = vec![ + TestVringDescEntry { + data: addr, + len: ctrl_data.len() as u32 - 1, + write: false, + }, + TestVringDescEntry { + data: addr + ctrl_data.len() as u64 - 1, + len: 1, + write: true, + }, + ]; + let free_head = ctrl_vq + .borrow_mut() + .add_chained(test_state.clone(), data_entries); + net.borrow() + .kick_virtqueue(test_state.clone(), ctrl_vq.clone()); + + net.borrow().poll_used_elem( + test_state.clone(), + ctrl_vq.clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let res_ack = test_state.borrow().readb(addr + ctrl_data.len() as u64 - 1); + assert_eq!(res_ack, ack); +} + +// Set uni_entries/mul_entries macs to unicast/multicast mac table. +fn ctrl_vq_set_mac_table( + net: Rc>, + test_state: Rc>, + alloc: Rc>, + vqs: Vec>>, + uni_entries: u32, + mul_entries: u32, + ack: u8, +) { + let mut ctrl_mac_table = CtrlMacTableReq { + ctrl_hdr: CtrlHdr { + class: VIRTIO_NET_CTRL_MAC, + cmd: VIRTIO_NET_CTRL_MAC_TABLE_SET, + }, + uni_entries, + uni_macs: [MacAddress::default(); MAX_MAC_TABLE_LEN + 1], + mul_entries, + ..CtrlMacTableReq::default() + }; + + for i in 0..uni_entries + mul_entries { + let mut mac = MacAddress { + address: ARP_SOURCE_MAC, + }; + mac.address[MAC_ADDR_LEN - 1] += i as u8 + 1; + if i < uni_entries { + ctrl_mac_table.uni_macs[i as usize] = mac; + } else { + mac.address[0] += 1; + ctrl_mac_table.mul_macs[(i - uni_entries) as usize] = mac; + } + } + + let mut ctrl_data: Vec = Vec::new(); + let mut offset = offset_of!(CtrlMacTableReq, uni_macs) + uni_entries as usize * MAC_ADDR_LEN; + ctrl_data.append(&mut ctrl_mac_table.as_bytes()[..offset].to_vec()); + ctrl_data.append(&mut mul_entries.as_bytes().to_vec()); + offset = offset_of!(CtrlMacTableReq, mul_macs); + ctrl_data.append( + &mut ctrl_mac_table.as_bytes()[offset..offset + mul_entries as usize * MAC_ADDR_LEN] + .to_vec(), + ); + ctrl_data.append(&mut ctrl_mac_table.ack.as_bytes().to_vec()); + + assert_eq!( + 11 + (uni_entries + mul_entries) as usize * MAC_ADDR_LEN, + ctrl_data.len() + ); + + send_ctrl_vq_request(net, test_state, alloc, vqs, &ctrl_data, ack); +} + +fn ctrl_vq_set_mac_address( + net: Rc>, + test_state: Rc>, + alloc: Rc>, + vqs: Vec>>, +) { + // Get the mac address in the device configure space. + let mac_origin = net_config_mac_rw(net.clone(), None); + assert_eq!(mac_origin, CMD_LINE_MAC); + // Set mac address. 
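+    // A VIRTIO_NET_CTRL_MAC_ADDR_SET request carries the new MAC plus a one-byte ack; on
+    // success the device is expected to expose the new address in its config space.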
+ let ctrl_mac_addr = CtrlMacAddr { + ctrl_hdr: CtrlHdr { + class: VIRTIO_NET_CTRL_MAC, + cmd: VIRTIO_NET_CTRL_MAC_ADDR_SET, + }, + mac: ARP_SOURCE_MAC, + ack: 0xff, + }; + send_ctrl_vq_request( + net.clone(), + test_state, + alloc, + vqs, + ctrl_mac_addr.as_bytes(), + VIRTIO_NET_OK, + ); + // Check mac address result. + let config_mac = net_config_mac_rw(net, None); + assert_eq!(config_mac, ARP_SOURCE_MAC); +} + +/// Test the control vlan command. +/// TestStep: +/// 1. Init device with control vq. +/// 2. Test the control vlan command: +/// 1) add vid 0/0/1/0xfff/0xffff, success(ignore invalid/repeated value) +/// 2) del vid 0/0/1/0xfff/0xffff, success(ignore invalid/repeated value) +/// 3) invalid ctrl class and cmd, expect reply error +/// 4) invalid ctrl cmd, expect reply error +/// 5) invalid vid length, expect reply error +/// 3. Send ARP packet and check the reply. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn virtio_net_ctrl_vlan_test() { + let id = 5 * TEST_MAC_ADDR_NUMS; + let queue_pairs: u16 = 1; + let queues: usize = 2 * queue_pairs as usize + 1; + + let (net, test_state, alloc) = set_up(id, false, queue_pairs, false); + + let vqs = init_net_device( + net.clone(), + test_state.clone(), + alloc.clone(), + DEFAULT_NET_FEATURES, + queues, + ); + + // Turn off rx mode promisc. + let ctrl_rx_info = CtrlRxInfo::new(VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_PROMISC, 0); + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + ctrl_rx_info.as_bytes(), + VIRTIO_NET_OK, + ); + + let reqs = [ + (0, VIRTIO_NET_OK), + (0, VIRTIO_NET_OK), + (1, VIRTIO_NET_OK), + (0xfff, VIRTIO_NET_OK), + (0xffff, VIRTIO_NET_ERR), + ]; + // Test VIRTIO_NET_CTRL_VLAN_ADD. + for (vid, ack) in reqs { + let ctrl_vlan_info = CtrlVlanInfo::new(VIRTIO_NET_CTRL_VLAN, VIRTIO_NET_CTRL_VLAN_ADD, vid); + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + ctrl_vlan_info.as_bytes(), + ack, + ); + } + // Test VIRTIO_NET_CTRL_VLAN_DEL. + for (vid, ack) in reqs { + let ctrl_vlan_info = CtrlVlanInfo::new(VIRTIO_NET_CTRL_VLAN, VIRTIO_NET_CTRL_VLAN_DEL, vid); + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + ctrl_vlan_info.as_bytes(), + ack, + ); + } + // Test invalid class and cmd. + let ctrl_vlan_info = CtrlVlanInfo::new(u8::MAX, u8::MAX, 0); + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + ctrl_vlan_info.as_bytes(), + VIRTIO_NET_ERR, + ); + // Test invalid cmd. + let ctrl_vlan_info = CtrlVlanInfo::new(VIRTIO_NET_CTRL_VLAN, u8::MAX, 0); + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + ctrl_vlan_info.as_bytes(), + VIRTIO_NET_ERR, + ); + // Test invalid vid length. + let ctrl_vlan_info = CtrlVlanInfo::new(VIRTIO_NET_CTRL_VLAN, VIRTIO_NET_CTRL_VLAN_ADD, 0); + let data_size = size_of::() - 1; + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + &ctrl_vlan_info.as_bytes()[..data_size], + VIRTIO_NET_ERR, + ); + + send_arp_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + get_arp_request(id).as_bytes(), + true, + ); + send_arp_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + get_arp_request_vlan(id).as_bytes(), + false, + ); + + tear_down(net, test_state, alloc, vqs, id, false); +} + +/// Test the control mac command. +/// TestStep: +/// 1. Init device with control vq. +/// 2. 
Test the control mac command: +/// 1) set mac address +/// 2) set mac table with different unicast and multicast entries +/// 3) invalid test: +/// a) invalid unicast entries +/// b) invalid unicast mac table +/// c) invalid ctrl mac cmd +/// 3. Send ARP packet and check the reply. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn virtio_net_ctrl_mac_test() { + let id = 6 * TEST_MAC_ADDR_NUMS; + let queue_pairs: u16 = 1; + let queues: usize = 2 * queue_pairs as usize + 1; + + let max_table_len = MAX_MAC_TABLE_LEN as u32; + // (type, unicast_macs, multicast_macs) + let mac_reqs = [ + (VIRTIO_NET_CTRL_MAC_ADDR_SET, 0, 0), + (VIRTIO_NET_CTRL_MAC_TABLE_SET, 2, 2), + (VIRTIO_NET_CTRL_MAC_TABLE_SET, 2, max_table_len - 2 + 1), + (VIRTIO_NET_CTRL_MAC_TABLE_SET, max_table_len - 2, 2), + (VIRTIO_NET_CTRL_MAC_TABLE_SET, max_table_len + 1, 0), + (VIRTIO_NET_CTRL_MAC_TABLE_SET, max_table_len + 1, 2), + (VIRTIO_NET_CTRL_MAC_TABLE_SET, 2, max_table_len), + (u8::MAX, 0, 0), + ]; + + for (mac_type, uni, mul) in mac_reqs { + let (net, test_state, alloc) = set_up(id, false, queue_pairs, true); + + let vqs = init_net_device( + net.clone(), + test_state.clone(), + alloc.clone(), + DEFAULT_NET_FEATURES, + queues, + ); + + let mut arp_request = get_arp_request(id); + // Test VIRTIO_NET_CTRL_MAC_ADDR_SET. + match mac_type { + VIRTIO_NET_CTRL_MAC_ADDR_SET => { + // Normal test. + ctrl_vq_set_mac_address( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + ); + // Abnormal test: mac_address(2 byte), ack(1byte) + let req_data_bytes = [ + VIRTIO_NET_CTRL_MAC, + VIRTIO_NET_CTRL_MAC_ADDR_SET, + 0, + 0, + 0xff, + ]; + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + &req_data_bytes, + VIRTIO_NET_ERR, + ); + } + VIRTIO_NET_CTRL_MAC_TABLE_SET => { + // Turn off rx mode promisc. + let ctrl_rx_info = + CtrlRxInfo::new(VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_PROMISC, 0); + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + ctrl_rx_info.as_bytes(), + VIRTIO_NET_OK, + ); + + // Test VIRTIO_NET_CTRL_MAC_TABLE_SET. + ctrl_vq_set_mac_table( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + uni, + mul, + VIRTIO_NET_OK, + ); + arp_request.arp_packet.src_mac[MAC_ADDR_LEN - 1] += 2; + } + _ => { + // Invalid unicast entries test. + let req_data_bytes = [VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET, 0, 0xff]; + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + &req_data_bytes, + VIRTIO_NET_ERR, + ); + // Invalid unicast mac table test. + let req_data_bytes = [ + VIRTIO_NET_CTRL_MAC, + VIRTIO_NET_CTRL_MAC_TABLE_SET, + 0, + 0, + 0, + 1, + 0, + 0xff, + ]; + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + &req_data_bytes, + VIRTIO_NET_ERR, + ); + // Invalid cmd test. + let req_data_bytes = [VIRTIO_NET_CTRL_MAC, u8::MAX, 0, 0, 0, 0, 0, 0, 0, 0, 0xff]; + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + &req_data_bytes, + VIRTIO_NET_ERR, + ); + } + } + + send_arp_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + arp_request.as_bytes(), + true, + ); + + tear_down( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs, + id, + false, + ); + } +} + +/// Test the control rx command. +/// TestStep: +/// 1. Init device with control vq. +/// 2. 
Test the control rx command: +/// 1) PROMISC/NOUNI/ALLUNI/NOBCAST/NOMULTI/ALLMULTI +/// 2) invalid class/cmd/switch +/// 3. Send ARP packet and check the reply. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn virtio_net_ctrl_rx_test() { + let id = 7 * TEST_MAC_ADDR_NUMS; + let queue_pairs: u16 = 1; + let queues: usize = 2 * queue_pairs as usize + 1; + + // (req_type, cmd, value, need_reply, with_mac, ack) + let reqs = [ + (1, VIRTIO_NET_CTRL_RX_PROMISC, 0, true, false, VIRTIO_NET_OK), + (1, VIRTIO_NET_CTRL_RX_NOUNI, 1, false, false, VIRTIO_NET_OK), + (1, VIRTIO_NET_CTRL_RX_ALLUNI, 1, true, false, VIRTIO_NET_OK), + (1, u8::MAX, 0, true, false, VIRTIO_NET_ERR), + (1, VIRTIO_NET_CTRL_RX_NOBCAST, 1, false, true, VIRTIO_NET_OK), + ( + 2, + VIRTIO_NET_CTRL_RX_NOMULTI, + 1, + false, + false, + VIRTIO_NET_OK, + ), + ( + 2, + VIRTIO_NET_CTRL_RX_ALLMULTI, + 1, + true, + false, + VIRTIO_NET_OK, + ), + ( + 2, + u8::MAX, + MAX_MAC_TABLE_LEN as u8, + true, + false, + VIRTIO_NET_ERR, + ), + (2, u8::MAX, 2, true, false, VIRTIO_NET_ERR), + (3, 0, 0, true, false, VIRTIO_NET_ERR), + (u8::MAX, 0, 0, true, false, VIRTIO_NET_ERR), + ]; + + for (req_type, cmd, value, need_reply, with_mac, ack) in reqs { + let (net, test_state, alloc) = set_up(id, false, queue_pairs, with_mac); + + let vqs = init_net_device( + net.clone(), + test_state.clone(), + alloc.clone(), + DEFAULT_NET_FEATURES, + queues, + ); + + let mut arp_request = get_arp_request(id); + // Turn off rx mode promisc. + let ctrl_rx_info = CtrlRxInfo::new(VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_PROMISC, 0); + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + ctrl_rx_info.as_bytes(), + VIRTIO_NET_OK, + ); + let mut ctrl_rx_info = CtrlRxInfo::new(VIRTIO_NET_CTRL_RX, 0, 0); + match req_type { + 1 => { + ctrl_rx_info = CtrlRxInfo::new(VIRTIO_NET_CTRL_RX, cmd, value); + } + 2 => { + ctrl_vq_set_mac_table( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + 0, + u32::from(value), + VIRTIO_NET_OK, + ); + arp_request.arp_packet.src_mac[0] += 1; + arp_request.arp_packet.src_mac[MAC_ADDR_LEN - 1] += 1; + ctrl_rx_info = CtrlRxInfo::new(VIRTIO_NET_CTRL_RX, cmd, value); + } + 3 => { + // Test invalid class. + ctrl_rx_info = CtrlRxInfo::new(u8::MAX, 0, 0); + } + _ => { + // Test no switch data. + let ctrl_rx_data = [VIRTIO_NET_CTRL_RX, 0, 0xff]; + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + &ctrl_rx_data, + VIRTIO_NET_ERR, + ); + } + } + + if req_type != u8::MAX { + send_ctrl_vq_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + ctrl_rx_info.as_bytes(), + ack, + ); + } + + if cmd == VIRTIO_NET_CTRL_RX_NOBCAST { + // Test receive filter: broadcast. + arp_request.arp_packet.src_mac = [0xff; 6]; + } + + send_arp_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + arp_request.as_bytes(), + need_reply, + ); + + tear_down( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs, + id, + false, + ); + } +} + +/// Test the control abnormal command. +/// TestStep: +/// 1. Init device with control vq. +/// 2. Test the control rx command without ack(2 times), expect NEEDS_RESET. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. 
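+// Both requests below chain only driver-readable descriptors, so the device has no writable
+// buffer to return the ack in and is expected to set VIRTIO_CONFIG_S_NEEDS_RESET.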
+#[test] +fn virtio_net_ctrl_abnormal_test() { + let id = 8 * TEST_MAC_ADDR_NUMS; + let queue_pairs: u16 = 1; + let queues: usize = 2 * queue_pairs as usize + 1; + let (net, test_state, alloc) = set_up(id, false, queue_pairs, false); + + let vqs = init_net_device( + net.clone(), + test_state.clone(), + alloc.clone(), + DEFAULT_NET_FEATURES, + queues, + ); + + let ctrl_rx_info = CtrlRxInfo::new(VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_PROMISC, 0); + let ctrl_data = &ctrl_rx_info.as_bytes(); + + // The first request cause device needs reset. + // The second request test if the device can handle request. + let test_num = 2; + for i in 0..test_num { + let ctrl_vq = &vqs[2]; + let addr = alloc.borrow_mut().alloc(ctrl_data.len() as u64); + test_state.borrow().memwrite(addr, ctrl_data); + + // ctrl_rx_info.switch: u8 + let mut data_len = 1; + if i == test_num - 1 { + // ctrl_rx_info.switch and ctrl_rx_info.ack + data_len = 2; + } + let data_entries: Vec = vec![ + // ctrl_rx_info.ctrl_hdr.class: u8 + TestVringDescEntry { + data: addr, + len: 1, + write: false, + }, + // ctrl_rx_info.ctrl_hdr.cmd: u8 + TestVringDescEntry { + data: addr + 1, + len: 1, + write: false, + }, + TestVringDescEntry { + data: addr + 2, + len: data_len, + write: false, + }, + ]; + ctrl_vq + .borrow_mut() + .add_chained(test_state.clone(), data_entries); + net.borrow() + .kick_virtqueue(test_state.clone(), ctrl_vq.clone()); + check_device_status(net.clone(), VIRTIO_CONFIG_S_NEEDS_RESET); + } + + tear_down(net, test_state, alloc, vqs, id, false); +} + +/// Test the abnormal rx/tx request. +/// TestStep: +/// 1. Init device. +/// 2. Test the rx/tx request: +/// 1) rx queue is full, and recover it +/// 2) cause the tx packet limitation once +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn virtio_net_abnormal_rx_tx_test() { + let id = 9 * TEST_MAC_ADDR_NUMS; + let queue_pairs: u16 = 1; + let queues: usize = 2 * queue_pairs as usize + 1; + + let (net, test_state, alloc) = set_up(id, false, queue_pairs, false); + + net.borrow_mut().reset(); + net.borrow_mut().set_acknowledge(); + net.borrow_mut().set_driver(); + net.borrow_mut().negotiate_features(DEFAULT_NET_FEATURES); + net.borrow_mut().set_features_ok(); + net.borrow_mut().pci_dev.enable_msix(None); + net.borrow_mut() + .setup_msix_configuration_vector(alloc.clone(), 0); + let vqs = net + .borrow_mut() + .init_virtqueue(test_state.clone(), alloc.clone(), queues); + fill_rx_vq(test_state.clone(), alloc.clone(), vqs[0].clone()); + + // Test rx queue is full. + // Set 0 to rx->avail->idx. + test_state.borrow().writew(vqs[0].borrow().avail + 2, 0); + net.borrow().set_driver_ok(); + + // Test send 256 packet to exceed the handle_tx limitation once. + let request = get_arp_request(id); + let length = request.as_bytes().len() as u64; + let size = net.borrow().get_queue_size(); + assert_eq!(size, QUEUE_SIZE_NET); + for _ in 0..size { + let addr = alloc.borrow_mut().alloc(length); + test_state.borrow().memwrite(addr, request.as_bytes()); + vqs[1] + .borrow_mut() + .add(test_state.clone(), addr, length as u32, false); + } + net.borrow().virtqueue_notify(vqs[1].clone()); + + // Recover the rx->avail->idx for receiving packets. + test_state + .borrow() + .writew(vqs[0].borrow().avail + 2, vqs[0].borrow().size as u16); + net.borrow().virtqueue_notify(vqs[0].clone()); + + // Check rx vq is ok. 
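+    // Poll until the device notifies the rx queue and a used element can be taken, which shows
+    // the ring recovered after being full.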
+ let start_time = time::Instant::now(); + let timeout_us = time::Duration::from_micros(TIMEOUT_US / 5); + loop { + if net.borrow().queue_was_notified(vqs[0].clone()) + && vqs[0].borrow_mut().get_buf(test_state.clone()) + { + break; + } + assert!(time::Instant::now() - start_time < timeout_us); + } + + tear_down(net, test_state, alloc, vqs, id, false); +} + +/// Test the abnormal rx/tx request 2. +/// TestStep: +/// 1. Init device. +/// 2. Test the rx/tx request: +/// 1) handle rx error +/// 2) handle tx error +/// 3) test tx packet with no in_iovec +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn virtio_net_abnormal_rx_tx_test_2() { + let id = 10 * TEST_MAC_ADDR_NUMS; + let queue_pairs: u16 = 1; + let queues: usize = 2 * queue_pairs as usize + 1; + + for i in 0..3 { + let (net, test_state, alloc) = set_up(id, false, queue_pairs, false); + + net.borrow_mut().reset(); + net.borrow_mut().set_acknowledge(); + net.borrow_mut().set_driver(); + net.borrow_mut().negotiate_features(DEFAULT_NET_FEATURES); + net.borrow_mut().set_features_ok(); + net.borrow_mut().pci_dev.enable_msix(None); + net.borrow_mut() + .setup_msix_configuration_vector(alloc.clone(), 0); + let vqs = net + .borrow_mut() + .init_virtqueue(test_state.clone(), alloc.clone(), queues); + fill_rx_vq(test_state.clone(), alloc.clone(), vqs[0].clone()); + + // Test rx packet failed with invalid avail->ring[i]. + if i == 0 { + test_state + .borrow() + .writew(vqs[0].borrow().avail + 4, u16::MAX); + } + // Set driver ok. + let status = net.borrow().get_status() | VIRTIO_CONFIG_S_DRIVER_OK; + net.borrow().set_status(status); + + let request = get_arp_request(id); + let length = request.as_bytes().len() as u64; + let addr = alloc.borrow_mut().alloc(length); + test_state.borrow().memwrite(addr, request.as_bytes()); + vqs[1] + .borrow_mut() + .add(test_state.clone(), addr, length as u32, false); + if i == 1 { + // Test tx packet failed with invalid avail->ring[i]. + test_state.borrow().writew( + vqs[1].borrow().avail + offset_of!(VringAvail, ring) as u64, + u16::MAX, + ); + } else if i == 2 { + // Test tx packet with no in_iovec. + test_state.borrow().writew( + vqs[1].borrow().desc + offset_of!(VringDesc, flags) as u64, + VRING_DESC_F_WRITE, + ); + } + net.borrow().virtqueue_notify(vqs[1].clone()); + // Check if it will affect the stratovirt when device broken. + net.borrow().virtqueue_notify(vqs[0].clone()); + check_device_status(net.clone(), VIRTIO_CONFIG_S_NEEDS_RESET); + sleep(time::Duration::from_millis(5000)); + + let ret = test_state + .borrow() + .qmp("{\"execute\": \"qmp_capabilities\"}"); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + tear_down( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs, + id, + false, + ); + } +} + +/// Test set abnormal feature. +/// TestStep: +/// 1. Init device, set abnormal feature 40 which will be ignored. +/// 2. Send ARP packet and check the reply. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn virtio_net_set_abnormal_feature() { + let id = 11 * TEST_MAC_ADDR_NUMS; + let (net, test_state, alloc) = set_up(id, false, 0, false); + + // Three virtqueues: tx/rx/ctrl. 
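+    // Feature bit 40 is not part of DEFAULT_NET_FEATURES and is expected to be dropped during
+    // negotiation (asserted below).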
+ let vqs = init_net_device( + net.clone(), + test_state.clone(), + alloc.clone(), + DEFAULT_NET_FEATURES | 1 << 40, + 3, + ); + assert_eq!(net.borrow().get_guest_features(), DEFAULT_NET_FEATURES); + + let arp_request = get_arp_request(id); + send_arp_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + arp_request.as_bytes(), + true, + ); + + tear_down(net, test_state, alloc, vqs, id, false); +} + +/// Send abnormal packet. +/// TestStep: +/// 1. Init device. +/// 2. Send abnormal packet: +/// 1) invalid virtio_net_hdr +/// 2) random a packet +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 2. success or failure. +/// 1/3/4: success. +#[test] +fn virtio_net_send_abnormal_packet() { + let id = 12 * TEST_MAC_ADDR_NUMS; + let (net, test_state, alloc) = set_up(id, false, 0, false); + + // Three virtqueues: tx/rx/ctrl. + let vqs = init_net_device( + net.clone(), + test_state.clone(), + alloc.clone(), + DEFAULT_NET_FEATURES, + 3, + ); + + let mut arp_request = get_arp_request(id); + arp_request.net_hdr.flags = u8::MAX; + send_arp_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs.clone(), + arp_request.as_bytes(), + false, + ); + + let data_bytes = arp_request.as_mut_bytes(); + let mut rng = rand::thread_rng(); + let test_packets = 8; + for _ in 0..test_packets { + for _ in 0..data_bytes.len() / 3 { + let idx = rng.gen_range(0..data_bytes.len()); + data_bytes[idx] = rng.gen_range(0..0xff); + } + + send_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs[1].clone(), + data_bytes, + ); + } + + for _ in 0..test_packets { + let mut data_bytes = [0; MAX_PACKET_LEN as usize + 8]; + for j in 0..MAX_PACKET_LEN as usize + 8 { + data_bytes[j] = rng.gen_range(0..0xff); + } + send_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs[1].clone(), + &data_bytes, + ); + } + + let ret = test_state + .borrow() + .qmp("{\"execute\": \"qmp_capabilities\"}"); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + tear_down(net, test_state, alloc, vqs, id, false); +} + +/// Send and receive packet test with mq. +/// TestStep: +/// 1. Init device with mq. +/// 2. Send ARP packet and check the reply. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn virtio_net_rx_tx_mq_test() { + let id = 13 * TEST_MAC_ADDR_NUMS; + let num_queues = 2; + let (net, test_state, alloc) = set_up(id, true, num_queues, false); + + // Three virtqueues: tx/rx/ctrl. + let vqs = init_net_device( + net.clone(), + test_state.clone(), + alloc.clone(), + DEFAULT_NET_FEATURES, + 2 * num_queues as usize + 1, + ); + + for i in 0..num_queues { + send_request( + net.clone(), + test_state.clone(), + alloc.clone(), + vqs[i as usize * 2 + 1].clone(), + get_arp_request(id + i as u8 * TEST_MAC_ADDR_NUMS).as_bytes(), + ); + } + + tear_down(net, test_state, alloc, vqs, id, true); +} + +/// Test the abnormal rx/tx request 3. +/// TestStep: +/// 1. Init device. +/// 2. Test the rx/tx request with not enable virtqueue: +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. 
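+// The virtqueues below are configured but never enabled, so the device is expected to ignore
+// the tx notification and leave used->idx at 0.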
+#[test] +fn virtio_net_abnormal_rx_tx_test_3() { + let id = 14 * TEST_MAC_ADDR_NUMS; + let (net, test_state, alloc) = set_up(id, false, 0, false); + + net.borrow_mut().reset(); + net.borrow_mut().set_acknowledge(); + net.borrow_mut().set_driver(); + net.borrow_mut().negotiate_features(DEFAULT_NET_FEATURES); + net.borrow_mut().set_features_ok(); + net.borrow_mut().pci_dev.enable_msix(None); + net.borrow_mut() + .setup_msix_configuration_vector(alloc.clone(), 0); + + let mut vqs = Vec::new(); + // Init virtqueue, but don't enable it. + for i in 0..3 { + let vq = Rc::new(RefCell::new(TestVirtQueue::new())); + vq.borrow_mut().setup(&*net.borrow(), alloc.clone(), i); + vq.borrow().vring_init(test_state.clone()); + + let desc = vq.borrow().desc; + let avail = vq.borrow().avail; + let used = vq.borrow().used; + net.borrow().activate_queue(desc, avail, used); + + let notify_off = net.borrow().pci_dev.io_readw( + net.borrow().bar, + u64::from(net.borrow().common_base) + + offset_of!(VirtioPciCommonCfg, queue_notify_off) as u64, + ); + + vq.borrow_mut().queue_notify_off = u64::from(net.borrow().notify_base) + + u64::from(notify_off) * u64::from(net.borrow().notify_off_multiplier); + + net.borrow() + .setup_virtqueue_intr(i + 1, alloc.clone(), vq.clone()); + vqs.push(vq); + } + fill_rx_vq(test_state.clone(), alloc.clone(), vqs[0].clone()); + + // Set driver ok without check. + let status = net.borrow().get_status() | VIRTIO_CONFIG_S_DRIVER_OK; + net.borrow().set_status(status); + + let request = get_arp_request(id); + let length = request.as_bytes().len() as u64; + let addr = alloc.borrow_mut().alloc(length); + test_state.borrow().memwrite(addr, request.as_bytes()); + vqs[1] + .borrow_mut() + .add(test_state.clone(), addr, length as u32, false); + net.borrow().virtqueue_notify(vqs[1].clone()); + sleep(time::Duration::from_millis(500)); + let used_idx = test_state + .borrow() + .readw(vqs[1].borrow().used + offset_of!(VringUsed, idx) as u64); + assert_eq!(used_idx, 0); + + tear_down(net, test_state, alloc, vqs, id, false); +} diff --git a/tests/mod_test/tests/pci_test.rs b/tests/mod_test/tests/pci_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..dcad2ea4aeedfcaac3c081be34433007a6a7ddf8 --- /dev/null +++ b/tests/mod_test/tests/pci_test.rs @@ -0,0 +1,3024 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use std::cell::RefCell; +use std::rc::Rc; +use std::{thread, time}; + +use serde_json::json; + +use mod_test::libdriver::machine::TestStdMachine; +use mod_test::libdriver::malloc::GuestAllocator; +use mod_test::libdriver::pci::*; +use mod_test::libdriver::pci_bus::{PciBusOps, TestPciBus}; +use mod_test::libdriver::virtio::{TestVirtQueue, VirtioDeviceOps, VIRTIO_F_VERSION_1}; +use mod_test::libdriver::virtio_block::{ + add_blk_request, virtio_blk_default_feature, virtio_blk_read, virtio_blk_write, + VIRTIO_BLK_T_OUT, +}; +use mod_test::libdriver::virtio_pci_modern::TestVirtioPciDev; +use mod_test::libtest::{test_init, TestState, MACHINE_TYPE_ARG}; +use mod_test::utils::{cleanup_img, create_img, read_le_u16, ImageType, TEST_IMAGE_SIZE}; + +const VIRTIO_PCI_VENDOR: u16 = 0x1af4; +const BLK_DEVICE_ID: u16 = 0x1042; +const MAX_DEVICE_NUM_IN_MULTIFUNC: u8 = 248; +const MAX_DEVICE_NUM: u8 = 32; +const TIMEOUT_S: u64 = 1; + +#[derive(Clone, Copy)] +struct DemoDev { + bar_num: u8, + bar_size: u64, + bus_num: u8, + dev_num: u8, +} + +fn fmt_demo_deves(cfg: DemoDev, num: u8) -> String { + let mut dev_str: String = String::new(); + + for i in 1..num + 1 { + let tmp = format!( + "-device pcie-demo-dev,addr=0x{:x},bus=pcie.{},id=demo{},bar_num={},bar_size={}", + cfg.dev_num + i - 1, + cfg.bus_num, + i, + cfg.bar_num, + cfg.bar_size + ); + let sep = match i { + 1 => "", + _ => " ", + }; + dev_str = format!("{}{}{}", dev_str, sep, tmp); + } + + dev_str +} + +fn init_demo_dev(cfg: DemoDev, dev_num: u8) -> (Rc>, Rc>) { + let mut demo_dev_args: Vec<&str> = Vec::new(); + + let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + demo_dev_args.append(&mut args); + + let mut args: Vec<&str> = "-D /tmp/oscar.log".split(' ').collect(); + demo_dev_args.append(&mut args); + + let demo_str = fmt_demo_deves(cfg, dev_num); + args = demo_str[..].split(' ').collect(); + demo_dev_args.append(&mut args); + + let test_state = Rc::new(RefCell::new(test_init(demo_dev_args))); + let machine = Rc::new(RefCell::new(TestStdMachine::new(test_state.clone()))); + let _allocator = machine.borrow().allocator.clone(); + + let mut pci_dev = TestPciDev::new(machine.borrow().pci_bus.clone()); + let devfn = cfg.dev_num << 3; + pci_dev.devfn = devfn; + + pci_dev.set_bus_num(cfg.bus_num); + pci_dev.enable(); + + (Rc::new(RefCell::new(pci_dev)), test_state) +} + +#[derive(Default, Clone)] +pub struct MsixVector { + pub msix_entry: u16, + pub msix_addr: u64, + pub msix_data: u32, +} + +impl MsixVector { + fn new(entry: u16, alloc: Rc>) -> Self { + Self { + msix_entry: entry, + msix_addr: alloc.borrow_mut().alloc(4), + msix_data: 0x12345678, + } + } +} + +pub struct RootPort { + pub rp_dev: TestPciDev, + pub rp_misx_vector: MsixVector, +} + +impl RootPort { + fn new( + machine: Rc>, + alloc: Rc>, + bus_num: u8, + devfn: u8, + ) -> Self { + let mut root_port = TestPciDev::new(machine.borrow().pci_bus.clone()); + root_port.set_bus_num(bus_num); + root_port.devfn = devfn; + assert_eq!(root_port.config_readw(PCI_SUB_CLASS_DEVICE), 0x0604); + + root_port.enable(); + root_port.enable_msix(None); + let root_port_msix = MsixVector::new(0, alloc); + root_port.set_msix_vector( + root_port_msix.msix_entry, + root_port_msix.msix_addr, + root_port_msix.msix_data, + ); + root_port.init_notification(); + root_port.clear_slot_event(); + + Self { + rp_dev: root_port, + rp_misx_vector: root_port_msix, + } + } +} + +fn build_root_port_args(root_port_nums: u8) -> Vec { + if root_port_nums == 0 { + return Vec::new(); + } + let mut multifunc = false; + 
if root_port_nums > 32 { + multifunc = true; + } + + let mut root_port_args: Vec = Vec::with_capacity(root_port_nums as usize); + let mut addr = 1; + let mut func = 0; + for bus in 1..=root_port_nums { + let mut arg = format!( + "-device pcie-root-port,port=0x0,chassis=1,bus=pcie.0,addr={:#x}.{},id=pcie.{}", + addr, func, bus + ); + + if func == 0 && multifunc { + arg.push_str(",multifunction=on"); + } + + if multifunc { + addr = bus / 8 + 1; + func += 1; + func %= 8; + } else { + addr += 1; + func = 0; + } + + root_port_args.push(arg); + } + + root_port_args +} + +fn build_blk_args( + blk_nums: u8, + attach_in_rp: bool, + multifunc: bool, +) -> Vec<(String, u8, u8, u8, u8, bool)> { + if multifunc { + assert!(blk_nums < MAX_DEVICE_NUM_IN_MULTIFUNC); + } else { + assert!(blk_nums < MAX_DEVICE_NUM); + } + + let mut blk_args: Vec<(String, u8, u8, u8, u8, bool)> = Vec::with_capacity(blk_nums as usize); + let mut slot = 0; + let mut func = 0; + let mut nums = 0; + let mut bus = 0; + + if attach_in_rp { + bus = 1; + } else { + slot = 1; + } + + while nums < blk_nums { + if multifunc { + if func % 8 == 0 { + blk_args.push((String::from("virtio-blk-pci"), nums, bus, slot, func, true)); + } else { + blk_args.push((String::from("virtio-blk-pci"), nums, bus, slot, func, false)); + } + func += 1; + slot += func / 8 + } else { + blk_args.push((String::from("virtio-blk-pci"), nums, bus, slot, func, false)); + bus += 1; + } + nums += 1; + } + + blk_args +} + +fn build_blk_driver_args(blk_nums: u8) -> (Vec, Vec) { + let mut driver_args: Vec = Vec::new(); + let mut image_paths: Vec = Vec::new(); + + for i in 0..blk_nums { + let image_path = create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw); + image_paths.push(image_path.clone()); + let driver_arg_str = format!( + "-drive if=none,id=drive-{},file={},format=raw,direct=false", + i, image_path + ); + driver_args.push(driver_arg_str.clone()); + } + + (driver_args, image_paths) +} + +fn build_hotplug_blk_cmd( + hotplug_blk_id: u8, + hotplug_image_path: String, + bus_num: u8, + slot: u8, + func: u8, +) -> (String, String) { + let add_blk_command = format!( + "{{\"execute\": \"blockdev-add\", \ + \"arguments\": {{\"node-name\": \"drive-{}\", \"file\": {{\"driver\": \ + \"file\", \"filename\": \"{}\", \"aio\": \"off\"}}, \ + \"cache\": {{\"direct\": false}}, \"read-only\": false}}}}", + hotplug_blk_id, hotplug_image_path + ); + + let add_device_command = format!( + "{{\"execute\":\"device_add\", \ + \"arguments\": {{\"id\":\"blk-{}\", \"driver\":\"virtio-blk-pci\", \ + \"drive\": \"drive-{}\", \"addr\":\"{:#x}.{:#x}\", \"bus\": \"pcie.{}\"}}}}", + hotplug_blk_id, hotplug_blk_id, slot, func, bus_num + ); + + (add_blk_command, add_device_command) +} + +fn build_hotunplug_blk_cmd(unplug_blk_id: u8) -> (String, String) { + let delete_device_command = format!( + "{{\"execute\": \"device_del\",\ + \"arguments\": {{\"id\":\"blk-{}\"}}}}", + unplug_blk_id + ); + + let delete_blk_command = format!( + "{{\"execute\": \"blockdev-del\",\ + \"arguments\": {{\"node-name\":\"drive-{}\"}}}}", + unplug_blk_id + ); + + (delete_device_command, delete_blk_command) +} + +fn build_all_device_args( + root_port_nums: u8, + pci_device_param: Vec<(String, u8, u8, u8, u8, bool)>, +) -> Vec { + let mut device_args: Vec = Vec::new(); + let mut root_port_args = build_root_port_args(root_port_nums); + if !root_port_args.is_empty() { + device_args.append(&mut root_port_args); + } + + for i in 0..pci_device_param.len() { + let mut device_arg_str = format!( + "-device 
{},id=blk-{},drive=drive-{},bus=pcie.{},addr={}.{}", + pci_device_param.get(i).unwrap().0, + pci_device_param.get(i).unwrap().1, + pci_device_param.get(i).unwrap().1, + pci_device_param.get(i).unwrap().2, + pci_device_param.get(i).unwrap().3, + pci_device_param.get(i).unwrap().4, + ); + + if pci_device_param.get(i).unwrap().5 { + let multi_func_arg = String::from(",multifunction=on"); + device_arg_str.push_str(&multi_func_arg); + } + device_args.push(device_arg_str.clone()); + } + + device_args +} + +fn create_blk( + machine: Rc>, + bus_num: u8, + pci_slot: u8, + pci_fn: u8, +) -> Rc> { + let virtio_blk = Rc::new(RefCell::new(TestVirtioPciDev::new( + machine.borrow().pci_bus.clone(), + ))); + virtio_blk.borrow_mut().pci_dev.set_bus_num(bus_num); + virtio_blk.borrow_mut().init(pci_slot, pci_fn); + virtio_blk +} + +fn create_machine( + root_port_nums: u8, + device_args: Vec, + driver_args: Vec, + other_args: Option>, +) -> ( + Rc>, + Rc>, + Rc>, +) { + let mut extra_args: Vec<&str> = Vec::new(); + + let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + extra_args.append(&mut args); + + for device_arg in device_args.iter() { + let mut arg = device_arg[..].split(' ').collect(); + extra_args.append(&mut arg); + } + + for driver_arg in driver_args.iter() { + let mut arg = driver_arg[..].split(' ').collect(); + extra_args.append(&mut arg); + } + + let mut args: Vec = Vec::new(); + if other_args.is_some() { + args = other_args.unwrap(); + } + for other_arg in args.iter() { + let mut arg = other_arg[..].split(' ').collect(); + extra_args.append(&mut arg); + } + + let test_state = Rc::new(RefCell::new(test_init(extra_args))); + let machine = Rc::new(RefCell::new(TestStdMachine::new(test_state.clone()))); + machine + .borrow() + .pci_bus + .borrow() + .pci_auto_bus_scan(root_port_nums); + let allocator = machine.borrow().allocator.clone(); + + (test_state, machine, allocator) +} + +fn set_up( + root_port_nums: u8, + blk_nums: u8, + attach_in_rp: bool, + multifunc: bool, +) -> ( + Rc>, + Rc>, + Rc>, + Vec, +) { + let device_args = build_all_device_args( + root_port_nums, + build_blk_args(blk_nums, attach_in_rp, multifunc), + ); + let (blk_driver_args, image_paths) = build_blk_driver_args(blk_nums); + let (test_state, machine, alloc) = + create_machine(root_port_nums, device_args, blk_driver_args, None); + (test_state, machine, alloc, image_paths) +} + +fn tear_down( + blk: Option>>, + test_state: Rc>, + alloc: Rc>, + vqs: Option>>>, + image_paths: Option>, +) { + if blk.is_some() { + blk.clone().unwrap().borrow_mut().reset(); + blk.clone().unwrap().borrow_mut().pci_dev.disable_msix(); + } + if vqs.is_some() { + blk.unwrap() + .borrow_mut() + .destroy_device(alloc, vqs.unwrap()); + } + + test_state.borrow_mut().stop(); + if let Some(img_paths) = image_paths { + img_paths.iter().for_each(|image_path| { + cleanup_img(image_path.to_string()); + }) + } +} + +fn validate_config_value_2byte( + pci_bus: Rc>, + bus_num: u8, + devfn: u8, + offset: u8, + expected_value: u16, + mask: u16, +) { + let config_value = pci_bus.borrow().config_readw(bus_num, devfn, offset); + assert_eq!(config_value & mask, expected_value); +} + +fn validate_config_perm_1byte( + pci_dev: TestPciDev, + offset: u8, + expected_value: u8, + written_value: u8, + mask: u8, +) { + let config_value = + pci_dev + .pci_bus + .borrow() + .config_readb(pci_dev.bus_num, pci_dev.devfn, offset); + assert_eq!(config_value & mask, expected_value); + + pci_dev + .pci_bus + .borrow() + .config_writeb(pci_dev.bus_num, pci_dev.devfn, offset, 
written_value); + + let config_value = + pci_dev + .pci_bus + .borrow() + .config_readb(pci_dev.bus_num, pci_dev.devfn, offset); + assert_eq!(config_value & mask, expected_value); +} + +fn validate_config_perm_2byte( + pci_dev: TestPciDev, + offset: u8, + expected_value: u16, + written_value: u16, + mask: u16, +) { + pci_dev + .pci_bus + .borrow() + .config_writew(pci_dev.bus_num, pci_dev.devfn, offset, written_value); + let config_value = + pci_dev + .pci_bus + .borrow() + .config_readw(pci_dev.bus_num, pci_dev.devfn, offset); + assert_eq!(config_value & mask, expected_value); +} + +fn validate_config_perm_4byte( + pci_dev: TestPciDev, + offset: u8, + expected_value: u32, + written_value: u32, + mask: u32, +) { + let config_value = + pci_dev + .pci_bus + .borrow() + .config_readl(pci_dev.bus_num, pci_dev.devfn, offset); + assert_eq!(config_value & mask, expected_value); + + pci_dev + .pci_bus + .borrow() + .config_writel(pci_dev.bus_num, pci_dev.devfn, offset, written_value); + + let config_value = + pci_dev + .pci_bus + .borrow() + .config_readl(pci_dev.bus_num, pci_dev.devfn, offset); + assert_eq!(config_value & mask, expected_value); +} + +fn get_slot_ctl_val(root_port: Rc>) -> (u16, u8) { + let rp_borrowed = root_port.borrow(); + let exp_cap_addr = rp_borrowed.rp_dev.find_capability(0x10, 0); + let slot_ctl = rp_borrowed + .rp_dev + .config_readw(exp_cap_addr + PCI_EXP_SLTCTL); + + (slot_ctl, exp_cap_addr) +} + +fn power_on_device(root_port: Rc>) { + let (slot_ctl, addr) = get_slot_ctl_val(root_port.clone()); + + let mask = PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_PCC; + root_port.borrow().rp_dev.config_writew( + addr + PCI_EXP_SLTCTL, + (slot_ctl & !mask) | PCI_EXP_SLTCTL_PWR_IND_ON | PCI_EXP_SLTCTL_PWR_ON, + ); +} + +fn power_off_device(root_port: Rc>) { + let (slot_ctl, addr) = get_slot_ctl_val(root_port.clone()); + + let mask = PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_PCC; + root_port.borrow().rp_dev.config_writew( + addr + PCI_EXP_SLTCTL, + (slot_ctl & !mask) | PCI_EXP_SLTCTL_PWR_IND_OFF | PCI_EXP_SLTCTL_PWR_OFF, + ); +} + +fn power_indicator_blink(root_port: Rc>) { + let (slot_ctl, addr) = get_slot_ctl_val(root_port.clone()); + + let mask = PCI_EXP_SLTCTL_PIC; + root_port.borrow().rp_dev.config_writew( + addr + PCI_EXP_SLTCTL, + (slot_ctl & !mask) | PCI_EXP_SLTCTL_PWR_IND_BLINK, + ); +} + +fn power_indicator_off(root_port: Rc>) { + let (slot_ctl, addr) = get_slot_ctl_val(root_port.clone()); + + let mask = PCI_EXP_SLTCTL_PIC; + root_port.borrow().rp_dev.config_writew( + addr + PCI_EXP_SLTCTL, + (slot_ctl & !mask) | PCI_EXP_SLTCTL_PWR_IND_OFF, + ); +} + +fn validate_blk_io_success( + blk: Rc>, + test_state: Rc>, + alloc: Rc>, +) { + let features = virtio_blk_default_feature(blk.clone()); + let virtqueues = blk + .borrow_mut() + .init_device(test_state.clone(), alloc.clone(), features, 1); + + validate_std_blk_io(blk.clone(), test_state, virtqueues.clone(), alloc.clone()); + + blk.borrow_mut().pci_dev.disable_msix(); + blk.borrow() + .cleanup_virtqueue(alloc, virtqueues[0].borrow().desc); +} + +fn simple_blk_io_req( + blk: Rc>, + test_state: Rc>, + virtqueue: Rc>, + alloc: Rc>, +) -> u32 { + let (free_head, _req_addr) = add_blk_request( + test_state, + alloc, + virtqueue.clone(), + VIRTIO_BLK_T_OUT, + 0, + false, + ); + blk.borrow().virtqueue_notify(virtqueue); + + free_head +} + +fn wait_intr_timeout( + blk: Rc>, + virtqueue: Rc>, + timeout: u64, +) -> bool { + let start_time = time::Instant::now(); + let timeout = time::Duration::from_secs(timeout); + + loop { + if 
blk.borrow().queue_was_notified(virtqueue.clone()) { + return false; + } + + if time::Instant::now() - start_time > timeout { + return true; + } + } +} + +fn validate_std_blk_io( + blk: Rc>, + test_state: Rc>, + virtqueues: Vec>>, + alloc: Rc>, +) { + virtio_blk_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + 0, + false, + ); + + virtio_blk_read(blk, test_state, alloc, virtqueues[0].clone(), 0, false); +} + +fn wait_root_port_msix(root_port: Rc>) -> bool { + let start_time = time::Instant::now(); + let timeout = time::Duration::from_secs(TIMEOUT_S); + let rp_borrowed = root_port.borrow(); + loop { + if rp_borrowed.rp_dev.has_msix( + rp_borrowed.rp_misx_vector.msix_addr, + rp_borrowed.rp_misx_vector.msix_data, + ) { + return true; + } + if (time::Instant::now() - start_time) >= timeout { + return false; + } + } +} + +fn wait_root_port_intx(root_port: Rc>) -> bool { + let start_time = time::Instant::now(); + let timeout = time::Duration::from_secs(TIMEOUT_S); + let rp_borrowed = root_port.borrow(); + loop { + if rp_borrowed.rp_dev.has_intx() { + return true; + } + if (time::Instant::now() - start_time) >= timeout { + return false; + } + } +} + +fn lookup_all_cap_addr(cap_id: u8, pci_dev: TestPciDev) -> Vec { + let mut addr = pci_dev.config_readb(PCI_CAPABILITY_LIST); + let mut cap_addrs: Vec = Vec::new(); + loop { + let cap = pci_dev.config_readb(addr); + if cap == cap_id { + cap_addrs.push(addr); + } + + addr = pci_dev.config_readb(addr + PCI_CAP_LIST_NEXT); + if addr == 0 { + break; + } + } + cap_addrs +} + +fn get_msix_flag(pci_dev: TestPciDev) -> u16 { + let addr = pci_dev.find_capability(PCI_CAP_ID_MSIX, 0); + assert_ne!(addr, 0); + + pci_dev.config_readw(addr + PCI_MSIX_MSG_CTL) +} + +fn set_msix_enable(pci_dev: TestPciDev) { + let addr = pci_dev.find_capability(PCI_CAP_ID_MSIX, 0); + let old_value = get_msix_flag(pci_dev.clone()); + pci_dev.config_writew(addr + PCI_MSIX_MSG_CTL, old_value | PCI_MSIX_MSG_CTL_ENABLE); +} + +fn set_msix_disable(pci_dev: TestPciDev) { + let addr = pci_dev.find_capability(PCI_CAP_ID_MSIX, 0); + let old_value = get_msix_flag(pci_dev.clone()); + pci_dev.config_writew( + addr + PCI_MSIX_MSG_CTL, + old_value & !PCI_MSIX_MSG_CTL_ENABLE, + ); +} + +fn mask_msix_global(pci_dev: TestPciDev) { + let addr = pci_dev.find_capability(PCI_CAP_ID_MSIX, 0); + let old_value = get_msix_flag(pci_dev.clone()); + pci_dev.config_writew( + addr + PCI_MSIX_MSG_CTL, + old_value | PCI_MSIX_MSG_CTL_MASKALL, + ); +} + +fn unmask_msix_global(pci_dev: TestPciDev) { + let addr = pci_dev.find_capability(PCI_CAP_ID_MSIX, 0); + let old_value = get_msix_flag(pci_dev.clone()); + pci_dev.config_writew( + addr + PCI_MSIX_MSG_CTL, + old_value & !PCI_MSIX_MSG_CTL_MASKALL, + ); +} + +fn mask_msix_vector(pci_dev: TestPciDev, vector: u16) { + let offset: u64 = pci_dev.msix_table_off + u64::from(vector * PCI_MSIX_ENTRY_SIZE); + + let vector_mask = pci_dev.io_readl(pci_dev.msix_table_bar, offset + PCI_MSIX_ENTRY_VECTOR_CTRL); + + pci_dev.io_writel( + pci_dev.msix_table_bar, + offset + PCI_MSIX_ENTRY_VECTOR_CTRL, + vector_mask | PCI_MSIX_ENTRY_CTRL_MASKBIT, + ); +} + +fn unmask_msix_vector(pci_dev: TestPciDev, vector: u16) { + let offset: u64 = pci_dev.msix_table_off + u64::from(vector * PCI_MSIX_ENTRY_SIZE); + + let vector_control = + pci_dev.io_readl(pci_dev.msix_table_bar, offset + PCI_MSIX_ENTRY_VECTOR_CTRL); + + pci_dev.io_writel( + pci_dev.msix_table_bar, + offset + PCI_MSIX_ENTRY_VECTOR_CTRL, + vector_control & !PCI_MSIX_ENTRY_CTRL_MASKBIT, + ); +} + +fn 
hotplug_blk( + test_state: Rc>, + root_port: Rc>, + image_paths: &mut Vec, + hotplug_blk_id: u8, + bus: u8, + slot: u8, + func: u8, +) { + let hotplug_image_path = create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw); + image_paths.push(hotplug_image_path.clone()); + + // Hotplug a block device whose bdf is 2:0:0. + let (add_blk_command, add_device_command) = + build_hotplug_blk_cmd(hotplug_blk_id, hotplug_image_path, bus, slot, func); + let ret = test_state.borrow().qmp(&add_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + let ret = test_state.borrow().qmp(&add_device_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + // Verify the vendor id for the virtio block device hotplugged. + validate_config_value_2byte( + root_port.borrow().rp_dev.pci_bus.clone(), + bus, + slot << 3 | func, + PCI_VENDOR_ID, + VIRTIO_PCI_VENDOR, + 0xFFFF, + ); + + assert!( + wait_root_port_msix(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + + validate_hotplug(root_port.clone()); + handle_isr(root_port.clone()); + power_on_device(root_port); +} + +fn hotunplug_blk( + test_state: Rc>, + blk: Rc>, + root_port: Rc>, + hotunplug_blk_id: u8, +) { + // Hotunplug the virtio block device. + let (delete_device_command, delete_blk_command) = build_hotunplug_blk_cmd(hotunplug_blk_id); + let ret = test_state.borrow().qmp(&delete_device_command); + + assert!( + wait_root_port_msix(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + + validate_hotunplug(root_port.clone()); + handle_isr(root_port.clone()); + power_off_device(root_port.clone()); + + assert_eq!(*ret.get("return").unwrap(), json!({})); + test_state.borrow().wait_qmp_event(); + + let ret = test_state.borrow().qmp(&delete_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + assert!( + wait_root_port_msix(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + validate_cmd_complete(root_port.clone()); + handle_isr(root_port); + // Verify the vendor id for the virtio block device. 
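+ // After the unplug completes the device no longer answers config cycles,
+ // so the read below is expected to return all-ones (0xFFFF).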
+ validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + 0xFFFF, + 0xFFFF, + ); +} + +fn handle_isr(root_port: Rc>) { + let cap_exp_addr = root_port.borrow().rp_dev.find_capability(PCI_CAP_ID_EXP, 0); + let mut status = root_port.borrow().rp_dev.pci_bus.borrow().config_readw( + root_port.borrow().rp_dev.bus_num, + root_port.borrow().rp_dev.devfn, + cap_exp_addr + PCI_EXP_SLTSTA, + ); + + status &= PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC; + root_port.borrow().rp_dev.pci_bus.borrow().config_writew( + root_port.borrow().rp_dev.bus_num, + root_port.borrow().rp_dev.devfn, + cap_exp_addr + PCI_EXP_SLTSTA, + status, + ); +} + +fn validate_hotplug(root_port: Rc>) { + let cap_exp_addr = root_port.borrow().rp_dev.find_capability(PCI_CAP_ID_EXP, 0); + let mask = PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PDC; + validate_config_value_2byte( + root_port.borrow().rp_dev.pci_bus.clone(), + root_port.borrow().rp_dev.bus_num, + root_port.borrow().rp_dev.devfn, + cap_exp_addr + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PDC, + mask, + ); +} + +fn validate_hotunplug(root_port: Rc>) { + let cap_exp_addr = root_port.borrow().rp_dev.find_capability(PCI_CAP_ID_EXP, 0); + let mask = PCI_EXP_SLTSTA_ABP; + validate_config_value_2byte( + root_port.borrow().rp_dev.pci_bus.clone(), + root_port.borrow().rp_dev.bus_num, + root_port.borrow().rp_dev.devfn, + cap_exp_addr + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_ABP, + mask, + ); +} + +fn validate_cmd_complete(root_port: Rc>) { + let cap_exp_addr = root_port.borrow().rp_dev.find_capability(PCI_CAP_ID_EXP, 0); + let mask = PCI_EXP_SLTSTA_CC; + validate_config_value_2byte( + root_port.borrow().rp_dev.pci_bus.clone(), + root_port.borrow().rp_dev.bus_num, + root_port.borrow().rp_dev.devfn, + cap_exp_addr + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_CC, + mask, + ); +} + +/// Query the config of the device which has attached the bus. +#[test] +fn test_pci_device_discovery_001() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(blk_nums, root_port_nums, true, false); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + + // Verify the vendor id for non-existent devices. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + 1, + 1 << 3, + PCI_VENDOR_ID, + 0xFFFF, + 0xFFFF, + ); + + // Verify the vendor id for the virtio block device. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + VIRTIO_PCI_VENDOR, + 0xFFFF, + ); + + // Verify the device id for the virtio block device. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_DEVICE_ID, + BLK_DEVICE_ID, + 0xFFFF, + ); + + tear_down(Some(blk), test_state, alloc, None, Some(image_paths)); +} + +/// Hotunplug the device which has attached the bus and hotplug another block device. +#[test] +fn test_pci_device_discovery_002() { + let blk_nums = 1; + let root_port_nums = 2; + let (test_state, machine, alloc, mut image_paths) = + set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:1:0. + let root_port_1 = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + // Create a root port whose bdf is 0:2:0. 
+ let root_port_2 = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 2 << 3, + ))); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine.clone(), 1, 0, 0); + + // Verify the vendor id for the virtio block device. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + VIRTIO_PCI_VENDOR, + 0xFFFF, + ); + + // Hotplug a block device whose id is 0. + hotunplug_blk(test_state.clone(), blk, root_port_1, 0); + + // Hotplug a block device whose id is 1 and bdf is 2:0:0. + hotplug_blk( + test_state.clone(), + root_port_2, + &mut image_paths, + 1, + 2, + 0, + 0, + ); + + // Create a block device whose bdf is 2:0:0. + let blk = create_blk(machine, 2, 0, 0); + // Verify the vendor id for the virtio block device hotplugged. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + VIRTIO_PCI_VENDOR, + 0xFFFF, + ); + + tear_down(Some(blk), test_state, alloc, None, Some(image_paths)); +} + +/// Repeat hotplug the same device and query the related ecam space(vendor id). +#[test] +fn test_pci_device_discovery_003() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, mut image_paths) = + set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:1:0. + let root_port = Rc::new(RefCell::new(RootPort::new( + machine, + alloc.clone(), + 0, + 1 << 3, + ))); + + // Verify the vendor id for the virtio block device hotplugged. + validate_config_value_2byte( + root_port.borrow().rp_dev.pci_bus.clone(), + root_port_nums, + 0, + PCI_VENDOR_ID, + VIRTIO_PCI_VENDOR, + 0xFFFF, + ); + + let blk_id = 1; + let hotplug_image_path = create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw); + image_paths.push(hotplug_image_path.clone()); + + // Hotplug a block device whose bdf is 1:0:0. + let (add_blk_command, add_device_command) = + build_hotplug_blk_cmd(blk_id, hotplug_image_path, 1, 0, 0); + let ret = test_state.borrow().qmp(&add_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + let ret = test_state.borrow().qmp(&add_device_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + + // Verify the vendor id for the virtio block device hotplugged. + validate_config_value_2byte( + root_port.borrow().rp_dev.pci_bus.clone(), + root_port_nums, + 0, + PCI_VENDOR_ID, + VIRTIO_PCI_VENDOR, + 0xFFFF, + ); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotplug and hotunplug the same device. +#[test] +fn test_pci_device_discovery_004() { + let blk_nums = 0; + let root_port_nums = 1; + let (test_state, machine, alloc, mut image_paths) = + set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:1:0. + let root_port = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + let blk_id = 0; + let hotplug_image_path = create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw); + image_paths.push(hotplug_image_path); + + // Hotplug a block device whose id is 0 and bdf is 1:0:0. + hotplug_blk( + test_state.clone(), + root_port.clone(), + &mut image_paths, + blk_id, + 1, + 0, + 0, + ); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + + // Hotunplug the virtio block device whose id is 0. 
+ hotunplug_blk(test_state.clone(), blk, root_port, blk_id); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Check the permission and initial value of type0 pci device's configuration space. +#[test] +fn test_pci_type0_config() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + + // Verify that the vendor id of type0 device is read-only. + validate_config_perm_2byte( + blk.borrow().pci_dev.clone(), + PCI_VENDOR_ID, + VIRTIO_PCI_VENDOR, + 0x1234, + 0xFFFF, + ); + // Verify that the device id of type0 device is read-only. + validate_config_perm_2byte( + blk.borrow().pci_dev.clone(), + PCI_DEVICE_ID, + BLK_DEVICE_ID, + 0x1234, + 0xFFFF, + ); + + // verify that the lower three bits of the command register of type0 device is readable and + // writable. + validate_config_perm_2byte(blk.borrow().pci_dev.clone(), PCI_COMMAND, 0x4, 0x4, 0x7); + + // verify that the interrupt status of the status register of type0 device is read-only. + let intr_status = blk.borrow().pci_dev.pci_bus.borrow().config_readw( + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_STATUS, + ) & PCI_STATUS_INTERRUPT; + validate_config_perm_2byte( + blk.borrow().pci_dev.clone(), + PCI_STATUS, + intr_status, + !intr_status, + PCI_STATUS_INTERRUPT, + ); + + // verify that the capabilities list of the status register of type0 device is read-only. + let cap_list = blk.borrow().pci_dev.pci_bus.borrow().config_readw( + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_STATUS, + ) & PCI_STATUS_CAP_LIST; + validate_config_perm_2byte( + blk.borrow().pci_dev.clone(), + PCI_STATUS, + cap_list, + !cap_list, + PCI_STATUS_CAP_LIST, + ); + + // verify that the revision id of type0 device is read-only. + let revision_id = blk.borrow().pci_dev.pci_bus.borrow().config_readb( + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_REVISION_ID, + ); + validate_config_perm_1byte( + blk.borrow().pci_dev.clone(), + PCI_REVISION_ID, + 1, + !revision_id, + 0xff, + ); + + let sub_class = blk.borrow().pci_dev.pci_bus.borrow().config_readb( + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_SUB_CLASS_DEVICE, + ); + // verify that the sub class id of type0 device is read-only. + validate_config_perm_1byte( + blk.borrow().pci_dev.clone(), + PCI_SUB_CLASS_DEVICE, + sub_class, + !sub_class, + 0xFF, + ); + + // verify that the header type of type0 device is read-only. + let header_type = blk.borrow().pci_dev.pci_bus.borrow().config_readb( + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_HEADER_TYPE, + ); + validate_config_perm_1byte( + blk.borrow().pci_dev.clone(), + PCI_HEADER_TYPE, + header_type, + !header_type, + 0xff, + ); + + // verify that the capabilities pointer of type0 device is read-only. + let cap_pointer = blk.borrow().pci_dev.pci_bus.borrow().config_readb( + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_CAPABILITY_LIST, + ); + validate_config_perm_1byte( + blk.borrow().pci_dev.clone(), + PCI_CAPABILITY_LIST, + cap_pointer, + !cap_pointer, + 0xFF, + ); + + // verify that the sub vendor id of type0 device is read-only. 
+ let sub_vender_id = blk.borrow().pci_dev.pci_bus.borrow().config_readw( + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_SUBSYSTEM_VENDOR_ID, + ); + validate_config_perm_2byte( + blk.borrow().pci_dev.clone(), + PCI_SUBSYSTEM_VENDOR_ID, + sub_vender_id, + !sub_vender_id, + 0xFFFF, + ); + // verify that the sub system id of type0 device is read-only. + let sub_system_id = blk.borrow().pci_dev.pci_bus.borrow().config_readw( + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_SUBSYSTEM_ID, + ); + validate_config_perm_2byte( + blk.borrow().pci_dev.clone(), + PCI_SUBSYSTEM_ID, + sub_system_id, + !sub_system_id, + 0xFFFF, + ); + + tear_down(Some(blk), test_state, alloc, None, Some(image_paths)); +} + +/// Check the permission and initial value of type1 pci device's configuration space. +#[test] +fn test_pci_type1_config() { + let blk_nums = 0; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:1:0. + let root_port = RootPort::new(machine, alloc.clone(), 0, 1 << 3); + + assert_eq!(root_port.rp_dev.config_readb(PCI_PRIMARY_BUS), 0); + assert_ne!(root_port.rp_dev.config_readb(PCI_SECONDARY_BUS), 0); + assert_ne!(root_port.rp_dev.config_readb(PCI_SUBORDINATE_BUS), 0); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +#[test] +fn test_pci_type1_reset() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:1:0. + let root_port = RootPort::new(machine, alloc.clone(), 0, 1 << 3); + + let command = root_port.rp_dev.config_readw(PCI_COMMAND); + let cmd_memory = command & u16::from(PCI_COMMAND_MEMORY); + + // Bitwise inversion of memory space enable. + let write_cmd = if cmd_memory != 0 { + command & u16::from(!PCI_COMMAND_MEMORY) + } else { + command | u16::from(PCI_COMMAND_MEMORY) + }; + root_port.rp_dev.config_writew(PCI_COMMAND, write_cmd); + let old_command = root_port.rp_dev.config_readw(PCI_COMMAND); + assert_eq!(old_command, write_cmd); + + root_port + .rp_dev + .config_writeb(PCI_BRIDGE_CONTROL, BRIDGE_CTL_SEC_BUS_RESET); + + // Sleep three seconds to wait root port reset second bus. + let sleep_s = time::Duration::from_secs(3); + thread::sleep(sleep_s); + + let new_command = root_port.rp_dev.config_readw(PCI_COMMAND); + // verify that the block device is reset. 
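+ // The secondary bus reset is expected to undo the command register write
+ // performed above, so the value read back should no longer match it.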
+ assert_ne!(old_command, new_command); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Verify that out-of-bounds access to the configuration space +#[test] +fn test_out_boundary_config_access() { + let blk_nums = 0; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + let devfn = 1 << 3 | 1; + let addr = + machine.borrow().pci_bus.borrow().ecam_alloc_ptr + u64::from((devfn as u32) << 12) - 1; + + let write_value = u16::max_value(); + let buf = write_value.to_le_bytes(); + test_state.borrow().memwrite(addr, &buf); + + let mut buf: &[u8] = &test_state.borrow().memread(addr, 2)[0..2]; + let read_value = read_le_u16(&mut buf); + assert_ne!(write_value, read_value); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Verify that out-of-size access to the configuration space +#[test] +fn test_out_size_config_access() { + let blk_nums = 0; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:1:0. + let root_port = RootPort::new(machine, alloc.clone(), 0, 1 << 3); + + let vendor_device_id = root_port.rp_dev.config_readl(PCI_VENDOR_ID); + let command_status = root_port.rp_dev.config_readl(PCI_COMMAND); + let value = root_port.rp_dev.config_readq(0); + assert_ne!( + value, + u64::from(vendor_device_id) << 32 | u64::from(command_status) + ); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Verify that out-of-bounds access to the msix bar space. +#[test] +fn test_out_boundary_msix_access() { + let blk_nums = 0; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:1:0. + let root_port = RootPort::new(machine, alloc.clone(), 0, 1 << 3); + + // Out-of-bounds access to the msix table. + let write_value = u32::max_value(); + root_port.rp_dev.io_writel( + root_port.rp_dev.msix_table_bar, + PCI_MSIX_ENTRY_VECTOR_CTRL + 2, + write_value, + ); + let read_value = root_port.rp_dev.io_readl( + root_port.rp_dev.msix_table_bar, + PCI_MSIX_ENTRY_VECTOR_CTRL + 2, + ); + assert_ne!(write_value, read_value); + + // Out-of-bounds access to the msix pba. + let _read_value = root_port + .rp_dev + .io_readq(root_port.rp_dev.msix_table_bar, 4); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +#[test] +fn test_repeat_io_map_bar() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + // Verify that the function of the block device is normal. + validate_std_blk_io(blk.clone(), test_state.clone(), vqs.clone(), alloc.clone()); + + let old_feature = blk.borrow().get_guest_features(); + let old_bar_addr = blk.borrow().bar; + + // IO map the bar of virtio block device again. + let bar_idx = blk.borrow().bar_idx; + let bar_addr = blk.borrow().pci_dev.io_map(bar_idx); + blk.borrow_mut().bar = bar_addr; + // Verify that the configuration of virtio block can be read normally. + assert_eq!(blk.borrow().get_guest_features(), old_feature); + // Verify that the common config bar of virtio block has changed. 
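+ // Remapping with io_map() is expected to place the BAR at a freshly allocated
+ // guest address, while the device keeps working at the new location.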
+ assert_ne!(blk.borrow().bar, old_bar_addr); + + // Verify that the function of the block device is normal. + validate_std_blk_io(blk.clone(), test_state.clone(), vqs.clone(), alloc.clone()); + + tear_down(Some(blk), test_state, alloc, Some(vqs), Some(image_paths)); +} + +#[test] +fn test_pci_type0_msix_config() { + let blk_nums = 1; + let root_port_nums = 0; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, false, false); + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 0, 1, 0); + + // Verify that there is only one msix capability addr of the type0 pci device. + let blk_cap_msix_addrs = lookup_all_cap_addr(PCI_CAP_ID_MSIX, blk.borrow().pci_dev.clone()); + assert_eq!(blk_cap_msix_addrs.len(), 1); + + // Verify that the table size of msix is read-only. + let table_size = blk + .borrow() + .pci_dev + .config_readw(blk_cap_msix_addrs[0] + PCI_MSIX_MSG_CTL) + & PCI_MSIX_MSG_CTL_TSIZE; + validate_config_perm_2byte( + blk.borrow().pci_dev.clone(), + blk_cap_msix_addrs[0] + PCI_MSIX_MSG_CTL, + table_size, + !table_size, + PCI_MSIX_MSG_CTL_TSIZE, + ); + + // Verify that the table size of msix is read-only. + let msix_table = blk + .borrow() + .pci_dev + .config_readl(blk_cap_msix_addrs[0] + PCI_MSIX_TABLE); + let msix_table_bir = msix_table & PCI_MSIX_TABLE_BIR; + // Verify that the bir of table of the type0 pci device is less than or equal to 5. + assert!(msix_table_bir <= 5); + // Verify that the msix table of the type0 pci device is read-only. + validate_config_perm_4byte( + blk.borrow().pci_dev.clone(), + blk_cap_msix_addrs[0] + PCI_MSIX_TABLE, + msix_table, + !msix_table, + 0xFFFFFFFF, + ); + + let msix_pba = blk + .borrow() + .pci_dev + .config_readl(blk_cap_msix_addrs[0] + PCI_MSIX_PBA); + let msix_pba_bir = msix_pba & PCI_MSIX_PBA_BIR; + // Verify that the bir of pba of the type0 pci device is less than or equal to 5. + assert!(msix_pba_bir <= 5); + // Verify that the msix pba of the type0 pci device is read-only. + validate_config_perm_4byte( + blk.borrow().pci_dev.clone(), + blk_cap_msix_addrs[0] + PCI_MSIX_PBA, + msix_pba, + !msix_pba, + 0xFFFFFFFF, + ); + + tear_down(Some(blk), test_state, alloc, None, Some(image_paths)); +} + +/// Test whether the Function Mask bit in the control register for MSI-X works well, +/// which means that when it's set, msix pends notification, and starts to notify as +/// soon as the mask bit is cleared by the OS. +#[test] +fn test_pci_msix_global_ctl() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + set_msix_disable(blk.borrow().pci_dev.clone()); + let free_head = simple_blk_io_req( + blk.clone(), + test_state.clone(), + vqs[0].clone(), + alloc.clone(), + ); + + // Verify that the os can not receive msix interrupt when msix is disabled. + assert!(wait_intr_timeout(blk.clone(), vqs[0].clone(), TIMEOUT_S)); + + blk.borrow().poll_used_elem( + test_state.clone(), + vqs[0].clone(), + free_head, + TIMEOUT_S, + &mut None, + false, + ); + + set_msix_enable(blk.borrow().pci_dev.clone()); + let _free_head = simple_blk_io_req( + blk.clone(), + test_state.clone(), + vqs[0].clone(), + alloc.clone(), + ); + // Verify that the os can receive msix interrupt when msix is enabled. 
+ assert!(!wait_intr_timeout(blk.clone(), vqs[0].clone(), TIMEOUT_S)); + + mask_msix_global(blk.borrow().pci_dev.clone()); + + let free_head = simple_blk_io_req( + blk.clone(), + test_state.clone(), + vqs[0].clone(), + alloc.clone(), + ); + // Verify that the os can not receive msix interrupt when the function of vectors is masked. + assert!(wait_intr_timeout(blk.clone(), vqs[0].clone(), TIMEOUT_S)); + + unmask_msix_global(blk.borrow().pci_dev.clone()); + // Verify that the os can receive msix interrupt when the function of vectors is unmasked. + assert!(!wait_intr_timeout(blk.clone(), vqs[0].clone(), TIMEOUT_S)); + blk.borrow().poll_used_elem( + test_state.clone(), + vqs[0].clone(), + free_head, + TIMEOUT_S, + &mut None, + false, + ); + + tear_down(Some(blk), test_state, alloc, Some(vqs), Some(image_paths)); +} + +/// Test whether the Mask bit in the vector register in msix table works well, +/// which means that when it's set, msix pends notification of the related vector, +/// and starts to notify as soon as the mask bit is cleared by the OS. +#[test] +fn test_pci_msix_local_ctl() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + mask_msix_vector(blk.borrow().pci_dev.clone(), 1); + let free_head = simple_blk_io_req( + blk.clone(), + test_state.clone(), + vqs[0].clone(), + alloc.clone(), + ); + // Verify that the os can not receive msix interrupt when the vectors of virtqueue is masked. + assert!(wait_intr_timeout(blk.clone(), vqs[0].clone(), TIMEOUT_S)); + + unmask_msix_vector(blk.borrow().pci_dev.clone(), 1); + // Verify that the os canreceive msix interrupt when the vectors of virtqueue is unmasked. + assert!(!wait_intr_timeout(blk.clone(), vqs[0].clone(), TIMEOUT_S)); + blk.borrow().poll_used_elem( + test_state.clone(), + vqs[0].clone(), + free_head, + TIMEOUT_S, + &mut None, + false, + ); + + tear_down(Some(blk), test_state, alloc, Some(vqs), Some(image_paths)); +} + +#[test] +fn test_alloc_abnormal_vector() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + + // 1. Init device. + blk.borrow_mut().reset(); + blk.borrow_mut().set_acknowledge(); + blk.borrow_mut().set_driver(); + blk.borrow_mut().negotiate_features(1 << VIRTIO_F_VERSION_1); + blk.borrow_mut().set_features_ok(); + blk.borrow_mut().pci_dev.enable_msix(None); + blk.borrow_mut() + .setup_msix_configuration_vector(alloc.clone(), 0); + + let queue_num = blk.borrow().get_queue_nums(); + + let virtqueue = blk + .borrow() + .setup_virtqueue(test_state.clone(), alloc.clone(), 0_u16); + blk.borrow() + .setup_virtqueue_intr((queue_num + 2) as u16, alloc.clone(), virtqueue.clone()); + blk.borrow().set_driver_ok(); + + let _free_head = simple_blk_io_req( + blk.clone(), + test_state.clone(), + virtqueue.clone(), + alloc.clone(), + ); + // Verify that the os can not receive msix interrupt when the vectors of virtqueue is . 
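+ // (The vector configured above, queue_num + 2, is outside the valid range for
+ // this device, so no interrupt is expected to be delivered.)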
+ assert!(wait_intr_timeout(blk.clone(), virtqueue.clone(), TIMEOUT_S)); + + blk.borrow_mut() + .cleanup_virtqueue(alloc.clone(), virtqueue.borrow().desc); + tear_down(Some(blk), test_state, alloc, None, Some(image_paths)); +} + +#[test] +fn test_intx_basic() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + let blk = create_blk(machine, 1, 0, 0); + + // 1. Init device. + blk.borrow_mut().reset(); + blk.borrow_mut().set_acknowledge(); + blk.borrow_mut().set_driver(); + blk.borrow_mut().negotiate_features(1 << VIRTIO_F_VERSION_1); + blk.borrow_mut().set_features_ok(); + + set_msix_disable(blk.borrow().pci_dev.clone()); + blk.borrow_mut().pci_dev.set_intx_irq_num(1_u8); + + let virtqueue = blk + .borrow() + .setup_virtqueue(test_state.clone(), alloc.clone(), 0_u16); + blk.borrow().set_driver_ok(); + + let free_head = simple_blk_io_req( + blk.clone(), + test_state.clone(), + virtqueue.clone(), + alloc.clone(), + ); + // Verify that the os can receive INTx interrupt when msix is disabled. + assert!(!wait_intr_timeout( + blk.clone(), + virtqueue.clone(), + TIMEOUT_S + )); + let mut intr_status = blk.borrow().pci_dev.config_readw(PCI_STATUS) & PCI_STATUS_INTERRUPT != 0; + assert!(intr_status); + + let isr = blk.borrow().isr_readb(); + assert_eq!(isr, 1); + intr_status = blk.borrow().pci_dev.config_readw(PCI_STATUS) & PCI_STATUS_INTERRUPT != 0; + assert!(!intr_status); + + blk.borrow().pci_dev.eoi_intx(); + + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueue.clone(), + free_head, + TIMEOUT_S, + &mut None, + false, + ); + + blk.borrow_mut() + .cleanup_virtqueue(alloc.clone(), virtqueue.borrow().desc); + tear_down(Some(blk), test_state, alloc, None, Some(image_paths)); +} + +#[test] +fn test_intx_disable() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + let blk = create_blk(machine, 1, 0, 0); + + // 1. Init device. + blk.borrow_mut().reset(); + blk.borrow_mut().set_acknowledge(); + blk.borrow_mut().set_driver(); + blk.borrow_mut().negotiate_features(1 << VIRTIO_F_VERSION_1); + blk.borrow_mut().set_features_ok(); + + set_msix_disable(blk.borrow().pci_dev.clone()); + blk.borrow_mut().pci_dev.set_intx_irq_num(1_u8); + + let virtqueue = blk + .borrow() + .setup_virtqueue(test_state.clone(), alloc.clone(), 0_u16); + blk.borrow().set_driver_ok(); + + // Disable INTx. + let command = blk.borrow().pci_dev.config_readw(PCI_COMMAND); + blk.borrow() + .pci_dev + .config_writew(PCI_COMMAND, command | PCI_COMMAND_INTX_DISABLE); + + let free_head = simple_blk_io_req( + blk.clone(), + test_state.clone(), + virtqueue.clone(), + alloc.clone(), + ); + // Verify that the os can not receive INTx interrupt when msix is disabled. + assert!(wait_intr_timeout(blk.clone(), virtqueue.clone(), TIMEOUT_S)); + + let isr = blk.borrow().isr_readb(); + assert_eq!(isr, 1); + + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueue.clone(), + free_head, + TIMEOUT_S, + &mut None, + false, + ); + + // Enable INTx. + blk.borrow() + .pci_dev + .config_writew(PCI_COMMAND, command & !PCI_COMMAND_INTX_DISABLE); + + let _free_head = simple_blk_io_req( + blk.clone(), + test_state.clone(), + virtqueue.clone(), + alloc.clone(), + ); + + // Verify that the os can receive INTx interrupt when msix is disabled. 
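+ // INTx was re-enabled above by clearing PCI_COMMAND_INTX_DISABLE, so this
+ // time the legacy interrupt should arrive.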
+ assert!(!wait_intr_timeout( + blk.clone(), + virtqueue.clone(), + TIMEOUT_S + )); + let isr = blk.borrow().isr_readb(); + assert_eq!(isr, 1); + + blk.borrow().poll_used_elem( + test_state.clone(), + virtqueue.clone(), + free_head, + TIMEOUT_S, + &mut None, + false, + ); + + blk.borrow_mut() + .cleanup_virtqueue(alloc.clone(), virtqueue.borrow().desc); + tear_down(Some(blk), test_state, alloc, None, Some(image_paths)); +} + +/// Basic hotplug testcase. +#[test] +fn test_pci_hotplug_001() { + let blk_nums = 0; + let root_port_nums = 1; + let (test_state, machine, alloc, mut image_paths) = + set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:1:0. + let root_port = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + // Hotplug a block device whose id is 1 and bdf is 1:0:0. + hotplug_blk(test_state.clone(), root_port, &mut image_paths, 0, 1, 0, 0); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + validate_std_blk_io(blk.clone(), test_state.clone(), vqs.clone(), alloc.clone()); + + tear_down(Some(blk), test_state, alloc, Some(vqs), Some(image_paths)); +} + +/// Hotplug two devices at the same time. +#[test] +fn test_pci_hotplug_002() { + let blk_nums = 0; + let root_port_nums = 2; + let (test_state, machine, alloc, mut image_paths) = + set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:1:0. + let root_port_1 = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + // Create a root port whose bdf is 0:2:0. + let root_port_2 = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 2 << 3, + ))); + + // Hotplug a block device whose id is 1 and bdf is 1:0:0. + hotplug_blk( + test_state.clone(), + root_port_1, + &mut image_paths, + 1, + 1, + 0, + 0, + ); + let blk_1 = create_blk(machine.clone(), 1, 0, 0); + + // Hotplug a block device whose id is 2 and bdf is 2:0:0. + hotplug_blk( + test_state.clone(), + root_port_2, + &mut image_paths, + 2, + 2, + 0, + 0, + ); + let blk_2 = create_blk(machine, 2, 0, 0); + + validate_blk_io_success(blk_1, test_state.clone(), alloc.clone()); + validate_blk_io_success(blk_2, test_state.clone(), alloc.clone()); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotplug the device in non-zero slot. +#[test] +fn test_pci_hotplug_003() { + let blk_nums = 0; + let root_port_nums = 1; + let (test_state, _machine, alloc, mut image_paths) = + set_up(root_port_nums, blk_nums, true, false); + + let hotplug_image_path = create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw); + image_paths.push(hotplug_image_path.clone()); + + // Hotplug a block device whose id is 0, bdf is 1:1:0. + let (add_blk_command, add_device_command) = + build_hotplug_blk_cmd(0, hotplug_image_path, 1, 1, 0); + let ret = test_state.borrow().qmp(&add_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + // Verify that hotpluging the device in non-zero slot will fail. + let ret = test_state.borrow().qmp(&add_device_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotplug the device in the bus 0. 
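+/// Devices on the root bus (pcie.0) do not support hotplug, so device_add is
+/// expected to be rejected.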
+#[test] +fn test_pci_hotplug_004() { + let blk_nums = 0; + let root_port_nums = 0; + let (test_state, _machine, alloc, mut image_paths) = + set_up(root_port_nums, blk_nums, true, false); + + let hotplug_image_path = create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw); + image_paths.push(hotplug_image_path.clone()); + + let hotplug_blk_id = 1; + let (add_blk_command, add_device_command) = + build_hotplug_blk_cmd(hotplug_blk_id, hotplug_image_path, 0, 1, 0); + let ret = test_state.borrow().qmp(&add_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + let ret = test_state.borrow().qmp(&add_device_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotplug a device which dosn't have the backend file. +#[test] +fn test_pci_hotplug_005() { + let blk_nums = 0; + let root_port_nums = 1; + let (test_state, _machine, alloc, mut image_paths) = + set_up(root_port_nums, blk_nums, true, false); + + let hotplug_image_path = create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw); + image_paths.push(hotplug_image_path); + + let hotplug_blk_id = 0; + let (add_blk_command, add_device_command) = + build_hotplug_blk_cmd(hotplug_blk_id, String::from(""), 1, 0, 0); + + let ret = test_state.borrow().qmp(&add_blk_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + let ret = test_state.borrow().qmp(&add_device_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotplug a device which dosn't have the backend file. +#[test] +fn test_pci_hotplug_006() { + let blk_nums = 0; + let root_port_nums = 1; + let (test_state, _machine, alloc, mut image_paths) = + set_up(root_port_nums, blk_nums, true, false); + + let hotplug_image_path = create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw); + image_paths.push(hotplug_image_path.clone()); + + let hotplug_blk_id = 0; + let (add_blk_command, add_device_command) = + build_hotplug_blk_cmd(hotplug_blk_id, hotplug_image_path, 2, 0, 0); + + let ret = test_state.borrow().qmp(&add_blk_command); + + assert_eq!(*ret.get("return").unwrap(), json!({})); + let ret = test_state.borrow().qmp(&add_device_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotplug using INTx interrupt testcase. +#[test] +fn test_pci_hotplug_007() { + let blk_nums = 0; + let root_port_nums = 1; + let (test_state, machine, alloc, mut image_paths) = + set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:1:0. + let root_port = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + set_msix_disable(root_port.borrow().rp_dev.clone()); + root_port.borrow_mut().rp_dev.set_intx_irq_num(1_u8); + + // Hotplug a block device whose id is 1 and bdf is 1:0:0. + let bus = 1; + let slot = 0; + let func = 0; + let hotplug_blk_id = 0; + let hotplug_image_path = create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw); + image_paths.push(hotplug_image_path.clone()); + + // Hotplug a block device whose bdf is 1:0:0. 
+ let (add_blk_command, add_device_command) = + build_hotplug_blk_cmd(hotplug_blk_id, hotplug_image_path, bus, slot, 0); + let ret = test_state.borrow().qmp(&add_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + let ret = test_state.borrow().qmp(&add_device_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + // Verify the vendor id for the virtio block device hotplugged. + validate_config_value_2byte( + root_port.borrow().rp_dev.pci_bus.clone(), + bus, + slot << 3 | func, + PCI_VENDOR_ID, + VIRTIO_PCI_VENDOR, + 0xFFFF, + ); + + assert!( + wait_root_port_intx(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + + validate_hotplug(root_port.clone()); + handle_isr(root_port.clone()); + power_on_device(root_port); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + validate_std_blk_io(blk.clone(), test_state.clone(), vqs.clone(), alloc.clone()); + + tear_down(Some(blk), test_state, alloc, Some(vqs), Some(image_paths)); +} + +/// Basic hotunplug testcase. +#[test] +fn test_pci_hotunplug_001() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create root port whose bdf is 0:1:0. + let root_port = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + + // Hotunplug the block device whose bdf is 1:0:0. + hotunplug_blk(test_state.clone(), blk, root_port, 0); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotunplug a device that does not exist. +#[test] +fn test_pci_hotunplug_002() { + let blk_nums = 0; + let root_port_nums = 1; + let (test_state, _machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Hotunplug a device that does not exist. + let (delete_device_command, delete_blk_command) = build_hotunplug_blk_cmd(0); + let ret = test_state.borrow().qmp(&delete_device_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + let ret = test_state.borrow().qmp(&delete_blk_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotunplug a device but power indicator of root port is abnormal. +#[test] +fn test_pci_hotunplug_003() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:2:0. + let root_port = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + + let unplug_blk_id = 0; + // Hotunplug the block device attaching the root port. + let (delete_device_command, delete_blk_command) = build_hotunplug_blk_cmd(unplug_blk_id); + let ret = test_state.borrow().qmp(&delete_device_command); + assert!( + wait_root_port_msix(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + + // The block device will not be unplugged when it is power on. 
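+ // Powering the slot back on means the pending unplug is never carried out, so
+ // the device is expected to stay present and its vendor id remains readable.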
+ power_on_device(root_port.clone()); + assert_eq!(*ret.get("return").unwrap(), json!({})); + let ret = test_state.borrow().qmp(&delete_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + // Verify the vendor id for the virtio block device is correct. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + VIRTIO_PCI_VENDOR, + 0xFFFF, + ); + + let (delete_device_command, delete_blk_command) = build_hotunplug_blk_cmd(unplug_blk_id); + let ret = test_state.borrow().qmp(&delete_device_command); + + // The block device will not be unplugged when indicator of power is blinking. + power_indicator_blink(root_port.clone()); + assert_eq!(*ret.get("return").unwrap(), json!({})); + let ret = test_state.borrow().qmp(&delete_blk_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + + // Verify the vendor id for the virtio block device. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + VIRTIO_PCI_VENDOR, + 0xFFFF, + ); + + let (delete_device_command, _delete_blk_command) = build_hotunplug_blk_cmd(unplug_blk_id); + let ret = test_state.borrow().qmp(&delete_device_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + + // The block device will be unplugged when indicator of power and slot is power off. + power_off_device(root_port); + test_state.borrow().wait_qmp_event(); + + // Verify the vendor id for the virtio block device. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + 0xFFFF, + 0xFFFF, + ); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotunplug two device at the same time. +#[test] +fn test_pci_hotunplug_004() { + let blk_nums = 2; + let root_port_nums = 2; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create root port whose bdf is 0:1:0. + let root_port_1 = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + // Create root port whose bdf is 0:2:0. + let root_port_2 = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 2 << 3, + ))); + + // Create a block device whose bdf is 1:0:0. + let blk_1 = create_blk(machine.clone(), 1, 0, 0); + + // Create a block device whose bdf is 2:0:0. + let blk_2 = create_blk(machine, 2, 0, 0); + + let unplug_blk_id = 0; + let (delete_device_command, delete_blk_command_1) = build_hotunplug_blk_cmd(unplug_blk_id); + let ret = test_state.borrow().qmp(&delete_device_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + let unplug_blk_id = 1; + let (delete_device_command, delete_blk_command_2) = build_hotunplug_blk_cmd(unplug_blk_id); + let ret = test_state.borrow().qmp(&delete_device_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + assert!( + wait_root_port_msix(root_port_1.clone()), + "Wait for interrupt of root port timeout" + ); + + assert!( + wait_root_port_msix(root_port_2.clone()), + "Wait for interrupt of root port timeout" + ); + + power_off_device(root_port_1); + test_state.borrow().wait_qmp_event(); + + power_off_device(root_port_2); + test_state.borrow().wait_qmp_event(); + + // The block device will be unplugged when indicator of power and slot is power off. 
+ let ret = test_state.borrow().qmp(&delete_blk_command_1); + assert_eq!(*ret.get("return").unwrap(), json!({})); + // The block device will be unplugged when indicator of power and slot is power off. + let ret = test_state.borrow().qmp(&delete_blk_command_2); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + // Verify the vendor id for the virtio block device. + validate_config_value_2byte( + blk_1.borrow().pci_dev.pci_bus.clone(), + blk_1.borrow().pci_dev.bus_num, + blk_1.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + 0xFFFF, + 0xFFFF, + ); + + // Verify the vendor id for the virtio block device. + validate_config_value_2byte( + blk_2.borrow().pci_dev.pci_bus.clone(), + blk_2.borrow().pci_dev.bus_num, + blk_2.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + 0xFFFF, + 0xFFFF, + ); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Repeate hotunplug the same device. +#[test] +fn test_pci_hotunplug_005() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create root port whose bdf is 0:1:0. + let root_port = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + let blk = create_blk(machine, 1, 0, 0); + + // Hotplug the block device whose id is 0 and bdf is 1:0:0. + hotunplug_blk(test_state.clone(), blk, root_port, 0); + + let (delete_device_command, _delete_blk_command) = build_hotunplug_blk_cmd(0); + let ret = test_state.borrow().qmp(&delete_device_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotunplug the device attaching the host bus +#[test] +fn test_pci_hotunplug_006() { + let blk_nums = 1; + let root_port_nums = 0; + let (test_state, _machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, false, false); + + let unplug_blk_id = 0; + let (delete_device_command, _delete_blk_command) = build_hotunplug_blk_cmd(unplug_blk_id); + let ret = test_state.borrow().qmp(&delete_device_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Guest sets PIC/PCC twice during hotunplug, the device ignores the 2nd write to speed up +/// hotunplug. +#[test] +fn test_pci_hotunplug_007() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:2:0. + let root_port = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + + let unplug_blk_id = 0; + // Hotunplug the block device attaching the root port. + let (delete_device_command, _delete_blk_command) = build_hotunplug_blk_cmd(unplug_blk_id); + let _ret = test_state.borrow().qmp(&delete_device_command); + assert!( + wait_root_port_msix(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + + // The block device will be unplugged when indicator of power and slot is power off. + power_off_device(root_port.clone()); + // Trigger a 2nd write to PIC/PCC, which will be ignored by the device, and causes no harm. + power_off_device(root_port); + + test_state.borrow().wait_qmp_event(); + + // Verify the vendor id for the virtio block device. 
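+    // Config reads from a function that is no longer present return all ones, so the expected
+    // vendor id (and mask) below is 0xFFFF once the unplug has completed.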
+ validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + 0xFFFF, + 0xFFFF, + ); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotunplug using INTx interrupt testcase. +#[test] +fn test_pci_hotunplug_008() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create root port whose bdf is 0:1:0. + let root_port = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + set_msix_disable(root_port.borrow().rp_dev.clone()); + root_port.borrow_mut().rp_dev.set_intx_irq_num(1_u8); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + + // Hotunplug the block device whose bdf is 1:0:0. + let hotunplug_blk_id = 0; + let (delete_device_command, delete_blk_command) = build_hotunplug_blk_cmd(hotunplug_blk_id); + let ret = test_state.borrow().qmp(&delete_device_command); + + assert!( + wait_root_port_intx(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + + validate_hotunplug(root_port.clone()); + handle_isr(root_port.clone()); + power_off_device(root_port.clone()); + + assert_eq!(*ret.get("return").unwrap(), json!({})); + test_state.borrow().wait_qmp_event(); + + let ret = test_state.borrow().qmp(&delete_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + assert!( + wait_root_port_intx(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + validate_cmd_complete(root_port.clone()); + handle_isr(root_port); + // Verify the vendor id for the virtio block device. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + 0xFFFF, + 0xFFFF, + ); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotplug and hotunplug in sequence. +#[test] +fn test_pci_hotplug_combine_001() { + let blk_nums = 0; + let root_port_nums = 1; + let (test_state, machine, alloc, mut image_paths) = + set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:2:0. + let root_port = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + let hotplug_blk_id = 0; + let hotplug_image_path = create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw); + image_paths.push(hotplug_image_path.clone()); + + // Hotplug a block device whose bdf is 1:0:0. + let (add_blk_command, add_device_command) = + build_hotplug_blk_cmd(hotplug_blk_id, hotplug_image_path, 1, 0, 0); + let ret = test_state.borrow().qmp(&add_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + let ret = test_state.borrow().qmp(&add_device_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + assert!( + wait_root_port_msix(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + + handle_isr(root_port.clone()); + power_on_device(root_port.clone()); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine.clone(), 1, 0, 0); + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + // Verify that the function of the block device is normal. 
+ validate_std_blk_io(blk.clone(), test_state.clone(), vqs, alloc.clone()); + + let (delete_device_command, delete_blk_command) = build_hotunplug_blk_cmd(hotplug_blk_id); + let ret = test_state.borrow().qmp(&delete_device_command); + assert!( + wait_root_port_msix(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + + handle_isr(root_port.clone()); + power_off_device(root_port.clone()); + + assert_eq!(*ret.get("return").unwrap(), json!({})); + test_state.borrow().wait_qmp_event(); + let ret = test_state.borrow().qmp(&delete_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + // Verify the vendor id for the virtio block device. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + 0xFFFF, + 0xFFFF, + ); + + let hotplug_blk_id = 1; + let hotplug_image_path = create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw); + image_paths.push(hotplug_image_path.clone()); + + // Hotplug a block device whose bdf is 1:0:0. + let (add_blk_command, add_device_command) = + build_hotplug_blk_cmd(hotplug_blk_id, hotplug_image_path, 1, 0, 0); + let ret = test_state.borrow().qmp(&add_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + let ret = test_state.borrow().qmp(&add_device_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + assert!( + wait_root_port_msix(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + + handle_isr(root_port.clone()); + power_on_device(root_port.clone()); + + // Verify the virtio block device has been plugged. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + VIRTIO_PCI_VENDOR, + 0xFFFF, + ); + + let blk = create_blk(machine, 1, 0, 0); + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + // Verify that the function of the block device is normal. + validate_std_blk_io(blk.clone(), test_state.clone(), vqs, alloc.clone()); + + let (delete_device_command, delete_blk_command) = build_hotunplug_blk_cmd(hotplug_blk_id); + let ret = test_state.borrow().qmp(&delete_device_command); + assert!( + wait_root_port_msix(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + + handle_isr(root_port.clone()); + power_off_device(root_port); + + assert_eq!(*ret.get("return").unwrap(), json!({})); + test_state.borrow().wait_qmp_event(); + let ret = test_state.borrow().qmp(&delete_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + // Verify that the virtio block device has been unplugged. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + 0xFFFF, + 0xFFFF, + ); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotunplugging during hotplugging. +#[test] +fn test_pci_hotplug_combine_002() { + let blk_nums = 0; + let root_port_nums = 1; + let (test_state, machine, alloc, mut image_paths) = + set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:1:0. + let root_port = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + let hotplug_blk_id = 0; + // Hotplug a block device whose id is 0 and bdf is 1:0:0. 
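+    // hotplug_blk presumably wraps the QMP blockdev/device_add commands plus the root port
+    // interrupt handling and power-on sequence spelled out in test_pci_hotplug_combine_001.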
+ hotplug_blk( + test_state.clone(), + root_port.clone(), + &mut image_paths, + hotplug_blk_id, + 1, + 0, + 0, + ); + + power_indicator_off(root_port.clone()); + + // Create a block device whose bdf is 1:0:0. + let blk = create_blk(machine, 1, 0, 0); + + validate_blk_io_success(blk.clone(), test_state.clone(), alloc.clone()); + + // Hotplug the block device whose id is 0 and bdf is 1:0:0. + let (delete_device_command, delete_blk_command) = build_hotunplug_blk_cmd(0); + let ret = test_state.borrow().qmp(&delete_device_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + assert!( + wait_root_port_msix(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + + handle_isr(root_port.clone()); + power_indicator_blink(root_port.clone()); + + let ret = test_state.borrow().qmp(&delete_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + // Verify the virtio block device has not been unplugged. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + VIRTIO_PCI_VENDOR, + 0xFFFF, + ); + + let (delete_device_command, delete_blk_command) = build_hotunplug_blk_cmd(hotplug_blk_id); + let ret = test_state.borrow().qmp(&delete_device_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + + assert!( + wait_root_port_msix(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + + handle_isr(root_port.clone()); + power_off_device(root_port); + test_state.borrow().wait_qmp_event(); + + let ret = test_state.borrow().qmp(&delete_blk_command); + assert!(!(*ret.get("error").unwrap()).is_null()); + + // Verify that the virtio block device has been unplugged. + validate_config_value_2byte( + blk.borrow().pci_dev.pci_bus.clone(), + blk.borrow().pci_dev.bus_num, + blk.borrow().pci_dev.devfn, + PCI_VENDOR_ID, + 0xFFFF, + 0xFFFF, + ); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// Hotplugging during hotunpluging. +#[test] +fn test_pci_hotplug_combine_003() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, mut image_paths) = + set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:1:0. + let root_port = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + + let hotunplug_blk_id = 0; + // Hotunplug the block device attaching the root port; + let (delete_device_command, delete_blk_command) = build_hotunplug_blk_cmd(hotunplug_blk_id); + let ret = test_state.borrow().qmp(&delete_device_command); + assert!( + wait_root_port_msix(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + assert_eq!(*ret.get("return").unwrap(), json!({})); + let ret = test_state.borrow().qmp(&delete_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + let hotplug_image_path = create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw); + image_paths.push(hotplug_image_path.clone()); + + // Hotplug a block device whose bdf is 1:0:0. 
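+    // The hotunplug above has not been completed yet (the slot is still powered on), so the
+    // device_add below is expected to fail; only after power_off_device() can the slot be reused.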
+    let (add_blk_command, add_device_command) =
+        build_hotplug_blk_cmd(hotunplug_blk_id, hotplug_image_path, 1, 0, 0);
+    let ret = test_state.borrow().qmp(&add_blk_command);
+    assert_eq!(*ret.get("return").unwrap(), json!({}));
+    let ret = test_state.borrow().qmp(&add_device_command);
+    assert!(!(*ret.get("error").unwrap()).is_null());
+
+    power_off_device(root_port);
+    test_state.borrow().wait_qmp_event();
+
+    let hotplug_image_path = create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw);
+    image_paths.push(hotplug_image_path.clone());
+    // Hotplug a block device whose bdf is 1:0:0.
+    let (add_blk_command, add_device_command) =
+        build_hotplug_blk_cmd(hotunplug_blk_id, hotplug_image_path, 1, 0, 0);
+    let ret = test_state.borrow().qmp(&add_blk_command);
+    assert!(!(*ret.get("error").unwrap()).is_null());
+    let ret = test_state.borrow().qmp(&add_device_command);
+    assert_eq!(*ret.get("return").unwrap(), json!({}));
+    let blk = create_blk(machine, 1, 0, 0);
+    let vqs = blk.borrow_mut().init_device(
+        test_state.clone(),
+        alloc.clone(),
+        1 << VIRTIO_F_VERSION_1,
+        1,
+    );
+    // Verify that the function of the block device is normal.
+    validate_std_blk_io(blk.clone(), test_state.clone(), vqs.clone(), alloc.clone());
+
+    tear_down(Some(blk), test_state, alloc, Some(vqs), Some(image_paths));
+}
+
+/// Validate express capability of the root port.
+#[test]
+fn test_pci_root_port_exp_cap() {
+    let blk_nums = 0;
+    let root_port_nums = 1;
+    let (test_state, machine, alloc, mut image_paths) =
+        set_up(root_port_nums, blk_nums, true, false);
+    let nlw_range: Vec<u16> = [1, 2, 4, 8, 16, 32].to_vec();
+    let cls_range: Vec<u16> = [1, 2, 3, 4, 5, 6, 7].to_vec();
+
+    // Create a root port whose bdf is 0:1:0.
+    let root_port = Rc::new(RefCell::new(RootPort::new(
+        machine.clone(),
+        alloc.clone(),
+        0,
+        1 << 3,
+    )));
+
+    let cap_exp_addr = root_port.borrow().rp_dev.find_capability(PCI_CAP_ID_EXP, 0);
+
+    let negotiated_link_width_mask = PCI_EXP_LNKSTA_NLW;
+    let negotiated_link_width = (root_port
+        .borrow()
+        .rp_dev
+        .config_readw(cap_exp_addr + PCI_EXP_LNKSTA)
+        & negotiated_link_width_mask)
+        >> 4;
+    assert!(nlw_range.binary_search(&negotiated_link_width).is_ok());
+
+    let current_link_speed_mask = PCI_EXP_LNKSTA_CLS;
+    let current_link_speed = root_port
+        .borrow()
+        .rp_dev
+        .config_readw(cap_exp_addr + PCI_EXP_LNKSTA)
+        & current_link_speed_mask;
+    assert!(cls_range.binary_search(&current_link_speed).is_ok());
+
+    let dllla_mask = PCI_EXP_LNKSTA_DLLLA;
+    validate_config_value_2byte(
+        root_port.borrow().rp_dev.pci_bus.clone(),
+        root_port.borrow().rp_dev.bus_num,
+        root_port.borrow().rp_dev.devfn,
+        cap_exp_addr + PCI_EXP_LNKSTA,
+        0,
+        dllla_mask,
+    );
+
+    let abp_mask = PCI_EXP_SLTSTA_ABP;
+    validate_config_value_2byte(
+        root_port.borrow().rp_dev.pci_bus.clone(),
+        root_port.borrow().rp_dev.bus_num,
+        root_port.borrow().rp_dev.devfn,
+        cap_exp_addr + PCI_EXP_SLTSTA,
+        0,
+        abp_mask,
+    );
+
+    let pds_mask = PCI_EXP_SLTSTA_PDS;
+    validate_config_value_2byte(
+        root_port.borrow().rp_dev.pci_bus.clone(),
+        root_port.borrow().rp_dev.bus_num,
+        root_port.borrow().rp_dev.devfn,
+        cap_exp_addr + PCI_EXP_SLTSTA,
+        0,
+        pds_mask,
+    );
+
+    let pdc_mask = PCI_EXP_SLTSTA_PDC;
+    validate_config_value_2byte(
+        root_port.borrow().rp_dev.pci_bus.clone(),
+        root_port.borrow().rp_dev.bus_num,
+        root_port.borrow().rp_dev.devfn,
+        cap_exp_addr + PCI_EXP_SLTSTA,
+        0,
+        pdc_mask,
+    );
+
+    let pcc_mask = PCI_EXP_SLTCTL_PCC;
+    validate_config_value_2byte(
+        root_port.borrow().rp_dev.pci_bus.clone(),
+        root_port.borrow().rp_dev.bus_num,
+        root_port.borrow().rp_dev.devfn,
+        cap_exp_addr + PCI_EXP_SLTCTL,
+        PCI_EXP_SLTCTL_PCC,
+        pcc_mask,
+    );
+
+    let hotplug_blk_id = 0;
+    // Hotplug a block device whose id is 0 and bdf is 1:0:0.
+    hotplug_blk(
+        test_state.clone(),
+        root_port.clone(),
+        &mut image_paths,
+        hotplug_blk_id,
+        1,
+        0,
+        0,
+    );
+    // Create a block device whose bdf is 1:0:0.
+    let blk = create_blk(machine, 1, 0, 0);
+
+    let nlw_mask = PCI_EXP_LNKSTA_NLW;
+    let negotiated_link_width = (root_port.borrow().rp_dev.pci_bus.borrow().config_readw(
+        root_port.borrow().rp_dev.bus_num,
+        root_port.borrow().rp_dev.devfn,
+        cap_exp_addr + PCI_EXP_LNKSTA,
+    ) & nlw_mask)
+        >> 4;
+    assert!(nlw_range.binary_search(&negotiated_link_width).is_ok());
+
+    let cls_mask = PCI_EXP_LNKSTA_CLS;
+    let current_link_speed = root_port.borrow().rp_dev.pci_bus.borrow().config_readw(
+        root_port.borrow().rp_dev.bus_num,
+        root_port.borrow().rp_dev.devfn,
+        cap_exp_addr + PCI_EXP_LNKSTA,
+    ) & cls_mask;
+    assert!(cls_range.binary_search(&current_link_speed).is_ok());
+
+    let dllla_mask = PCI_EXP_LNKSTA_DLLLA;
+    validate_config_value_2byte(
+        root_port.borrow().rp_dev.pci_bus.clone(),
+        root_port.borrow().rp_dev.bus_num,
+        root_port.borrow().rp_dev.devfn,
+        cap_exp_addr + PCI_EXP_LNKSTA,
+        PCI_EXP_LNKSTA_DLLLA,
+        dllla_mask,
+    );
+
+    let abp_mask = PCI_EXP_SLTSTA_ABP;
+    validate_config_value_2byte(
+        root_port.borrow().rp_dev.pci_bus.clone(),
+        root_port.borrow().rp_dev.bus_num,
+        root_port.borrow().rp_dev.devfn,
+        cap_exp_addr + PCI_EXP_SLTSTA,
+        0,
+        abp_mask,
+    );
+
+    let pds_mask = PCI_EXP_SLTSTA_PDS;
+    validate_config_value_2byte(
+        root_port.borrow().rp_dev.pci_bus.clone(),
+        root_port.borrow().rp_dev.bus_num,
+        root_port.borrow().rp_dev.devfn,
+        cap_exp_addr + PCI_EXP_SLTSTA,
+        PCI_EXP_SLTSTA_PDS,
+        pds_mask,
+    );
+
+    let pdc_mask = PCI_EXP_SLTSTA_PDC;
+    validate_config_value_2byte(
+        root_port.borrow().rp_dev.pci_bus.clone(),
+        root_port.borrow().rp_dev.bus_num,
+        root_port.borrow().rp_dev.devfn,
+        cap_exp_addr + PCI_EXP_SLTSTA,
+        0,
+        pdc_mask,
+    );
+
+    let pcc_mask = PCI_EXP_SLTCTL_PCC;
+    validate_config_value_2byte(
+        root_port.borrow().rp_dev.pci_bus.clone(),
+        root_port.borrow().rp_dev.bus_num,
+        root_port.borrow().rp_dev.devfn,
+        cap_exp_addr + PCI_EXP_SLTCTL,
+        0,
+        pcc_mask,
+    );
+
+    // Hotunplug the block device whose id is 0 and bdf is 1:0:0.
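+    // After the device is removed, the checks below expect DLLLA and PDS to be cleared again
+    // and the power controller control bit (PCC) to report the slot as powered off.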
+ hotunplug_blk(test_state.clone(), blk, root_port.clone(), hotplug_blk_id); + + let dllla_mask = PCI_EXP_LNKSTA_DLLLA; + validate_config_value_2byte( + root_port.borrow().rp_dev.pci_bus.clone(), + root_port.borrow().rp_dev.bus_num, + root_port.borrow().rp_dev.devfn, + cap_exp_addr + PCI_EXP_LNKSTA, + 0, + dllla_mask, + ); + + let abp_mask = PCI_EXP_SLTSTA_ABP; + validate_config_value_2byte( + root_port.borrow().rp_dev.pci_bus.clone(), + root_port.borrow().rp_dev.bus_num, + root_port.borrow().rp_dev.devfn, + cap_exp_addr + PCI_EXP_SLTSTA, + 0, + abp_mask, + ); + + let pds_mask = PCI_EXP_SLTSTA_PDS; + validate_config_value_2byte( + root_port.borrow().rp_dev.pci_bus.clone(), + root_port.borrow().rp_dev.bus_num, + root_port.borrow().rp_dev.devfn, + cap_exp_addr + PCI_EXP_SLTSTA, + 0, + pds_mask, + ); + + let pdc_mask = PCI_EXP_SLTSTA_PDC; + validate_config_value_2byte( + root_port.borrow().rp_dev.pci_bus.clone(), + root_port.borrow().rp_dev.bus_num, + root_port.borrow().rp_dev.devfn, + cap_exp_addr + PCI_EXP_SLTSTA, + 0, + pdc_mask, + ); + + let pcc_mask = PCI_EXP_SLTCTL_PCC; + validate_config_value_2byte( + root_port.borrow().rp_dev.pci_bus.clone(), + root_port.borrow().rp_dev.bus_num, + root_port.borrow().rp_dev.devfn, + cap_exp_addr + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PCC, + pcc_mask, + ); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// r/w demo dev's mmio +#[test] +fn test_pci_combine_000() { + let cfg = DemoDev { + bar_num: 3, + bar_size: 0x100_0000, // 16MB + bus_num: 0, + dev_num: 5, + }; + + let (pci_dev, test_state) = init_demo_dev(cfg, 1); + + let bar_addr = pci_dev.borrow().io_map(0); + + let start = bar_addr; + + test_state.borrow().writeb(start, 5); + let out = test_state.borrow().readb(start); + + assert!(out == 10); // just multiply it with 2. + test_state.borrow().writeb(start + 2, 7); + let out = test_state.borrow().readb(start + 2); + assert!(out == 14); // just multiply it with 2. + + test_state.borrow_mut().stop(); +} + +/// change memory enabled during r/w demo dev's mmio +#[test] +fn test_pci_combine_001() { + let cfg = DemoDev { + bar_num: 3, + bar_size: 0x100_0000, // 16MB + bus_num: 0, + dev_num: 5, + }; + + let (pci_dev, test_state) = init_demo_dev(cfg, 1); + let dev_locked = pci_dev.borrow(); + + let bar_addr = dev_locked.io_map(1); + + // set memory enabled = 0 + let mut val = dev_locked.config_readw(PCI_COMMAND); + val &= !u16::from(PCI_COMMAND_MEMORY); + dev_locked.config_writew(PCI_COMMAND, val); + + // mmio r/w stops working. + test_state.borrow().writeb(bar_addr, 5); + let out = test_state.borrow().readb(bar_addr); + assert_ne!(out, 10); + + // set memory enabled = 1 + val |= u16::from(PCI_COMMAND_MEMORY); + dev_locked.config_writew(PCI_COMMAND, val); + + // mmio r/w gets back to work. + test_state.borrow().writeb(bar_addr, 5); + let out = test_state.borrow().readb(bar_addr); + assert_eq!(out, 0); + + drop(dev_locked); + + test_state.borrow_mut().stop(); +} + +/// r/w mmio during hotunplug +#[test] +fn test_pci_combine_002() { + let blk_nums = 1; + let root_port_nums = 1; + let (test_state, machine, alloc, image_paths) = set_up(root_port_nums, blk_nums, true, false); + + // Create a root port whose bdf is 0:1:0. 
+ let root_port = Rc::new(RefCell::new(RootPort::new( + machine.clone(), + alloc.clone(), + 0, + 1 << 3, + ))); + let blk = Rc::new(RefCell::new(TestVirtioPciDev::new( + machine.borrow().pci_bus.clone(), + ))); + blk.borrow_mut().pci_dev.bus_num = 1; + blk.borrow_mut().init(0, 0); + let bar_addr = blk.borrow().bar; + + let (delete_device_command, delete_blk_command) = build_hotunplug_blk_cmd(0); + let ret = test_state.borrow().qmp(&delete_device_command); + + // r/w mmio during hotunplug + test_state.borrow().writeb(bar_addr, 5); + assert!(test_state.borrow().readb(bar_addr) == 5); + + assert!( + wait_root_port_msix(root_port.clone()), + "Wait for interrupt of root port timeout" + ); + power_off_device(root_port); + + // r/w mmio during hotunplug + test_state.borrow().writeb(bar_addr, 5); + assert!(test_state.borrow().readb(bar_addr) != 5); + + assert_eq!(*ret.get("return").unwrap(), json!({})); + test_state.borrow().qmp_read(); + let ret = test_state.borrow().qmp(&delete_blk_command); + assert_eq!(*ret.get("return").unwrap(), json!({})); + + validate_config_value_2byte( + machine.borrow().pci_bus.clone(), + root_port_nums, + 0, + PCI_VENDOR_ID, + 0xFFFF, + 0xFFFF, + ); + // r/w mmio during hotunplug + test_state.borrow().writeb(bar_addr, 5); + assert!(test_state.borrow().readb(bar_addr) != 5); + + tear_down(None, test_state, alloc, None, Some(image_paths)); +} + +/// too large bar space +#[test] +fn test_pci_combine_003() { + let mut cfg = DemoDev { + bar_num: 3, + bar_size: 0x100_0000, // 16MB + bus_num: 0, + dev_num: 5, + }; + + let (pci_dev, _) = init_demo_dev(cfg, 1); + let bar_addr = pci_dev.borrow().io_map(0); + // the mmio space is 78MB, bar1 got over bounded + assert!(bar_addr != INVALID_BAR_ADDR); + + cfg.bar_size = 0x1000_0000; // 2GB + let (pci_dev, _) = init_demo_dev(cfg, 1); + let bar_addr = pci_dev.borrow().io_map(0); + + assert!(bar_addr == INVALID_BAR_ADDR); +} diff --git a/tests/mod_test/tests/pvpanic_test.rs b/tests/mod_test/tests/pvpanic_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..fd084959909e76ebdb1cc0b5e134b33128f88c2f --- /dev/null +++ b/tests/mod_test/tests/pvpanic_test.rs @@ -0,0 +1,181 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use std::cell::RefCell; +use std::fs; +use std::path::Path; +use std::rc::Rc; + +use devices::misc::pvpanic::{PVPANIC_CRASHLOADED, PVPANIC_PANICKED}; +use devices::pci::config::{ + PCI_CLASS_SYSTEM_OTHER, PCI_DEVICE_ID_REDHAT_PVPANIC, PCI_SUBDEVICE_ID_QEMU, + PCI_VENDOR_ID_REDHAT, PCI_VENDOR_ID_REDHAT_QUMRANET, +}; +use mod_test::{ + libdriver::{machine::TestStdMachine, pci::*}, + libtest::{test_init, TestState, MACHINE_TYPE_ARG}, +}; + +const TMP_LOG_PATH: &str = "/tmp/pvpanic-mst.log"; +const BUS_NUM: u8 = 0; +const ADDR: u8 = 7; +const DEFAULT_SUPPORTED_FEATURE: u8 = (PVPANIC_PANICKED | PVPANIC_CRASHLOADED) as u8; + +#[derive(Clone, Copy)] +struct PvPanicDevCfg { + bus_num: u8, + addr: u8, + supported_features: u8, +} + +impl Default for PvPanicDevCfg { + fn default() -> Self { + Self { + bus_num: BUS_NUM, + addr: ADDR, + supported_features: DEFAULT_SUPPORTED_FEATURE, + } + } +} + +impl PvPanicDevCfg { + fn init(&self, enable_log: bool) -> (Rc>, Rc>) { + let mut test_machine_args: Vec<&str> = Vec::new(); + + let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + test_machine_args.append(&mut args); + + if enable_log { + let mut args: Vec<&str> = vec!["-D", TMP_LOG_PATH]; + test_machine_args.append(&mut args); + } + + let pvpanic_str = fmt_pvpanic_deves(*self); + args = pvpanic_str[..].split(' ').collect(); + test_machine_args.append(&mut args); + + let test_state = Rc::new(RefCell::new(test_init(test_machine_args))); + let machine = Rc::new(RefCell::new(TestStdMachine::new(test_state.clone()))); + + let mut pvpanic_pci_dev = TestPciDev::new(machine.borrow().pci_bus.clone()); + let devfn = self.addr << 3; + pvpanic_pci_dev.devfn = devfn; + + pvpanic_pci_dev.set_bus_num(self.bus_num); + pvpanic_pci_dev.enable(); + + (Rc::new(RefCell::new(pvpanic_pci_dev)), test_state) + } +} + +fn fmt_pvpanic_deves(cfg: PvPanicDevCfg) -> String { + format!( + "-device pvpanic,id=pvpanic_pci,bus=pcie.{},addr=0x{},supported-features={}", + cfg.bus_num, cfg.addr, cfg.supported_features, + ) +} + +/// PvPanic device read config space. +/// TestStep: +/// 1. Init device. +/// 2. Read PvPanic device config space. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn test_pvpanic_read_config() { + let cfg = PvPanicDevCfg::default(); + + let (pvpanic_pci_dev, test_state) = cfg.init(false); + + let info = pvpanic_pci_dev.borrow().config_readw(PCI_VENDOR_ID); + assert_eq!(info, PCI_VENDOR_ID_REDHAT); + + let info = pvpanic_pci_dev.borrow().config_readw(PCI_DEVICE_ID); + assert_eq!(info, PCI_DEVICE_ID_REDHAT_PVPANIC); + + let info = pvpanic_pci_dev.borrow().config_readw(PCI_SUB_CLASS_DEVICE); + assert_eq!(info, PCI_CLASS_SYSTEM_OTHER); + + let info = pvpanic_pci_dev + .borrow() + .config_readw(PCI_SUBSYSTEM_VENDOR_ID); + assert_eq!(info, PCI_VENDOR_ID_REDHAT_QUMRANET); + + let info = pvpanic_pci_dev.borrow().config_readw(PCI_SUBSYSTEM_ID); + assert_eq!(info, PCI_SUBDEVICE_ID_QEMU); + + test_state.borrow_mut().stop(); +} + +/// PvPanic device read supported features. +/// TestStep: +/// 1. Init device. +/// 2. Read supported features of PvPanic to emulate front-end driver. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. 
+#[test] +fn test_pvpanic_read_supported_features() { + let cfg = PvPanicDevCfg::default(); + + let (pvpanic_pci_dev, test_state) = cfg.init(false); + + let bar_addr = pvpanic_pci_dev.borrow().io_map(0); + + let start = bar_addr; + + let info = test_state.borrow().readb(start); + assert_eq!(info, DEFAULT_SUPPORTED_FEATURE); + + test_state.borrow_mut().stop(); +} + +/// PvPanic device write events. +/// TestStep: +/// 1. Init device. +/// 2. Write 3 types of events to PvPanic bar0 to emulate front-end driver and check device behaviors via log. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn test_pvpanic_write_events() { + let cfg = PvPanicDevCfg::default(); + + if Path::new(TMP_LOG_PATH).exists() { + fs::remove_file(TMP_LOG_PATH).unwrap(); + } + + let (pvpanic_pci_dev, test_state) = cfg.init(true); + + let bar_addr = pvpanic_pci_dev.borrow().io_map(0); + let start = bar_addr; + let tmp_log_path = String::from(TMP_LOG_PATH); + let write_test_params: [(u8, &str); 3] = [ + (PVPANIC_PANICKED as u8, "pvpanic: panicked event"), + (PVPANIC_CRASHLOADED as u8, "pvpanic: crashloaded event"), + (!DEFAULT_SUPPORTED_FEATURE, "pvpanic: unknown event"), + ]; + + for &(data, expected_log_content) in write_test_params.iter() { + test_state.borrow().writeb(start, data); + let tmp_log_content = std::fs::read_to_string(&tmp_log_path).unwrap(); + + assert!(tmp_log_content.contains(expected_log_content)); + } + + test_state.borrow_mut().stop(); + match fs::remove_file(TMP_LOG_PATH) { + Ok(_) => {} + Err(e) => assert!(false, "{}", e), + } +} diff --git a/tests/mod_test/tests/rng_test.rs b/tests/mod_test/tests/rng_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..9296ebb084b4578a32b1558d4cd974b2adf5be38 --- /dev/null +++ b/tests/mod_test/tests/rng_test.rs @@ -0,0 +1,380 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::cell::RefCell; +use std::collections::HashSet; +use std::os::unix::fs::FileTypeExt; +use std::path::Path; +use std::rc::Rc; +use std::time::{Duration, Instant}; + +use mod_test::libdriver::malloc::GuestAllocator; +use mod_test::libdriver::virtio::{ + TestVirtQueue, TestVringDescEntry, VirtioDeviceOps, VIRTIO_F_VERSION_1, +}; +use mod_test::libdriver::virtio_pci_modern::TestVirtioPciDev; +use mod_test::libdriver::virtio_rng::create_rng; +use mod_test::libtest::TestState; + +const TIMEOUT_US: u64 = 10 * 1000 * 1000; +const RANDOM_FILE: &str = "/dev/random"; +const RNG_DATA_BYTES: u64 = 64; +const THRESHOLD: usize = 10; +const DEFAULT_RNG_REQS: u64 = 6; + +fn get_random_file() -> String { + let random_file: String = RANDOM_FILE.to_string(); + let path = Path::new(&random_file); + if path.exists() && path.metadata().unwrap().file_type().is_char_device() { + return random_file; + } + + panic!("Failed to get random file."); +} + +// Check if the distinct random numbers are greater than the THRESHOLD. 
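+// This is only a plausibility check: each buffer read back is RNG_DATA_BYTES (64) bytes, so
+// finding more than THRESHOLD (10) distinct non-zero byte values is overwhelmingly likely for
+// real random data, while an untouched (all-zero) buffer fails the check.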
+fn random_num_check(data: Vec<u8>) -> bool {
+    let mut rand_set: HashSet<u8> = HashSet::new();
+
+    for num in data.iter() {
+        if *num != 0 {
+            rand_set.insert(*num);
+
+            if rand_set.len() > THRESHOLD {
+                return true;
+            }
+        }
+    }
+
+    false
+}
+
+// Read RNG_DATA_BYTES bytes from virtio-rng device, and
+// perform DEFAULT_RNG_REQS reqs.
+fn virtio_rng_read_batch(
+    rng: Rc<RefCell<TestVirtioPciDev>>,
+    test_state: Rc<RefCell<TestState>>,
+    alloc: Rc<RefCell<GuestAllocator>>,
+    virtqueue: Rc<RefCell<TestVirtQueue>>,
+    bytes: u64,
+) -> Vec<u8> {
+    let mut free_head = 0_u32;
+    let mut req_addr = 0_u64;
+    let mut len = Some(0);
+
+    for _i in 0..DEFAULT_RNG_REQS {
+        req_addr = alloc.borrow_mut().alloc(bytes);
+        free_head = virtqueue
+            .borrow_mut()
+            .add(test_state.clone(), req_addr, bytes as u32, true);
+    }
+
+    rng.borrow()
+        .kick_virtqueue(test_state.clone(), virtqueue.clone());
+    rng.borrow().poll_used_elem(
+        test_state.clone(),
+        virtqueue,
+        free_head,
+        TIMEOUT_US,
+        &mut len,
+        true,
+    );
+
+    assert!(len.unwrap() >= 1);
+    assert!(u64::from(len.unwrap()) <= bytes);
+
+    test_state.borrow().memread(req_addr, RNG_DATA_BYTES)
+}
+
+// Read RNG_DATA_BYTES*DEFAULT_RNG_REQS bytes from virtio-rng device.
+fn virtio_rng_read_chained(
+    rng: Rc<RefCell<TestVirtioPciDev>>,
+    test_state: Rc<RefCell<TestState>>,
+    alloc: Rc<RefCell<GuestAllocator>>,
+    virtqueue: Rc<RefCell<TestVirtQueue>>,
+    bytes: u64,
+) -> Vec<u8> {
+    let req_addr = alloc.borrow_mut().alloc(bytes * DEFAULT_RNG_REQS);
+    let mut data_entries: Vec<TestVringDescEntry> = Vec::with_capacity(DEFAULT_RNG_REQS as usize);
+    let mut len = Some(0);
+
+    for i in 0..DEFAULT_RNG_REQS {
+        data_entries.push(TestVringDescEntry {
+            data: req_addr + i * bytes,
+            len: bytes as u32,
+            write: true,
+        });
+    }
+
+    let free_head = virtqueue
+        .borrow_mut()
+        .add_chained(test_state.clone(), data_entries);
+
+    rng.borrow()
+        .kick_virtqueue(test_state.clone(), virtqueue.clone());
+    rng.borrow().poll_used_elem(
+        test_state.clone(),
+        virtqueue,
+        free_head,
+        TIMEOUT_US,
+        &mut len,
+        true,
+    );
+
+    assert!(len.unwrap() >= 1);
+    assert!(u64::from(len.unwrap()) <= bytes * DEFAULT_RNG_REQS);
+
+    test_state.borrow().memread(req_addr, RNG_DATA_BYTES)
+}
+
+fn tear_down(
+    rng: Rc<RefCell<TestVirtioPciDev>>,
+    test_state: Rc<RefCell<TestState>>,
+    alloc: Rc<RefCell<GuestAllocator>>,
+    vqs: Vec<Rc<RefCell<TestVirtQueue>>>,
+) {
+    rng.borrow_mut().destroy_device(alloc, vqs);
+    test_state.borrow_mut().stop();
+}
+
+/// Rng device read random numbers function test.
+/// TestStep:
+/// 1. Init device.
+/// 2. Do the I/O request, check random numbers.
+/// 3. Destroy device.
+/// Expect:
+/// 1/2/3: success.
+#[test]
+fn rng_read() {
+    let max_bytes = 1024;
+    let period = 1000;
+
+    let random_file = get_random_file();
+    let (rng, test_state, alloc) = create_rng(random_file, max_bytes, period);
+
+    let virtqueues = rng.borrow_mut().init_device(
+        test_state.clone(),
+        alloc.clone(),
+        1 << VIRTIO_F_VERSION_1,
+        1,
+    );
+
+    let mut data = virtio_rng_read_chained(
+        rng.clone(),
+        test_state.clone(),
+        alloc.clone(),
+        virtqueues[0].clone(),
+        RNG_DATA_BYTES,
+    );
+    assert!(random_num_check(data));
+
+    data = virtio_rng_read_chained(
+        rng.clone(),
+        test_state.clone(),
+        alloc.clone(),
+        virtqueues[0].clone(),
+        RNG_DATA_BYTES,
+    );
+    assert!(random_num_check(data));
+
+    tear_down(rng, test_state, alloc, virtqueues);
+}
+
+/// Rng device batch read random numbers function test.
+/// TestStep:
+/// 1. Init device.
+/// 2. Do the I/O request, check random numbers.
+/// 3. Destroy device.
+/// Expect:
+/// 1/2/3: success.
+#[test] +fn rng_read_batch() { + let max_bytes = 1024; + let period = 1000; + + let random_file = get_random_file(); + let (rng, test_state, alloc) = create_rng(random_file, max_bytes, period); + + let virtqueues = rng.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + let mut data = virtio_rng_read_batch( + rng.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + RNG_DATA_BYTES, + ); + assert!(random_num_check(data)); + + data = virtio_rng_read_batch( + rng.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + RNG_DATA_BYTES, + ); + assert!(random_num_check(data)); + + tear_down(rng, test_state, alloc, virtqueues); +} + +/// Rng device rate limit random numbers reading test. +/// TestStep: +/// 1. Init device with rate limit 64 bytes/sec. +/// 2. Do the I/O request, check random numbers. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn rng_limited_rate() { + let max_bytes = 64; + let period = 1000; + + let random_file = get_random_file(); + let (rng, test_state, alloc) = create_rng(random_file, max_bytes, period); + + let virtqueues = rng.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + let data = virtio_rng_read_chained( + rng.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + RNG_DATA_BYTES, + ); + assert!(random_num_check(data)); + + let req_addr = alloc.borrow_mut().alloc(RNG_DATA_BYTES * DEFAULT_RNG_REQS); + let mut data_entries: Vec = Vec::with_capacity(DEFAULT_RNG_REQS as usize); + + for i in 0..DEFAULT_RNG_REQS { + data_entries.push(TestVringDescEntry { + data: req_addr + i * RNG_DATA_BYTES, + len: RNG_DATA_BYTES as u32, + write: true, + }); + } + + let free_head = virtqueues[0] + .borrow_mut() + .add_chained(test_state.clone(), data_entries); + rng.borrow() + .kick_virtqueue(test_state.clone(), virtqueues[0].clone()); + assert!(!random_num_check( + test_state.borrow().memread(req_addr, RNG_DATA_BYTES) + )); + + let time_out = Instant::now() + Duration::from_micros(TIMEOUT_US); + loop { + test_state.borrow().clock_step(); + if rng.borrow().queue_was_notified(virtqueues[0].clone()) + && virtqueues[0].borrow_mut().get_buf(test_state.clone()) + { + assert!(virtqueues[0].borrow().desc_len.contains_key(&free_head)); + break; + } + assert!(Instant::now() <= time_out); + } + + assert!(random_num_check(test_state.borrow().memread( + req_addr + (DEFAULT_RNG_REQS - 1) * RNG_DATA_BYTES, + RNG_DATA_BYTES + ))); + + tear_down(rng, test_state, alloc, virtqueues); +} + +/// Rng device read a large number of random numbers test. +/// TestStep: +/// 1. Init device. +/// 2. Do the I/O request, check random numbers. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. 
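+/// Note: max_bytes here is large enough that the rate limiter effectively never throttles, so
+/// both chained reads of max_bytes_read bytes are expected to complete directly.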
+#[test] +fn rng_read_with_max() { + let max_bytes = 1000000000; + let period = 1000; + let max_bytes_read = 2048000; + + let random_file = get_random_file(); + let (rng, test_state, alloc) = create_rng(random_file, max_bytes, period); + + let virtqueues = rng.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + let mut data = virtio_rng_read_chained( + rng.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + max_bytes_read, + ); + assert!(random_num_check(data)); + + data = virtio_rng_read_chained( + rng.clone(), + test_state.clone(), + alloc.clone(), + virtqueues[0].clone(), + max_bytes_read, + ); + assert!(random_num_check(data)); + + tear_down(rng, test_state, alloc, virtqueues); +} + +/// Rng device read/write config space. +/// TestStep: +/// 1. Init device. +/// 2. Read/write rng device config space. +/// 3. Destroy device. +/// Expect: +/// 1/3: success, 2: failed. +#[test] +fn rng_rw_config() { + let max_bytes = 1024; + let period = 1000; + + let random_file = get_random_file(); + let (rng, test_state, alloc) = create_rng(random_file, max_bytes, period); + + let virtqueues = rng.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + let config = rng.borrow().config_readq(0); + assert_eq!(config, 0); + + rng.borrow().config_writeq(0, 0xff); + let config = rng.borrow().config_readq(0); + assert_ne!(config, 0xff); + + tear_down(rng, test_state, alloc, virtqueues); +} diff --git a/tests/mod_test/tests/scream_test.rs b/tests/mod_test/tests/scream_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..d7669fe09b012f8c511b84badb101b45e893760c --- /dev/null +++ b/tests/mod_test/tests/scream_test.rs @@ -0,0 +1,574 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use std::{ + cell::RefCell, + fs::{self, File}, + io::{Read, Write}, + mem, + path::Path, + rc::Rc, + thread, +}; + +use core::time; + +use devices::misc::scream::{ + audio_demo::INITIAL_VOLUME_VAL, ShmemHeader, ShmemStreamFmt, ShmemStreamHeader, + IVSHMEM_BAR0_STATUS, IVSHMEM_BAR0_VOLUME, SCREAM_MAGIC, STATUS_PLAY_BIT, STATUS_START_BIT, +}; +use mod_test::{ + libdriver::{ivshmem::TestIvshmemDev, machine::TestStdMachine}, + libtest::{test_init, TestState, MACHINE_TYPE_ARG}, + utils::get_rand_str, +}; +use util::{num_ops::read_data_u32, offset_of}; + +const PLAY_BASE: u64 = mem::size_of::() as u64; +const PLAY_DADA_OFFSET: u64 = mem::size_of::() as u64; + +const RECORD_BASE: u64 = PLAY_BASE + mem::size_of::() as u64; +const RECORD_DATA_OFFSET: u64 = PLAY_DADA_OFFSET + (AUDIO_CHUNK_SIZE * AUDIO_CHUNK_CNT) as u64; + +const IVSHMEM_DEFAULT_SIZE: u32 = 2; +const AUDIO_CHUNK_SIZE: u32 = 4; +const AUDIO_CHUNK_CNT: u32 = 7; +const AUDIO_DEFAULT_DATA: [u8; (AUDIO_CHUNK_SIZE * AUDIO_CHUNK_CNT) as usize] = [ + 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, +]; + +const POLL_DELAY_MS: u64 = 20; +const POLL_MAX_CNT: u32 = 5; + +fn get_audio_file_name() -> (String, String) { + let playback_path = format!("/tmp/audio-{}.pcm", get_rand_str(8)); + let record_path = format!("/tmp/audio-{}.pcm", get_rand_str(8)); + (playback_path, record_path) +} + +fn set_up( + size: u32, + pci_slot: u8, + playback_path: String, + record_path: String, +) -> (Rc>, Rc>) { + let mut extra_args: Vec<&str> = Vec::new(); + let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + + extra_args.append(&mut args); + + let scream_device = format!( + "-device ivshmem-scream,memdev=scream,id=scream,interface=Demo,playback={},record={},bus=pcie.0,addr={}", + playback_path, record_path, pci_slot, + ); + args = scream_device.split(' ').collect(); + extra_args.append(&mut args); + + let object = format!( + "-object memory-backend-ram,id=scream,share=on,size={}M", + size + ); + args = object.split(' ').collect(); + extra_args.append(&mut args); + + let test_state = Rc::new(RefCell::new(test_init(extra_args))); + let machine = TestStdMachine::new(test_state.clone()); + + let ivshmem = Rc::new(RefCell::new(TestIvshmemDev::new(machine.pci_bus))); + + (ivshmem, test_state) +} + +fn stream_header_init(ivshmem: &mut TestIvshmemDev, base: u64, offset: u64) { + // set chunk_idx + ivshmem.writew(base + offset_of!(ShmemStreamHeader, chunk_idx) as u64, 0); + // set max_chunks + ivshmem.writew( + base + offset_of!(ShmemStreamHeader, max_chunks) as u64, + AUDIO_CHUNK_CNT as u16 - 1, + ); + // set chunk_size + ivshmem.writel( + base + offset_of!(ShmemStreamHeader, chunk_size) as u64, + AUDIO_CHUNK_SIZE, + ); + // set offset + ivshmem.writel( + base + offset_of!(ShmemStreamHeader, offset) as u64, + offset as u32, + ); + + let fmt_base = base + offset_of!(ShmemStreamHeader, fmt) as u64; + // set fmt_generation + ivshmem.writel( + fmt_base + offset_of!(ShmemStreamFmt, fmt_generation) as u64, + 1, + ); + // set rate + ivshmem.writeb(fmt_base + offset_of!(ShmemStreamFmt, rate) as u64, 128); + // set size + ivshmem.writeb(fmt_base + offset_of!(ShmemStreamFmt, size) as u64, 16); + // set channel + ivshmem.writeb(fmt_base + offset_of!(ShmemStreamFmt, channels) as u64, 2); + // set channel_map + ivshmem.writel(fmt_base + offset_of!(ShmemStreamFmt, channel_map) as u64, 3); + + // Setting is_started, it must be set at the end. 
Otherwise, the fmt data may not be updated in + // time. + ivshmem.writel(base + offset_of!(ShmemStreamHeader, is_started) as u64, 1); +} + +fn play_header_init(ivshmem: &mut TestIvshmemDev) { + // set magic + ivshmem.writeq(0, SCREAM_MAGIC); + let base = PLAY_BASE; + stream_header_init(ivshmem, base, PLAY_DADA_OFFSET); +} + +fn play_audio_data_init(playback: String) { + if Path::new(playback.as_str()).exists() { + match fs::remove_file(playback.clone()) { + Ok(_) => {} + Err(e) => assert!(false, "{}", e), + } + } + match fs::File::create(playback) { + Ok(_) => {} + Err(e) => assert!(false, "{}", e), + } +} + +fn record_header_init(ivshmem: &mut TestIvshmemDev) { + play_header_init(ivshmem); + let base = RECORD_BASE; + stream_header_init(ivshmem, base, RECORD_DATA_OFFSET); +} + +fn record_audio_data_init(record: String) { + if Path::new(record.as_str()).exists() { + match fs::remove_file(record.clone()) { + Ok(_) => {} + Err(e) => assert!(false, "{}", e), + } + } + let mut file = match fs::File::create(record) { + Ok(file) => file, + Err(e) => { + assert!(false, "{}", e); + return; + } + }; + match file.write(&AUDIO_DEFAULT_DATA) { + Ok(_) => {} + Err(e) => assert!(false, "{}", e), + } +} + +fn audio_data_init(playback: String, record: String) { + play_audio_data_init(playback); + record_audio_data_init(record); +} + +fn scream_tmp_clear(playback_path: String, record_path: String) { + if Path::new(playback_path.as_str()).exists() { + match fs::remove_file(playback_path) { + Ok(_) => {} + Err(e) => assert!(false, "{}", e), + } + } + + if Path::new(record_path.as_str()).exists() { + match fs::remove_file(record_path) { + Ok(_) => {} + Err(e) => assert!(false, "{}", e), + } + } +} + +fn read_and_check_data(file: &mut File, src: &[u8], len: u32) { + let mut data: [u8; AUDIO_CHUNK_SIZE as usize] = [0; AUDIO_CHUNK_SIZE as usize]; + let size = match file.read(&mut data) { + Ok(size) => size, + Err(e) => { + assert!(false, "{}", e); + 0 + } + }; + assert_eq!(size, len as usize); + if len != 0 { + assert_eq!(data, src); + } +} + +/// scream device playback audio. +/// TestStep: +/// 1. Init scream device. +/// 2. Send first audio frame. +/// 3. Send four consecutive audio frames. +/// 4. change audio format. +/// 5. Stop VM. +/// 6. Check audio frames from audio file. +/// Expect: +/// 1/2/3/4/5/6: success. +#[test] +fn scream_playback_basic_test() { + let pci_slot = 0x1; + let (playback_path, record_path) = get_audio_file_name(); + audio_data_init(playback_path.clone(), record_path.clone()); + let (ivshmem, test_state) = set_up( + IVSHMEM_DEFAULT_SIZE, + pci_slot, + playback_path.clone(), + record_path.clone(), + ); + ivshmem.borrow_mut().init(pci_slot); + + // Wait for 1s until the scream device is initialized and enters the polling state to + // prevent subsequent audio frame data loss. 
+ thread::sleep(time::Duration::from_millis(1000)); + + play_header_init(&mut ivshmem.borrow_mut()); + ivshmem + .borrow_mut() + .writel_reg(IVSHMEM_BAR0_STATUS, STATUS_PLAY_BIT | STATUS_START_BIT); + + thread::sleep(time::Duration::from_millis(POLL_DELAY_MS)); + + // write one audio chunk + for i in 0..AUDIO_CHUNK_SIZE { + ivshmem.borrow_mut().writeb( + PLAY_DADA_OFFSET + u64::from(AUDIO_CHUNK_SIZE + i), + AUDIO_DEFAULT_DATA[i as usize], + ); + } + + // update play header chunk_idx + ivshmem.borrow_mut().writew( + PLAY_BASE + offset_of!(ShmemStreamHeader, chunk_idx) as u64, + 1, + ); + + thread::sleep(time::Duration::from_millis(1000)); + + // When four consecutive frames of data are written, only the last two frames of data can be + // read. + for i in 0..AUDIO_CHUNK_SIZE { + ivshmem.borrow_mut().writeb( + PLAY_DADA_OFFSET + u64::from(i), + AUDIO_DEFAULT_DATA[i as usize], + ); + } + + // update play header chunk_idx + ivshmem.borrow_mut().writew( + PLAY_BASE + offset_of!(ShmemStreamHeader, chunk_idx) as u64, + 0, + ); + + thread::sleep(time::Duration::from_millis(1000)); + + // Reformat audio, change fmt_generation from 1 to 2. + let fmt_base = PLAY_BASE + offset_of!(ShmemStreamHeader, fmt) as u64; + ivshmem.borrow_mut().writel( + fmt_base + offset_of!(ShmemStreamFmt, fmt_generation) as u64, + 2, + ); + + ivshmem.borrow_mut().writew( + PLAY_BASE + offset_of!(ShmemStreamHeader, chunk_idx) as u64, + 1, + ); + + thread::sleep(time::Duration::from_millis(1000)); + + // Stop the StratoVirt process before verifying data. Otherwise, + // audio data may not be updated to the file. + test_state.borrow_mut().stop(); + + let mut file = match File::open(playback_path.clone()) { + Ok(file) => file, + Err(e) => { + assert!(false, "{}", e); + return; + } + }; + + // Check first frame + read_and_check_data( + &mut file, + &AUDIO_DEFAULT_DATA[0..AUDIO_CHUNK_SIZE as usize], + AUDIO_CHUNK_SIZE, + ); + + // Check penultimate frame + read_and_check_data(&mut file, &[0; AUDIO_CHUNK_SIZE as usize], AUDIO_CHUNK_SIZE); + + // Check last frame + read_and_check_data( + &mut file, + &AUDIO_DEFAULT_DATA[0..AUDIO_CHUNK_SIZE as usize], + AUDIO_CHUNK_SIZE, + ); + + // No audio frame after audio format changed. + read_and_check_data(&mut file, &[0; AUDIO_CHUNK_SIZE as usize], 0); + + scream_tmp_clear(playback_path, record_path); +} + +/// scream device volume synchronization. +/// TestStep: +/// 1. Init scream device. +/// 2. Check volume's initial value. +/// 3. Set volume and read back to check. +/// 4. Stop VM. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn scream_volume_sync_test() { + let pci_slot = 0x1; + let (playback_path, record_path) = get_audio_file_name(); + audio_data_init(playback_path.clone(), record_path.clone()); + let (ivshmem, test_state) = set_up( + IVSHMEM_DEFAULT_SIZE, + pci_slot, + playback_path.clone(), + record_path.clone(), + ); + ivshmem.borrow_mut().init(pci_slot); + + let init_val = ivshmem.borrow_mut().readl_reg(IVSHMEM_BAR0_VOLUME); + assert_eq!(init_val, INITIAL_VOLUME_VAL); + + ivshmem.borrow_mut().writel_reg(IVSHMEM_BAR0_VOLUME, 0xff); + let second_val = ivshmem.borrow_mut().readl_reg(IVSHMEM_BAR0_VOLUME); + assert_eq!(second_val, 0xff); + + test_state.borrow_mut().stop(); + scream_tmp_clear(playback_path, record_path); +} + +/// scream device record audio. +/// TestStep: +/// 1. Init scream device and start recording. +/// 2. Check first frame audio. +/// 3. Check last frame audio. +/// 4. Stop VM. +/// Expect: +/// 1/2/3/4: success. 
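+/// Note: stream_header_init() sets max_chunks to AUDIO_CHUNK_CNT - 1, so the chunk index wraps
+/// around; this is why the last index is compared against AUDIO_CHUNK_CNT % (AUDIO_CHUNK_CNT - 1).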
+#[test] +fn scream_record_basic_test() { + let pci_slot = 0x1; + let (playback_path, record_path) = get_audio_file_name(); + audio_data_init(playback_path.clone(), record_path.clone()); + let (ivshmem, test_state) = set_up( + IVSHMEM_DEFAULT_SIZE, + pci_slot, + playback_path.clone(), + record_path.clone(), + ); + ivshmem.borrow_mut().init(pci_slot); + + record_header_init(&mut ivshmem.borrow_mut()); + ivshmem + .borrow_mut() + .writel_reg(IVSHMEM_BAR0_STATUS, STATUS_START_BIT); + + let mut cnt = 0; + let mut chunk_idx = 0; + // Waiting for first chunk data write to ivshmem, then check first chunk data. + while cnt < POLL_MAX_CNT { + thread::sleep(time::Duration::from_millis(POLL_DELAY_MS >> 1)); + + // read chunk_idx + let offset = RECORD_BASE + offset_of!(ShmemStreamHeader, chunk_idx) as u64; + chunk_idx = ivshmem.borrow_mut().readw(offset); + if chunk_idx > 0 { + break; + } + cnt += 1; + } + + assert_eq!(chunk_idx, 1); + + let audio_data = ivshmem.borrow_mut().readl(RECORD_DATA_OFFSET); + let mut check_data = 0; + read_data_u32( + &AUDIO_DEFAULT_DATA[0..AUDIO_CHUNK_SIZE as usize], + &mut check_data, + ); + + assert_eq!(audio_data, check_data); + + // Sleep 2S to wait last chunk data write to ivshmem, and check last chunk data. + thread::sleep(time::Duration::from_millis(2000)); + // read chunk_idx + let offset = RECORD_BASE + offset_of!(ShmemStreamHeader, chunk_idx) as u64; + chunk_idx = ivshmem.borrow_mut().readw(offset); + + assert_eq!( + u32::from(chunk_idx), + AUDIO_CHUNK_CNT % (AUDIO_CHUNK_CNT - 1) + ); + + let audio_data = ivshmem.borrow_mut().readl(RECORD_DATA_OFFSET); + let mut check_data = 0; + let start = ((AUDIO_CHUNK_CNT - 1) * AUDIO_CHUNK_SIZE) as usize; + let end = (AUDIO_CHUNK_CNT * AUDIO_CHUNK_SIZE) as usize; + read_data_u32(&AUDIO_DEFAULT_DATA[start..end], &mut check_data); + + assert_eq!(audio_data, check_data); + + test_state.borrow_mut().stop(); + scream_tmp_clear(playback_path, record_path); +} + +/// scream device exception 001. +/// TestStep: +/// 1. Init scream device. +/// 2. Set buffer offset exceeded shared memory size. +/// 3. Check StratoVirt process. +/// 4. Stop VM. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn scream_exception_001() { + let pci_slot = 0x1; + let (playback_path, record_path) = get_audio_file_name(); + audio_data_init(playback_path.clone(), record_path.clone()); + let (ivshmem, test_state) = set_up( + IVSHMEM_DEFAULT_SIZE, + pci_slot, + playback_path.clone(), + record_path.clone(), + ); + ivshmem.borrow_mut().init(pci_slot); + + play_header_init(&mut ivshmem.borrow_mut()); + record_header_init(&mut ivshmem.borrow_mut()); + + // Setting playback and record buffer offset exceeded shared memory size. + let playback_offset = PLAY_BASE + offset_of!(ShmemStreamHeader, offset) as u64; + let mut buffer_offset = IVSHMEM_DEFAULT_SIZE * 1024 * 1024 + 1; + ivshmem.borrow_mut().writel(playback_offset, buffer_offset); + + let record_offset = RECORD_BASE + offset_of!(ShmemStreamHeader, offset) as u64; + buffer_offset = IVSHMEM_DEFAULT_SIZE * 1024 * 1024 + 2; + ivshmem.borrow_mut().writel(record_offset, buffer_offset); + + // Wait for 1s, query StratoVirt status. 
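+    // The out-of-range offsets must not crash or hang StratoVirt; querying its status over QMP
+    // and expecting "running" confirms that the process survived the bogus header values.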
+ thread::sleep(time::Duration::from_millis(1000)); + + let value = test_state + .borrow_mut() + .qmp("{\"execute\": \"query-status\"}"); + let status = value["return"]["status"].as_str().unwrap().to_string(); + assert_eq!(status, "running".to_string()); + + play_header_init(&mut ivshmem.borrow_mut()); + record_header_init(&mut ivshmem.borrow_mut()); + + // Setting chunk_idx > max_chunk + let mut chunk_offset = PLAY_BASE + offset_of!(ShmemStreamHeader, chunk_idx) as u64; + ivshmem + .borrow_mut() + .writew(chunk_offset, AUDIO_CHUNK_CNT as u16); + chunk_offset = RECORD_BASE + offset_of!(ShmemStreamHeader, chunk_idx) as u64; + ivshmem + .borrow_mut() + .writew(chunk_offset, AUDIO_CHUNK_CNT as u16); + + // Wait for 1s, query StratoVirt status. + thread::sleep(time::Duration::from_millis(1000)); + + let value = test_state + .borrow_mut() + .qmp("{\"execute\": \"query-status\"}"); + let status = value["return"]["status"].as_str().unwrap().to_string(); + assert_eq!(status, "running".to_string()); + + test_state.borrow_mut().stop(); + scream_tmp_clear(playback_path, record_path); +} + +/// scream device exception 002. +/// TestStep: +/// 1. Init scream device. +/// 2. Set invalid channels and channel_map. +/// 3. Send audio data. +/// 4. Stop VM. +/// 5. Check audio frames from audio file. +/// Expect: +/// 1/2/3/4/5: success. +#[test] +fn scream_exception_002() { + let pci_slot = 0x1; + let (playback_path, record_path) = get_audio_file_name(); + audio_data_init(playback_path.clone(), record_path.clone()); + let (ivshmem, test_state) = set_up( + IVSHMEM_DEFAULT_SIZE, + pci_slot, + playback_path.clone(), + record_path.clone(), + ); + ivshmem.borrow_mut().init(pci_slot); + + // Wait for 1s until the scream device is initialized and enters the polling state to + // prevent subsequent audio frame data loss. + thread::sleep(time::Duration::from_millis(1000)); + + play_header_init(&mut ivshmem.borrow_mut()); + + thread::sleep(time::Duration::from_millis(POLL_DELAY_MS)); + + // Setting channels and channel_map to 0. + let fmt_base = PLAY_BASE + offset_of!(ShmemStreamHeader, fmt) as u64; + ivshmem + .borrow_mut() + .writeb(fmt_base + offset_of!(ShmemStreamFmt, channels) as u64, 0); + ivshmem + .borrow_mut() + .writel(fmt_base + offset_of!(ShmemStreamFmt, channel_map) as u64, 0); + + // write one audio chunk + for i in 0..AUDIO_CHUNK_SIZE { + ivshmem.borrow_mut().writeb( + PLAY_DADA_OFFSET + u64::from(AUDIO_CHUNK_SIZE + i), + AUDIO_DEFAULT_DATA[i as usize], + ); + } + + // update play header chunk_idx + ivshmem.borrow_mut().writew( + PLAY_BASE + offset_of!(ShmemStreamHeader, chunk_idx) as u64, + 1, + ); + + thread::sleep(time::Duration::from_millis(1000)); + + // Stop the StratoVirt process before verifying data. Otherwise, + // audio data may not be updated to the file. + test_state.borrow_mut().stop(); + + let mut file = match File::open(playback_path.clone()) { + Ok(file) => file, + Err(e) => { + assert!(false, "{}", e); + return; + } + }; + + read_and_check_data(&mut file, &[0; AUDIO_CHUNK_SIZE as usize], 0); + + scream_tmp_clear(playback_path, record_path); +} diff --git a/tests/mod_test/tests/scsi_test.rs b/tests/mod_test/tests/scsi_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..acbdf17a284e34e8640a8a886cc9a7b408ecbbef --- /dev/null +++ b/tests/mod_test/tests/scsi_test.rs @@ -0,0 +1,2429 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::cell::RefCell; +use std::mem::size_of; +use std::rc::Rc; +use std::slice::from_raw_parts; +use std::{thread, time}; + +use rand::Rng; + +use mod_test::libdriver::machine::TestStdMachine; +use mod_test::libdriver::malloc::GuestAllocator; +use mod_test::libdriver::virtio::{ + TestVirtQueue, TestVringDescEntry, VirtioDeviceOps, VIRTIO_CONFIG_S_NEEDS_RESET, + VIRTIO_F_BAD_FEATURE, VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC, +}; +use mod_test::libdriver::virtio_pci_modern::TestVirtioPciDev; +use mod_test::libtest::{test_init, TestState, MACHINE_TYPE_ARG}; +use mod_test::utils::{cleanup_img, create_img, ImageType, TEST_IMAGE_SIZE}; +#[cfg(not(target_env = "ohos"))] +use util::aio::{aio_probe, AioEngine}; +use util::byte_code::ByteCode; +use util::offset_of; + +const TEST_VIRTIO_SCSI_CDB_SIZE: usize = 32; +const TEST_VIRTIO_SCSI_SENSE_SIZE: usize = 96; + +/// According to Virtio Spec. +/// Max_channel should be 0. +/// Max_target should be less than or equal to 255. +const TEST_VIRTIO_SCSI_MAX_TARGET: u16 = 255; +/// Max_lun should be less than or equal to 16383 (2^14 - 1). +const TEST_VIRTIO_SCSI_MAX_LUN: u32 = 16383; + +const TIMEOUT_US: u64 = 10 * 1000 * 1000; +const DEFAULT_SCSI_DESC_ELEM: usize = 3; + +/// Default serial number of scsi device. +const DEFAULT_SCSI_SERIAL: &str = "123456"; + +const READ_10: u8 = 0x28; +const WRITE_10: u8 = 0x2a; +const TEST_UNIT_READY: u8 = 0x00; +const INQUIRY: u8 = 0x12; +const REPORT_LUNS: u8 = 0xa0; +const READ_CAPACITY_10: u8 = 0x25; +const MODE_SENSE: u8 = 0x1a; +const REQUEST_SENSE: u8 = 0x03; +const GET_CONFIGURATION: u8 = 0x46; +const READ_DISC_INFORMATION: u8 = 0x51; +const GET_EVENT_STATUS_NOTIFICATION: u8 = 0x4a; +const READ_TOC: u8 = 0x43; + +const VIRTIO_SCSI_S_OK: u8 = 0; +const VIRTIO_SCSI_S_BAD_TARGET: u8 = 3; + +/// Mode page codes for mode sense/set. +const MODE_PAGE_CACHING: u8 = 0x08; +const MODE_PAGE_CAPABILITIES: u8 = 0x2a; +const MODE_PAGE_ALLS: u8 = 0x3f; + +/// Basic length of fixed format sense data. 
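+/// (An 8-byte fixed header plus 10 bytes of additional sense data, i.e. ADDITIONAL SENSE LENGTH
+/// is 0x0a.)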
+const TEST_SCSI_SENSE_LEN: u32 = 18;
+
+const MODE_SENSE_LEN_DATA_LEN: u8 = 36;
+const READ_DISC_INFORMATION_DATA_LEN: u8 = 34;
+const GET_CONFIGURATION_DATA_LEN: u8 = 40;
+const GET_EVENT_STATUS_NOTIFICATION_DATA_LEN: u8 = 8;
+const REPORT_LUNS_DATA_LEN: u8 = 16;
+const INQUIRY_TARGET_DATA_LEN: u8 = 36;
+const READ_CAPACITY_10_DATA_LEN: u8 = 8;
+const INQUIRY_DATA_LEN: u8 = 96;
+const MODE_SENSE_PAGE_CACHE_LEN_DATA_LEN: u8 = 32;
+const MODE_SENSE_PAGE_ALL_DATA_LEN: u8 = 44;
+const INQUIRY_SUPPORTED_VPD_PAGES_DATA_LEN: u8 = 10;
+const INQUIRY_UNIT_SERIAL_NUMBER_DATA_LEN: u8 = 254;
+const INQUIRY_DEVICE_IDENTIFICATION_DATA_LEN: u8 = 254;
+const INQUIRY_BLOCK_LIMITS_DATA_LEN: u8 = 64;
+const INQUIRY_BLOCK_DEVICE_CHARACTERISTICS_DATA_LEN: u8 = 64;
+const INQUIRY_LOGICAL_BLOCK_PROVISIONING_DATA_LEN: u8 = 8;
+const INQUIRY_REFERRALS_DATA_LEN: u8 = 64;
+const READ_TOC_DATA_LEN: u8 = 20;
+const READ_TOC_MSF_DATA_LEN: u8 = 12;
+const READ_TOC_FORMAT_DATA_LEN: u8 = 12;
+
+const GOOD: u8 = 0x00;
+const CHECK_CONDITION: u8 = 0x02;
+
+struct VirtioScsiTest {
+    cntlr: Rc<RefCell<TestVirtioPciDev>>,
+    scsi_devices: Vec<ScsiDeviceConfig>,
+    state: Rc<RefCell<TestState>>,
+    alloc: Rc<RefCell<GuestAllocator>>,
+    queues: Vec<Rc<RefCell<TestVirtQueue>>>,
+}
+
+impl VirtioScsiTest {
+    /// Init test case. It will create a virtio-scsi controller with a scsi device using given args.
+    ///
+    /// # Arguments
+    ///
+    /// * `scsi_type` - The type of the only scsi device. Supports Harddisk and CD-ROM.
+    /// * `target` - The given target id of the only scsi device.
+    /// * `lun` - The given lun id of the only scsi device.
+    /// * `image_size` - The size of the backend image.
+    /// * `iothread` - If true, virtio-scsi controller will use iothread to process IO.
+    ///
+    /// # Return
+    ///
+    /// * `VirtioScsiTest` - Basic object for most tests, including the virtio scsi controller,
+    ///   the scsi device's config, the state of the testcase, the memory management structure
+    ///   and the virtqueues of this controller.
+    fn testcase_start_with_config(
+        scsi_type: ScsiDeviceType,
+        target: u8,
+        lun: u16,
+        image_size: u64,
+        iothread: bool,
+    ) -> VirtioScsiTest {
+        let image_path = Rc::new(create_img(image_size, 1, &ImageType::Raw));
+
+        let cntlrcfg = CntlrConfig {
+            id: 0,
+            use_iothread: iothread,
+        };
+
+        let readonly = scsi_type != ScsiDeviceType::ScsiHd;
+        let scsi_devices: Vec<ScsiDeviceConfig> = vec![ScsiDeviceConfig {
+            cntlr_id: 0,
+            device_type: scsi_type,
+            image_path,
+            target,
+            lun,
+            read_only: readonly,
+            direct: false,
+            aio: TestAioType::AioOff,
+            serial: Some(DEFAULT_SCSI_SERIAL.to_string()),
+        }];
+
+        let (cntlr, state, alloc) = scsi_test_init(cntlrcfg, scsi_devices.clone());
+        let features = virtio_scsi_default_feature(cntlr.clone());
+        let queues = cntlr
+            .borrow_mut()
+            .init_device(state.clone(), alloc.clone(), features, 3);
+
+        VirtioScsiTest {
+            cntlr,
+            scsi_devices,
+            state,
+            alloc,
+            queues,
+        }
+    }
+
+    fn general_testcase_run(scsi_type: ScsiDeviceType, target: u8, lun: u16) -> VirtioScsiTest {
+        VirtioScsiTest::testcase_start_with_config(scsi_type, target, lun, TEST_IMAGE_SIZE, false)
+    }
+
+    // Strings are not "\0"-terminated in Rust, so the data_in_len parameter is needed to
+    // control the length of the data to read.
+    fn virtio_scsi_do_command(
+        &mut self,
+        req: TestVirtioScsiCmdReq,
+        data_out: &Option<String>,
+        resp: &mut TestVirtioScsiCmdResp,
+        data_in: &mut Vec<u8>,
+        data_in_len: u32,
+    ) {
+        assert!(data_in_len <= data_in.capacity() as u32);
+
+        let virtqueue = &self.queues[2];
+        let mut len = Some(0);
+        let mut data_entries: Vec<TestVringDescEntry> = Vec::with_capacity(DEFAULT_SCSI_DESC_ELEM);
+
+        // Request Header.
+ let cmdreq_len = size_of::() as u64; + let req_addr = self.alloc.borrow_mut().alloc(cmdreq_len); + let req_bytes = req.as_bytes(); + self.state.borrow().memwrite(req_addr, req_bytes); + + data_entries.push(TestVringDescEntry { + data: req_addr, + len: cmdreq_len as u32, + write: false, + }); + + // Data out. + if let Some(data) = data_out { + let out_len = data.len() as u32; + let out_bytes = data.as_bytes().to_vec(); + let out_addr = self.alloc.borrow_mut().alloc(u64::from(out_len)); + self.state.borrow().memwrite(out_addr, out_bytes.as_slice()); + data_entries.push(TestVringDescEntry { + data: out_addr, + len: out_len, + write: false, + }); + } + + // Response. + let cmdresp_len = size_of::() as u64; + let resp_addr = self + .alloc + .borrow_mut() + .alloc(cmdresp_len + u64::from(data_in_len)); + let resp_bytes = resp.as_bytes(); + self.state.borrow().memwrite(resp_addr, resp_bytes); + + // Data in. + data_entries.push(TestVringDescEntry { + data: resp_addr, + len: cmdresp_len as u32, + write: true, + }); + + if data_in_len > 0 { + data_entries.push(TestVringDescEntry { + data: resp_addr + cmdresp_len, + len: data_in_len, + write: true, + }); + } + + let free_head = virtqueue + .borrow_mut() + .add_chained(self.state.clone(), data_entries); + + self.cntlr + .borrow() + .kick_virtqueue(self.state.clone(), virtqueue.clone()); + self.cntlr.borrow().poll_used_elem( + self.state.clone(), + virtqueue.clone(), + free_head, + TIMEOUT_US, + &mut len, + true, + ); + + let resp_bytes_new = self.state.borrow().memread(resp_addr, cmdresp_len); + let slice = unsafe { + from_raw_parts( + resp_bytes_new.as_ptr() as *const TestVirtioScsiCmdResp, + size_of::(), + ) + }; + *resp = slice[0]; + + if data_in_len > 0 { + data_in.append( + self.state + .borrow() + .memread(resp_addr + cmdresp_len, u64::from(data_in_len)) + .as_mut(), + ); + } + } + + fn scsi_cdb_test(&mut self, cdb_test: CdbTest) -> Option> { + let scsi_req = TestVirtioScsiCmdReq::new(cdb_test.target, cdb_test.lun, cdb_test.cdb); + let mut scsi_resp = TestVirtioScsiCmdResp::default(); + let mut data_in = Vec::::with_capacity(cdb_test.data_in_length as usize); + + self.virtio_scsi_do_command( + scsi_req, + &cdb_test.data_out, + &mut scsi_resp, + &mut data_in, + cdb_test.data_in_length, + ); + + assert_eq!(scsi_resp.response, cdb_test.expect_response); + assert_eq!(scsi_resp.status, cdb_test.expect_status); + if let Some(result_vec) = cdb_test.expect_result_data { + assert_eq!(result_vec, data_in); + } + if let Some(sense_vec) = cdb_test.expect_sense { + assert_eq!(sense_vec, scsi_resp.sense); + } + + if cdb_test.data_in_length != 0 { + Some(data_in) + } else { + None + } + } + + fn testcase_tear_down(&mut self) { + self.cntlr + .borrow_mut() + .destroy_device(self.alloc.clone(), self.queues.clone()); + self.state.borrow_mut().stop(); + for device in self.scsi_devices.iter() { + cleanup_img(device.image_path.clone().to_string()); + } + } + + // Basic IO function test. + fn scsi_try_io(&mut self, target: u8, lun: u16, scsi_type: ScsiDeviceType) { + // Test: scsi command: WRITE_10. + // Write to LBA(logical block address) 0, transfer length 1 sector. + // Test Result: Check if scsi command WRITE_10 was handled successfully for scsi harddisk + // and was failure for scsi CD-ROM. + let mut write_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + write_cdb[0] = WRITE_10; + write_cdb[8] = 0x1; // 1 logical sector. CD: 2048 Bytes. HD: 512 Bytes. 
+ let (expect_status, expect_sense, data) = if scsi_type == ScsiDeviceType::ScsiHd { + (GOOD, None, vec![0x8; 512]) + } else { + ( + CHECK_CONDITION, + Some(get_sense_bytes(SCSI_SENSE_IO_ERROR)), + vec![0x8; 2048], + ) + }; + let write_data = String::from_utf8(data).unwrap(); + + let cdb_test_args = CdbTest { + cdb: write_cdb, + target, + lun, + data_out: Some(write_data.clone()), + data_in_length: 0, + expect_response: VIRTIO_SCSI_S_OK, + expect_status, + expect_result_data: None, + expect_sense, + }; + self.scsi_cdb_test(cdb_test_args); + + // Test: scsi command: READ_10. + // Read from LBA(logical block address) 0, transfer length 1. + // Test Result: Check if scsi command READ_10 was handled successfully. And check the read + // data is the right data which was sent in WRITE_10 test for scsi harddisk. + let mut read_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + read_cdb[0] = READ_10; + read_cdb[8] = 0x1; // 1 sector. + + let data_in_length = write_data.len() as u32; + let expect_result_data = match scsi_type { + ScsiDeviceType::ScsiHd => Some(write_data.into_bytes()), + ScsiDeviceType::ScsiCd => None, + }; + + let cdb_test_args = CdbTest { + cdb: read_cdb, + target, + lun, + data_out: None, + data_in_length, // Read 1 sector data. + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data, + expect_sense: None, + }; + self.scsi_cdb_test(cdb_test_args); + } +} + +struct CdbTest { + cdb: [u8; TEST_VIRTIO_SCSI_CDB_SIZE], + target: u8, + lun: u16, + data_out: Option, + data_in_length: u32, + expect_response: u8, + expect_status: u8, + expect_result_data: Option>, + expect_sense: Option<[u8; TEST_VIRTIO_SCSI_SENSE_SIZE]>, +} + +#[derive(Default)] +struct ScsiSense { + /// Sense key. + key: u8, + /// Additional sense code. + asc: u8, + /// Additional sense code qualifier. + ascq: u8, +} + +const SCSI_SENSE_INVALID_OPCODE: ScsiSense = ScsiSense { + key: 0x05, + asc: 0x20, + ascq: 0x00, +}; + +const SCSI_SENSE_INVALID_FIELD: ScsiSense = ScsiSense { + key: 0x05, + asc: 0x24, + ascq: 0x00, +}; + +const SCSI_SENSE_LUN_NOT_SUPPORTED: ScsiSense = ScsiSense { + key: 0x05, + asc: 0x25, + ascq: 0x00, +}; + +const SCSI_SENSE_NO_SENSE: ScsiSense = ScsiSense { + key: 0, + asc: 0, + ascq: 0, +}; + +const SCSI_SENSE_IO_ERROR: ScsiSense = ScsiSense { + key: 0x0b, + asc: 0, + ascq: 0x06, +}; + +#[repr(C, packed)] +#[derive(Clone, Copy, Debug, Default)] +struct TestVirtioScsiCmdReq { + lun: [u8; 8], + tag: u64, + task_attr: u8, + prio: u8, + crn: u8, + cdb: [u8; TEST_VIRTIO_SCSI_CDB_SIZE], +} + +impl TestVirtioScsiCmdReq { + fn new(target: u8, lun: u16, cdb: [u8; TEST_VIRTIO_SCSI_CDB_SIZE]) -> Self { + let mut req = TestVirtioScsiCmdReq::default(); + let mut target_lun = [0_u8; 8]; + target_lun[0] = 1; + target_lun[1] = target; + target_lun[2] = (lun >> 8) as u8; + target_lun[3] = lun as u8; + + req.lun = target_lun; + req.cdb = cdb; + + req + } +} + +impl ByteCode for TestVirtioScsiCmdReq {} + +#[repr(C, packed)] +#[derive(Clone, Copy, Debug)] +struct TestVirtioScsiCmdResp { + sense_len: u32, + resid: u32, + status_qualifier: u16, + status: u8, + response: u8, + sense: [u8; TEST_VIRTIO_SCSI_SENSE_SIZE], +} + +impl ByteCode for TestVirtioScsiCmdResp {} + +impl Default for TestVirtioScsiCmdResp { + fn default() -> Self { + TestVirtioScsiCmdResp { + sense_len: 0, + resid: 0, + status_qualifier: 0, + status: 0, + response: 0, + sense: [0; TEST_VIRTIO_SCSI_SENSE_SIZE], + } + } +} + +struct CntlrConfig { + // Controller id. + id: u8, + // If true, use iothread. 
+ use_iothread: bool, +} + +#[derive(PartialEq, Clone, Debug)] +enum ScsiDeviceType { + // Scsi Harddisk. + ScsiHd = 0, + // Scsi CD-ROM/DVD-ROM. + ScsiCd = 1, +} + +impl std::fmt::Display for ScsiDeviceType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + ScsiDeviceType::ScsiHd => "scsi-hd", + ScsiDeviceType::ScsiCd => "scsi-cd", + } + ) + } +} + +#[allow(dead_code)] +#[derive(Clone, Debug, Copy)] +enum TestAioType { + AioOff = 0, + AioNative = 1, + AioIOUring = 2, +} + +impl std::fmt::Display for TestAioType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + TestAioType::AioIOUring => "io_uring", + TestAioType::AioNative => "native", + TestAioType::AioOff => "off", + } + ) + } +} + +#[derive(Clone, Debug)] +struct ScsiDeviceConfig { + cntlr_id: u8, + device_type: ScsiDeviceType, + image_path: Rc, + target: u8, + lun: u16, + read_only: bool, + direct: bool, + aio: TestAioType, + serial: Option, +} + +impl ScsiDeviceConfig { + fn cmdline(&self) -> String { + let serial_args = if let Some(serial) = &self.serial { + format!(",serial={}", serial) + } else { + "".to_string() + }; + + let device_args = format!( + "-device {},bus=scsi{}.0,scsi-id={},lun={},drive=drive-scsi0-0-{}-{},id=scsi0-0-{}-{}{}", + self.device_type, self.cntlr_id, self.target, self.lun, self.target, self.lun, self.target, + self.lun, serial_args, + ); + + let drive_args = format!( + "-drive file={},id=drive-scsi0-0-{}-{},direct={},readonly={},aio={}", + self.image_path, self.target, self.lun, self.direct, self.read_only, self.aio, + ); + + format!("{} {} ", device_args, drive_args) + } +} + +fn get_sense_bytes(sense: ScsiSense) -> [u8; TEST_VIRTIO_SCSI_SENSE_SIZE] { + let mut bytes = [0; TEST_VIRTIO_SCSI_SENSE_SIZE]; + bytes[0] = 0x70; // Fixed. Current errors. + bytes[2] = sense.key; + bytes[7] = 10; // Fixed. 
sense length: 10; + bytes[12] = sense.asc; + bytes[13] = sense.ascq; + + bytes +} + +pub fn virtio_scsi_default_feature(cntlr: Rc>) -> u64 { + let mut features = cntlr.borrow().get_device_features(); + features &= + !(VIRTIO_F_BAD_FEATURE | 1 << VIRTIO_RING_F_INDIRECT_DESC | 1 << VIRTIO_RING_F_EVENT_IDX); + + features +} + +fn scsi_test_init( + controller: CntlrConfig, + scsidevice: Vec, +) -> ( + Rc>, + Rc>, + Rc>, +) { + let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + + let pci_fn = 0; + let pci_slot = 0x4; + + let iothread_args = if controller.use_iothread { + let mut iothread_args_vec: Vec<&str> = "-object iothread,id=iothread1".split(' ').collect(); + args.append(&mut iothread_args_vec); + ",iothread=iothread1" + } else { + "" + }; + + let cntlr_args = format!( + "-device virtio-scsi-pci,id=scsi{},bus=pcie.0,addr={}.0{}", + controller.id, pci_slot, iothread_args + ); + let mut cntlr_str_vec: Vec<&str> = cntlr_args[..].split(' ').collect(); + args.append(&mut cntlr_str_vec); + + let mut scsi_device_args = String::new(); + + for device in scsidevice.iter() { + let disk_args = device.cmdline(); + scsi_device_args.push_str(&disk_args); + } + + let mut disk_args_vec: Vec<&str> = scsi_device_args.trim().split(' ').collect(); + args.append(&mut disk_args_vec); + + let test_state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(test_state.clone()); + let allocator = machine.allocator.clone(); + + let virtio_scsi = Rc::new(RefCell::new(TestVirtioPciDev::new(machine.pci_bus))); + virtio_scsi.borrow_mut().init(pci_slot, pci_fn); + + (virtio_scsi, test_state, allocator) +} + +/// Virtio Scsi hard disk basic function test. target 31, lun 7. +/// TestStep: +/// 0. Init process. +/// 1. Traverse all possible targets from 0 to VIRTIO_SCSI_MAX_TARGET(255). (using scsi command +/// INQUIRY) (lun is always 0 in this traverse process). +/// 2. Get all luns info in target 31.(using scsi command REPORT_LUNS) +/// 3. Check if scsi device is OK.(using scsi command TEST_UNIT_READY) +/// 4. Get the capacity of the disk.(using scsi command READ_CAPACITY_10) +/// 5. Get the caching strategy of the disk.(using scsi command MODE_SENSE) +/// 6. Get some other information of the disk.(using scsi command INQUITY) +/// 7. Basic IO test. +/// 8. Test ends. Destroy device. +/// Expect: +/// 1. 1/2/3/4/5/6/7/8: success. +/// step 2. Response VIRTIO_SCSI_S_BAD_TARGET for INQUIRY command in target 0-30. +/// Response VIRTIO_SCSI_S_OK for INQUIRY command in target 31. +/// step 3. Reported lun is 7. +/// step 4. Response VIRTIO_SCSI_S_OK. +/// step 5. Get the right mode information of disk. +/// step 6. Get the right information of disk. +/// step 7. READ/WRITE is OK. +#[test] +fn scsi_hd_basic_test() { + let target = 31; + let lun = 7; + let mut vst = VirtioScsiTest::general_testcase_run(ScsiDeviceType::ScsiHd, target, lun); + + // Test 1: scsi command: INQUIRY for scsi controller. + // Traverse all possible targets from 0 to VIRTIO_SCSI_MAX_TARGET(255). + // Note: stratovirt mst can only has 256 num free, so just traverse from 0 to 31. + let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[4] = INQUIRY_DATA_LEN; + for i in 0..32 { + // Test 1 Result: Only response 0 for target == 31. Otherwise response + // VIRTIO_SCSI_S_BAD_TARGET. 
+ let expect_result = if i == u16::from(target) { + VIRTIO_SCSI_S_OK + } else { + VIRTIO_SCSI_S_BAD_TARGET + }; + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target: i as u8, + lun: 0, + data_out: None, + data_in_length: u32::from(INQUIRY_DATA_LEN), + expect_response: expect_result, + expect_status: GOOD, + expect_result_data: None, + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + } + + // Test 2: scsi command: REPORT_LUNS. + // Test 2 Result: Check if scsi command REPORT_LUNS was handled successfully. + // And check the read data is the right lun information (target 31, lun 7). + let mut report_luns_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + report_luns_cdb[0] = REPORT_LUNS; + report_luns_cdb[9] = REPORT_LUNS_DATA_LEN; + let mut expect_result_vec = vec![0_u8; REPORT_LUNS_DATA_LEN as usize]; + // REPORT_LUNS parameter data format. + // Expect result: Only 1 lun and lun id is 7. + // Bytes[0..3]: Lun list length (n-7). + expect_result_vec[3] = 0x8; + // Bytes[4..7]: Reserved. + // Bytes[8..15]: Lun[first]. + expect_result_vec[9] = lun as u8; + // Bytes[n-7..n]: Lun[last]. + + let cdb_test_args = CdbTest { + cdb: report_luns_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(REPORT_LUNS_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 3: scsi command: TEST_UNIT_READY. + // Test 3 Result: Check if scsi command TEST_UNIT_READY was handled successfully. + let mut test_unit_ready_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + test_unit_ready_cdb[0] = TEST_UNIT_READY; + let cdb_test_args = CdbTest { + cdb: test_unit_ready_cdb, + target, + lun, + data_out: None, + data_in_length: 0, + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: None, + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 4: scsi command: READ_CAPACITY_10 + // Test 4 Result: Check if scsi command READ_CAPACITY_10 was handled successfully. + // And the returned capacity is right. + let mut read_capacity_10_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + read_capacity_10_cdb[0] = READ_CAPACITY_10; + let cdb_test_args = CdbTest { + cdb: read_capacity_10_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(READ_CAPACITY_10_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: None, + expect_sense: None, + }; + let data_in = vst.scsi_cdb_test(cdb_test_args); + + // Bytes[0-3]: Returned Logical Block Address(the logical block address of the last logical + // block). + // Bytes[4-7]: Logical Block Length In Bytes. + // Total size = (last logical block address + 1) * block length. + assert_eq!( + (u64::from(u32::from_be_bytes( + data_in.as_ref().unwrap()[0..4].try_into().unwrap() + )) + 1) + * u64::from(u32::from_be_bytes( + data_in.as_ref().unwrap()[4..8].try_into().unwrap() + )), + TEST_IMAGE_SIZE + ); + + // Test 5: scsi command: MODE_SENSE. + // Byte2: bits[0-5]: page code. bits[6-7]: page control. + // Test 5.1 page code = MODE_PAGE_CACHING. + // Test 5.1 Result: Check if scsi command MODE_SENSE was handled successfully. + // And the returned mode data is right. 
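+    // MODE SENSE(6) CDB layout: byte 0 is the opcode (0x1a), byte 2 holds the page control in
+    // bits 6-7 and the page code in bits 0-5, and byte 4 is the allocation length. A hedged
+    // sketch (not part of the original test) of requesting the changeable values of the same
+    // page would only differ in byte 2:
+    //     mode_sense_cdb[2] = (0x1 << 6) | MODE_PAGE_CACHING; // PC = 01b: changeable values.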
+ let mut mode_sense_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + mode_sense_cdb[0] = MODE_SENSE; + mode_sense_cdb[2] = MODE_PAGE_CACHING; + mode_sense_cdb[4] = MODE_SENSE_PAGE_CACHE_LEN_DATA_LEN; + let mut expect_result_vec = vec![0; MODE_SENSE_PAGE_CACHE_LEN_DATA_LEN as usize]; + // MODE_SENSE MODE_PAGE_CACHING(0x8) parameter data format. + // Bytes[0-1]: Mode Data Length (n-1). + expect_result_vec[0] = 0x1f; + // Bytes[2]: Device Specific Parameter. + // Bytes[3]: Block Descriptor Length. + expect_result_vec[3] = 0x8; + // Byte[4]: density code. + // Bytes[5-7]: number of blocks. + expect_result_vec[5] = 0x2; + // Byte[8]: Reserved. + // Byte[9-11]: Block Length. + expect_result_vec[10] = 0x2; + // Bytes[12]: page code. + expect_result_vec[12] = 0x8; + // Byte[13]: page length(0x12). + expect_result_vec[13] = 0x12; + // Byte[14]: IC/ABPF/CAP/DISC/SIZE/WCE/MF/RCD. + expect_result_vec[14] = 0x4; + // Bytes[15-31]: do not support now. + + let cdb_test_args = CdbTest { + cdb: mode_sense_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(MODE_SENSE_PAGE_CACHE_LEN_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 5.2 page code = MODE_PAGE_ALLS. + // Test 5.2 Result: Check if scsi command MODE_SENSE was handled successfully. + // And the returned mode data is right. + let mut mode_sense_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + mode_sense_cdb[0] = MODE_SENSE; + mode_sense_cdb[2] = MODE_PAGE_ALLS; + mode_sense_cdb[4] = MODE_SENSE_PAGE_ALL_DATA_LEN; + let mut expect_result_vec = vec![0; MODE_SENSE_PAGE_ALL_DATA_LEN as usize]; + // MODE_SENSE MODE_PAGE_ALLS parameter data format. + // Bytes[0-1]: Mode Data Length (n-1). + expect_result_vec[0] = MODE_SENSE_PAGE_ALL_DATA_LEN - 1; + // Bytes[2]: Device Specific Parameter. + // Bytes[3]: Block Descriptor Length. + expect_result_vec[3] = 0x8; + // Byte[4]: density code. + // Bytes[5-7]: number of blocks. + expect_result_vec[5] = 0x2; + // Byte[8]: Reserved. + // Bytes[9-11]: Block Length. + expect_result_vec[10] = 0x2; + // Bytes[12-23]: MODE_PAGE_R_W_ERROR(0x1) parameter data format. + // Byte[12]: page code. + expect_result_vec[12] = 0x1; + // Byte[13]: page length(0xa). + expect_result_vec[13] = 0xa; + // Byte[14]: AWRE/ARRE/TB/RC/EER/PER/DTE/DCR + expect_result_vec[14] = 0x80; + // Bytes[15-23]: do not support now. + // Bytes[24-43]: MODE_PAGE_CACHING(0x8) parameter data format. See test 5.1. + expect_result_vec[24] = 0x8; + expect_result_vec[25] = 0x12; + expect_result_vec[26] = 0x4; + let cdb_test_args = CdbTest { + cdb: mode_sense_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(MODE_SENSE_PAGE_ALL_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 6: scsi command: INQUIRY for scsi device. + // Byte1 bit0: EVPD(enable vital product data). + // Byte2: page code for vital product data. + // Test 6.1 EVPD = 0: Inquiry basic information of this scsi device such as vendor + // and product information. + // Test 6.1 Result: Check if scsi command INQUIRY was handled successfully. And + // it has product/vendor information. 
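+    // Standard INQUIRY data keeps the T10 vendor identification in bytes 8-15 and the product
+    // identification in bytes 16-31 (ASCII, space padded), which is why the check below only
+    // looks for the "STRA" prefix. An illustrative way (not part of the original test) to pull
+    // the vendor field out of the returned buffer:
+    //     let vendor = String::from_utf8_lossy(&data_in.as_ref().unwrap()[8..16]);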
+ let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[4] = INQUIRY_DATA_LEN; + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(INQUIRY_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: None, + expect_sense: None, + }; + let data_in = vst.scsi_cdb_test(cdb_test_args); + assert!(std::str::from_utf8(&data_in.unwrap()) + .unwrap() + .contains("STRA")); + + // Test 6.2 EVPD = 1, byte_code = 0x00: Inquiry supported VPD Pages of this scsi device. + // Test 6.2 Result: Check if scsi command INQUIRY was handled successfully. And the + // returned supported VPD pages is right. + let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[1] = 0x1; + inquiry_cdb[4] = INQUIRY_SUPPORTED_VPD_PAGES_DATA_LEN; + let expect_result_vec = vec![0, 0, 0, 0x6, 0, 0x80, 0x83, 0xb0, 0xb1, 0xb2]; + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(INQUIRY_SUPPORTED_VPD_PAGES_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 6.3 EVPD = 1, byte_code = 0x80: Inquiry unit serial number. + // Test 6.3 Result: Check if scsi command INQUIRY was handled successfully. And the + // returned serial number is DEFAULT_SCSI_SERIAL. + let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[1] = 0x1; + inquiry_cdb[2] = 0x80; + inquiry_cdb[4] = INQUIRY_UNIT_SERIAL_NUMBER_DATA_LEN; + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(INQUIRY_UNIT_SERIAL_NUMBER_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: None, + expect_sense: None, + }; + let data_in = vst.scsi_cdb_test(cdb_test_args); + // Unit Serial Number starts from Byte 4. + assert!(std::str::from_utf8(&data_in.unwrap()[4..]) + .unwrap() + .contains(DEFAULT_SCSI_SERIAL)); + + // Test 6.4 EVPD = 1, byte_code = 0x83: Inquiry scsi device identification. + // Test 6.4 Result: Check if scsi command INQUIRY was handled successfully. + // Note: Stratovirt does not reply anything usefully for scsi device identification now. + let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[1] = 0x1; + inquiry_cdb[2] = 0x83; + inquiry_cdb[4] = INQUIRY_DEVICE_IDENTIFICATION_DATA_LEN; + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(INQUIRY_DEVICE_IDENTIFICATION_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: None, + expect_sense: None, + }; + let data_in = vst.scsi_cdb_test(cdb_test_args); + assert!(data_in.as_ref().unwrap()[1] == 0x83); + + // Test 6.5 EVPD = 1, byte_code = 0xb0: Inquiry scsi block limits. + // Test 6.5 Result: Check if scsi command INQUIRY was handled successfully. + // Note: Stratovirt does not reply anything usefully for scsi block limits now. 
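+    // Every VPD page begins with a 4 byte header: byte 0 is the peripheral device type, byte 1
+    // the page code and bytes 2-3 the page length (payload size excluding the header), which is
+    // why the asserts below expect 0xb0 in byte 1 and 64 - 4 in byte 3. A hedged sketch (not in
+    // the original test) of decoding the full 16-bit length field:
+    //     let page_len = u16::from_be_bytes(data_in.as_ref().unwrap()[2..4].try_into().unwrap());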
+ let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[1] = 0x1; + inquiry_cdb[2] = 0xb0; + inquiry_cdb[4] = INQUIRY_BLOCK_LIMITS_DATA_LEN; + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(INQUIRY_BLOCK_LIMITS_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: None, + expect_sense: None, + }; + let data_in = vst.scsi_cdb_test(cdb_test_args); + assert!(data_in.as_ref().unwrap()[1] == 0xb0); + assert!(data_in.unwrap()[3] == 64 - 4); + + // Test 6.6 EVPD = 1, byte_code = 0xb1: Inquiry block device characteristics. + // Test 6.6 Result: Check if scsi command INQUIRY was handled successfully. + let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[1] = 0x1; + inquiry_cdb[2] = 0xb1; + inquiry_cdb[4] = INQUIRY_BLOCK_DEVICE_CHARACTERISTICS_DATA_LEN; + // Byte0: bits[0-4]: Scsi device type. + // Byte1: Page code. + // Byte2: Reserved. + // Byte3: page length(length - 4). + let mut expect_result_vec = vec![0, 0xb1, 0, 0x3c]; + expect_result_vec.resize(INQUIRY_BLOCK_DEVICE_CHARACTERISTICS_DATA_LEN as usize, 0); + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(INQUIRY_BLOCK_DEVICE_CHARACTERISTICS_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 6.7 EVPD = 1, byte_code = 0xb2: Inquiry Logical Block Provisioning. + // Test 6.7 Result: Check if scsi command INQUIRY was handled successfully. + let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[1] = 0x1; + inquiry_cdb[2] = 0xb2; + inquiry_cdb[4] = INQUIRY_LOGICAL_BLOCK_PROVISIONING_DATA_LEN; + // Byte0: bits[0-4]: Scsi device type. + // Byte1: Page code. + // Byte2: Reserved. + // Byte3: page length(length - 4). + // Byte4: Threshold exponent. + // Byte5: LBPU(bit 7) / LBPWS / LBPWS10 / LBPRZ / ANC_SUP / DP. + // Byte6: Threshold percentage / Provisioning Type. + // Byte7: Threshold percentage. + let expect_result_vec = vec![0, 0xb2, 0, 0x4, 0, 0x60, 0x1, 0]; + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(INQUIRY_LOGICAL_BLOCK_PROVISIONING_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 6.7 EVPD = 1, byte_code = 0xb3: Referrals VPD page. + // Test 6.7 Result: Check if scsi command INQUIRY was failure. + let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[1] = 0x1; + inquiry_cdb[2] = 0xb3; + inquiry_cdb[4] = INQUIRY_REFERRALS_DATA_LEN; + let expect_sense = get_sense_bytes(SCSI_SENSE_INVALID_FIELD); + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(INQUIRY_REFERRALS_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: CHECK_CONDITION, + expect_result_data: None, + expect_sense: Some(expect_sense), + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 7: basic io test. + vst.scsi_try_io(target, lun, ScsiDeviceType::ScsiHd); + + vst.testcase_tear_down(); +} + +/// Virtio Scsi CD-ROM basic function test. target 0, lun 7. +/// TestStep: +/// 0. Init process. +/// 1. 
Get the mode page capabilities.(Using scsi command MODE_SENSE) +/// 2. Request if there exist errors.(Using scsi command REQUEST_SENSE) +/// 3. Read the table of Content.(Using scsi command READ_TOC) +/// 4. Read the disc information.(Using scsi command READ_DISC_INFORMATION) +/// 5. Get configuration of the CD/DVD.(Using scsi command GET_CONFIGURATION) +/// 6. Test CD/DVD's event status notification(Using scsi command GET_EVENT_STATUS_NOTIFICATION) +/// 7. Basic IO test. +/// 8. Test ends. Destroy device. +/// Note: +/// 1. Do not test TEST_UNIT_READY/REPORT_LUNS/READ_CAPACITY_10 again. See test scsi_hd_basic. +/// Expect: +/// 1. 1/2/3/4/5/6/7/8: success. +#[test] +fn scsi_cd_basic_test() { + let target = 0; + let lun = 7; + let mut vst = VirtioScsiTest::general_testcase_run(ScsiDeviceType::ScsiCd, target, lun); + + // Test 1: scsi command: MODE_SENSE. + // Test 1.1 page code = MODE_PAGE_CAPABILITIES. + // Test 1.1 Result: Check if scsi command MODE_SENSE was handled successfully. + // And the returned mode data is right. + let mut mode_sense_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + mode_sense_cdb[0] = MODE_SENSE; + mode_sense_cdb[2] = MODE_PAGE_CAPABILITIES; + mode_sense_cdb[4] = MODE_SENSE_LEN_DATA_LEN; + let mut expect_result_vec = vec![0; MODE_SENSE_LEN_DATA_LEN as usize]; + // MODE_SENSE MODE_PAGE_ALLS(0x2a) parameter data format. + // Byte[0]: Mode Data Length (n-1). + expect_result_vec[0] = MODE_SENSE_LEN_DATA_LEN - 1; + // Bytes[1-3]: 0. + // Byte[4]: PS/Reserved/Bits[0-5]: Page Code(0x2A). + expect_result_vec[4] = 0x2a; + // Byte[5]: Page Length(28). + expect_result_vec[5] = 28; + // Byte[6]: Reserved/Reserved/DVD-RAW Read(1)/DVD-R READ(1)/DVD-ROM READ(1)/ + // Method 2/CD-RW Read(1)/CD-R Read(1). + expect_result_vec[6] = 0x3b; + // Byte[7]: Reserved/Reserved/DVD-RAW WRITE/DVD-R WRITE/Reserved/Test Write/ + // CD-R/RW Write/CD-R Write. + // Byte[8]: BUF/Multi Session(1)/Mode 2 Form 2(1)/Mode 2 Form 1(1)/Digital Port 2(1)/ + // Digital Port 1(1)/Composite(1)/Audio Play(1). + expect_result_vec[8] = 0x7f; + // Byte[9]: Read Bar Code(1)/UPC(1)/ISRC(1)/C2 Pointers supported(1)/R-W Deinterleaved & + // corrected(1)/R-W supported(1)/CD-DA Stream is Accurate(1)/CD-DA Cmds supported(1). + expect_result_vec[9] = 0xff; + // Byte[10]: Bits[5-7]: Loading Mechanism Type(1)/Reserved/Eject(1)/Prevent Jumper(1)/ + // Lock State/Lock(1). + expect_result_vec[10] = 0x2d; + // Byte[11]: Bits[6-7]: Reserved/R-W in Lead-in/Side Change Capable/SSS/Changer Supports Disc + // Present/Separate Channel Mute/Separate volume levels + // Bytes[12-13]: Obsolete. + // Bytes[14-15]: Number of Volume Levels Supported. + expect_result_vec[15] = 0x2; + // Bytes[16-17]: Buffer Size Supported. + expect_result_vec[16] = 0x8; + // Bytes[18-25]: Do not support now. + let cdb_test_args = CdbTest { + cdb: mode_sense_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(MODE_SENSE_LEN_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 2: scsi command: REQUEST_SENSE. + // Test 2 Result: Check if scsi command REQUEST_SENSE was handled successfully. + // And the returned sense is SCSI_SENSE_NO_SENSE. 
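+    // Fixed format sense data (response code 0x70) carries the sense key in the low nibble of
+    // byte 2, the additional sense length in byte 7 and the ASC/ASCQ pair in bytes 12-13, which
+    // is exactly how get_sense_bytes() builds the expected buffers. Illustrative decoding sketch
+    // (not part of the original test):
+    //     let key = sense[2] & 0x0f;
+    //     let (asc, ascq) = (sense[12], sense[13]);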
+ let mut request_sense_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + request_sense_cdb[0] = REQUEST_SENSE; + request_sense_cdb[4] = TEST_SCSI_SENSE_LEN as u8; + let cdb_test_args = CdbTest { + cdb: request_sense_cdb, + target, + lun, + data_out: None, + data_in_length: TEST_SCSI_SENSE_LEN, + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: None, + expect_sense: Some(get_sense_bytes(SCSI_SENSE_NO_SENSE)), + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 3: scsi command: READ_TOC. + // Test 3.1: + // Byte1 bit1: MSF = 0. Byte2 bits[0-3]: Format = 0; + // Test 3.1 Result: Check if scsi command READ_TOC was handled successfully. And check the read + // data is the same with the expect result. + let mut read_toc_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + read_toc_cdb[0] = READ_TOC; + read_toc_cdb[8] = READ_TOC_DATA_LEN; + let mut expect_result_vec = vec![0; READ_TOC_DATA_LEN as usize]; + // Bytes[0-1]: TOC Data Length. + expect_result_vec[1] = 0x12; + // Byte[2]: First Track Number. + expect_result_vec[2] = 1; + // Byte[3]: Last Track Number. + expect_result_vec[3] = 1; + // Byte[4]: Reserved. + // Byte[5]: Bits[5-7]: ADR, Bits[0-4]: CONTROL. + expect_result_vec[5] = 0x14; + // Byte[6]: Track Number. + expect_result_vec[6] = 0x1; + // Byte[7]: Reserved. + // Bytes[8-11]: Track Start Address(LBA form = 000000h, MSF form = 00:00:02:00). + // Byte[12]: Reserved. + // Byte[13]: Bits[5-7]: ADR, Bits[0-4]: CONTROL. + expect_result_vec[13] = 0x14; + // Byte[14]: Track Number. + expect_result_vec[14] = 0xaa; + // Byte[15]: Reserved. + // Bytes[16-19]: Track Start Address. + expect_result_vec[17] = 2; + let cdb_test_args = CdbTest { + cdb: read_toc_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(READ_TOC_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 3.2: scsi command: READ_TOC. + // Byte1 bit1: MSF = 1. + // Byte2 bits[0-3]: Format = 0; (Format(Select specific returned data format)(CD: 0,1,2)). + // Byte6: Track/Session Number. + // Test 3.2 Result: Check if scsi command READ_TOC was handled successfully. And check the read + // data is the same with the expect result. + let mut read_toc_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + read_toc_cdb[0] = READ_TOC; + read_toc_cdb[1] = 2; + read_toc_cdb[6] = 0xaa; + read_toc_cdb[8] = READ_TOC_MSF_DATA_LEN; + // Bytes[0-1]: TOC Data Length. + // Byte[2]: First Track Number. + // Byte[3]: Last Track Number. + // Byte[4]: Reserved. + // Byte[5]: Bits[5-7]: ADR, Bits[0-4]: CONTROL. + // Byte[6]: Track Number. + // Byte[7]: Reserved. + // Bytes[8-11]: Track Start Address(LBA form = 000000h, MSF form = 00:00:02:00). + let expect_result_vec = vec![0, 0xa, 1, 1, 0, 0x14, 0xaa, 0, 0, 0x1d, 9, 0x2f]; + let cdb_test_args = CdbTest { + cdb: read_toc_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(READ_TOC_MSF_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 3.3: scsi command: READ_TOC. + // Byte1 bit1: MSF = 0. + // Byte2 bits[0-3]: Format = 1; (Format(Select specific returned data format)(CD: 0,1,2)). + // Byte6: Track/Session Number. + // Test 3.3 Result: Check if scsi command READ_TOC was handled successfully. And check the read + // data is the same with the expect result. 
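+    // With MSF = 1, READ_TOC reports addresses as 00:MM:SS:FF, where one second holds 75 frames
+    // and the data area starts after a 2 second (150 frame) pregap, so frames = lba + 150.
+    // A hedged sketch of the conversion (not part of the original test):
+    //     let frames = lba + 150;
+    //     let (m, s, f) = (frames / (75 * 60), (frames / 75) % 60, frames % 75);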
+ let mut read_toc_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + read_toc_cdb[0] = READ_TOC; + read_toc_cdb[2] = 1; + read_toc_cdb[8] = READ_TOC_FORMAT_DATA_LEN; + let expect_result_vec = vec![0, 0xa, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]; + let cdb_test_args = CdbTest { + cdb: read_toc_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(READ_TOC_FORMAT_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 4: scsi command: READ_DISC_INFORMATION. + // Test 4 Result: Check if scsi command READ_DISC_INFORMATION was handled successfully. And + // check the read data is the same with the expect result. + let mut read_disc_information_cdb: [u8; TEST_VIRTIO_SCSI_CDB_SIZE] = + [0; TEST_VIRTIO_SCSI_CDB_SIZE]; + read_disc_information_cdb[0] = READ_DISC_INFORMATION; + read_disc_information_cdb[8] = READ_DISC_INFORMATION_DATA_LEN; + // Bytes[0-1]: Disc Information Length(32). + // Byte2: Disc Information Data Type(000b) | Erasable(0) | State of last Session(01b) | + // Disc Status(11b). + // Byte3: Number of First Track on Disc. + // Byte4: Number of Sessions. + // Byte5: First Track Number in Last Session(Least Significant Byte). + // Byte6: Last Track Number in Last Session(Last Significant Byte). + // Byte7: DID_V | DBC_V | URU:Unrestricted Use Disc(1) | DAC_V | Reserved | Legacy | + // BG Format Status. + // Byte8: Disc Type(00h: CD-DA or CD-ROM Disc). + // Byte9: Number of sessions(Most Significant Byte). + // Byte10: First Trace Number in Last Session(Most Significant Byte). + // Byte11: Last Trace Number in Last Session(Most Significant Byte). + // Bytes12-15: Disc Identification. + // Bytes16-19: Last Session Lead-in Start Address. + // Bytes20-23: Last Possible Lead-Out Start Address. + // Bytes24-31: Disc Bar Code. + // Byte32: Disc Application Code. + // Byte33: Number of OPC Tables.(0) + let mut expect_result_vec = vec![0, 0x20, 0xe, 1, 1, 1, 1, 0x20]; + expect_result_vec.resize(READ_DISC_INFORMATION_DATA_LEN as usize, 0); + let cdb_test_args = CdbTest { + cdb: read_disc_information_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(READ_DISC_INFORMATION_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 5: scsi command: GET_CONFIGURATION. + // The size of test img is TEST_IMAGE_SIZE(64M), so it is a CD-ROM. + // Test 5 Result: Check if scsi command GET_CONFIGURATION was handled successfully. And check + // the read data is the same with the expect result. + let mut get_configuration_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + get_configuration_cdb[0] = GET_CONFIGURATION; + get_configuration_cdb[8] = GET_CONFIGURATION_DATA_LEN; + let mut expect_result_vec = vec![0; GET_CONFIGURATION_DATA_LEN as usize]; + // Bytes[0-7]: Feature Header. + // Bytes[0-3]: Data Length(36 = 40 - 4). + expect_result_vec[3] = GET_CONFIGURATION_DATA_LEN - 4; + // Bytes[4-5]: Reserved. + // Bytes[6-7]: Current Profile. + expect_result_vec[7] = 8; + // Bytes[8-n]: Feature Descriptor(s): + // Bytes[8-19]: Feature 0: Profile List Feature: + // Bytes[8-9]: Feature code(0000h). + // Byte[10]: Bits[6-7]: Reserved. Bits[2-5]: Version. Bit 1: Persistent. Bit 0: Current(1). + expect_result_vec[10] = 3; + // Byte[11]: Additional Length. 
+ expect_result_vec[11] = 8; + // Byte[12-19]: Profile Descriptors.(2 descriptors: CD and DVD) + // Byte[12-13]: Profile Number(CD). + expect_result_vec[13] = 8; + // Byte[14]: Bits[1-7]: Reserved. Bit 0: CurrentP. + expect_result_vec[14] = 1; + // Byte[15]: Reserved. + // Byte[16-17]: Profile Number(DVD). + expect_result_vec[17] = 0x10; + // Byte[18]: Bits[1-7]: Reserved. Bit 0: CurrentP. + // Byte[19]: Reserved. + // Bytes[20-31]: Feature 1: Core Feature: + // Bytes[20-21]: Feature Code(0001h). + expect_result_vec[21] = 0x1; + // Byte[22]: Bits[6-7]: Reserved. Bits[2-5]: Version(0010b). Bit 1: Persistent(1). + // Bit 0: Current(1). + expect_result_vec[22] = 0xb; + // Byte[23]: Additional Length(8). + expect_result_vec[23] = 8; + // Bytes[24-27]: Physical Interface Standard. (Scsi Family: 00000001h) + expect_result_vec[27] = 1; + // Byte[28]: Bits[2-7]: Reserved. Bit 1: INQ2. Bit 0: DBE(1). + expect_result_vec[28] = 1; + // Bytes[29-31]: Reserved. + // Bytes[32-40]: Feature 2: Removable media feature: + // Bytes[32-33]: Feature Code(0003h). + expect_result_vec[33] = 3; + // Byte[34]: Bits[6-7]: Reserved. Bit[2-5]: Version(0010b). Bit 1: Persistent(1). + // Bit 0: Current(1). + expect_result_vec[34] = 0xb; + // Byte[35]: Additional Length(4). + expect_result_vec[35] = 4; + // Byte[36]: Bits[5-7]: Loading Mechanism Type(001b). Bit4: Load(1). Bit 3: Eject(1). + // Bit 2: Pvnt Jmpr. Bit 1: DBML. Bit 0: Lock(1). + expect_result_vec[36] = 0x39; + // Byte[37-39]: Reserved. + let cdb_test_args = CdbTest { + cdb: get_configuration_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(GET_CONFIGURATION_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 6: scsi command: GET_EVENT_STATUS_NOTIFICATION. + // Test 6 Result: Check if scsi command GET_EVENT_STATUS_NOTIFICATION was handled successfully. + // And check the read data is the same with the expect result. + let mut get_event_status_notification_cdb: [u8; TEST_VIRTIO_SCSI_CDB_SIZE] = + [0; TEST_VIRTIO_SCSI_CDB_SIZE]; + get_event_status_notification_cdb[0] = GET_EVENT_STATUS_NOTIFICATION; + get_event_status_notification_cdb[1] = 1; + // Byte[4]: Notification Class Request. + get_event_status_notification_cdb[4] = 0x10; + get_event_status_notification_cdb[8] = GET_EVENT_STATUS_NOTIFICATION_DATA_LEN; + // Bytes[0-3]: Event Header. + // Bytes[4-n]: Event Descriptor. + // Bytes[0-1]: Event Descriptor Length. + // Byte2: Bit7: NEC(No Event Available). Bits[0-2]: Notification Class. + // NEC = 1: The Drive supports none of the requested notification classes. + // NEC = 0: At least one of the requested notification classes is supported. + // Byte3: Supported Event Class. + // Bytes[4-7]: Media Event Descriptor. + // Byte4: Bits[4-7]: reserved. Bits[0-3]: Event Code. + // Byte5: Media Status. Bits[2-7] reserved. Bit 1: Media Present. Bit 0: Door or Tray open. + // Byte6: Start Slot. + // Byte7: End Slot. + let expect_result_vec = vec![0, 6, 4, 0x10, 0, 2, 0, 0]; + let cdb_test_args = CdbTest { + cdb: get_event_status_notification_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(GET_EVENT_STATUS_NOTIFICATION_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 7: basic io test. 
+ vst.scsi_try_io(target, lun, ScsiDeviceType::ScsiCd); + + vst.testcase_tear_down(); +} + +/// Virtio Scsi target cdb test. Test some commands no matter it's right or wrong. +/// Target cdb means that the target has at least one lun but the lun id of cdb will not +/// be found in target's all luns' id. +/// Using command REPORT_LUNS/INQUIRY/REQUEST_SENSE/TEST_UNIT_READY as target cdb are supported. +/// Others are not supported now. +/// TestStep: +/// 0. Init process. +/// 1. Test scsi command REPORT_LUNS. +/// 2. Test scsi command INQUIRY. +/// 3. Test scsi command REQUEST_SENSE. +/// 4. Test scsi command TEST_UNIT_READY. +/// 5. Test other scsi command, e.g. READ_CAPACITY_10. +/// 6. Destroy device. +/// Expect: +/// 0/1/2/3/4/5/6: success. +#[test] +fn scsi_target_cdb_test() { + let target = 15; + let lun = 5; + let req_lun = 3; + let mut vst = VirtioScsiTest::general_testcase_run(ScsiDeviceType::ScsiCd, target, lun); + + // Test 1: scsi command: REPORT_LUNS. + // Test 1 Result: Check if scsi command REPORT_LUNS was handled successfully. + // And check the read data is the right lun information (target 15, lun 5). + let mut report_luns_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + report_luns_cdb[0] = REPORT_LUNS; + report_luns_cdb[9] = REPORT_LUNS_DATA_LEN; + let expect_result_vec = vec![0, 0, 0, 8, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0]; + let cdb_test_args = CdbTest { + cdb: report_luns_cdb, + target, + lun: req_lun, + data_out: None, + data_in_length: u32::from(REPORT_LUNS_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 2: scsi command: INQUIRY. + // Test 2.1: request lun id != 0. EVPD = 0. page code = 0. + // Test 2.1 Result: Check if scsi command INQUIRY was handled successfully. + // And check the read data is TYPE_NO_LUN. + let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[4] = INQUIRY_TARGET_DATA_LEN; + // Byte[0]: TYPE_NO_LUN(0x7f): Scsi target device is not capable of supporting a peripheral + // device connected to this logical unit. + let mut expect_result_vec = vec![0x7f]; + expect_result_vec.resize(INQUIRY_TARGET_DATA_LEN as usize, 0); + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target, + lun: req_lun, + data_out: None, + data_in_length: u32::from(INQUIRY_TARGET_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 2.2: request lun id == 0. EVPD = 0. page code = 0. + // Test 2.2 Result: Check if scsi command INQUIRY was handled successfully. + // And check the read data is the right target inquiry information. + let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[4] = INQUIRY_TARGET_DATA_LEN; + let mut expect_result_vec = vec![0; INQUIRY_TARGET_DATA_LEN as usize]; + // Byte0: Peripheral Qualifier/peripheral device type. + expect_result_vec[0] = 0x3f; + // Byte1:RMB. + // Byte2: VERSION. + expect_result_vec[2] = 0x5; + // Byte3: NORMACA/HISUP/Response Data Format. + expect_result_vec[3] = 0x12; + // Byte4: Additional length(length - 5). + expect_result_vec[4] = INQUIRY_TARGET_DATA_LEN - 5; + // Byte5: SCCS/ACC/TPGS/3PC/RESERVED/PROTECT. + // Byte6: ENCSERV/VS/MULTIP/ADDR16. + // Byte7: WBUS16/SYNC/CMDQUE/VS. 
+ expect_result_vec[7] = 0x12; + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target, + lun: 0, + data_out: None, + data_in_length: u32::from(INQUIRY_TARGET_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: Some(expect_result_vec), + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 2.3: request lun id != 0. EVPD = 1. page code = 0. + // Test 2.3 Result: Check if scsi command INQUIRY was handled successfully. + // And check the read data is the right target inquiry information. + let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[1] = 0x1; // Byte1: bit0: EVPD (Enable Vital product bit). + inquiry_cdb[4] = INQUIRY_TARGET_DATA_LEN; + // Byte[3]: Page Length. Supported VPD page list in stratovirt only has 0x00 item. + let mut expect_result_vec = vec![0, 0, 0, 1]; + expect_result_vec.resize(INQUIRY_TARGET_DATA_LEN as usize, 0); + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target, + lun: req_lun, + data_out: None, + data_in_length: u32::from(INQUIRY_TARGET_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: None, + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 2.4: request lun id == 0. EVPD = 1. page code = 0x80. + // Test 2.4 Result: Check if scsi command INQUIRY was handled successfully. + // And check the sense data is SCSI_SENSE_INVALID_FIELD. + let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[1] = 0x1; // Byte1: bit0: EVPD (Enable Vital product bit). + inquiry_cdb[2] = 0x80; + inquiry_cdb[4] = INQUIRY_TARGET_DATA_LEN; + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target, + lun: 0, + data_out: None, + data_in_length: u32::from(INQUIRY_TARGET_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: CHECK_CONDITION, + expect_result_data: None, + expect_sense: Some(get_sense_bytes(SCSI_SENSE_INVALID_FIELD)), + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 2.5: request lun id != 0. EVPD = 0. page code = 0x80. + // Test 2.5 Result: Check if scsi command INQUIRY was handled successfully. + // And check the sense data is SCSI_SENSE_INVALID_FIELD. + let mut inquiry_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + inquiry_cdb[0] = INQUIRY; + inquiry_cdb[1] = 0x1; // Byte1: bit0: EVPD (Enable Vital product bit). + inquiry_cdb[2] = 0x80; + inquiry_cdb[4] = INQUIRY_TARGET_DATA_LEN; + let cdb_test_args = CdbTest { + cdb: inquiry_cdb, + target, + lun: req_lun, + data_out: None, + data_in_length: u32::from(INQUIRY_TARGET_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: CHECK_CONDITION, + expect_result_data: None, + expect_sense: Some(get_sense_bytes(SCSI_SENSE_INVALID_FIELD)), + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 3: scsi command: REQUEST_SENSE. + // Test 3.1 req_lun != 0; + // Test 3.1 Result: Check if scsi command REQUEST_SENSE was handled successfully. + // And check the sense data is SCSI_SENSE_LUN_NOT_SUPPORTED. 
+ let mut request_sense_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + request_sense_cdb[0] = REQUEST_SENSE; + request_sense_cdb[4] = TEST_SCSI_SENSE_LEN as u8; + let cdb_test_args = CdbTest { + cdb: request_sense_cdb, + target, + lun: req_lun, + data_out: None, + data_in_length: TEST_SCSI_SENSE_LEN, + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: None, + expect_sense: Some(get_sense_bytes(SCSI_SENSE_LUN_NOT_SUPPORTED)), + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 3.2 req_lun == 0; + // Just return. + // Test 3.1 Result: Check if scsi command REQUEST_SENSE was handled successfully. + let mut request_sense_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + request_sense_cdb[0] = REQUEST_SENSE; + request_sense_cdb[4] = TEST_SCSI_SENSE_LEN as u8; + let cdb_test_args = CdbTest { + cdb: request_sense_cdb, + target, + lun: 0, + data_out: None, + data_in_length: TEST_SCSI_SENSE_LEN, + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: None, + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 4: scsi command: TEST_UNIT_READY. + // Test 4 Result: Check if scsi command TEST_UNIT_READY was handled successfully. + let mut test_unit_ready_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + test_unit_ready_cdb[0] = TEST_UNIT_READY; + let cdb_test_args = CdbTest { + cdb: test_unit_ready_cdb, + target, + lun: req_lun, + data_out: None, + data_in_length: 0, + expect_response: VIRTIO_SCSI_S_OK, + expect_status: GOOD, + expect_result_data: None, + expect_sense: None, + }; + vst.scsi_cdb_test(cdb_test_args); + + // TEST 5: other scsi command, eg: READ_CAPACITY_10. + // Test 4 Result: Check if scsi command READ_CAPACITY_10 was handled successfully. + // And check the sense data is SCSI_SENSE_INVALID_OPCODE. + let mut read_capacity_10_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + read_capacity_10_cdb[0] = READ_CAPACITY_10; + let cdb_test_args = CdbTest { + cdb: read_capacity_10_cdb, + target, + lun: req_lun, + data_out: None, + data_in_length: u32::from(READ_CAPACITY_10_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: CHECK_CONDITION, + expect_result_data: None, + expect_sense: Some(get_sense_bytes(SCSI_SENSE_INVALID_OPCODE)), + }; + vst.scsi_cdb_test(cdb_test_args); + + vst.testcase_tear_down(); +} + +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioScsiConfig { + num_queues: u32, + seg_max: u32, + max_sectors: u32, + cmd_per_lun: u32, + event_info_size: u32, + sense_size: u32, + cdb_size: u32, + max_channel: u16, + max_target: u16, + max_lun: u32, +} + +/// Virtio Scsi pci device config Test. +/// Virtio spec requires that only cdb_size and sense size in virtio scsi pci device config +/// can be set from guest. +/// TestStep: +/// 1. Init process. +/// 2. For every parameter in VirtioScsiConfig, do check just like: Read default value -> Set +/// other value -> Read value again -> Check if value was set successfully. +/// 3. Destroy device. +/// Note: +/// 1. sense size and cdb size can not be changed in stratovirt now. So, they are 0 now. +/// Expect: +/// 1/2/3: success. +/// 2: only sense_size and cdb_size are set successfully. 
+#[test] +fn device_config_test() { + let target = 0x0; + let lun = 0x0; + let mut vst = VirtioScsiTest::general_testcase_run(ScsiDeviceType::ScsiHd, target, lun); + + let mut num_queues = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, num_queues) as u64); + assert_eq!(num_queues, 1); + vst.cntlr + .borrow() + .config_writel(offset_of!(VirtioScsiConfig, num_queues) as u64, 5); + num_queues = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, num_queues) as u64); + assert_eq!(num_queues, 1); + + let mut seg_max = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, seg_max) as u64); + assert_eq!(seg_max, 254); + vst.cntlr + .borrow() + .config_writel(offset_of!(VirtioScsiConfig, seg_max) as u64, 126); + seg_max = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, seg_max) as u64); + assert_eq!(seg_max, 254); + + let mut max_sectors = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, max_sectors) as u64); + assert_eq!(max_sectors, 0xFFFF_u32); + vst.cntlr + .borrow() + .config_writel(offset_of!(VirtioScsiConfig, max_sectors) as u64, 0xFF_u32); + max_sectors = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, max_sectors) as u64); + assert_eq!(max_sectors, 0xFFFF_u32); + + let mut cmd_per_lun = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, cmd_per_lun) as u64); + assert_eq!(cmd_per_lun, 128); + vst.cntlr + .borrow() + .config_writel(offset_of!(VirtioScsiConfig, cmd_per_lun) as u64, 256); + cmd_per_lun = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, cmd_per_lun) as u64); + assert_eq!(cmd_per_lun, 128); + + let mut event_info_size = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, event_info_size) as u64); + assert_eq!(event_info_size, 0); + vst.cntlr + .borrow() + .config_writel(offset_of!(VirtioScsiConfig, event_info_size) as u64, 32); + event_info_size = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, event_info_size) as u64); + assert_eq!(event_info_size, 0); + + let mut sense_size = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, sense_size) as u64); + assert_eq!(sense_size, 0); + vst.cntlr.borrow().config_writel( + offset_of!(VirtioScsiConfig, sense_size) as u64, + TEST_VIRTIO_SCSI_SENSE_SIZE as u32 + 2, + ); + sense_size = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, sense_size) as u64); + assert_eq!(sense_size, 0); + vst.cntlr.borrow().config_writel( + offset_of!(VirtioScsiConfig, sense_size) as u64, + TEST_VIRTIO_SCSI_SENSE_SIZE as u32, + ); + sense_size = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, sense_size) as u64); + assert_eq!(sense_size, 0); + + let mut cdb_size = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, cdb_size) as u64); + assert_eq!(cdb_size, 0); + vst.cntlr.borrow().config_writel( + offset_of!(VirtioScsiConfig, cdb_size) as u64, + TEST_VIRTIO_SCSI_CDB_SIZE as u32 + 3, + ); + cdb_size = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, cdb_size) as u64); + assert_eq!(cdb_size, 0); + vst.cntlr.borrow().config_writel( + offset_of!(VirtioScsiConfig, cdb_size) as u64, + TEST_VIRTIO_SCSI_CDB_SIZE as u32, + ); + cdb_size = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, cdb_size) as u64); + assert_eq!(cdb_size, 0); + + let mut max_channel = vst + .cntlr + .borrow() + .config_readw(offset_of!(VirtioScsiConfig, max_channel) as u64); + 
assert_eq!(max_channel, 0); + vst.cntlr + .borrow() + .config_writew(offset_of!(VirtioScsiConfig, max_channel) as u64, 126); + max_channel = vst + .cntlr + .borrow() + .config_readw(offset_of!(VirtioScsiConfig, max_channel) as u64); + assert_eq!(max_channel, 0); + + let mut max_target = vst + .cntlr + .borrow() + .config_readw(offset_of!(VirtioScsiConfig, max_target) as u64); + assert_eq!(max_target, TEST_VIRTIO_SCSI_MAX_TARGET); + vst.cntlr + .borrow() + .config_writew(offset_of!(VirtioScsiConfig, max_target) as u64, 126); + max_target = vst + .cntlr + .borrow() + .config_readw(offset_of!(VirtioScsiConfig, max_target) as u64); + assert_eq!(max_target, TEST_VIRTIO_SCSI_MAX_TARGET); + + let mut max_lun = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, max_lun) as u64); + assert_eq!(max_lun, TEST_VIRTIO_SCSI_MAX_LUN); + vst.cntlr + .borrow() + .config_writel(offset_of!(VirtioScsiConfig, max_lun) as u64, 1024); + max_lun = vst + .cntlr + .borrow() + .config_readl(offset_of!(VirtioScsiConfig, max_lun) as u64); + assert_eq!(max_lun, TEST_VIRTIO_SCSI_MAX_LUN); + + vst.testcase_tear_down(); +} + +/// Virtio Scsi I/O processing in iothread test. +/// TestStep: +/// 1. Config iothread in scsi controller with a scsi harddisk. Init process. +/// 2. Write Data / Read Data. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +/// 2: The data read out from the disk is exactly the data written down. +#[test] +fn iothread_test() { + let target = 0x1; + let lun = 0x2; + let mut vst = VirtioScsiTest::general_testcase_run(ScsiDeviceType::ScsiHd, target, lun); + + // Test: basic io test. + vst.scsi_try_io(target, lun, ScsiDeviceType::ScsiHd); + + vst.testcase_tear_down(); +} + +/// Virtio Scsi I/O processing in different AIO model. +/// TestStep: +/// 1. Config different AIO model in scsi disk. Init process. +/// 2. Write Data / Read Data. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +/// 2: The data read out from the disk is exactly the data written down. +#[test] +fn aio_model_test() { + let cntlrcfg = CntlrConfig { + id: 0, + use_iothread: false, + }; + let target = 0x1; + let mut lun = 0x2; + let mut device_vec: Vec = Vec::new(); + + #[cfg(not(target_env = "ohos"))] + if aio_probe(AioEngine::IoUring).is_ok() { + // Scsi Disk 1. AIO io_uring. Direct false. + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw)); + device_vec.push(ScsiDeviceConfig { + cntlr_id: 0, + device_type: ScsiDeviceType::ScsiHd, + image_path, + target, + lun, + read_only: false, + direct: false, + aio: TestAioType::AioIOUring, + serial: None, + }); + + // Scsi Disk 2. AIO io_uring. Direct true. + lun += 1; + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw)); + device_vec.push(ScsiDeviceConfig { + cntlr_id: 0, + device_type: ScsiDeviceType::ScsiHd, + image_path, + target, + lun, + read_only: false, + direct: true, + aio: TestAioType::AioIOUring, + serial: None, + }); + } + + // Scsi Disk 3. AIO OFF. Direct true. This is not allowed. + // Stratovirt will report "low performance expect when use sync io with direct on" + + // Scsi Disk 4. AIO OFF. Direct false. + lun += 1; + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw)); + device_vec.push(ScsiDeviceConfig { + cntlr_id: 0, + device_type: ScsiDeviceType::ScsiHd, + image_path, + target, + lun, + read_only: false, + direct: false, + aio: TestAioType::AioOff, + serial: None, + }); + // Scsi Disk 5. AIO native. Direct false. This is not allowed. 
+ // Stratovirt will report "native aio type should be used with direct on" + + #[cfg(not(target_env = "ohos"))] + if aio_probe(AioEngine::Native).is_ok() { + // Scsi Disk 6. AIO native. Direct true. + lun += 1; + let image_path = Rc::new(create_img(TEST_IMAGE_SIZE, 1, &ImageType::Raw)); + device_vec.push(ScsiDeviceConfig { + cntlr_id: 0, + device_type: ScsiDeviceType::ScsiHd, + image_path, + target, + lun, + read_only: false, + direct: true, + aio: TestAioType::AioNative, + serial: None, + }); + } + + let (cntlr, state, alloc) = scsi_test_init(cntlrcfg, device_vec.clone()); + let features = virtio_scsi_default_feature(cntlr.clone()); + let queues = cntlr + .borrow_mut() + .init_device(state.clone(), alloc.clone(), features, 3); + + let mut vst = VirtioScsiTest { + cntlr, + scsi_devices: device_vec, + state, + alloc, + queues, + }; + + for device in vst.scsi_devices.clone().iter() { + // Test: basic io test. + vst.scsi_try_io(device.target, device.lun, ScsiDeviceType::ScsiHd); + } + + vst.testcase_tear_down(); +} + +/// Virtio Scsi random CDB test. +/// TestStep: +/// 1. Init process. +/// 2. Generate random u8 vector as CDB and send. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +/// 2: Stratovirt will not crash. +#[test] +fn random_cdb_test() { + let target = 0xff; + let lun = 0xff; + let mut vst = VirtioScsiTest::general_testcase_run(ScsiDeviceType::ScsiHd, target, lun); + + // Test: Generate random u8 array as cdb. + let mut randcdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + let mut rng = rand::thread_rng(); + for i in 0..TEST_VIRTIO_SCSI_CDB_SIZE { + randcdb[i] = rng.gen(); + } + + let scsi_req = TestVirtioScsiCmdReq::new(target, lun, randcdb); + let mut scsi_resp = TestVirtioScsiCmdResp::default(); + let mut data_in = Vec::::with_capacity(512); + vst.virtio_scsi_do_command(scsi_req, &None, &mut scsi_resp, &mut data_in, 0); + + // Test: the scsi device works normally. + vst.scsi_try_io(target, lun, ScsiDeviceType::ScsiHd); + + vst.testcase_tear_down(); +} + +/// Virtio Scsi wrong size virtioscsirequest test. +/// TestStep: +/// 1. Config virtio scsi controller with a scsi harddisk. Init process. +/// 2. Send virtioscsirequest which is less than expect length. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +/// 2: Report VIRTIO ERROR. Stratovirt will not crash. +#[test] +fn wrong_virtioscsirequest_test() { + let target = 0xff; + let lun = 0xff; + let mut data_entries: Vec = Vec::with_capacity(DEFAULT_SCSI_DESC_ELEM); + let mut vst = VirtioScsiTest::general_testcase_run(ScsiDeviceType::ScsiHd, target, lun); + + // Error request. + let error_req_size = size_of::() as u64 - 1; + let req = vec![1; error_req_size as usize]; + let req_addr = vst + .alloc + .borrow_mut() + .alloc(error_req_size.try_into().unwrap()); + vst.state.borrow().memwrite(req_addr, &req); + + data_entries.push(TestVringDescEntry { + data: req_addr, + len: error_req_size as u32, + write: false, + }); + + // Response. 
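+ // The response buffer below is well-formed and full-sized; only the truncated
+ // request header above is malformed, so the controller is expected to mark the
+ // queue broken (VIRTIO_CONFIG_S_NEEDS_RESET) instead of crashing.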
+ let cmdresp_len = size_of::() as u64; + let resp = TestVirtioScsiCmdResp::default(); + let resp_addr = vst + .alloc + .borrow_mut() + .alloc(cmdresp_len.try_into().unwrap()); + let resp_bytes = resp.as_bytes(); + vst.state.borrow().memwrite(resp_addr, resp_bytes); + + data_entries.push(TestVringDescEntry { + data: resp_addr, + len: cmdresp_len as u32, + write: false, + }); + + vst.queues[2] + .borrow_mut() + .add_chained(vst.state.clone(), data_entries); + + vst.cntlr + .borrow() + .kick_virtqueue(vst.state.clone(), vst.queues[2].clone()); + + thread::sleep(time::Duration::from_secs(1)); + assert!(vst.cntlr.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET != 0); + + vst.testcase_tear_down(); +} + +/// Virtio Scsi wrong size virtioscsiresponse test. +/// TestStep: +/// 1. Config virtio scsi controller with a scsi harddisk. Init process. +/// 2. Send virtioscsiresponse which is less than expect length. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +/// 2: Report VIRTIO ERROR. Stratovirt will not crash. +#[test] +fn wrong_size_virtioscsiresponse_test() { + let target = 0xff; + let lun = 0xff; + let mut data_entries: Vec = Vec::with_capacity(DEFAULT_SCSI_DESC_ELEM); + let mut vst = VirtioScsiTest::general_testcase_run(ScsiDeviceType::ScsiHd, target, lun); + + // Request Header. + let req_len = size_of::() as u64; + let req = vec![1; req_len as usize]; + let req_addr = vst.alloc.borrow_mut().alloc(req_len.try_into().unwrap()); + vst.state.borrow().memwrite(req_addr, &req); + data_entries.push(TestVringDescEntry { + data: req_addr, + len: req_len as u32, + write: false, + }); + + // Response. + let err_resp_len = size_of::() as u64 - 1; + let resp = vec![1; err_resp_len as usize]; + let resp_addr = vst + .alloc + .borrow_mut() + .alloc(err_resp_len.try_into().unwrap()); + vst.state.borrow().memwrite(resp_addr, &resp); + data_entries.push(TestVringDescEntry { + data: resp_addr, + len: err_resp_len as u32, + write: true, + }); + + vst.queues[2] + .borrow_mut() + .add_chained(vst.state.clone(), data_entries); + vst.cntlr + .borrow() + .kick_virtqueue(vst.state.clone(), vst.queues[2].clone()); + + thread::sleep(time::Duration::from_secs(1)); + assert!(vst.cntlr.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET != 0); + + vst.testcase_tear_down(); +} + +/// Virtio Scsi missing virtioscsirequest test. +/// TestStep: +/// 1. Config virtio scsi controller with a scsi harddisk. Init process. +/// 2. Do not send virtioscsirequest in virtqueue. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +/// 2: Report VIRTIO ERROR. Stratovirt will not crash. +#[test] +fn missing_virtioscsirequest_test() { + let target = 0xff; + let lun = 0xff; + let mut data_entries: Vec = Vec::with_capacity(DEFAULT_SCSI_DESC_ELEM); + let mut vst = VirtioScsiTest::general_testcase_run(ScsiDeviceType::ScsiHd, target, lun); + + // Response. 
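+ // Only the device-writable response buffer is queued; the request header
+ // descriptor is deliberately omitted, which makes the descriptor chain
+ // malformed from the controller's point of view.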
+ let resp_len = size_of::() as u64; + let resp = vec![1; resp_len as usize]; + let resp_addr = vst.alloc.borrow_mut().alloc(resp_len.try_into().unwrap()); + vst.state.borrow().memwrite(resp_addr, &resp); + + data_entries.push(TestVringDescEntry { + data: resp_addr, + len: resp_len as u32, + write: true, + }); + + vst.queues[2] + .borrow_mut() + .add_chained(vst.state.clone(), data_entries); + vst.cntlr + .borrow() + .kick_virtqueue(vst.state.clone(), vst.queues[2].clone()); + + thread::sleep(time::Duration::from_secs(1)); + assert!(vst.cntlr.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET != 0); + + vst.testcase_tear_down(); +} + +/// Virtio Scsi missing virtioscsiresponse test. +/// TestStep: +/// 1. Config virtio scsi controller with a scsi harddisk. Init process. +/// 2. Do not send virtioscsiresponse in virtqueue. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +/// 2: Report VIRTIO ERROR. Stratovirt will not crash. +#[test] +fn missing_virtioscsiresponse_test() { + let target = 0xff; + let lun = 0xff; + let mut data_entries: Vec = Vec::with_capacity(DEFAULT_SCSI_DESC_ELEM); + let mut vst = VirtioScsiTest::general_testcase_run(ScsiDeviceType::ScsiHd, target, lun); + + // Request Header. + let req_len = size_of::() as u64; + let req = vec![1; req_len as usize]; + let req_addr = vst.alloc.borrow_mut().alloc(req_len.try_into().unwrap()); + vst.state.borrow().memwrite(req_addr, &req); + + data_entries.push(TestVringDescEntry { + data: req_addr, + len: req_len as u32, + write: false, + }); + + vst.queues[2] + .borrow_mut() + .add_chained(vst.state.clone(), data_entries); + vst.cntlr + .borrow() + .kick_virtqueue(vst.state.clone(), vst.queues[2].clone()); + + thread::sleep(time::Duration::from_secs(1)); + assert!(vst.cntlr.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET != 0); + + vst.testcase_tear_down(); +} + +/// Virtio Scsi wrong lun in virtioscsiresponse test. +/// #[repr(C, packed)] +/// struct TestVirtioScsiCmdReq { +/// lun: [u8; 8], +/// tag: u64, +/// task_attr: u8, +/// prio: u8, +/// crn: u8, +/// cdb: [u8; TEST_VIRTIO_SCSI_CDB_SIZE], +/// } +/// TestStep: +/// 1. Config virtio scsi controller with a scsi harddisk. Init process. +/// 2. Send virtioscsirequest which has wrong lun parameter. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +/// 2: Return no such target/lun. Stratovirt will not crash. +#[test] +fn wrong_lun_in_virtioscsirequest_test() { + let target = 0xff; + let lun = 0xff; + let mut vst = VirtioScsiTest::general_testcase_run(ScsiDeviceType::ScsiHd, target, lun); + + let mut test_unit_ready_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + test_unit_ready_cdb[0] = TEST_UNIT_READY; + let err_lun_scsi_req = TestVirtioScsiCmdReq { + lun: [0; 8], // Error lun. + tag: 0, + task_attr: 0, + prio: 0, + crn: 0, + cdb: test_unit_ready_cdb, + }; + let mut scsi_resp = TestVirtioScsiCmdResp::default(); + vst.virtio_scsi_do_command(err_lun_scsi_req, &None, &mut scsi_resp, &mut Vec::new(), 0); + + assert!(scsi_resp.response == VIRTIO_SCSI_S_BAD_TARGET); + + vst.testcase_tear_down(); +} + +/// Send scsi-cd command to scsi-hd. +/// TestStep: +/// 1. Config virtio scsi controller with a scsi harddisk. Init process. +/// 2. Send scsi command which is used for scsi CD-ROM. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +/// 2: Return not supported. Stratovirt will not crash. 
+#[test] +fn send_cd_command_to_hd_test() { + let target = 3; + let lun = 0; + let mut vst = VirtioScsiTest::general_testcase_run(ScsiDeviceType::ScsiHd, target, lun); + + // Test 1: Scsi Command: MODE_SENSE + let mut mode_sense_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + mode_sense_cdb[0] = MODE_SENSE; + mode_sense_cdb[2] = MODE_PAGE_CAPABILITIES; + mode_sense_cdb[4] = MODE_SENSE_LEN_DATA_LEN; + + let cdb_test_args = CdbTest { + cdb: mode_sense_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(MODE_SENSE_LEN_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: CHECK_CONDITION, + expect_result_data: None, + expect_sense: Some(get_sense_bytes(SCSI_SENSE_INVALID_FIELD)), + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 2: scsi command: READ_DISC_INFORMATION. + // Test 2 Result: Check if scsi command READ_DISC_INFORMATION was failure. + let mut read_disc_information_cdb: [u8; TEST_VIRTIO_SCSI_CDB_SIZE] = + [0; TEST_VIRTIO_SCSI_CDB_SIZE]; + read_disc_information_cdb[0] = READ_DISC_INFORMATION; + read_disc_information_cdb[8] = READ_DISC_INFORMATION_DATA_LEN; + let cdb_test_args = CdbTest { + cdb: read_disc_information_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(READ_DISC_INFORMATION_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: CHECK_CONDITION, + expect_result_data: None, + expect_sense: Some(get_sense_bytes(SCSI_SENSE_INVALID_FIELD)), + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 3: scsi command: GET_CONFIGURATION. + // Test 3 Result: Check if scsi command GET_CONFIGURATION was failure. + let mut get_configuration_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + get_configuration_cdb[0] = GET_CONFIGURATION; + get_configuration_cdb[8] = GET_CONFIGURATION_DATA_LEN; + let cdb_test_args = CdbTest { + cdb: get_configuration_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(GET_CONFIGURATION_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: CHECK_CONDITION, + expect_result_data: None, + expect_sense: Some(get_sense_bytes(SCSI_SENSE_INVALID_FIELD)), + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test 4: scsi command: GET_EVENT_STATUS_NOTIFICATION. + // Test 4 Result: Check if scsi command GET_EVENT_STATUS_NOTIFICATION was failure. + let mut get_event_status_notification_cdb: [u8; TEST_VIRTIO_SCSI_CDB_SIZE] = + [0; TEST_VIRTIO_SCSI_CDB_SIZE]; + get_event_status_notification_cdb[0] = GET_EVENT_STATUS_NOTIFICATION; + get_event_status_notification_cdb[1] = 1; + get_event_status_notification_cdb[4] = 0x10; + get_event_status_notification_cdb[8] = GET_EVENT_STATUS_NOTIFICATION_DATA_LEN; + + let cdb_test_args = CdbTest { + cdb: get_event_status_notification_cdb, + target, + lun, + data_out: None, + data_in_length: u32::from(GET_EVENT_STATUS_NOTIFICATION_DATA_LEN), + expect_response: VIRTIO_SCSI_S_OK, + expect_status: CHECK_CONDITION, + expect_result_data: None, + expect_sense: Some(get_sense_bytes(SCSI_SENSE_INVALID_FIELD)), + }; + vst.scsi_cdb_test(cdb_test_args); + + vst.testcase_tear_down(); +} + +/// Virtio Scsi Wrong io request test. +/// TestStep: +/// 1. Init process. +/// 2. Send READ_10/WRITE_10 CDB. +/// 2.1 READ_10/WRITE_10 transfer length is larger than disk size. +/// 2.2 READ_10/WRITE_10 read/write offset is larger than disk size. +/// 3. Wait for return value. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. +/// 2: Stratovirt will not crash. +/// 3. Return error. +#[test] +fn wrong_io_test() { + let target = 0xff; + let lun = 0xff; + let size = 1024; // Disk size: 1K. 
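+ // The cases below hand-craft READ_10/WRITE_10 CDBs: byte 0 holds the opcode,
+ // bytes 2..=5 the big-endian logical block address and bytes 7..=8 the transfer
+ // length in 512-byte sectors; only the bytes each case needs are filled in.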
+ + let mut vst = + VirtioScsiTest::testcase_start_with_config(ScsiDeviceType::ScsiHd, target, lun, size, true); + + // Test1: scsi command: WRITE_10. + // Write to LBA(logical block address) 0, transfer length 2KB and disk is 1KB size. + // Test Result: Check if scsi command WRITE_10 was failure. + let mut write_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + write_cdb[0] = WRITE_10; + write_cdb[8] = (2048 / 512) as u8; // 2KB data. + let data = vec![0x5; 2048]; // 2KB data. + let write_data = String::from_utf8(data).unwrap(); + let cdb_test_args = CdbTest { + cdb: write_cdb, + target, + lun, + data_out: Some(write_data), + data_in_length: 0, + expect_response: VIRTIO_SCSI_S_OK, + expect_status: CHECK_CONDITION, + expect_result_data: None, + expect_sense: Some(get_sense_bytes(SCSI_SENSE_INVALID_OPCODE)), + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test2: scsi command: READ_10. + // Read from LBA(logical block address) 0, transfer length 2KB and disk is 1KB size. + // Test Result: Check if scsi command READ_10 was failure. + let mut read_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + read_cdb[0] = READ_10; + read_cdb[8] = (2048 / 512) as u8; // 2KB data. + let cdb_test_args = CdbTest { + cdb: read_cdb, + target, + lun, + data_out: None, + data_in_length: 2048, // Read 2K data. + expect_response: VIRTIO_SCSI_S_OK, + expect_status: CHECK_CONDITION, + expect_result_data: None, + expect_sense: Some(get_sense_bytes(SCSI_SENSE_INVALID_OPCODE)), + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test3: scsi command: WRITE_10. + // Write to LBA(logical block address) 2K, transfer length 1 sector and disk is 1KB size. + // Test Result: Check if scsi command WRITE_10 was failure. + let mut write_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + write_cdb[0] = WRITE_10; + write_cdb[5] = ((2 * 1024) & 0xff) as u8; + write_cdb[4] = ((2 * 1024) >> 8 & 0xff) as u8; + write_cdb[8] = 1; // 1 sector data. + let data = vec![0x5; 512]; // 1 sector data. + let write_data = String::from_utf8(data).unwrap(); + let cdb_test_args = CdbTest { + cdb: write_cdb, + target, + lun, + data_out: Some(write_data), + data_in_length: 0, + expect_response: VIRTIO_SCSI_S_OK, + expect_status: CHECK_CONDITION, + expect_result_data: None, + expect_sense: Some(get_sense_bytes(SCSI_SENSE_INVALID_OPCODE)), + }; + vst.scsi_cdb_test(cdb_test_args); + + // Test4: scsi command: READ_10. + // Read from LBA(logical block address) 2K, transfer length 1 sector and disk is 1KB size. + // Test Result: Check if scsi command READ_10 was failure. + let mut read_cdb = [0_u8; TEST_VIRTIO_SCSI_CDB_SIZE]; + read_cdb[0] = READ_10; + read_cdb[5] = ((2 * 1024) & 0xff) as u8; + read_cdb[4] = ((2 * 1024) >> 8 & 0xff) as u8; + read_cdb[8] = 1; // 1 sector data. + let cdb_test_args = CdbTest { + cdb: read_cdb, + target, + lun, + data_out: None, + data_in_length: 512, // 1 sector data. + expect_response: VIRTIO_SCSI_S_OK, + expect_status: CHECK_CONDITION, + expect_result_data: None, + expect_sense: Some(get_sense_bytes(SCSI_SENSE_INVALID_OPCODE)), + }; + vst.scsi_cdb_test(cdb_test_args); + + vst.testcase_tear_down(); +} diff --git a/tests/mod_test/tests/serial_test.rs b/tests/mod_test/tests/serial_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..8009f8f2043a1cab8be2a4b5264b5ca4d6d89c33 --- /dev/null +++ b/tests/mod_test/tests/serial_test.rs @@ -0,0 +1,803 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::cell::RefCell; +use std::collections::HashMap; +use std::fs::{self, File}; +use std::io::prelude::*; +use std::mem::size_of; +use std::net::Shutdown; +use std::os::unix::net::UnixStream; +use std::path::Path; +use std::rc::Rc; +use std::{thread, time}; + +use byteorder::{ByteOrder, LittleEndian}; + +use mod_test::libdriver::machine::TestStdMachine; +use mod_test::libdriver::malloc::GuestAllocator; +use mod_test::libdriver::virtio::{TestVirtQueue, VirtioDeviceOps, VIRTIO_CONFIG_S_NEEDS_RESET}; +use mod_test::libdriver::virtio_pci_modern::TestVirtioPciDev; +use mod_test::libtest::{test_init, TestState, MACHINE_TYPE_ARG}; +use util::byte_code::ByteCode; + +const TIMEOUT_US: u64 = 15 * 1000 * 1000; +const ROWS_DEFAULT: u16 = 0; +const COLS_DEFAULT: u16 = 0; +const EMERG_WR_DEFAULT: u32 = 0; +// Default 31 serial ports. +const DEFAULT_SERIAL_PORTS_NUMBER: u32 = 31; +const BUFFER_LEN: usize = 96; +// Each port has 2 virtqueues and there exist 2 control virtqueues. +const DEFAULT_SERIAL_VIRTQUEUES: usize = DEFAULT_SERIAL_PORTS_NUMBER as usize * 2 + 2; + +const VIRTIO_CONSOLE_F_SIZE: u64 = 0; +const VIRTIO_CONSOLE_F_MULTIPORT: u64 = 1; +const VIRTIO_CONSOLE_F_EMERG_WRITE: u64 = 2; + +const VIRTIO_CONSOLE_DEVICE_READY: u16 = 0; +const VIRTIO_CONSOLE_PORT_ADD: u16 = 1; +const VIRTIO_CONSOLE_PORT_READY: u16 = 3; +const VIRTIO_CONSOLE_CONSOLE_PORT: u16 = 4; +const VIRTIO_CONSOLE_PORT_OPEN: u16 = 6; +const VIRTIO_CONSOLE_PORT_NAME: u16 = 7; + +const IN_CONTROL_QUEUE_ID: usize = 2; +const OUT_CONTROL_QUEUE_ID: usize = 3; + +#[derive(Clone)] +enum ChardevType { + Pty, + Socket { + path: String, + server: bool, + nowait: bool, + }, +} + +#[derive(Clone)] +struct PortConfig { + chardev_type: ChardevType, + nr: u8, + is_console: bool, +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioConsoleControl { + // Port number. + id: u32, + // The kind of control event. + event: u16, + // Extra information for event. + value: u16, +} + +impl VirtioConsoleControl { + fn new(id: u32, event: u16, value: u16) -> Self { + VirtioConsoleControl { id, event, value } + } +} + +impl ByteCode for VirtioConsoleControl {} + +struct SerialTest { + pub serial: Rc>, + pub state: Rc>, + pub alloc: Rc>, + vqs: Vec>>, + ports: HashMap, +} + +impl SerialTest { + fn virtqueue_setup(&mut self, num_queues: usize) { + let features = self.serial.borrow().get_device_features(); + self.vqs = self.serial.borrow_mut().init_device( + self.state.clone(), + self.alloc.clone(), + features, + num_queues, + ); + } + + fn get_pty_path(&mut self) -> String { + let ret = self.state.borrow().qmp("{\"execute\": \"query-chardev\"}"); + if !(*ret.get("return").unwrap()).as_array().unwrap().is_empty() + && (*ret.get("return").unwrap())[0].get("filename").is_some() + { + let filename = (*ret.get("return").unwrap())[0] + .get("filename") + .unwrap() + .to_string() + .replace('"', ""); + let mut file_path: Vec<&str> = filename.split("pty:").collect(); + file_path.pop().unwrap().to_string() + } else { + String::from("") + } + } + + // Send control message by output control queue. 
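+ // Each message is an 8-byte, #[repr(C)] VirtioConsoleControl written into guest
+ // memory and chained onto queue 3 (the guest-to-host control transmitq):
+ //   bytes 0..4: id (LE u32, port number); bytes 4..6: event (LE u16);
+ //   bytes 6..8: value (LE u16).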
+ fn out_control_event(&mut self, ctrl_msg: VirtioConsoleControl) { + self.virtqueue_add_element( + OUT_CONTROL_QUEUE_ID, + Some(ctrl_msg.as_bytes()), + size_of::() as u64, + ); + } + + fn virtqueue_add_element( + &mut self, + queue_id: usize, + data: Option<&[u8]>, + buffer_len: u64, + ) -> (u64, u32) { + let queue = self.vqs[queue_id].clone(); + let addr = self.alloc.borrow_mut().alloc(buffer_len); + let mut write = true; + + if let Some(buffer) = data { + self.state.borrow().memwrite(addr, buffer); + write = false; + } + let free_head = queue + .borrow_mut() + .add(self.state.clone(), addr, buffer_len as u32, write); + + self.serial + .borrow() + .kick_virtqueue(self.state.clone(), queue); + + (addr, free_head) + } + + // Fill a batch of buffers elements in queues[$queue_id]. + fn fill_buffer_in_vq(&mut self, queue_id: usize) -> (Vec, Vec) { + // Note: limited by the MST framework, we only allocate 32 * 1K sized buffers. It's enough + // for test. + let mut buf_addrs = Vec::with_capacity(32); + let mut free_heads = Vec::with_capacity(32); + for _ in 0..32 { + let (buf_addr, free_head) = self.virtqueue_add_element(queue_id, None, 1024); + buf_addrs.push(buf_addr); + free_heads.push(free_head); + } + + (free_heads, buf_addrs) + } + + // Init serial device. + fn serial_init(&mut self) { + let control_msg_len = size_of::(); + let mut in_msg = 0; + + // Init virtqueues. + self.virtqueue_setup(DEFAULT_SERIAL_VIRTQUEUES); + + // Prepare control input buffer. + let (free_heads, control_outs) = self.fill_buffer_in_vq(IN_CONTROL_QUEUE_ID); + + // Device ready. + let ready_msg = VirtioConsoleControl::new(0, VIRTIO_CONSOLE_DEVICE_READY, 1); + self.out_control_event(ready_msg); + + // Port add. + self.serial.borrow().poll_used_elem( + self.state.clone(), + self.vqs[IN_CONTROL_QUEUE_ID].clone(), + free_heads[in_msg], + TIMEOUT_US, + &mut None, + true, + ); + for _ in self.ports.iter() { + let in_control_msg = self + .state + .borrow() + .memread(control_outs[in_msg], control_msg_len as u64); + in_msg += 1; + assert_eq!( + LittleEndian::read_u16(&in_control_msg[4..6]), + VIRTIO_CONSOLE_PORT_ADD + ); + assert_eq!(LittleEndian::read_u16(&in_control_msg[6..8]), 1); + } + + // Port Ready. + for port in self.ports.clone().iter() { + let ready_msg = + VirtioConsoleControl::new(u32::from(*port.0), VIRTIO_CONSOLE_PORT_READY, 1); + self.out_control_event(ready_msg); + + // If it's a console port. + if *port.1 { + let in_control_msg = self + .state + .borrow() + .memread(control_outs[in_msg], control_msg_len as u64); + in_msg += 1; + assert_eq!( + LittleEndian::read_u16(&in_control_msg[4..6]), + VIRTIO_CONSOLE_CONSOLE_PORT + ); + assert_eq!(LittleEndian::read_u16(&in_control_msg[6..8]), 1); + } + + // Port name. + let in_control_msg = self + .state + .borrow() + .memread(control_outs[in_msg], control_msg_len as u64); + in_msg += 1; + assert_eq!( + LittleEndian::read_u16(&in_control_msg[4..6]), + VIRTIO_CONSOLE_PORT_NAME + ); + assert_eq!(LittleEndian::read_u16(&in_control_msg[6..8]), 1); + + // Virtconsole is default host connected. + if *port.1 { + let in_control_msg = self + .state + .borrow() + .memread(control_outs[in_msg], control_msg_len as u64); + in_msg += 1; + assert_eq!( + LittleEndian::read_u16(&in_control_msg[4..6]), + VIRTIO_CONSOLE_PORT_OPEN + ); + assert_eq!(LittleEndian::read_u16(&in_control_msg[6..8]), 1); + + // driver -> device: port open. 
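+ // Reply with VIRTIO_CONSOLE_PORT_OPEN (value 1) so the device sees the guest
+ // side of this console as connected.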
+ let open_msg: VirtioConsoleControl = + VirtioConsoleControl::new(u32::from(*port.0), VIRTIO_CONSOLE_PORT_OPEN, 1); + self.out_control_event(open_msg); + } + } + } + + fn connect_pty_host(&mut self, new: bool) -> Option { + let pty_path = self.get_pty_path(); + assert_ne!(pty_path, String::from("")); + + let pty = match new { + true => File::create(&pty_path), + false => File::open(&pty_path), + }; + + // Connect pty host. + let mut host: Option = None; + match pty { + Ok(file) => host = Some(file), + Err(e) => assert!(false, "{}", e), + } + + host + } + + fn connect_socket_host(&mut self, socket_path: &str) -> Option { + let stream = UnixStream::connect(socket_path).expect("Couldn't connect socket"); + stream + .set_nonblocking(true) + .expect("Couldn't set nonblocking"); + + Some(stream) + } + + fn verify_port_io(&mut self, port: PortConfig) { + // queue[2]: control receiveq(host to guest). + // queue[3]: control transmitq(guest to host). + let input_queue_id = match port.nr { + 0 => 0, + _ => 2 * port.nr + 2, + } as usize; + let output_queue_id = input_queue_id + 1; + + let mut stream = None; + let mut host = None; + + // Connect Host. + match port.chardev_type { + ChardevType::Pty => { + host = self.connect_pty_host(false); + } + ChardevType::Socket { + ref path, + server: _, + nowait: _, + } => { + stream = self.connect_socket_host(path); + } + } + + // Connect Guest. + // driver -> device: port open. + let open_msg: VirtioConsoleControl = + VirtioConsoleControl::new(u32::from(port.nr), VIRTIO_CONSOLE_PORT_OPEN, 1); + self.out_control_event(open_msg); + + // IO: Guest -> Host. + let test_data = String::from("Test\n"); + let (_, free_head) = self.virtqueue_add_element( + output_queue_id, + Some(test_data.as_bytes()), + test_data.len() as u64, + ); + self.serial.borrow().poll_used_elem( + self.state.clone(), + self.vqs[output_queue_id].clone(), + free_head, + TIMEOUT_US, + &mut None, + false, + ); + match port.chardev_type { + ChardevType::Pty => { + verify_input_data(&mut host.unwrap(), &test_data); + } + _ => { + verify_input_data(&mut stream.as_ref().unwrap(), &test_data); + } + } + + // IO: Host -> Guest. + let (addr, free_head) = + self.virtqueue_add_element(input_queue_id, None, test_data.len() as u64); + let result = match port.chardev_type { + ChardevType::Pty => { + let output = self.connect_pty_host(true); + output.unwrap().write(test_data.as_bytes()) + } + _ => stream.as_ref().unwrap().write(test_data.as_bytes()), + }; + match result { + Ok(_num) => { + let start_time = time::Instant::now(); + let timeout_us = time::Duration::from_micros(TIMEOUT_US); + loop { + let mut len: Option = Some(0); + self.serial.borrow().poll_used_elem( + self.state.clone(), + self.vqs[input_queue_id].clone(), + free_head, + TIMEOUT_US, + &mut len, + false, + ); + if len.unwrap() != 0 { + verify_output_data(self.state.clone(), addr, len.unwrap(), &test_data); + break; + } + assert!(time::Instant::now() - start_time < timeout_us); + } + } + Err(e) => assert!(false, "Failed to write contents to socket: {}", e), + } + + // Clean. 
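+ // Pty backends need no explicit teardown; socket backends are shut down in
+ // both directions.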
+ match port.chardev_type { + ChardevType::Pty => {} + _ => stream + .unwrap() + .shutdown(Shutdown::Both) + .expect("shutdown function failed"), + }; + } + + fn test_end(&mut self) { + self.serial + .borrow_mut() + .destroy_device(self.alloc.clone(), self.vqs.clone()); + + self.state.borrow_mut().stop(); + } +} + +fn create_serial(ports_config: Vec, pci_slot: u8, pci_fn: u8) -> SerialTest { + let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + let serial_pci_args = format!( + "-device {},id=serial0,bus=pcie.0,addr={}.0", + "virtio-serial-pci", pci_slot + ); + let mut ports = HashMap::new(); + args.append(&mut serial_pci_args[..].split(' ').collect()); + + let mut ports_args = String::new(); + for port in ports_config { + let chardev_args = match port.chardev_type { + ChardevType::Pty => format!("-chardev pty,id=charserial{}", port.nr), + ChardevType::Socket { + path, + server, + nowait, + } => { + let mut args = format!("-chardev socket,id=charserial{},path={}", port.nr, path); + if server { + args.push_str(",server") + } + if nowait { + args.push_str(",nowait") + } + args + } + }; + ports_args.push_str(&chardev_args); + + let device_type = match port.is_console { + true => "virtconsole", + false => "virtserialport", + }; + let port_args = format!( + " -device {},chardev=charserial{},id=serialport{},nr={} ", + device_type, port.nr, port.nr, port.nr + ); + ports_args.push_str(&port_args); + ports.insert(port.nr, port.is_console); + } + args.append(&mut ports_args.trim().split(' ').collect()); + + let state = Rc::new(RefCell::new(test_init(args))); + let machine = TestStdMachine::new(state.clone()); + let alloc = machine.allocator.clone(); + let serial = Rc::new(RefCell::new(TestVirtioPciDev::new(machine.pci_bus))); + serial.borrow_mut().init(pci_slot, pci_fn); + + SerialTest { + serial, + state, + alloc, + vqs: Vec::new(), + ports, + } +} + +fn verify_output_data(test_state: Rc>, addr: u64, len: u32, test_data: &String) { + let mut data_buf: Vec = Vec::with_capacity(len as usize); + data_buf.append(test_state.borrow().memread(addr, u64::from(len)).as_mut()); + let data = String::from_utf8(data_buf).unwrap(); + assert_eq!(data, *test_data); +} + +fn verify_input_data(input: &mut dyn Read, test_data: &String) { + let mut buffer = [0; BUFFER_LEN]; + match input.read(&mut buffer[0..test_data.len()]) { + Ok(size) => { + let response = String::from_utf8_lossy(&buffer[0..size]).to_string(); + assert_eq!(response, *test_data); + } + Err(e) => assert!(false, "Failed to read contents from socket: {}", e), + } +} + +/// Virtio serial pci device config space operation. +/// TestStep: +/// 1. Init virtio serial device(1 virtconsole, pty backend chardev). +/// 2. Read/write pci device config space. +/// 3. IO function test. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn serial_config_rw_conifg() { + let port = PortConfig { + chardev_type: ChardevType::Pty, + nr: 0, + is_console: true, + }; + let pci_slot = 0x04; + let pci_fn = 0x0; + let mut st = create_serial(vec![port.clone()], pci_slot, pci_fn); + + // Cross boundary reading. Stratovirt should not be abnormal. + st.serial.borrow().config_readl(32); + + // Read normally. + assert_eq!( + st.serial.borrow().config_readw(0), + ROWS_DEFAULT, + "The rows of the serial config is incorrect or the testcase parament is out of date!" + ); + + assert_eq!( + st.serial.borrow().config_readw(2), + COLS_DEFAULT, + "The cols of the serial config is incorrect or the testcase parament is out of date!" 
+ ); + + assert_eq!( + st.serial.borrow().config_readl(4), + DEFAULT_SERIAL_PORTS_NUMBER, + "The max_nr_ports of the serial config is incorrect or the testcase parament is out of date!" + ); + + assert_eq!( + st.serial.borrow().config_readl(8), + EMERG_WR_DEFAULT, + "The emerg_wr of the serial config is incorrect or the testcase parament is out of date!" + ); + + // Write config. + st.serial.borrow().config_writew(0, 1); + assert_eq!( + st.serial.borrow().config_readw(0), + ROWS_DEFAULT, + "The serial device doesn't support writing config. But config was written!" + ); + + st.serial_init(); + st.verify_port_io(port); + st.test_end(); +} + +/// Virtio serial pci device features negotiate operation. +/// TestStep: +/// 1. Init virtio serial device(1 virtconsole, pty backend chardev). +/// 2. Negotiate supported features(VIRTIO_CONSOLE_F_SIZE/VIRTIO_CONSOLE_F_MULTIPORT). +/// 3. Negotiate unsupported feature(VIRTIO_CONSOLE_F_EMERG_WRITE). +/// 4. IO function test. +/// 5. Destroy device. +/// Expect: +/// 1/2/4/5: success. +/// 3: unsupported feature can't be negotiated. +#[test] +fn serial_features_negotiate() { + let port = PortConfig { + chardev_type: ChardevType::Pty, + nr: 0, + is_console: true, + }; + let pci_slot = 0x04; + let pci_fn = 0x0; + let mut st = create_serial(vec![port.clone()], pci_slot, pci_fn); + + let mut features = st.serial.borrow().get_device_features(); + features |= 1 << VIRTIO_CONSOLE_F_SIZE | 1 << VIRTIO_CONSOLE_F_MULTIPORT; + st.serial.borrow_mut().negotiate_features(features); + st.serial.borrow_mut().set_features_ok(); + assert_eq!(features, st.serial.borrow_mut().get_guest_features()); + + let unsupported_features = 1 << VIRTIO_CONSOLE_F_EMERG_WRITE; + features |= unsupported_features; + st.serial.borrow_mut().negotiate_features(features); + st.serial.borrow_mut().set_features_ok(); + assert_ne!(features, st.serial.borrow_mut().get_guest_features()); + assert_eq!( + unsupported_features & st.serial.borrow_mut().get_guest_features(), + 0 + ); + + st.serial_init(); + st.verify_port_io(port); + st.test_end(); +} + +/// Virtio serial pci device basic function(socket backend chardev). +/// TestStep: +/// 1. Init virtio serial device(1 virtserialport, socket backend chardev). +/// 2. IO function test. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn virtserialport_socket_basic() { + let socket_path = "/tmp/test-virtserialport0.sock"; + if Path::new(socket_path).exists() { + fs::remove_file(socket_path).unwrap(); + } + let socket = ChardevType::Socket { + path: String::from(socket_path), + server: true, + nowait: true, + }; + let port = PortConfig { + chardev_type: socket, + nr: 1, + is_console: false, + }; + + let pci_slot = 0x4; + let pci_fn = 0x0; + let mut st = create_serial(vec![port.clone()], pci_slot, pci_fn); + + st.serial_init(); + st.verify_port_io(port); + st.test_end(); +} + +/// Virtio serial pci device basic function(pty backend chardev). +/// TestStep: +/// 1. Init virtio serial device(1 virtserialport, pty backend chardev). +/// 2. IO function test. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn virtserialport_pty_basic() { + let port = PortConfig { + chardev_type: ChardevType::Pty, + nr: 1, + is_console: false, + }; + let pci_slot = 0x04; + let pci_fn = 0x0; + let mut st = create_serial(vec![port.clone()], pci_slot, pci_fn); + + st.serial_init(); + st.verify_port_io(port); + st.test_end(); +} + +/// Virtio serial pci device error control message test. +/// TestStep: +/// 1. 
Init virtio serial device(1 virtconsole, pty backend chardev). +/// 2. Send out control message which has invalid event. +/// 3. Send out control message which has non-existed port id. +/// 4. Send out control message which size is illegal. +/// 5. Destroy device. +/// Expect: +/// 1/5: success. +/// 2/3: Just discard this invalid msg. Nothing happened. +/// 4: report virtio error. +#[test] +fn virtconsole_pty_err_out_control_msg() { + let nr = 0; + let port = PortConfig { + chardev_type: ChardevType::Pty, + nr, + is_console: true, + }; + let pci_slot = 0x04; + let pci_fn = 0x0; + let mut st = create_serial(vec![port], pci_slot, pci_fn); + + st.serial_init(); + + // Error out control msg which has invalid event. Just discard this invalid msg. Nothing + // happened. + let invalid_event_msg = VirtioConsoleControl::new(u32::from(nr), VIRTIO_CONSOLE_PORT_NAME, 1); + st.out_control_event(invalid_event_msg); + + // Error out control msg which has non-existed port id. Just discard this invalid msg. Nothing + // happened. + let invalid_event_msg = + VirtioConsoleControl::new(u32::from(nr + 5), VIRTIO_CONSOLE_PORT_OPEN, 1); + st.out_control_event(invalid_event_msg); + + // Error out control msg which size is illegal. + let error_control_msg = vec![0]; + st.virtqueue_add_element(OUT_CONTROL_QUEUE_ID, Some(&error_control_msg), 1); + + thread::sleep(time::Duration::from_secs(1)); + assert!(st.serial.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET != 0); + + // Send a random control message. Check stratovirt is working. + let ready_msg = VirtioConsoleControl::new(0, VIRTIO_CONSOLE_DEVICE_READY, 1); + st.out_control_event(ready_msg); + + st.test_end(); +} + +/// Virtio serial pci device invalid input control message buffer test. +/// TestStep: +/// 1. Init virtio serial device(1 virtconsole, pty backend chardev). +/// 2. Don't provide buffer in input_control_queue. Send a message which should response in +/// input_control_queue. +/// 3. Provide 1 byte buffer in input_control_queue. Send a message which should response in +/// input_control_queue. +/// 4. Destroy device. +/// Expect: +/// 1/4: success. +/// 2: Just discard this invalid msg. Nothing happened. +/// 3: report virtio error. +#[test] +fn virtconsole_pty_invalid_in_control_buffer() { + let port = PortConfig { + chardev_type: ChardevType::Pty, + nr: 0, + is_console: true, + }; + let pci_slot = 0x04; + let pci_fn = 0x0; + let mut st = create_serial(vec![port], pci_slot, pci_fn); + + // Init virtqueues. + st.virtqueue_setup(DEFAULT_SERIAL_VIRTQUEUES); + + // No buffer in input_control_queue. Will discard all requests sent by input_control_queue. + // Nothing else happened. + let ready_msg = VirtioConsoleControl::new(0, VIRTIO_CONSOLE_DEVICE_READY, 1); + st.out_control_event(ready_msg); + + // Provide size_of::() buffer for input_control_queue. + st.virtqueue_add_element( + IN_CONTROL_QUEUE_ID, + None, + size_of::() as u64, + ); + + // Error control msg: Guest is not ready. It will do nothing. Buffer in input_control_queue will + // not be used. + let ready_msg = VirtioConsoleControl::new(0, VIRTIO_CONSOLE_DEVICE_READY, 0); + st.out_control_event(ready_msg); + + // Should response VIRTIO_CONSOLE_PORT_ADD msg when guest is ready. Buffer will be used. + let ready_msg = VirtioConsoleControl::new(0, VIRTIO_CONSOLE_DEVICE_READY, 1); + st.out_control_event(ready_msg); + + // Give only 1 byte for input control message which will result virtio error. 
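+ // One byte cannot hold a full VirtioConsoleControl reply, so the reply the
+ // device eventually writes into this buffer should leave it flagged with
+ // VIRTIO_CONFIG_S_NEEDS_RESET.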
+ st.virtqueue_add_element(IN_CONTROL_QUEUE_ID, None, 1); + + // Error control msg: Port is not ready. It will do nothing. Buffer in input_control_queue will + // not be used. + let ready_msg = VirtioConsoleControl::new(0, VIRTIO_CONSOLE_PORT_READY, 0); + st.out_control_event(ready_msg); + + // Console is default host connected. Should response VIRTIO_CONSOLE_CONSOLE_PORT msg. 1 byte + // Buffer will be used. + let ready_msg = VirtioConsoleControl::new(0, VIRTIO_CONSOLE_PORT_READY, 1); + st.out_control_event(ready_msg); + + // Little buffer for VIRTIO_CONSOLE_CONSOLE_PORT message. + thread::sleep(time::Duration::from_secs(1)); + assert!(st.serial.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET != 0); + + st.test_end(); +} + +/// Virtio serial pci device IO test when host is not connected. +/// TestStep: +/// 1. Init virtio serial device(1 virtserialport, socket backend chardev, don't connect in host). +/// 2. IO test in this port. +/// 3. IO test in virtqueues which have no port. +/// 4. basic IO test.(port is connected) +/// 5. Destroy device. +/// Expect: +/// 1/4/5: success. +/// 2/3: Just discard these requests. Nothing happened. +#[test] +fn virtserialport_socket_not_connect() { + let nr = 1; + let socket_path = "/tmp/test-virtserialport1.sock"; + if Path::new(socket_path).exists() { + fs::remove_file(socket_path).unwrap(); + } + let socket = ChardevType::Socket { + path: String::from(socket_path), + server: true, + nowait: true, + }; + let port = PortConfig { + chardev_type: socket, + nr, + is_console: false, + }; + + let pci_slot = 0x4; + let pci_fn = 0x0; + let mut st = create_serial(vec![port.clone()], pci_slot, pci_fn); + + st.serial_init(); + + // Requests will be discarded when host (port 1, output queue id: 5) is not connected. Nothing + // happened. + let test_data = String::from("Test\n"); + st.virtqueue_add_element(5, Some(test_data.as_bytes()), test_data.len() as u64); + + // Requests will be discarded when it is sent in virtqueue which has no port(port 2, output + // queue id: 7). Nothing happened. + let test_data = String::from("Test\n"); + st.virtqueue_add_element(7, Some(test_data.as_bytes()), test_data.len() as u64); + + // Virtio-serial is working normally after these steps. + st.verify_port_io(port); + st.test_end(); +} diff --git a/tests/mod_test/tests/usb_camera_test.rs b/tests/mod_test/tests/usb_camera_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..98ac301a0840b8699c368471168a7272fb3570f9 --- /dev/null +++ b/tests/mod_test/tests/usb_camera_test.rs @@ -0,0 +1,577 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
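+
+// These tests drive the emulated USB camera through xHCI: a video format is
+// negotiated with UVC probe/commit control requests, UVC payloads are then read
+// from the video-streaming endpoint, and the synthetic frame patterns are checked.
+// Hot plug/unplug of the camera via QMP is covered as well.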
+ +use std::cell::{RefCell, RefMut}; +use std::rc::Rc; +use std::{fs::remove_file, fs::File, io::Write}; + +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use devices::usb::xhci::xhci_trb::TRBCCode; +use mod_test::libdriver::usb::{ + TestUsbBuilder, TestXhciPciDevice, CONTROL_ENDPOINT_ID, PRIMARY_INTERRUPTER_ID, +}; +use mod_test::libtest::TestState; + +const UVC_FID: u8 = 1; +const UVC_HEADER_LEN: u8 = 2; +const VS_ENDPOINT_ID: u32 = 3; + +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, Default)] +enum FmtType { + #[default] + Yuy2 = 0, + Rgb565, + Mjpg, + Nv12, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct DeviceConfig { + check_interval: u64, + image_mode: String, + force_frame_len: Option, +} + +impl Default for DeviceConfig { + fn default() -> Self { + Self { + check_interval: 10, + image_mode: String::from("default"), + force_frame_len: None, + } + } +} + +struct TestCameraConfig { + path: String, + conf: DeviceConfig, +} + +impl TestCameraConfig { + fn new(name: &str) -> Self { + let path = format!("/tmp/camera_config_{}.json", name); + let mut config = Self { + path, + conf: DeviceConfig::default(), + }; + config.write_config(); + config + } + + fn write_config(&mut self) { + let conf = serde_json::to_string(&self.conf).unwrap(); + let mut file = File::create(&self.path).unwrap(); + file.set_len(0).unwrap(); + file.write_all(conf.as_bytes()).unwrap(); + file.flush().unwrap(); + } +} + +impl Drop for TestCameraConfig { + fn drop(&mut self) { + if let Err(e) = remove_file(&self.path) { + println!("Failed to remove config, {:?}", e); + } + } +} + +fn check_frame( + xhci: &mut RefMut, + slot_id: u32, + format_idx: u8, + frame_idx: u8, + cnt: u32, +) { + start_capture(xhci, slot_id, format_idx, frame_idx); + // Check current setting. + let cur = xhci.vs_get_cur(slot_id); + assert_eq!(cur.bFormatIndex, format_idx); + assert_eq!(cur.bFrameIndex, frame_idx); + // Get frame. 
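+ // Fetch `cnt` complete frames from the video-streaming endpoint and verify both
+ // the UVC payload headers and the per-format pixel pattern.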
+ let fmt = format_index_to_fmt(format_idx); + check_multi_frames( + xhci, + slot_id, + &fmt, + cur.dwMaxVideoFrameSize, + cur.dwMaxPayloadTransferSize, + cnt, + ); + stop_capture(xhci, slot_id); +} + +fn format_index_to_fmt(idx: u8) -> FmtType { + if idx == 1 { + FmtType::Yuy2 + } else if idx == 2 { + FmtType::Mjpg + } else if idx == 3 { + FmtType::Rgb565 + } else { + FmtType::Nv12 + } +} + +fn start_capture(xhci: &mut RefMut, slot_id: u32, fmt_idx: u8, frm_idx: u8) { + xhci.vs_probe_control(slot_id, fmt_idx, frm_idx); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + xhci.vs_commit_control(slot_id, fmt_idx, frm_idx); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); +} + +fn stop_capture(xhci: &mut RefMut, slot_id: u32) { + xhci.vs_clear_feature(slot_id); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); +} + +fn check_multi_frames( + xhci: &mut RefMut, + slot_id: u32, + fmt: &FmtType, + frame_len: u32, + max_payload: u32, + cnt: u32, +) { + let mut fid = 0; + for _ in 0..cnt { + let payload_list = xhci.get_payload( + slot_id, + VS_ENDPOINT_ID, + frame_len, + u32::from(UVC_HEADER_LEN), + max_payload, + ); + for buf in &payload_list { + assert_eq!(buf[0], UVC_HEADER_LEN); + assert_eq!(buf[1] & UVC_FID, fid); + } + fid ^= UVC_FID; + let frame = payload_to_frame(payload_list); + check_frame_data(fmt, &frame); + } +} + +fn payload_to_frame(list: Vec>) -> Vec { + let mut frame = Vec::new(); + for buf in list { + frame.append(&mut buf[UVC_HEADER_LEN as usize..].to_vec()) + } + frame +} + +fn check_frame_data(fmt: &FmtType, data: &[u8]) { + let sz = data.len(); + match fmt { + FmtType::Yuy2 => { + assert_eq!(sz % 4, 0); + for i in 0..(sz / 4) { + assert_eq!(data[4 * i..(4 * i + 4)], [82, 90, 82, 240]); + } + } + FmtType::Rgb565 => { + assert_eq!(sz % 4, 0); + for i in 0..(sz / 4) { + assert_eq!(data[4 * i..(4 * i + 4)], [0, 0, 255, 255]); + } + } + FmtType::Mjpg => { + assert_eq!(data[0..4], [0xff, 0xd8, 0xff, 0xe0]); + let pos = data.len() - 2; + assert_eq!(data[pos..], [0xff, 0xf9]); + } + FmtType::Nv12 => { + let len = data.len(); + for i in 0..(len / 2) { + assert_eq!(data[i], 76); + } + for i in 0..(len / 4) { + let idx = len / 2 + i * 2; + assert_eq!(data[idx..idx + 2], [90, 255]); + } + } + } +} + +fn qmp_cameradev_add( + test_state: &Rc>, + id: &str, + driver: &str, + path: &str, +) -> Value { + let test_state = test_state.borrow_mut(); + let cmd: &str = r#"{"execute": "cameradev_add", "arguments": {"id": "ID", "driver": "DRIVER", "path": "PATH"}}"#; + let cmd = cmd.replace("ID", id); + let cmd = cmd.replace("DRIVER", driver); + let cmd = cmd.replace("PATH", path); + test_state.qmp(&cmd) +} + +fn qmp_cameradev_del(test_state: &Rc>, id: &str) -> Value { + let test_state = test_state.borrow_mut(); + let cmd = r#"{"execute": "cameradev_del", "arguments": {"id": "ID"}}"#; + let cmd = cmd.replace("ID", id); + test_state.qmp(&cmd) +} + +fn qmp_plug_camera(test_state: &Rc>, id: &str, camdev: &str) -> Value { + let test_state = test_state.borrow_mut(); + let cmd = r#"{"execute": "device_add", "arguments": {"id": "ID", "driver": "usb-camera", "cameradev": "CAMDEV"}}"#; + let cmd = cmd.replace("ID", id); + let cmd = cmd.replace("CAMDEV", camdev); + 
test_state.qmp(&cmd) +} + +fn qmp_unplug_camera(test_state: &Rc>, id: &str) -> Value { + let test_state = test_state.borrow_mut(); + let cmd = r#"{"execute": "device_del", "arguments": {"id": "ID"}}"#; + let cmd = cmd.replace("ID", id); + test_state.qmp(&cmd) +} + +/// USB camera basic capture. +/// TestStep: +/// 1. Init camera device. +/// 2. Query control capabilities. +/// 3. Start capture. +/// 4. Check Frame data. +/// 5. Stop capture. +/// Expect: +/// 1/2/3/4/5: success. +#[test] +fn test_xhci_camera_basic() { + let config = TestCameraConfig::new("test_xhci_camera_basic"); + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_camera("cam", &config.path) + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + let port_id = 5; // super speed + let slot_id = xhci.init_device(port_id); + + // Query control capabilities. + let info = xhci.vs_get_info(slot_id); + assert_eq!(info, 2 | 1); + // Yuy2 + check_frame(&mut xhci, slot_id, 1, 2, 3); + // Mjpg + check_frame(&mut xhci, slot_id, 2, 2, 3); + // Rgb + check_frame(&mut xhci, slot_id, 3, 3, 3); + // Nv12 + check_frame(&mut xhci, slot_id, 4, 1, 3); + + test_state.borrow_mut().stop(); +} + +/// USB camera capture with invalid frame length. +/// TestStep: +/// 1. Init camera device with invalid frame length. +/// 2. Start capture. +/// 3. Check Frame data. +/// 4. Stop capture. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn test_xhci_camera_invalid_frame_len() { + let mut config: TestCameraConfig = TestCameraConfig::new("test_xhci_camera_invalid_frame_len"); + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_camera("cam", &config.path) + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .with_config("over_transfer_ring", true) + .build(); + let mut xhci = xhci.borrow_mut(); + let port_id = 5; // super speed + let slot_id = xhci.init_device(port_id); + // Oversized frame. + let len = 7680 * 4320; + config.conf.force_frame_len = Some(len); + config.write_config(); + start_capture(&mut xhci, slot_id, 1, 1); + let cur = xhci.vs_get_cur(slot_id); + // Get frame. + let payload_list = xhci.get_payload( + slot_id, + VS_ENDPOINT_ID, + len as u32, + u32::from(UVC_HEADER_LEN), + cur.dwMaxPayloadTransferSize, + ); + for item in payload_list { + assert_eq!(item[0], UVC_HEADER_LEN); + } + stop_capture(&mut xhci, slot_id); + // Zero size frame. + config.conf.force_frame_len = Some(0); + config.write_config(); + start_capture(&mut xhci, slot_id, 1, 1); + // Get frame. + xhci.queue_indirect_td(slot_id, VS_ENDPOINT_ID, 10); + xhci.doorbell_write(slot_id, VS_ENDPOINT_ID); + // Wait enough time. + std::thread::sleep(std::time::Duration::from_millis(200)); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + stop_capture(&mut xhci, slot_id); + + test_state.borrow_mut().stop(); +} + +/// USB camera capture with invalid frame index. +/// TestStep: +/// 1. Init camera device. +/// 2. Start capture with invalid frame index. +/// 3. Reset endpoint. +/// 4. Start capture. +/// 5. Stop capture. +/// Expect: +/// 1/3/4/5: success. +/// 2: failure. 
+#[test] +fn test_xhci_camera_invalid_config() { + let config = TestCameraConfig::new("test_xhci_camera_invalid_config"); + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_camera("cam", &config.path) + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + let port_id = 5; // super speed + let slot_id = xhci.init_device(port_id); + + start_capture(&mut xhci, slot_id, 1, 1); + // Check current setting. + let cur = xhci.vs_get_cur(slot_id); + assert_eq!(cur.bFormatIndex, 1); + assert_eq!(cur.bFrameIndex, 1); + // Get frame. + let fmt = format_index_to_fmt(1); + check_multi_frames( + &mut xhci, + slot_id, + &fmt, + cur.dwMaxVideoFrameSize, + cur.dwMaxPayloadTransferSize, + 2, + ); + // Set invalid index. + xhci.vs_probe_control(slot_id, 99, 99); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::StallError as u32); + // Reset endpoint. + xhci.reset_endpoint(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // Set invalid index. + xhci.vs_commit_control(slot_id, 99, 99); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::StallError as u32); + // Reset endpoint. + xhci.reset_endpoint(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + check_multi_frames( + &mut xhci, + slot_id, + &fmt, + cur.dwMaxVideoFrameSize, + cur.dwMaxPayloadTransferSize, + 2, + ); + + test_state.borrow_mut().stop(); +} + +/// USB camera capture multiple times. +/// TestStep: +/// 1. Init camera device. +/// 2. Start/Stop capture for multiple times. +/// Expect: +/// 1/2: success. +#[test] +fn test_xhci_camera_repeat_openclose() { + let config = TestCameraConfig::new("test_xhci_camera_repeat_openclose"); + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_camera("cam", &config.path) + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + let port_id = 5; // super speed + let slot_id = xhci.init_device(port_id); + + let cnt = 3; + for _ in 0..cnt { + check_frame(&mut xhci, slot_id, 1, 1, 3); + } + test_state.borrow_mut().stop(); +} + +/// USB camera capture with different config. +/// TestStep: +/// 1. Init camera device. +/// 2. Capture with different config. +/// Expect: +/// 1/2: success. +#[test] +fn test_xhci_camera_repeat_config() { + let config = TestCameraConfig::new("test_xhci_camera_repeat_config"); + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_camera("cam", &config.path) + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + let port_id = 5; // super speed + let slot_id = xhci.init_device(port_id); + check_frame(&mut xhci, slot_id, 1, 1, 3); + check_frame(&mut xhci, slot_id, 1, 2, 3); + check_frame(&mut xhci, slot_id, 1, 3, 3); + check_frame(&mut xhci, slot_id, 1, 4, 3); + check_frame(&mut xhci, slot_id, 3, 2, 3); + + test_state.borrow_mut().stop(); +} + +/// USB camera capture with invalid control order. +/// TestStep: +/// 1. Init camera device. +/// 2. Capture with invalid control order. 
+/// Expect: +/// 1/2: success. +#[test] +fn test_xhci_camera_invalid_control() { + let config = TestCameraConfig::new("test_xhci_camera_invalid_control"); + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_camera("cam", &config.path) + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + let port_id = 5; // super speed + let slot_id = xhci.init_device(port_id); + + start_capture(&mut xhci, slot_id, 1, 1); + let cur = xhci.vs_get_cur(slot_id); + let fmt = format_index_to_fmt(1); + let cnt = 2; + for _ in 0..cnt { + xhci.vs_probe_control(slot_id, 1, 1); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + } + for _ in 0..cnt { + xhci.vs_commit_control(slot_id, 1, 1); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + } + + check_multi_frames( + &mut xhci, + slot_id, + &fmt, + cur.dwMaxVideoFrameSize, + cur.dwMaxPayloadTransferSize, + 2, + ); + + test_state.borrow_mut().stop(); +} + +/// USB camera hot plug/unplug. +/// TestStep: +/// 1. Hot plug camera device. +/// 2. Test camera start/stop capture. +/// 3. Hot unplug device. +/// Expect: +/// 1/2: success. +#[test] +fn test_xhci_camera_hotplug() { + let config = TestCameraConfig::new("test_xhci_camera_hotplug"); + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + qmp_cameradev_add(&test_state, "camdev0", "demo", &config.path); + qmp_plug_camera(&test_state, "cam0", "camdev0"); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + let port_id = 5; // super speed + xhci.device_config.insert(String::from("camera"), true); + let slot_id = xhci.init_device(port_id); + // Yuy2 + check_frame(&mut xhci, slot_id, 1, 4, 3); + + qmp_unplug_camera(&test_state, "cam0"); + qmp_cameradev_del(&test_state, "camdev0"); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + test_state.borrow_mut().stop(); +} + +/// USB camera hot plug/unplug with invalid config. +/// TestStep: +/// 1. Hot plug camera device with invalid config. +/// 2. Hot unplug camera device with invalid config. +/// Expect: +/// 1/2: failure. +#[test] +fn test_xhci_camera_hotplug_invalid() { + let (_, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_config("auto_run", true) + .build(); + + #[cfg(not(target_env = "ohos"))] + qmp_cameradev_add(&test_state, "camdev0", "v4l2", "/tmp/not-existed"); + #[cfg(target_env = "ohos")] + qmp_cameradev_add(&test_state, "camdev0", "ohcamera", "InvalidNum"); + // Invalid cameradev. + let value = qmp_plug_camera(&test_state, "usbcam0", "camdev0"); + let desc = value["error"]["desc"].as_str().unwrap().to_string(); + #[cfg(not(target_env = "ohos"))] + assert_eq!(desc, "Failed to open v4l2 backend /tmp/not-existed."); + #[cfg(target_env = "ohos")] + assert_eq!(desc, "OH Camera: failed to init cameras"); + // Invalid device id. 
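+ // "usbcam0" was never successfully attached above, so device_del must report
+ // that the id is unknown.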
+ let value = qmp_unplug_camera(&test_state, "usbcam0"); + let desc = value["error"]["desc"].as_str().unwrap().to_string(); + assert_eq!(desc, "Failed to detach device: id usbcam0 not found"); + // Invalid cameradev id. + let value = qmp_cameradev_del(&test_state, "camdev1"); + let desc = value["error"]["desc"].as_str().unwrap().to_string(); + assert_eq!(desc, "no cameradev with id camdev1"); + + test_state.borrow_mut().stop(); +} diff --git a/tests/mod_test/tests/usb_storage_test.rs b/tests/mod_test/tests/usb_storage_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..38a45a7c12f9c1105a0564d3f0923983d84ab27c --- /dev/null +++ b/tests/mod_test/tests/usb_storage_test.rs @@ -0,0 +1,950 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::cell::RefMut; + +use byteorder::{ByteOrder, LittleEndian}; + +use devices::usb::{ + config::{USB_INTERFACE_CLASS_IN_REQUEST, USB_INTERFACE_CLASS_OUT_REQUEST}, + storage::{ + UsbMsdCswStatus, CBW_FLAG_IN, CBW_FLAG_OUT, CBW_SIGNATURE, CBW_SIZE, CSW_SIGNATURE, + CSW_SIZE, GET_MAX_LUN, MASS_STORAGE_RESET, + }, + xhci::xhci_trb::{TRBCCode, TRB_SIZE}, + UsbDeviceRequest, +}; +use mod_test::libdriver::usb::{ + TestIovec, TestUsbBuilder, TestXhciPciDevice, CONTROL_ENDPOINT_ID, PRIMARY_INTERRUPTER_ID, + STORAGE_DEVICE_IN_ENDPOINT_ID, STORAGE_DEVICE_OUT_ENDPOINT_ID, +}; +use mod_test::utils::{cleanup_img, create_img, TEST_IMAGE_SIZE}; +use mod_test::{libdriver::malloc::GuestAllocator, utils::ImageType}; + +const READ_10: u8 = 0x28; +const WRITE_10: u8 = 0x2a; +const RESERVE: u8 = 0x16; + +const CBW_ILLEGAL_SIZE: u8 = CBW_SIZE - 1; +const CSW_ILLEGAL_SIZE: u8 = CSW_SIZE - 1; + +const DISK_SECTOR_SIZE: usize = 512; + +struct Cbw { + sig: u32, + tag: u32, + data_len: u32, + flags: u8, + lun: u8, + cmd_len: u8, + cmd: [u8; 16], +} + +impl Cbw { + fn new() -> Self { + Cbw { + sig: CBW_SIGNATURE, + tag: 123456, + data_len: 0, + flags: 0, + lun: 0, + cmd_len: 0, + cmd: [0; 16], + } + } +} + +fn cbw_phase( + cbw: Cbw, + mut xhci: RefMut, + mut guest_allocator: RefMut, + slot_id: u32, + status: TRBCCode, + len: u8, +) { + let mut cbw_buf: [u8; CBW_SIZE as usize] = [0; CBW_SIZE as usize]; + LittleEndian::write_u32(&mut cbw_buf[0..4], cbw.sig); + LittleEndian::write_u32(&mut cbw_buf[4..8], cbw.tag); + LittleEndian::write_u32(&mut cbw_buf[8..12], cbw.data_len); + cbw_buf[12] = cbw.flags; + cbw_buf[13] = cbw.lun; + cbw_buf[14] = cbw.cmd_len; + for i in 0..16 { + cbw_buf[15 + i] = cbw.cmd[i]; + } + + let mut iovecs = Vec::new(); + let ptr = guest_allocator.alloc(u64::from(CBW_SIZE)); + xhci.mem_write(ptr, &cbw_buf); + + let iovec = TestIovec::new(ptr, len as usize, false); + iovecs.push(iovec); + xhci.queue_td_by_iovec(slot_id, STORAGE_DEVICE_OUT_ENDPOINT_ID, &mut iovecs, false); + xhci.doorbell_write(slot_id, STORAGE_DEVICE_OUT_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, status as u32); +} + +fn data_phase( + mut xhci: RefMut, + mut guest_allocator: RefMut, + slot_id: u32, + buf: &[u8], 
+    to_host: bool,
+    if_success: bool,
+) {
+    let mut iovecs = Vec::new();
+    let ptr = guest_allocator.alloc(buf.len() as u64);
+    let iovec = TestIovec::new(ptr, buf.len(), false);
+
+    if !to_host {
+        xhci.mem_write(ptr, buf);
+    }
+
+    iovecs.push(iovec);
+
+    if to_host {
+        xhci.queue_td_by_iovec(slot_id, STORAGE_DEVICE_IN_ENDPOINT_ID, &mut iovecs, false);
+        xhci.doorbell_write(slot_id, STORAGE_DEVICE_IN_ENDPOINT_ID);
+    } else {
+        xhci.queue_td_by_iovec(slot_id, STORAGE_DEVICE_OUT_ENDPOINT_ID, &mut iovecs, false);
+        xhci.doorbell_write(slot_id, STORAGE_DEVICE_OUT_ENDPOINT_ID);
+    }
+
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    if if_success {
+        assert_eq!(evt.ccode, TRBCCode::Success as u32);
+    } else {
+        assert_ne!(evt.ccode, TRBCCode::Success as u32);
+    }
+
+    if to_host {
+        let data_buf = xhci.mem_read(ptr, buf.len());
+        assert_eq!(buf, data_buf);
+    }
+}
+
+fn csw_phase(
+    mut xhci: RefMut<TestXhciPciDevice>,
+    mut guest_allocator: RefMut<GuestAllocator>,
+    slot_id: u32,
+    status: TRBCCode,
+    len: u8,
+    sig_check: bool,
+) -> u64 {
+    let mut iovecs = Vec::new();
+    let ptr = guest_allocator.alloc(u64::from(len));
+
+    let iovec = TestIovec::new(ptr, len as usize, false);
+    iovecs.push(iovec);
+    xhci.queue_td_by_iovec(slot_id, STORAGE_DEVICE_IN_ENDPOINT_ID, &mut iovecs, false);
+    xhci.doorbell_write(slot_id, STORAGE_DEVICE_IN_ENDPOINT_ID);
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    assert_eq!(evt.ccode, status as u32);
+
+    if sig_check {
+        let buf = xhci.mem_read(ptr, len as usize);
+        assert_eq!(CSW_SIGNATURE, LittleEndian::read_u32(&buf[0..4]));
+    }
+
+    ptr
+}
+
+/// USB storage device basic IO function test.
+/// TestStep:
+/// 0. Init process.
+/// 1. CBW: write.
+/// 2. DataOut: data write from host to device.
+/// 3. CSW.
+/// 4. CBW: read.
+/// 5. DataIn: data read from device to host.
+/// 6. CSW.
+/// 7. Test ends. Destroy device.
+/// Expect:
+/// 0/1/2/3/4/5/6/7: success.
+#[test]
+fn usb_storage_basic() {
+    let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw);
+    let (xhci, test_state, guest_allocator) = TestUsbBuilder::new()
+        .with_xhci("xhci")
+        .with_usb_storage(&image_path, "disk")
+        .with_config("auto_run", true)
+        .with_config("command_auto_doorbell", true)
+        .build();
+
+    let port_id = 1;
+    let slot_id = xhci.borrow_mut().init_device(port_id);
+
+    // Test 1: CBW phase.
+    let mut cbw = Cbw::new();
+    cbw.data_len = 4;
+    cbw.flags = CBW_FLAG_OUT;
+    cbw.cmd_len = 10;
+    cbw.cmd[0] = WRITE_10;
+    cbw.cmd[8] = 1;
+    cbw_phase(
+        cbw,
+        xhci.borrow_mut(),
+        guest_allocator.borrow_mut(),
+        slot_id,
+        TRBCCode::Success,
+        CBW_SIZE,
+    );
+
+    // Test 2: DataOut phase.
+    let mut buf = "TEST".as_bytes().to_vec();
+    buf.resize(DISK_SECTOR_SIZE, 0);
+    data_phase(
+        xhci.borrow_mut(),
+        guest_allocator.borrow_mut(),
+        slot_id,
+        &buf,
+        false,
+        true,
+    );
+
+    // Test 3: CSW phase.
+    csw_phase(
+        xhci.borrow_mut(),
+        guest_allocator.borrow_mut(),
+        slot_id,
+        TRBCCode::Success,
+        CSW_SIZE,
+        true,
+    );
+
+    // Test 4: CBW phase.
+    let mut cbw = Cbw::new();
+    cbw.data_len = 4;
+    cbw.flags = CBW_FLAG_IN;
+    cbw.cmd_len = 10;
+    cbw.cmd[0] = READ_10;
+    cbw.cmd[8] = 1;
+    cbw_phase(
+        cbw,
+        xhci.borrow_mut(),
+        guest_allocator.borrow_mut(),
+        slot_id,
+        TRBCCode::Success,
+        CBW_SIZE,
+    );
+
+    // Test 5: DataIn phase.
+    data_phase(
+        xhci.borrow_mut(),
+        guest_allocator.borrow_mut(),
+        slot_id,
+        &buf,
+        true,
+        true,
+    );
+
+    // Test 6: CSW phase.
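+    // The status stage reads back the 13-byte Command Status Wrapper defined
+    // by the USB Mass Storage Bulk-Only Transport spec: dCSWSignature ("USBS"),
+    // the echoed dCSWTag, dCSWDataResidue and bCSWStatus (0 means passed).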
+ csw_phase( + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::Success, + CSW_SIZE, + true, + ); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device functional 'Reset' test. +/// TestStep: +/// 0. Init process. +/// 1. Reset. +/// 2. Test ends. Destroy device. +/// Expect: +/// 0/1/2: success. +#[test] +fn usb_storage_functional_reset() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let mut xhci = xhci.borrow_mut(); + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_CLASS_OUT_REQUEST, + request: MASS_STORAGE_RESET, + value: 0, + index: 0, + length: 0, + }; + + xhci.queue_device_request(slot_id, &device_req); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device functional 'Get Max Lun' test. +/// TestStep: +/// 0. Init process. +/// 1. Get Max Lun. +/// 2. Send CBW whose lun is greater than 'MAX LUN'. +/// 3. Test ends. Destroy device. +/// Expect: +/// 0/1/3: success. +/// 2: Stallerror. +#[test] +fn usb_storage_functional_get_max_lun() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let mut xhci = xhci.borrow_mut(); + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + let device_req = UsbDeviceRequest { + request_type: USB_INTERFACE_CLASS_IN_REQUEST, + request: GET_MAX_LUN, + value: 0, + index: 0, + length: 1, + }; + + xhci.queue_device_request(slot_id, &device_req); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr - u64::from(TRB_SIZE), 1); + + assert_eq!(buf, [0]); + + // Test: lun > 0 CBW phase. + let mut cbw = Cbw::new(); + cbw.data_len = 512; + cbw.lun = 8; + cbw.flags = CBW_FLAG_IN; + cbw.cmd_len = 10; + cbw.cmd[0] = READ_10; + cbw.cmd[8] = 1; + cbw_phase( + cbw, + xhci, + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::StallError, + CBW_SIZE, + ); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device illegal request test. +/// TestStep: +/// 0. Init process. +/// 1. Illegal request. +/// 2. Test ends. Destroy device. +/// Expect: +/// 0/2: success. +/// 1: StallError. 
+#[test] +fn usb_storage_illegal_request() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let mut xhci = xhci.borrow_mut(); + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + let device_req = UsbDeviceRequest { + request_type: 2, + request: 210, + value: 0, + index: 0, + length: 0, + }; + + xhci.queue_device_request(slot_id, &device_req); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::StallError as u32); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device CBW signature test. +/// TestStep: +/// 0. Init process. +/// 1. CBW: the signature value is abnormal. +/// 2. Test ends. Destroy device. +/// Expect: +/// 0/2: success. +/// 1: CBW StallError. +#[test] +fn usb_storage_cbw_signature() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let port_id = 1; + let slot_id = xhci.borrow_mut().init_device(port_id); + + // Test 1: CBW phase. + let mut cbw = Cbw::new(); + cbw.sig = 0x123456; + + cbw_phase( + cbw, + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::StallError, + CBW_SIZE, + ); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device CBW illegal packet size test. +/// TestStep: +/// 0. Init process. +/// 1. CBW: the packet size is abnormal. +/// 2. Test ends. Destroy device. +/// Expect: +/// 0/2: success. +/// 1: CBW StallError. +#[test] +fn usb_storage_cbw_illegal_size() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let port_id = 1; + let slot_id = xhci.borrow_mut().init_device(port_id); + + // Test 1: CBW phase. + let cbw = Cbw::new(); + cbw_phase( + cbw, + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::StallError, + CBW_ILLEGAL_SIZE, + ); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device CSW illegal packet size test. +/// TestStep: +/// 0. Init process. +/// 1. CBW. +/// 2. CSW: the packet size is abnormal. +/// 3. Test ends. Destroy device. +/// Expect: +/// 0/1/3: success. +/// 2: CSW StallError. +#[test] +fn usb_storage_csw_illegal_size() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let port_id = 1; + let slot_id = xhci.borrow_mut().init_device(port_id); + + // Test 1: CBW phase. + let cbw = Cbw::new(); + cbw_phase( + cbw, + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::Success, + CBW_SIZE, + ); + + // Test 2: CSW phase. 
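+    // Request CSW_ILLEGAL_SIZE (one byte short of a full CSW); the device is
+    // expected to stall the status transfer instead of completing it.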
+ csw_phase( + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::StallError, + CSW_ILLEGAL_SIZE, + false, + ); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device abnormal phase (CBW -> CSW) test, skip Data phase. +/// TestStep: +/// 0. Init process. +/// 1. CBW. +/// 2. CSW. +/// 3. Test ends. Destroy device. +/// Expect: +/// 0/1/3: success. +/// 2: CSW StallError. +#[test] +fn usb_storage_abnormal_phase_01() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let port_id = 1; + let slot_id = xhci.borrow_mut().init_device(port_id); + + // Test 1: CBW phase. + let mut cbw = Cbw::new(); + cbw.data_len = 512; + cbw.flags = CBW_FLAG_IN; + cbw.cmd_len = 10; + cbw.cmd[0] = READ_10; + cbw.cmd[8] = 1; + cbw_phase( + cbw, + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::Success, + CBW_SIZE, + ); + + // Test 2: CSW phase. + let ptr = csw_phase( + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::StallError, + CSW_SIZE, + false, + ); + + let buf = xhci.borrow_mut().mem_read(ptr, CSW_SIZE as usize); + assert_ne!(CSW_SIGNATURE, LittleEndian::read_u32(&buf[0..4])); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device abnormal phase (CBW -> CSW -> CSW) test. +/// TestStep: +/// 0. Init process. +/// 1. CBW. +/// 2. CSW. +/// 3. CSW. +/// 4. Test ends. Destroy device. +/// Expect: +/// 0/1/2/4: success. +/// 3: CSW StallError. +#[test] +fn usb_storage_abnormal_phase_02() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let port_id = 1; + let slot_id = xhci.borrow_mut().init_device(port_id); + + // Test 1: CBW phase. + let cbw = Cbw::new(); + cbw_phase( + cbw, + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::Success, + CBW_SIZE, + ); + + // Test 2: CSW phase. + csw_phase( + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::Success, + CSW_SIZE, + true, + ); + + // Test 3: CSW phase. + csw_phase( + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::StallError, + CSW_SIZE, + false, + ); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device abnormal phase (CBW -> CBW) test. +/// TestStep: +/// 0. Init process. +/// 1. CBW. +/// 2. CBW. +/// 3. Test ends. Destroy device. +/// Expect: +/// 0/1/3: success. +/// 2: CBW StallError. +#[test] +fn usb_storage_abnormal_phase_03() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let port_id = 1; + let slot_id = xhci.borrow_mut().init_device(port_id); + + // Test 1: CBW phase. 
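+    // Queue a valid READ(10) CBW first; the second CBW in Test 2 arrives
+    // while the device still expects a data/status stage and should
+    // therefore be stalled.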
+ let mut cbw = Cbw::new(); + cbw.data_len = 512; + cbw.flags = CBW_FLAG_IN; + cbw.cmd_len = 10; + cbw.cmd[0] = READ_10; + cbw.cmd[8] = 1; + cbw_phase( + cbw, + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::Success, + CBW_SIZE, + ); + + // Test 2: CBW phase. + let mut cbw = Cbw::new(); + cbw.data_len = 512; + cbw.flags = CBW_FLAG_IN; + cbw.cmd_len = 10; + cbw.cmd[0] = READ_10; + cbw.cmd[8] = 1; + cbw_phase( + cbw, + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::StallError, + CBW_SIZE, + ); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device illegal scsi cdb test. +/// TestStep: +/// 0. Init process. +/// 1. CBW. +/// 2. CSW. +/// 3. Test ends. Destroy device. +/// Expect: +/// 0/1/3: success. +/// 2: CSW StallError. +#[test] +fn usb_storage_illegal_scsi_cdb() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let port_id = 1; + let slot_id = xhci.borrow_mut().init_device(port_id); + + // Test 1: CBW phase. + let mut cbw = Cbw::new(); + cbw.data_len = 512; + cbw.flags = CBW_FLAG_IN; + cbw.cmd_len = 10; + cbw.cmd[0] = 0xff; + cbw.cmd[8] = 1; + cbw_phase( + cbw, + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::Success, + CBW_SIZE, + ); + + // Test 2: CSW phase. + csw_phase( + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::StallError, + CSW_SIZE, + false, + ); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device does not provide enough data buffer test. +/// TestStep: +/// 0. Init process. +/// 1. CBW: read. +/// 2. DataIn: data read from device to host. +/// 3. Test ends. Destroy device. +/// Expect: +/// 0/1/3: success. +/// 2: StallError. +#[test] +fn insufficient_data_buffer_test() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let port_id = 1; + let slot_id = xhci.borrow_mut().init_device(port_id); + + // Test 1: CBW phase. + let mut cbw = Cbw::new(); + cbw.data_len = 512; // 512 Bytes data buffer. + cbw.flags = CBW_FLAG_IN; + cbw.cmd_len = 10; + cbw.cmd[0] = READ_10; + cbw.cmd[8] = 1; // Need 1 logical sector(CD-ROM: 2048Bytes). + cbw_phase( + cbw, + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::Success, + CBW_SIZE, + ); + + // Test 2: Datain phase. + let buf = vec![0; 512]; // Provides 512 Bytes datain buffer. + data_phase( + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + &buf, + true, + false, + ); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device not supported scsi cdb test. +/// TestStep: +/// 0. Init process. +/// 1. CBW. +/// 2. CSW. +/// 3. Test ends. Destroy device. +/// Expect: +/// 0/1/2/3: success. +/// 2: CSW status = UsbMsdCswStatus::Failed. 
+#[test] +fn usb_storage_not_supported_scsi_cdb() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let port_id = 1; + let slot_id = xhci.borrow_mut().init_device(port_id); + + // Test 1: CBW phase. + let mut cbw = Cbw::new(); + cbw.flags = CBW_FLAG_IN; + cbw.cmd_len = 10; + cbw.cmd[0] = RESERVE; + cbw_phase( + cbw, + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::Success, + CBW_SIZE, + ); + + // Test 2: CSW phase. + let csw_addr = csw_phase( + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::Success, + CSW_SIZE, + true, + ); + + let buf = xhci.borrow_mut().mem_read(csw_addr, CSW_SIZE as usize); + assert_eq!(UsbMsdCswStatus::Failed as u8, buf[12]); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device CBW phase to invalid endpoint test. +/// TestStep: +/// 0. Init process. +/// 1. CBW: invalid endpoint(not Out endpoint). +/// 2. Test ends. Destroy device. +/// Expect: +/// 0/2: success. +/// 1: CBW StallError. +#[test] +fn usb_storage_cbw_invalid_endpoint() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + // Test 1: CBW phase. + let cbw = Cbw::new(); + + let mut cbw_buf: [u8; CBW_SIZE as usize] = [0; CBW_SIZE as usize]; + LittleEndian::write_u32(&mut cbw_buf[0..4], cbw.sig); + + let mut iovecs = Vec::new(); + let ptr = guest_allocator.borrow_mut().alloc(u64::from(CBW_SIZE)); + xhci.mem_write(ptr, &cbw_buf); + + let iovec = TestIovec::new(ptr, CBW_SIZE as usize, false); + iovecs.push(iovec); + xhci.queue_td_by_iovec(slot_id, STORAGE_DEVICE_IN_ENDPOINT_ID, &mut iovecs, false); + xhci.doorbell_write(slot_id, STORAGE_DEVICE_IN_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::StallError as u32); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} + +/// USB storage device CSW phase to invalid endpoint test. +/// TestStep: +/// 0. Init process. +/// 1. CBW. +/// 2. CSW: invalid endpoint(not In endpoint). +/// 3. Test ends. Destroy device. +/// Expect: +/// 0/1/3: success. +/// 2: CSW StallError. +#[test] +fn usb_storage_csw_invalid_endpoint() { + let image_path = create_img(TEST_IMAGE_SIZE, 0, &ImageType::Raw); + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_storage(&image_path, "cdrom") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let port_id = 1; + let slot_id = xhci.borrow_mut().init_device(port_id); + + // Test 1: CBW phase. + let cbw = Cbw::new(); + cbw_phase( + cbw, + xhci.borrow_mut(), + guest_allocator.borrow_mut(), + slot_id, + TRBCCode::Success, + CBW_SIZE, + ); + + // Test 2: CSW phase. 
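+    // The CSW must be fetched from the Bulk-In endpoint; queuing it on the
+    // Bulk-Out endpoint below is expected to be rejected with a stall.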
+ let mut iovecs = Vec::new(); + let ptr = guest_allocator.borrow_mut().alloc(u64::from(CSW_SIZE)); + + let iovec = TestIovec::new(ptr, CSW_SIZE as usize, false); + iovecs.push(iovec); + xhci.borrow_mut().queue_td_by_iovec( + slot_id, + STORAGE_DEVICE_OUT_ENDPOINT_ID, + &mut iovecs, + false, + ); + xhci.borrow_mut() + .doorbell_write(slot_id, STORAGE_DEVICE_OUT_ENDPOINT_ID); + let evt = xhci + .borrow_mut() + .fetch_event(PRIMARY_INTERRUPTER_ID) + .unwrap(); + assert_eq!(evt.ccode, TRBCCode::StallError as u32); + + test_state.borrow_mut().stop(); + cleanup_img(image_path); +} diff --git a/tests/mod_test/tests/usb_test.rs b/tests/mod_test/tests/usb_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..8a5f81140c783c9908691389926d5785081bb7df --- /dev/null +++ b/tests/mod_test/tests/usb_test.rs @@ -0,0 +1,2795 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use devices::usb::config::*; +use devices::usb::hid::HID_SET_REPORT; +use devices::usb::xhci::xhci_controller::{ + DwordOrder, XhciInputCtrlCtx, XhciSlotCtx, EP_RUNNING, SLOT_ADDRESSED, +}; +use devices::usb::xhci::xhci_regs::{ + XHCI_INTR_REG_ERSTBA_LO, XHCI_INTR_REG_ERSTSZ, XHCI_INTR_REG_IMAN, XHCI_OPER_NE_MASK, + XHCI_OPER_REG_CONFIG, XHCI_OPER_REG_DNCTRL, XHCI_OPER_REG_USBCMD, XHCI_OPER_REG_USBSTS, +}; +use devices::usb::xhci::xhci_trb::{TRBCCode, TRBType, TRB_SIZE}; +use devices::usb::UsbDeviceRequest; +use mod_test::libdriver::pci::{PCI_DEVICE_ID, PCI_VENDOR_ID}; +use mod_test::libdriver::usb::{ + clear_iovec, qmp_event_read, qmp_plug_keyboard_event, qmp_plug_tablet_event, + qmp_send_key_event, qmp_send_multi_key_event, qmp_send_pointer_event, qmp_unplug_usb_event, + TestIovec, TestNormalTRB, TestUsbBuilder, CONTROL_ENDPOINT_ID, HID_DEVICE_ENDPOINT_ID, + HID_KEYBOARD_LEN, HID_POINTER_LEN, KEYCODE_NUM1, KEYCODE_SPACE, PCI_CLASS_PI, + PRIMARY_INTERRUPTER_ID, TD_TRB_LIMIT, XHCI_PCI_CAP_OFFSET, XHCI_PCI_DOORBELL_OFFSET, + XHCI_PCI_FUN_NUM, XHCI_PCI_OPER_OFFSET, XHCI_PCI_PORT_OFFSET, XHCI_PCI_RUNTIME_OFFSET, + XHCI_PCI_SLOT_NUM, XHCI_PORTSC_OFFSET, +}; + +#[test] +fn test_xhci_keyboard_basic() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + // Case 1 + // Space down + qmp_send_key_event(test_state.borrow_mut(), KEYCODE_SPACE, true); + let transfer_ptr = xhci.get_transfer_pointer(slot_id, HID_DEVICE_ENDPOINT_ID); + let data_ptr = xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(transfer_ptr, evt.ptr); + let buf = xhci.get_transfer_data_direct(data_ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 44, 0, 0, 0, 0, 0]); + // Space up + 
qmp_send_key_event(test_state.borrow_mut(), KEYCODE_SPACE, false); + let transfer_ptr = xhci.get_transfer_pointer(slot_id, HID_DEVICE_ENDPOINT_ID); + let data_ptr = xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(transfer_ptr, evt.ptr); + let buf = xhci.get_transfer_data_direct(data_ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 0, 0, 0, 0, 0, 0]); + + // Case 2 + let key_list = vec![ + KEYCODE_NUM1, + KEYCODE_NUM1 + 1, + KEYCODE_NUM1 + 2, + KEYCODE_NUM1 + 3, + ]; + qmp_send_multi_key_event(test_state.clone(), &key_list, true); + xhci.queue_multi_indirect_td( + slot_id, + HID_DEVICE_ENDPOINT_ID, + HID_KEYBOARD_LEN, + key_list.len(), + ); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + // 1 2 3 4 down + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 30, 0, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 30, 31, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 30, 31, 32, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 30, 31, 32, 33, 0, 0]); + // 1 2 3 4 Up + qmp_send_multi_key_event(test_state.clone(), &key_list, false); + xhci.queue_multi_indirect_td( + slot_id, + HID_DEVICE_ENDPOINT_ID, + HID_KEYBOARD_LEN, + key_list.len(), + ); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 33, 31, 32, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 33, 32, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 33, 0, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 0, 0, 0, 0, 0, 0]); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_direct() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + let key_list = vec![ + KEYCODE_NUM1, + KEYCODE_NUM1 + 1, + KEYCODE_NUM1 + 2, + KEYCODE_NUM1 + 3, + ]; + 
qmp_send_multi_key_event(test_state.clone(), &key_list, true); + xhci.queue_multi_direct_td( + slot_id, + HID_DEVICE_ENDPOINT_ID, + HID_KEYBOARD_LEN, + key_list.len(), + ); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + // 1 2 3 4 Down + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_direct(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 30, 0, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_direct(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 30, 31, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_direct(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 30, 31, 32, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_direct(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 30, 31, 32, 33, 0, 0]); + // 1 2 3 4 Up + qmp_send_multi_key_event(test_state.clone(), &key_list, false); + xhci.queue_multi_direct_td( + slot_id, + HID_DEVICE_ENDPOINT_ID, + HID_KEYBOARD_LEN, + key_list.len(), + ); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_direct(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 33, 31, 32, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_direct(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 33, 32, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_direct(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 33, 0, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_direct(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 0, 0, 0, 0, 0, 0]); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_multi_trb() { + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + let key_list = vec![ + KEYCODE_NUM1, + KEYCODE_NUM1 + 1, + KEYCODE_NUM1 + 2, + KEYCODE_NUM1 + 3, + ]; + qmp_send_multi_key_event(test_state.clone(), &key_list, true); + let mut io_list = Vec::new(); + for _ in 0..4 { + let mut iovecs = Vec::new(); + let ptr = guest_allocator.borrow_mut().alloc(2); + let iovec = TestIovec::new(ptr, 2, false); + iovecs.push(iovec); + let ptr = guest_allocator.borrow_mut().alloc(2); + let iovec = TestIovec::new(ptr, 2, true); + iovecs.push(iovec); + let ptr = guest_allocator.borrow_mut().alloc(4); + let iovec = TestIovec::new(ptr, 4, false); + iovecs.push(iovec); + xhci.queue_td_by_iovec(slot_id, HID_DEVICE_ENDPOINT_ID, &mut iovecs, true); + io_list.push(iovecs); + } + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + // 1 2 3 4 Down + let evt = 
xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_by_iovec(&io_list[0]); + assert_eq!(buf, [0, 0, 30, 0, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_by_iovec(&io_list[1]); + assert_eq!(buf, [0, 0, 30, 31, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_by_iovec(&io_list[2]); + assert_eq!(buf, [0, 0, 30, 31, 32, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_by_iovec(&io_list[3]); + assert_eq!(buf, [0, 0, 30, 31, 32, 33, 0, 0]); + // 1 2 3 4 Up + qmp_send_multi_key_event(test_state.clone(), &key_list, false); + let mut io_list = Vec::new(); + for _ in 0..4 { + let mut iovecs = Vec::new(); + let ptr = guest_allocator.borrow_mut().alloc(2); + let iovec = TestIovec::new(ptr, 2, false); + iovecs.push(iovec); + let ptr = guest_allocator.borrow_mut().alloc(2); + let iovec = TestIovec::new(ptr, 2, true); + iovecs.push(iovec); + let ptr = guest_allocator.borrow_mut().alloc(4); + let iovec = TestIovec::new(ptr, 4, false); + iovecs.push(iovec); + xhci.queue_td_by_iovec(slot_id, HID_DEVICE_ENDPOINT_ID, &mut iovecs, true); + io_list.push(iovecs); + } + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_by_iovec(&io_list[0]); + assert_eq!(buf, [0, 0, 33, 31, 32, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_by_iovec(&io_list[1]); + assert_eq!(buf, [0, 0, 33, 32, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_by_iovec(&io_list[2]); + assert_eq!(buf, [0, 0, 33, 0, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_by_iovec(&io_list[3]); + assert_eq!(buf, [0, 0, 0, 0, 0, 0, 0, 0]); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_event_data() { + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + let key_list = vec![ + KEYCODE_NUM1, + KEYCODE_NUM1 + 1, + KEYCODE_NUM1 + 2, + KEYCODE_NUM1 + 3, + ]; + qmp_send_multi_key_event(test_state.clone(), &key_list, true); + let mut io_list = Vec::new(); + for _ in 0..4 { + let mut iovecs = Vec::new(); + let ptr = guest_allocator.borrow_mut().alloc(2); + let iovec = TestIovec::new(ptr, 2, false); + iovecs.push(iovec); + let ptr = guest_allocator.borrow_mut().alloc(2); + let iovec = TestIovec::new(ptr, 2, true); + iovecs.push(iovec); + let ptr = guest_allocator.borrow_mut().alloc(4); + let iovec = TestIovec::new(ptr, 4, false); + iovecs.push(iovec); + // Event Data TRB + let mut iovec = TestIovec::new(0xff00ff00ff00ff00, 0, false); + iovec.event_data = true; + iovecs.push(iovec); + 
xhci.queue_td_by_iovec(slot_id, HID_DEVICE_ENDPOINT_ID, &mut iovecs, true); + io_list.push(iovecs); + } + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + // 1 2 3 4 Down + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.length, 8); + assert_eq!(evt.ptr, 0xff00ff00ff00ff00); + let buf = xhci.get_transfer_data_by_iovec(&io_list[0]); + assert_eq!(buf, [0, 0, 30, 0, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.length, 8); + assert_eq!(evt.ptr, 0xff00ff00ff00ff00); + let buf = xhci.get_transfer_data_by_iovec(&io_list[1]); + assert_eq!(buf, [0, 0, 30, 31, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.length, 8); + assert_eq!(evt.ptr, 0xff00ff00ff00ff00); + let buf = xhci.get_transfer_data_by_iovec(&io_list[2]); + assert_eq!(buf, [0, 0, 30, 31, 32, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.length, 8); + assert_eq!(evt.ptr, 0xff00ff00ff00ff00); + let buf = xhci.get_transfer_data_by_iovec(&io_list[3]); + assert_eq!(buf, [0, 0, 30, 31, 32, 33, 0, 0]); + // 1 2 3 4 Up + qmp_send_multi_key_event(test_state.clone(), &key_list, false); + let mut io_list = Vec::new(); + for _ in 0..4 { + let mut iovecs = Vec::new(); + let ptr = guest_allocator.borrow_mut().alloc(2); + let iovec = TestIovec::new(ptr, 2, true); + iovecs.push(iovec); + let ptr = guest_allocator.borrow_mut().alloc(2); + let iovec = TestIovec::new(ptr, 2, false); + iovecs.push(iovec); + let ptr = guest_allocator.borrow_mut().alloc(4); + let iovec = TestIovec::new(ptr, 4, false); + iovecs.push(iovec); + // Event Data TRB + let mut iovec = TestIovec::new(0xff00ff00ff00ff00, 0, false); + iovec.event_data = true; + iovecs.push(iovec); + xhci.queue_td_by_iovec(slot_id, HID_DEVICE_ENDPOINT_ID, &mut iovecs, true); + io_list.push(iovecs); + } + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.length, 8); + assert_eq!(evt.ptr, 0xff00ff00ff00ff00); + let buf = xhci.get_transfer_data_by_iovec(&io_list[0]); + assert_eq!(buf, [0, 0, 33, 31, 32, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.length, 8); + assert_eq!(evt.ptr, 0xff00ff00ff00ff00); + let buf = xhci.get_transfer_data_by_iovec(&io_list[1]); + assert_eq!(buf, [0, 0, 33, 32, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.length, 8); + assert_eq!(evt.ptr, 0xff00ff00ff00ff00); + let buf = xhci.get_transfer_data_by_iovec(&io_list[2]); + assert_eq!(buf, [0, 0, 33, 0, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.length, 8); + assert_eq!(evt.ptr, 0xff00ff00ff00ff00); + let buf = xhci.get_transfer_data_by_iovec(&io_list[3]); + assert_eq!(buf, [0, 0, 0, 0, 0, 0, 0, 0]); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_over_hid_buffer() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + 
.with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + const HID_BUFFER_SIZE: u32 = 16; + let event_cnt = 20; + // 1 -> 0 down / up + for i in 0..(event_cnt / 2) { + qmp_send_key_event(test_state.borrow_mut(), 2 + i, true); + qmp_send_key_event(test_state.borrow_mut(), 2 + i, false); + } + xhci.queue_multi_indirect_td( + slot_id, + HID_DEVICE_ENDPOINT_ID, + HID_KEYBOARD_LEN, + event_cnt as usize, + ); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + for i in 0..event_cnt { + if i < HID_BUFFER_SIZE { + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + if i % 2 == 0 { + assert_eq!(buf, [0, 0, 30 + i as u8 / 2, 0, 0, 0, 0, 0]); + } else { + assert_eq!(buf, [0, 0, 0, 0, 0, 0, 0, 0]); + } + } else { + // event lost. + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + } + } + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_over_ring_limit() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + let org_ptr = xhci.get_transfer_pointer(slot_id, HID_DEVICE_ENDPOINT_ID); + // Fake ring length. + let transfer_limit = 32; + let test_cnt = 3; + for i in 0..test_cnt { + for _ in 0..(transfer_limit / 2) { + qmp_send_key_event(test_state.borrow_mut(), KEYCODE_SPACE, true); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + qmp_send_key_event(test_state.borrow_mut(), KEYCODE_SPACE, false); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 44, 0, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 0, 0, 0, 0, 0, 0]); + } + if i == 0 { + // Fake link new address. + xhci.queue_link_trb( + slot_id, + HID_DEVICE_ENDPOINT_ID, + org_ptr + u64::from(TRB_SIZE) * 64, + false, + ); + } else if i == 1 { + // Goto the origin address. 
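+            // Link back to org_ptr; the trailing `true` presumably sets the
+            // Link TRB's Toggle Cycle bit so the producer cycle state flips
+            // when the ring wraps around.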
+ xhci.queue_link_trb(slot_id, HID_DEVICE_ENDPOINT_ID, org_ptr, true); + } else { + xhci.queue_link_trb(slot_id, HID_DEVICE_ENDPOINT_ID, org_ptr, true); + let ptr = xhci.get_transfer_pointer(slot_id, HID_DEVICE_ENDPOINT_ID); + assert_eq!(org_ptr, ptr); + } + } + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_reorder() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + xhci.queue_multi_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN, 4); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + let key_list = vec![ + KEYCODE_NUM1, + KEYCODE_NUM1 + 1, + KEYCODE_NUM1 + 2, + KEYCODE_NUM1 + 3, + ]; + qmp_send_multi_key_event(test_state.clone(), &key_list, true); + // 1 2 3 4 Down + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 30, 0, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 30, 31, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 30, 31, 32, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 30, 31, 32, 33, 0, 0]); + // 1 2 3 4 Up + let key_list = [ + KEYCODE_NUM1, + KEYCODE_NUM1 + 1, + KEYCODE_NUM1 + 2, + KEYCODE_NUM1 + 3, + ]; + qmp_send_multi_key_event(test_state.clone(), &key_list[0..2], false); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + xhci.queue_multi_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN, 2); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + xhci.queue_multi_indirect_td( + slot_id, + HID_DEVICE_ENDPOINT_ID, + HID_KEYBOARD_LEN, + key_list.len() - 2, + ); + qmp_send_multi_key_event(test_state.clone(), &key_list[2..], false); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 33, 31, 32, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 33, 32, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 33, 0, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + 
assert_eq!(buf, [0, 0, 0, 0, 0, 0, 0, 0]); + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_remote_wakeup() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + // U0 -> U3 + // NOTE: write PLS field should set LWS field. + xhci.port_regs_write( + port_id, + XHCI_PORTSC_OFFSET, + PORTSC_LWS | PLS_U3 << PORTSC_PLS_SHIFT, + ); + let portsc = xhci.port_regs_read(port_id, XHCI_PORTSC_OFFSET); + assert!(portsc >> PORTSC_PLS_SHIFT & PLS_U3 == PLS_U3); + + // Set remote wakeup. + xhci.set_feature(slot_id, USB_DEVICE_REMOTE_WAKEUP as u16); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + qmp_send_key_event(test_state.borrow_mut(), KEYCODE_SPACE, true); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + qmp_send_key_event(test_state.borrow_mut(), KEYCODE_SPACE, false); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + + // U3 -> U0 + xhci.port_regs_write( + port_id, + XHCI_PORTSC_OFFSET, + PORTSC_LWS | PLS_U0 << PORTSC_PLS_SHIFT, + ); + let portsc = xhci.port_regs_read(port_id, XHCI_PORTSC_OFFSET); + assert!(portsc >> PORTSC_PLS_SHIFT & PLS_U0 == PLS_U0); + assert!(portsc & PORTSC_PLC == PORTSC_PLC); + + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.get_trb_type(), TRBType::ErPortStatusChange as u32); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 44, 0, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 0, 0, 0, 0, 0, 0]); + test_state.borrow_mut().stop(); +} + +// Abnormal +#[test] +fn test_xhci_keyboard_invalid_value() { + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + // Case 1: invalid code + qmp_send_key_event(test_state.borrow_mut(), 0, true); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 0, 0, 0, 0, 0, 0]); + + // Case 2: invalid cycle bit + qmp_send_key_event(test_state.borrow_mut(), 2, true); + let mut trb = TestNormalTRB::generate_normal_td(0, 8); + let ptr = guest_allocator.borrow_mut().alloc(8); + trb.set_pointer(ptr); + trb.force_cycle = true; + xhci.queue_trb(slot_id, HID_DEVICE_ENDPOINT_ID, &mut trb); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + 
assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + // clean td + xhci.stop_endpoint(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let old_ptr = xhci.get_transfer_pointer(slot_id, HID_DEVICE_ENDPOINT_ID); + xhci.set_tr_dequeue(old_ptr + 0x20, slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + // Case 3: invalid down length. + const KEY_LIMIT: u32 = 6; + let key_len = 10; + for i in 0..key_len { + qmp_send_key_event(test_state.borrow_mut(), 2 + i, true); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + if i >= KEY_LIMIT { + // rollover + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 1, 1, 1, 1, 1, 1]); + } + } + for i in (0..key_len).rev() { + qmp_send_key_event(test_state.borrow_mut(), 2 + i, false); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + if i == 0 { + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_KEYBOARD_LEN); + assert_eq!(buf, [0, 0, 0, 0, 0, 0, 0, 0]); + } + } + + // Case 4: length over 8 when IDT = 1. + qmp_send_key_event(test_state.borrow_mut(), 2, true); + let mut trb = TestNormalTRB::generate_normal_td(0, 10); + xhci.queue_trb(slot_id, HID_DEVICE_ENDPOINT_ID, &mut trb); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + // clean up key event. + qmp_send_key_event(test_state.borrow_mut(), 2, false); + xhci.queue_multi_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN, 2); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + xhci.test_keyboard_event(slot_id, test_state.clone()); + + // Case 5: invalid interrupter target + qmp_send_key_event(test_state.borrow_mut(), KEYCODE_SPACE, false); + let mut trb = TestNormalTRB::generate_normal_td(100, HID_KEYBOARD_LEN as u32); + xhci.queue_trb(slot_id, HID_DEVICE_ENDPOINT_ID, &mut trb); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + // NOTE: no HCE, only primary interrupter supported now. 
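+    // A non-zero interrupter target is ignored rather than treated as a
+    // controller fault, so USBSTS.HCE is expected to stay clear below.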
+ let status = xhci.oper_regs_read(XHCI_OPER_REG_USBSTS); + assert!(status & USB_STS_HCE != USB_STS_HCE); + + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_invalid_buffer() { + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + // over limit + qmp_send_key_event(test_state.borrow_mut(), 2, true); + qmp_send_key_event(test_state.borrow_mut(), 3, true); + let mut io_list = Vec::new(); + for _ in 0..2 { + let mut iovecs = Vec::new(); + let ptr = guest_allocator.borrow_mut().alloc(5); + let iovec = TestIovec::new(ptr, 5, false); + iovecs.push(iovec); + let ptr = guest_allocator.borrow_mut().alloc(2); + let iovec = TestIovec::new(ptr, 2, true); + iovecs.push(iovec); + let ptr = guest_allocator.borrow_mut().alloc(4); + let iovec = TestIovec::new(ptr, 4, false); + iovecs.push(iovec); + xhci.queue_td_by_iovec(slot_id, HID_DEVICE_ENDPOINT_ID, &mut iovecs, true); + // NOTE: ensure the memory is zero. + clear_iovec(test_state.borrow_mut(), &iovecs); + io_list.push(iovecs); + } + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + let buf = xhci.get_transfer_data_by_iovec(&io_list[0]); + assert_eq!(buf, [0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + let buf = xhci.get_transfer_data_by_iovec(&io_list[1]); + assert_eq!(buf, [0, 0, 30, 31, 0, 0, 0, 0, 0, 0, 0]); + // less buffer. + qmp_send_key_event(test_state.borrow_mut(), 2, false); + qmp_send_key_event(test_state.borrow_mut(), 3, false); + let mut io_list = Vec::new(); + for _ in 0..2 { + let mut iovecs = Vec::new(); + let ptr = guest_allocator.borrow_mut().alloc(2); + let iovec = TestIovec::new(ptr, 2, true); + iovecs.push(iovec); + let ptr = guest_allocator.borrow_mut().alloc(2); + let iovec = TestIovec::new(ptr, 2, false); + iovecs.push(iovec); + let ptr = guest_allocator.borrow_mut().alloc(2); + let iovec = TestIovec::new(ptr, 2, false); + iovecs.push(iovec); + xhci.queue_td_by_iovec(slot_id, HID_DEVICE_ENDPOINT_ID, &mut iovecs, true); + // NOTE: ensure the memory is zero. 
+ clear_iovec(test_state.borrow_mut(), &iovecs); + io_list.push(iovecs); + } + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_by_iovec(&io_list[0]); + assert_eq!(buf, [0, 0, 31, 0, 0, 0]); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_by_iovec(&io_list[1]); + assert_eq!(buf, [0, 0, 0, 0, 0, 0]); + + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_over_transfer_ring() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .with_config("over_transfer_ring", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + qmp_send_key_event(test_state.borrow_mut(), 2, true); + qmp_send_key_event(test_state.borrow_mut(), 3, true); + xhci.queue_multi_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN, 2); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // link trb overlimit by setting next trb to itself. + let ptr = xhci.get_transfer_pointer(slot_id, HID_DEVICE_ENDPOINT_ID); + xhci.queue_link_trb(slot_id, HID_DEVICE_ENDPOINT_ID, ptr, false); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + // Host Controller Error + let status = xhci.oper_regs_read(XHCI_OPER_REG_USBSTS); + assert!(status & USB_STS_HCE == USB_STS_HCE); + + xhci.reset_controller(true); + let slot_id = xhci.init_device(port_id); + // Invalid iovec over td limit. + let trb_limit = TD_TRB_LIMIT; + let mut iovecs = vec![TestIovec::new(0, 1, true); trb_limit as usize]; + xhci.queue_td_by_iovec(slot_id, HID_DEVICE_ENDPOINT_ID, &mut iovecs, false); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + // Host Controller Error + let status = xhci.oper_regs_read(XHCI_OPER_REG_USBSTS); + assert!(status & USB_STS_HCE == USB_STS_HCE); + + xhci.reset_controller(true); + let slot_id = xhci.init_device(port_id); + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_invalid_doorbell() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("command_auto_doorbell", true) + .with_config("auto_run", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + // Kick invalid slot. 
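+    // Slot 10 has not been enabled, so this doorbell ring should be ignored
+    // without generating a transfer event.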
+ xhci.doorbell_write(10, HID_DEVICE_ENDPOINT_ID); + + qmp_send_key_event(test_state.borrow_mut(), 2, true); + qmp_send_key_event(test_state.borrow_mut(), 3, true); + xhci.queue_indirect_td(slot_id, 2, HID_KEYBOARD_LEN); + xhci.queue_indirect_td(0, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + xhci.queue_multi_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN, 2); + xhci.doorbell_write(slot_id, 0xff); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + xhci.doorbell_write(0xff, HID_DEVICE_ENDPOINT_ID); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + qmp_send_key_event(test_state.borrow_mut(), 2, false); + qmp_send_key_event(test_state.borrow_mut(), 3, false); + xhci.queue_indirect_td(2, 2, HID_KEYBOARD_LEN); + xhci.queue_indirect_td(0, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + xhci.queue_multi_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN, 2); + xhci.doorbell_write(0, 0); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + xhci.doorbell_write(0, HID_DEVICE_ENDPOINT_ID); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +// Init +#[test] +fn test_xhci_keyboard_controller_init_invalid_register() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + // Case 1: write value to pci config. + xhci.read_pci_config(); + // write vendor id. + xhci.pci_dev.config_writew(PCI_VENDOR_ID, 0xf0ff); + // write device id. + xhci.pci_dev.config_writew(PCI_DEVICE_ID, 0xff0f); + // write class code. + // write invalid data. + xhci.pci_dev.config_writel(PCI_CLASS_PI, 0xf0f0ffff); + xhci.read_pci_config(); + + // Case 2: write value to capability registers. + xhci.read_capability(); + // write invalid data. + xhci.pci_dev.io_writel(xhci.bar_addr, 0, 0xffffffff); + xhci.read_capability(); + let old_value = xhci + .pci_dev + .io_readl(xhci.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET) + 0x2c); + xhci.pci_dev + .io_writel(xhci.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET) + 0x2c, 0xffff); + let value = xhci + .pci_dev + .io_readl(xhci.bar_addr, u64::from(XHCI_PCI_CAP_OFFSET) + 0x2c); + assert_eq!(value, old_value); + + // Case 3: write invalid slot. 
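+    // Write an out-of-range max-slots value to the CONFIG register; the
+    // controller is expected to refuse it, so the read-back differs from
+    // 0xffff.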
+ xhci.pci_dev.io_writel( + xhci.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_CONFIG, + 0xffff, + ); + let config = xhci.pci_dev.io_readl( + xhci.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_CONFIG, + ); + assert_ne!(config, 0xffff); + + // Case 4: invalid oper + xhci.pci_dev.io_writel( + xhci.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_USBSTS, + 0xffff, + ); + let status = xhci.pci_dev.io_readl( + xhci.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_USBSTS, + ); + assert_ne!(status, 0xffff); + // Device Notify Control + xhci.pci_dev.io_writel( + xhci.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_DNCTRL, + 0x12345, + ); + let ndctrl = xhci.pci_dev.io_readl( + xhci.bar_addr, + u64::from(XHCI_PCI_OPER_OFFSET) + XHCI_OPER_REG_DNCTRL, + ); + assert_eq!(ndctrl, 0x12345 & XHCI_OPER_NE_MASK); + // invalid port offset. + let invalid_offset = 0x7; + xhci.pci_dev.io_writel( + xhci.bar_addr, + u64::from(XHCI_PCI_PORT_OFFSET) + invalid_offset, + 0xff, + ); + let invalid_offset = xhci.pci_dev.io_readl( + xhci.bar_addr, + u64::from(XHCI_PCI_PORT_OFFSET) + invalid_offset, + ); + assert_eq!(invalid_offset, 0); + + xhci.init_device_context_base_address_array_pointer(); + xhci.init_command_ring_dequeue_pointer(); + + // Case 5: write invalid interrupter. + xhci.interrupter_regs_write(0, XHCI_INTR_REG_ERSTSZ, 0); + xhci.interrupter_regs_writeq(0, XHCI_INTR_REG_ERSTBA_LO, 0); + // micro frame index. + xhci.pci_dev + .io_writel(xhci.bar_addr, u64::from(XHCI_PCI_RUNTIME_OFFSET), 0xf); + let mf_index = xhci + .pci_dev + .io_readl(xhci.bar_addr, u64::from(XHCI_PCI_RUNTIME_OFFSET)); + assert!(mf_index <= 0x3fff); + // invalid offset + xhci.pci_dev.io_writel( + xhci.bar_addr, + u64::from(XHCI_PCI_RUNTIME_OFFSET) + 0x1008, + 0xf, + ); + let over_offset = xhci + .pci_dev + .io_readl(xhci.bar_addr, u64::from(XHCI_PCI_RUNTIME_OFFSET) + 0x1008); + assert_eq!(over_offset, 0); + + // Case 6: invalid doorbell + xhci.pci_dev + .io_writel(xhci.bar_addr, u64::from(XHCI_PCI_DOORBELL_OFFSET), 0xf); + let invalid_db = xhci + .pci_dev + .io_readl(xhci.bar_addr, u64::from(XHCI_PCI_DOORBELL_OFFSET)); + assert_eq!(invalid_db, 0); + + // Case 7: invalid size + xhci.init_event_ring(0, 1, 12); + xhci.init_msix(); + xhci.run(); + xhci.no_op(); + // NOTE: no event now. + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + + xhci.reset_controller(true); + let port_id = 1; + let slot_id = xhci.init_device(port_id); + xhci.test_keyboard_event(slot_id, test_state.clone()); + + // Case 8: invalid PLS. + xhci.port_regs_write( + port_id, + XHCI_PORTSC_OFFSET, + PORTSC_LWS | 18 << PORTSC_PLS_SHIFT, + ); + + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_controller_init_miss_step() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + // Case 1: miss init command ring. 
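+ // init_command_ring_dequeue_pointer() is skipped on purpose: the port reset
+ // still succeeds, but Enable Slot finds no valid command ring, produces no
+ // event and leaves USB_STS_HCE set.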
+ xhci.read_pci_config(); + xhci.read_capability(); + xhci.init_max_device_slot_enabled(); + xhci.init_device_context_base_address_array_pointer(); + xhci.init_interrupter(); + xhci.run(); + + let port_id = 1; + // reset usb port + xhci.reset_port(port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // enable slot + xhci.enable_slot(); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + // Host Controller Error + let status = xhci.oper_regs_read(XHCI_OPER_REG_USBSTS); + assert!(status & USB_STS_HCE == USB_STS_HCE); + + xhci.reset_controller(false); + // Case 2: miss init dcbaap. + xhci.read_pci_config(); + xhci.read_capability(); + xhci.init_max_device_slot_enabled(); + xhci.init_command_ring_dequeue_pointer(); + xhci.init_interrupter(); + xhci.run(); + // reset usb port + xhci.reset_port(port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // enable slot + xhci.enable_slot(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_id = evt.get_slot_id(); + // address device + xhci.address_device(slot_id, false, port_id); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + // Host Controller Error + let status = xhci.oper_regs_read(XHCI_OPER_REG_USBSTS); + assert!(status & USB_STS_HCE == USB_STS_HCE); + + xhci.reset_controller(false); + // Case 3: miss init interrupter. + xhci.read_pci_config(); + xhci.read_capability(); + xhci.init_max_device_slot_enabled(); + xhci.init_command_ring_dequeue_pointer(); + xhci.run(); + // reset usb port + xhci.reset_port(port_id); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + // NOTE: no HCE now. only print error log. + + xhci.reset_controller(true); + let slot_id = xhci.init_device(port_id); + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_device_init_control_command() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .with_config("address_device_bsr", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + // no op + let ptr = xhci.get_command_pointer(); + xhci.no_op(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.ptr, ptr); + + let slot_id = xhci.init_device(port_id); + + let device_req = UsbDeviceRequest { + request_type: USB_DEVICE_OUT_REQUEST, + request: USB_REQUEST_SET_CONFIGURATION, + value: 0, + index: 0, + length: 8, + }; + // Setup Stage. + let mut setup_trb = TestNormalTRB::generate_setup_td(&device_req); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut setup_trb); + // Data Stage. + let in_dir = + device_req.request_type & USB_DIRECTION_DEVICE_TO_HOST == USB_DIRECTION_DEVICE_TO_HOST; + let mut data_trb = TestNormalTRB::generate_data_td(0, device_req.length, in_dir); + data_trb.set_idt_flag(true); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut data_trb); + // Status Stage. + let mut status_trb = TestNormalTRB::generate_status_td(false); + status_trb.set_ch_flag(true); + status_trb.set_ioc_flag(false); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut status_trb); + // Event Data TRB. 
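+ // The Event Data TRB closes the chained Status Stage; its 0x1234 parameter
+ // is expected to come back in the ptr field of the transfer event.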
+ let mut event_data_trb = TestNormalTRB::generate_event_data_trb(0x1234); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut event_data_trb); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.ptr, 0x1234); + + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_device_init_control_command_invalid_order() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + // reset usb port + xhci.reset_port(port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // enable slot + xhci.enable_slot(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_id = evt.get_slot_id(); + // address device bsr = 0 + xhci.address_device(slot_id, false, port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_ctx = xhci.get_slot_context(slot_id); + assert_eq!(slot_ctx.get_slot_state(), SLOT_ADDRESSED); + let ep0_ctx = xhci.get_endpoint_context(slot_id, CONTROL_ENDPOINT_ID); + assert_eq!(ep0_ctx.get_ep_state(), EP_RUNNING); + // get descriptor + xhci.get_usb_descriptor(slot_id); + // configure endpoint + xhci.configure_endpoint(slot_id, false); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + // disable slot + xhci.disable_slot(slot_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // reset endpoint after disable slot + xhci.reset_endpoint(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::SlotNotEnabledError as u32); + + // set tr dequeue + let old_ptr = xhci.get_transfer_pointer(slot_id, HID_DEVICE_ENDPOINT_ID); + xhci.set_tr_dequeue(old_ptr, slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::SlotNotEnabledError as u32); + + // reset device + xhci.reset_device(slot_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::SlotNotEnabledError as u32); + // stop endpoint after reset device + xhci.stop_endpoint(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::SlotNotEnabledError as u32); + + // enable slot + xhci.enable_slot(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // address device + let slot_id = evt.get_slot_id(); + xhci.address_device(slot_id, false, port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // configure endpoint + xhci.configure_endpoint(slot_id, false); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + // stop endpoint. invalid slot id in trb. 
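+ // Endpoint 2 exists but was never configured for this slot, so the Stop
+ // Endpoint, Set TR Dequeue and Reset Endpoint commands below should all
+ // complete with EpNotEnabledError.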
+ let mut trb = TestNormalTRB::default(); + trb.set_slot_id(slot_id); + trb.set_ep_id(2); + trb.set_trb_type(TRBType::CrStopEndpoint as u32); + xhci.queue_command(&mut trb); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::EpNotEnabledError as u32); + // set tr dequeue. + let mut trb = TestNormalTRB::default(); + trb.set_pointer(0xff00 | 1); + trb.set_slot_id(slot_id); + trb.set_ep_id(2); + trb.set_trb_type(TRBType::CrSetTrDequeue as u32); + xhci.queue_command(&mut trb); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::EpNotEnabledError as u32); + // reset endpoint + let mut trb = TestNormalTRB::default(); + trb.set_slot_id(slot_id); + trb.set_ep_id(2); + trb.set_trb_type(TRBType::CrResetEndpoint as u32); + xhci.queue_command(&mut trb); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::EpNotEnabledError as u32); + + // stop endpoint. + xhci.stop_endpoint(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // configure again. + xhci.configure_endpoint(slot_id, false); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_over_command_ring() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let org_ptr = xhci.get_command_pointer(); + // Fake ring length. + let ring_len = 32; + for _ in 0..ring_len - 1 { + let ptr = xhci.get_command_pointer(); + xhci.no_op(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.ptr, ptr); + } + xhci.queue_link_trb(0, 0, org_ptr, true); + let ptr = xhci.get_command_pointer(); + assert_eq!(org_ptr, ptr); + for _ in 0..ring_len - 1 { + let ptr = xhci.get_command_pointer(); + xhci.no_op(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.ptr, ptr); + } + let ptr = xhci.get_command_pointer(); + // link trb overlimit by setting next trb to itself. 
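+ // A Link TRB that points at itself would make the command ring spin forever;
+ // the controller is expected to hit its TRB limit and flag a Host Controller
+ // Error in USBSTS.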
+ xhci.queue_link_trb(0, 0, ptr, false); + xhci.doorbell_write(0, 0); + // Host Controller Error + let status = xhci.oper_regs_read(XHCI_OPER_REG_USBSTS); + assert!(status & USB_STS_HCE == USB_STS_HCE); + + xhci.reset_controller(true); + let port_id = 1; + let slot_id = xhci.init_device(port_id); + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_device_init_invalid_value() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + // reset usb port + xhci.reset_port(port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // enable slot + xhci.enable_slot(); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_id = evt.get_slot_id(); + // Case 1: invalid value when address device + // Invalid port id. + xhci.address_device(slot_id, true, 0xff); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + xhci.address_device(slot_id, true, port_id + 1); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + // Invalid add flag. + let input_ctx_addr = xhci.address_device(slot_id, true, port_id); + let mut input_ctx = XhciInputCtrlCtx::default(); + input_ctx.add_flags |= 0xf0; + xhci.mem_write_u32(input_ctx_addr, input_ctx.as_dwords()); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ParameterError as u32); + // Invalid slot state. + let input_ctx_addr = xhci.address_device(slot_id, false, port_id); + let mut slot_ctx = XhciSlotCtx::default(); + slot_ctx.set_slot_state(SLOT_ADDRESSED); + slot_ctx.set_context_entry(1); + slot_ctx.set_port_number(port_id); + xhci.mem_write_u32(input_ctx_addr + 0x20, slot_ctx.as_dwords()); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ContextStateError as u32); + // correct + xhci.address_device(slot_id, false, port_id); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // Case 2: invalid value when evaluate context + // Invalid input context. + let input_ctx_addr = xhci.evaluate_context(slot_id, 0x1234, 3, 128); + let mut input_ctx = XhciInputCtrlCtx::default(); + input_ctx.drop_flags = 0xf; + xhci.mem_write_u32(input_ctx_addr, input_ctx.as_dwords()); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + // Invalid slot id. + xhci.evaluate_context(slot_id + 1, 0x1234, 3, 128); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::SlotNotEnabledError as u32); + // correct + xhci.evaluate_context(slot_id, 0x1234, 0, 64); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // Case 3: invalid value when configure endpoint + // DC when endpoint is not configured. 
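+ // Deconfigure (DC = 1) is issued while the slot has no configured endpoints
+ // yet, so the command should fail with ContextStateError.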
+ xhci.configure_endpoint(slot_id, true); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ContextStateError as u32); + // Invalid input context. + let input_ctx_addr = xhci.configure_endpoint(slot_id, false); + let mut input_ctx = XhciInputCtrlCtx::default(); + input_ctx.add_flags = 0x2; + xhci.mem_write_u32(input_ctx_addr, input_ctx.as_dwords()); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + // correct + xhci.configure_endpoint(slot_id, false); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + // Case 4: invalid command + let mut trb = TestNormalTRB::default(); + trb.set_slot_id(0); + xhci.queue_command(&mut trb); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_device_init_invalid_request() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + // reset usb port + xhci.reset_port(port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // enable slot + xhci.enable_slot(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_id = evt.get_slot_id(); + // address device + xhci.address_device(slot_id, false, port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // get descriptor + let device_req = UsbDeviceRequest { + request_type: USB_DEVICE_IN_REQUEST, + request: USB_REQUEST_GET_DESCRIPTOR, + value: u16::from(USB_DT_CONFIGURATION) << 8 | 6, + index: 10, + length: 10, + }; + xhci.queue_device_request(slot_id, &device_req); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + // Stall Error. + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::StallError as u32); + // reset endpoint + xhci.reset_endpoint(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // evaluate context + xhci.evaluate_context(slot_id, 0x1234, 0, 64); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // SET_CONFIGURATION invalid value. + // Inject invalid usb device request to let endpoint halted. + let device_req = UsbDeviceRequest { + request_type: USB_DEVICE_OUT_REQUEST, + request: USB_REQUEST_SET_CONFIGURATION, + value: 0xff, + index: 2, + length: 64, + }; + xhci.queue_device_request(slot_id, &device_req); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + // Stall Error. 
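+ // The bogus SET_CONFIGURATION above halts the control endpoint; once the
+ // Stall Error is consumed, Reset Endpoint returns it to the Running state.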
+ let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::StallError as u32); + // reset endpoint + xhci.reset_endpoint(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // configure endpoint + xhci.configure_endpoint(slot_id, false); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // set report + xhci.set_report(slot_id, 3); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_device_init_invalid_control() { + let (xhci, test_state, guest_allocator) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + // reset usb port + xhci.reset_port(port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // enable slot + xhci.enable_slot(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_id = evt.get_slot_id(); + // address device + xhci.address_device(slot_id, false, port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // get descriptor, invalid control value + let device_req = UsbDeviceRequest { + request_type: USB_DEVICE_IN_REQUEST, + request: USB_REQUEST_GET_DESCRIPTOR, + value: u16::from(USB_DT_CONFIGURATION) << 8, + index: 0, + length: 64, + }; + // Case 1: no SetUp Stage. + // Data Stage. + let ptr = guest_allocator + .borrow_mut() + .alloc(u64::from(device_req.length)); + let in_dir = + device_req.request_type & USB_DIRECTION_DEVICE_TO_HOST == USB_DIRECTION_DEVICE_TO_HOST; + let mut data_trb = TestNormalTRB::generate_data_td(ptr, device_req.length, in_dir); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut data_trb); + // Status Stage. + let mut status_trb = TestNormalTRB::generate_status_td(false); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut status_trb); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + + // Case 2: no Status Stage. + // Setup Stage. + let org_ptr = xhci.get_transfer_pointer(slot_id, CONTROL_ENDPOINT_ID); + let mut setup_trb = TestNormalTRB::generate_setup_td(&device_req); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut setup_trb); + // Data Stage. + let ptr = guest_allocator + .borrow_mut() + .alloc(u64::from(device_req.length)); + let in_dir = + device_req.request_type & USB_DIRECTION_DEVICE_TO_HOST == USB_DIRECTION_DEVICE_TO_HOST; + let mut data_trb = TestNormalTRB::generate_data_td(ptr, device_req.length, in_dir); + data_trb.set_ch_flag(false); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut data_trb); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + // NOTE: no event for current implement now. + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + // clean up. rewrite tr dequeue. + xhci.set_transfer_pointer(org_ptr, slot_id, CONTROL_ENDPOINT_ID); + + // Case 3: no IDT = 1. 
in SetUp TD + // Setup Stage. + let mut setup_trb = TestNormalTRB::generate_setup_td(&device_req); + setup_trb.set_idt_flag(false); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut setup_trb); + // Data Stage. + let ptr = guest_allocator + .borrow_mut() + .alloc(u64::from(device_req.length)); + let in_dir = + device_req.request_type & USB_DIRECTION_DEVICE_TO_HOST == USB_DIRECTION_DEVICE_TO_HOST; + let mut data_trb = TestNormalTRB::generate_data_td(ptr, device_req.length, in_dir); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut data_trb); + // Status Stage. + let mut status_trb = TestNormalTRB::generate_status_td(false); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut status_trb); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + + // Case 4: invalid length in Setup TD + // Setup Stage. + let mut setup_trb = TestNormalTRB::generate_setup_td(&device_req); + setup_trb.set_trb_transfer_length(11); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut setup_trb); + // Data Stage. + let ptr = guest_allocator + .borrow_mut() + .alloc(u64::from(device_req.length)); + let in_dir = + device_req.request_type & USB_DIRECTION_DEVICE_TO_HOST == USB_DIRECTION_DEVICE_TO_HOST; + let mut data_trb = TestNormalTRB::generate_data_td(ptr, device_req.length, in_dir); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut data_trb); + // Status Stage. + let mut status_trb = TestNormalTRB::generate_status_td(false); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut status_trb); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + + // Case 5: Data direction mismatch EP + // Setup Stage. + let mut setup_trb = TestNormalTRB::generate_setup_td(&device_req); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut setup_trb); + // Data Stage. + let ptr = guest_allocator + .borrow_mut() + .alloc(u64::from(device_req.length)); + let in_dir = + device_req.request_type & USB_DIRECTION_DEVICE_TO_HOST == USB_DIRECTION_DEVICE_TO_HOST; + let mut data_trb = TestNormalTRB::generate_data_td(ptr, device_req.length, in_dir); + data_trb.set_dir_flag(false); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut data_trb); + // Status Stage. 
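+ // (The Data Stage direction bit set above contradicts the IN request, so the
+ // whole TD is expected to be rejected with TrbError.)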
+ let mut status_trb = TestNormalTRB::generate_status_td(false); + xhci.queue_trb(slot_id, CONTROL_ENDPOINT_ID, &mut status_trb); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + + // evaluate context + xhci.evaluate_context(slot_id, 0x1234, 0, 64); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // configure endpoint + xhci.configure_endpoint(slot_id, false); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_device_init_invalid_order() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + // reset usb port + xhci.reset_port(port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // enable slot + xhci.enable_slot(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_id = evt.get_slot_id(); + // Case 1: configure endpoint before address device + xhci.configure_endpoint(slot_id, false); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ContextStateError as u32); + // address device + xhci.address_device(slot_id, false, port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // configure endpoint + xhci.configure_endpoint(slot_id, false); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // Case 2: address device after configure endpoint + xhci.address_device(slot_id, false, port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // recover, configure again + xhci.configure_endpoint(slot_id, false); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // Case 3: set tr dequeue when not stop. 
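+ // Set TR Dequeue Pointer is only legal while the endpoint is stopped; on a
+ // running endpoint it should fail with ContextStateError.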
+ let old_ptr = xhci.get_transfer_pointer(slot_id, HID_DEVICE_ENDPOINT_ID); + xhci.set_tr_dequeue(old_ptr, slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ContextStateError as u32); + + xhci.test_keyboard_event(slot_id, test_state.clone()); + + // disable slot + xhci.disable_slot(slot_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // Case 4: stop endpoint after disable slot + xhci.stop_endpoint(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::SlotNotEnabledError as u32); + // Case 5: reset endpoint after disable slot + xhci.reset_endpoint(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::SlotNotEnabledError as u32); + // Case 6: disable slot again + xhci.disable_slot(slot_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::SlotNotEnabledError as u32); + // reenable + // enable slot + xhci.enable_slot(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // address device + let slot_id = evt.get_slot_id(); + xhci.address_device(slot_id, false, port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // configure endpoint + xhci.configure_endpoint(slot_id, false); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_device_init_reset_device() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + // reset usb port + xhci.reset_port(port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // enable slot + xhci.enable_slot(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_id = evt.get_slot_id(); + // Case 1: reset after enable slot. 
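+ // Reset Device is sent while the slot is only Enabled (Address Device never
+ // ran), so there is no valid device context behind it; the test expects this
+ // to surface as a Host Controller Error.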
+ xhci.reset_device(slot_id); + let status = xhci.oper_regs_read(XHCI_OPER_REG_USBSTS); + assert!(status & USB_STS_HCE == USB_STS_HCE); + + xhci.reset_controller(true); + let slot_id = xhci.init_device(port_id); + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_device_init_multi_enable_slot() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + xhci.test_keyboard_event(slot_id, test_state.clone()); + + let enable_limit = 64; + for _ in 0..enable_limit - 1 { + // enable slot + xhci.enable_slot(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_id = evt.get_slot_id(); + assert_ne!(slot_id, 0); + } + xhci.enable_slot(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::NoSlotsError as u32); + + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_device_init_reconfigure_endpoint() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + let cnt = 3; + for _ in 0..cnt { + xhci.configure_endpoint(slot_id, true); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + xhci.configure_endpoint(slot_id, false); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + } + + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_device_init_device_request_repeat() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + // reset usb port + xhci.reset_port(port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // enable slot + xhci.enable_slot(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_id = evt.get_slot_id(); + // address device + xhci.address_device(slot_id, false, port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let cnt = 3; + for i in 0..cnt { + // get descriptor + xhci.get_device_descriptor(slot_id); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + xhci.get_config_descriptor(slot_id); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + xhci.get_string_descriptor(slot_id, i); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + 
assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + } + // configure endpoint + xhci.configure_endpoint(slot_id, false); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + let cnt = 3; + for _ in 0..cnt { + // get status + xhci.get_status(slot_id); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr - u64::from(TRB_SIZE), 2); + assert_eq!(buf, [0, 0]); + // set configuration + xhci.set_configuration(slot_id, 1); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // get configuration + xhci.get_configuration(slot_id); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr - u64::from(TRB_SIZE), 2); + assert_eq!(buf[0], 1); + // Set remote wakeup. + xhci.set_feature(slot_id, USB_DEVICE_REMOTE_WAKEUP as u16); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // get status + xhci.get_status(slot_id); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr - u64::from(TRB_SIZE), 2); + assert_eq!(buf, [2, 0]); + // Clear remote wakeup. + xhci.clear_feature(slot_id, USB_DEVICE_REMOTE_WAKEUP as u16); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // Set interface. + xhci.set_interface(slot_id, 0, 0); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // Get interface. + xhci.get_interface(slot_id, 0); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + // set protocol + xhci.set_protocol(slot_id, 1); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // get protocol + xhci.get_protocol(slot_id); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // NOTE: set idle, not fully implemented yet. 
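+ // SET_IDLE / GET_IDLE are accepted and answered with Success, but the idle
+ // rate itself has no further effect in this emulated HID device.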
+ xhci.set_idle(slot_id, 0x3 << 8); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // get idle + xhci.get_idle(slot_id); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // set report + xhci.set_report(slot_id, 3); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // get report + xhci.get_report(slot_id); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + } + + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_device_init_device_miss_step() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + // reset usb port + xhci.reset_port(port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // enable slot + xhci.enable_slot(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_id = evt.get_slot_id(); + // address device + xhci.address_device(slot_id, false, port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + qmp_send_key_event(test_state.borrow_mut(), KEYCODE_SPACE, true); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + // NOTE: not kick actually, just print error. 
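+ // The interrupt endpoint is not configured yet, so this doorbell is dropped
+ // and no transfer event appears until Configure Endpoint runs below.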
+ assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + + // configure endpoint + xhci.configure_endpoint(slot_id, false); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // clean + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_KEYBOARD_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_some()); + + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +// Tablet +#[test] +fn test_xhci_tablet_basic() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_tablet("tbt") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + let cnt = 10; + for i in 0..cnt { + qmp_send_pointer_event(test_state.borrow_mut(), i * 10, i * 20, i % 3, true); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN); + qmp_send_pointer_event(test_state.borrow_mut(), i * 10, i * 20, i % 3, false); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN); + } + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + + for i in 0..cnt { + let press_evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + let release_evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(press_evt.ccode, TRBCCode::Success as u32); + assert_eq!(release_evt.ccode, TRBCCode::Success as u32); + let press_buf = xhci.get_transfer_data_indirect(press_evt.ptr, HID_POINTER_LEN); + assert_eq!( + press_buf, + [ + i as u8 % 3, + (i * 10) as u8, + ((i * 10) >> 8) as u8, + (i * 20) as u8, + ((i * 20) >> 8) as u8, + 0, + 0 + ] + ); + let release_buf = xhci.get_transfer_data_indirect(release_evt.ptr, HID_POINTER_LEN); + assert_eq!( + release_buf, + [ + 0, + (i * 10) as u8, + ((i * 10) >> 8) as u8, + (i * 20) as u8, + ((i * 20) >> 8) as u8, + 0, + 0 + ] + ) + } + + // INPUT_BUTTON_WHEEL_LEFT + INPUT_BUTTON_WHEEL_UP. + for _ in 0..cnt { + qmp_send_pointer_event(test_state.borrow_mut(), 10, 20, 0xa0, true); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN); + qmp_send_pointer_event(test_state.borrow_mut(), 10, 20, 0xa0, false); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN); + } + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + for _ in 0..cnt { + let press_evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + let release_evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(press_evt.ccode, TRBCCode::Success as u32); + assert_eq!(release_evt.ccode, TRBCCode::Success as u32); + + let press_buf = xhci.get_transfer_data_indirect(press_evt.ptr, HID_POINTER_LEN); + assert_eq!(press_buf, [0, 10, 0, 20, 0, 1, 255]); + let release_buf = xhci.get_transfer_data_indirect(release_evt.ptr, HID_POINTER_LEN); + assert_eq!(release_buf, [0, 10, 0, 20, 0, 0, 0]); + } + + // INPUT_BUTTON_WHEEL_RIGHT + INPUT_BUTTON_WHEEL_DOWN. 
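+ // Button mask 0x140 combines wheel-down and wheel-right; the two wheel bytes
+ // of the 7-byte pointer report below are expected to read 255 (-1) and 1.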
+ for _ in 0..cnt { + qmp_send_pointer_event(test_state.borrow_mut(), 10, 20, 0x140, true); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN); + qmp_send_pointer_event(test_state.borrow_mut(), 10, 20, 0x140, false); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN); + } + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + for _ in 0..cnt { + let press_evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + let release_evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(press_evt.ccode, TRBCCode::Success as u32); + assert_eq!(release_evt.ccode, TRBCCode::Success as u32); + + let press_buf = xhci.get_transfer_data_indirect(press_evt.ptr, HID_POINTER_LEN); + assert_eq!(press_buf, [0, 10, 0, 20, 0, 255, 1]); + let release_buf = xhci.get_transfer_data_indirect(release_evt.ptr, HID_POINTER_LEN); + assert_eq!(release_buf, [0, 10, 0, 20, 0, 0, 0]); + } + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_tablet_over_hid_buffer() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_tablet("tbt") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + const HID_BUFFER_SIZE: u32 = 16; + let event_cnt = 20; + for i in 0..event_cnt { + qmp_send_pointer_event(test_state.borrow_mut(), i, i + 100, 0, true); + } + xhci.queue_multi_indirect_td( + slot_id, + HID_DEVICE_ENDPOINT_ID, + HID_POINTER_LEN, + event_cnt as usize, + ); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + for i in 0..event_cnt as u32 { + if i < HID_BUFFER_SIZE - 1 { + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_POINTER_LEN); + assert_eq!(buf, [0, i as u8, 0, (i + 100) as u8, 0, 0, 0]); + } else { + // event lost. + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + } + } + xhci.test_pointer_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_tablet_over_ring_limit() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_tablet("tbt") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + let org_ptr = xhci.get_transfer_pointer(slot_id, HID_DEVICE_ENDPOINT_ID); + // Fake ring length. + let transfer_limit = 32; + let test_cnt = 3; + for i in 0..test_cnt { + for _ in 0..transfer_limit { + qmp_send_pointer_event(test_state.borrow_mut(), 50, 100, 0, true); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_POINTER_LEN); + assert_eq!(buf, [0, 50, 0, 100, 0, 0, 0]); + } + if i == 0 { + // Fake link new address. + xhci.queue_link_trb( + slot_id, + HID_DEVICE_ENDPOINT_ID, + org_ptr + u64::from(TRB_SIZE) * 64, + false, + ); + } else if i == 1 { + // Goto the origin address. 
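+ // Link back to org_ptr so the transfer ring wraps; the third pass below also
+ // checks that the dequeue pointer really returns to org_ptr.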
+ xhci.queue_link_trb(slot_id, HID_DEVICE_ENDPOINT_ID, org_ptr, true); + } else { + xhci.queue_link_trb(slot_id, HID_DEVICE_ENDPOINT_ID, org_ptr, true); + let ptr = xhci.get_transfer_pointer(slot_id, HID_DEVICE_ENDPOINT_ID); + assert_eq!(org_ptr, ptr); + } + } + xhci.test_pointer_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_tablet_invalid_value() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_tablet("tbt") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + qmp_send_pointer_event(test_state.borrow_mut(), 0xfffff, 0xfffff, 0xff, true); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN); + qmp_send_pointer_event(test_state.borrow_mut(), 0xfffff, 0xfffff, 0xff, false); + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN); + + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let press_evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(press_evt.ccode, TRBCCode::Success as u32); + let press_buf = xhci.get_transfer_data_indirect(press_evt.ptr, HID_POINTER_LEN); + assert_eq!(press_buf, [31, 255, 127, 255, 127, 1, 255]); + let release_evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(release_evt.ccode, TRBCCode::Success as u32); + let release_buf = xhci.get_transfer_data_indirect(release_evt.ptr, HID_POINTER_LEN); + assert_eq!(release_buf, [0, 255, 127, 255, 127, 0, 0]); + + xhci.test_pointer_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_tablet_device_init_control_command() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_tablet("tbt") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .with_config("address_device_bsr", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + // no op + let ptr = xhci.get_command_pointer(); + xhci.no_op(); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + assert_eq!(evt.ptr, ptr); + + let slot_id = xhci.init_device(port_id); + + // get report + xhci.get_report(slot_id); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ShortPacket as u32); + let buf = xhci.get_transfer_data_indirect(evt.ptr - u64::from(TRB_SIZE), HID_POINTER_LEN); + assert_eq!(buf, [0, 0, 0, 0, 0, 0, 0]); + + xhci.test_pointer_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_invalid_value_002() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_config("auto_run", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + // reset usb port + xhci.reset_port(port_id); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + xhci.enable_slot(); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let slot_id = evt.get_slot_id(); + xhci.address_device(slot_id, false, port_id); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, 
TRBCCode::Success as u32); + + // Case: invalid port. + xhci.reset_port(2); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + + // Case: invalid slot id. + let mut trb = TestNormalTRB::default(); + trb.set_slot_id(128); + trb.set_trb_type(TRBType::CrResetDevice as u32); + xhci.queue_command(&mut trb); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + + // Case: only enable slot without endpoint. + let input_ctx_addr = xhci.configure_endpoint(slot_id, false); + let mut input_ctx = XhciInputCtrlCtx::default(); + input_ctx.add_flags = 1; + input_ctx.drop_flags = 0; + xhci.mem_write_u32(input_ctx_addr, input_ctx.as_dwords()); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + // configure endpoint. + xhci.configure_endpoint(slot_id, false); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + // Case: stop invalid endpoint + xhci.stop_endpoint(slot_id, 32); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + + // Case: stop when not running + xhci.stop_endpoint(slot_id, HID_DEVICE_ENDPOINT_ID); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + xhci.stop_endpoint(slot_id, HID_DEVICE_ENDPOINT_ID); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ContextStateError as u32); + + // Case: reset invalid endpoint + xhci.reset_endpoint(slot_id, 32); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + + // Case: reset endpoint when endpoint is not halted + xhci.reset_endpoint(slot_id, HID_DEVICE_ENDPOINT_ID); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::ContextStateError as u32); + + // Case set tr dequeue for invalid endpoint + let old_ptr = xhci.get_transfer_pointer(slot_id, HID_DEVICE_ENDPOINT_ID); + xhci.set_tr_dequeue(old_ptr, slot_id, 32); + xhci.doorbell_write(0, 0); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + + xhci.test_keyboard_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_tablet_flush() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_tablet("tbt") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + let slot_id = xhci.init_device(port_id); + // Case: stop endpoint when transfer is doing. + xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + xhci.stop_endpoint(slot_id, HID_DEVICE_ENDPOINT_ID); + // No data, the xhci report short packet. + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Stopped as u32); + // Stop command return success. 
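+ // Two events are expected: the Stopped transfer event consumed above for the
+ // in-flight TD, then the Success completion for the Stop Endpoint command.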
+ let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_command_config() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci_config("xhci", 16, 16) + .with_usb_tablet("tbt") + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + xhci.init_max_device_slot_enabled(); + xhci.init_device_context_base_address_array_pointer(); + xhci.init_command_ring_dequeue_pointer(); + xhci.init_interrupter(); + xhci.run(); + let port_id = 1; + let slot_id = xhci.init_device(port_id); + xhci.test_pointer_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_reset() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_tablet("tbt") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + test_state + .borrow_mut() + .qmp("{\"execute\": \"system_reset\"}"); + test_state.borrow_mut().qmp_read(); + + let mut xhci = xhci.borrow_mut(); + xhci.init_host_controller(XHCI_PCI_SLOT_NUM, XHCI_PCI_FUN_NUM); + xhci.run(); + let port_id = 1; + let slot_id = xhci.init_device(port_id); + xhci.test_pointer_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_disable_interrupt() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_tablet("tbt") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let mut xhci = xhci.borrow_mut(); + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + // Case: disable USB_CMD_INTE + qmp_send_pointer_event(test_state.borrow_mut(), 100, 200, 0, true); + xhci.queue_direct_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN); + let value = xhci.oper_regs_read(XHCI_OPER_REG_USBCMD); + xhci.oper_regs_write(XHCI_OPER_REG_USBCMD, value & !USB_CMD_INTE); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + let value = xhci.oper_regs_read(XHCI_OPER_REG_USBCMD); + xhci.oper_regs_write(XHCI_OPER_REG_USBCMD, value | USB_CMD_INTE); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_direct(evt.ptr, HID_POINTER_LEN); + assert_eq!(buf, [0, 100, 0, 200, 0, 0, 0]); + + // Case: disable IMAN_IE + qmp_send_pointer_event(test_state.borrow_mut(), 100, 200, 0, true); + xhci.queue_direct_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN); + let value = xhci.interrupter_regs_read(PRIMARY_INTERRUPTER_ID as u64, XHCI_INTR_REG_IMAN); + xhci.interrupter_regs_write( + PRIMARY_INTERRUPTER_ID as u64, + XHCI_INTR_REG_IMAN, + value & !IMAN_IE & IMAN_IP, + ); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + assert!(xhci.fetch_event(PRIMARY_INTERRUPTER_ID).is_none()); + let value = xhci.interrupter_regs_read(PRIMARY_INTERRUPTER_ID as u64, XHCI_INTR_REG_IMAN); + xhci.interrupter_regs_write( + PRIMARY_INTERRUPTER_ID as u64, + XHCI_INTR_REG_IMAN, + value & !IMAN_IP | IMAN_IE, + ); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::Success as u32); + let buf = xhci.get_transfer_data_direct(evt.ptr, HID_POINTER_LEN); + assert_eq!(buf, [0, 100, 0, 200, 0, 0, 0]); + + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_tablet_invalid_request() { + let (xhci, test_state, _) = 
TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_tablet("tbt") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + + let mut xhci = xhci.borrow_mut(); + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + // unsupported report request for tablet. + xhci.test_invalie_device_request(slot_id, USB_INTERFACE_CLASS_OUT_REQUEST, HID_SET_REPORT, 0); + // invalid descriptor type. + xhci.test_invalie_device_request( + slot_id, + USB_DEVICE_IN_REQUEST, + USB_REQUEST_GET_DESCRIPTOR, + 17 << 8, + ); + // invalid string index + xhci.get_string_descriptor(slot_id, 100); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::StallError as u32); + xhci.recovery_endpoint(slot_id, CONTROL_ENDPOINT_ID); + // invalid interface index + xhci.set_interface(slot_id, 5, 5); + xhci.doorbell_write(slot_id, CONTROL_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::StallError as u32); + xhci.recovery_endpoint(slot_id, CONTROL_ENDPOINT_ID); + // invalid request type + xhci.test_invalie_device_request( + slot_id, + USB_INTERFACE_CLASS_OUT_REQUEST + 5, + HID_SET_REPORT, + 0, + ); + // invalid in request + xhci.test_invalie_device_request(slot_id, USB_INTERFACE_CLASS_IN_REQUEST, 0xf, 0); + // invalid out request + xhci.test_invalie_device_request(slot_id, USB_INTERFACE_CLASS_OUT_REQUEST, 0xf, 0); + // invalid interface request value + xhci.test_invalie_device_request( + slot_id, + USB_INTERFACE_IN_REQUEST, + USB_REQUEST_GET_DESCRIPTOR, + 20 << 8, + ); + // invalid interface request + xhci.test_invalie_device_request(slot_id, USB_INTERFACE_IN_REQUEST, 0xf, 0x22 << 8); + + xhci.test_pointer_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_keyboard_tablet_basic() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_keyboard("kbd") + .with_usb_tablet("tbt") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + + let port_id = 1; + xhci.device_config.insert(String::from("keyboard"), true); + xhci.device_config.insert(String::from("tablet"), false); + let slot_id = xhci.init_device(port_id); + xhci.test_keyboard_event(slot_id, test_state.clone()); + + let port_id = 2; + xhci.device_config.insert(String::from("keyboard"), false); + xhci.device_config.insert(String::from("tablet"), true); + let slot_id = xhci.init_device(port_id); + xhci.test_pointer_event(slot_id, test_state.clone()); + test_state.borrow_mut().stop(); +} + +#[test] +fn test_xhci_tablet_invalid_trb() { + let (xhci, test_state, _) = TestUsbBuilder::new() + .with_xhci("xhci") + .with_usb_tablet("tbt") + .with_config("auto_run", true) + .with_config("command_auto_doorbell", true) + .build(); + let mut xhci = xhci.borrow_mut(); + let port_id = 1; + let slot_id = xhci.init_device(port_id); + + qmp_send_pointer_event(test_state.borrow_mut(), 100, 200, 0, true); + // Invalid address in TRB. + let mut trb = TestNormalTRB::generate_normal_td(0, 6); + trb.set_pointer(0); + trb.set_idt_flag(false); + xhci.queue_trb(slot_id, HID_DEVICE_ENDPOINT_ID, &mut trb); + xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID); + let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap(); + assert_eq!(evt.ccode, TRBCCode::TrbError as u32); + // Fetch the remaining data. 
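+ // The pointer event sent before the invalid TRB is still buffered in the HID
+ // device; a well-formed TD should now deliver it unchanged.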
+    xhci.queue_indirect_td(slot_id, HID_DEVICE_ENDPOINT_ID, HID_POINTER_LEN);
+    xhci.doorbell_write(slot_id, HID_DEVICE_ENDPOINT_ID);
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    assert_eq!(evt.ccode, TRBCCode::Success as u32);
+    let buf = xhci.get_transfer_data_indirect(evt.ptr, HID_POINTER_LEN);
+    assert_eq!(buf, [0, 100, 0, 200, 0, 0, 0]);
+
+    xhci.test_pointer_event(slot_id, test_state.clone());
+    test_state.borrow_mut().stop();
+}
+
+/// Test basic hotplug function.
+/// TestStep:
+/// 1. Start a domain without tablet and keyboard.
+/// 2. Hotplug tablet and keyboard.
+/// 3. Test the tablet and keyboard functions.
+/// 4. Unplug tablet and keyboard.
+/// Expect:
+/// 1/2/3/4: success.
+#[test]
+fn test_plug_usb_basic() {
+    let (xhci, test_state, _) = TestUsbBuilder::new()
+        .with_xhci("xhci")
+        .with_config("auto_run", true)
+        .with_config("command_auto_doorbell", true)
+        .build();
+    let mut xhci = xhci.borrow_mut();
+
+    qmp_plug_keyboard_event(test_state.borrow_mut(), 1);
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    assert_eq!(evt.ccode, TRBCCode::Success as u32);
+
+    let port_id = 1;
+    xhci.device_config.insert(String::from("keyboard"), true);
+    let slot_id = xhci.init_device(port_id);
+    xhci.test_keyboard_event(slot_id, test_state.clone());
+
+    qmp_plug_tablet_event(test_state.borrow_mut(), 2);
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    assert_eq!(evt.ccode, TRBCCode::Success as u32);
+
+    let port_id = 2;
+    xhci.device_config.insert(String::from("tablet"), true);
+    let slot_id = xhci.init_device(port_id);
+    xhci.test_pointer_event(slot_id, test_state.clone());
+
+    qmp_unplug_usb_event(test_state.borrow_mut(), 1);
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    assert_eq!(evt.ccode, TRBCCode::Success as u32);
+    qmp_unplug_usb_event(test_state.borrow_mut(), 2);
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    assert_eq!(evt.ccode, TRBCCode::Success as u32);
+
+    test_state.borrow_mut().stop();
+}
+
+/// Test basic hotplug functions.
+/// TestStep:
+/// 1. Start a domain with tablet and keyboard.
+/// 2. Hotplug the new tablet and keyboard, unplug the old.
+/// 3. Test the tablet and keyboard functions.
+/// 4. Unplug tablet and keyboard.
+/// Expect:
+/// 1/2/3/4: success.
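The qmp_plug_keyboard_event, qmp_plug_tablet_event and qmp_unplug_usb_event helpers used by these hotplug tests drive StratoVirt over QMP. A minimal sketch of the commands they are assumed to build, where the input<N> ids match the error messages checked in test_unplug_usb_device and the driver names and argument layout are illustrative rather than the helpers' exact wire format:

fn qmp_device_add(n: u32, driver: &str) -> String {
    // driver is expected to be something like "usb-kbd" or "usb-tablet" here.
    format!(
        "{{\"execute\": \"device_add\", \"arguments\": {{\"id\": \"input{}\", \"driver\": \"{}\"}}}}",
        n, driver
    )
}

fn qmp_device_del(n: u32) -> String {
    format!(
        "{{\"execute\": \"device_del\", \"arguments\": {{\"id\": \"input{}\"}}}}",
        n
    )
}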
+#[test]
+fn test_tablet_keyboard_plug() {
+    let (xhci, test_state, _) = TestUsbBuilder::new()
+        .with_xhci("xhci")
+        .with_usb_tablet("input1")
+        .with_usb_keyboard("input2")
+        .with_config("auto_run", true)
+        .with_config("command_auto_doorbell", true)
+        .build();
+    let mut xhci = xhci.borrow_mut();
+
+    let port_id = 1;
+    xhci.device_config.insert(String::from("keyboard"), false);
+    xhci.device_config.insert(String::from("tablet"), true);
+    let slot_id = xhci.init_device(port_id);
+    xhci.test_pointer_event(slot_id, test_state.clone());
+
+    let port_id = 2;
+    xhci.device_config.insert(String::from("keyboard"), true);
+    xhci.device_config.insert(String::from("tablet"), false);
+    let slot_id = xhci.init_device(port_id);
+    xhci.test_keyboard_event(slot_id, test_state.clone());
+
+    qmp_plug_tablet_event(test_state.borrow_mut(), 3);
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    assert_eq!(evt.ccode, TRBCCode::Success as u32);
+
+    let port_id = 3;
+    xhci.device_config.insert(String::from("keyboard"), false);
+    xhci.device_config.insert(String::from("tablet"), true);
+    let slot_id_tbt = xhci.init_device(port_id);
+
+    qmp_plug_keyboard_event(test_state.borrow_mut(), 4);
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    assert_eq!(evt.ccode, TRBCCode::Success as u32);
+
+    let port_id = 4;
+    xhci.device_config.insert(String::from("keyboard"), true);
+    xhci.device_config.insert(String::from("tablet"), false);
+    let slot_id_kbd = xhci.init_device(port_id);
+
+    qmp_unplug_usb_event(test_state.borrow_mut(), 1);
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    assert_eq!(evt.ccode, TRBCCode::Success as u32);
+    qmp_unplug_usb_event(test_state.borrow_mut(), 2);
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    assert_eq!(evt.ccode, TRBCCode::Success as u32);
+
+    xhci.test_pointer_event(slot_id_tbt, test_state.clone());
+    xhci.test_keyboard_event(slot_id_kbd, test_state.clone());
+
+    qmp_unplug_usb_event(test_state.borrow_mut(), 3);
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    assert_eq!(evt.ccode, TRBCCode::Success as u32);
+    qmp_unplug_usb_event(test_state.borrow_mut(), 4);
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    assert_eq!(evt.ccode, TRBCCode::Success as u32);
+
+    test_state.borrow_mut().stop();
+}
+
+/// Test StratoVirt maximum specification.
+/// TestStep:
+/// 1. Start a domain without tablet and keyboard.
+/// 2. Plug 15 tablets and keyboards.
+/// 3. Continue to hotplug tablet and keyboard, expectation is failure.
+/// 4. Unplug all tablets and keyboards.
+/// Expect:
+/// 1/2/4: success.
+/// 3: No available USB port.
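The "No available USB port." error checked in the next test is a simple capacity failure: 15 USB 2.0 ports are configured, so the 16th device_add has no free port to attach to. A rough sketch of such a check, assuming ports are tracked as optional attachments (not StratoVirt's actual code):

fn find_free_port<T>(ports: &[Option<T>]) -> Result<usize, String> {
    ports
        .iter()
        .position(|slot| slot.is_none())
        .ok_or_else(|| "No available USB port.".to_string())
}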
+#[test]
+fn test_max_number_of_device() {
+    let max_num_of_port2 = 15;
+    let (xhci, test_state, _) = TestUsbBuilder::new()
+        .with_xhci_config("xhci", max_num_of_port2, 0)
+        .with_config("auto_run", true)
+        .with_config("command_auto_doorbell", true)
+        .build();
+    let mut xhci = xhci.borrow_mut();
+
+    let mut i = 1;
+    while i <= max_num_of_port2 {
+        qmp_plug_keyboard_event(test_state.borrow_mut(), i);
+        let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+        assert_eq!(evt.ccode, TRBCCode::Success as u32);
+        i += 1;
+    }
+
+    let value = qmp_plug_keyboard_event(test_state.borrow_mut(), 16);
+    let status = value["error"]["desc"].as_str().unwrap().to_string();
+    assert_eq!(status, "No available USB port.".to_string());
+
+    let value = qmp_plug_tablet_event(test_state.borrow_mut(), 16);
+    let status = value["error"]["desc"].as_str().unwrap().to_string();
+    assert_eq!(status, "No available USB port.".to_string());
+
+    i = 1;
+    while i <= max_num_of_port2 {
+        qmp_unplug_usb_event(test_state.borrow_mut(), i);
+        let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+        assert_eq!(evt.ccode, TRBCCode::Success as u32);
+        qmp_event_read(test_state.borrow_mut());
+        i += 1;
+    }
+    test_state.borrow_mut().stop();
+}
+
+/// Test abnormal hotplug operation.
+/// TestStep:
+/// 1. Start a domain without tablet and keyboard.
+/// 2. Unplug tablet and keyboard, expectation is failure.
+/// 3. Hotplug tablet and keyboard.
+/// 4. Unplug tablet and keyboard, expectation is success.
+/// 5. Repeat the unplug operation, expectation is failure.
+/// Expect:
+/// 1/3/4: success.
+/// 2/5: Failed to detach device: id input1 not found.
+#[test]
+fn test_unplug_usb_device() {
+    let (xhci, test_state, _) = TestUsbBuilder::new()
+        .with_xhci("xhci")
+        .with_config("auto_run", true)
+        .with_config("command_auto_doorbell", true)
+        .build();
+    let mut xhci = xhci.borrow_mut();
+
+    let value = qmp_unplug_usb_event(test_state.borrow_mut(), 1);
+    let status = value["error"]["desc"].as_str().unwrap().to_string();
+    assert_eq!(
+        status,
+        "Failed to detach device: id input1 not found".to_string()
+    );
+
+    qmp_plug_keyboard_event(test_state.borrow_mut(), 1);
+    let evt = xhci.fetch_event(PRIMARY_INTERRUPTER_ID).unwrap();
+    assert_eq!(evt.ccode, TRBCCode::Success as u32);
+
+    let port_id = 1;
+    xhci.device_config.insert(String::from("keyboard"), true);
+    xhci.device_config.insert(String::from("tablet"), false);
+    let slot_id = xhci.init_device(port_id);
+    xhci.test_keyboard_event(slot_id, test_state.clone());
+
+    qmp_unplug_usb_event(test_state.borrow_mut(), 1);
+    qmp_event_read(test_state.borrow_mut());
+
+    let value = qmp_unplug_usb_event(test_state.borrow_mut(), 1);
+    let status = value["error"]["desc"].as_str().unwrap().to_string();
+    assert_eq!(
+        status,
+        "Failed to detach device: id input1 not found".to_string()
+    );
+    test_state.borrow_mut().stop();
+}
diff --git a/tests/mod_test/tests/virtio_gpu_test.rs b/tests/mod_test/tests/virtio_gpu_test.rs
new file mode 100644
index 0000000000000000000000000000000000000000..c9baef2382ecec91effdbb5b85d67506abd2ee1a 100644
--- /dev/null
+++ b/tests/mod_test/tests/virtio_gpu_test.rs
@@ -0,0 +1,794 @@
+// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::time::{Duration, Instant}; +use std::vec; + +use mod_test::libdriver::virtio::VirtioDeviceOps; +use mod_test::libdriver::virtio_gpu::{ + current_curosr_check, current_surface_check, get_display_info, get_edid, invalid_cmd_test, + resource_attach_backing, resource_attach_backing_with_invalid_ctx_len, resource_create, + resource_detach_backing, resource_flush, resource_unref, set_scanout, transfer_to_host, + update_cursor, GpuDevConfig, VirtioGpuCtrlHdr, VirtioGpuDisplayInfo, VirtioGpuGetEdid, + VirtioGpuMemEntry, VirtioGpuRect, VirtioGpuResourceAttachBacking, VirtioGpuResourceCreate2d, + VirtioGpuResourceDetachBacking, VirtioGpuResourceFlush, VirtioGpuResourceUnref, + VirtioGpuSetScanout, VirtioGpuTransferToHost2d, +}; +use mod_test::libdriver::virtio_gpu::{set_up, tear_down}; +use util::byte_code::ByteCode; +use virtio::{ + cal_image_hostmem, VIRTIO_GPU_CMD_GET_DISPLAY_INFO, VIRTIO_GPU_CMD_RESOURCE_CREATE_2D, + VIRTIO_GPU_FORMAT_INVALID_UNORM, VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID, + VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY, VIRTIO_GPU_RESP_ERR_UNSPEC, VIRTIO_GPU_RESP_OK_DISPLAY_INFO, + VIRTIO_GPU_RESP_OK_EDID, VIRTIO_GPU_RESP_OK_NODATA, +}; + +const D_RES_ID: u32 = 1; +const D_SCANOUT_ID: u32 = 0; +const D_INVALID_SCANOUT_ID: u32 = 100; +const D_FMT: u32 = 2; +const D_INVALID_FMT: u32 = VIRTIO_GPU_FORMAT_INVALID_UNORM; +const D_WIDTH: u32 = 64; +const D_HEIGHT: u32 = 64; +const D_CURSOR_WIDTH: u32 = 64; +const D_CURSOR_HEIGHT: u32 = 64; +const D_BYTE_PER_PIXEL: u32 = 4; +const D_CURSOR_IMG_SIZE: u32 = D_CURSOR_WIDTH * D_CURSOR_HEIGHT * D_BYTE_PER_PIXEL; +const D_OFFSET: u64 = 0; +const D_X_COORD: u32 = 0; +const D_Y_COORD: u32 = 0; +const D_INVALID_NR_ENTRIES: u32 = 1 + 16384; + +#[test] +fn image_display_fun() { + let image_size = cal_image_hostmem(D_FMT, D_WIDTH, D_HEIGHT); + let image_size = image_size.0.unwrap() as u64; + + let mut gpu_cfg = GpuDevConfig::default(); + gpu_cfg.max_hostmem = image_size; + + let (dpy, gpu) = set_up(&gpu_cfg); + let image_addr = gpu.borrow_mut().allocator.borrow_mut().alloc(image_size); + + let image_byte_0 = vec![0_u8; 1]; + let image_byte_1 = vec![1_u8; 1]; + let image_0 = vec![0_u8; image_size as usize]; + + // image with half data 1 + let mut image_half_1 = vec![0_u8; image_size as usize]; + let mut i = 0; + while i < image_size / 2 { + image_half_1[i as usize] = 1; + i += 1; + } + // image with quarter data1 + let mut image_quarter_1 = vec![0_u8; image_size as usize]; + let mut i = 0; + while i < image_size / 4 { + image_quarter_1[i as usize] = 1; + i += 1; + } + + assert_eq!( + VIRTIO_GPU_RESP_OK_DISPLAY_INFO, + get_display_info(&gpu).header.hdr_type + ); + + assert_eq!( + VIRTIO_GPU_RESP_OK_EDID, + get_edid(&gpu, VirtioGpuGetEdid::new(D_SCANOUT_ID)) + .header + .hdr_type + ); + + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_WIDTH, D_HEIGHT) + ) + .hdr_type + ); + + gpu.borrow_mut() + .state + .borrow_mut() + .memset(image_addr, image_size, &image_byte_0); + + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_attach_backing( + &gpu, + 
VirtioGpuResourceAttachBacking::new(D_RES_ID, 1), + vec![VirtioGpuMemEntry::new(image_addr, image_size as u32)] + ) + .hdr_type + ); + + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + transfer_to_host( + &gpu, + VirtioGpuTransferToHost2d::new( + VirtioGpuRect::new(D_X_COORD, D_Y_COORD, D_WIDTH, D_HEIGHT), + D_OFFSET, + D_RES_ID, + ), + ) + .hdr_type + ); + + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + set_scanout( + &gpu, + VirtioGpuSetScanout::new( + VirtioGpuRect::new(D_X_COORD, D_Y_COORD, D_WIDTH, D_HEIGHT), + D_SCANOUT_ID, + D_RES_ID, + ) + ) + .hdr_type + ); + assert!(current_surface_check(&dpy, &image_0)); + + // update image, half of image change to 1 + gpu.borrow_mut() + .state + .borrow_mut() + .memset(image_addr, image_size / 2, &image_byte_1); + + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + transfer_to_host( + &gpu, + VirtioGpuTransferToHost2d::new( + VirtioGpuRect::new(D_X_COORD, D_Y_COORD, D_WIDTH, D_HEIGHT), + D_OFFSET, + D_RES_ID, + ), + ) + .hdr_type + ); + + // But we only flush quarter of the image. So check the image is quarter 1 or not. + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_flush( + &gpu, + VirtioGpuResourceFlush::new( + VirtioGpuRect::new(D_X_COORD, D_Y_COORD, D_WIDTH, D_HEIGHT / 4), + D_RES_ID + ) + ) + .hdr_type + ); + assert!(current_surface_check(&dpy, &image_quarter_1)); + + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_detach_backing(&gpu, VirtioGpuResourceDetachBacking::new(D_RES_ID),).hdr_type + ); + + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_unref(&gpu, VirtioGpuResourceUnref::new(D_RES_ID)).hdr_type + ); + + tear_down(dpy, gpu); +} + +#[test] +fn cursor_display_fun() { + let image_0: Vec = vec![0_u8; D_CURSOR_IMG_SIZE as usize]; + let image_1: Vec = vec![1_u8; D_CURSOR_IMG_SIZE as usize]; + let image_byte_1 = vec![1_u8; 1]; + + let image_size = cal_image_hostmem(D_FMT, D_CURSOR_WIDTH, D_CURSOR_HEIGHT); + let image_size = image_size.0.unwrap() as u64; + + let mut gpu_cfg = GpuDevConfig::default(); + gpu_cfg.max_hostmem = image_size; + + let (dpy, gpu) = set_up(&gpu_cfg); + + let image_addr = gpu.borrow_mut().allocator.borrow_mut().alloc(image_size); + + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_CURSOR_WIDTH, D_CURSOR_HEIGHT) + ) + .hdr_type + ); + + // init data is all 0 + update_cursor(&gpu, D_RES_ID, D_SCANOUT_ID); + assert!(current_curosr_check(&dpy, &image_0)); + + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_attach_backing( + &gpu, + VirtioGpuResourceAttachBacking::new(D_RES_ID, 1), + vec![VirtioGpuMemEntry::new(image_addr, image_size as u32)] + ) + .hdr_type + ); + + // update image to 1 + gpu.borrow_mut() + .state + .borrow_mut() + .memset(image_addr, image_size, &image_byte_1); + + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + transfer_to_host( + &gpu, + VirtioGpuTransferToHost2d::new( + VirtioGpuRect::new(D_X_COORD, D_Y_COORD, D_CURSOR_WIDTH, D_CURSOR_HEIGHT), + D_OFFSET, + D_RES_ID, + ), + ) + .hdr_type + ); + + // now resource data is all 1 + update_cursor(&gpu, D_RES_ID, D_SCANOUT_ID); + assert!(current_curosr_check(&dpy, &image_1)); + + tear_down(dpy, gpu); +} + +#[test] +fn resource_create_dfx() { + let image_size = cal_image_hostmem(D_FMT, D_WIDTH, D_HEIGHT); + let image_size = image_size.0.unwrap() as u64; + + let mut gpu_cfg = GpuDevConfig::default(); + gpu_cfg.max_hostmem = image_size; + + let (dpy, gpu) = set_up(&gpu_cfg); + + // exceed max_hostmem + assert_eq!( + VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY, + resource_create( + &gpu, + 
VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_WIDTH + 1, D_HEIGHT) + ) + .hdr_type + ); + + // invalid format + assert_eq!( + VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(D_RES_ID, D_INVALID_FMT, D_WIDTH, D_HEIGHT) + ) + .hdr_type + ); + + // invalid resource id 0 + assert_eq!( + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(0, D_FMT, D_WIDTH, D_HEIGHT) + ) + .hdr_type + ); + + // resource id exist + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_WIDTH, D_HEIGHT / 2) + ) + .hdr_type + ); + assert_eq!( + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_WIDTH, D_HEIGHT / 2) + ) + .hdr_type + ); + + tear_down(dpy, gpu); +} + +#[test] +fn resource_destroy_dfx() { + let image_size = cal_image_hostmem(D_FMT, D_WIDTH, D_HEIGHT); + let image_size = image_size.0.unwrap() as u64; + + let mut gpu_cfg = GpuDevConfig::default(); + gpu_cfg.max_hostmem = image_size; + let (dpy, gpu) = set_up(&gpu_cfg); + + // release resource which doesn't exist + assert_eq!( + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, + resource_unref(&gpu, VirtioGpuResourceUnref::new(D_RES_ID)).hdr_type + ); + + // release and check + // create resource first + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_WIDTH, D_HEIGHT) + ) + .hdr_type + ); + // release it + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_unref(&gpu, VirtioGpuResourceUnref::new(D_RES_ID)).hdr_type + ); + // check if it release, expect can create again + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_WIDTH, D_HEIGHT) + ) + .hdr_type + ); + + tear_down(dpy, gpu); +} + +#[test] +fn resource_attach_dfx() { + let image_size = cal_image_hostmem(D_FMT, D_WIDTH, D_HEIGHT); + let image_size = image_size.0.unwrap() as u64; + + let mut gpu_cfg = GpuDevConfig::default(); + gpu_cfg.max_hostmem = image_size; + + let (dpy, gpu) = set_up(&gpu_cfg); + let image_addr = gpu.borrow_mut().allocator.borrow_mut().alloc(image_size); + + // resource is invalid yet + assert_eq!( + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, + resource_attach_backing( + &gpu, + VirtioGpuResourceAttachBacking::new(D_RES_ID, 1), + vec![VirtioGpuMemEntry::new(image_addr, image_size as u32)] + ) + .hdr_type + ); + + // create resource first + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_WIDTH, D_HEIGHT) + ) + .hdr_type + ); + + // invalid nr_entries + assert_eq!( + VIRTIO_GPU_RESP_ERR_UNSPEC, + resource_attach_backing( + &gpu, + VirtioGpuResourceAttachBacking::new(D_RES_ID, D_INVALID_NR_ENTRIES), + vec![VirtioGpuMemEntry::new(image_addr, image_size as u32)] + ) + .hdr_type + ); + + // invalid context length + assert_eq!( + VIRTIO_GPU_RESP_ERR_UNSPEC, + resource_attach_backing_with_invalid_ctx_len( + &gpu, + VirtioGpuResourceAttachBacking::new(D_RES_ID, 1) + ) + .hdr_type + ); + + // invalid context address + assert_eq!( + VIRTIO_GPU_RESP_ERR_UNSPEC, + resource_attach_backing( + &gpu, + VirtioGpuResourceAttachBacking::new(D_RES_ID, 1), + vec![VirtioGpuMemEntry::new(0, image_size as u32)] + ) + .hdr_type + ); + + tear_down(dpy, gpu); +} + +#[test] +fn resource_detach_dfx() { + let image_size = cal_image_hostmem(D_FMT, D_WIDTH, D_HEIGHT); + let 
image_size = image_size.0.unwrap() as u64; + + let mut gpu_cfg = GpuDevConfig::default(); + gpu_cfg.max_hostmem = image_size; + + let (dpy, gpu) = set_up(&gpu_cfg); + gpu.borrow_mut().allocator.borrow_mut().alloc(image_size); + + // invalid resource id + assert_eq!( + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, + resource_detach_backing(&gpu, VirtioGpuResourceDetachBacking::new(D_RES_ID),).hdr_type + ); + + // create resource first + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_WIDTH, D_HEIGHT) + ) + .hdr_type + ); + + // invalid resource id + assert_eq!( + VIRTIO_GPU_RESP_ERR_UNSPEC, + resource_detach_backing(&gpu, VirtioGpuResourceDetachBacking::new(D_RES_ID),).hdr_type + ); + + tear_down(dpy, gpu); +} + +#[test] +fn resource_transfer_dfx() { + let image_size = cal_image_hostmem(D_FMT, D_WIDTH, D_HEIGHT); + let image_size = image_size.0.unwrap() as u64; + + let mut gpu_cfg = GpuDevConfig::default(); + gpu_cfg.max_hostmem = image_size; + + let (dpy, gpu) = set_up(&gpu_cfg); + let image_addr = gpu.borrow_mut().allocator.borrow_mut().alloc(image_size); + + // invalid resource id + assert_eq!( + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, + transfer_to_host( + &gpu, + VirtioGpuTransferToHost2d::new( + VirtioGpuRect::new(D_X_COORD, D_Y_COORD, D_WIDTH, D_HEIGHT), + D_OFFSET, + D_RES_ID, + ), + ) + .hdr_type + ); + + // create resource first + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_WIDTH, D_HEIGHT) + ) + .hdr_type + ); + + // have not attach any data source + assert_eq!( + VIRTIO_GPU_RESP_ERR_UNSPEC, + transfer_to_host( + &gpu, + VirtioGpuTransferToHost2d::new( + VirtioGpuRect::new(D_X_COORD, D_Y_COORD, D_WIDTH, D_HEIGHT), + D_OFFSET, + D_RES_ID, + ), + ) + .hdr_type + ); + + // attach first + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_attach_backing( + &gpu, + VirtioGpuResourceAttachBacking::new(D_RES_ID, 1), + vec![VirtioGpuMemEntry::new(image_addr, image_size as u32)] + ) + .hdr_type + ); + + // invalid rect region + assert_eq!( + VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, + transfer_to_host( + &gpu, + VirtioGpuTransferToHost2d::new( + VirtioGpuRect::new(D_X_COORD, D_Y_COORD, D_WIDTH + 1, D_HEIGHT - 1), + D_OFFSET, + D_RES_ID, + ), + ) + .hdr_type + ); + + tear_down(dpy, gpu); +} + +#[test] +fn scanout_set_dfx() { + let image_size = cal_image_hostmem(D_FMT, D_WIDTH, D_HEIGHT); + let image_size = image_size.0.unwrap() as u64; + + let mut gpu_cfg = GpuDevConfig::default(); + gpu_cfg.max_hostmem = image_size; + + let (dpy, gpu) = set_up(&gpu_cfg); + let image_addr = gpu.borrow_mut().allocator.borrow_mut().alloc(image_size); + + // invalid scanout id + assert_eq!( + VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID, + set_scanout( + &gpu, + VirtioGpuSetScanout::new( + VirtioGpuRect::new(D_X_COORD, D_Y_COORD, D_WIDTH, D_HEIGHT), + D_INVALID_SCANOUT_ID, + D_RES_ID, + ) + ) + .hdr_type + ); + + // invalid resource id + assert_eq!( + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, + set_scanout( + &gpu, + VirtioGpuSetScanout::new( + VirtioGpuRect::new(D_X_COORD, D_Y_COORD, D_WIDTH, D_HEIGHT), + D_SCANOUT_ID, + D_RES_ID, + ) + ) + .hdr_type + ); + + // create resource first + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_WIDTH, D_HEIGHT) + ) + .hdr_type + ); + + // attach backing + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_attach_backing( + &gpu, + 
VirtioGpuResourceAttachBacking::new(D_RES_ID, 1), + vec![VirtioGpuMemEntry::new(image_addr, image_size as u32)] + ) + .hdr_type + ); + + // invalid rect region + assert_eq!( + VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, + set_scanout( + &gpu, + VirtioGpuSetScanout::new( + VirtioGpuRect::new(D_X_COORD, D_Y_COORD, D_WIDTH + 1, D_HEIGHT), + D_SCANOUT_ID, + D_RES_ID, + ) + ) + .hdr_type + ); + + tear_down(dpy, gpu); +} + +#[test] +fn scanout_flush_dfx() { + let image_size = cal_image_hostmem(D_FMT, D_WIDTH, D_HEIGHT); + let image_size = image_size.0.unwrap() as u64; + + let mut gpu_cfg = GpuDevConfig::default(); + gpu_cfg.max_hostmem = image_size; + + let (dpy, gpu) = set_up(&gpu_cfg); + gpu.borrow_mut().allocator.borrow_mut().alloc(image_size); + + // invalid resource id + assert_eq!( + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, + resource_flush( + &gpu, + VirtioGpuResourceFlush::new( + VirtioGpuRect::new(D_X_COORD, D_Y_COORD, D_WIDTH, D_HEIGHT), + D_RES_ID + ) + ) + .hdr_type + ); + + // create resource first + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_WIDTH, D_HEIGHT) + ) + .hdr_type + ); + + // invalid rect region + assert_eq!( + VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, + resource_flush( + &gpu, + VirtioGpuResourceFlush::new( + VirtioGpuRect::new(D_X_COORD, D_Y_COORD, D_WIDTH + 1, D_HEIGHT), + D_RES_ID + ) + ) + .hdr_type + ); + + tear_down(dpy, gpu); +} + +#[test] +fn cursor_update_dfx() { + let image_size = cal_image_hostmem(D_FMT, D_CURSOR_WIDTH, D_CURSOR_HEIGHT); + let image_size = image_size.0.unwrap() as u64; + + let mut gpu_cfg = GpuDevConfig::default(); + gpu_cfg.max_hostmem = image_size; + + let (dpy, gpu) = set_up(&gpu_cfg); + gpu.borrow_mut().allocator.borrow_mut().alloc(image_size); + + let image_empty: Vec = vec![]; + let image_0: Vec = vec![0_u8; D_CURSOR_IMG_SIZE as usize]; + + // invalid scanout id + assert!(current_curosr_check(&dpy, &image_empty)); + update_cursor(&gpu, D_RES_ID, D_INVALID_SCANOUT_ID); + assert!(current_curosr_check(&dpy, &image_empty)); + + // invalid resource id + update_cursor(&gpu, D_RES_ID, D_SCANOUT_ID); + assert!(current_curosr_check(&dpy, &image_0)); + + // create resource which have invalid width + assert_eq!( + VIRTIO_GPU_RESP_OK_NODATA, + resource_create( + &gpu, + VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_CURSOR_WIDTH / 2, D_CURSOR_HEIGHT) + ) + .hdr_type + ); + // invalid rect region even resource is exist + update_cursor(&gpu, D_RES_ID, D_SCANOUT_ID); + assert!(current_curosr_check(&dpy, &image_0)); + + tear_down(dpy, gpu); +} + +#[test] +fn invalid_cmd_dfx() { + let image_size = cal_image_hostmem(D_FMT, D_WIDTH, D_HEIGHT); + let image_size = image_size.0.unwrap() as u64; + + let mut gpu_cfg = GpuDevConfig::default(); + gpu_cfg.max_hostmem = image_size; + + let (dpy, gpu) = set_up(&gpu_cfg); + gpu.borrow_mut().allocator.borrow_mut().alloc(image_size); + + // invalid cmd + assert_eq!(VIRTIO_GPU_RESP_ERR_UNSPEC, invalid_cmd_test(&gpu).hdr_type); + + tear_down(dpy, gpu); +} + +#[test] +fn crash_dfx() { + let image_size = cal_image_hostmem(D_FMT, D_WIDTH, D_HEIGHT); + let image_size = image_size.0.unwrap() as u64; + + let mut gpu_cfg = GpuDevConfig::default(); + gpu_cfg.max_hostmem = image_size; + + let (dpy, gpu) = set_up(&gpu_cfg); + gpu.borrow_mut().allocator.borrow_mut().alloc(image_size); + + // invalid request header length + let mut hdr = VirtioGpuCtrlHdr::default(); + hdr.hdr_type = VIRTIO_GPU_CMD_GET_DISPLAY_INFO; + let mut resp = 
VirtioGpuDisplayInfo::default(); + resp.header.hdr_type = 0x1234; // will not change because req has been ignored + + let temp = hdr.as_bytes(); + let slice = &temp[4..]; + gpu.borrow_mut() + .submit_request(true, slice, None, None, Some(&mut resp), false); + + // expect has no resp from backend. + let time_out = Instant::now() + Duration::from_secs(2); + loop { + gpu.borrow_mut().state.borrow().clock_step(); + assert!(!gpu + .borrow() + .device + .borrow() + .queue_was_notified(gpu.borrow().ctrl_q.clone())); + if Instant::now() > time_out { + assert_eq!(0x1234, resp.header.hdr_type); + break; + } + } + + // invalid hdr_ctx + let mut hdr = VirtioGpuCtrlHdr::default(); + hdr.hdr_type = VIRTIO_GPU_CMD_RESOURCE_CREATE_2D; + let hdr_ctx = VirtioGpuResourceCreate2d::new(D_RES_ID, D_FMT, D_WIDTH, D_HEIGHT); + let mut resp = VirtioGpuCtrlHdr::default(); + resp.hdr_type = 0x1234; // will not change because req has been ignored + + let temp = hdr_ctx.as_bytes(); + let slice = &temp[4..]; + gpu.borrow_mut().submit_request( + true, + hdr.as_bytes(), + Some(slice), + None, + Some(&mut resp), + false, + ); + + // expect has no resp from backend. + let time_out = Instant::now() + Duration::from_secs(2); + loop { + gpu.borrow_mut().state.borrow().clock_step(); + assert!(!gpu + .borrow() + .device + .borrow() + .queue_was_notified(gpu.borrow().ctrl_q.clone())); + if Instant::now() > time_out { + assert_eq!(0x1234, resp.hdr_type); + break; + } + } + + tear_down(dpy, gpu); +} diff --git a/tests/mod_test/tests/virtio_test.rs b/tests/mod_test/tests/virtio_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..c306f3592a2d417d0a8c702f356f09b19b973838 --- /dev/null +++ b/tests/mod_test/tests/virtio_test.rs @@ -0,0 +1,2275 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
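The virtio-blk tests in this file lay out split virtqueues with get_vring_size and VIRTIO_PCI_VRING_ALIGN. A short sketch of the virtio 1.0 split-ring sizing they rely on, assuming get_vring_size in the test library follows the standard formula:

fn align_up(value: u32, align: u32) -> u32 {
    (value + align - 1) & !(align - 1)
}

// Descriptor table, then the avail ring, then the used ring aligned up to
// `align` (VIRTIO_PCI_VRING_ALIGN, i.e. 4096 for virtio-pci).
fn vring_bytes(queue_size: u32, align: u32) -> u32 {
    let desc = 16 * queue_size; // 16-byte descriptors
    let avail = 2 * (3 + queue_size); // flags + idx + ring[] + used_event
    let used = 2 * 3 + 8 * queue_size; // flags + idx + avail_event + 8-byte used elems
    align_up(desc + avail, align) + used
}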
+ +use std::cell::RefCell; +use std::mem::size_of; +use std::rc::Rc; + +use rand::Rng; +use serde_json::json; + +use mod_test::libdriver::malloc::GuestAllocator; +use mod_test::libdriver::virtio::{ + get_vring_size, TestVirtQueue, TestVringIndirectDesc, VirtioDeviceOps, VringDesc, + VIRTIO_CONFIG_S_NEEDS_RESET, VIRTIO_F_VERSION_1, VIRTIO_PCI_VRING_ALIGN, + VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC, VRING_AVAIL_F_NO_INTERRUPT, + VRING_DESC_F_INDIRECT, VRING_DESC_F_NEXT, VRING_DESC_F_WRITE, VRING_DESC_SIZE, +}; +use mod_test::libdriver::virtio_block::{ + add_blk_request, set_up, tear_down, virtio_blk_read, virtio_blk_request, virtio_blk_write, + TestVirtBlkReq, DEFAULT_IO_REQS, REQ_ADDR_LEN, REQ_DATA_LEN, REQ_STATUS_OFFSET, TIMEOUT_US, + VIRTIO_BLK_S_IOERR, VIRTIO_BLK_S_OK, VIRTIO_BLK_T_IN, VIRTIO_BLK_T_OUT, +}; +use mod_test::libdriver::virtio_pci_modern::{TestVirtioPciDev, VirtioPciCommonCfg}; +use mod_test::libtest::TestState; +use mod_test::utils::{ImageType, TEST_IMAGE_SIZE}; +use util::offset_of; + +fn add_request( + test_state: Rc>, + alloc: Rc>, + vq: Rc>, + req_type: u32, + sector: u64, +) -> (u32, u64) { + add_blk_request(test_state, alloc, vq, req_type, sector, false) +} + +fn virtio_read( + blk: Rc>, + test_state: Rc>, + alloc: Rc>, + vq: Rc>, + sector: u64, +) { + virtio_blk_read(blk, test_state, alloc, vq, sector, false); +} + +fn virtio_write( + blk: Rc>, + test_state: Rc>, + alloc: Rc>, + vq: Rc>, + sector: u64, +) { + virtio_blk_write(blk, test_state, alloc, vq, sector, false); +} + +fn virtio_request( + test_state: Rc>, + alloc: Rc>, + req: TestVirtBlkReq, +) -> u64 { + virtio_blk_request(test_state, alloc, req, false) +} + +fn send_one_request( + blk: Rc>, + test_state: Rc>, + alloc: Rc>, + vq: Rc>, +) { + let (free_head, req_addr) = + add_request(test_state.clone(), alloc, vq.clone(), VIRTIO_BLK_T_OUT, 0); + blk.borrow().virtqueue_notify(vq.clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + vq, + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let status = test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET); + assert_eq!(status, VIRTIO_BLK_S_OK); +} + +fn check_stratovirt_status(test_state: Rc>) { + let ret = test_state + .borrow() + .qmp("{\"execute\": \"qmp_capabilities\"}"); + assert_eq!(*ret.get("return").unwrap(), json!({})); +} + +fn init_device_step( + blk: Rc>, + test_state: Rc>, + alloc: Rc>, + tests: Vec<[usize; 8]>, +) { + for elem in tests { + let mut vqs: Vec>> = Vec::new(); + for j in elem.iter() { + match j { + 1 => blk.borrow_mut().reset(), + 2 => blk.borrow().set_acknowledge(), + 3 => blk.borrow().set_driver(), + 4 => blk.borrow_mut().negotiate_features(1 << VIRTIO_F_VERSION_1), + 5 => blk.borrow_mut().set_features_ok(), + 7 => { + blk.borrow_mut().pci_dev.enable_msix(None); + blk.borrow_mut() + .setup_msix_configuration_vector(alloc.clone(), 0); + vqs = blk + .borrow_mut() + .init_virtqueue(test_state.clone(), alloc.clone(), 1); + } + 8 => { + blk.borrow().set_driver_ok(); + } + 9 => blk.borrow().set_status(128), + _ => continue, + } + } + + // Try to send write and read request to StratoVirt, ignore + // the interrupt from device. 
+ if !vqs.is_empty() { + let (_, _) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + 0, + ); + blk.borrow().virtqueue_notify(vqs[0].clone()); + + let (_, _) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_IN, + 0, + ); + blk.borrow().virtqueue_notify(vqs[0].clone()); + } + + check_stratovirt_status(test_state.clone()); + } +} + +fn check_req_result( + blk: Rc>, + test_state: Rc>, + vq: Rc>, + addr: u64, + timeout_us: u64, +) { + let status = blk.borrow().req_result(test_state, addr, timeout_us); + assert!(!blk.borrow().queue_was_notified(vq)); + assert_eq!(status, VIRTIO_BLK_S_OK); +} + +fn check_queue(blk: Rc>, desc: u64, avail: u64, used: u64) { + let bar = blk.borrow().bar; + let common_base = u64::from(blk.borrow().common_base); + let reqs = [ + (offset_of!(VirtioPciCommonCfg, queue_desc_lo), desc), + (offset_of!(VirtioPciCommonCfg, queue_desc_hi), desc >> 32), + (offset_of!(VirtioPciCommonCfg, queue_avail_lo), avail), + (offset_of!(VirtioPciCommonCfg, queue_avail_hi), avail >> 32), + (offset_of!(VirtioPciCommonCfg, queue_used_lo), used), + (offset_of!(VirtioPciCommonCfg, queue_used_hi), used >> 32), + ]; + for (offset, value) in reqs { + let addr = blk + .borrow() + .pci_dev + .io_readl(bar, common_base + offset as u64); + assert_eq!(addr, value as u32); + } +} + +fn do_event_idx_with_flag(flag: u16) { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1 | 1 << VIRTIO_RING_F_EVENT_IDX, + 1, + ); + + let (free_head, mut req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + 0, + ); + vqs[0].borrow().set_used_event(test_state.clone(), 1); + blk.borrow().virtqueue_notify(vqs[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + vqs[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let status = test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET); + assert_eq!(status, VIRTIO_BLK_S_OK); + + // DEFAULT_IO_REQS write requests: + // Write "TEST" to sector 0 to DEFAULT_IO_REQS. + for i in 1..DEFAULT_IO_REQS { + (_, req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + i, + ); + } + + // Set avail->used_event to DEFAULT_IO_REQS which the right value is DEFAULT_IO_REQS - 1, + // it will not get the interrupt which means event index feature works. + vqs[0] + .borrow() + .set_used_event(test_state.clone(), DEFAULT_IO_REQS as u16); + blk.borrow().virtqueue_notify(vqs[0].clone()); + check_req_result( + blk.clone(), + test_state.clone(), + vqs[0].clone(), + req_addr + REQ_STATUS_OFFSET, + TIMEOUT_US, + ); + + // Create two write requests, the avail->used_event will be the update to the right value. + // It will get the interrupt from device. + let mut free_head = 0_u32; + for i in DEFAULT_IO_REQS..DEFAULT_IO_REQS * 2 { + (free_head, _) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + i, + ); + } + + // Set flag to avail->flag. + test_state.borrow().writew(vqs[0].borrow().avail, flag); + blk.borrow().virtqueue_notify(vqs[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + vqs[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + // Read the content in sector DEFAULT_IO_REQS * 2 - 1. 
+ virtio_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + DEFAULT_IO_REQS * 2 - 1, + ); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Feature Test. +/// Driver don't enable feature, and failed to do the I/O request. +/// TestStep: +/// 1. Init device: no virtio feature negotiation. +/// 2. Do the I/O request. +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 1/3/4: success. +/// 2: device can't handle the io request. +#[test] +fn virtio_feature_none() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk + .borrow_mut() + .init_device(test_state.clone(), alloc.clone(), 0, 1); + + let mut req_addr = 0_u64; + for i in 0..DEFAULT_IO_REQS { + (_, req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + i, + ); + } + blk.borrow().virtqueue_notify(vqs[0].clone()); + + let status = test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET); + assert_eq!(status, 0xff); + + check_stratovirt_status(test_state.clone()); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Feature Test. +/// Driver just enable VIRTIO_F_VERSION_1 feature, and succeed to do the I/O request. +/// TestStep: +/// 1. Init device. +/// 2. Do the I/O request: +/// 1) avail->flags with VRING_AVAIL_F_NO_INTERRUPT; +/// 2) avail->flags with not VRING_AVAIL_F_NO_INTERRUPT; +/// 3. For different avail->flags: +/// 1) check the request status, it has been handled. +/// 2) it will get the interrupt from device. +/// 4. Destroy device. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn virtio_feature_vertion_1() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + // 1) avail->flags with VRING_AVAIL_F_NO_INTERRUPT(1). + vqs[0] + .borrow() + .set_avail_flags(test_state.clone(), VRING_AVAIL_F_NO_INTERRUPT); + let mut free_head = 0_u32; + let mut req_addr = 0_u64; + for i in 0..DEFAULT_IO_REQS { + (free_head, req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + i, + ); + } + assert!(!blk.borrow().queue_was_notified(vqs[0].clone())); + blk.borrow().virtqueue_notify(vqs[0].clone()); + + // need be changed. + blk.borrow().poll_used_elem( + test_state.clone(), + vqs[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + false, + ); + + assert!(!blk.borrow().queue_was_notified(vqs[0].clone())); + let status = test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET); + assert_eq!(status, VIRTIO_BLK_S_OK); + + // 2) avail->flags with no VRING_AVAIL_F_NO_INTERRUPT. + vqs[0].borrow().set_avail_flags(test_state.clone(), 0); + virtio_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + DEFAULT_IO_REQS, + ); + virtio_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + DEFAULT_IO_REQS, + ); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Driver just enable VIRTIO_F_VERSION_1|VIRTIO_RING_F_INDIRECT_DESC feature, +/// and succeed to do the I/O request. +/// TestStep: +/// 1. Init device. +/// 2. Do the I/O request(indirect and indirect + normal). +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. 
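The indirect case exercised below replaces a chain in the main ring with one descriptor that points at a side table of ordinary descriptors. A sketch of that top-level entry per the split-ring layout; the struct mirrors the test library's VringDesc but is purely illustrative:

// Field layout per the virtio 1.0 split ring; only for illustration.
#[repr(C)]
struct Desc {
    addr: u64,  // guest-physical buffer (or table) address
    len: u32,   // length in bytes
    flags: u16, // VRING_DESC_F_* bits
    next: u16,  // next descriptor in a chain
}

const F_INDIRECT: u16 = 0x4; // VRING_DESC_F_INDIRECT

fn indirect_entry(table_addr: u64, entries: u32) -> Desc {
    Desc {
        addr: table_addr,  // points at a table of ordinary descriptors
        len: entries * 16, // table size in bytes, 16 bytes per descriptor
        flags: F_INDIRECT, // the real buffers are described inside the table
        next: 0,           // chaining continues inside the table, not here
    }
}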
+#[test] +fn virtio_feature_indirect() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1 | 1 << VIRTIO_RING_F_INDIRECT_DESC, + 1, + ); + + let mut free_head = 0_u32; + let mut req_addr = 0_u64; + for i in 0..DEFAULT_IO_REQS { + let mut blk_req = TestVirtBlkReq::new(VIRTIO_BLK_T_OUT, 1, i, REQ_DATA_LEN as usize); + blk_req.data.push_str("TEST"); + req_addr = virtio_request(test_state.clone(), alloc.clone(), blk_req); + free_head = vqs[0] + .borrow_mut() + .add(test_state.clone(), req_addr, 8, false); + let offset = u64::from(free_head) * VRING_DESC_SIZE + offset_of!(VringDesc, flags) as u64; + test_state + .borrow() + .writew(vqs[0].borrow().desc + offset, VRING_DESC_F_NEXT); + test_state + .borrow() + .writew(vqs[0].borrow().desc + offset + 2, free_head as u16 + 1); + let mut indirect_req = TestVringIndirectDesc::new(); + indirect_req.setup(alloc.clone(), test_state.clone(), 2); + indirect_req.add_desc(test_state.clone(), req_addr + 8, 520, false); + indirect_req.add_desc(test_state.clone(), req_addr + REQ_STATUS_OFFSET, 1, true); + vqs[0] + .borrow_mut() + .add_indirect(test_state.clone(), indirect_req, true); + } + blk.borrow().virtqueue_notify(vqs[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + vqs[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + let status = test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET); + assert_eq!(status, VIRTIO_BLK_S_OK); + + let blk_req = TestVirtBlkReq::new(VIRTIO_BLK_T_IN, 1, 0, REQ_DATA_LEN as usize); + let req_addr = virtio_request(test_state.clone(), alloc.clone(), blk_req); + free_head = vqs[0] + .borrow_mut() + .add(test_state.clone(), req_addr, 8, false); + let offset = u64::from(free_head) * VRING_DESC_SIZE + offset_of!(VringDesc, flags) as u64; + test_state + .borrow() + .writew(vqs[0].borrow().desc + offset, VRING_DESC_F_NEXT); + test_state + .borrow() + .writew(vqs[0].borrow().desc + offset + 2, free_head as u16 + 1); + let mut indirect_req = TestVringIndirectDesc::new(); + indirect_req.setup(alloc.clone(), test_state.clone(), 2); + indirect_req.add_desc(test_state.clone(), req_addr + 8, 8, false); + indirect_req.add_desc( + test_state.clone(), + req_addr + u64::from(REQ_ADDR_LEN), + 513, + true, + ); + vqs[0] + .borrow_mut() + .add_indirect(test_state.clone(), indirect_req, true); + blk.borrow() + .kick_virtqueue(test_state.clone(), vqs[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + vqs[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + let status = test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET); + assert_eq!(status, VIRTIO_BLK_S_OK); + + assert_eq!( + String::from_utf8( + test_state + .borrow() + .memread(req_addr + u64::from(REQ_ADDR_LEN), 4) + ) + .unwrap(), + "TEST" + ); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Driver just enable VIRTIO_F_VERSION_1|VIRTIO_RING_F_EVENT_IDX feature, +/// and succeed to do the I/O request. +/// TestStep: +/// 1. Init device with VIRTIO_F_VERSION_1|VIRTIO_RING_F_EVENT_IDX feature. +/// 2. Do the I/O request: +/// 1) create 5 request, and modify avail->used_event to 5. +/// 2) If the event idx works, we will not get the interrupt from device. +/// 3) create 5 request, and use the right avail->used_event. +/// 4) we will get the interrupt from device. +/// 5) read the sector 10 to check the write content, which is same as write. +/// 3. Destroy device. 
+/// Expect: +/// 1/2/3: success. +#[test] +fn virtio_feature_event_idx() { + do_event_idx_with_flag(0); +} + +/// Driver just enable these features: +/// VIRTIO_F_VERSION_1 | VIRTIO_RING_F_INDIRECT_DESC | VIRTIO_RING_F_EVENT_IDX +/// and succeed to do the I/O request(normal + indirect) which has opened the event idx. +/// TestStep: +/// 1. Init device. +/// 2. Do the I/O request(indirect and indirect + normal). +/// 1) create 5 request(with indirect), and modify avail->used_event to 5. +/// 2) If the event idx works, we will not get the interrupt from device. +/// 3) create 5 request, and use the right avail->used_event. +/// 4) we will get the interrupt from device. +/// 5) read the sector 10 to check the write content, which is same as write. +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn virtio_feature_indirect_and_event_idx() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1 | 1 << VIRTIO_RING_F_INDIRECT_DESC | 1 << VIRTIO_RING_F_EVENT_IDX, + 1, + ); + + send_one_request( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + ); + + // Test write. + let mut blk_req = TestVirtBlkReq::new(VIRTIO_BLK_T_OUT, 1, 0, REQ_DATA_LEN as usize); + blk_req.data.push_str("TEST"); + let req_addr = virtio_request(test_state.clone(), alloc.clone(), blk_req); + let free_head = vqs[0] + .borrow_mut() + .add(test_state.clone(), req_addr, REQ_ADDR_LEN, false); + let offset = u64::from(free_head) * VRING_DESC_SIZE + offset_of!(VringDesc, flags) as u64; + test_state + .borrow() + .writew(vqs[0].borrow().desc + offset, VRING_DESC_F_NEXT); + test_state + .borrow() + .writew(vqs[0].borrow().desc + offset + 2, free_head as u16 + 1); + // 2 desc elems in indirect desc table. + let mut indirect_req = TestVringIndirectDesc::new(); + indirect_req.setup(alloc.clone(), test_state.clone(), 2); + indirect_req.add_desc( + test_state.clone(), + req_addr + u64::from(REQ_ADDR_LEN), + REQ_DATA_LEN, + false, + ); + indirect_req.add_desc(test_state.clone(), req_addr + REQ_STATUS_OFFSET, 1, true); + vqs[0] + .borrow_mut() + .add_indirect(test_state.clone(), indirect_req, true); + + let mut req_addr = 0_u64; + for i in 2..DEFAULT_IO_REQS { + (_, req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + i, + ); + } + + // Set avail->used_event to DEFAULT_IO_REQS which the right value is DEFAULT_IO_REQS - 1, + // it will not get the interrupt which means event index feature works. + vqs[0] + .borrow() + .set_used_event(test_state.clone(), DEFAULT_IO_REQS as u16); + blk.borrow().virtqueue_notify(vqs[0].clone()); + check_req_result( + blk.clone(), + test_state.clone(), + vqs[0].clone(), + req_addr + REQ_STATUS_OFFSET, + TIMEOUT_US, + ); + assert_eq!( + vqs[0].borrow().get_avail_event(test_state.clone()), + DEFAULT_IO_REQS as u16 + ); + + // Create two write requests, the avail->used_event will be the update to the right value. + // It will get the interrupt from device. + let mut free_head = 0_u32; + for i in DEFAULT_IO_REQS..DEFAULT_IO_REQS * 2 { + (free_head, _) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + i, + ); + } + + blk.borrow().virtqueue_notify(vqs[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + vqs[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + // Read the content in sector DEFAULT_IO_REQS * 2 - 1. 
+ virtio_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + DEFAULT_IO_REQS * 2 - 1, + ); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Setting abnormal status in device initialization. +/// TestStep: +/// 1. Init device. +/// 1) set device status: special status and random status. +/// 2) ACKNOWLEDGE -> DRIVER -> DRIVER -> negotiate_features -> FEATURES_OK -> setup_virtqueue +/// -> DRIVER_OK. +/// 2. Do the I/O request. +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 1/2: success or failure. +/// 3/4: success. +#[test] +fn virtio_init_device_abnormal_status() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + // Test some special status. + let status = [31, 0, 2, 16, 31, 0, 1, 16, 31, 0, 7, 16, 31, 64, 128]; + for i in 0..status.len() { + blk.borrow().set_status(status[i]); + if i % 4 == 0 { + blk.borrow_mut().negotiate_features(1 << VIRTIO_F_VERSION_1); + } + if i % 7 == 0 { + blk.borrow_mut().pci_dev.enable_msix(None); + blk.borrow_mut() + .setup_msix_configuration_vector(alloc.clone(), 0); + blk.borrow_mut() + .init_virtqueue(test_state.clone(), alloc.clone(), 1); + } + } + + // Test 16 times of random status in [0, 0xff). + let mut rng = rand::thread_rng(); + for i in 0..16 { + blk.borrow().set_status(rng.gen_range(0..0xff)); + if i % 4 == 0 { + blk.borrow_mut().negotiate_features(1 << VIRTIO_F_VERSION_1); + } + if i % 7 == 0 { + blk.borrow_mut().pci_dev.enable_msix(None); + blk.borrow_mut() + .setup_msix_configuration_vector(alloc.clone(), 0); + blk.borrow_mut() + .init_virtqueue(test_state.clone(), alloc.clone(), 1); + } + } + + blk.borrow_mut().set_acknowledge(); + blk.borrow_mut().set_driver(); + blk.borrow_mut().negotiate_features(1 << VIRTIO_F_VERSION_1); + blk.borrow_mut().set_features_ok(); + blk.borrow_mut().pci_dev.enable_msix(None); + blk.borrow_mut() + .setup_msix_configuration_vector(alloc.clone(), 0); + let vqs = blk + .borrow_mut() + .init_virtqueue(test_state.clone(), alloc.clone(), 1); + blk.borrow_mut().set_driver_ok(); + + // 2. Do the I/O request. + for i in 0..DEFAULT_IO_REQS { + add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + i, + ); + } + blk.borrow().virtqueue_notify(vqs[0].clone()); + + // 3. Send qmp to StratoVirt. + check_stratovirt_status(test_state.clone()); + + // 4. Destroy device. + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Setting abnormal feature in device initialization. +/// TestStep: +/// 1. Init device. +/// negotiate unsupported features: +/// 1) 1 << 63; +/// 2) 1 << 63 | 1 << VIRTIO_F_VERSION_1; +/// 2. Do the I/O request. +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 1/2: success or failure. +/// 3/4: success. +#[test] +fn virtio_init_device_abnormal_features() { + for i in 0..2 { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + // 1. Init device. + blk.borrow_mut().reset(); + blk.borrow_mut().set_acknowledge(); + blk.borrow_mut().set_driver(); + // Set unsupported feature 1 << 63 or (1 << 63 | 1 << VIRTIO_F_VERSION_1). 
+ let mut features = 1 << 63; + if i == 0 { + features |= 1 << VIRTIO_F_VERSION_1; + } + blk.borrow_mut().negotiate_features(features); + blk.borrow_mut().set_features_ok(); + blk.borrow_mut().pci_dev.enable_msix(None); + blk.borrow_mut() + .setup_msix_configuration_vector(alloc.clone(), 0); + let vqs = blk + .borrow_mut() + .init_virtqueue(test_state.clone(), alloc.clone(), 1); + blk.borrow_mut().set_driver_ok(); + + // 2. Do the I/O request. + if i == 0 { + virtio_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + virtio_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + } else { + for i in 0..DEFAULT_IO_REQS { + add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + i, + ); + } + blk.borrow().virtqueue_notify(vqs[0].clone()); + } + + // 3. Send qmp to StratoVirt. + check_stratovirt_status(test_state.clone()); + + // 4. Destroy device. + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs, + image_path.clone(), + ); + } +} + +/// Setting abnormal vring info in device initialization. +/// TestStep: +/// 1. Init device with abnormal steps: +/// 1) use invalid value to select queue(not enable multi-queue): +/// 2, u16::MAX +/// 2) set invalid queue size: +/// 0, 255, 1<<15, u16::MAX +/// 3) set address overlap in desc/avail/used; +/// 4) set not aligned desc/avail/used address; +/// 5) set invalid desc/avail/used address: +/// 0, 1 << 48, u64::MAX +/// 6) set 0 to enable vq; +/// 7) check if the written queue info is right. +/// 2. Do the I/O request. +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 1/2: success or failure. +/// 3/4: success. +#[test] +fn virtio_init_device_abnormal_vring_info() { + // (err_type, value, ack, device_status) + let reqs = [ + (0, u64::from(u16::MAX), 0, 0), + (0, 2, 0, 0), + (1, 0_u64, 0xff, 0), + (1, 255, 0xff, 0), + (1, 1 << 15, 0xff, 0), + (1, u64::from(u16::MAX), 0xff, 0), + (2, 0, 0xff, 0), + (3, 0, 0xff, 0), + (4, 0, 0xff, 0), + (5, 0, 0xff, 0), + (6, 0, 0xff, 0), + (6, 1 << 48, 0xff, 0), + (6, u64::MAX, 0xff, 0), + (7, 0, 0xff, 0), + (7, 1 << 48, 0xff, 0), + (7, u64::MAX, 0xff, 0), + (8, 0, 0xff, 0), + (8, 1 << 48, 0xff, 0), + (8, u64::MAX, 0xff, 0), + (9, 0, 0xff, 0), + (10, 1, 0, 0), + ]; + + for (err_type, value, ack, device_status) in reqs { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + // 1. Init device. + blk.borrow_mut().reset(); + assert_eq!(blk.borrow().get_generation(), 0); + blk.borrow_mut().set_acknowledge(); + blk.borrow_mut().set_driver(); + blk.borrow_mut().negotiate_features(1 << VIRTIO_F_VERSION_1); + blk.borrow_mut().set_features_ok(); + blk.borrow_mut().pci_dev.enable_msix(None); + blk.borrow_mut() + .setup_msix_configuration_vector(alloc.clone(), 0); + + let mut vqs = Vec::new(); + let vq = Rc::new(RefCell::new(TestVirtQueue::new())); + let features = blk.borrow().get_guest_features(); + // Set invalid value to select queue. + if err_type == 0 { + let q_select = blk.borrow().get_queue_select(); + assert_ne!(q_select, value as u16); + blk.borrow().queue_select(value as u16); + } + + let queue_size = u32::from(blk.borrow().get_queue_size()); + + // Set invalid queue size. 
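+        // Per the virtio spec a split-ring queue size must be a power of two and no
+        // larger than 32768 (nor larger than the size the device reports): 0 and 255
+        // are not powers of two, while 1 << 15 and u16::MAX exceed the device maximum.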
+ if err_type == 1 { + blk.borrow().set_queue_size(value as u16); + assert_eq!(blk.borrow().get_queue_size(), value as u16); + } + + vq.borrow_mut().index = 0; + vq.borrow_mut().size = queue_size; + vq.borrow_mut().free_head = 0; + vq.borrow_mut().num_free = queue_size; + vq.borrow_mut().align = VIRTIO_PCI_VRING_ALIGN; + vq.borrow_mut().indirect = (features & (1 << VIRTIO_RING_F_INDIRECT_DESC)) != 0; + vq.borrow_mut().event = (features & (1 << VIRTIO_RING_F_EVENT_IDX)) != 0; + + let addr = alloc.borrow_mut().alloc(u64::from(get_vring_size( + queue_size, + VIRTIO_PCI_VRING_ALIGN, + ))); + + vq.borrow_mut().desc = addr; + let avail = addr + u64::from(queue_size * size_of::() as u32) + 16; + vq.borrow_mut().avail = avail; + let used = (avail + + u64::from(size_of::() as u32 * (3 + queue_size)) + + u64::from(VIRTIO_PCI_VRING_ALIGN) + - 1) + & (!(u64::from(VIRTIO_PCI_VRING_ALIGN) - 1) + 16); + vq.borrow_mut().used = used + 16; + + match err_type { + 2 => { + // Test desc and avail address overlap. + vq.borrow_mut().desc = addr + 16 + 1; + } + 3 => { + // TEST desc not aligned + vq.borrow_mut().desc = addr + 1; + } + 4 => { + // TEST avail not aligned. + vq.borrow_mut().avail = avail + 1; + } + 5 => { + // TEST used not aligned. + vq.borrow_mut().used = used + 1; + } + 6 => { + // TEST invalid desc address. + if value != u64::MAX { + vq.borrow_mut().desc = value; + } + } + 7 => { + // TEST invalie avail address. + if value != u64::MAX { + vq.borrow_mut().avail = value; + } + } + 8 => { + // TEST invalie used address. + if value != u64::MAX { + vq.borrow_mut().used = value; + } + } + _ => (), + } + + let mut desc = vq.borrow().desc; + let mut avail = vq.borrow().avail; + let mut used = vq.borrow().used; + if queue_size > 0 { + vq.borrow().vring_init(test_state.clone()); + } + // TEST invalid desc address. + if err_type == 6 && value == u64::MAX { + desc = value; + } + // TEST invalid avail address. + if err_type == 7 && value == u64::MAX { + avail = value; + } + // TEST invalid used address. + if err_type == 8 && value == u64::MAX { + used = value; + } + blk.borrow().activate_queue(desc, avail, used); + // TEST if the written queue info is right. + if err_type == 10 { + check_queue(blk.clone(), desc, avail, used); + } + + let notify_off = blk.borrow().pci_dev.io_readw( + blk.borrow().bar, + u64::from(blk.borrow().common_base) + + offset_of!(VirtioPciCommonCfg, queue_notify_off) as u64, + ); + vq.borrow_mut().queue_notify_off = u64::from(blk.borrow().notify_base) + + u64::from(notify_off) * u64::from(blk.borrow().notify_off_multiplier); + + let offset = offset_of!(VirtioPciCommonCfg, queue_enable) as u64; + // TEST enable vq with 0 + if err_type == 9 { + blk.borrow().pci_dev.io_writew( + blk.borrow().bar, + u64::from(blk.borrow().common_base) + offset, + 0, + ); + } else { + blk.borrow().pci_dev.io_writew( + blk.borrow().bar, + u64::from(blk.borrow().common_base) + + offset_of!(VirtioPciCommonCfg, queue_enable) as u64, + 1, + ); + if err_type == 10 { + let status = blk.borrow().pci_dev.io_readw( + blk.borrow().bar, + u64::from(blk.borrow().common_base) + offset, + ); + assert_eq!(status, 1); + } + } + + blk.borrow() + .setup_virtqueue_intr(1, alloc.clone(), vq.clone()); + vqs.push(vq); + + blk.clone().borrow_mut().set_driver_ok(); + + // 2. Do the I/O request. 
+ let mut req_addr: u64 = 0; + if queue_size > 0 { + (_, req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + 0, + ); + } + blk.borrow().virtqueue_notify(vqs[0].clone()); + assert_eq!(test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET), ack); + assert_eq!( + blk.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET, + device_status + ); + + // 3. Send qmp to StratoVirt. + check_stratovirt_status(test_state.clone()); + + // ecover the addr for free. + vqs[0].borrow_mut().desc = addr; + + // 4. Destroy device. + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs, + image_path.clone(), + ); + } +} + +/// Init device out of order test 1. +/// TestStep: +/// 1. Abnormal init device. +/// 1.1->1.3->1.2->1.4->1.5->1.6->1.7->1.8 +/// 1.1->1.2->1.4->1.3->1.5->1.6->1.7->1.8 +/// 1.1->1.2->1.3->1.5->1.4->1.6->1.7->1.8 +/// 1.1->1.2->1.3->1.4->1.6->1.5->1.7->1.8 +/// 1.1->1.2->1.3->1.4->1.7->1.6->1.5->1.8 +/// 2. Normal init device. +/// 3. Write and read. +/// 4. Destroy device. +/// Expect: +/// 1/2: success or failed, stratovirt process status is normal. +/// 3/4: success. +#[test] +fn virtio_init_device_out_of_order_1() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let tests = vec![ + [1, 3, 2, 4, 5, 6, 7, 8], + [1, 2, 4, 3, 5, 6, 7, 8], + [1, 2, 3, 5, 4, 6, 7, 8], + [1, 2, 3, 4, 6, 5, 7, 8], + [1, 2, 3, 4, 7, 6, 5, 8], + ]; + + init_device_step(blk.clone(), test_state.clone(), alloc.clone(), tests); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + virtio_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + virtio_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Init device out of order test 2. +/// TestStep: +/// 1. Abnormal init device. +/// 1.1->1.2->1.3->1.4->1.8->1.6->1.7->1.5 +/// 1.1->1.3->1.4->1.5->1.6->1.7->1.8 +/// 1.1->1.2->1.4->1.5->1.6->1.7->1.8 +/// 1.1->1.2->1.3->1.4->1.6->1.7->1.8 +/// 1.1->1.2->1.3->1.4->1.5->1.6->1.8 +/// 2. Normal init device. +/// 3. Write and read. +/// 4. Destroy device. +/// Expect: +/// 1/2: success or failed, stratovirt process status is normal. +/// 3/4: success. +#[test] +fn virtio_init_device_out_of_order_2() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let tests = vec![ + [1, 2, 3, 4, 8, 6, 7, 5], + [1, 3, 4, 5, 6, 7, 8, 0], + [1, 2, 4, 5, 6, 7, 8, 0], + [1, 2, 3, 4, 6, 7, 8, 0], + [1, 2, 3, 4, 5, 6, 8, 0], + ]; + + init_device_step(blk.clone(), test_state.clone(), alloc.clone(), tests); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + virtio_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + virtio_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Init device out of order test 3. +/// TestStep: +/// 1. Abnormal init device. +/// 1.1->1.2->1.3->1.4->1.5->1.6->1.7 +/// 1.1->1.2->1.3->1.4->1.9 +/// 1.1->1.2->1.3->1.5->1.8 +/// 1.1->1.2->1.3->1.4->1.9(FAILED)->normal init process +/// 1.1->1.2->1.3->1.4->1.9(FAILED)->1.2->1.3->1.4->1.5->1.6->1.7->1.8 +/// 2. Normal init device. +/// 3. Write and read. +/// 4. Destroy device. 
+/// Expect: +/// 1/2: success or failed, stratovirt process status is normal. +/// 3/4: success. +#[test] +fn virtio_init_device_out_of_order_3() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let tests = vec![ + [1, 2, 3, 4, 5, 6, 7, 0], + [1, 2, 3, 4, 9, 0, 0, 0], + [1, 2, 3, 5, 8, 0, 0, 0], + [1, 2, 3, 4, 9, 0, 0, 0], + [1, 2, 3, 4, 5, 6, 7, 8], + [1, 2, 3, 4, 9, 0, 0, 0], + [2, 3, 4, 5, 6, 7, 8, 0], + ]; + + init_device_step(blk.clone(), test_state.clone(), alloc.clone(), tests); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + virtio_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + virtio_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Repeat the initialization operation. +/// TestStep: +/// 1. Init device. +/// reset -> reset -> ACKNOWLEDGE -> ACKNOWLEDGE -> DRIVER -> DRIVER -> +/// negotiate_features -> FEATURES_OK -> FEATURES_OK -> setup_virtqueue -> +/// DRIVER_OK. +/// 2. Do the I/O request. +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 1/2: success or failed, stratovirt process status is normal. +/// 3/4: success. +#[test] +fn virtio_init_device_repeat() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + // Reset virtio device twice. + blk.borrow_mut().reset(); + blk.borrow_mut().reset(); + // Set ACKNOWLEDGE twice. + blk.borrow_mut().set_acknowledge(); + blk.borrow_mut().set_acknowledge(); + // Set DRIVER twice. + blk.borrow_mut().set_driver(); + blk.borrow_mut().set_driver(); + + let features = blk.borrow().get_device_features() + | 1 << VIRTIO_RING_F_INDIRECT_DESC + | 1 << VIRTIO_RING_F_EVENT_IDX; + blk.borrow_mut().negotiate_features(features); + // Set FEATURES_OK twice. + blk.borrow_mut().set_features_ok(); + blk.borrow_mut().set_features_ok(); + + let capability = blk.borrow().config_readq(0); + assert_eq!(capability, TEST_IMAGE_SIZE / 512); + + blk.borrow_mut().pci_dev.enable_msix(None); + blk.borrow_mut() + .setup_msix_configuration_vector(alloc.clone(), 0); + + let vqs = blk + .borrow_mut() + .init_virtqueue(test_state.clone(), alloc.clone(), 1); + blk.borrow_mut().set_driver_ok(); + + virtio_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + virtio_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Setting abnormal desc addr in IO request. +/// TestStep: +/// 1. Init device. +/// 2. Do the I/O request with abnormal desc[i]->addr: +/// 0, address unaligned, 0x5000, u64::MAX +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 2: success or failure. +/// 1/3/4: success. 
+#[test] +fn virtio_io_abnormal_desc_addr() { + // (addr, ack, device_status) + let reqs = [ + (0, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), + (1, 0x2, 0), + (0x5000, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), + (u64::MAX, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), + ]; + for (mut addr, ack, device_status) in reqs { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + let (_, req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + 0, + ); + if addr == 1 { + addr += req_addr; + } + test_state.borrow().writeq(vqs[0].borrow().desc, addr); + blk.borrow().virtqueue_notify(vqs[0].clone()); + + assert_eq!(test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET), ack); + assert_eq!( + blk.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET, + device_status + ); + + check_stratovirt_status(test_state.clone()); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs, + image_path.clone(), + ); + } +} + +/// Setting abnormal desc length in IO request. +/// TestStep: +/// 1. Init device. +/// 2. Do the I/O request with abnormal desc[i]->len: +/// 1) 0 with 1 request 3 desc elems; +/// 2) 0x5000 with 1 request 3 desc elems; +/// 3) u32::MAX with 1 request 3 desc elems; +/// 4) u32::MAX with 2 request to test overflow; +/// 5) total length of all desc is bigger than (1 << 32): ((1 << 32) / 64) with indirect request +/// which has 65 desc elems; +/// 6) test the invalid length of the indirect desc. +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 2: success or failure. +/// 1/3/4: success. +#[test] +fn virtio_io_abnormal_desc_len() { + // (length, num of IO, ack, device_status) + let reqs = [ + (0, 1, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), + (0x5000, 1, VIRTIO_BLK_S_IOERR, 0), + (u32::MAX, 1, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), + (u32::MAX, 2, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), + (1 << 26, 65, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), + (16, 65, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), + ]; + for (length, io_num, ack, device_status) in reqs { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + let mut req_addr: u64 = 0; + if io_num <= 1 { + (_, req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + 0, + ); + test_state.borrow().writel( + vqs[0].borrow().desc + offset_of!(VringDesc, len) as u64, + length, + ); + } else if io_num == 2 { + // Io request 1 is valid, used to create cache of desc[0]->addr. + // Io request 2 is invalid, test overflow for desc[1]->addr + desc[1]->len. 
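+            // desc[1]->addr is rewritten to u64::MAX and its len to `length` (u32::MAX
+            // in this case), so addr + len wraps around the 64-bit address space; the
+            // device is expected to fail the request rather than touch the wrapped region.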
+ for i in 0..2 { + (_, req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + i as u64, + ); + } + let req_1_addr = vqs[0].borrow().desc + VRING_DESC_SIZE; + test_state.borrow().writeq(req_1_addr, u64::MAX); + test_state + .borrow() + .writel(req_1_addr + offset_of!(VringDesc, len) as u64, length); + } else { + let mut blk_req = TestVirtBlkReq::new(VIRTIO_BLK_T_OUT, 1, 0, REQ_DATA_LEN as usize); + blk_req.data.push_str("TEST"); + req_addr = virtio_request(test_state.clone(), alloc.clone(), blk_req); + let mut indirect_req = TestVringIndirectDesc::new(); + indirect_req.setup(alloc.clone(), test_state.clone(), io_num); + for _ in 0..io_num { + indirect_req.add_desc(test_state.clone(), req_addr, length, true); + } + let indirect_desc = indirect_req.desc; + let free_head = + vqs[0] + .borrow_mut() + .add_indirect(test_state.clone(), indirect_req, true); + vqs[0].borrow().update_avail(test_state.clone(), free_head); + // Test invalid length of the indirect desc elem. + if length == 16 { + test_state.borrow().writel( + indirect_desc + offset_of!(VringDesc, len) as u64, + u32::from(u16::MAX) * (VRING_DESC_SIZE as u32 + 1), + ); + test_state.borrow().writel( + indirect_desc + offset_of!(VringDesc, flags) as u64, + u32::from(VRING_DESC_F_INDIRECT | VRING_DESC_F_NEXT), + ); + } + } + blk.borrow().virtqueue_notify(vqs[0].clone()); + + test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET); + assert_eq!(test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET), ack); + assert_eq!( + blk.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET, + device_status + ); + + check_stratovirt_status(test_state.clone()); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs, + image_path.clone(), + ); + } +} + +/// Setting abnormal desc flag in IO request, testcase 1. +/// TestStep: +/// 1. Init device, not negotiate INDIRECT_DESC feature. +/// 2. Do the I/O request with abnormal desc[i]->flags: +/// 1) add VRING_DESC_F_INDIRECT to flags +/// 2) add invalid value 16 to flags +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 2: success or failure. +/// 1/3/4: success. +#[test] +fn virtio_io_abnormal_desc_flags_1() { + // (flag, ack, device_status) + let reqs = [ + (VRING_DESC_F_INDIRECT, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), + (16, 0, 0), + ]; + for (flag, ack, device_status) in reqs { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + let (_, req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + 0, + ); + + // Add VRING_DESC_F_INDIRECT or 16 to desc[0]->flags; + let flags = test_state.borrow().readw(vqs[0].borrow().desc + 12) | flag; + test_state.borrow().writew(vqs[0].borrow().desc + 12, flags); + blk.borrow().virtqueue_notify(vqs[0].clone()); + + assert_eq!(test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET), ack); + assert_eq!( + blk.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET, + device_status + ); + check_stratovirt_status(test_state.clone()); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs, + image_path.clone(), + ); + } +} + +/// Setting abnormal desc flag in IO request, testcase 2. +/// TestStep: +/// 1. Init device, negotiate INDIRECT_DESC feature. +/// 2. Do the I/O request with abnormal desc[i]->flags: add VRING_DESC_F_INDIRECT to flags in +/// indirect desc table. +/// 3. 
Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 2: success or failure. +/// 1/3/4: success. +#[test] +fn virtio_io_abnormal_desc_flags_2() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1 | 1 << VIRTIO_RING_F_INDIRECT_DESC, + 1, + ); + + let mut blk_req = TestVirtBlkReq::new(VIRTIO_BLK_T_OUT, 1, 0, REQ_DATA_LEN as usize); + blk_req.data.push_str("TEST"); + let req_addr = virtio_request(test_state.clone(), alloc.clone(), blk_req); + let free_head = vqs[0] + .borrow_mut() + .add(test_state.clone(), req_addr, REQ_ADDR_LEN, false); + let offset = u64::from(free_head) * VRING_DESC_SIZE + offset_of!(VringDesc, flags) as u64; + test_state + .borrow() + .writew(vqs[0].borrow().desc + offset, VRING_DESC_F_NEXT); + test_state + .borrow() + .writew(vqs[0].borrow().desc + offset + 2, free_head as u16 + 1); + let mut indirect_req = TestVringIndirectDesc::new(); + indirect_req.setup(alloc.clone(), test_state.clone(), 2); + indirect_req.add_desc( + test_state.clone(), + req_addr + u64::from(REQ_ADDR_LEN), + REQ_DATA_LEN, + false, + ); + indirect_req.add_desc(test_state.clone(), req_addr + REQ_STATUS_OFFSET, 1, true); + indirect_req.set_desc_flag(test_state.clone(), 0, VRING_DESC_F_INDIRECT); + vqs[0] + .borrow_mut() + .add_indirect(test_state.clone(), indirect_req, true); + + blk.borrow().virtqueue_notify(vqs[0].clone()); + + assert_eq!( + test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET), + 0xff + ); + assert!(blk.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET > 0); + check_stratovirt_status(test_state.clone()); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Setting abnormal desc flag in IO request, testcase 3. +/// TestStep: +/// 1. Init device, negotiate INDIRECT_DESC feature. +/// 2. Do the I/O request with abnormal desc[i]->flags: add VRING_DESC_F_INDIRECT | +/// VRING_DESC_F_WRITE to flags in indirect desc table, and the device will ignore the +/// VRING_DESC_F_WRITE flag. +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 2: success or failure. +/// 1/3/4: success. 
+#[test] +fn virtio_io_abnormal_desc_flags_3() { + // (flag, ack, device_status) + let reqs = [ + (VRING_DESC_F_WRITE, 0, 0), + (VRING_DESC_F_NEXT, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), + ]; + for (flag, ack, device_status) in reqs { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1 | 1 << VIRTIO_RING_F_INDIRECT_DESC, + 1, + ); + + let mut blk_req = TestVirtBlkReq::new(VIRTIO_BLK_T_OUT, 1, 0, REQ_DATA_LEN as usize); + blk_req.data.push_str("TEST"); + let req_addr = virtio_request(test_state.clone(), alloc.clone(), blk_req); + let free_head = vqs[0] + .borrow_mut() + .add(test_state.clone(), req_addr, 8, false); + + let offset = u64::from(free_head) * VRING_DESC_SIZE + offset_of!(VringDesc, flags) as u64; + test_state + .borrow() + .writew(vqs[0].borrow().desc + offset, VRING_DESC_F_NEXT); + test_state + .borrow() + .writew(vqs[0].borrow().desc + offset + 2, free_head as u16 + 1); + let mut indirect_req = TestVringIndirectDesc::new(); + indirect_req.setup(alloc.clone(), test_state.clone(), 2); + indirect_req.add_desc(test_state.clone(), req_addr + 8, 520, false); + indirect_req.add_desc(test_state.clone(), req_addr + REQ_STATUS_OFFSET, 1, true); + vqs[0] + .borrow_mut() + .add_indirect(test_state.clone(), indirect_req, true); + + // Add VRING_DESC_F_WRITE or VRING_DESC_F_NEXT to desc[0]->flags; + let addr = vqs[0].borrow().desc + 16_u64 * u64::from(free_head + 1) + 12; + let flags = test_state.borrow().readw(addr) | flag; + test_state.borrow().writew(addr, flags); + blk.borrow().virtqueue_notify(vqs[0].clone()); + if flag == VRING_DESC_F_WRITE { + blk.borrow().poll_used_elem( + test_state.clone(), + vqs[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + } + assert_eq!(test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET), ack); + assert_eq!( + blk.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET, + device_status + ); + + check_stratovirt_status(test_state.clone()); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs, + image_path.clone(), + ); + } +} + +/// Setting abnormal desc next in IO request. +/// TestStep: +/// 1. Init device. +/// 2. Do the I/O request with abnormal desc[i]->next: +/// 1) point to the wrong place in the queue_size; +/// 2) create an circuit; +/// 3) point to the place beyond the queue_size; +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 2: success or failure. +/// 1/3/4: success. 
+#[test] +fn virtio_io_abnormal_desc_next() { + // (next, ack, device_status) + let reqs = [ + (0, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), + (16, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), + (u16::MAX, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), + ]; + for (next, ack, device_status) in reqs { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + // It will create a write request with 3 desc elems: + // desc[0]: addr, len, flags(NEXT), next(0) + // desc[1]: addr, len, flags(NEXT), next(1) + // desc[2]: addr, len, flags(WRITE), next + let (_, req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + 0, + ); + + assert_eq!(test_state.borrow().readw(vqs[0].borrow().desc + 14), 1); + // desc[1]->next = next; + test_state + .borrow() + .writew(vqs[0].borrow().desc + 16 + 14, next); + blk.borrow().virtqueue_notify(vqs[0].clone()); + + assert_eq!(test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET), ack); + assert_eq!( + blk.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET, + device_status + ); + + check_stratovirt_status(test_state.clone()); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs, + image_path.clone(), + ); + } +} + +/// Setting desc elems in abnormal place in IO request. +/// TestStep: +/// 1. Init device. +/// 2. Do the I/O request with writable desc elem before readable desc elem. +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 2: success or failure. +/// 1/3/4: success. +#[test] +fn virtio_io_abnormal_desc_elem_place() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + // It will create a read request with 3 desc elems: + // desc[0]: addr, len, flags(NEXT), next(0) + // desc[1]: addr, len, flags(NEXT|WRITE), next(1) + // desc[2]: addr, len, flags(WRITE), next + let (_, req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_IN, + 0, + ); + + // The address of desc[2]->flag. + assert_eq!(VRING_DESC_SIZE, 16); + let addr = vqs[0].borrow().desc + VRING_DESC_SIZE * 2 + 12; + assert_eq!(test_state.borrow().readw(addr), VRING_DESC_F_WRITE); + // desc[2]->flag = 0. + test_state.borrow().writew(addr, 0); + blk.borrow().virtqueue_notify(vqs[0].clone()); + + assert_eq!( + test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET), + 0xff + ); + assert!(blk.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET > 0); + + check_stratovirt_status(test_state.clone()); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Setting (queue_size + 1) indirect desc elems in IO request. +/// TestStep: +/// 1. Init device with INDIRECT feature. +/// 2. Do the I/O request with (queue_size + 1) desc elems in indirect desc table. +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 1/3/4: success. +/// 2: success or failure. 
+#[test] +fn virtio_io_abnormal_indirect_desc_elem_num() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + let queue_size = vqs[0].borrow().size as usize; + + let mut blk_req = TestVirtBlkReq::new(VIRTIO_BLK_T_OUT, 1, 0, 2 * queue_size); + blk_req.data.push_str("TEST"); + let req_addr = virtio_request(test_state.clone(), alloc.clone(), blk_req); + let free_head = vqs[0] + .borrow_mut() + .add(test_state.clone(), req_addr, REQ_ADDR_LEN, false); + let offset = u64::from(free_head) * VRING_DESC_SIZE + offset_of!(VringDesc, flags) as u64; + test_state + .borrow() + .writew(vqs[0].borrow().desc + offset, VRING_DESC_F_NEXT); + test_state + .borrow() + .writew(vqs[0].borrow().desc + offset + 2, free_head as u16 + 1); + let mut indirect_req = TestVringIndirectDesc::new(); + indirect_req.setup(alloc.clone(), test_state.clone(), queue_size as u16 + 1); + for i in 0..queue_size { + indirect_req.add_desc(test_state.clone(), req_addr + 16 + 2 * i as u64, 2, false); + } + indirect_req.add_desc( + test_state.clone(), + req_addr + 16 * 2 * queue_size as u64, + 1, + true, + ); + vqs[0] + .borrow_mut() + .add_indirect(test_state.clone(), indirect_req, true); + blk.borrow().virtqueue_notify(vqs[0].clone()); + + assert_eq!( + test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET), + 0xff + ); + blk.borrow().poll_used_elem( + test_state.clone(), + vqs[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + check_stratovirt_status(test_state.clone()); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Setting invalid flags to avail->flag in IO request. +/// TestStep: +/// 1. Init device with EVENT_IDX feature. +/// 2. Do the I/O request with avail->flags: +/// 1) invalid value: 2; +/// 2) VRING_AVAIL_F_NO_INTERRUPT with EVENT_IDX feature; +/// 3) VRING_AVAIL_F_NO_INTERRUPT | 2 with EVENT_IDX feature; +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn virtio_io_abnormal_avail_flags() { + let flags = [ + VRING_AVAIL_F_NO_INTERRUPT, + 2, + VRING_AVAIL_F_NO_INTERRUPT | 2, + ]; + for flag in flags { + do_event_idx_with_flag(flag); + } +} + +/// Setting invalid idx to avail->idx in IO request. +/// TestStep: +/// 1. Init device. +/// 2. Do the I/O request with avail->idx: +/// 1) assign 16 to avail->idx, but do not add req to desc; +/// 2) assign u16::MAX to avail->idx, which is bigger than queue size; +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 2: success or failure. +/// 1/3/4: success. +#[test] +fn virtio_io_abnormal_avail_idx() { + let idxs = [16, u16::MAX]; + for idx in idxs { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + for i in 1..DEFAULT_IO_REQS { + add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + i, + ); + } + + // Set flag to avail->idx. + test_state.borrow().writew(vqs[0].borrow().avail + 2, idx); + blk.borrow().virtqueue_notify(vqs[0].clone()); + + check_stratovirt_status(test_state.clone()); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs, + image_path.clone(), + ); + } +} + +/// Setting invalid desc_idx to avail->ring[i] in IO request. +/// TestStep: +/// 1. Init device. +/// 2. 
Do the I/O request with avail->ring[i]: +/// 1) assign u16::MAX to avail->ring[i], which is bigger than queue size; +/// 2) avail->ring[i..j] point to the same desc index; +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 2: success or failure. +/// 1/3/4: success. +#[test] +fn virtio_io_abnormal_avail_ring() { + // (ring[i], ack, device_status) + let reqs = [(u16::MAX, 0xff, VIRTIO_CONFIG_S_NEEDS_RESET), (0, 0xff, 0)]; + for (value, ack, device_status) in reqs { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + let mut req_addr = 0_u64; + for i in 0..DEFAULT_IO_REQS { + (_, req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + i, + ); + } + + // Set value to avail->ring[DEFAULT_IO_REQS - 1]. + test_state + .borrow() + .writew(vqs[0].borrow().avail + 4 + 2 * (DEFAULT_IO_REQS - 1), value); + blk.borrow().virtqueue_notify(vqs[0].clone()); + + assert_eq!(test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET), ack); + assert_eq!( + blk.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET, + device_status + ); + + check_stratovirt_status(test_state.clone()); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs, + image_path.clone(), + ); + } +} + +/// Setting invalid value to avail->used_event in IO request. +/// TestStep: +/// 1. Init device with or with not EVENT_IDX feature. +/// 2. Do the I/O request with avail->used_event: +/// 1) without EVENT_IDX, set valid to used_event. +/// 2) with EVENT_IDX, set u16::MAX to used_event. +/// 3) with EVENT_IDX, do not modify used_event. +/// 3. Send qmp to StratoVirt. +/// 4. Destroy device. +/// Expect: +/// 2: success or failure. +/// 1/3/4: success. +#[test] +fn virtio_io_abnormal_used_event() { + // (feature, used_event, ack, device_status) + let reqs = [ + (VIRTIO_F_VERSION_1, DEFAULT_IO_REQS as u16 - 1, 0, 0), + (VIRTIO_RING_F_EVENT_IDX, u16::MAX, 0, 0), + (VIRTIO_RING_F_EVENT_IDX, 0, 0, 0), + ]; + for (feature, used_event, ack, device_status) in reqs { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1 | 1 << feature, + 1, + ); + + send_one_request( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + ); + + let mut free_head = 0_u32; + let mut req_addr = 0_u64; + for i in 1..DEFAULT_IO_REQS { + (free_head, req_addr) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + i, + ); + } + + // Set used_event to avail->used_event. 
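+        // With VIRTIO_RING_F_EVENT_IDX negotiated, used_event asks the device to delay
+        // the used-buffer interrupt until used->idx passes this value; without the
+        // feature the device ignores the field, so writing any value here is harmless.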
+ vqs[0] + .borrow() + .set_used_event(test_state.clone(), used_event); + blk.borrow().virtqueue_notify(vqs[0].clone()); + + if feature == VIRTIO_RING_F_EVENT_IDX { + check_req_result( + blk.clone(), + test_state.clone(), + vqs[0].clone(), + req_addr + REQ_STATUS_OFFSET, + TIMEOUT_US, + ); + } else { + blk.borrow().poll_used_elem( + test_state.clone(), + vqs[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + } + + assert_eq!(test_state.borrow().readb(req_addr + REQ_STATUS_OFFSET), ack); + assert_eq!( + blk.borrow().get_status() & VIRTIO_CONFIG_S_NEEDS_RESET, + device_status + ); + + check_stratovirt_status(test_state.clone()); + + tear_down( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs, + image_path.clone(), + ); + } +} + +/// Setting invalid value to used->idx in IO request. +/// TestStep: +/// 1. Init device; +/// 2. Do the I/O request with avail->used_event = u16::MAX; +/// 3. Destroy device. +/// Expect: +/// 1/2/3: success. +#[test] +fn virtio_io_abnormal_used_idx() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + let mut free_head = 0_u32; + for i in 0..DEFAULT_IO_REQS { + (free_head, _) = add_request( + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + VIRTIO_BLK_T_OUT, + i, + ); + } + + // Set u16::MAX to used->idx. + test_state + .borrow() + .writew(vqs[0].borrow().used + 2, u16::MAX); + blk.borrow().virtqueue_notify(vqs[0].clone()); + blk.borrow().poll_used_elem( + test_state.clone(), + vqs[0].clone(), + free_head, + TIMEOUT_US, + &mut None, + true, + ); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Virtio test step out of order, testcase 1. +/// TestStep: +/// 1. Init device. +/// 2. Do the I/O request(normal io in desc). +/// 3. Init device. +/// 4. Do the I/O request(normal io in desc). +/// 5. Send qmp to StratoVirt. +/// 6. Destroy device. +/// Expect: +/// 1/2/5/6: success. +/// 3/4: success or failure. +#[test] +fn virtio_test_out_of_order_1() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + virtio_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + virtio_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + virtio_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + virtio_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + + check_stratovirt_status(test_state.clone()); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Virtio test step out of order, testcase 2. +/// TestStep: +/// 1. Init device. +/// 2. Destroy device. +/// 3. Init device. +/// 4. Do the I/O request(normal io in desc). +/// 5. Destroy device. +/// Expect: +/// 1/2/3/4/5: success. 
+#[test] +fn virtio_test_out_of_order_2() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + tear_down(blk, test_state, alloc, vqs, image_path); + + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + virtio_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + virtio_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + + tear_down(blk, test_state, alloc, vqs, image_path); +} + +/// Virtio test step repeat. +/// TestStep: +/// 1. Init device. +/// 2. Init device. +/// 3. Do the I/O request(normal io in desc). +/// 4. Do the I/O request(normal io in desc). +/// 5. Send qmp to StratoVirt. +/// 6. Destroy device. +/// 7. Destroy device. +/// Expect: +/// 1/2/3/4/5/6/7: success. +#[test] +fn virtio_test_repeat() { + let (blk, test_state, alloc, image_path) = set_up(&ImageType::Raw); + + blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + let vqs = blk.borrow_mut().init_device( + test_state.clone(), + alloc.clone(), + 1 << VIRTIO_F_VERSION_1, + 1, + ); + + virtio_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + virtio_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + 0, + ); + virtio_write( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + DEFAULT_IO_REQS, + ); + virtio_read( + blk.clone(), + test_state.clone(), + alloc.clone(), + vqs[0].clone(), + DEFAULT_IO_REQS * 2 - 1, + ); + + check_stratovirt_status(test_state.clone()); + + blk.borrow_mut().destroy_device(alloc.clone(), vqs.clone()); + blk.borrow_mut().destroy_device(alloc.clone(), vqs.clone()); + tear_down(blk, test_state, alloc, vqs, image_path); +} diff --git a/tests/mod_test/tests/virtiofs_test.rs b/tests/mod_test/tests/virtiofs_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..6598a9c7e5fe9b88ac980294326aa4d28c588cad --- /dev/null +++ b/tests/mod_test/tests/virtiofs_test.rs @@ -0,0 +1,2018 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + cell::RefCell, env, mem::size_of, path::Path, process::Command, rc::Rc, slice::from_raw_parts, +}; + +use mod_test::libdriver::{ + machine::TestStdMachine, + malloc::GuestAllocator, + virtio::{ + TestVirtQueue, TestVringDescEntry, VirtioDeviceOps, VIRTIO_F_BAD_FEATURE, + VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC, + }, + virtio_pci_modern::TestVirtioPciDev, + virtiofs::*, +}; +use mod_test::libtest::{test_init, TestState, MACHINE_TYPE_ARG}; +use mod_test::utils::get_rand_str; +use util::byte_code::ByteCode; +use util::offset_of; + +const DEFAULT_FS_DESC_ELEM: usize = 4; // 4 elems: inheader/inbody/outheader/outbody. 
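+// A request on the virtio-fs request queue uses DEFAULT_FS_DESC_ELEM descriptors:
+// the device-readable FuseInHeader and in-body come first, followed by the
+// device-writable FuseOutHeader and out-body (assembled in do_virtio_request below).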
+const TIMEOUT_US: u64 = 10 * 1000 * 1000; // 10s timeout. +const PARENT_NODEID: u64 = 1; // parent dir nodeid. +const TEST_MEM_SIZE: u64 = 1024; // 1G mem size. +const TEST_PAGE_SIZE: u64 = 4096; // 4k page size. +const TEST_FILE_NAME: &str = "testfile"; +const TEST_CHARDEV_NAME: &str = "testchar"; +const DEFAULT_READ_SIZE: usize = 1024; // 1024 Bytes. +const DEFAULT_XATTR_SIZE: u32 = 1024; // 1024 Bytes. +const MAX_TAG_LENGTH: usize = 36; // Tag buffer's max length in pci device config. + +#[derive(Copy, Clone)] +#[repr(C, packed)] +struct VirtioFsConfig { + tag: [u8; MAX_TAG_LENGTH], + num_request_queues: u32, +} + +struct VirtioFsTest { + device: Rc>, + state: Rc>, + allocator: Rc>, + queues: Vec>>, +} + +fn env_prepare(temp: bool) -> (String, String, String) { + let rng_name: String = get_rand_str(8); + + let dir = if temp { "/tmp" } else { "/var" }; + let virtiofs_test_dir = format!("{}/mst-virtiofs-{}", dir, rng_name); + let virtiofs_shared_dir = format!("{}/{}", virtiofs_test_dir, "shared"); + let virtiofs_test_file = format!("{}/{}", virtiofs_shared_dir, TEST_FILE_NAME); + let virtiofs_test_character_device = format!("{}/{}", virtiofs_shared_dir, TEST_CHARDEV_NAME); + + Command::new("mkdir") + .arg("-p") + .arg(virtiofs_shared_dir.clone()) + .output() + .unwrap(); + + Command::new("touch") + .arg(virtiofs_test_file.clone()) + .output() + .unwrap(); + + Command::new("mknod") + .arg(virtiofs_test_character_device) + .arg("c") + .arg("1") + .arg("1") + .output() + .unwrap(); + + let output = Command::new("dd") + .arg("if=/dev/zero") + .arg(format!("of={}", virtiofs_test_file)) + .arg("bs=1M") + .arg("count=10") + .output() + .unwrap(); + assert!(output.status.success()); + + (virtiofs_test_dir, virtiofs_shared_dir, virtiofs_test_file) +} + +fn env_clean(test_dir: String) { + Command::new("rm").arg("-rf").arg(test_dir).spawn().unwrap(); +} + +fn virtio_fs_default_feature(dev: Rc>) -> u64 { + let mut features = dev.borrow().get_device_features(); + features &= + !(VIRTIO_F_BAD_FEATURE | 1 << VIRTIO_RING_F_INDIRECT_DESC | 1 << VIRTIO_RING_F_EVENT_IDX); + + features +} + +impl VirtioFsTest { + fn new(memsize: u64, page_size: u64, virtiofs_sock: String) -> Self { + let tag = "myfs"; + let pci_slot: u8 = 0x4; + let pci_fn: u8 = 0x0; + + let mut args = format!("-D {},mem-share=on", MACHINE_TYPE_ARG); + + let mem_args = format!(" -m {}", memsize); + args.push_str(&mem_args); + let chardev_args = format!( + " -chardev socket,id=virtio_fs,path={},server,nowait", + virtiofs_sock + ); + args.push_str(&chardev_args); + let virtiofs_pci_args = format!( + " -device vhost-user-fs-pci,id=device_id,chardev=virtio_fs,tag={},bus=pcie.0,addr=0x4", + tag, + ); + args.push_str(&virtiofs_pci_args); + + let args_vec: Vec<&str> = args.trim().split(' ').collect(); + let test_state = Rc::new(RefCell::new(test_init(args_vec))); + let machine = + TestStdMachine::new_bymem(test_state.clone(), memsize * 1024 * 1024, page_size); + let allocator = machine.allocator.clone(); + let dev = Rc::new(RefCell::new(TestVirtioPciDev::new(machine.pci_bus))); + dev.borrow_mut().init(pci_slot, pci_fn); + let features = virtio_fs_default_feature(dev.clone()); + let queues = + dev.borrow_mut() + .init_device(test_state.clone(), allocator.clone(), features, 2); + + VirtioFsTest { + device: dev, + state: test_state, + allocator, + queues, + } + } + + fn handle_request_member( + &self, + reqmember: Option<&[u8]>, + data_entries: &mut Vec, + is_write: bool, + ) -> Option { + if let Some(member) = reqmember { + let member_size = 
member.len() as u64; + let member_addr = self.allocator.borrow_mut().alloc(member_size); + self.state.borrow().memwrite(member_addr, member); + data_entries.push(TestVringDescEntry { + data: member_addr, + len: member_size as u32, + write: is_write, + }); + + return Some(member_addr); + } + + None + } + + fn do_virtio_request( + &self, + fuseinheader: Option<&[u8]>, + fuseinbody: Option<&[u8]>, + fuseoutheader: Option<&[u8]>, + fuseoutbody: Option<&[u8]>, + ) -> (Option, Option, Option, Option) { + let mut data_entries: Vec = Vec::with_capacity(DEFAULT_FS_DESC_ELEM); + + // FuseInHeader. + let fuseinheader_addr = self.handle_request_member(fuseinheader, &mut data_entries, false); + // FuseInBody. + let fuseinbody_addr = self.handle_request_member(fuseinbody, &mut data_entries, false); + // FuseOutHeader. + let fuseoutheader_addr = self.handle_request_member(fuseoutheader, &mut data_entries, true); + // FuseOutbody. + let fuseoutbody_addr = self.handle_request_member(fuseoutbody, &mut data_entries, true); + + let free_head = self.queues[1] + .clone() + .borrow_mut() + .add_chained(self.state.clone(), data_entries); + + // Kick. + self.device + .borrow_mut() + .kick_virtqueue(self.state.clone(), self.queues[1].clone()); + + // Wait for response. + self.wait_for_response(free_head); + + ( + fuseinheader_addr, + fuseinbody_addr, + fuseoutheader_addr, + fuseoutbody_addr, + ) + } + + fn virtiofs_do_virtio_request( + &self, + fuseinheader: &[u8], + fuseinbody: &[u8], + fuseoutheader: &[u8], + fuseoutbody: &[u8], + ) -> (u64, u64) { + let (_, _, fuseoutheader_addr, fuseoutbody_addr) = self.do_virtio_request( + Some(fuseinheader), + Some(fuseinbody), + Some(fuseoutheader), + Some(fuseoutbody), + ); + + (fuseoutheader_addr.unwrap(), fuseoutbody_addr.unwrap()) + } + + fn virtiofsd_start_with_config( + dir_temp: bool, + seccomp: Option, + sandbox: Option, + modcaps: Option<&str>, + rlimit_nofile: Option, + xattr: bool, + ) -> (String, String, String) { + let binary_path = env::var("VIRTIOFSD_BINARY").unwrap(); + let (virtiofs_test_dir, virtiofs_shared_dir, virtiofs_test_file) = env_prepare(dir_temp); + let virtiofs_sock = format!("{}/virtiofs.sock", virtiofs_shared_dir); + + let mut args = "--log-level info".to_string(); + if seccomp.is_some() { + let seccomp_args = format!(" --seccomp {}", seccomp.unwrap()); + args.push_str(&seccomp_args); + } + if sandbox.is_some() { + let sandbox_args = format!(" --sandbox {}", sandbox.unwrap()); + args.push_str(&sandbox_args); + } + if modcaps.is_some() { + let modcaps_args = format!(" {}", modcaps.unwrap()); + args.push_str(&modcaps_args); + } + if rlimit_nofile.is_some() { + let rlimit_args = format!(" --rlimit-nofile {}", rlimit_nofile.unwrap()); + args.push_str(&rlimit_args); + } + if xattr { + args.push_str(" --xattr"); + } + + let args_vec: Vec<&str> = args.trim().split(' ').collect(); + + Command::new(binary_path) + .arg("--shared-dir") + .arg(virtiofs_shared_dir) + .arg("--socket-path") + .arg(virtiofs_sock.clone()) + .args(args_vec) + .spawn() + .unwrap(); + + // Wait totally 10s for that the vhost user fs socket is being created. + let path = virtiofs_sock.clone(); + let sock_path = Path::new(&path); + for _ in 0..100 { + if sock_path.exists() { + break; + } + // Query at 0.1s interval. 
+ std::thread::sleep(std::time::Duration::from_millis(100)); + } + + (virtiofs_test_dir, virtiofs_sock, virtiofs_test_file) + } + + fn virtiofsd_start() -> (String, String, String) { + VirtioFsTest::virtiofsd_start_with_config(true, None, None, None, None, false) + } + + fn testcase_end(&self, test_dir: String) { + self.testcase_check_and_end(None, test_dir); + } + + fn testcase_check_and_end(&self, absolute_virtiofs_sock: Option, test_dir: String) { + self.device + .borrow_mut() + .destroy_device(self.allocator.clone(), self.queues.clone()); + + if let Some(path) = absolute_virtiofs_sock { + let path_clone = path; + let sock_path = Path::new(&path_clone); + assert!(sock_path.exists()); + self.state.borrow_mut().stop(); + } else { + self.state.borrow_mut().stop(); + } + + env_clean(test_dir); + } + + fn wait_for_response(&self, free_head: u32) { + self.device.borrow().poll_used_elem( + self.state.clone(), + self.queues[1].clone(), + free_head, + TIMEOUT_US, + &mut None, + false, + ); + } +} + +fn read_obj(test_state: Rc>, read_addr: u64) -> T { + let read_len = size_of::() as u64; + let read_bytes = test_state.borrow().memread(read_addr, read_len); + let slice = unsafe { from_raw_parts(read_bytes.as_ptr() as *const T, size_of::()) }; + slice[0].clone() +} + +fn fuse_init(fs: &VirtioFsTest) -> (FuseOutHeader, FuseInitOut) { + let len = size_of::() + size_of::(); + let fuse_in_head = FuseInHeader::new(len as u32, FUSE_INIT, 0, 0, 0, 0, 0, 0); + let fuse_init_in = FuseInitIn { + major: FUSE_KERNEL_VERSION, + minor: FUSE_KERNEL_MINOR_VERSION, + max_readahead: TEST_MAX_READAHEAD, + flags: TEST_FLAG, + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_init_out = FuseInitOut::default(); + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + fuse_init_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_init_out.as_bytes(), + ); + + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + let init_out = read_obj::(fs.state.clone(), outbodyaddr); + + (out_header, init_out) +} + +fn fuse_destroy(fs: &VirtioFsTest) -> FuseOutHeader { + let len = size_of::(); + let fuse_in_head = FuseInHeader::new(len as u32, FUSE_DESTROY, 0, 0, 0, 0, 0, 0); + let fuse_out_head = FuseOutHeader::default(); + let (_, _, outheaderaddr, _outbodyaddr) = fs.do_virtio_request( + Some(fuse_in_head.as_bytes()), + None, + Some(fuse_out_head.as_bytes()), + None, + ); + + read_obj::(fs.state.clone(), outheaderaddr.unwrap()) +} + +fn fuse_lookup(fs: &VirtioFsTest, name: String) -> u64 { + // The reason why add "1" is that there exists "\0" after string. 
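+    // FUSE_LOOKUP resolves `name` relative to PARENT_NODEID (the root of the shared
+    // directory); the nodeid returned in the reply is what later FUSE_OPEN and
+    // FUSE_OPENDIR requests use.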
+ let len = (size_of::() + name.len() + 1) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_LOOKUP, 0, PARENT_NODEID, 0, 0, 0, 0); + let fuse_lookup_in = FuseLookupIn { name }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_lookup_out = FuseEntryOut::default(); + let (_outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &fuse_lookup_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_lookup_out.as_bytes(), + ); + + let entry_out = read_obj::(fs.state.clone(), outbodyaddr); + + entry_out.nodeid +} + +fn fuse_open(fs: &VirtioFsTest, nodeid: u64) -> u64 { + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_OPEN, 0, nodeid, 0, 0, 0, 0); + let fuse_open_in = FuseOpenIn { + flags: O_RDWR, + unused: 0, + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_open_out = FuseOpenOut::default(); + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + fuse_open_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_open_out.as_bytes(), + ); + + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + let openout = read_obj::(fs.state.clone(), outbodyaddr); + + openout.fh +} + +fn fuse_open_dir(fs: &VirtioFsTest, nodeid: u64) -> u64 { + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_OPENDIR, 0, nodeid, 0, 0, 0, 0); + let fuse_open_in = FuseOpenIn { + flags: 0, + unused: 0, + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_open_out = FuseOpenOut::default(); + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + fuse_open_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_open_out.as_bytes(), + ); + + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + let openout = read_obj::(fs.state.clone(), outbodyaddr); + + openout.fh +} + +// Note: Virtiofsd doesn't support illegal size message now, so trim will only support 0 until virtiofsd modification. 
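+// The `trim` parameter shortens the FuseLseekIn payload by that many bytes to
+// emulate an undersized message; with trim == 0 the full struct is sent.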
+fn fuse_lseek( + fs: &VirtioFsTest, + nodeid: u64, + fh: u64, + trim: usize, +) -> (FuseOutHeader, FuseLseekOut) { + let fuse_lseek_in = FuseLseekIn { + fh, + offset: 0, + whence: SEEK_END, + padding: 0, + }; + let lseek_in_len = fuse_lseek_in.as_bytes().len(); + let trim_lseek_in_len = lseek_in_len - trim; + let fuse_out_head = FuseOutHeader::default(); + let fuse_lseek_out = FuseLseekOut::default(); + let len = (size_of::() + trim_lseek_in_len) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_LSEEK, 0, nodeid, 0, 0, 0, 0); + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &fuse_lseek_in.as_bytes()[0..lseek_in_len - trim], + fuse_out_head.as_bytes(), + fuse_lseek_out.as_bytes(), + ); + + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + let lseekout = read_obj::(fs.state.clone(), outbodyaddr); + + (out_header, lseekout) +} + +fn fuse_getattr(fs: &VirtioFsTest, nodeid: u64, fh: u64) -> (FuseOutHeader, FuseAttrOut) { + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_GETATTR, 0, nodeid, 0, 0, 0, 0); + let fuse_getattr_in = FuseGetAttrIn { + getattr_flags: 0, + dummy: 0, + fh, + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_getattr_out = FuseAttrOut::default(); + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + fuse_getattr_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_getattr_out.as_bytes(), + ); + + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + let attrout = read_obj::(fs.state.clone(), outbodyaddr); + + (out_header, attrout) +} + +// Test situation: mount -t virtiofs myfs /mnt. +#[test] +fn mount_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = + VirtioFsTest::virtiofsd_start_with_config( + true, + Some(SeccompAction::Kill), + Some(SandBoxMechanism::Namespace), + Some("--modcaps=-LEASE:+KILL"), + None, + false, + ); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock.clone()); + + // basic function test. + fuse_init(&fs); + + // kill process and clean env. + fs.testcase_check_and_end(Some(virtiofs_sock), virtiofs_test_dir); +} + +// Test situation: umount /mnt. +#[test] +fn umount_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = + VirtioFsTest::virtiofsd_start_with_config( + true, + Some(SeccompAction::None), + Some(SandBoxMechanism::Chroot), + Some("--modcaps=-LEASE:+KILL"), + None, + false, + ); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + // mount. + fuse_init(&fs); + + // unmout and check. + let resp = fuse_destroy(&fs); + assert_eq!(resp.error, 0); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +/// Test: mkdir /mnt/dir +#[test] +fn mkdir_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = + VirtioFsTest::virtiofsd_start_with_config( + true, + Some(SeccompAction::Log), + None, + None, + Some(4096), // Test rlimit_nofile config. -rlimit_nofile 4096. + false, + ); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + fuse_init(&fs); + + // do request. + let fuse_mkdir_in = FuseMkdirIn { + mode: 0o777, // Directory right: 777. 
+ umask: 0, + name: String::from("dir"), + }; + let len = (size_of::() + fuse_mkdir_in.len()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_MKDIR, 0, PARENT_NODEID, 0, 0, 0, 0); + let fuse_out_head = FuseOutHeader::default(); + let fuse_mkdir_out = FuseEntryOut::default(); + let (outheaderaddr, _outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &fuse_mkdir_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_mkdir_out.as_bytes(), + ); + + // Check. + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + + let mut linkpath = virtiofs_test_dir.clone(); + linkpath.push_str("/shared/dir"); + let linkpath_clone = linkpath.clone(); + let link_path = Path::new(&linkpath_clone); + assert!(link_path.is_dir()); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +/// Test: sync /mnt/testfile. +#[test] +fn sync_fun() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = + VirtioFsTest::virtiofsd_start_with_config( + true, + Some(SeccompAction::Trap), + None, + None, + None, + false, + ); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + fuse_init(&fs); + let nodeid = fuse_lookup(&fs, TEST_FILE_NAME.to_string()); + let fh = fuse_open(&fs, nodeid); + + // sync file. + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_FSYNC, 0, nodeid, 0, 0, 0, 0); + let fuse_fallocate_in = FuseFsyncIn { + fh, + fsync_flags: 0, + padding: 0, + }; + let fuse_out_head = FuseOutHeader::default(); + let (_, _, outheader, _outbodyaddr) = fs.do_virtio_request( + Some(fuse_in_head.as_bytes()), + Some(fuse_fallocate_in.as_bytes()), + Some(fuse_out_head.as_bytes()), + None, + ); + + // Check. + let out_header = read_obj::(fs.state.clone(), outheader.unwrap()); + assert_eq!(out_header.error, 0); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +/// Test: sync /mnt +#[test] +fn syncdir_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + fuse_init(&fs); + let fh = fuse_open_dir(&fs, PARENT_NODEID); + + // sync directory. + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_FSYNCDIR, 0, PARENT_NODEID, 0, 0, 0, 0); + let fuse_fallocate_in = FuseFsyncIn { + fh, + fsync_flags: 0, + padding: 0, + }; + let fuse_out_head = FuseOutHeader::default(); + let (_, _, outheader, _outbodyaddr) = fs.do_virtio_request( + Some(fuse_in_head.as_bytes()), + Some(fuse_fallocate_in.as_bytes()), + Some(fuse_out_head.as_bytes()), + None, + ); + + // Check. + let out_header = read_obj::(fs.state.clone(), outheader.unwrap()); + assert_eq!(out_header.error, 0); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +#[test] +fn invalid_fuse_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + // generate fake fuse request and send it. + let fake_fuse_in_body = [0]; + let fake_len = (size_of::() + fake_fuse_in_body.len()) as u32; + let fake_ops = 50; // No such fuse command. 
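+    // virtiofsd is expected to answer the unknown opcode with a non-zero error in
+    // FuseOutHeader (often -ENOSYS); the assertion below only requires error != 0.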
+ let fuse_in_head = FuseInHeader::new(fake_len, fake_ops, 0, 0, 0, 0, 0, 0); + let fuse_out_head = FuseOutHeader::default(); + let fake_fuse_out_body = [0]; + let (outheaderaddr, _outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &fake_fuse_in_body, + fuse_out_head.as_bytes(), + &fake_fuse_out_body, + ); + + // Check returned error. + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert!(out_header.error != 0); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +// Note: Virtiofsd does not support illegal size message, block this test case. +#[test] +#[ignore] +fn missing_fuseinbody_fuseoutbody_virtiorequest_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + // generate fake fuse request and send it. + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_INIT, 0, 0, 0, 0, 0, 0); + let fuse_out_head = FuseOutHeader::default(); + let (_, _, outheader, _outbodyaddr) = fs.do_virtio_request( + Some(fuse_in_head.as_bytes()), + None, + Some(fuse_out_head.as_bytes()), + None, + ); + + // Check. + let out_header = read_obj::(fs.state.clone(), outheader.unwrap()); + assert!(out_header.error != 0); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +#[test] +fn virtiofs_device_config_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + // get values from device config. + let mut tag = [0; MAX_TAG_LENGTH]; + for i in 0..MAX_TAG_LENGTH { + tag[i] = fs + .device + .borrow() + .config_readb((offset_of!(VirtioFsConfig, tag) + i) as u64); + } + let num_request_queues = fs + .device + .borrow() + .config_readl(offset_of!(VirtioFsConfig, num_request_queues) as u64); + + // set values to device config. + for i in 0..MAX_TAG_LENGTH { + fs.device + .borrow() + .config_writeb((offset_of!(VirtioFsConfig, tag) + i) as u64, 0x10); + } + fs.device + .borrow() + .config_writel(offset_of!(VirtioFsConfig, num_request_queues) as u64, 5); + + // get values from device config. + let mut tag_new = [0; MAX_TAG_LENGTH]; + for i in 0..MAX_TAG_LENGTH { + tag_new[i] = fs + .device + .borrow() + .config_readb((offset_of!(VirtioFsConfig, tag) + i) as u64); + } + let num_request_queues_new = fs + .device + .borrow() + .config_readl(offset_of!(VirtioFsConfig, num_request_queues) as u64); + + // Check config can not be changed. + assert!(num_request_queues == num_request_queues_new); + assert!(tag == tag_new); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +#[test] +fn ls_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + // init filesystem. + fuse_init(&fs); + + // FUSE_OPENDIR. + let fh = fuse_open_dir(&fs, PARENT_NODEID); + + // FUSE_READDIRPLUS. 
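+    // READDIRPLUS returns directory entries together with their attributes, so a
+    // single request covers what an `ls`-style listing needs; the reply is read into
+    // a DEFAULT_READ_SIZE byte buffer.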
+ let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_READDIRPLUS, 0, PARENT_NODEID, 0, 0, 0, 0); + let fuse_read_in = FuseReadIn { + fh, + offset: 0, + size: DEFAULT_READ_SIZE as u32, + ..Default::default() + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_read_out = [0; DEFAULT_READ_SIZE]; + let (outheaderaddr, _outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + fuse_read_in.as_bytes(), + fuse_out_head.as_bytes(), + &fuse_read_out, + ); + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + + // FUSE_FORGET. + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_FORGET, 0, PARENT_NODEID, 0, 0, 0, 0); + let fuse_read_in = FuseForgetIn { nlookup: 1 }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_forget_out = FuseForgetOut::default(); + let (outheaderaddr, _outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + fuse_read_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_forget_out.as_bytes(), + ); + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + + // FUSE_READDIR. + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_READDIR, 0, 1, 0, 0, 0, 0); + let fuse_read_in = FuseReadIn { + fh, + offset: 0, + size: DEFAULT_READ_SIZE as u32, + ..Default::default() + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_read_out = [0_u8; DEFAULT_READ_SIZE]; + let (outheaderaddr, _outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + fuse_read_in.as_bytes(), + fuse_out_head.as_bytes(), + &fuse_read_out, + ); + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + + // FUSE_RELEASEDIR. + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_RELEASEDIR, 0, 1, 0, 0, 0, 0); + let fuse_read_in = FuseReleaseIn { + fh, + ..Default::default() + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_read_out = [0_u8; DEFAULT_READ_SIZE]; + let (outheaderaddr, _outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + fuse_read_in.as_bytes(), + fuse_out_head.as_bytes(), + &fuse_read_out, + ); + + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +fn fuse_setattr( + fs: &VirtioFsTest, + nodeid: u64, + fuse_setattr_in: FuseSetattrIn, +) -> (FuseOutHeader, FuseAttrOut) { + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_SETATTR, 0, nodeid, 0, 0, 0, 0); + let fuse_out_head = FuseOutHeader::default(); + let fuse_attr_out = FuseAttrOut::default(); + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + fuse_setattr_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_attr_out.as_bytes(), + ); + + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + let attr_out = read_obj::(fs.state.clone(), outbodyaddr); + + (out_header, attr_out) +} + +#[test] +fn setattr_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + // do init. 
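+    // After init, the requests below mirror chmod, chown, truncate and touch by
+    // setting the corresponding FATTR_* bits in FuseSetattrIn and verifying each
+    // change with FUSE_GETATTR.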
+ fuse_init(&fs); + + // do lookup + let nodeid = fuse_lookup(&fs, TEST_FILE_NAME.to_string()); + let fh = fuse_open(&fs, nodeid); + + // chmod 666 testfile + let mut fuse_setattr_in = FuseSetattrIn::default(); + fuse_setattr_in.mode = 0o666; // file right: 666. + fuse_setattr_in.valid = FATTR_MODE | FATTR_FH; + fuse_setattr_in.fh = fh; + let (out_header, _attr) = fuse_setattr(&fs, nodeid, fuse_setattr_in); + assert_eq!(out_header.error, 0); + + let (_out_header, attr) = fuse_getattr(&fs, nodeid, fh); + assert!(attr.attr.mode & 0o666 == 0o666); + + // chmod 777 testfile + let mut fuse_setattr_in = FuseSetattrIn::default(); + fuse_setattr_in.mode = 0o777; // file right: 777. + fuse_setattr_in.valid = FATTR_MODE; + fuse_setattr_in.fh = fh; + let (out_header, _attr) = fuse_setattr(&fs, nodeid, fuse_setattr_in); + assert_eq!(out_header.error, 0); + + let (_out_header, attr) = fuse_getattr(&fs, nodeid, fh); + assert!(attr.attr.mode & 0o777 == 0o777); + + // chown. + let mut fuse_setattr_in = FuseSetattrIn::default(); + fuse_setattr_in.valid = FATTR_UID | FATTR_GID; + fuse_setattr_in.fh = fh; + fuse_setattr_in.uid = 100; + fuse_setattr_in.gid = 200; + let (out_header, _attr) = fuse_setattr(&fs, nodeid, fuse_setattr_in); + assert_eq!(out_header.error, 0); + + let (_out_header, attr) = fuse_getattr(&fs, nodeid, fh); + assert!(attr.attr.uid == 100); + assert!(attr.attr.gid == 200); + + // truncate /mnt/testfile -s 1k + let mut fuse_setattr_in = FuseSetattrIn::default(); + fuse_setattr_in.size = 1024; // 1k + fuse_setattr_in.valid = FATTR_SIZE | FATTR_FH; + fuse_setattr_in.fh = fh; + let (out_header, _attr) = fuse_setattr(&fs, nodeid, fuse_setattr_in); + assert_eq!(out_header.error, 0); + + let (_out_header, attr) = fuse_getattr(&fs, nodeid, fh); + assert!(attr.attr.size == 1024); + + // truncate /mnt/testfile -s 2k + let mut fuse_setattr_in = FuseSetattrIn::default(); + fuse_setattr_in.size = 2048; // 2k + fuse_setattr_in.valid = FATTR_SIZE; + fuse_setattr_in.fh = fh; + let (out_header, _attr) = fuse_setattr(&fs, nodeid, fuse_setattr_in); + assert_eq!(out_header.error, 0); + + let (_out_header, attr) = fuse_getattr(&fs, nodeid, fh); + assert!(attr.attr.size == 2048); + + // touch -m -t 202301010000 test.c + let mut fuse_setattr_in = FuseSetattrIn::default(); + fuse_setattr_in.mtime = 1672531200; // 2023.01.01 00:00 + fuse_setattr_in.valid = FATTR_MTIME; + fuse_setattr_in.fh = fh; + let (out_header, _attr) = fuse_setattr(&fs, nodeid, fuse_setattr_in); + assert_eq!(out_header.error, 0); + + let (_out_header, attr) = fuse_getattr(&fs, nodeid, fh); + assert!(attr.attr.mtime == 1672531200); + + // touch -a -t 202301010000 test.c + let mut fuse_setattr_in = FuseSetattrIn::default(); + fuse_setattr_in.atime = 1672531200; // 2023.01.01 00:00 + fuse_setattr_in.valid = FATTR_ATIME | FATTR_FH; + fuse_setattr_in.fh = fh; + let (out_header, _attr) = fuse_setattr(&fs, nodeid, fuse_setattr_in); + assert_eq!(out_header.error, 0); + + let (_out_header, attr) = fuse_getattr(&fs, nodeid, fh); + assert!(attr.attr.atime == 1672531200); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +// unlink /mnt/testfile +#[test] +fn unlink_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + fuse_init(&fs); + fuse_lookup(&fs, TEST_FILE_NAME.to_string()); + + // unlink request. 
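+    // FUSE_UNLINK addresses the file by PARENT_NODEID plus its NUL-terminated name,
+    // hence the `+ 1` in the request length.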
+ let len = (size_of::() + TEST_FILE_NAME.len() + 1) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_UNLINK, 0, PARENT_NODEID, 0, 0, 0, 0); + let fuse_unlink_in = FuseUnlinkrIn { + name: String::from(TEST_FILE_NAME), + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_unlink_out = FuseEntryOut::default(); + let (outheaderaddr, _outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &fuse_unlink_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_unlink_out.as_bytes(), + ); + + // Check. + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + + let mut linkpath = virtiofs_test_dir.clone(); + linkpath.push_str("/shared/testfile"); + let linkpath_clone = linkpath.clone(); + let link_path = Path::new(&linkpath_clone); + assert!(!link_path.exists()); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +#[test] +fn rmdir_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + let mut dir = virtiofs_test_dir.clone(); + dir.push_str("/shared/dir"); + Command::new("mkdir") + .arg("-p") + .arg(dir.clone()) + .output() + .unwrap(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + fuse_init(&fs); + fuse_lookup(&fs, "dir".to_string()); + + // rmdir request. + let fuse_unlink_in = FuseUnlinkrIn { + name: String::from("dir"), + }; + let len = (size_of::() + fuse_unlink_in.len()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_RMDIR, 0, PARENT_NODEID, 0, 0, 0, 0); + let fuse_out_head = FuseOutHeader::default(); + let fuse_unlink_out = FuseEntryOut::default(); + let (outheaderaddr, _outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &fuse_unlink_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_unlink_out.as_bytes(), + ); + + // Check. + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + + let mut linkpath = virtiofs_test_dir.clone(); + linkpath.push_str("/shared/dir"); + let linkpath_clone = linkpath.clone(); + let link_path = Path::new(&linkpath_clone); + assert!(!link_path.exists()); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +#[test] +fn symlink_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + fuse_init(&fs); + + // do request. + let linkname = "link".to_string(); + let len = (linkname.len() + size_of::() + TEST_FILE_NAME.len() + 2) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_SYMLINK, 4, PARENT_NODEID, 0, 0, 0, 0); + let fuse_init_in = FusesysmlinkIn { + name: linkname.clone(), + linkname: String::from(TEST_FILE_NAME), + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_init_out = FuseEntryOut::default(); + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &fuse_init_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_init_out.as_bytes(), + ); + + // Check. 
+ let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + + let entryout = read_obj::(fs.state.clone(), outbodyaddr); + assert_eq!(entryout.attr.nlink, 1); + + let mut linkpath = virtiofs_test_dir.clone(); + linkpath.push_str("/shared/link"); + let linkpath_clone = linkpath.clone(); + let link_path = Path::new(&linkpath_clone); + assert!(link_path.is_symlink()); + + // Read link + let node_id = fuse_lookup(&fs, linkname); + let len = size_of::() as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_READLINK, 8, node_id, 0, 0, 0, 0); + let fuse_out_head = FuseOutHeader::default(); + let fuse_read_link_out = [0_u8; 1024]; + let (_, _, outheader, outbodyaddr) = fs.do_virtio_request( + Some(fuse_in_head.as_bytes()), + None, + Some(fuse_out_head.as_bytes()), + Some(&fuse_read_link_out), + ); + + let out_header = read_obj::(fs.state.clone(), outheader.unwrap()); + assert_eq!(0, out_header.error); + let fuse_read_link_out = fs.state.borrow().memread(outbodyaddr.unwrap(), 1024); + let read_path = String::from_utf8(fuse_read_link_out); + let mut read_path = read_path.unwrap(); + read_path.truncate(TEST_FILE_NAME.len()); + assert_eq!(TEST_FILE_NAME.to_string(), read_path); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +// fallocate -l 1024K /mnt/testfile +#[test] +fn fallocate_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + fuse_init(&fs); + let nodeid = fuse_lookup(&fs, TEST_FILE_NAME.to_string()); + let fh = fuse_open(&fs, nodeid); + + // FUSE_FALLOCATE. + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_FALLOCATE, 0, nodeid, 0, 0, 0, 0); + let fuse_fallocate_in = FuseFallocateIn { + fh, + offset: 0, + length: 1048576, // 1KB. + mode: 0, + padding: 0, + }; + let fuse_out_head = FuseOutHeader::default(); + let (_, _, outheader, _outbodyaddr) = fs.do_virtio_request( + Some(fuse_in_head.as_bytes()), + Some(fuse_fallocate_in.as_bytes()), + Some(fuse_out_head.as_bytes()), + None, + ); + + // Check. + let out_header = read_obj::(fs.state.clone(), outheader.unwrap()); + assert_eq!(out_header.error, 0); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +// Note: Virtiofsd does not support `GETLK` message, block this test case. +// fcntl() function test. +#[test] +#[ignore] +fn posix_file_lock_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + fuse_init(&fs); + let nodeid = fuse_lookup(&fs, TEST_FILE_NAME.to_string()); + let fh = fuse_open(&fs, nodeid); + + // getlk write lock. + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_GETLK, 0, nodeid, 0, 0, 0, 0); + let fuse_lk_in = FuseLkIn { + fh, + owner: 0, + lk: FuseFileLock { + start: 0, + end: 1, + lock_type: F_WRLCK, + pid: 1, + }, + lk_flags: 0, + padding: 0, + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_lk_out = FuseLkOut::default(); + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + fuse_lk_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_lk_out.as_bytes(), + ); + + // Check file is unlock. 
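+ // GETLK reports F_UNLCK in the returned lock when no conflicting lock is
+ // held on the requested range, which is the state expected here.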
+ let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + let lkout = read_obj::(fs.state.clone(), outbodyaddr); + assert_eq!(lkout.lk.lock_type, F_UNLCK); + + // setlk write lock. + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_SETLK, 0, nodeid, 0, 0, 0, 0); + let fuse_lk_in = FuseLkIn { + fh, + owner: 0, + lk: FuseFileLock { + start: 0, + end: 1, + lock_type: F_WRLCK, + pid: 1, + }, + lk_flags: 0, + padding: 0, + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_lk_out = FuseLkOut::default(); + let (outheaderaddr, _outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + fuse_lk_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_lk_out.as_bytes(), + ); + + // check. + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +#[test] +fn mknod_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + fuse_init(&fs); + + // FUSE_MKNOD. + let fuse_mknod_in = FuseMknodIn { + mode: 0o666, // right mode 666. + rdev: 0, + umask: 0, + padding: 0, + name: String::from("node"), + }; + let len = (size_of::() + fuse_mknod_in.len()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_MKNOD, 4, PARENT_NODEID, 0, 0, 0, 0); + + let fuse_out_head = FuseOutHeader::default(); + let fuse_init_out = FuseEntryOut::default(); + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &fuse_mknod_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_init_out.as_bytes(), + ); + + // Check. + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + let entryout = read_obj::(fs.state.clone(), outbodyaddr); + assert_eq!(entryout.attr.nlink, 1); + + let mut nodepath = virtiofs_test_dir.clone(); + nodepath.push_str("/shared/node"); + let nodepath_clone = nodepath.clone(); + let node_path = Path::new(&nodepath_clone); + assert!(node_path.exists()); + + // kill process and clean env. 
+ fs.testcase_end(virtiofs_test_dir); +} + +fn get_xattr(fs: &VirtioFsTest, name: String, nodeid: u64) -> (FuseOutHeader, String) { + let len = + (size_of::() + offset_of!(FuseGetxattrIn, name) + name.len() + 1) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_GETXATTR, 0, nodeid, 0, 0, 0, 0); + let fuse_in = FuseGetxattrIn { + size: DEFAULT_XATTR_SIZE, + padding: 0, + name, + }; + + let fuse_out_head = FuseOutHeader::default(); + let fuse_out = [0_u8; DEFAULT_XATTR_SIZE as usize]; + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &fuse_in.as_bytes(), + fuse_out_head.as_bytes(), + &fuse_out, + ); + + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + let fuse_read_out = fs + .state + .borrow() + .memread(outbodyaddr, u64::from(DEFAULT_XATTR_SIZE)); + let attr = String::from_utf8(fuse_read_out).unwrap(); + + (out_header, attr) +} + +fn flush_file(fs: &VirtioFsTest, nodeid: u64, fh: u64) { + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_FLUSH, 0, nodeid, 0, 0, 0, 0); + let fuse_in = FuseFlushIn { + fh, + unused: 0, + padding: 0, + lock_owner: 0, + }; + let fuse_out_head = FuseOutHeader::default(); + let (_, _, outheader, _) = fs.do_virtio_request( + Some(fuse_in_head.as_bytes()), + Some(fuse_in.as_bytes()), + Some(fuse_out_head.as_bytes()), + None, + ); + + let out_header = read_obj::(fs.state.clone(), outheader.unwrap()); + assert_eq!(out_header.error, 0); +} + +fn write_file(fs: &VirtioFsTest, nodeid: u64, fh: u64, write_buf: String) { + let len = (size_of::() + offset_of!(FuseWriteIn, write_buf) + write_buf.len() + 1) + as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_WRITE, 0, nodeid, 0, 0, 0, 0); + let fuse_write_in = FuseWriteIn::new(fh, 0, write_buf.clone()); + let fuse_out_head = FuseOutHeader::default(); + let fuse_write_out = FuseWriteOut::default(); + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &fuse_write_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_write_out.as_bytes(), + ); + + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + + let write_out = read_obj::(fs.state.clone(), outbodyaddr); + assert_eq!(write_out.size, (write_buf.len() + 1) as u32); +} + +fn release_file(fs: &VirtioFsTest, nodeid: u64, fh: u64) { + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_RELEASE, 0, nodeid, 0, 0, 0, 0); + let fuse_read_in = FuseReleaseIn { + fh, + flags: O_NONBLOCK | O_DIRECT, + ..Default::default() + }; + let fuse_out_head = FuseOutHeader::default(); + let (_, _, outheader, _) = fs.do_virtio_request( + Some(fuse_in_head.as_bytes()), + Some(fuse_read_in.as_bytes()), + Some(fuse_out_head.as_bytes()), + None, + ); + + let out_header = read_obj::(fs.state.clone(), outheader.unwrap()); + assert_eq!(out_header.error, 0); +} + +fn create_file(fs: &VirtioFsTest, name: String) -> (FuseOutHeader, FuseCreateOut) { + let len = (size_of::() + offset_of!(FuseCreateIn, name) + name.len() + 1) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_CREATE, 0, PARENT_NODEID, 0, 0, 0, 0); + let fuse_in = FuseCreateIn { + flags: O_CREAT | O_TRUNC | O_RDWR, + mode: 0o777, // file right mode 777. 
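+ // umask 0: no permission bits are masked out of `mode` above.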
+ umask: 0, + padding: 0, + name, + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_out = FuseCreateOut::default(); + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &fuse_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_out.as_bytes(), + ); + + // Check. + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + let createout = read_obj::(fs.state.clone(), outbodyaddr); + + (out_header, createout) +} + +#[test] +fn writefile_fun() { + let file = "text.txt".to_string(); + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + fuse_init(&fs); + + let (out_head, attr) = create_file(&fs, file); + assert_eq!(out_head.error, 0); + + let mut nodepath = virtiofs_test_dir.clone(); + nodepath.push_str("/shared/text.txt"); + + let nodepath_clone = nodepath.clone(); + let node_path = Path::new(&nodepath_clone); + assert!(node_path.exists()); + + flush_file(&fs, attr.create_out.nodeid, attr.open_out.fh); + get_xattr(&fs, "security.selinux".to_string(), attr.create_out.nodeid); + + write_file( + &fs, + attr.create_out.nodeid, + attr.open_out.fh, + "12345".to_string(), + ); + flush_file(&fs, attr.create_out.nodeid, attr.open_out.fh); + + release_file(&fs, attr.create_out.nodeid, attr.open_out.fh); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +fn read_file(fs: &VirtioFsTest, nodeid: u64, fh: u64) -> String { + let len = (size_of::() + size_of::()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_READ, 0, nodeid, 0, 0, 0, 0); + let fuse_in = FuseReadIn { + fh, + offset: 0, + size: DEFAULT_READ_SIZE as u32, + ..Default::default() + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_out = [0_u8; DEFAULT_READ_SIZE]; + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + fuse_in.as_bytes(), + fuse_out_head.as_bytes(), + &fuse_out, + ); + + // Check. + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, 0); + let fuse_read_out = fs.state.borrow().memread(outbodyaddr, 5); + + String::from_utf8(fuse_read_out).unwrap() +} + +#[test] +fn openfile_test() { + let file = TEST_FILE_NAME.to_string(); + + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + fuse_init(&fs); + let nodeid = fuse_lookup(&fs, file); + + // open/write/flush/close/open/read/close + let fh = fuse_open(&fs, nodeid); + let mut nodepath = virtiofs_test_dir.clone(); + nodepath.push_str("/shared/testfile"); + let nodepath_clone = nodepath.clone(); + let node_path = Path::new(&nodepath_clone); + assert!(node_path.exists()); + write_file(&fs, nodeid, fh, "12345".to_string()); + flush_file(&fs, nodeid, fh); + release_file(&fs, nodeid, fh); + + let fh = fuse_open(&fs, nodeid); + let get_str = read_file(&fs, nodeid, fh); + assert_eq!(get_str, "12345".to_string()); + release_file(&fs, nodeid, fh); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +#[test] +fn rename_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. 
+ let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + fuse_init(&fs); + + // FUSE_RENAME. Rename testfile to file. + fuse_lookup(&fs, TEST_FILE_NAME.to_string()); + let fuse_rename_in = FuseRenameIn { + newdir: PARENT_NODEID, + oldname: TEST_FILE_NAME.to_string(), + newname: "file".to_string(), + }; + let len = (size_of::() + fuse_rename_in.len()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_RENAME, 0, PARENT_NODEID, 0, 0, 0, 0); + let fuse_out_head = FuseOutHeader::default(); + let (_, _, outheader, _outbodyaddr) = fs.do_virtio_request( + Some(fuse_in_head.as_bytes()), + Some(&fuse_rename_in.as_bytes()), + Some(fuse_out_head.as_bytes()), + None, + ); + + // Check. + let out_header = read_obj::(fs.state.clone(), outheader.unwrap()); + assert_eq!(0, out_header.error); + let path = virtiofs_test_dir.clone() + "/shared" + "/file"; + let path = Path::new(path.as_str()); + assert!(path.exists()); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +#[test] +fn link_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + fuse_init(&fs); + + // FUSE_LINK. + let oldnodeid = fuse_lookup(&fs, TEST_FILE_NAME.to_string()); + + let fuse_rename_in = FuseLinkIn { + oldnodeid, + newname: "file_link".to_string(), + }; + let len = (size_of::() + fuse_rename_in.len()) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_LINK, 0, PARENT_NODEID, 0, 0, 0, 0); + let fuse_out_head = FuseOutHeader::default(); + let fuse_entry_out = FuseEntryOut::default(); + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &fuse_rename_in.as_bytes(), + fuse_out_head.as_bytes(), + fuse_entry_out.as_bytes(), + ); + + // Check. + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(0, out_header.error); + let entry_out = read_obj::(fs.state.clone(), outbodyaddr); + // link a file will make its nlink count +1 + assert_eq!(2, entry_out.attr.nlink); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +#[test] +fn statfs_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + fuse_init(&fs); + + // do request. + let len = size_of::() as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_STATFS, 0, PARENT_NODEID, 0, 0, 0, 0); + let fuse_out_head = FuseOutHeader::default(); + let fuse_statfs_out = FuseKstatfs::default(); + let (_, _, outheader, _outbodyaddr) = fs.do_virtio_request( + Some(fuse_in_head.as_bytes()), + None, + Some(fuse_out_head.as_bytes()), + Some(fuse_statfs_out.as_bytes()), + ); + + // Check. + let out_header = read_obj::(fs.state.clone(), outheader.unwrap()); + assert_eq!(0, out_header.error); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +#[test] +fn virtio_fs_fuse_ioctl_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + // init filesystem. + fuse_init(&fs); + + // FUSE_LOOKUP. + let nodeid = fuse_lookup(&fs, TEST_FILE_NAME.to_string()); + + // FUSE_IOCTL. 
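+ // The server is expected to reject this empty FUSE_IOCTL request, so the
+ // test only checks for a non-zero error code in the reply header.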
+ let len = size_of::() as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_IOCTL, 0, nodeid, 0, 0, 0, 0); + let fuse_out_head = FuseOutHeader::default(); + let (outheaderaddr, _outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &[0], + fuse_out_head.as_bytes(), + &[0], + ); + + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_ne!(out_header.error, 0); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +#[test] +fn virtio_fs_fuse_abnormal_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + // init filesystem. + fuse_init(&fs); + + // Unsupported message 0xff. + let len = size_of::() as u32; + let fuse_in_head = FuseInHeader::new(len, 0xff, 0, 0, 0, 0, 0, 0); + let fuse_out_head = FuseOutHeader::default(); + + let (outheaderaddr, _outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &[0], + fuse_out_head.as_bytes(), + &[0], + ); + + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_ne!(out_header.error, 0); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +// Read "\0" c string from buffer[start..] and return the end position's next in buffer. +fn read_cstring(buffer: Vec, start: usize) -> (Option, usize) { + let mut pos = start; + + for i in start..buffer.len() { + if buffer[i] == b'\0' { + pos = i; + break; + } + } + + if pos == start { + return (None, pos); + } + + let cstring = String::from_utf8(buffer[start..pos].to_vec()).unwrap(); + + (Some(cstring), pos + 1) +} + +fn fuse_setxattr(fs: &VirtioFsTest, name: String, value: String, nodeid: u64) -> FuseOutHeader { + // 8: offset_of!(name, FuseSetxattrIn). + // 2: two "/0". + let len = (size_of::() + 8 + name.len() + value.len() + 2) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_SETXATTR, 4, nodeid, 0, 0, 0, 0); + let fuse_setxattr_in = FuseSetxattrIn { + size: value.len() as u32 + 1, + flags: XATTR_CREATE, + name, + value, + }; + let fuse_out_head = FuseOutHeader::default(); + let (_, _, outheader, _outbodyaddr) = fs.do_virtio_request( + Some(fuse_in_head.as_bytes()), + Some(&fuse_setxattr_in.as_bytes()), + Some(fuse_out_head.as_bytes()), + None, + ); + + read_obj::(fs.state.clone(), outheader.unwrap()) +} + +fn fuse_removexattr(fs: &VirtioFsTest, name: String, nodeid: u64) -> FuseOutHeader { + let len = (size_of::() + name.len() + 1) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_REMOVEXATTR, 0, nodeid, 0, 0, 0, 0); + let fuse_removexattr_in = FuseRemoveXattrIn { name }; + let fuse_out_head = FuseOutHeader::default(); + let (_, _, outheader, _outbodyaddr) = fs.do_virtio_request( + Some(fuse_in_head.as_bytes()), + Some(&fuse_removexattr_in.as_bytes()), + Some(fuse_out_head.as_bytes()), + None, + ); + + read_obj::(fs.state.clone(), outheader.unwrap()) +} + +fn fuse_listxattr(fs: &VirtioFsTest, nodeid: u64) -> (FuseOutHeader, u64) { + // 8: offset_of!(name, FuseGetxattrIn). 
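+ // FUSE_LISTXATTR reuses the getxattr layout with an empty name, so only the
+ // 8 fixed bytes (size + padding) before the name field are counted into the
+ // request length.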
+ let len = (size_of::() + 8) as u32; + let fuse_in_head = FuseInHeader::new(len, FUSE_LISTXATTR, 0, nodeid, 0, 0, 0, 0); + let fuse_in = FuseGetxattrIn { + size: DEFAULT_XATTR_SIZE, + padding: 0, + name: "".to_string(), + }; + let fuse_out_head = FuseOutHeader::default(); + let fuse_out = [0_u8; DEFAULT_XATTR_SIZE as usize]; + let (outheaderaddr, outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &fuse_in.as_bytes(), + fuse_out_head.as_bytes(), + &fuse_out, + ); + + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + + (out_header, outbodyaddr) +} + +// setfattr -n user.abc -v valtest testfile +// getfattr -n user.abc testfile +// getfattr testfile +// setfattr -x user.abc testfile +#[test] +fn regularfile_xattr_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = + VirtioFsTest::virtiofsd_start_with_config(false, None, None, None, None, true); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + fuse_init(&fs); + let nodeid = fuse_lookup(&fs, TEST_FILE_NAME.to_string()); + + let testattr_name = "user.abc"; + let testattr_value = "valtest"; + + // SETXATTR. + let fuseout = fuse_setxattr( + &fs, + testattr_name.to_string(), + testattr_value.to_string(), + nodeid, + ); + assert_eq!(fuseout.error, 0); + + // GETXATTR + let (header, value) = get_xattr(&fs, testattr_name.to_string(), nodeid); + assert_eq!(0, header.error); + assert_eq!(value[0..testattr_value.len()], testattr_value.to_string()); + + // LISTXATTR + let (header, outbodyaddr) = fuse_listxattr(&fs, nodeid); + assert_eq!(header.error, 0); + let attr_list = fs + .state + .borrow() + .memread(outbodyaddr, u64::from(DEFAULT_XATTR_SIZE)); + // The first attr is "security.selinux" + let (_attr1, next1) = read_cstring(attr_list.clone(), 0); + // The next attrs are what we set by FUSE_SETXATTR. Check it. + let (attr2, _next2) = read_cstring(attr_list, next1); + assert_eq!(attr2.unwrap(), testattr_name); + + // REMOVEXATTR + let outheader = fuse_removexattr(&fs, testattr_name.to_string(), nodeid); + assert_eq!(0, outheader.error); + + // GETXATTR + // Xattr "user.abc" has been removed, should receive ERROR. + let (header, _value) = get_xattr(&fs, testattr_name.to_string(), nodeid); + assert_ne!(0, header.error); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +// setfattr -n user.abc -v valtest /mnt/testchar +// getfattr -n user.abc /mnt/testchar +// getfattr /mnt/testchar +// setfattr -x user.abc /mnt/testchar +#[test] +fn character_file_xattr_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = + VirtioFsTest::virtiofsd_start_with_config(false, None, None, None, None, true); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + fuse_init(&fs); + let nodeid = fuse_lookup(&fs, TEST_CHARDEV_NAME.to_string()); + + let testattr_name = "user.abc"; + let testattr_value = "valtest"; + + // SETXATTR. + let fuseout = fuse_setxattr( + &fs, + testattr_name.to_string(), + testattr_value.to_string(), + nodeid, + ); + // can not setxattr for character device. + assert_ne!(fuseout.error, 0); + + // GETXATTR nothing. 
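+ // Since the setxattr above failed, the attribute was never created, so this
+ // lookup is expected to fail as well (typically ENODATA on the host).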
+ let (header, _value) = get_xattr(&fs, testattr_name.to_string(), nodeid); + assert_ne!(0, header.error); + + // LISTXATTR + let (header, _outbodyaddr) = fuse_listxattr(&fs, nodeid); + assert_eq!(header.error, 0); + + // REMOVEXATTR + let outheader = fuse_removexattr(&fs, testattr_name.to_string(), nodeid); + assert_ne!(0, outheader.error); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +#[test] +fn virtio_fs_fuse_lseek_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + // init filesystem. + fuse_init(&fs); + + // FUSE_LOOKUP. + let nodeid = fuse_lookup(&fs, TEST_FILE_NAME.to_string()); + // FUSE_OPEN. + let fh = fuse_open(&fs, nodeid); + + // FUSE_GETATTR. + let (out_header, _attrout) = fuse_getattr(&fs, nodeid, fh); + assert_eq!(out_header.error, 0); + + // FUSE_LSEEK. + /* + Block this test until virtiofsd support illegal size message. + + assert_ne!(fuse_lseek(&fs, nodeid, fh + 1, 1).0.error, 0); + */ + assert_ne!(fuse_lseek(&fs, nodeid, fh + 1, 0).0.error, 0); + assert_eq!(fuse_lseek(&fs, nodeid, fh, 0).0.error, 0); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +// Note: Virtiofsd doesn't support illegal size message now, so trim will only support 0 until virtiofsd modification. +fn fuse_batch_forget(fs: &VirtioFsTest, nodeid: u64, trim: usize) { + let len = + size_of::() + size_of::() + size_of::(); + let fuse_in_head = FuseInHeader::new(len as u32, FUSE_BATCH_FORGET, 0, 0, 0, 0, 0, 0); + let fuse_batch_forget_in = FuseBatchForgetIn { count: 1, dummy: 0 }; + let fuse_forget_data_in = FuseForgetDataIn { + ino: nodeid, + nlookup: 1, + }; + let data_bytes = [ + fuse_batch_forget_in.as_bytes(), + fuse_forget_data_in.as_bytes(), + ] + .concat(); + let (_, _) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + &data_bytes[0..data_bytes.len() - trim], + &[0], + &[0], + ); +} + +#[test] +fn virtio_fs_fuse_batch_forget_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + // init filesystem. + fuse_init(&fs); + + // FUSE_LOOKUP. + let nodeid = fuse_lookup(&fs, TEST_FILE_NAME.to_string()); + + // FUSE_BATCH_FORGET. + + /* + Block these two test until virtiofsd support illegal size message. + + // Incomplete FuseBatchForgetIn. + fuse_batch_forget(&fs, nodeid, size_of::() + 1); + // Incomplete FuseForgetDataIn. + fuse_batch_forget(&fs, nodeid, size_of::() - 1); + */ + + // Normal test. + fuse_batch_forget(&fs, nodeid, 0); + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} + +// Note: Virtiofsd does not support `SETLK` and `SETLKW` message, block this test case. +// flock. +#[test] +#[ignore] +fn virtio_fs_fuse_setlkw_test() { + // start virtiofsd process. + let (virtiofs_test_dir, virtiofs_sock, _virtiofs_test_file) = VirtioFsTest::virtiofsd_start(); + + // start vm. + let fs = VirtioFsTest::new(TEST_MEM_SIZE, TEST_PAGE_SIZE, virtiofs_sock); + + // init filesystem. + fuse_init(&fs); + + // FUSE_LOOKUP. + let nodeid = fuse_lookup(&fs, TEST_FILE_NAME.to_string()); + // FUSE_OPEN. + let fh = fuse_open(&fs, nodeid); + + let reqs = [ + //(req_type, lk_flags, lock_type, fh, error), + (0, 1, 0, fh, 0), // Normal F_RDLCK test. 
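+ // lock_type values follow libc: 0 = F_RDLCK, 1 = F_WRLCK, 2 = F_UNLCK.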
+ (0, 1, 1, fh, 0), // Normal F_WDLCK test. + (0, 1, 2, fh, 0), // Normal F_UNLCK test. + (0, 0, 1, fh, -95), // Abnormal test with error -libc::EOPNOTSUPP. + (0, 1, 0, fh + 1, -9), // Abnormal test with error -libc::EBADF. + (0, 1, 0, fh + 1, -9), // Abnormal test with error -libc::EBADF. + (1, 1, 0, fh, -22), // Abnormal test with error -libc::EINVAL. + ]; + + // FUSE_SETLKW. + for (req_type, lk_flags, lock_type, fh, error) in reqs { + let len = size_of::() + size_of::(); + let fuse_in_head = FuseInHeader::new(len as u32, FUSE_SETLKW, 0, nodeid, 0, 0, 0, 0); + let fuse_lk_in = FuseLkIn { + fh, + owner: 0, + lk: FuseFileLock { + start: 0, + end: 1, + lock_type, + pid: 0, + }, + lk_flags, + padding: 0, + }; + let fuse_out_head = FuseOutHeader::default(); + let mut fuse_lk_in_bytes = fuse_lk_in.as_bytes(); + if req_type == 1 { + fuse_lk_in_bytes = &fuse_lk_in.as_bytes()[0..1]; + } + + let (outheaderaddr, _outbodyaddr) = fs.virtiofs_do_virtio_request( + fuse_in_head.as_bytes(), + fuse_lk_in_bytes, + fuse_out_head.as_bytes(), + &[0], + ); + + let out_header = read_obj::(fs.state.clone(), outheaderaddr); + assert_eq!(out_header.error, error); + } + + // kill process and clean env. + fs.testcase_end(virtiofs_test_dir); +} diff --git a/tests/mod_test/tests/vnc_test.rs b/tests/mod_test/tests/vnc_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..61868f9e08f06f6a3e44351e252efc680f8f0c30 --- /dev/null +++ b/tests/mod_test/tests/vnc_test.rs @@ -0,0 +1,875 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{cell::RefCell, rc::Rc}; + +use anyhow::Result; +use serde_json::Value; +use vmm_sys_util::epoll::EventSet; + +use mod_test::{ + libdriver::vnc::{ + create_new_client, set_up, tear_down, DemoGpuConfig, EncodingType, InputConfig, + RfbClientMessage, RfbPixelFormat, RfbServerMsg, TestAuthType, TestClientCut, UpdateState, + KEYEVENTLIST, PIXMAN_A1, PIXMAN_A8B8G8R8, PIXMAN_R8G8B8, PIXMAN_X2R10G10B10, PIXMAN_YUY2, + POINTEVENTLIST, TEST_CLIENT_RAND_MSG, + }, + libtest::TestState, +}; + +fn qmp_query_vnc(test_state: Rc>) -> Value { + let str = "{\"execute\": \"query-vnc\"}".to_string(); + test_state.borrow_mut().qmp(&str) +} + +/// Brief: +/// 1. When received a framebuffer request from the client, the vnc server can +/// send the pixel which is changed to the client. +/// Preparation: +/// 1. Configure a demo pointer device and test GPU device. +/// 2. Start a VNC Server and listens on local ports. +/// 3. The demo gpu device create an image with size of 640 * 480 and sends it to VNC. +/// TestStep: +/// 1. VNC client connect to server. +/// 2. VNC client set pixel format Raw. +/// 3. VNC client send framebuffer request + Incremental +/// + The Demo GPU device changes the pixel and update image -> expect 1. +/// 4. The Demo GPU device changes the pixel and update image +/// + VNC client send framebuffer request + Incremental -> expect 1. +/// 5. VNC client set pixel format Raw. +/// 6. 
VNC client send framebuffer request + Incremental +/// + The Demo GPU device changes the pixel and update image -> expect 1. +/// ExpectOutput +/// 1. VNC client can receive image updates event, and the image format meets expectations. +#[test] +fn test_set_area_dirty() { + let port: u16 = 0; + let mut gpu_list: Vec = vec![]; + let gpu_conf = DemoGpuConfig { + pci_slot: 3, + id: "demo-pci-gpu".to_string(), + }; + gpu_list.push(gpu_conf); + let input_conf = InputConfig { + pci_slot: 4, + id: "demo-pci-input".to_string(), + }; + let (gpu_list, input, test_state) = set_up(gpu_list, input_conf, port); + let demo_gpu = gpu_list[0].clone(); + + let mut vnc_client = create_new_client(test_state.clone(), port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + + // Encoding -> Raw. + // Demo update image -> VNC client send update request. + assert!(vnc_client + .test_setup_encodings(None, Some(EncodingType::EncodingRaw)) + .is_ok()); + let pf = RfbPixelFormat::new(32, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + assert!(vnc_client.test_set_pixel_format(pf).is_ok()); + assert!(vnc_client + .test_update_request(UpdateState::Incremental, 0, 0, 640_u16, 480_u16,) + .is_ok()); + demo_gpu.borrow_mut().update_image_area(0, 0, 64, 64); + demo_gpu.borrow_mut().set_area_dirty(0, 0, 64, 64); + + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + assert!(res + .unwrap() + .contains(&(RfbServerMsg::FramebufferUpdate, EncodingType::EncodingRaw))); + + // Encoding -> Raw + // VNC client send update request -> Demo update image. + demo_gpu.borrow_mut().update_image_area(0, 0, 64, 64); + demo_gpu.borrow_mut().set_area_dirty(0, 0, 64, 64); + assert!(vnc_client + .test_update_request(UpdateState::Incremental, 0, 0, 640_u16, 480_u16,) + .is_ok()); + + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + assert!(res + .unwrap() + .contains(&(RfbServerMsg::FramebufferUpdate, EncodingType::EncodingRaw))); + + // Encoding -> Hextile. + // Demo update image -> VNC client send update request. + assert!(vnc_client + .test_setup_encodings(None, Some(EncodingType::EncodingHextile)) + .is_ok()); + let pf = RfbPixelFormat::new(32, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + assert!(vnc_client.test_set_pixel_format(pf).is_ok()); + assert!(vnc_client + .test_update_request(UpdateState::Incremental, 0, 0, 640_u16, 480_u16,) + .is_ok()); + demo_gpu.borrow_mut().update_image_area(0, 0, 64, 64); + demo_gpu.borrow_mut().set_area_dirty(0, 0, 64, 64); + + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + assert!(res.unwrap().contains(&( + RfbServerMsg::FramebufferUpdate, + EncodingType::EncodingHextile + ))); + + assert!(vnc_client.disconnect().is_ok()); + + tear_down(gpu_list, input, test_state); +} + +/// Brief: +/// 1. When received a framebuffer request from the client, the vnc server can +/// send the pixel which is changed to the client. +/// Preparation: +/// 1. Configure a demo pointer device and test GPU device. +/// 2. Start a VNC Server and listens on local ports. +/// 3. The demo gpu device create an image with size of 640 * 480 and sends it to VNC. +/// TestStep: +/// 1. VNC client connect to server. +/// 2. VNC client set pixel format (Raw of Hextile). +/// 3. VNC client send framebuffer request + NotIncremental +/// + The Demo GPU device changes the pixel and update image -> expect 1. +/// ExpectOutput +/// 1. VNC client can receive image updates event, and the image format meets expectations. 
+#[test] +fn test_set_multiple_area_dirty() { + let port: u16 = 8; + let mut gpu_list: Vec = vec![]; + let gpu_conf = DemoGpuConfig { + pci_slot: 3, + id: "demo-pci-gpu".to_string(), + }; + gpu_list.push(gpu_conf); + let input_conf = InputConfig { + pci_slot: 4, + id: "demo-pci-input".to_string(), + }; + let (gpu_list, input, test_state) = set_up(gpu_list, input_conf, port); + let demo_gpu = gpu_list[0].clone(); + let mut vnc_client = create_new_client(test_state.clone(), port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + // Encoding -> Raw. + // Multiple areas of image have been updated. + // Demo update image -> VNC client send update request. + assert!(vnc_client + .test_setup_encodings(None, Some(EncodingType::EncodingRaw)) + .is_ok()); + let pf = RfbPixelFormat::new(32, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + assert!(vnc_client.test_set_pixel_format(pf).is_ok()); + demo_gpu.borrow_mut().update_image_area(0, 0, 64, 64); + demo_gpu.borrow_mut().update_image_area(72, 72, 109, 109); + demo_gpu.borrow_mut().update_image_area(119, 120, 160, 160); + demo_gpu.borrow_mut().set_area_dirty(0, 0, 640, 480); + assert!(vnc_client + .test_update_request(UpdateState::NotIncremental, 0, 0, 640_u16, 480_u16,) + .is_ok()); + + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + assert!(res + .unwrap() + .contains(&(RfbServerMsg::FramebufferUpdate, EncodingType::EncodingRaw))); + + // Encoding -> Hextile. + // Multiple areas of image have been updated. + // Demo update image -> VNC client send update request. + assert!(vnc_client + .test_setup_encodings(None, Some(EncodingType::EncodingHextile)) + .is_ok()); + let pf = RfbPixelFormat::new(32, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + assert!(vnc_client.test_set_pixel_format(pf).is_ok()); + demo_gpu.borrow_mut().update_image_area(0, 0, 64, 64); + demo_gpu.borrow_mut().update_image_area(72, 72, 109, 109); + demo_gpu.borrow_mut().update_image_area(119, 120, 160, 160); + demo_gpu.borrow_mut().set_area_dirty(0, 0, 640, 480); + assert!(vnc_client + .test_update_request(UpdateState::NotIncremental, 0, 0, 640_u16, 480_u16,) + .is_ok()); + + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + assert!(res.unwrap().contains(&( + RfbServerMsg::FramebufferUpdate, + EncodingType::EncodingHextile + ))); + + assert!(vnc_client.disconnect().is_ok()); + + tear_down(gpu_list, input, test_state); +} + +/// Brief: +/// 1. VNC Server can update cursor image. +/// Preparation: +/// 1. Configure a demo pointer device and test GPU device. +/// 2. Start a VNC Server and listens on local ports. +/// 3. The demo gpu device create an image with size of 640 * 480 and sends it to VNC. +/// TestStep: +/// 1. VNC client connect to server. +/// 2. VNC client setting feature of EncodingRichCursor. +/// 3. Demo GPU update the cursor image of VNC server -> expect 1. +/// 4. VNC client setting feature of EncodingAlphaCursor. +/// 5. Demo GPU update the cursor image of VNC server -> expect 1. +/// 6. VNC client setting feature of EncodingRichCursor. +/// 7. Demo GPU update the abnormal cursor image of VNC server -> expect 2. +/// 8. VNC client setting feature of EncodingAlphaCursor. +/// 9. Demo GPU update the abnormal cursor image of VNC server -> expect 2. +/// ExpectOutput: +/// 1. The client receives the cursor image, and the format meets expect. +/// 2. The state of VNC client and server are normal, and the next normal connection will not be +/// effect. 
+#[test] +fn test_send_cursor_image() { + let port: u16 = 1; + let mut gpu_list: Vec = vec![]; + let gpu_conf = DemoGpuConfig { + pci_slot: 3, + id: "demo-pci-gpu".to_string(), + }; + gpu_list.push(gpu_conf); + let input_conf = InputConfig { + pci_slot: 4, + id: "demo-pci-input".to_string(), + }; + let (gpu_list, input, test_state) = set_up(gpu_list, input_conf, port); + let demo_gpu = gpu_list[0].clone(); + + let mut vnc_client = create_new_client(test_state.clone(), port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + assert!(vnc_client + .test_setup_encodings(Some(1), Some(EncodingType::EncodingRichCursor)) + .is_ok()); + let pf = RfbPixelFormat::new(32, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + demo_gpu + .borrow_mut() + .replace_cursor(64, 64, 16, 16, 64 * 64 * 4); + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + assert!(res.unwrap().contains(&( + RfbServerMsg::FramebufferUpdate, + EncodingType::EncodingRichCursor + ))); + + assert!(vnc_client + .test_setup_encodings(Some(1), Some(EncodingType::EncodingAlphaCursor)) + .is_ok()); + demo_gpu + .borrow_mut() + .replace_cursor(64, 64, 16, 16, 64 * 64 * 4); + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + assert!(res.unwrap().contains(&( + RfbServerMsg::FramebufferUpdate, + EncodingType::EncodingAlphaCursor + ))); + + assert!(vnc_client + .test_setup_encodings(Some(1), Some(EncodingType::EncodingRichCursor)) + .is_ok()); + demo_gpu.borrow_mut().replace_cursor(64, 64, 16, 16, 0); + assert!(vnc_client + .test_setup_encodings(Some(1), Some(EncodingType::EncodingAlphaCursor)) + .is_ok()); + demo_gpu.borrow_mut().replace_cursor(64, 64, 16, 16, 0); + assert!(vnc_client.disconnect().is_ok()); + + let mut vnc_client = create_new_client(test_state.clone(), port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + assert!(vnc_client.disconnect().is_ok()); + + tear_down(gpu_list, input, test_state); +} + +/// Brief: +/// When the surface size of VNC server is changed, server will inform +/// clients which has desktop_resize feature that the desktop size has been changed. +/// Preparation: +/// 1. Configure a demo pointer device and test GPU device. +/// 2. Start a VNC Server and listens on local ports. +/// 3. The demo gpu device create an image with size of 640 * 480 and sends it to VNC. +/// TestStep: +/// 1. VNC client connect to server. +/// 2. VNC client setting feature of EncodingDesktopresize. +/// 3. Demo GPU replace the surface of VNC server. -> expect 1. +/// 4. VNC client setting feature of Raw. +/// 5. Demo GPU replace the surface of VNC server. -> expect 2. +/// 6. VNC client setting feature of All. +/// 7. Demo GPU replace the surface of VNC server. -> expect 2. +/// ExpectOutput +/// 1. VNC client received a desktop resize request from VNC server. +/// 2. VNC client not received any desktop resize request from VNC server. 
+#[test] +fn test_desktop_resize() { + let port: u16 = 2; + let mut gpu_list: Vec = vec![]; + let gpu_conf = DemoGpuConfig { + pci_slot: 3, + id: "demo-pci-gpu".to_string(), + }; + gpu_list.push(gpu_conf); + let input_conf = InputConfig { + pci_slot: 4, + id: "demo-pci-input".to_string(), + }; + let (gpu_list, input, test_state) = set_up(gpu_list, input_conf, port); + let demo_gpu = gpu_list[0].clone(); + + demo_gpu + .borrow_mut() + .replace_surface(640, 480, PIXMAN_A8B8G8R8); + let mut vnc_client = create_new_client(test_state.clone(), port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + assert!(vnc_client + .test_setup_encodings(None, Some(EncodingType::EncodingDesktopresize)) + .is_ok()); + demo_gpu + .borrow_mut() + .replace_surface(1920, 1080, PIXMAN_A8B8G8R8); + + let pf = RfbPixelFormat::new(8, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + assert!(res.unwrap().contains(&( + RfbServerMsg::FramebufferUpdate, + EncodingType::EncodingDesktopresize + ))); + + assert!(vnc_client + .test_setup_encodings(None, Some(EncodingType::EncodingRaw)) + .is_ok()); + demo_gpu + .borrow_mut() + .replace_surface(640, 480, PIXMAN_A8B8G8R8); + let pf = RfbPixelFormat::new(8, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + let res = vnc_client.test_recv_server_data(pf); + assert!(!res.unwrap().contains(&( + RfbServerMsg::FramebufferUpdate, + EncodingType::EncodingDesktopresize + ))); + + assert!(vnc_client.test_setup_encodings(None, None).is_ok()); + demo_gpu + .borrow_mut() + .replace_surface(1920, 1080, PIXMAN_A8B8G8R8); + let pf = RfbPixelFormat::new(8, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + let res = vnc_client.test_recv_server_data(pf); + assert!(res.unwrap().contains(&( + RfbServerMsg::FramebufferUpdate, + EncodingType::EncodingDesktopresize + ))); + + assert!(vnc_client.disconnect().is_ok()); + tear_down(gpu_list, input, test_state); +} + +/// Brief: +/// The VNC server can receive image update request and return image data to client. +/// Preparation: +/// 1. Configure a demo pointer device and test GPU device. +/// 2. Start a VNC Server and listens on local ports. +/// 3. The demo gpu device create an image with size of 640 * 480 and sends it to VNC. +/// TestStep: +/// 1. VNC client connect to server. +/// 2. VNC client set pixel format. +/// 3. VNC client send framebuffer request + NotIncremental. +/// 4. VNC client check the image data from VNC server. +/// Situation: +/// 1. Pixel format: Raw + bit_per_pixel=32 + true_color_flag=1 -> expect 1. +/// 2. Pixel format: Raw + bit_per_pixel=16 + true_color_flag=1 -> expect 1. +/// 3. Pixel format: Raw + bit_per_pixel=8 + true_color_flag=0. -> expect 2. +/// 4. Pixel format: Hextile + bit_per_pixel=32 + true_color_flag=1 -> expect 1. +/// 5. Pixel format: Hextile + bit_per_pixel=8 + true_color_flag=1 -> expect 1. +/// 6. Pixel format: Hextile + bit_per_pixel=8 + true_color_flag=2 -> expect 2. +/// ExpectOutput: +/// 1. The image format meets expectations. +/// 2. The Image format meets expectations, VNC client receives the messages of set +/// color map information from VNC server. 
+#[test] +fn test_set_pixel_format() { + let port: u16 = 3; + let mut gpu_list: Vec = vec![]; + let gpu_conf = DemoGpuConfig { + pci_slot: 3, + id: "demo-pci-gpu".to_string(), + }; + gpu_list.push(gpu_conf); + let input_conf = InputConfig { + pci_slot: 4, + id: "demo-pci-input".to_string(), + }; + let (gpu_list, input, test_state) = set_up(gpu_list, input_conf, port); + + let mut vnc_client = create_new_client(test_state.clone(), port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + assert!(vnc_client + .test_setup_encodings(None, Some(EncodingType::EncodingRaw)) + .is_ok()); + assert!(vnc_client.stream_read_to_end().is_ok()); + + // Raw + bit_per_pixel=32 + true_color_flag=1. + let pf = RfbPixelFormat::new(32, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + assert!(vnc_client.test_set_pixel_format(pf).is_ok()); + assert!(vnc_client + .test_update_request(UpdateState::NotIncremental, 0, 0, 2560, 2048) + .is_ok()); + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + assert!(res + .unwrap() + .contains(&(RfbServerMsg::FramebufferUpdate, EncodingType::EncodingRaw))); + + // Raw + bit_per_pixel=16 + true_color_flag=1. + let pf = RfbPixelFormat::new(16, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + assert!(vnc_client.test_set_pixel_format(pf).is_ok()); + assert!(vnc_client + .test_update_request(UpdateState::NotIncremental, 0, 0, 2560, 2048) + .is_ok()); + + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + assert!(res + .unwrap() + .contains(&(RfbServerMsg::FramebufferUpdate, EncodingType::EncodingRaw))); + + // Raw + bit_per_pixel=8 + true_color_flag=0. + let pf = RfbPixelFormat::new(8, 8, 0_u8, 0_u8, 255, 255, 255, 16, 8, 0); + assert!(vnc_client.test_set_pixel_format(pf).is_ok()); + assert!(vnc_client + .test_update_request(UpdateState::NotIncremental, 0, 0, 2560, 2048) + .is_ok()); + + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + let res = res.unwrap(); + assert!(res.contains(&(RfbServerMsg::FramebufferUpdate, EncodingType::EncodingRaw))); + assert!(res.contains(&( + RfbServerMsg::SetColourMapEntries, + EncodingType::EncodingInvalid + ))); + + // Hextile + bit_per_pixel=32 + true_color_flag=1. + assert!(vnc_client + .test_setup_encodings(None, Some(EncodingType::EncodingHextile)) + .is_ok()); + assert!(vnc_client.stream_read_to_end().is_ok()); + let pf = RfbPixelFormat::new(32, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + assert!(vnc_client.test_set_pixel_format(pf).is_ok()); + assert!(vnc_client + .test_update_request(UpdateState::NotIncremental, 0, 0, 2560, 2048) + .is_ok()); + + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + assert!(res.unwrap().contains(&( + RfbServerMsg::FramebufferUpdate, + EncodingType::EncodingHextile + ))); + + // Hextile + bit_per_pixel=8 + true_color_flag=1. + assert!(vnc_client + .test_setup_encodings(None, Some(EncodingType::EncodingHextile)) + .is_ok()); + assert!(vnc_client.stream_read_to_end().is_ok()); + let pf = RfbPixelFormat::new(8, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + assert!(vnc_client.test_set_pixel_format(pf).is_ok()); + assert!(vnc_client + .test_update_request(UpdateState::NotIncremental, 0, 0, 2560, 2048) + .is_ok()); + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + assert!(res.unwrap().contains(&( + RfbServerMsg::FramebufferUpdate, + EncodingType::EncodingHextile + ))); + + // Hextile + bit_per_pixel=8 + true_color_flag=0. 
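+ // With true_color_flag = 0 the server maps pixels through a palette, so the
+ // client must additionally receive a SetColourMapEntries message; the
+ // assertions below check for it.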
+ assert!(vnc_client + .test_setup_encodings(None, Some(EncodingType::EncodingHextile)) + .is_ok()); + assert!(vnc_client.stream_read_to_end().is_ok()); + let pf = RfbPixelFormat::new(8, 8, 0_u8, 0_u8, 255, 255, 255, 16, 8, 0); + assert!(vnc_client.test_set_pixel_format(pf).is_ok()); + assert!(vnc_client + .test_update_request(UpdateState::NotIncremental, 0, 0, 2560, 2048) + .is_ok()); + + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + let res = res.unwrap(); + assert!(res.contains(&( + RfbServerMsg::FramebufferUpdate, + EncodingType::EncodingHextile + ))); + assert!(res.contains(&( + RfbServerMsg::SetColourMapEntries, + EncodingType::EncodingInvalid + ))); + assert!(vnc_client.disconnect().is_ok()); + + tear_down(gpu_list, input, test_state); +} + +/// Brief: +/// The VNC server can receive keyboard and pointer events. +/// Preparation: +/// 1. Configure a demo pointer device and test GPU device. +/// 2. Start a VNC Server and listens on local ports. +/// 3. The demo gpu device create an image with size of 640 * 480 and sends it to VNC. +/// TestStep +/// 1. VNC client connect to server. +/// 2. VNC client send key event -> expect 1. +/// 3. VNC client send pointer event -> expect 2. +/// ExpectOutput: +/// 1. VNC server received the keyboard event, the observed key value in demo keyboard device meets +/// the expectation. +/// 2. VNC server received the pointer event, the observed coordinate in demo pointer device has +/// been changed. +#[test] +fn test_vnc_kbd_mouse() { + let port: u16 = 4; + let mut gpu_list: Vec = vec![]; + let gpu_conf = DemoGpuConfig { + pci_slot: 3, + id: "demo-pci-gpu".to_string(), + }; + gpu_list.push(gpu_conf); + let input_conf = InputConfig { + pci_slot: 4, + id: "demo-pci-input".to_string(), + }; + let (gpu_list, input, test_state) = set_up(gpu_list, input_conf, port); + let demo_gpu = gpu_list[0].clone(); + + let mut vnc_client = create_new_client(test_state.clone(), port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + // Key event. + for &(name, keysym, keycode) in KEYEVENTLIST.iter() { + assert!(vnc_client.test_key_event(0, u32::from(keysym)).is_ok()); + let msg = input.borrow_mut().read_input_event(); + println!("key {:?}: {:?}", name, msg); + assert_eq!(msg.keycode, keycode); + assert_eq!(msg.down, 0); + assert!(vnc_client.test_key_event(1, u32::from(keysym)).is_ok()); + + let msg = input.borrow_mut().read_input_event(); + println!("key {:?}: {:?}", name, msg); + assert_eq!(msg.keycode, keycode); + assert_eq!(msg.down, 1); + } + + // Pointer event. + let (button_mask, x, y) = POINTEVENTLIST[0]; + assert!(vnc_client.test_point_event(button_mask, x, y).is_ok()); + let mut old_msg = input.borrow_mut().read_input_event(); + for &(button_mask, x, y) in POINTEVENTLIST[1..].iter() { + assert!(vnc_client.test_point_event(button_mask, x, y).is_ok()); + let msg = input.borrow_mut().read_input_event(); + // After sending the pointer event, the coordinate should be changed + assert!(!(old_msg.button == msg.button && old_msg.x == msg.x && old_msg.y == msg.y)); + old_msg = msg; + println!("msg: {:?}", msg); + } + assert!(vnc_client.disconnect().is_ok()); + demo_gpu.borrow_mut().deactive(); + tear_down(gpu_list, input, test_state); +} + +/// Brief: +/// The display device can be switched through Ctl+Alt+Num on VNC client. +/// Preparation: +/// 1. Configure a demo pointer device and two test GPU device. +/// 2. Start a VNC Server and listens on local ports. +/// 3. 
First demo gpu device create an image with size of 640 * 480. +/// 4. Second demo gpu device create an image with size of 1920 * 1080. +/// TestStep: +/// 1. VNC client connect to server. +/// 2. VNC client setting feature of EncodingDesktopresize. +/// 3. VNC client send the key event of Ctl+Alt+Num -> expect 1. +/// ExpectOutput: +/// 1. The activate display device is be changed, and the VNC client receive the message of +/// desktopresize. +#[test] +fn test_switch_display_device() { + let port: u16 = 5; + let mut gpu_list: Vec = vec![]; + let gpu_conf = DemoGpuConfig { + pci_slot: 3, + id: "demo-pci-gpu_1".to_string(), + }; + gpu_list.push(gpu_conf); + let gpu_conf = DemoGpuConfig { + pci_slot: 4, + id: "demo-pci-gpu_2".to_string(), + }; + gpu_list.push(gpu_conf); + let input_conf = InputConfig { + pci_slot: 5, + id: "demo-pci-input".to_string(), + }; + let (gpu_list, input, test_state) = set_up(gpu_list, input_conf, port); + let demo_gpu_1 = gpu_list[0].clone(); + let demo_gpu_2 = gpu_list[1].clone(); + demo_gpu_1 + .borrow_mut() + .replace_surface(640, 480, PIXMAN_A8B8G8R8); + demo_gpu_2 + .borrow_mut() + .replace_surface(1920, 1080, PIXMAN_A8B8G8R8); + + let mut vnc_client = create_new_client(test_state.clone(), port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + assert!(vnc_client + .test_setup_encodings(None, Some(EncodingType::EncodingDesktopresize)) + .is_ok()); + + // Ctl + Alt + 2. + assert!(vnc_client.test_key_event(1, 0xffe3).is_ok()); + assert!(vnc_client.test_key_event(1, 0xffe9).is_ok()); + assert!(vnc_client.test_key_event(1, 0x32).is_ok()); + + let pf = RfbPixelFormat::new(8, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + let res = vnc_client.test_recv_server_data(pf); + assert!(res.is_ok()); + assert!(res.unwrap().contains(&( + RfbServerMsg::FramebufferUpdate, + EncodingType::EncodingDesktopresize + ))); + + tear_down(gpu_list, input, test_state); +} + +/// Brief: +/// Test possible exceptions during image update. +/// Preparation: +/// 1. Configure a demo pointer device and test GPU device. +/// 2. Start a VNC Server and listens on local ports. +/// 3. The demo gpu device create an image with size of 640 * 480 and sends it to VNC. +/// TestStep: +/// 1. VNC client connect to server. +/// 2. Demo GPU do some operation. +/// Abnormal Situation: +/// 1. The area to set dirty is out of range -> expect 1. +/// 2. The image size exceeds value -> expect 1. +/// 3. Switch different pixman formats -> expect 1. +/// ExpectOutput: +/// 1. The status of VNC server status is normal and can handle the next connect request. 
+#[test] +fn test_update_image_abnormal() { + let port: u16 = 6; + let mut gpu_list: Vec = vec![]; + let gpu_conf = DemoGpuConfig { + pci_slot: 3, + id: "demo-pci-gpu".to_string(), + }; + gpu_list.push(gpu_conf); + let input_conf = InputConfig { + pci_slot: 4, + id: "demo-pci-input".to_string(), + }; + let (gpu_list, input, test_state) = set_up(gpu_list, input_conf, port); + let demo_gpu = gpu_list[0].clone(); + + demo_gpu + .borrow_mut() + .replace_surface(640, 480, PIXMAN_A8B8G8R8); + demo_gpu.borrow_mut().set_area_dirty(0, 0, 65535, 65535); + demo_gpu + .borrow_mut() + .replace_surface(65535, 65535, PIXMAN_A8B8G8R8); + demo_gpu + .borrow_mut() + .replace_surface(640, 480, PIXMAN_X2R10G10B10); + demo_gpu + .borrow_mut() + .replace_surface(1080, 720, PIXMAN_R8G8B8); + demo_gpu.borrow_mut().replace_surface(640, 480, PIXMAN_A1); + demo_gpu + .borrow_mut() + .replace_surface(1080, 720, PIXMAN_YUY2); + demo_gpu + .borrow_mut() + .replace_surface(640, 480, PIXMAN_A8B8G8R8); + let mut vnc_client = create_new_client(test_state.clone(), port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + let value = qmp_query_vnc(test_state.clone()); + let client_num = value["return"]["clients"].as_array().unwrap().len(); + assert!(client_num >= 1); + assert!(vnc_client.disconnect().is_ok()); + tear_down(gpu_list, input, test_state); +} + +fn test_rfb_version_abnormal(test_state: Rc>, port: u16) -> Result<()> { + let mut buf: Vec = Vec::new(); + let mut vnc_client = create_new_client(test_state, port).unwrap(); + println!("Connect to server."); + assert!(vnc_client.read_msg(&mut buf, 12).is_ok()); + assert_eq!(buf[..12].to_vec(), "RFB 003.008\n".as_bytes().to_vec()); + println!("Client Rfb version: RFB 003.010"); + assert!(vnc_client.write_msg("RFB 003.010\n".as_bytes()).is_ok()); + buf.drain(..12); + // VNC server closed connection. + let res = vnc_client.epoll_wait(EventSet::READ_HANG_UP); + assert!(res.is_ok()); + assert!(res.unwrap() > 0); + assert_ne!( + vnc_client.ready_events[0].events() & EventSet::READ_HANG_UP.bits(), + 0 + ); + assert!(vnc_client.disconnect().is_ok()); + + Ok(()) +} + +fn test_unsupported_sec_type(test_state: Rc>, port: u16) -> Result<()> { + let mut buf: Vec = Vec::new(); + let mut vnc_client = create_new_client(test_state, port).unwrap(); + println!("Connect to server."); + assert!(vnc_client.read_msg(&mut buf, 12).is_ok()); + assert_eq!(buf[..12].to_vec(), "RFB 003.008\n".as_bytes().to_vec()); + assert!(vnc_client.write_msg("RFB 003.008\n".as_bytes()).is_ok()); + buf.drain(..12); + + // Step 2: Auth num is 1. + assert!(vnc_client.read_msg(&mut buf, 1).is_ok()); + let auth_num = buf[0]; + assert!(auth_num > 0); + buf.drain(..1); + assert!(vnc_client.read_msg(&mut buf, auth_num as usize).is_ok()); + buf.drain(..auth_num as usize); + assert!(vnc_client + .write_msg((TestAuthType::Invalid as u8).to_be_bytes().as_ref()) + .is_ok()); + // VNC server close the connection. 
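+ // Waiting for READ_HANG_UP confirms the server actively dropped the TCP
+ // connection after the unsupported security type was sent.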
+ let res = vnc_client.epoll_wait(EventSet::READ_HANG_UP); + assert!(res.is_ok()); + assert!(res.unwrap() > 0); + assert_ne!( + vnc_client.ready_events[0].events() & EventSet::READ_HANG_UP.bits(), + 0 + ); + assert!(vnc_client.disconnect().is_ok()); + + Ok(()) +} + +fn test_set_pixel_format_abnormal(test_state: Rc>, port: u16) -> Result<()> { + let mut vnc_client = create_new_client(test_state, port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + let pf = RfbPixelFormat::new(17, 8, 0_u8, 1_u8, 255, 255, 255, 16, 8, 0); + assert!(vnc_client.test_set_pixel_format(pf).is_ok()); + + // VNC server close the connection. + let res = vnc_client.epoll_wait(EventSet::READ_HANG_UP)?; + assert!(res > 0); + assert_ne!( + vnc_client.ready_events[0].events() & EventSet::READ_HANG_UP.bits(), + 0 + ); + + assert!(vnc_client.disconnect().is_ok()); + Ok(()) +} + +fn test_set_encoding_abnormal(test_state: Rc>, port: u16) -> Result<()> { + let mut vnc_client = create_new_client(test_state.clone(), port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + assert!(vnc_client.test_setup_encodings(Some(100), None).is_ok()); + // Send a qmp to query vnc client state. + let value = qmp_query_vnc(test_state); + let client_num = value["return"]["clients"].as_array().unwrap().len(); + assert_eq!(client_num, 1); + assert!(vnc_client.disconnect().is_ok()); + Ok(()) +} + +fn test_client_cut_event(test_state: Rc>, port: u16) -> Result<()> { + let mut vnc_client = create_new_client(test_state.clone(), port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + let text = "Stratovirt".to_string(); + let client_cut = TestClientCut { + event_type: RfbClientMessage::RfbClientCutText, + pad0: 0, + pad1: 0, + length: text.len() as u32, + text, + }; + assert!(vnc_client.test_send_client_cut(client_cut).is_ok()); + // Send a qmp to query vnc client state. + let value = qmp_query_vnc(test_state); + let client_num = value["return"]["clients"].as_array().unwrap().len(); + assert_eq!(client_num, 1); + assert!(vnc_client.disconnect().is_ok()); + Ok(()) +} + +fn test_client_rand_bytes(test_state: Rc>, port: u16) -> Result<()> { + let mut vnc_client = create_new_client(test_state, port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + let mut buf = TEST_CLIENT_RAND_MSG; + vnc_client.write_msg(&mut buf)?; + assert!(vnc_client.disconnect().is_ok()); + Ok(()) +} + +/// Brief: +/// Test possible exceptions during RFB protocol connection. +/// Preparation: +/// 1. Configure a demo pointer device and test GPU device. +/// 2. Start a VNC Server and listens on local ports. +/// 3. The demo gpu device create an image with size of 640 * 480 and sends it to VNC. +/// TestStep: +/// 1. VNC client connect to server. +/// 2. VNC client set pixel format. +/// 3. VNC client send framebuffer request + NotIncremental. +/// 4. VNC client check the image data from VNC server. +/// Abnormal Situation: +/// 1. Unsupported RFB version -> expect 1 + 2. +/// 2. Unsupported security type -> expect 1 + 2. +/// 3. The message of set pixel formal is abnormal -> expect 1 + 2. +/// 4. Send the rand bytes from the client -> expect 2. +/// 5. Send set encoding event: encoding number abnormal -> expect 2. +/// 6. Unsupported event: Client cut event is not supported now -> expect 2. +/// ExpectOutput: +/// 1. VNC server close the connection. +/// 2. The status of VNC server status is normal and can handle the next connect request. 
+#[test] +fn test_rfb_abnormal() { + let port: u16 = 7; + let mut gpu_list: Vec = vec![]; + let gpu_conf = DemoGpuConfig { + pci_slot: 3, + id: "demo-pci-gpu".to_string(), + }; + gpu_list.push(gpu_conf); + let input_conf = InputConfig { + pci_slot: 4, + id: "demo-pci-input".to_string(), + }; + let (gpu_list, input, test_state) = set_up(gpu_list, input_conf, port); + + assert!(test_rfb_version_abnormal(test_state.clone(), port).is_ok()); + assert!(test_unsupported_sec_type(test_state.clone(), port).is_ok()); + assert!(test_set_pixel_format_abnormal(test_state.clone(), port).is_ok()); + assert!(test_set_encoding_abnormal(test_state.clone(), port).is_ok()); + assert!(test_client_cut_event(test_state.clone(), port).is_ok()); + assert!(test_client_rand_bytes(test_state.clone(), port).is_ok()); + + let mut vnc_client = create_new_client(test_state.clone(), port).unwrap(); + assert!(vnc_client.connect(TestAuthType::VncAuthNone).is_ok()); + let value = qmp_query_vnc(test_state.clone()); + let client_num = value["return"]["clients"].as_array().unwrap().len(); + assert_eq!(client_num, 1); + assert!(vnc_client.disconnect().is_ok()); + + tear_down(gpu_list, input, test_state); +} diff --git a/tests/mod_test/tests/x86_64/cpu_hotplug_test.rs b/tests/mod_test/tests/x86_64/cpu_hotplug_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..6a279843d33dec4d5208778773ee93bf19865699 --- /dev/null +++ b/tests/mod_test/tests/x86_64/cpu_hotplug_test.rs @@ -0,0 +1,249 @@ +// Copyright (c) 2023 China Telecom Co.,Ltd. All rights reserved. +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::borrow::BorrowMut; +use std::thread::sleep; +use std::time::Duration; + +use serde_json::{json, Value}; + +use machine::x86_64::standard::{LayoutEntryType, MEM_LAYOUT}; +use mod_test::libtest::{test_init, TestState, MACHINE_TYPE_ARG}; + +const GED_ADDR_BASE: u64 = MEM_LAYOUT[LayoutEntryType::GedMmio as usize].0; + +fn ged_read_evt(ts: &TestState) -> u32 { + ts.readl(GED_ADDR_BASE) +} + +fn set_up(cpu: u8, max_cpus: Option) -> TestState { + // Vm extra_args. 
+ let mut extra_args: Vec<&str> = Vec::new(); + let mut args: Vec<&str> = MACHINE_TYPE_ARG.split(' ').collect(); + extra_args.append(&mut args); + + let cpu_args = if let Some(max_cpus) = max_cpus { + format!("-smp {},maxcpus={}", cpu, max_cpus) + } else { + format!("-smp {}", cpu) + }; + args = cpu_args[..].split(' ').collect(); + extra_args.append(&mut args); + + let mem_args = "-m 512".to_string(); + args = mem_args[..].split(' ').collect(); + extra_args.append(&mut args); + + extra_args.push("-append"); + extra_args.push("root=/dev/vda panic=1"); + + let uefi_drive = + "-drive file=/usr/share/edk2/ovmf/OVMF_CODE.fd,if=pflash,unit=0,readonly=true".to_string(); + args = uefi_drive[..].split(' ').collect(); + extra_args.append(&mut args); + + let root_device = + "-device pcie-root-port,port=0x0,addr=0x1.0x0,bus=pcie.0,id=pcie.1".to_string(); + args = root_device[..].split(' ').collect(); + extra_args.append(&mut args); + + args = "-disable-seccomp -daemonize".split(' ').collect(); + extra_args.append(&mut args); + + test_init(extra_args) +} + +fn hotplug_cpu(test_state: &mut TestState, id: &str, cpu_id: u8) -> Value { + test_state.borrow_mut().qmp(&format!( + "{{\"execute\": \"device_add\",\"arguments\": {{ \"id\": \"{id}\", \"driver\": \"generic-x86-cpu\", \"cpu-id\": {cpu_id}}}}}" + )) +} + +fn hotunplug_cpu(test_state: &mut TestState, id: &str) -> Value { + test_state.borrow_mut().qmp(&format!( + "{{\"execute\": \"device_del\", \"arguments\": {{\"id\": \"{id}\"}}}}" + )) +} + +fn assert_response(result: Value, index: &str, expect: Option) { + if index == "return" { + assert_eq!(*result.get("return").unwrap(), json!({})) + } else { + assert_eq!( + result["error"]["desc"].as_str().unwrap().to_string(), + expect.unwrap(), + ) + } +} + +/// Normal cpu hotplug. +/// TestStep: +/// 1. Send id vcpu-1 and cpu-id 1 hotplug qmp command. +/// 2. Read ged event, expect 16. +/// 3. Destroy VM. +/// Expect: +/// 1/2/3: success. +#[test] +fn normal_hotplug_cpu() { + let mut ts = set_up(1, Some(2)); + + let ret = hotplug_cpu(&mut ts, "vcpu-1", 1); + assert_response(ret, "return", None); + sleep(Duration::from_micros(200)); + + let event = ged_read_evt(&ts); + assert_eq!(event, 16); + + ts.borrow_mut().stop(); +} + +/// Normal cpu hotunplug. +/// TestStep: +/// 1. Send id vcpu-1 and cpu-id 1 hotplug qmp command. +/// 2. Send id vcpu-1 hotunplug qmp command. +/// 3. Read ged event, expect 16. +/// 4. Destroy VM. +/// Expect: +/// 1/2/3/4: success. +#[test] +fn normal_hotunplug_cpu() { + let mut ts = set_up(1, Some(2)); + + // Hotplug vcpu-1. + let ret = hotplug_cpu(&mut ts, "vcpu-1", 1); + assert_response(ret, "return", None); + ts.qmp_read(); + + // Hotunplug vcpu-1. + let ret = hotunplug_cpu(&mut ts, "vcpu-1"); + assert_response(ret, "return", None); + + let event = ged_read_evt(&ts); + assert_eq!(event, 16); + + ts.borrow_mut().stop(); +} + +/// Hotplug cpu with an existed id. +/// TestStep: +/// 1. Send id vcpu-1 and cpu-id 1 hotplug qmp command. +/// 2. Send id vcpu-1 and cpu-id 2 hotplug qmp command. +/// 3. Destroy VM. +/// Expect: +/// 1/3: Success. +/// 2: Failed. +#[test] +fn existed_id_hotplug_cpu() { + let mut ts = set_up(1, Some(3)); + + // Hotplug vcpu-1. + let ret = hotplug_cpu(&mut ts, "vcpu-1", 1); + assert_response(ret, "return", None); + ts.qmp_read(); + + // Hotplug vcpu-1. 
+ let ret = hotplug_cpu(&mut ts, "vcpu-1", 2); + assert_response( + ret, + "error", + Some("Device id vcpu-1 already existed.".to_string()), + ); + + ts.borrow_mut().stop(); +} + +/// Hotplug cpu with an existed cpu id. +/// TestStep: +/// 1. Send id vcpu-1 and cpu-id 1 hotplug qmp command. +/// 2. Send id vcpu-2 and cpu-id 1 hotplug qmp command. +/// 3. Destroy VM. +/// Expect: +/// 1/3: Success. +/// 2: Failed. +#[test] +fn existed_cpuid_hotplug_cpu() { + let mut ts = set_up(1, Some(3)); + + let ret = hotplug_cpu(&mut ts, "vcpu-1", 1); + assert_response(ret, "return", None); + ts.qmp_read(); + + let ret = hotplug_cpu(&mut ts, "vcpu-2", 1); + assert_response( + ret, + "error", + Some("Cpu-id 1 is running, device id is vcpu-1.".to_string()), + ); + + ts.borrow_mut().stop(); +} + +/// Hotplug cpu with empty id. +/// TestStep: +/// 1. Send empty id and cpu-id 1 hotplug qmp command. +/// 2. Destroy VM. +/// Expect: +/// 2: Success. +/// 1: Failed. +#[test] +fn empty_id_hotplug_cpu() { + let mut ts = set_up(1, Some(2)); + + let ret = hotplug_cpu(&mut ts, "", 1); + assert_response(ret, "error", Some("Device id is empty".to_string())); + + ts.borrow_mut().stop(); +} + +/// Hotplug cpu with an overrange cpu id. +/// TestStep: +/// 1. Send id vcpu-1 and cpu-id 1 hotplug qmp command. +/// 2. Send id vcpu-2 and cpu-id 2 hotplug qmp command. +/// 3. Destroy VM. +/// Expect: +/// 1/3: Success. +/// 2: Failed. +#[test] +fn overrange_hotplug_cpu() { + let mut ts = set_up(1, Some(2)); + + let ret = hotplug_cpu(&mut ts, "vcpu-1", 1); + assert_response(ret, "return", None); + ts.qmp_read(); + + let ret = hotplug_cpu(&mut ts, "vcpu-2", 2); + assert_response(ret, "error", Some("Max cpu-id is 1".to_string())); + + ts.borrow_mut().stop(); +} + +/// Hotplug cpu when max_cpus is not explicitly configured. +/// TestSetp: +/// 1. Send id vcpu-1 and cpu-id 1 hotplug qmp command. +/// 2. Destroy VM. +/// Expect: +/// 2: Success. +/// 1: Failed. +#[test] +fn without_config_max_cpus_hotplug_cpu() { + let mut ts = set_up(1, None); + + let ret = hotplug_cpu(&mut ts, "vcpu-1", 1); + assert_response( + ret, + "error", + Some("There is no hotpluggable cpu-id for this VM.".to_string()), + ); + + ts.borrow_mut().stop(); +} diff --git a/tests/mod_test/tests/x86_64/mod.rs b/tests/mod_test/tests/x86_64/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..42cf6f78c9d421500af95e7ae5b42a2e217fddbf --- /dev/null +++ b/tests/mod_test/tests/x86_64/mod.rs @@ -0,0 +1,14 @@ +// Copyright (c) 2023 China Telecom Co.,Ltd. All rights reserved. +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +mod cpu_hotplug_test; diff --git a/tools/build_stratovirt_static/Dockerfile b/tools/build_stratovirt_static/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..dcc66aae6dc21b863fde721db09f62f72097d12e --- /dev/null +++ b/tools/build_stratovirt_static/Dockerfile @@ -0,0 +1,23 @@ +# Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +# +# StratoVirt is licensed under Mulan PSL v2. 
+# You can use this software according to the terms and conditions of the Mulan +# PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +from openeuler/openeuler:22.03-lts-sp2 + +ARG ARCH + +RUN yum update -y && \ + yum upgrade -y && \ + yum install -y cargo musl-gcc cyrus-sasl-devel && \ + yum install -y libcap-devel libcap-ng-devel libseccomp-devel && \ + if [ "${ARCH}" == aarch64 ]; then yum install -y dtc-devel; fi && \ + yum clean all + diff --git a/tools/build_stratovirt_static/build-stratovirt-from-docker.sh b/tools/build_stratovirt_static/build-stratovirt-from-docker.sh new file mode 100755 index 0000000000000000000000000000000000000000..a1708e7f9f06fef6d2aefa71ad171ead173226f5 --- /dev/null +++ b/tools/build_stratovirt_static/build-stratovirt-from-docker.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# +# Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +# +# StratoVirt is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan +# PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +set -o errexit +set -o nounset +set -o pipefail + +build_stratovirt() { + sudo "${container_engine}" run \ + --rm -i \ + --env ARCH="${ARCH}" \ + -v "${repo_root_dir}:/root/stratovirt" \ + "${container_image}" \ + bash -c "cd /root/stratovirt && ${CARGO} build --workspace --bin stratovirt --release --target=${RUST_TARGET}" +} + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +repo_root_dir="$(cd ${script_dir}/../.. 
&& pwd)" + +ARCH=${ARCH:-$(uname -m)} +CARGO="/usr/bin/env CARGO_HOME=.cargo RUSTC_BOOTSTRAP=1 /usr/bin/cargo" +container_engine="${container_engine:-docker}" +container_image="${container_image:-$1}" + +if [ "${ARCH}" == "x86_64" ]; then RUST_TARGET="x86_64-unknown-linux-musl"; fi +if [ "${ARCH}" == "aarch64" ]; then RUST_TARGET="aarch64-unknown-linux-musl"; fi + +echo "Building StratoVirt with ${RUST_TARGET}" + +sudo "${container_engine}" build \ + --build-arg ARCH="${ARCH}" \ + "${repo_root_dir}" \ + -f "${script_dir}/Dockerfile" \ + -t "${container_image}" && \ + +build_stratovirt + diff --git a/trace/Cargo.toml b/trace/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..4fd31409de9fea6bf25f507922ef9a66d9c9d9c6 --- /dev/null +++ b/trace/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "trace" +version = "2.4.0" +authors = ["Huawei StratoVirt Team"] +edition = "2021" +license = "Mulan PSL v2" +description = "Provide tracing infrastructure for StratoVirt" + +[dependencies] +log = "0.4" +lazy_static = "1.4.0" +regex = "1" +anyhow = "1.0" +trace_generator = { path = "trace_generator" } +vmm-sys-util = "0.12.1" +libloading = "0.7.4" + +[features] +trace_to_logger = [] +trace_to_ftrace = [] +trace_to_hitrace = [] diff --git a/trace/build.rs b/trace/build.rs new file mode 100644 index 0000000000000000000000000000000000000000..d8a01f1d4bddaa448b11770fe35ece8676e198d3 --- /dev/null +++ b/trace/build.rs @@ -0,0 +1,18 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +fn main() { + println!( + "cargo:rerun-if-changed={}/trace_info", + std::env::var("CARGO_MANIFEST_DIR").unwrap() + ); +} diff --git a/trace/src/ftrace.rs b/trace/src/ftrace.rs new file mode 100644 index 0000000000000000000000000000000000000000..a6bf373dd2ca5085d4bd894eab922570034497d8 --- /dev/null +++ b/trace/src/ftrace.rs @@ -0,0 +1,70 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + fs::{File, OpenOptions}, + io::{prelude::Write, BufRead, BufReader}, + sync::Mutex, +}; + +use lazy_static::lazy_static; + +lazy_static! 
{ + static ref TRACE_MARKER_FD: Mutex = Mutex::new(open_trace_marker()); +} + +pub(crate) fn open_trace_marker() -> File { + let mounts_path: &str = "/proc/mounts"; + let mounts_fd = File::open(mounts_path) + .unwrap_or_else(|e| panic!("Failed to open {}: {:?}", mounts_path, e)); + let mut reader = BufReader::new(mounts_fd); + let target_line = loop { + let mut buffer = String::new(); + reader + .read_line(&mut buffer) + .unwrap_or_else(|e| panic!("Read {} error: {:?}", &mounts_path, e)); + match buffer.as_str() { + "" => { + panic!("Failed to get mount point of tracefs") + } + _ => { + if buffer.contains("tracefs") { + break buffer; + } + } + } + }; + let fields: Vec<&str> = target_line.split(' ').collect(); + let tracefs_mount_point = fields + .get(1) + .unwrap_or_else(|| panic!("Failed to get mount point of tracefs")) + .to_string(); + + let tracing_on_path = format!("{}/tracing_on", tracefs_mount_point); + let mut tracing_on_fd = OpenOptions::new() + .write(true) + .open(&tracing_on_path) + .unwrap_or_else(|e| panic!("Failed to open {}: {:?}", tracing_on_path, e)); + tracing_on_fd + .write_all(b"1") + .unwrap_or_else(|e| panic!("Failed to enable tracing_on: {:?}", e)); + + let trace_marker_path = format!("{}/trace_marker", tracefs_mount_point); + OpenOptions::new() + .write(true) + .open(&trace_marker_path) + .unwrap_or_else(|e| panic!("Failed to open {}: {:?}", trace_marker_path, e)) +} + +pub fn write_trace_marker(buf: &str) { + let _result = TRACE_MARKER_FD.lock().unwrap().write_all(buf.as_bytes()); +} diff --git a/trace/src/hitrace.rs b/trace/src/hitrace.rs new file mode 100644 index 0000000000000000000000000000000000000000..625ea6209c3a1e48d6fc0e654f87d5fe0ed1174d --- /dev/null +++ b/trace/src/hitrace.rs @@ -0,0 +1,118 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::ffi::OsStr; + +use anyhow::{Context, Result}; +use lazy_static::lazy_static; +use libloading::os::unix::Symbol; +use libloading::Library; +use log::error; + +const HITRACE_TAG_VIRSE: u64 = 1u64 << 11; + +lazy_static! { + static ref HITRACE_FUNC_TABLE: HitraceFuncTable = + // SAFETY: The dynamic library should be always existing. + unsafe { + HitraceFuncTable::new(OsStr::new("libhitrace_meter.so")) + .map_err(|e| { + error!("failed to init HitraceFuncTable with error: {:?}", e); + e + }) + .unwrap() + }; +} + +macro_rules! get_libfn { + ( $lib: ident, $tname: ident, $fname: ident ) => { + $lib.get::<$tname>(stringify!($fname).as_bytes()) + .with_context(|| format!("failed to get function {}", stringify!($fname)))? 
+ .into_raw() + }; +} + +type StartTraceWrapperFn = unsafe extern "C" fn(u64, *const u8); +type FinishTraceFn = unsafe extern "C" fn(u64); +type StartAsyncTraceWrapperFn = unsafe extern "C" fn(u64, *const u8, i32); +type FinishAsyncTraceWrapperFn = unsafe extern "C" fn(u64, *const u8, i32); + +struct HitraceFuncTable { + pub start_trace: Symbol<StartTraceWrapperFn>, + pub finish_trace: Symbol<FinishTraceFn>, + pub start_trace_async: Symbol<StartAsyncTraceWrapperFn>, + pub finish_trace_async: Symbol<FinishAsyncTraceWrapperFn>, +} + +impl HitraceFuncTable { + unsafe fn new(library_name: &OsStr) -> Result<Self> { + let library = + Library::new(library_name).with_context(|| "failed to load hitrace_meter library")?; + + Ok(Self { + start_trace: get_libfn!(library, StartTraceWrapperFn, StartTraceWrapper), + finish_trace: get_libfn!(library, FinishTraceFn, FinishTrace), + start_trace_async: get_libfn!( + library, + StartAsyncTraceWrapperFn, + StartAsyncTraceWrapper + ), + finish_trace_async: get_libfn!( + library, + FinishAsyncTraceWrapperFn, + FinishAsyncTraceWrapper + ), + }) + } +} + +pub fn start_trace(value: &str) { + if let Ok(value_ptr) = std::ffi::CString::new(value) { + // SAFETY: All parameters have been checked. + unsafe { + (HITRACE_FUNC_TABLE.start_trace)(HITRACE_TAG_VIRSE, value_ptr.as_ptr() as *const u8) + } + } +} + +pub fn finish_trace() { + // SAFETY: All parameters have been checked. + unsafe { + (HITRACE_FUNC_TABLE.finish_trace)(HITRACE_TAG_VIRSE); + } +} + +pub fn start_trace_async(value: &str, task_id: i32) { + if let Ok(value_ptr) = std::ffi::CString::new(value) { + // SAFETY: All parameters have been checked. + unsafe { + (HITRACE_FUNC_TABLE.start_trace_async)( + HITRACE_TAG_VIRSE, + value_ptr.as_ptr() as *const u8, + task_id, + ) + } + } +} + +pub fn finish_trace_async(value: &str, task_id: i32) { + if let Ok(value_ptr) = std::ffi::CString::new(value) { + // SAFETY: All parameters have been checked. + unsafe { + (HITRACE_FUNC_TABLE.finish_trace_async)( + HITRACE_TAG_VIRSE, + value_ptr.as_ptr() as *const u8, + task_id, + ) + } + } +} diff --git a/trace/src/lib.rs b/trace/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..ba8aa2ab1ef761ce1ce1e4a8d5ddf9a6aed14492 --- /dev/null +++ b/trace/src/lib.rs @@ -0,0 +1,159 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details.
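+
+//! A minimal usage sketch of the generated API (the `demo_event` event, the `demo_scope`
+//! scope and the `id` argument are hypothetical names standing in for entries declared in
+//! `trace_info/*.toml`; they are illustrative only, not defined in this crate):
+//!
+//! ```ignore
+//! // Emit an event declared as `name = "demo_event"` with `args = "id: u8"`.
+//! trace::demo_event(id);
+//! // Trace a scope until the end of the enclosing block.
+//! trace::trace_scope_start!(demo_scope);
+//! // Toggle trace states at runtime by regex pattern.
+//! trace::set_state_by_pattern("demo.*".to_string(), true)?;
+//! ```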
+ +#[cfg(feature = "trace_to_ftrace")] +pub(crate) mod ftrace; +#[cfg(all(target_env = "ohos", feature = "trace_to_hitrace"))] +pub(crate) mod hitrace; +#[cfg(any( + feature = "trace_to_logger", + feature = "trace_to_ftrace", + all(target_env = "ohos", feature = "trace_to_hitrace") +))] +pub mod trace_scope; + +use std::{ + fmt, + os::unix::io::RawFd, + sync::atomic::{AtomicBool, Ordering}, + sync::Arc, +}; + +use anyhow::{Ok, Result}; +use lazy_static::lazy_static; +use log::warn; +use regex::Regex; +use vmm_sys_util::eventfd::EventFd; + +use trace_generator::{ + add_trace_state_to, gen_trace_event_func, gen_trace_scope_func, gen_trace_state, +}; + +#[derive(PartialEq, Eq)] +pub enum TraceType { + Event, + Scope, + Unknown, +} + +struct TraceState { + name: String, + trace_type: TraceType, + get_state: fn() -> bool, + set_state: fn(bool), +} + +impl TraceState { + fn new(name: String, type_str: &str, get_state: fn() -> bool, set_state: fn(bool)) -> Self { + let trace_type = match type_str { + "event" => TraceType::Event, + "scope" => TraceType::Scope, + _ => { + warn!("The type of {} is Unknown: {}", name, type_str); + TraceType::Unknown + } + }; + TraceState { + name, + trace_type, + get_state, + set_state, + } + } +} + +#[derive(Default)] +struct TraceStateSet { + state_list: Vec, +} + +impl TraceStateSet { + fn add_trace_state(&mut self, state: TraceState) { + self.state_list.push(state); + } + + fn set_state_by_pattern(&self, pattern: String, target_state: bool) -> Result<()> { + let re = Regex::new(&pattern)?; + for state in &self.state_list { + if re.is_match(&state.name) { + (state.set_state)(target_state); + } + } + Ok(()) + } + + fn enable_state_by_type(&self, trace_type: TraceType) -> Result<()> { + for state in &self.state_list { + if state.trace_type == trace_type { + (state.set_state)(true); + } + } + Ok(()) + } + + fn get_state_by_pattern(&self, pattern: String) -> Result> { + let re = Regex::new(&pattern)?; + let mut ret: Vec<(String, bool)> = Vec::new(); + for state in &self.state_list { + if re.is_match(&state.name) { + ret.push((state.name.to_string(), (state.get_state)())); + } + } + Ok(ret) + } +} + +gen_trace_state! {} + +lazy_static! { + static ref TRACE_STATE_SET: TraceStateSet = { + let mut set = TraceStateSet::default(); + add_trace_state_to!(set); + set + }; +} + +gen_trace_event_func! {} + +gen_trace_scope_func! {} + +#[macro_export] +macro_rules! trace_scope_start { + ($func: ident) => { + let _scope = trace::$func(false); + }; + ($func: ident, args=($($args: expr),+)) => { + let _scope = trace::$func(false, $($args),+); + }; +} + +#[macro_export] +macro_rules! trace_scope_asyn_start { + ($func: ident) => { + let _scope = trace::$func(true); + }; + ($func: ident, args=($($args: expr),+)) => { + let _scope = trace::$func(true, $($args),+); + }; +} + +pub fn get_state_by_pattern(pattern: String) -> Result> { + TRACE_STATE_SET.get_state_by_pattern(pattern) +} + +pub fn set_state_by_pattern(pattern: String, state: bool) -> Result<()> { + TRACE_STATE_SET.set_state_by_pattern(pattern, state) +} + +pub fn enable_state_by_type(trace_type: TraceType) -> Result<()> { + TRACE_STATE_SET.enable_state_by_type(trace_type) +} diff --git a/trace/src/trace_scope.rs b/trace/src/trace_scope.rs new file mode 100644 index 0000000000000000000000000000000000000000..a860ef8d7dfdda3be65263d201c7761db7ca150e --- /dev/null +++ b/trace/src/trace_scope.rs @@ -0,0 +1,116 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. 
+// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::sync::atomic::{AtomicI32, Ordering}; + +#[cfg(all(target_env = "ohos", feature = "trace_to_hitrace"))] +use crate::hitrace::{finish_trace, finish_trace_async, start_trace, start_trace_async}; + +#[cfg(feature = "trace_to_ftrace")] +use crate::ftrace::write_trace_marker; + +static mut TRACE_SCOPE_COUNTER: AtomicI32 = AtomicI32::new(i32::MIN); + +#[derive(Clone)] +pub enum Scope { + Common(TraceScope), + Asyn(TraceScopeAsyn), + None, +} + +#[derive(Clone)] +pub struct TraceScope {} + +impl TraceScope { + pub fn new(value: String) -> Self { + #[cfg(feature = "trace_to_logger")] + { + log::trace!("[SCOPE_START]{}", value); + } + #[cfg(feature = "trace_to_ftrace")] + { + write_trace_marker(&format!("[SCOPE_START]{}", value)); + } + #[cfg(all(target_env = "ohos", feature = "trace_to_hitrace"))] + { + start_trace(&value); + } + TraceScope {} + } +} + +impl Drop for TraceScope { + fn drop(&mut self) { + #[cfg(feature = "trace_to_logger")] + { + log::trace!("[SCOPE_END]"); + } + #[cfg(feature = "trace_to_ftrace")] + { + write_trace_marker("[SCOPE_END]"); + } + #[cfg(all(target_env = "ohos", feature = "trace_to_hitrace"))] + { + finish_trace() + } + } +} + +#[derive(Clone)] +pub struct TraceScopeAsyn { + value: String, + id: i32, +} + +impl TraceScopeAsyn { + #[allow(unused_variables)] + pub fn new(value: String) -> Self { + // SAFETY: AtomicI32 can be safely shared between threads. 
+ let id = unsafe { + TRACE_SCOPE_COUNTER + .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |x| { + Some(x.wrapping_add(1)) + }) + .unwrap() + }; + #[cfg(feature = "trace_to_logger")] + { + log::trace!("[SCOPE_START(id={})]{}", id, value); + } + #[cfg(feature = "trace_to_ftrace")] + { + write_trace_marker(&format!("[SCOPE_START(id={})]{}", id, value)); + } + #[cfg(all(target_env = "ohos", feature = "trace_to_hitrace"))] + { + start_trace_async(&value, id); + } + TraceScopeAsyn { value, id } + } +} + +impl Drop for TraceScopeAsyn { + fn drop(&mut self) { + #[cfg(feature = "trace_to_logger")] + { + log::trace!("[SCOPE_END(id={})]{}", self.id, self.value); + } + #[cfg(feature = "trace_to_ftrace")] + { + write_trace_marker(&format!("[SCOPE_END(id={})]{}", self.id, self.value)); + } + #[cfg(all(target_env = "ohos", feature = "trace_to_hitrace"))] + { + finish_trace_async(&self.value, self.id); + } + } +} diff --git a/trace/trace_generator/Cargo.toml b/trace/trace_generator/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..646404477f855714a0ef81e41045bafb24b16d84 --- /dev/null +++ b/trace/trace_generator/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "trace_generator" +version = "2.4.0" +authors = ["Huawei StratoVirt Team"] +edition = "2021" +license = "Mulan PSL v2" + +[lib] +name = "trace_generator" +proc-macro = true + +[dependencies] +syn = "2.0.18" +quote = "1.0" +proc-macro2 = "1.0" +toml = "0.7" +serde = { version = "1.0", features = ["derive"] } diff --git a/trace/trace_generator/src/lib.rs b/trace/trace_generator/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..ce6ce12618f9d13802ea93438e1f346a739ddc42 --- /dev/null +++ b/trace/trace_generator/src/lib.rs @@ -0,0 +1,282 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
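+
+// A sketch of the TOML shape these macros consume (the `demo_event` entry and its values
+// are illustrative only, not a real trace definition): every `[[events]]` or `[[scopes]]`
+// entry in `trace_info/*.toml` deserializes into a `TraceDesc`, e.g.
+//
+//     [[events]]
+//     name = "demo_event"
+//     args = "id: u8, value: u32"
+//     message = "id {} value {}"
+//     enabled = true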
+ +use std::{fs, io::Read}; + +use proc_macro::TokenStream; +use quote::quote; +use serde::Deserialize; +use syn::{parse_macro_input, parse_str, Ident, Type}; + +const TRACE_DIR_NAME: &str = "trace_info"; + +#[derive(Debug, Deserialize)] +struct TraceDesc { + name: String, + args: String, + message: String, + enabled: bool, +} + +#[derive(Debug, Deserialize)] +struct TraceConf { + events: Option<Vec<TraceDesc>>, + scopes: Option<Vec<TraceDesc>>, +} + +fn get_trace_desc() -> TraceConf { + let trace_dir_path = format!( + "{}/{}", + std::env::var("CARGO_MANIFEST_DIR").unwrap(), + TRACE_DIR_NAME + ); + let paths = fs::read_dir(trace_dir_path).unwrap(); + let mut desc = String::new(); + + for path in paths { + let file_path = path.unwrap().path(); + let file_name = file_path.to_str().unwrap(); + if file_name.ends_with(".toml") { + let mut file = fs::File::open(file_path).unwrap(); + file.read_to_string(&mut desc).unwrap(); + } + } + toml::from_str::<TraceConf>(&desc).unwrap() +} + +#[proc_macro] +pub fn add_trace_state_to(input: TokenStream) -> TokenStream { + let trace_conf = get_trace_desc(); + let mut state = Vec::new(); + for desc in trace_conf.events.unwrap_or_default() { + if desc.enabled { + state.push((desc.name.trim().to_string(), "event")); + } + } + for desc in trace_conf.scopes.unwrap_or_default() { + if desc.enabled { + state.push((desc.name.trim().to_string(), "scope")); + } + } + + let set = parse_macro_input!(input as Ident); + let init_code = state.iter().map(|(name, type_str)| { + let get_func = parse_str::<Ident>(format!("get_{}_state", name).as_str()).unwrap(); + let set_func = parse_str::<Ident>(format!("set_{}_state", name).as_str()).unwrap(); + quote!( + #set.add_trace_state(TraceState::new(#name.to_string(), #type_str, #get_func, #set_func)); + ) + }); + TokenStream::from(quote! { #( #init_code )* }) +} + +#[proc_macro] +pub fn gen_trace_state(_input: TokenStream) -> TokenStream { + let trace_conf = get_trace_desc(); + let mut state_name = Vec::new(); + for desc in trace_conf.events.unwrap_or_default() { + if desc.enabled { + state_name.push(desc.name.trim().to_string()); + } + } + for desc in trace_conf.scopes.unwrap_or_default() { + if desc.enabled { + state_name.push(desc.name.trim().to_string()); + } + } + + let trace_state = state_name.iter().map(|name| { + let state_name = + parse_str::<Ident>(format!("{}_state", name).to_uppercase().as_str()).unwrap(); + let get_func = parse_str::<Ident>(format!("get_{}_state", name).as_str()).unwrap(); + let set_func = parse_str::<Ident>(format!("set_{}_state", name).as_str()).unwrap(); + quote!( + static mut #state_name: AtomicBool = AtomicBool::new(false); + fn #get_func() -> bool { + // SAFETY: AtomicBool can be safely shared between threads. + unsafe { #state_name.load(Ordering::SeqCst) } + } + fn #set_func(val: bool) { + // SAFETY: AtomicBool can be safely shared between threads. + unsafe { #state_name.store(val, Ordering::SeqCst) } + } + ) + }); + + TokenStream::from(quote!
{ #( #trace_state )* }) +} + +#[proc_macro] +pub fn gen_trace_event_func(_input: TokenStream) -> TokenStream { + let events = match get_trace_desc().events { + Some(events) => events, + None => return TokenStream::from(quote!()), + }; + let trace_func = events.iter().map(|desc| { + let event_name = desc.name.trim(); + let func_name = parse_str::(event_name).unwrap(); + + let func_args = match desc.args.is_empty() { + true => quote!(), + false => { + let split_args: Vec<&str> = desc.args.split(',').collect(); + let _args = split_args.iter().map(|arg| { + let (v, t) = arg.split_once(':').unwrap(); + let arg_name = parse_str::(v.trim()).unwrap(); + let arg_type = parse_str::(t.trim()).unwrap(); + quote!( + #arg_name: #arg_type, + ) + }); + quote! { #( #_args )* } + } + }; + + let message_args = match desc.args.is_empty() { + true => quote!(), + false => { + let split_args: Vec<&str> = desc.args.split(',').collect(); + let _args = split_args.iter().map(|arg| { + let (v, _) = arg.split_once(':').unwrap(); + let arg_name = parse_str::(v.trim()).unwrap(); + quote!( + , #arg_name + ) + }); + quote! { #( #_args )* } + } + }; + + let func_body = match desc.enabled { + true => { + let message = format!("[{{}}] {}", desc.message.trim()); + let state_name = parse_str::(format!("{}_state", event_name).to_uppercase().as_str()).unwrap(); + quote!( + #[cfg(any(feature = "trace_to_logger", feature = "trace_to_ftrace"))] + // SAFETY: AtomicBool can be safely shared between threads. + if unsafe { #state_name.load(Ordering::SeqCst) } { + #[cfg(feature = "trace_to_logger")] + { + log::trace!(#message, #event_name.to_string() #message_args); + } + #[cfg(feature = "trace_to_ftrace")] + { + let trace_info = format!(#message, #event_name.to_string() #message_args); + let _result = ftrace::write_trace_marker(&trace_info); + } + } + ) + } + false => quote!(), + }; + + quote!( + #[inline(always)] + pub fn #func_name(#func_args) { + #func_body + } + ) + }); + + TokenStream::from(quote! { #( #trace_func )* }) +} + +#[proc_macro] +pub fn gen_trace_scope_func(_input: TokenStream) -> TokenStream { + let scopes = match get_trace_desc().scopes { + Some(scopes) => scopes, + None => return TokenStream::from(quote!()), + }; + let trace_func =scopes.iter().map(|desc| { + let scope_name = desc.name.trim(); + let func_name = parse_str::(scope_name).unwrap(); + + let func_args = match desc.args.is_empty() { + true => quote!(), + false => { + let split_args: Vec<&str> = desc.args.split(',').collect(); + let _args = split_args.iter().map(|arg| { + let (v, t) = arg.split_once(':').unwrap(); + let arg_name = parse_str::(v.trim()).unwrap(); + let arg_type = parse_str::(t.trim()).unwrap(); + quote!( + #arg_name: #arg_type, + ) + }); + quote! { #( #_args )* } + } + }; + + let func_decl = match desc.enabled { + true => quote!(pub fn #func_name(asyn: bool, #func_args) -> trace_scope::Scope), + false => quote!(pub fn #func_name(asyn: bool, #func_args)), + }; + + let message_args = match desc.args.is_empty() { + true => quote!(), + false => { + let split_args: Vec<&str> = desc.args.split(',').collect(); + let _args = split_args.iter().map(|arg| { + let (v, _) = arg.split_once(':').unwrap(); + let arg_name = parse_str::(v.trim()).unwrap(); + quote!( + , #arg_name + ) + }); + quote! 
{ #( #_args )* } + } + }; + + let func_body = match desc.enabled { + true => { + let message = format!("[{{}}] {}", desc.message.trim()); + let state_name = parse_str::(format!("{}_state", scope_name).to_uppercase().as_str()).unwrap(); + quote!( + #[cfg(any(feature = "trace_to_logger", feature = "trace_to_ftrace", all(target_env = "ohos", feature = "trace_to_hitrace")))] + // SAFETY: AtomicBool can be safely shared between threads. + if unsafe { #state_name.load(Ordering::SeqCst) } { + let trace_info = format!(#message, #scope_name.to_string() #message_args); + if asyn { + return trace_scope::Scope::Asyn(trace_scope::TraceScopeAsyn::new(trace_info)) + } + return trace_scope::Scope::Common(trace_scope::TraceScope::new(trace_info)) + } + return trace_scope::Scope::None + ) + } + false => quote!(), + }; + + quote!( + #[cfg(any( + feature = "trace_to_logger", + feature = "trace_to_ftrace", + all(target_env = "ohos", feature = "trace_to_hitrace") + ))] + #[inline(always)] + #func_decl { + #func_body + } + + #[cfg(not(any( + feature = "trace_to_logger", + feature = "trace_to_ftrace", + all(target_env = "ohos", feature = "trace_to_hitrace") + )))] + #[inline(always)] + pub fn #func_name(asyn: bool, #func_args) { + } + ) + }); + + TokenStream::from(quote! { #( #trace_func )* }) +} diff --git a/trace/trace_info/acpi.toml b/trace/trace_info/acpi.toml new file mode 100644 index 0000000000000000000000000000000000000000..9b4352aaabca473eaf0addd6e761e49e05f18488 --- /dev/null +++ b/trace/trace_info/acpi.toml @@ -0,0 +1,23 @@ +[[events]] +name = "ged_inject_acpi_event" +args = "event: u32" +message = "acpi_sevent {}" +enabled = true + +[[events]] +name = "ged_read" +args = "event: u32" +message = "acpi_sevent {}" +enabled = true + +[[events]] +name = "power_read" +args = "reg_idx: u64, value: u32" +message = "reg_idx {} value {}" +enabled = true + +[[events]] +name = "power_status_read" +args = "regs: &dyn fmt::Debug" +message = "regs {:?}" +enabled = true diff --git a/trace/trace_info/aio.toml b/trace/trace_info/aio.toml new file mode 100644 index 0000000000000000000000000000000000000000..0cb7d2eb3dd9aafc5a4b720fe7e0adb45cd81dbb --- /dev/null +++ b/trace/trace_info/aio.toml @@ -0,0 +1,5 @@ +[[events]] +name = "aio_submit_request" +args = "fd: RawFd, opcode: &dyn fmt::Debug, offset: usize, nbytes: u64" +message = "fd: {}, opcode {:?}, offset {}, nbytes {}" +enabled = true diff --git a/trace/trace_info/block_backend.toml b/trace/trace_info/block_backend.toml new file mode 100644 index 0000000000000000000000000000000000000000..43bb8d5a3ba0a3eee869d9b08a390b17ba830517 --- /dev/null +++ b/trace/trace_info/block_backend.toml @@ -0,0 +1,53 @@ +[[events]] +name = "block_read_vectored" +args = "id: &str, offset: usize, nbytes: u64" +message = "read vectored for device \"{}\", offset {}, nbytes {}" +enabled = true + +[[events]] +name = "block_write_vectored" +args = "id: &str, offset: usize, nbytes: u64" +message = "write vectored for device \"{}\", offset {}, nbytes {}" +enabled = true + +[[events]] +name = "block_write_zeroes" +args = "id: &str, offset: usize, nbytes: u64, unmap: bool" +message = "write zeroes for device \"{}\", offset {}, nbytes {}, unmap {}" +enabled = true + +[[events]] +name = "block_discard" +args = "id: &str, offset: usize, nbytes: u64" +message = "discard for device \"{}\", offset {}, nbytes {}" +enabled = true + +[[events]] +name = "block_datasync" +args = "id: &str" +message = "datasync for device \"{}\"" +enabled = true + +[[events]] +name = "block_flush_request" +args = "id: &str" 
+message = "flush request for device \"{}\"" +enabled = true + +[[events]] +name = "block_drain_request" +args = "id: &str" +message = "drain request for device \"{}\"" +enabled = true + +[[events]] +name = "qcow2_flush" +args = "id: &str" +message = "qcow2 flush cache data for device \"{}\"" +enabled = true + +[[events]] +name = "qcow2_process_discards" +args = "id: &str, opcode: &dyn fmt::Debug, unmap: bool" +message = "qcow2 process discard for device \"{}\", opcode {:?}, unmap {}" +enabled = true diff --git a/trace/trace_info/camera.toml b/trace/trace_info/camera.toml new file mode 100644 index 0000000000000000000000000000000000000000..e1f044ffb8d32c898acd946cf8229a04eadbe94d --- /dev/null +++ b/trace/trace_info/camera.toml @@ -0,0 +1,35 @@ +[[events]] +name = "camera_register_fd" +args = "id: &str, fd: i32" +message = "camera {} register fd {}." +enabled = true + +[[events]] +name = "camera_unregister_fd" +args = "id: &str, fd: i32" +message = "camera {} unregister fd {}." +enabled = true + +[[events]] +name = "camera_set_format" +args = "id: &str, fd: i32" +message = "camera {} set format open fd {}." +enabled = true + +[[events]] +name = "camera_get_format_by_index" +args = "format_index: u8, frame_index: u8, out: &dyn fmt::Debug" +message = "V4l2 fmt {}, frm {}, info {:?}." +enabled = true + +[[scopes]] +name = "ohcam_get_frame" +args = "offset: usize, len: usize" +message = "ohcam get frame offset {} len {}" +enabled = true + +[[scopes]] +name = "ohcam_next_frame" +args = "frame_id: u64" +message = "ohcam next frame {}" +enabled = true diff --git a/trace/trace_info/cpu.toml b/trace/trace_info/cpu.toml new file mode 100644 index 0000000000000000000000000000000000000000..733362ab332e55e6fe86c8cf603fc2357b058d41 --- /dev/null +++ b/trace/trace_info/cpu.toml @@ -0,0 +1,5 @@ +[[events]] +name = "cpu_boot_config" +args = "cpu_boot_config: &dyn fmt::Debug" +message = "{:#?}" +enabled = true diff --git a/trace/trace_info/device_legacy.toml b/trace/trace_info/device_legacy.toml new file mode 100644 index 0000000000000000000000000000000000000000..42bbae08d9431af9cffeae7d494e42a8f4d0d216 --- /dev/null +++ b/trace/trace_info/device_legacy.toml @@ -0,0 +1,227 @@ +[[events]] +name = "pl031_read" +args = "addr: u64, value: u32" +message = "addr 0x{:04x} value 0x{:08x}" +enabled = true + +[[events]] +name = "pl031_write" +args = "addr: u64, value: u32" +message = "addr 0x{:04x} value 0x{:08x}" +enabled = true + +[[events]] +name = "pl031_inject_interrupt" +args = "" +message = "" +enabled = true + +[[events]] +name = "rtc_read" +args = "addr: u8, value: u8" +message = "addr 0x{:02x} value 0x{:02x}" +enabled = true + +[[events]] +name = "rtc_write" +args = "addr: u8, value: u8" +message = "addr 0x{:02x} value 0x{:02x}" +enabled = true + +[[events]] +name = "rtc_inject_interrupt" +args = "" +message = "" +enabled = true + +[[events]] +name = "pl011_read" +args = "addr: u64, value: u32" +message = "addr 0x{:08x} value 0x{:08x}" +enabled = true + +[[events]] +name = "pl011_read_fifo" +args = "read_count: u32" +message = "FIFO read, read_count now {}" +enabled = true + +[[events]] +name = "pl011_write" +args = "addr: u64, value: u32" +message = "addr 0x{:08x} value 0x{:08x}" +enabled = true + +[[events]] +name = "pl011_interrupt" +args = "flag: u32" +message = "flag 0x{:08x}" +enabled = true + +[[events]] +name = "pl011_baudrate_change" +args = "ibrd: u32, fbrd: u32" +message = "ibrd {}, fbrd {}" +enabled = true + +[[events]] +name = "pl011_pause_rx" +args = "" +message = "rx paused" +enabled = true + 
+[[events]] +name = "pl011_unpause_rx" +args = "" +message = "rx unpause" +enabled = true + +[[events]] +name = "pl011_receive" +args = "value: u32, read_count: u32" +message = "new char 0x{:08x}, read_count now {}" +enabled = true + +[[events]] +name = "pl011_receive_full" +args = "" +message = "FIFO now full, RXFF set" +enabled = true + +[[events]] +name = "serial_read" +args = "addr: u64, value: u8" +message = "addr 0x{:08x} value 0x{:02x}" +enabled = true + +[[events]] +name = "serial_write" +args = "addr: u64, value: u8" +message = "addr 0x{:08x} value 0x{:02x}" +enabled = true + +[[events]] +name = "serial_update_iir" +args = "iir: u8" +message = "value 0x{:02x}" +enabled = true + +[[events]] +name = "serial_receive" +args = "len: usize" +message = "data length {}" +enabled = true + +[[events]] +name = "serial_pause_rx" +args = "" +message = "rx paused" +enabled = true + +[[events]] +name = "serial_unpause_rx" +args = "" +message = "rx unpause" +enabled = true + +[[events]] +name = "pflash_device_id" +args = "id: u32" +message = "read device ID: 0x{:04x}" +enabled = true + +[[events]] +name = "pflash_device_info" +args = "offset: u64" +message = "read device information offset: 0x{:04x}" +enabled = true + +[[events]] +name = "pflash_io_read" +args = "offset: u64, size: u32, value: u32, cmd: u8, wcycle: u32" +message = "offset: 0x{:04x}, size: {}, value: 0x{:04x}, cmd: 0x{:02x}, wcycle: {}" +enabled = true + +[[events]] +name = "pflash_io_write" +args = "offset: u64, size: u8, value: u32, wcycle: u32" +message = "offset: 0x{:04x}, size: {}, value: 0x{:04x}, wcycle: {}" +enabled = true + +[[events]] +name = "pflash_manufacturer_id" +args = "id: u32" +message = "read manufacturer ID: 0x{:04x}" +enabled = true + +[[events]] +name = "pflash_mode_read_array" +args = "" +message = "read array mode" +enabled = true + +[[events]] +name = "pflash_read_data" +args = "offset: u64, len: usize, value: &[u8]" +message = "data offset: 0x{:04x}, length: {}, value: 0x{:x?}" +enabled = true + +[[events]] +name = "pflash_read_status" +args = "status: u32" +message = "status: 0x{:x}" +enabled = true + +[[events]] +name = "pflash_read_unknown_state" +args = "cmd: u8" +message = "unknown command state: 0x{:02x}" +enabled = true + +[[events]] +name = "pflash_write" +args = "str: String, cmd: u8" +message = "{}, cmd: 0x{:02x}" +enabled = true + +[[events]] +name = "pflash_write_block" +args = "value: u32" +message = "block write: bytes: 0x{:x}" +enabled = true + +[[events]] +name = "pflash_write_block_erase" +args = "offset: u64, len: u32" +message = "block erase offset: 0x{:04x}, bytes: 0x{:x}" +enabled = true + +[[events]] +name = "pflash_write_data" +args = "offset: u64, size: usize, value: &[u8], counter: u32" +message = "data offset: 0x{:04x}, size: {}, value: 0x{:x?}, counter: 0x{:04x}" +enabled = true + +[[events]] +name = "fwcfg_select_entry" +args = "key: u16, key_name: &'static str, ret: i32" +message = "key_value {} key_name {:?} ret {}" +enabled = true + +[[events]] +name = "fwcfg_add_entry" +args = "key: u16, key_name: &'static str, data: Vec" +message = "key_value {} key_name {:?} data {:?}" +enabled = true + +[[events]] +name = "fwcfg_read_data" +args = "value: u64" +message = "value {}" +enabled = true + +[[events]] +name = "fwcfg_add_file" +args = "index: usize, filename: &str, data_len: usize" +message = "index {} filename {:?} data_len {}" +enabled = true diff --git a/trace/trace_info/event_loop.toml b/trace/trace_info/event_loop.toml new file mode 100644 index 
0000000000000000000000000000000000000000..f8f6b27d6f854971935418dd41137948477494ff --- /dev/null +++ b/trace/trace_info/event_loop.toml @@ -0,0 +1,42 @@ + +[[events]] +name = "update_event" +args = "raw_fd: &dyn fmt::Debug, operation: &dyn fmt::Debug" +message = "raw_fd={:?} operation={:?}" +enabled = true + +[[events]] +name = "timer_add" +args = "id: &dyn fmt::Debug, expire_time: &dyn fmt::Debug" +message = "id={:?} expire_time={:?}" +enabled = true + +[[events]] +name = "timer_del" +args = "id: &dyn fmt::Debug, expire_time: &dyn fmt::Debug" +message = "id={:?} expire_time={:?}" +enabled = true + +[[events]] +name = "timer_run" +args = "id: &dyn fmt::Debug" +message = "id={:?}" +enabled = true + +[[events]] +name = "thread_pool_submit_task" +args = "" +message = "" +enabled = true + +[[events]] +name = "thread_pool_spawn_thread" +args = "total_threads: &u64, blocked_threads: &u64, new_threads: &u64, pending_threads: &u64" +message = "total_threads={:?} blocked_threads={:?} new_threads={:?} pending_threads={:?}" +enabled = true + +[[events]] +name = "thread_pool_exit_thread" +args = "total_threads: &u64, lists_len: &dyn fmt::Debug" +message = "total_threads={:?} lists_len={:?}" +enabled = true diff --git a/trace/trace_info/gpu.toml b/trace/trace_info/gpu.toml new file mode 100644 index 0000000000000000000000000000000000000000..d794cfabfe2e987513899462245a4e7594d6de09 --- /dev/null +++ b/trace/trace_info/gpu.toml @@ -0,0 +1,5 @@ +[[scopes]] +name = "update_cursor" +args = "" +message = "" +enabled = true diff --git a/trace/trace_info/kvm.toml b/trace/trace_info/kvm.toml new file mode 100644 index 0000000000000000000000000000000000000000..ce6060bc7bb415689ccc58b3af9ab20cfb39780e --- /dev/null +++ b/trace/trace_info/kvm.toml @@ -0,0 +1,209 @@ +[[events]] +name = "kvm_vcpu_run_exit" +args = "index: u8, reason: &dyn fmt::Debug" +message = "vcpu index {} exit reason {:?}" +enabled = true + +[[events]] +name = "kvm_add_ioeventfd" +args = "fd: &Arc, addr: &dyn fmt::Debug, data_match: bool, size: u64, data: u64" +message = "fd {:?} addr {:?} data_match {} size {} data {}" +enabled = true + +[[events]] +name = "kvm_delete_ioeventfd" +args = "fd: &Arc, addr: &dyn fmt::Debug, data_match: bool, size: u64, data: u64" +message = "fd {:?} addr {:?} data_match {} size {} data {}" +enabled = true + +[[events]] +name = "kvm_commit_irq_routing" +args = "" +message = "" +enabled = true + +[[events]] +name = "kvm_release_irq" +args = "irq: u32" +message = "irq {}" +enabled = true + +[[events]] +name = "kvm_register_irqfd" +args = "fd: &EventFd, gsi: u32" +message = "fd {:?} gsi {}" +enabled = true + +[[events]] +name = "kvm_unregister_irqfd" +args = "fd: &EventFd, gsi: u32" +message = "fd {:?} gsi {}" +enabled = true + +[[events]] +name = "kvm_trigger_irqfd" +args = "irq_fd: &EventFd" +message = "irq_fd {:?}" +enabled = true + +[[events]] +name = "kvm_signal_msi" +args = "msi: &dyn fmt::Debug" +message = "kvm_msi {:?}" +enabled = true + +[[events]] +name = "kvm_reset_vcpu" +args = "id: u8" +message = "vcpu id {}" +enabled = true + +[[events]] +name = "kvm_get_one_reg" +args = "id: u8, reg_id: u64, val: u128" +message = "vcpu id {} reg_id: {} val {}" +enabled = true + +[[events]] +name = "kvm_set_one_reg" +args = "id: u8, reg_id: u64, val: u128" +message = "vcpu id {} reg_id: {} value {}" +enabled = true + +[[events]] +name = "kvm_get_mp_state" +args = "id: u8, mp_state: &dyn fmt::Debug" +message = "vcpu id {} mp_state {:?}" +enabled = true + +[[events]] +name = "kvm_get_vcpu_events" +args = "id: u8, 
cpu_events: &dyn fmt::Debug" +message = "vcpu id {} cpu_events {:?}" +enabled = true + +[[events]] +name = "kvm_get_reg_list" +args = "id: u8, cpreg_list: &dyn fmt::Debug" +message = "vcpu id {} cpreg_list {:?}" +enabled = true + +[[events]] +name = "kvm_set_mp_state" +args = "id: u8, mp_state: &dyn fmt::Debug" +message = "vcpu id {} mp_state {:?}" +enabled = true + +[[events]] +name = "kvm_set_vcpu_events" +args = "id: u8, event: &dyn fmt::Debug" +message = "vcpu id {} event {:?}" +enabled = true + +[[events]] +name = "kvm_get_regs" +args = "id: u8, regs: &dyn fmt::Debug" +message = "vcpu id {} kvm_regs {:?}" +enabled = true + +[[events]] +name = "kvm_get_sregs" +args = "id: u8, sregs: &dyn fmt::Debug" +message = "vcpu id {} sregs {:?}" +enabled = true + +[[events]] +name = "kvm_get_fpu" +args = "id: u8, fpu: &dyn fmt::Debug" +message = "vcpu id {} fpu {:?}" +enabled = true + +[[events]] +name = "kvm_get_lapic" +args = "id: u8, lapic: &dyn fmt::Debug" +message = "vcpu id {} lapic {:?}" +enabled = true + +[[events]] +name = "kvm_get_msrs" +args = "id: u8, msrs: &dyn fmt::Debug" +message = "vcpu id {} msrs {:?}" +enabled = true + +[[events]] +name = "kvm_get_xsave" +args = "id: u8, xsave: &dyn fmt::Debug" +message = "vcpu id {} xsave {:?}" +enabled = true + +[[events]] +name = "kvm_get_xcrs" +args = "id: u8, xcrs: &dyn fmt::Debug" +message = "vcpu id {} xcrs {:?}" +enabled = true + +[[events]] +name = "kvm_get_debug_regs" +args = "id: u8, debugregs: &dyn fmt::Debug" +message = "vcpu id {} debugregs: {:?}" +enabled = true + +[[events]] +name = "kvm_set_regs" +args = "id: u8, reg: &dyn fmt::Debug" +message = "vcpu id {} regs {:?}" +enabled = true + +[[events]] +name = "kvm_set_sregs" +args = "id: u8, sregs: &dyn fmt::Debug" +message = "vcpu id {} sregs {:?}" +enabled = true + +[[events]] +name = "kvm_set_fpu" +args = "id: u8, fpu: &dyn fmt::Debug" +message = "vcpu id {} fpu {:?}" +enabled = true + +[[events]] +name = "kvm_set_lapic" +args = "id: u8, lapic: &dyn fmt::Debug" +message = "vcpu id {} lapic {:?}" +enabled = true + +[[events]] +name = "kvm_set_msrs" +args = "id: u8, msrs: &dyn fmt::Debug" +message = "vcpu id {} msrs: {:?}" +enabled = true + +[[events]] +name = "kvm_set_xsave" +args = "id: u8, xsave: &dyn fmt::Debug" +message = "vcpu id {} xsave {:?}" +enabled = true + +[[events]] +name = "kvm_set_xcrs" +args = "id: u8, xcrs: &dyn fmt::Debug" +message = "vcpu id {} xcrs {:?}" +enabled = true + +[[events]] +name = "kvm_set_debug_regs" +args = "id: u8, debugregs: &dyn fmt::Debug" +message = "vcpu id {} debugregs {:?}" +enabled = true + +[[events]] +name = "kvm_setup_cpuid" +args = "id: u8, cpuid: &dyn fmt::Debug" +message = "vcpu id {} cpuid {:?}" +enabled = true + +[[events]] +name = "kvm_set_cpuid2" +args = "id: u8, cpuid2: &dyn fmt::Debug" +message = "vcpu id {} cpuid2 {:?}" +enabled = true diff --git a/trace/trace_info/machine.toml b/trace/trace_info/machine.toml new file mode 100644 index 0000000000000000000000000000000000000000..ff2b6f8145345160202b00090ad5a26df0e6db23 --- /dev/null +++ b/trace/trace_info/machine.toml @@ -0,0 +1,29 @@ +[[events]] +name = "cpu_topo" +args = "cpu_topo: &dyn fmt::Debug" +message = "{:#?}" +enabled = true + +[[events]] +name = "sysbus" +args = "sysbus: &dyn fmt::Debug" +message = "{:#?}" +enabled = true + +[[events]] +name = "replaceable_info" +args = "replaceable_info: &dyn fmt::Debug" +message = "{:#?}" +enabled = true + +[[events]] +name = "vm_state" +args = "vm_state: &dyn fmt::Debug" +message = "{:#?}" +enabled = true + +[[events]] +name = 
"mmio_replaceable_config" +args = "mmio_replaceable_config: &dyn fmt::Debug" +message = "{:#?}" +enabled = true diff --git a/trace/trace_info/memory.toml b/trace/trace_info/memory.toml new file mode 100644 index 0000000000000000000000000000000000000000..c7d8171c5d0ac5dd8d285908e42b7008c15cd03f --- /dev/null +++ b/trace/trace_info/memory.toml @@ -0,0 +1,29 @@ +[[events]] +name = "address_space_read" +args = "addr: &dyn fmt::Debug, count: u64" +message = "Memory: flatview_read addr {:?}, count {}" +enabled = true + +[[events]] +name = "address_space_write" +args = "addr: &dyn fmt::Debug, count: u64" +message = "Memory: flatview_write addr {:?}, count {}" +enabled = true + +[[scopes]] +name = "address_update_topology" +args = "" +message = "Memory: update opology" +enabled = true + +[[scopes]] +name = "pre_alloc" +args = "size: u64" +message = "Memory: pre_alloc ram size is {}" +enabled = true + +[[scopes]] +name = "init_memory" +args = "" +message = "Memory: init memory" +enabled = true diff --git a/trace/trace_info/misc.toml b/trace/trace_info/misc.toml new file mode 100644 index 0000000000000000000000000000000000000000..28cf5132667509a234bec29f8968fbd17269c4ce --- /dev/null +++ b/trace/trace_info/misc.toml @@ -0,0 +1,89 @@ +[[events]] +name = "scream_init" +args = "dir: &dyn fmt::Debug, header: &dyn fmt::Debug" +message = "dir: {:?} header: {:?}" +enabled = true + +[[events]] +name = "scream_alsa_send_frames" +args = "frame: u32, offset: usize, end: usize" +message = "frames {} offset {} end {}" +enabled = true + +[[events]] +name = "scream_alsa_receive_frames" +args = "frame: u32, offset: usize, end: usize" +message = "frames {} offset {} end {}" +enabled = true + +[[events]] +name = "scream_setup_alsa_swp" +args = "name: &str, swp: &dyn fmt::Debug" +message = "scream {} setup software parameters: {:?}" +enabled = true + +[[events]] +name = "scream_setup_alsa_hwp" +args = "name: &str, hwp: &dyn fmt::Debug" +message = "scream {} setup hardware parameters: {:?}" +enabled = true + +[[events]] +name = "oh_scream_render_init" +args = "context: &dyn fmt::Debug" +message = "context: {:?}" +enabled = true + +[[events]] +name = "oh_scream_render_destroy" +args = "" +message = "" +enabled = true + +[[events]] +name = "oh_scream_capture_init" +args = "context: &dyn fmt::Debug" +message = "context: {:?}" +enabled = true + +[[events]] +name = "oh_scream_capture_destroy" +args = "" +message = "" +enabled = true + +[[events]] +name = "oh_scream_on_write_data_cb" +args = "len: usize" +message = "len: {}" +enabled = true + +[[events]] +name = "oh_scream_on_read_data_cb" +args = "len: usize" +message = "len: {}" +enabled = true + +[[scopes]] +name = "ohaudio_render_process" +args = "data: &dyn fmt::Debug" +message = "audio data {:?} to render" +enabled = true + +[[scopes]] +name = "ohaudio_capturer_process" +args = "data: &dyn fmt::Debug" +message = "audio data {:?} to capture" +enabled = true + +[[scopes]] +name = "ohaudio_write_cb" +args = "to_copy: usize" +message = "OH audio expect audio data {} bytes" +enabled = true + +[[scopes]] +name = "ohaudio_read_cb" +args = "len: i32" +message = "OH audio captured {} bytes" +enabled = true diff --git a/trace/trace_info/pci.toml b/trace/trace_info/pci.toml new file mode 100644 index 0000000000000000000000000000000000000000..be76599f3165de03b03852e405ee0f327c0b4801 --- /dev/null +++ b/trace/trace_info/pci.toml @@ -0,0 +1,17 @@ +[[events]] +name = "pci_read_config" +args = "dev_name: &str, addr: usize, data: &[u8]" +message = "dev name: {} addr: 0x{:#X} data: 
0x{:X?}" +enabled = true + +[[events]] +name = "pci_write_config" +args = "dev_name: &str, addr: usize, data: &[u8]" +message = "dev name: {} addr: 0x{:#X} data: 0x{:X?}" +enabled = true + +[[events]] +name = "msix_write_config" +args = "dev_id: u16, masked: bool, enabled: bool" +message = "dev id: {} masked: {} enabled: {}" +enabled = true diff --git a/trace/trace_info/scsi.toml b/trace/trace_info/scsi.toml new file mode 100644 index 0000000000000000000000000000000000000000..d5c9605a96873008b7cb871aed0caae7565c2698 --- /dev/null +++ b/trace/trace_info/scsi.toml @@ -0,0 +1,35 @@ +[[events]] +name = "scsi_bus_get_device" +args = "target_id: u8, lun: u16, lun_id: u16" +message = "target request, target {}, requested lun {}, found lun {}." +enabled = true + +[[events]] +name = "scsi_bus_get_no_device" +args = "target: u8, lun: u16" +message = "can't find scsi device target {} lun {}." +enabled = true + +[[events]] +name = "scsi_emulate_execute" +args = "op: u8" +message = "emulate scsi command is {:#x}." +enabled = true + +[[events]] +name = "scsi_emulate_execute_error" +args = "op: u8, result: &dyn fmt::Debug" +message = "error in processing scsi command {:#x}, err is {:?}" +enabled = true + +[[events]] +name = "scsi_emulate_mode_sense" +args = "page_code: u8, page_control: u8, subpage: u8, dbd: u8, length: u8" +message = "MODE SENSE page_code {:x}, page_control {:x}, subpage {:x}, dbd bit {:x}, Allocation length {}." +enabled = true + +[[events]] +name = "scsi_outbuf_to_iov" +args = "cmd: u8, outbuf_len: usize, iov_len: u64, idx: usize, iovec_size: usize" +message = "cmd is {:x}, outbuf len is {}, iov_len is {}, idx is {}, iovec size is {}." +enabled = true diff --git a/trace/trace_info/ui.toml b/trace/trace_info/ui.toml new file mode 100644 index 0000000000000000000000000000000000000000..1df64556aed17bfc6491229948f624b2daba92d1 --- /dev/null +++ b/trace/trace_info/ui.toml @@ -0,0 +1,281 @@ +[[events]] +name = "vnc_dpy_switch" +args = "old_width: &dyn fmt::Debug, old_height: &dyn fmt::Debug, new_width: &dyn fmt::Debug, new_height: &dyn fmt::Debug" +message = "old_width={:?} old_height={:?}, new_width={:?}, new_height={:?}" +enabled = true + +[[events]] +name = "vnc_dpy_pageflip" +args = "w: &dyn fmt::Debug, h: &dyn fmt::Debug, fmt: &dyn fmt::Debug" +message = "w={:?} h={:?} fmt={:?}" +enabled = true + +[[events]] +name = "vnc_dpy_refresh" +args = "dirty_num: &dyn fmt::Debug, update_interval: &dyn fmt::Debug" +message = "dirty_num={:?} update_interval={:?} ms" +enabled = true + +[[events]] +name = "vnc_dpy_image_update" +args = "x: &dyn fmt::Debug, y: &dyn fmt::Debug, w: &dyn fmt::Debug, h: &dyn fmt::Debug" +message = "x={:?} y={:?} x={:?} y={:?}" +enabled = true + +[[events]] +name = "vnc_dpy_cursor_update" +args = "width: &dyn fmt::Debug, height: &dyn fmt::Debug" +message = "width={:?} height={:?}" +enabled = true + +[[events]] +name = "vnc_server_desktop_resize" +args = "width: &dyn fmt::Debug, height: &dyn fmt::Debug" +message = "width={:?} height={:?}" +enabled = true + +[[events]] +name = "vnc_client_connect" +args = "stream: &dyn fmt::Debug" +message = "stream={:?}" +enabled = true + +[[events]] +name = "vnc_client_handle_version" +args = "major: &usize, minor: &usize" +message = "major={:?} minor={:?}" +enabled = true + +[[events]] +name = "vnc_client_handle_init" +args = "clients: &dyn fmt::Debug, conn_limits: &dyn fmt::Debug" +message = "total clients={:?}, limits={:?}" +enabled = true + +[[events]] +name = "vnc_client_handle_auth" +args = "auth_type: &u8" +message = 
"auth_type={:?}" +enabled = true + +[[events]] +name = "vnc_client_key_event" +args = "keysym: &dyn fmt::Debug, down: &bool" +message = "keysym={:?} down={:?}" +enabled = true + +[[events]] +name = "vnc_client_point_event" +args = "button: &dyn fmt::Debug, x: &u16, y: &u16" +message = "button={:?} x={:?} y={:?}" +enabled = true + +[[events]] +name = "vnc_client_vencrypt_init" +args = "" +message = "" +enabled = true + +[[events]] +name = "vnc_client_vencrypt_auth" +args = "auth: &dyn fmt::Debug, subauth: &dyn fmt::Debug" +message = "auth={:?}, subauth={:?}" +enabled = true + +[[events]] +name = "vnc_client_tls_handshake_done" +args = "" +message = "" +enabled = true + +[[events]] +name = "vnc_client_get_mechname_length" +args = "len: &u32" +message = "length={:?}" +enabled = true + +[[events]] +name = "vnc_client_get_mechname" +args = "mechname: &dyn fmt::Debug" +message = "mechname={:?}" +enabled = true + +[[events]] +name = "vnc_client_get_authmessage_length" +args = "length: &dyn fmt::Debug" +message = "length={:?}" +enabled = true + +[[events]] +name = "vnc_client_sasl_auth" +args = "result: &dyn fmt::Debug, serverout_len: &dyn fmt::Debug" +message = "result={:?}, serverout_len={:?}" +enabled = true + +[[events]] +name = "vnc_server_send_mech_list" +args = "mech_list: &dyn fmt::Debug" +message = "mech_list={:?}" +enabled = true + +[[events]] +name = "gtk_enter_callback" +args = "enter_or_leave: &dyn fmt::Debug" +message = "{:?}" +enabled = true + +[[events]] +name = "gtk_dyp_channel_switch" +args = "dev_name: &dyn fmt::Debug" +message = "device_name={:?}" +enabled = true + +[[events]] +name = "gtk_dyp_channel_refresh" +args = "dev_name: &dyn fmt::Debug" +message = "device_name={:?}" +enabled = true + +[[events]] +name = "gtk_dyp_channel_image_update" +args = "dev_name: &dyn fmt::Debug, x: &i32, y: &i32, w: &i32, h: &i32" +message = "device_name={:?} x={:?} y={:?} w={:?} h={:?}" +enabled = true + +[[events]] +name = "gtk_dyp_channel_cursor_update" +args = "dev_name: &dyn fmt::Debug" +message = "device_name={:?}" +enabled = true + +[[events]] +name = "gtk_dyp_switch" +args = "old_width: &dyn fmt::Debug, old_height: &dyn fmt::Debug, new_width: &dyn fmt::Debug, new_height: &dyn fmt::Debug" +message = "old_width={:?} old_height={:?} new_width={:?} new_height={:?}" +enabled = true + +[[events]] +name = "gtk_dyp_update" +args = "x: &dyn fmt::Debug, y: &dyn fmt::Debug, w: &dyn fmt::Debug, h: &dyn fmt::Debug" +message = "x={:?} y={:?} w={:?} h={:?}" +enabled = true + +[[events]] +name = "gtk_dyp_refresh" +args = "" +message = "" +enabled = true + +[[events]] +name = "gtk_dyp_cursor_define" +args = "width: &dyn fmt::Debug, height: &dyn fmt::Debug, hot_x: &dyn fmt::Debug, hot_y: &dyn fmt::Debug, data_len: &dyn fmt::Debug" +message = "width={:?} height={:?} hot_x={:?} hot_y={:?} data_len={:?}" +enabled = true + +[[events]] +name = "gtk_configure_callback" +args = "width: &dyn fmt::Debug, height: &dyn fmt::Debug" +message = "width={:?} height={:?}" +enabled = true + +[[events]] +name = "gtk_key_event_callback" +args = "key_value: &dyn fmt::Debug, press: &dyn fmt::Debug" +message = "key_value={:?} press={:?}" +enabled = true + +[[events]] +name = "gtk_pointer_callback" +args = "button_mask: &dyn fmt::Debug" +message = "button_mask={:?}" +enabled = true + +[[events]] +name = "gtk_cursor_move_event" +args = "x: &dyn fmt::Debug, y: &dyn fmt::Debug" +message = "x={:?} y={:?}" +enabled = true + +[[events]] +name = "gtk_scroll_callback" +args = "direction: &dyn fmt::Debug" +message = "direction={:?}" 
+enabled = true + +[[events]] +name = "console_dpy_refresh" +args = "interval: &dyn fmt::Debug" +message = "interval={:?} ms" +enabled = true + +[[events]] +name = "console_dpy_ui_info" +args = "dev_name: &dyn fmt::Debug, width: &dyn fmt::Debug, height: &dyn fmt::Debug, last_width: &dyn fmt::Debug, last_height: &dyn fmt::Debug" +message = "dev_name={:?} width={:?} height={:?} last_width={:?} last_height={:?}" +enabled = true + +[[events]] +name = "console_select" +args = "con_id: &dyn fmt::Debug" +message = "console id={:?}" +enabled = true + +[[events]] +name = "oh_event_mouse_button" +args = "msg_btn: u32, action: u32" +message = "msg_btn={} action={}" +enabled = true + +[[events]] +name = "oh_event_mouse_motion" +args = "x: f64, y: f64" +message = "x={} y={}" +enabled = true + +[[events]] +name = "oh_event_keyboard" +args = "keycode: u16, key_action: u16" +message = "keycode={} key_action={}" +enabled = true + +[[events]] +name = "oh_event_windowinfo" +args = "width: u32, height: u32" +message = "width={} height={}" +enabled = true + +[[events]] +name = "oh_event_scroll" +args = "direction: u32" +message = "direction={}" +enabled = true + +[[events]] +name = "oh_event_ledstate" +args = "state: u32" +message = "state={}" +enabled = true + +[[events]] +name = "oh_event_focus" +args = "state: u32" +message = "state={}" +enabled = true + +[[events]] +name = "oh_event_greet" +args = "id: u64" +message = "token_id={}" +enabled = true + +[[events]] +name = "oh_event_unsupported_type" +args = "ty: &dyn fmt::Debug, size: u32" +message = "type={:?} body_size={}" +enabled = true + +[[scopes]] +name = "handle_msg" +args = "opcode: &dyn fmt::Debug" +message = "handle ohui {:?} message" +enabled = true diff --git a/trace/trace_info/usb.toml b/trace/trace_info/usb.toml new file mode 100644 index 0000000000000000000000000000000000000000..9defe6d287c5195b74d4f605c66bbfc5c4aecbf9 --- /dev/null +++ b/trace/trace_info/usb.toml @@ -0,0 +1,605 @@ +[[events]] +name = "usb_xhci_exit" +args = "" +message = "=== EXIT ===" +enabled = true + +[[events]] +name = "usb_xhci_run" +args = "" +message = "=== RUN ===" +enabled = true + +[[events]] +name = "usb_xhci_reset" +args = "" +message = "=== RESET ===" +enabled = true + +[[events]] +name = "usb_xhci_stop" +args = "" +message = "=== STOP ===" +enabled = true + +[[events]] +name = "usb_xhci_attach_device" +args = "port_id: &dyn fmt::Debug, device_id: &dyn fmt::Debug" +message = "port_id={:?} device_id={:?}" +enabled = true + +[[events]] +name = "usb_xhci_detach_device" +args = "port_id: &dyn fmt::Debug, device_id: &dyn fmt::Debug" +message = "port_id={:?} device_id={:?}" +enabled = true + +[[events]] +name = "usb_xhci_cap_read" +args = "gpa: &dyn fmt::Debug, offset: &dyn fmt::Debug, value: &dyn fmt::Debug" +message = "gpa={:x?} offset={:04x?} value={:?}" +enabled = true + +[[events]] +name = "usb_xhci_oper_read" +args = "gpa: &dyn fmt::Debug, offset: &dyn fmt::Debug, value: &dyn fmt::Debug" +message = "gpa={:x?} offset={:04x?} value={:?}" +enabled = true + +[[events]] +name = "usb_xhci_runtime_read" +args = "gpa: &dyn fmt::Debug, offset: &dyn fmt::Debug, value: &dyn fmt::Debug" +message = "gpa={:x?} offset={:04x?} value={:?}" +enabled = true + +[[events]] +name = "usb_xhci_runtime_write" +args = "gpa: &dyn fmt::Debug, offset: &dyn fmt::Debug, value: &dyn fmt::Debug" +message = "gpa={:x?} offset={:04x?} value={:?}" +enabled = true + +[[events]] +name = "usb_xhci_oper_write" +args = "gpa: &dyn fmt::Debug, offset: &dyn fmt::Debug, value: &dyn fmt::Debug" +message = 
"gpa={:x?} offset={:04x?} value={:?}" +enabled = true + +[[events]] +name = "usb_xhci_doorbell_read" +args = "gpa: &dyn fmt::Debug, offset: &dyn fmt::Debug, value: &dyn fmt::Debug" +message = "gpa={:x?} offset={:04x?} value={:?}" +enabled = true + +[[events]] +name = "usb_xhci_doorbell_write" +args = "gpa: &dyn fmt::Debug, offset: &dyn fmt::Debug, value: &dyn fmt::Debug" +message = "gpa={:x?} offset={:04x?} value={:?}" +enabled = true + +[[events]] +name = "usb_xhci_port_read" +args = "gpa: &dyn fmt::Debug, offset: &dyn fmt::Debug, value: &dyn fmt::Debug" +message = "gpa={:x?} offset={:04x?} value={:?}" +enabled = true + +[[events]] +name = "usb_xhci_port_write" +args = "gpa: &dyn fmt::Debug, offset: &dyn fmt::Debug, value: &dyn fmt::Debug" +message = "gpa={:x?} offset={:04x?} value={:?}" +enabled = true + +[[events]] +name = "usb_xhci_port_link" +args = "port: &dyn fmt::Debug, pls: &dyn fmt::Debug" +message = "port={:?} pls={:?}" +enabled = true + +[[events]] +name = "usb_xhci_ep_kick" +args = "slotid: &dyn fmt::Debug, epid: &dyn fmt::Debug, dequeue: &dyn fmt::Debug" +message = "slotid={:?} epid={:?} dequeue={:x?}" +enabled = true + +[[events]] +name = "usb_xhci_fetch_trb" +args = "addr: &dyn fmt::Debug, param: &dyn fmt::Debug, status: &dyn fmt::Debug, control: &dyn fmt::Debug" +message = "addr={:x?} param={:?} status={:?} control={:?}" +enabled = true + +[[events]] +name = "usb_xhci_port_reset" +args = "port: &dyn fmt::Debug, warm: &dyn fmt::Debug" +message = "port={:?} warm={:?}" +enabled = true + +[[events]] +name = "usb_xhci_port_notify" +args = "port: &dyn fmt::Debug, bits: &dyn fmt::Debug" +message = "port={:?} bits={:?}" +enabled = true + +[[events]] +name = "usb_xhci_enable_slot" +args = "slotid: &dyn fmt::Debug" +message = "slotid={:?}" +enabled = true + +[[events]] +name = "usb_xhci_disable_slot" +args = "slotid: &dyn fmt::Debug" +message = "slotid={:?}" +enabled = true + +[[events]] +name = "usb_xhci_address_device" +args = "slotid: &dyn fmt::Debug, port: &dyn fmt::Debug" +message = "slotid={:?} port={:?}" +enabled = true + +[[events]] +name = "usb_xhci_configure_endpoint" +args = "slotid: &dyn fmt::Debug" +message = "slotid={:?}" +enabled = true + +[[events]] +name = "usb_xhci_evaluate_context" +args = "slotid: &dyn fmt::Debug" +message = "slotid={:?}" +enabled = true + +[[events]] +name = "usb_xhci_reset_device" +args = "slotid: &dyn fmt::Debug" +message = "slotid={:?}" +enabled = true + +[[events]] +name = "usb_xhci_enable_endpoint" +args = "slotid: &dyn fmt::Debug, epid: &dyn fmt::Debug" +message = "slotid={:?} epid={:?}" +enabled = true + +[[events]] +name = "usb_xhci_disable_endpoint" +args = "slotid: &dyn fmt::Debug, epid: &dyn fmt::Debug" +message = "slotid={:?} epid={:?}" +enabled = true + +[[events]] +name = "usb_xhci_set_tr_dequeue" +args = "slotid: &dyn fmt::Debug, epid: &dyn fmt::Debug, param: &dyn fmt::Debug" +message = "slotid={:?} epid={:?} param={:?}" +enabled = true + +[[events]] +name = "usb_xhci_stop_endpoint" +args = "slotid: &dyn fmt::Debug, epid: &dyn fmt::Debug" +message = "slotid={:?} epid={:?}" +enabled = true + +[[events]] +name = "usb_xhci_reset_endpoint" +args = "slotid: &dyn fmt::Debug, epid: &dyn fmt::Debug" +message = "slotid={:?} epid={:?}" +enabled = true + +[[events]] +name = "usb_xhci_xfer_start" +args = "slotid: &dyn fmt::Debug, epid: &dyn fmt::Debug" +message = "slotid={:?} epid={:?}" +enabled = true + +[[events]] +name = "usb_xhci_xfer_async" +args = "" +message = "" +enabled = true + +[[events]] +name = "usb_xhci_xfer_nak" +args = "" 
+message = "" +enabled = true + +[[events]] +name = "usb_xhci_xfer_retry" +args = "" +message = "" +enabled = true + +[[events]] +name = "usb_xhci_xfer_success" +args = "len: &dyn fmt::Debug" +message = "len={:?}" +enabled = true + +[[events]] +name = "usb_xhci_xfer_error" +args = "status: &dyn fmt::Debug" +message = "status={:?}" +enabled = true + +[[events]] +name = "usb_xhci_cancel_all_ep_transfers" +args = "slotid: &dyn fmt::Debug, epid: &dyn fmt::Debug" +message = "slotid={:?} epid={:?}" +enabled = true + +[[events]] +name = "usb_xhci_unimplemented" +args = "str: &dyn fmt::Debug" +message = "{:?}" +enabled = true + +[[events]] +name = "usb_xhci_set_state" +args = "ep_id: u32, new_state: u32" +message = "Endpoint {} set new state {}." +enabled = true + +[[events]] +name = "usb_xhci_update_dequeue" +args = "ep_id: u32, dequeue: u64, stream_id: u32" +message = "Endpoint {} update dequeue {} on Stream ID {}." +enabled = true + +[[events]] +name = "usb_xhci_reset_streams" +args = "ep_id: u32" +message = "Resetting streams on Endpoint {}." +enabled = true + +[[events]] +name = "usb_xhci_get_ring" +args = "ep_id: u32, stream_id: u32" +message = "Found Transfer ring on Endpoint {} Stream ID {}." +enabled = true + +[[events]] +name = "usb_xhci_get_stream" +args = "stream_id: u32, ep_id: u32" +message = "Found Stream Context {} for Endpoint {}." +enabled = true + +[[events]] +name = "usb_handle_control" +args = "device: &str, req: &dyn fmt::Debug" +message = "device {} handle control request {:?}." +enabled = true + +[[events]] +name = "usb_camera_vs_control_request" +args = "cs: u8, vs_control: &dyn fmt::Debug" +message = "VideoStreamingControl {} {:?}." +enabled = true + +[[events]] +name = "usb_camera_handle_control" +args = "" +message = "camera control handled by descriptor." +enabled = true + +[[events]] +name = "usb_camera_handle_payload" +args = "frame_offset: usize, payload_offset: usize, data_size: u64, copied: usize" +message = "camera handle payload, frame_offset {} payloadoffset {} data_size {} copied {}." +enabled = true + +[[events]] +name = "usb_storage_handle_control" +args = "" +message = "storage control handled by descriptor." 
+enabled = true + +[[events]] +name = "usb_storage_handle_data" +args = "ep_number: u8, pid: u32, mode: &dyn fmt::Debug" +message = "endpoint {}, pid 0x{:X}, mode {:?}" +enabled = true + +[[events]] +name = "usb_storage_handle_token_out" +args = "cbw: &dyn fmt::Debug" +message = "cbw {:?}" +enabled = true + +[[events]] +name = "usb_storage_handle_token_in" +args = "csw: &dyn fmt::Debug" +message = "csw {:?}" +enabled = true + +[[events]] +name = "usb_storage_handle_data_inout_packet" +args = "len: u32" +message = "iovec length {}" +enabled = true + +[[events]] +name = "usb_storage_handle_scsi_request" +args = "csw: &dyn fmt::Debug" +message = "csw {:?}" +enabled = true + +[[events]] +name = "usb_tablet_update_point_state" +args = "input_type: &dyn fmt::Debug" +message = "input_type={:?}" +enabled = true + +[[events]] +name = "usb_tablet_point_sync" +args = "" +message = "input pointer sync" +enabled = true + +[[events]] +name = "usb_tablet_queue_full" +args = "" +message = "pointer queue is full" +enabled = true + +[[events]] +name = "usb_keyboard_event" +args = "keycode: &dyn fmt::Debug, down: &dyn fmt::Debug" +message = "do keyboard event keycode={:?} down={:?}" +enabled = true + +[[events]] +name = "usb_keyboard_queue_full" +args = "" +message = "keyboard queue is full" +enabled = true + +[[events]] +name = "usb_convert_to_hid_code" +args = "hid_code: &dyn fmt::Debug, index: &dyn fmt::Debug, key: &dyn fmt::Debug" +message = "hid_code {:?} index {:?} key {:?}" +enabled = true + +[[events]] +name = "usb_no_data_in_usb_device" +args = "" +message = "no data in usb device." +enabled = true + +[[events]] +name = "usb_keyboard_set_report" +args = "led_state: &dyn fmt::Debug" +message = "led_state={:?}" +enabled = true + +[[events]] +name = "usb_host_open_started" +args = "bus_num: u8, addr: u8" +message = "dev bus 0x{:X} addr 0x{:X}" +enabled = true + +[[events]] +name = "usb_host_close" +args = "bus_num: u8, addr: u8" +message = "dev bus 0x{:X} addr 0x{:X}" +enabled = true + +[[events]] +name = "usb_host_open_success" +args = "bus_num: u8, addr: u8" +message = "dev bus 0x{:X} addr 0x{:X}" +enabled = true + +[[events]] +name = "usb_host_reset" +args = "bus_num: u8, addr: u8" +message = "dev bus 0x{:X} addr 0x{:X} reset" +enabled = true + +[[events]] +name = "usb_host_attach_kernel" +args = "bus_num: u8, addr: u8, interface: u8" +message = "dev bus 0x{:X} addr 0x{:X}, interface {}" +enabled = true + +[[events]] +name = "usb_host_detach_kernel" +args = "bus_num: u8, addr: u8, interface: u8" +message = "dev bus 0x{:X} addr 0x{:X}, interface {}" +enabled = true + +[[events]] +name = "usb_host_set_interface" +args = "bus_num: u8, addr: u8, iface: u16, alt: u16" +message = "dev bus 0x{:X} addr 0x{:X}, set interface {}, alt {}" +enabled = true + +[[events]] +name = "usb_host_set_config" +args = "bus_num: u8, addr: u8, config: u8" +message = "dev bus 0x{:X} addr 0x{:X}, set config {}" +enabled = true + +[[events]] +name = "usb_host_set_address" +args = "bus_num: u8, addr: u8, address: u8" +message = "dev bus 0x{:X} addr 0x{:X}, set address {}" +enabled = true + +[[events]] +name = "usb_host_claim_interface" +args = "bus_num: u8, addr: u8, interface: u8" +message = "dev bus 0x{:X} addr 0x{:X}, claim interface {}" +enabled = true + +[[events]] +name = "usb_host_release_interface" +args = "bus_num: u8, addr: u8, interface: u8" +message = "dev bus 0x{:X} addr 0x{:X}, release interface {}" +enabled = true + +[[events]] +name = "usb_host_parse_config" +args = "bus_num: u8, addr: u8, value: u8" +message = 
"dev bus 0x{:X} addr 0x{:X}, parse config value {}" +enabled = true + +[[events]] +name = "usb_host_parse_interface" +args = "bus_num: u8, addr: u8, num: u8, alt: u8" +message = "dev bus 0x{:X} addr 0x{:X}, parse interface num {} alt {}" +enabled = true + +[[events]] +name = "usb_host_parse_error" +args = "bus_num: u8, addr: u8, msg: &str" +message = "dev bus 0x{:X} addr 0x{:X}, msg {}" +enabled = true + +[[events]] +name = "usb_host_parse_endpoint" +args = "bus_num: u8, addr: u8, ep: u8, dir: &dyn fmt::Debug, ep_type: &dyn fmt::Debug" +message = "dev bus 0x{:X} addr 0x{:X}, parse endpoint {} dir {:?} ep_type {:?}" +enabled = true + +[[events]] +name = "usb_host_req_control" +args = "bus_num: u8, addr: u8, request: &dyn fmt::Debug" +message = "dev bus 0x{:X} addr 0x{:X}, request {:?}" +enabled = true + +[[events]] +name = "usb_host_req_data" +args = "bus_num: u8, addr: u8, packet: u64, pid: u32, ep_num: u8, iov_len: usize" +message = "dev bus 0x{:X} addr 0x{:X}, packet 0x{:#X}, pid {} ep_number {} iov len {}" +enabled = true + +[[events]] +name = "usb_host_iso_start" +args = "bus_num: u8, addr: u8, ep_number: u8" +message = "dev bus 0x{:X} addr 0x{:X}, endpoint {}" +enabled = true + +[[events]] +name = "usb_host_iso_stop" +args = "bus_num: u8, addr: u8, ep_number: u8" +message = "dev bus 0x{:X} addr 0x{:X}, endpoint {}" +enabled = true + +[[events]] +name = "usb_host_req_emulated" +args = "bus_num: u8, addr: u8, packet: u64, status: &dyn fmt::Debug" +message = "dev bus 0x{:X} addr 0x{:X}, packet 0x{:#X}, status {:?}" +enabled = true + +[[events]] +name = "usb_host_req_complete" +args = "bus_num: u8, addr: u8, packet: u64, status: &dyn fmt::Debug, actual_length: usize" +message = "dev bus 0x{:X} addr 0x{:X}, packet 0x{:#X}, status {:?} actual length {}" +enabled = true + +[[events]] +name = "usb_uas_handle_control" +args = "packet_id: u32, device_id: &str, req: &[u8]" +message = "USB {} packet received on UAS {} device, the request is {:?}." +enabled = true + +[[events]] +name = "usb_uas_handle_iu_command" +args = "device_id: &str, cdb: u8" +message = "UAS {} device handling IU with cdb[0] {}." +enabled = true + +[[events]] +name = "usb_uas_fill_sense" +args = "status: u8, iu_len: usize, sense_len: usize" +message = "UAS device is filling sense with status {:02} URB length {} sense length {}." +enabled = true + +[[events]] +name = "usb_uas_fill_fake_sense" +args = "status: u8, iu_len: usize, sense_len: usize" +message = "UAS device is filling fake sense with status {:02} URB length {} sense length {}." +enabled = true + +[[events]] +name = "usb_uas_fill_packet" +args = "iovec_size: usize" +message = "UAS device is filling USB packet with iovec of size {}." +enabled = true + +[[events]] +name = "usb_uas_try_start_next_transfer" +args = "device_id: &str, xfer_len: i64" +message = "UAS {} device is trying to start next transfer of length {}." +enabled = true + +[[events]] +name = "usb_uas_start_next_transfer" +args = "device_id: &str, stream: usize" +message = "UAS {} device starting a transfer on stream {}." +enabled = true + +[[events]] +name = "usb_uas_handle_data" +args = "device_id: &str, endpoint: u8, stream: usize" +message = "UAS {} device handling data on endpoint {} and stream {}." +enabled = true + +[[events]] +name = "usb_uas_command_received" +args = "packet_id: u32, device_id: &str" +message = "USB {} command packet received on UAS {} device." 
+enabled = true + +[[events]] +name = "usb_uas_command_completed" +args = "packet_id: u32, device_id: &str" +message = "USB {} command packet completed on UAS {} device." +enabled = true + +[[events]] +name = "usb_uas_status_received" +args = "packet_id: u32, device_id: &str" +message = "USB {} status packet received on UAS {} device." +enabled = true + +[[events]] +name = "usb_uas_status_completed" +args = "packet_id: u32, device_id: &str" +message = "USB {} status packet completed on UAS {} device." +enabled = true + +[[events]] +name = "usb_uas_status_queued_async" +args = "packet_id: u32, device_id: &str" +message = "USB {} status packet queued async on UAS {} device." +enabled = true + +[[events]] +name = "usb_uas_data_received" +args = "packet_id: u32, device_id: &str" +message = "USB {} data packet received on UAS {} device." +enabled = true + +[[events]] +name = "usb_uas_data_completed" +args = "packet_id: u32, device_id: &str" +message = "USB {} data packet completed on UAS {} device." +enabled = true + +[[events]] +name = "usb_uas_data_queued_async" +args = "packet_id: u32, device_id: &str" +message = "USB {} data packet queued async on UAS {} device." +enabled = true + +[[events]] +name = "usb_uas_handle_iu_task_management" +args = "device_id: &str, tmf: u8, tag: u16" +message = "UAS {} device handling TMF {} with tag {}." +enabled = true + +[[events]] +name = "usb_uas_tmf_abort_task" +args = "device_id: &str, task_tag: usize" +message = "UAS {} device aborting task with tag {}." +enabled = true diff --git a/trace/trace_info/virtio.toml b/trace/trace_info/virtio.toml new file mode 100644 index 0000000000000000000000000000000000000000..5d3240b00c317b5c30b641a79e771e0136a7a5fc --- /dev/null +++ b/trace/trace_info/virtio.toml @@ -0,0 +1,323 @@ +[[events]] +name = "virtio_receive_request" +args = "device: String, behaviour: String" +message = "{}: Request received from guest {}, ready to start processing." +enabled = true + +[[events]] +name = "virtqueue_send_interrupt" +args = "device: &str, queue: u64" +message = "{}: virtqueue 0x{:X?} processing complete, ready to send interrupt to guest." +enabled = true + +[[events]] +name = "virtio_scsi_handle_cmd_req" +args = "target: u8, lun: u16, tag: u64, cmd: u8" +message = "target={}, lun={}, tag={}, cmd={}." +enabled = true + +[[events]] +name = "virtio_scsi_handle_cmd_resp" +args = "target: u8, lun: u16, tag: u64, status: u8, response: u8" +message = "target={}, lun={}, tag={}, status={}, response={}." +enabled = true + +[[events]] +name = "virtio_serial_output_data" +args = "iovec_size: u64, size: u64" +message = "iovec size {}, write size {}." +enabled = true + +[[events]] +name = "virtio_serial_disconnected_port" +args = "" +message = "virtio-serial port is none or disconnected." +enabled = true + +[[events]] +name = "virtio_serial_pause_rx" +args = "" +message = "pause rx." +enabled = true + +[[events]] +name = "virtio_serial_unpause_chardev_rx" +args = "" +message = "unpause rx on chardev." +enabled = true + +[[events]] +name = "virtio_serial_new_inputqueue_buf" +args = "" +message = "new buf appeared in virtio-serial input queue." 
+enabled = true + +[[events]] +name = "virtio_rng_write_req_data" +args = "size: u32" +message = "size {}" +enabled = true + +[[events]] +name = "virtio_blk_process_queue_suppress_notify" +args = "len: u16" +message = "len {}" +enabled = true + +[[events]] +name = "virtio_blk_complete_request" +args = "status: u8" +message = "status {}" +enabled = true + +[[events]] +name = "virtio_blk_complete_one_request" +args = "index: u16, len: u32" +message = "index {}, len {}" +enabled = true + +[[events]] +name = "virtio_blk_execute" +args = "request_type: u32, len: usize, offset: usize" +message = "request type {}, iovecs len {}, offset {}" +enabled = true + +[[events]] +name = "virtio_blk_handle_discard_write_zeroes_req" +args = "opcode: &dyn fmt::Debug, flags: u32, offset: usize, nbytes: u64" +message = "opcode {:?}, flags {}, offset {}, nbytes {}" +enabled = true + +[[events]] +name = "virtio_blk_merge_req_queue" +args = "can_merge: bool, merged_reqs: u16, merged_iovs: usize, merged_bytes: u64" +message = "can_merge {}, merged_reqs {}, merged_iovs {}, merged_bytes {}" +enabled = true + +[[events]] +name = "virtio_blk_read_config" +args = "offset: u64, data: &[u8]" +message = "offset {}, data {:?}" +enabled = true + +[[events]] +name = "virtio_blk_write_config" +args = "offset: u64, data: &[u8]" +message = "offset {}, data {:?}" +enabled = true + +[[events]] +name = "virtio_gpu_update_cursor" +args = "scanout: u32, x: u32, y: u32, res: u32, cmd: &str" +message = "scanout {}, x {}, y {}, resource {}, type {}." +enabled = true + +[[events]] +name = "virtio_gpu_get_edid" +args = "scanout: u32" +message = "scanout {}." +enabled = true + +[[events]] +name = "virtio_gpu_resource_create_2d" +args = "res: u32, fmt: u32, w: u32, h: u32" +message = "resource: {}, format {}, width {}, height {}." +enabled = true + +[[events]] +name = "virtio_gpu_resource_unref" +args = "res: u32" +message = "resource: {}." +enabled = true + +[[events]] +name = "virtio_gpu_xfer_toh_2d" +args = "res: u32" +message = "resource: {}." +enabled = true + +[[events]] +name = "virtio_gpu_resource_flush" +args = "res: u32, w: u32, h: u32, x: u32, y: u32" +message = "resource: {}, width: {}, height: {}, x: {}, y: {}." +enabled = true + +[[events]] +name = "virtio_gpu_set_scanout" +args = "res: u32, scanout: u32, w: u32, h: u32, x: u32, y: u32" +message = "resource: {}, scanout: {}, width: {}, height: {}, x: {}, y: {}." +enabled = true + +[[events]] +name = "virtio_gpu_resource_attach_backing" +args = "res: u32" +message = "resource: {}." +enabled = true + +[[events]] +name = "virtio_gpu_resource_detach_backing" +args = "res: u32" +message = "resource: {}." +enabled = true + +[[events]] +name = "virtio_gpu_init_config_features" +args = "features: u64" +message = "features is {}." +enabled = true + +[[events]] +name = "virtio_gpu_console_hw_update" +args = "con: usize, w: i32, h: i32" +message = "console {} receive hw update request, update size {} {}." 
+enabled = true + +[[events]] +name = "virtio_net_handle_ctrl" +args = "class: u8, cmd: u8" +message = "class {}, cmd {}" +enabled = true + +[[events]] +name = "virtqueue_pop_avail" +args = "vring: u64, in_num: usize, out_num: usize" +message = "virtqueue {:#X} pop avail elem, in_iov length {}, out_iov length {}" +enabled = true + +[[events]] +name = "virtqueue_add_used" +args = "vring: u64, next_used: u64, index: u16, len: u32" +message = "virtqueue {:#X} add used elem, used index {}, desc index {}, len {}" +enabled = true + +[[events]] +name = "virtqueue_set_avail_event" +args = "vring: u64, event_idx: u16" +message = "virtqueue {:#X} set avail event idx {}" +enabled = true + +[[events]] +name = "virtio_tpt_read_common_config" +args = "id: &str, offset: u64" +message = "read common config for {}, offset is {:#X}" +enabled = true + +[[events]] +name = "virtio_tpt_write_common_config" +args = "id: &str, offset: u64, value: u32" +message = "write common config for {}, offset is {:#X}, value is {:#X}" +enabled = true + +[[events]] +name = "virtio_tpt_read_config" +args = "id: &str, offset: u64, len: usize" +message = "read config for {}, offset is {:#X}, len is {}" +enabled = true + +[[events]] +name = "virtio_tpt_write_config" +args = "id: &str, offset: u64, data: &[u8]" +message = "write config for {}, offset is {:#X}, data is {:X?}" +enabled = true + +[[events]] +name = "vhost_set_owner" +args = "" +message = "" +enabled = true + +[[events]] +name = "vhost_reset_owner" +args = "" +message = "" +enabled = true + +[[events]] +name = "vhost_get_features" +args = "features: u64" +message = "features: {:#x}." +enabled = true + +[[events]] +name = "vhost_set_features" +args = "features: u64" +message = "features: {:#x}." +enabled = true + +[[events]] +name = "vhost_set_mem_table" +args = "mem: &dyn fmt::Debug" +message = "mem table: {:?}." +enabled = true + +[[events]] +name = "vhost_set_vring_num" +args = "queue_idx: usize, num: u16" +message = "set vring {} descriptors num {}." +enabled = true + +[[events]] +name = "vhost_set_vring_addr" +args = "vring_addr: &dyn fmt::Debug" +message = "vring addr: {:?}." +enabled = true + +[[events]] +name = "vhost_set_vring_base" +args = "queue_idx: usize, num: u16" +message = "queue_idx {} num {}." +enabled = true + +[[events]] +name = "vhost_get_vring_base" +args = "queue_idx: usize, num: u16" +message = "queue_idx {} num {}." +enabled = true + +[[events]] +name = "vhost_set_vring_call" +args = "queue_idx: usize, event_fd: &dyn fmt::Debug" +message = "queue_idx {}, event_fd {:?}." +enabled = true + +[[events]] +name = "vhost_set_vring_kick" +args = "queue_idx: usize, event_fd: &dyn fmt::Debug" +message = "queue_idx {}, event_fd {:?}." +enabled = true + +[[events]] +name = "vhost_set_vring_enable" +args = "queue_idx: usize, status: bool" +message = "set vring {} status {}." +enabled = true + +[[events]] +name = "vhost_delete_mem_range_failed" +args = "" +message = "Vhost: deleting mem region failed: not matched." 
+enabled = true + +[[events]] +name = "auto_msg_evt_handler" +args = "" +message = "Balloon: handle auto balloon message" +enabled = true + +[[events]] +name = "reporting_evt_handler" +args = "" +message = "Balloon: handle fpr message" +enabled = true + +[[events]] +name = "virtio_read_object_direct" +args = "host_addr: u64, count: usize" +message = "Memory: virtio_read_object_direct host_addr {}, count {}" +enabled = true + +[[events]] +name = "virtio_write_object_direct" +args = "host_addr: u64, count: usize" +message = "Memory: virtio_write_object_direct host_addr {}, count {}" +enabled = true diff --git a/ui/Cargo.toml b/ui/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..b7faa42406adbf0ba8776cfcc0d385c97f8d1860 --- /dev/null +++ b/ui/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "ui" +version = "2.4.0" +authors = ["Huawei StratoVirt Team"] +edition = "2021" +license = "Mulan PSL v2" +description = "User Interface" + +[dependencies] +thiserror = "1.0" +anyhow = "1.0" +libc = "0.2" +log = "0.4" +serde_json = "1.0" +vmm-sys-util = "0.12.1" +once_cell = "1.18.0" +sscanf = "0.4.1" +bitintr = "0.3.0" +gtk = { version = "0.17.1", optional = true } +gettext-rs = { version = "0.7.0", features = ["gettext-system"], optional = true } +cairo-rs = { version = "0.17.10", features = ["png"], optional = true } +rustls = { version = "0.21.1", optional = true } +rustls-pemfile = { version = "1.0.2", optional = true } +sasl2-sys = { version = "0.1.20", optional = true } +machine_manager = { path = "../machine_manager" } +util = { path = "../util" } +trace = { path = "../trace" } +address_space = { path = "../address_space" } + +[features] +keycode = [] +pixman = ["util/pixman"] +console = ["pixman"] +gtk = ["console", "keycode", "dep:cairo-rs", "dep:gtk", "dep:gettext-rs", "machine_manager/gtk"] +vnc = ["console", "keycode", "machine_manager/vnc"] +vnc_auth = ["vnc", "dep:rustls", "dep:rustls-pemfile", "dep:sasl2-sys", "machine_manager/vnc_auth"] +ohui_srv = ["console", "keycode", "machine_manager/ohui_srv"] diff --git a/ui/src/console.rs b/ui/src/console.rs new file mode 100644 index 0000000000000000000000000000000000000000..f4461c8c8eb616aede1e094a421bbec979930628 --- /dev/null +++ b/ui/src/console.rs @@ -0,0 +1,906 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + cmp, + mem::size_of, + ptr, + sync::{Arc, Mutex, Weak}, + time::Duration, +}; + +use anyhow::Result; +use log::error; +use once_cell::sync::Lazy; + +use crate::pixman::{ + create_pixman_image, get_image_data, get_image_height, get_image_stride, get_image_width, + pixman_glyph_from_vgafont, pixman_glyph_render, unref_pixman_image, ColorNames, + COLOR_TABLE_RGB, +}; +use machine_manager::event_loop::EventLoop; +use util::pixman::{pixman_format_code_t, pixman_image_t}; + +static CONSOLES: Lazy>> = + Lazy::new(|| Arc::new(Mutex::new(ConsoleList::new()))); +static DISPLAY_STATE: Lazy>> = + Lazy::new(|| Arc::new(Mutex::new(DisplayState::new()))); + +/// Width of font. 
+const FONT_WIDTH: i32 = 8;
+/// Height of font.
+const FONT_HEIGHT: i32 = 16;
+/// Width of image in surface.
+pub const DEFAULT_SURFACE_WIDTH: i32 = 800;
+/// Height of image in surface.
+pub const DEFAULT_SURFACE_HEIGHT: i32 = 600;
+/// Maximum default window width.
+pub const MAX_WINDOW_WIDTH: u16 = 2560;
+/// Maximum default window height.
+pub const MAX_WINDOW_HEIGHT: u16 = 2048;
+
+/// Minimum refresh interval in ms.
+pub const DISPLAY_UPDATE_INTERVAL_DEFAULT: u64 = 30;
+/// Update time interval dynamically.
+pub const DISPLAY_UPDATE_INTERVAL_INC: u64 = 50;
+/// Maximum refresh interval in ms.
+pub const DISPLAY_UPDATE_INTERVAL_MAX: u64 = 3_000;
+
+pub enum ConsoleType {
+    Graphic,
+    Text,
+}
+
+/// Run stage of virtual machine.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum VmRunningStage {
+    Init,
+    Bios,
+    Os,
+}
+
+#[derive(Default)]
+struct UiInfo {
+    last_width: u32,
+    last_height: u32,
+}
+
+/// Image data defined in display.
+#[derive(Clone, Copy)]
+pub struct DisplaySurface {
+    /// Image format.
+    pub format: pixman_format_code_t,
+    /// Pointer to image.
+    pub image: *mut pixman_image_t,
+}
+
+impl Default for DisplaySurface {
+    fn default() -> Self {
+        DisplaySurface {
+            format: pixman_format_code_t::PIXMAN_a8r8g8b8,
+            image: ptr::null_mut(),
+        }
+    }
+}
+
+impl DisplaySurface {
+    pub fn width(&self) -> i32 {
+        get_image_width(self.image)
+    }
+
+    pub fn height(&self) -> i32 {
+        get_image_height(self.image)
+    }
+
+    pub fn stride(&self) -> i32 {
+        get_image_stride(self.image)
+    }
+
+    pub fn data(&self) -> *mut u32 {
+        get_image_data(self.image)
+    }
+}
+
+/// Cursor data defined in Display.
+/// hot_x and hot_y indicate the hotspot of the cursor.
+/// width and height indicate the width of the cursor in pixels.
+/// The data consists of the primary and secondary colours for
+/// the cursor, followed by one bitmap for the colour and
+/// one bitmask for the transparency.
+#[derive(Clone, Default)]
+pub struct DisplayMouse {
+    pub width: u32,
+    pub height: u32,
+    pub hot_x: u32,
+    pub hot_y: u32,
+    pub data: Vec<u8>,
+}
+
+impl DisplayMouse {
+    pub fn new(width: u32, height: u32, hot_x: u32, hot_y: u32) -> Self {
+        let data_size = (width * height) as usize * size_of::<u32>();
+        DisplayMouse {
+            width,
+            height,
+            hot_x,
+            hot_y,
+            data: vec![0_u8; data_size],
+        }
+    }
+}
+
+/// UIs (such as VNC) can register interfaces related to image display.
+/// After the graphic hardware processes images, these interfaces can be
+/// called to display images on the user's desktop.
+pub trait DisplayChangeListenerOperations {
+    /// Switch the image in display surface.
+    fn dpy_switch(&self, _surface: &DisplaySurface) -> Result<()>;
+    /// Refresh the image.
+    fn dpy_refresh(&self, _dcl: &Arc<Mutex<DisplayChangeListener>>) -> Result<()>;
+    /// Update image.
+    fn dpy_image_update(&self, _x: i32, _y: i32, _w: i32, _h: i32) -> Result<()>;
+    /// Update the cursor data.
+    fn dpy_cursor_update(&self, _cursor: &DisplayMouse) -> Result<()>;
+    /// Set the current display as major.
+    fn dpy_set_major(&self) -> Result<()> {
+        Ok(())
+    }
+}
+
+/// Callback functions registered by graphic hardware.
+pub trait HardWareOperations {
+    /// Update image.
+    fn hw_update(&self, _con: Arc<Mutex<DisplayConsole>>) {}
+    /// Ui configuration changed.
+    fn hw_ui_info(&self, _con: Arc<Mutex<DisplayConsole>>, _width: u32, _height: u32) {}
+}
+
+/// Listen to the change of image and call the related
+/// interface to update the image on user's desktop.
+pub struct DisplayChangeListener { + pub con_id: Option, + pub dcl_id: Option, + pub active: bool, + pub update_interval: u64, + pub dpy_opts: Arc, +} + +impl DisplayChangeListener { + pub fn new(con_id: Option, dpy_opts: Arc) -> Self { + Self { + con_id, + dcl_id: None, + active: false, + update_interval: 0, + dpy_opts, + } + } +} + +/// Graphic hardware can register a console during initialization +/// and store the information of images in this structure. +pub struct DisplayConsole { + pub con_id: usize, + pub dev_name: String, + pub con_type: ConsoleType, + pub width: i32, + pub height: i32, + ui_info: UiInfo, + pub surface: Option, + pub console_list: Weak>, + pub dev_opts: Arc, + pub timer_id: Option, + pub active: bool, +} + +impl DisplayConsole { + pub fn new( + con_id: usize, + dev_name: String, + con_type: ConsoleType, + console_list: Weak>, + dev_opts: Arc, + ) -> Self { + Self { + con_id, + dev_name, + con_type, + width: 0, + height: 0, + ui_info: UiInfo::default(), + console_list, + surface: None, + dev_opts, + timer_id: None, + active: true, + } + } +} + +/// The state of console layer. +struct DisplayState { + /// Running stage. + run_stage: VmRunningStage, + /// Refresh interval, which can be dynamic changed. + interval: u64, + /// Whether there is a refresh task. + is_refresh: bool, + /// A list of DisplayChangeListeners. + listeners: Vec>>>, + /// Total number of refresh task. + refresh_num: i32, +} + +// SAFETY: The Arc in rust doesn't impl Send, it will be delivered only once during +// initialization process, and only be saved in the single thread. So implement Send is safe. +unsafe impl Send for DisplayState {} + +impl DisplayState { + fn new() -> Self { + Self { + run_stage: VmRunningStage::Init, + interval: DISPLAY_UPDATE_INTERVAL_DEFAULT, + is_refresh: false, + listeners: Vec::new(), + refresh_num: 0, + } + } + + // Get all related display by con_id. + fn get_related_display(&self, con_id: usize) -> Result>>> { + let mut related_dpys: Vec>> = vec![]; + let active_id = CONSOLES.lock().unwrap().activate_id; + for dcl in self.listeners.iter().flatten() { + match dcl.lock().unwrap().con_id { + Some(id) if con_id == id => { + related_dpys.push(dcl.clone()); + } + None if Some(con_id) == active_id => { + related_dpys.push(dcl.clone()); + } + _ => {} + } + } + + Ok(related_dpys) + } +} + +/// The registered console will be inserted in the console list. +/// If no console is specified, the activate console will be used. +pub struct ConsoleList { + pub activate_id: Option, + pub console_list: Vec>>>, +} + +// SAFETY: +// 1. The raw pointer in rust doesn't impl Send, the target thread can only read the memory of image +// by this pointer. +// 2. The Arc in rust doesn't impl Send, it will be delivered only once during +// initialization process, +// and only be saved in the single thread. +// So implement Send is safe. +unsafe impl Send for ConsoleList {} + +impl ConsoleList { + fn new() -> Self { + Self { + activate_id: None, + console_list: Vec::new(), + } + } + + // Get console by device name. + fn get_console_by_dev_name(&mut self, dev_name: String) -> Option>> { + let mut target: Option>> = None; + for con in self.console_list.iter().flatten() { + let locked_con = con.lock().unwrap(); + if locked_con.dev_name == dev_name { + target = Some(con.clone()); + break; + } + } + target + } + + /// Get the console by id. 
+ fn get_console_by_id(&mut self, con_id: Option) -> Option>> { + if con_id.is_none() && self.activate_id.is_none() { + return None; + } + + let mut target_id: usize = 0; + if let Some(id) = con_id { + target_id = id; + } else if let Some(id) = self.activate_id { + target_id = id; + } + + self.console_list.get(target_id)?.clone() + } +} + +/// Set currently running stage for virtual machine. +pub fn set_run_stage(run_stage: VmRunningStage) { + DISPLAY_STATE.lock().unwrap().run_stage = run_stage +} + +/// Get currently running stage. +pub fn get_run_stage() -> VmRunningStage { + DISPLAY_STATE.lock().unwrap().run_stage +} + +/// Refresh display image. +fn display_refresh() { + let mut dcl_interval: u64; + let mut interval: u64 = DISPLAY_UPDATE_INTERVAL_MAX; + + let mut locked_state = DISPLAY_STATE.lock().unwrap(); + let mut related_listeners: Vec>> = vec![]; + for dcl in &mut locked_state.listeners.iter_mut().flatten() { + related_listeners.push(dcl.clone()); + } + drop(locked_state); + + for dcl in &mut related_listeners.iter() { + let dcl_opts = dcl.lock().unwrap().dpy_opts.clone(); + if let Err(e) = (*dcl_opts).dpy_refresh(dcl) { + error!("{:?}", e); + return; + } + + // Update refresh interval. + dcl_interval = dcl.lock().unwrap().update_interval; + if dcl_interval == 0 { + dcl_interval = DISPLAY_UPDATE_INTERVAL_MAX; + } + + if interval > dcl_interval { + interval = dcl_interval + } + } + trace::console_dpy_refresh(&interval); + + let mut locked_state = DISPLAY_STATE.lock().unwrap(); + locked_state.interval = interval; + if locked_state.interval != 0 { + locked_state.is_refresh = true; + setup_refresh(interval); + } +} + +/// Register the timer to execute the scheduled +/// refresh task. +fn setup_refresh(update_interval: u64) { + let func = Box::new(move || { + display_refresh(); + }); + + if update_interval != 0 { + EventLoop::get_ctx(None) + .unwrap() + .timer_add(func, Duration::from_millis(update_interval)); + } +} + +/// Switch the image of surface in display. +pub fn display_replace_surface( + console: &Option>>, + surface: Option, +) -> Result<()> { + let con = match console.as_ref().and_then(|c| c.upgrade()) { + Some(c) => c, + None => return Ok(()), + }; + + let mut locked_con = con.lock().unwrap(); + let old_surface = locked_con.surface; + if surface.is_none() { + // Create a place holder message. + locked_con.surface = create_msg_surface( + DEFAULT_SURFACE_WIDTH, + DEFAULT_SURFACE_HEIGHT, + "Display is not active.".to_string(), + ); + } else { + locked_con.surface = surface; + } + + if let Some(s) = locked_con.surface { + locked_con.width = get_image_width(s.image); + locked_con.height = get_image_height(s.image); + } + let con_id = locked_con.con_id; + if let Some(s) = old_surface { + unref_pixman_image(s.image); + } + drop(locked_con); + + let related_listeners = DISPLAY_STATE.lock().unwrap().get_related_display(con_id)?; + for dcl in related_listeners.iter() { + let dcl_opts = dcl.lock().unwrap().dpy_opts.clone(); + if let Some(s) = &con.lock().unwrap().surface.clone() { + (*dcl_opts).dpy_switch(s)?; + } + } + Ok(()) +} + +/// Update area of the image. +/// `x` `y` `w` `h` marke the area of image. 
+pub fn display_graphic_update( + console: &Option>>, + x: i32, + y: i32, + w: i32, + h: i32, +) -> Result<()> { + let con = match console.as_ref().and_then(|c| c.upgrade()) { + Some(c) => c, + None => return Ok(()), + }; + let mut width: i32 = w; + let mut height: i32 = h; + let locked_con = con.lock().unwrap(); + if let Some(s) = locked_con.surface { + width = get_image_width(s.image); + height = get_image_height(s.image); + } + let mut x = cmp::max(x, 0); + let mut y = cmp::max(y, 0); + x = cmp::min(x, width); + y = cmp::min(y, height); + let w = cmp::min(w, width - x); + let h = cmp::min(h, height - y); + let con_id = locked_con.con_id; + drop(locked_con); + + let related_listeners = DISPLAY_STATE.lock().unwrap().get_related_display(con_id)?; + for dcl in related_listeners.iter() { + let dcl_opts = dcl.lock().unwrap().dpy_opts.clone(); + (*dcl_opts).dpy_image_update(x, y, w, h)?; + } + Ok(()) +} + +/// Update cursor data in display. +/// +/// # Arguments +/// +/// * `con_id` - console id in console list. +/// * `cursor` - data of curosr image. +pub fn display_cursor_define( + console: &Option>>, + cursor: &DisplayMouse, +) -> Result<()> { + let con = match console.as_ref().and_then(|c| c.upgrade()) { + Some(c) => c, + None => return Ok(()), + }; + let con_id = con.lock().unwrap().con_id; + let related_listeners = DISPLAY_STATE.lock().unwrap().get_related_display(con_id)?; + + for dcl in related_listeners.iter() { + let dcl_opts = dcl.lock().unwrap().dpy_opts.clone(); + (*dcl_opts).dpy_cursor_update(cursor)?; + } + Ok(()) +} + +/// Set specific screen as the main display screen. +pub fn display_set_major_screen(dev_name: &str) -> Result<()> { + let con = match CONSOLES + .lock() + .unwrap() + .get_console_by_dev_name(dev_name.to_string()) + { + Some(c) => c, + None => return Ok(()), + }; + let con_id = con.lock().unwrap().con_id; + console_select(Some(con_id))?; + let related_listeners = DISPLAY_STATE.lock().unwrap().get_related_display(con_id)?; + + for dcl in related_listeners.iter() { + let dcl_opts = dcl.lock().unwrap().dpy_opts.clone(); + (*dcl_opts).dpy_set_major()?; + } + Ok(()) +} + +pub fn graphic_hardware_update(con_id: Option) { + let console = CONSOLES.lock().unwrap().get_console_by_id(con_id); + if let Some(con) = console { + let con_opts = con.lock().unwrap().dev_opts.clone(); + (*con_opts).hw_update(con); + } +} + +pub fn graphic_hardware_ui_info( + con: Arc>, + width: u32, + height: u32, +) -> Result<()> { + let mut locked_con = con.lock().unwrap(); + trace::console_dpy_ui_info( + &locked_con.dev_name, + &width, + &height, + &locked_con.ui_info.last_width, + &locked_con.ui_info.last_height, + ); + if locked_con.ui_info.last_width == width && locked_con.ui_info.last_height == height { + return Ok(()); + } + locked_con.ui_info.last_width = width; + locked_con.ui_info.last_height = height; + + let clone_con = con.clone(); + let con_opts = locked_con.dev_opts.clone(); + let func = Box::new(move || { + (*con_opts).hw_ui_info(clone_con.clone(), width, height); + }); + + let ctx = EventLoop::get_ctx(None).unwrap(); + if let Some(timer_id) = locked_con.timer_id { + ctx.timer_del(timer_id); + } + locked_con.timer_id = Some(ctx.timer_add(func, Duration::from_millis(500))); + + Ok(()) +} + +/// Get the weak reference of all active consoles from the console lists. 
+pub fn get_active_console() -> Vec>> { + let mut res: Vec>> = vec![]; + let locked_cons = CONSOLES.lock().unwrap(); + for con in locked_cons.console_list.iter().flatten() { + if con.lock().unwrap().active { + res.push(Arc::downgrade(con)); + } + } + + res +} + +/// Register a dcl and return the id. +pub fn register_display(dcl: &Arc>) -> Result<()> { + let mut dcl_id = 0; + let mut locked_state = DISPLAY_STATE.lock().unwrap(); + let len = locked_state.listeners.len(); + for dcl in &mut locked_state.listeners.iter() { + if dcl.is_none() { + break; + } + dcl_id += 1; + } + if dcl_id < len { + locked_state.listeners[dcl_id] = Some(dcl.clone()); + } else { + locked_state.listeners.push(Some(dcl.clone())); + } + locked_state.refresh_num += 1; + // Register the clock and execute the scheduled refresh event. + if !locked_state.is_refresh && locked_state.interval != 0 { + locked_state.is_refresh = true; + setup_refresh(locked_state.interval); + } + drop(locked_state); + dcl.lock().unwrap().dcl_id = Some(dcl_id); + let dcl_opts = dcl.lock().unwrap().dpy_opts.clone(); + + let con_id = dcl.lock().unwrap().con_id; + let console = CONSOLES.lock().unwrap().get_console_by_id(con_id); + if let Some(con) = console { + if let Some(surface) = &mut con.lock().unwrap().surface.clone() { + (*dcl_opts).dpy_switch(surface)?; + } + } else { + let mut place_holder_image = create_msg_surface( + DEFAULT_SURFACE_WIDTH, + DEFAULT_SURFACE_HEIGHT, + "This VM has no graphic display device.".to_string(), + ); + if let Some(surface) = &mut place_holder_image { + (*dcl_opts).dpy_switch(surface)?; + } + } + + Ok(()) +} + +/// Unregister display change listener. +pub fn unregister_display(dcl: &Option>>) -> Result<()> { + let dcl = match dcl.as_ref().and_then(|d| d.upgrade()) { + Some(d) => d, + None => return Ok(()), + }; + let dcl_id = dcl.lock().unwrap().dcl_id; + let mut locked_state = DISPLAY_STATE.lock().unwrap(); + let len = locked_state.listeners.len(); + let id = dcl_id.unwrap_or(len); + if id >= len { + return Ok(()); + } + locked_state.listeners[id] = None; + // Stop refreshing if the current refreshing num is 0 + locked_state.refresh_num -= 1; + if locked_state.refresh_num <= 0 { + locked_state.is_refresh = false; + } + drop(locked_state); + Ok(()) +} + +/// Create a console and add into a global list. Then returen a console id +/// for later finding the assigned console. 
+pub fn console_init( + dev_name: String, + con_type: ConsoleType, + dev_opts: Arc, +) -> Option>> { + let mut locked_consoles = CONSOLES.lock().unwrap(); + for con in locked_consoles.console_list.iter().flatten() { + let mut locked_con = con.lock().unwrap(); + if locked_con.dev_name == dev_name { + locked_con.active = true; + locked_con.dev_opts = dev_opts; + return Some(Arc::downgrade(con)); + } + } + + let con_id = locked_consoles.console_list.len(); + let new_con = DisplayConsole::new( + con_id, + dev_name, + con_type, + Arc::downgrade(&CONSOLES), + dev_opts, + ); + let con = Arc::new(Mutex::new(new_con)); + locked_consoles.console_list.push(Some(con.clone())); + if locked_consoles.activate_id.is_none() { + locked_consoles.activate_id = Some(con_id); + } + drop(locked_consoles); + + let con = Arc::downgrade(&con); + let surface = create_msg_surface( + DEFAULT_SURFACE_WIDTH, + DEFAULT_SURFACE_HEIGHT, + "Guest has not initialized the display yet.".to_string(), + ); + display_replace_surface(&Some(con.clone()), surface) + .unwrap_or_else(|e| error!("Error occurs during surface switching: {:?}", e)); + set_run_stage(VmRunningStage::Bios); + Some(con) +} + +/// Close a console. +pub fn console_close(console: &Option>>) -> Result<()> { + let con = match console.as_ref().and_then(|c| c.upgrade()) { + Some(c) => c, + None => return Ok(()), + }; + let mut locked_con = con.lock().unwrap(); + if let Some(surface) = locked_con.surface { + unref_pixman_image(surface.image); + } + locked_con.active = false; + locked_con.surface = create_msg_surface( + DEFAULT_SURFACE_WIDTH, + DEFAULT_SURFACE_HEIGHT, + "Display is not active.".to_string(), + ); + let con_id = locked_con.con_id; + drop(locked_con); + + // If the active console is closed, reset the active console. + let mut locked_consoles = CONSOLES.lock().unwrap(); + match locked_consoles.activate_id { + Some(active_con) if active_con == con_id => { + let mut active_id: Option = None; + for con in locked_consoles.console_list.iter().flatten() { + let locked_con = con.lock().unwrap(); + if locked_con.active { + active_id = Some(locked_con.con_id); + break; + } + } + locked_consoles.activate_id = active_id; + } + _ => {} + } + + Ok(()) +} + +/// Select the default display device. +/// If con_id is none, then do nothing. +pub fn console_select(con_id: Option) -> Result<()> { + trace::console_select(&con_id); + + let mut locked_consoles = CONSOLES.lock().unwrap(); + if locked_consoles.activate_id == con_id { + return Ok(()); + } + let activate_console: Option>> = match con_id { + Some(id) if locked_consoles.console_list.get(id).is_some() => { + locked_consoles.activate_id = Some(id); + locked_consoles.console_list[id].clone() + } + _ => return Ok(()), + }; + drop(locked_consoles); + + let mut related_listeners: Vec>> = vec![]; + let mut locked_state = DISPLAY_STATE.lock().unwrap(); + for dcl in locked_state.listeners.iter_mut().flatten() { + if dcl.lock().unwrap().con_id.is_some() { + continue; + } + + related_listeners.push(dcl.clone()); + } + drop(locked_state); + + let con = match activate_console { + Some(c) => c, + None => return Ok(()), + }; + let width = con.lock().unwrap().width; + let height = con.lock().unwrap().height; + for dcl in related_listeners { + let dpy_opts = dcl.lock().unwrap().dpy_opts.clone(); + if let Some(s) = &mut con.lock().unwrap().surface { + (*dpy_opts).dpy_switch(s)?; + } + } + + display_graphic_update(&Some(Arc::downgrade(&con)), 0, 0, width, height) +} + +/// Create a default image to display messages. 
+/// +/// # Arguments +/// +/// * `width` - width of image. +/// * `height` - height of image. +/// * `msg` - test messages showed in display. +pub fn create_msg_surface(width: i32, height: i32, msg: String) -> Option { + if !(0..i32::from(MAX_WINDOW_WIDTH)).contains(&width) + || !(0..i32::from(MAX_WINDOW_HEIGHT)).contains(&height) + { + error!("The size of image is invalid!"); + return None; + } + let mut surface = DisplaySurface::default(); + + // One pixel occupies four bytes. + surface.image = create_pixman_image(surface.format, width, height, ptr::null_mut(), width * 4); + if surface.image.is_null() { + error!("create default surface failed!"); + return None; + } + + let fg = COLOR_TABLE_RGB[0][ColorNames::ColorWhite as usize]; + let bg = COLOR_TABLE_RGB[0][ColorNames::ColorBlack as usize]; + let x = (width / FONT_WIDTH - msg.len() as i32) / 2; + let y = (height / FONT_HEIGHT - 1) / 2; + + for (index, ch) in msg.chars().enumerate() { + let glyph = pixman_glyph_from_vgafont(FONT_HEIGHT as u32, ch as u32); + if glyph.is_null() { + continue; + } + pixman_glyph_render( + glyph, + surface.image, + &fg, + &bg, + (x + index as i32, y), + FONT_WIDTH, + FONT_HEIGHT, + ); + unref_pixman_image(glyph); + } + Some(surface) +} + +#[cfg(test)] +mod tests { + use super::*; + use machine_manager::config::VmConfig; + pub struct DclOpts {} + impl DisplayChangeListenerOperations for DclOpts { + fn dpy_switch(&self, _surface: &DisplaySurface) -> Result<()> { + Ok(()) + } + + fn dpy_refresh(&self, _dcl: &Arc>) -> Result<()> { + Ok(()) + } + + fn dpy_image_update(&self, _x: i32, _y: i32, _w: i32, _h: i32) -> Result<()> { + Ok(()) + } + + fn dpy_cursor_update(&self, _cursor: &DisplayMouse) -> Result<()> { + Ok(()) + } + } + struct HwOpts {} + impl HardWareOperations for HwOpts {} + + #[test] + fn test_console_select() { + let con_opts = Arc::new(HwOpts {}); + let dev_name0 = "test_device0".to_string(); + let con_0 = console_init(dev_name0, ConsoleType::Graphic, con_opts.clone()); + let clone_con = con_0.clone(); + assert_eq!( + clone_con.unwrap().upgrade().unwrap().lock().unwrap().con_id, + 0 + ); + let dev_name1 = "test_device1".to_string(); + let con_1 = console_init(dev_name1, ConsoleType::Graphic, con_opts.clone()); + assert_eq!(con_1.unwrap().upgrade().unwrap().lock().unwrap().con_id, 1); + let dev_name2 = "test_device2".to_string(); + let con_2 = console_init(dev_name2, ConsoleType::Graphic, con_opts.clone()); + assert_eq!(con_2.unwrap().upgrade().unwrap().lock().unwrap().con_id, 2); + assert!(console_close(&con_0).is_ok()); + assert_eq!(CONSOLES.lock().unwrap().activate_id, Some(1)); + let dev_name3 = "test_device3".to_string(); + let con_3 = console_init(dev_name3, ConsoleType::Graphic, con_opts); + assert_eq!(con_3.unwrap().upgrade().unwrap().lock().unwrap().con_id, 3); + assert!(console_select(Some(0)).is_ok()); + assert_eq!(CONSOLES.lock().unwrap().activate_id, Some(0)); + assert!(console_select(Some(1)).is_ok()); + assert_eq!(CONSOLES.lock().unwrap().activate_id, Some(1)); + assert!(console_select(Some(2)).is_ok()); + assert_eq!(CONSOLES.lock().unwrap().activate_id, Some(2)); + assert!(console_select(Some(3)).is_ok()); + assert_eq!(CONSOLES.lock().unwrap().activate_id, Some(3)); + assert!(console_select(None).is_ok()); + assert_eq!(CONSOLES.lock().unwrap().activate_id, Some(3)); + } + + #[test] + fn test_register_display() { + let vm_config = VmConfig::default(); + assert!(EventLoop::object_init(&vm_config.iothreads).is_ok()); + let dcl_opts = Arc::new(DclOpts {}); + let dcl_0 = 
Arc::new(Mutex::new(DisplayChangeListener::new( + None, + dcl_opts.clone(), + ))); + let dcl_1 = Arc::new(Mutex::new(DisplayChangeListener::new( + None, + dcl_opts.clone(), + ))); + let dcl_2 = Arc::new(Mutex::new(DisplayChangeListener::new( + None, + dcl_opts.clone(), + ))); + let dcl_3 = Arc::new(Mutex::new(DisplayChangeListener::new(None, dcl_opts))); + + assert!(register_display(&dcl_0).is_ok()); + assert_eq!(dcl_0.lock().unwrap().dcl_id, Some(0)); + assert!(register_display(&dcl_1).is_ok()); + assert_eq!(dcl_1.lock().unwrap().dcl_id, Some(1)); + assert!(register_display(&dcl_2).is_ok()); + assert_eq!(dcl_2.lock().unwrap().dcl_id, Some(2)); + assert!(unregister_display(&Some(Arc::downgrade(&dcl_0))).is_ok()); + assert!(register_display(&dcl_3).is_ok()); + assert_eq!(dcl_3.lock().unwrap().dcl_id, Some(0)); + } +} diff --git a/ui/src/error.rs b/ui/src/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..d8bb09d570caf401cfaf9da6631b4d6729fb323e --- /dev/null +++ b/ui/src/error.rs @@ -0,0 +1,42 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum VncError { + #[error("Util")] + Util { + #[from] + source: util::error::UtilError, + }, + #[error("Unsupported RFB Protocol Version!")] + UnsupportedRFBProtocolVersion, + #[error("Invalid Image Size: width: {0}, height: {0}")] + InvalidImageSize(i32, i32), + #[error("Tcp bind failed: {0}")] + TcpBindFailed(String), + #[error("Make connection failed: {0}")] + MakeConnectionFailed(String), + #[error("Make tls connection failed: {0}")] + MakeTlsConnectionFailed(String), + #[error("ProtocolMessage failed: {0}")] + ProtocolMessageFailed(String), + #[error("Read buf form tcpstream failed: {0}")] + ReadMessageFailed(String), + #[error("Authentication failed: func: {0} reason: {0}")] + AuthFailed(String, String), + #[error("ParseKeyBoardFailed: {0}")] + ParseKeyBoardFailed(String), + #[error("Disconnection")] + Disconnection, +} diff --git a/ui/src/gtk/draw.rs b/ui/src/gtk/draw.rs new file mode 100644 index 0000000000000000000000000000000000000000..b9cbfc6eee87151c85c7f810c8631e1bdeb3ecb3 --- /dev/null +++ b/ui/src/gtk/draw.rs @@ -0,0 +1,347 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
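The VncError enum above relies on thiserror's derive: each variant's #[error(...)] string becomes its Display output, and the #[from] attribute on the Util variant lets util errors convert automatically through the ? operator. A minimal sketch of how a variant is typically constructed and propagated (bind_vnc is a hypothetical helper written only for illustration, assuming VncError is in scope; it is not part of this patch):

use std::net::TcpListener;

// Hypothetical helper: map the io::Error from the bind call into the
// TcpBindFailed variant so the caller can keep using `?`.
fn bind_vnc(addr: &str) -> Result<TcpListener, VncError> {
    TcpListener::bind(addr).map_err(|e| VncError::TcpBindFailed(format!("{}: {}", addr, e)))
}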
+ +use std::{cell::RefCell, rc::Rc}; + +use anyhow::Result; +use gdk::{prelude::SeatExt, SeatCapabilities}; +use gtk::{ + cairo, + gdk::{self, EventMask, ScrollDirection}, + glib::{self, translate::IntoGlib}, + prelude::WidgetExtManual, + traits::WidgetExt, + DrawingArea, Inhibit, +}; +use log::error; + +use crate::{ + console::graphic_hardware_ui_info, + gtk::GtkDisplayScreen, + input::{ + self, input_button, input_move_abs, input_point_sync, press_mouse, release_all_key, + update_key_state, Axis, ABS_MAX, INPUT_BUTTON_WHEEL_DOWN, INPUT_BUTTON_WHEEL_LEFT, + INPUT_BUTTON_WHEEL_RIGHT, INPUT_BUTTON_WHEEL_UP, INPUT_POINT_BACK, INPUT_POINT_FORWARD, + INPUT_POINT_LEFT, INPUT_POINT_MIDDLE, INPUT_POINT_RIGHT, + }, +}; + +const GTK_INPUT_BUTTON_LEFT: u32 = 1; +const GTK_INPUT_BUTTON_MIDDLE: u32 = 2; +const GTK_INPUT_BUTTON_RIGHT: u32 = 3; +const GTK_INPUT_BUTTON_BACK: u32 = 8; +const GTK_INPUT_BUTTON_FORWARD: u32 = 9; + +pub(crate) fn set_callback_for_draw_area( + draw_area: &DrawingArea, + gs: Rc>, +) -> Result<()> { + draw_area.connect_draw( + glib::clone!(@weak gs => @default-return Inhibit(false), move |_, cr| { + da_draw_callback(&gs, cr).unwrap_or_else(|e| error!("Draw: {}", e)); + Inhibit(false) + }), + ); + draw_area.connect_event( + glib::clone!(@weak gs => @default-return Inhibit(false), move |_, event| { + da_event_callback(&gs, event).unwrap_or_else(|e| error!("Draw event: {}", e)); + Inhibit(false)}), + ); + draw_area.connect_button_press_event( + glib::clone!(@weak gs => @default-return Inhibit(false), move |_, button_event| { + da_pointer_callback(button_event).unwrap_or_else(|e| error!("Press event: {}", e)); + Inhibit(false) + }), + ); + draw_area.connect_button_release_event( + glib::clone!(@weak gs => @default-return Inhibit(false), move |_, button_event| { + da_pointer_callback(button_event).unwrap_or_else(|e| error!("Release event: {}", e)); + Inhibit(false) + }), + ); + draw_area.connect_scroll_event( + glib::clone!(@weak gs => @default-return Inhibit(false), move |_, scroll_event| { + da_scroll_callback(scroll_event).unwrap_or_else(|e| error!("Scroll event: {}", e)); + Inhibit(false) + }), + ); + draw_area.connect_key_press_event( + glib::clone!(@weak gs => @default-return Inhibit(true), move |_, key_event| { + da_key_callback(&gs,key_event, true).unwrap_or_else(|e|error!("Press event: {}", e)); + Inhibit(true)} + ), + ); + draw_area.connect_key_release_event( + glib::clone!(@weak gs => @default-return Inhibit(true), move |_, key_event| { + da_key_callback(&gs,key_event, false).unwrap_or_else(|e|error!("Key event: {}", e)); + Inhibit(true)} + ), + ); + draw_area.connect_configure_event( + glib::clone!(@weak gs => @default-return false, move |_, event_configure| { + da_configure_callback(&gs, event_configure).unwrap_or_else(|e|error!("Configure event: {}", e)); + false} + ), + ); + + draw_area.connect_focus_out_event( + glib::clone!(@weak gs => @default-return Inhibit(false), move |_, _| { + da_focus_out_callback().unwrap_or_else(|e|error!("Focus out event: {:?}", e)); + Inhibit(false)} + ), + ); + draw_area.connect_enter_notify_event( + glib::clone!(@weak gs => @default-return Inhibit(false), move |_,enter_event| { + da_enter_callback(&gs, enter_event).unwrap_or_else(|e|error!("Enter event: {:?}", e)); + Inhibit(false)} + ), + ); + draw_area.connect_leave_notify_event( + glib::clone!(@weak gs => @default-return Inhibit(false), move |_, leave_event| { + da_leave_callback(&gs, leave_event).unwrap_or_else(|e|error!("Leave event: {:?}", e)); + Inhibit(false)} + ), + ); + + let 
event_mask = EventMask::BUTTON_PRESS_MASK + | EventMask::BUTTON_RELEASE_MASK + | EventMask::BUTTON_MOTION_MASK + | EventMask::SCROLL_MASK + | EventMask::SMOOTH_SCROLL_MASK + | EventMask::KEY_PRESS_MASK + | EventMask::KEY_RELEASE_MASK + | EventMask::BUTTON1_MOTION_MASK + | EventMask::FOCUS_CHANGE_MASK + | EventMask::ENTER_NOTIFY_MASK + | EventMask::LEAVE_NOTIFY_MASK + | EventMask::POINTER_MOTION_MASK; + draw_area.add_events(event_mask); + + Ok(()) +} + +fn da_enter_callback( + gs: &Rc>, + _event: &gdk::EventCrossing, +) -> Result<()> { + trace::gtk_enter_callback(&"enter".to_string()); + update_keyboard_grab(gs, true); + Ok(()) +} + +fn da_leave_callback( + gs: &Rc>, + _event: &gdk::EventCrossing, +) -> Result<()> { + trace::gtk_enter_callback(&"leave".to_string()); + update_keyboard_grab(gs, false); + Ok(()) +} + +fn update_keyboard_grab(gs: &Rc>, grab: bool) { + let borrowed_gs = gs.borrow(); + let display = borrowed_gs.draw_area.display(); + if let Some(seat) = display.default_seat() { + if grab { + if let Some(w) = borrowed_gs.draw_area.window() { + seat.grab(&w, SeatCapabilities::KEYBOARD, false, None, None, None); + } + } else { + seat.ungrab(); + } + } +} + +/// When the window size changes, +/// the image resolution adapts to the window. +fn da_configure_callback( + gs: &Rc>, + event_configure: &gdk::EventConfigure, +) -> Result<()> { + trace::gtk_configure_callback(&event_configure.size().0, &event_configure.size().1); + + let borrowed_gs = gs.borrow(); + if !borrowed_gs.scale_mode.borrow().is_free_scale() { + return Ok(()); + } + + let con = match borrowed_gs.con.upgrade() { + Some(c) => c, + None => return Ok(()), + }; + drop(borrowed_gs); + let (width, height) = event_configure.size(); + + graphic_hardware_ui_info(con, width, height) +} + +fn da_focus_out_callback() -> Result<()> { + release_all_key() +} + +fn da_key_callback( + gs: &Rc>, + key_event: &gdk::EventKey, + press: bool, +) -> Result<()> { + let keysym2keycode = gs.borrow().keysym2keycode.clone(); + let org_key_value = key_event.keyval().into_glib() as i32; + let key_value: u16 = key_event.keyval().to_lower().into_glib() as u16; + let keycode: u16 = match keysym2keycode.borrow().get(&key_value) { + Some(k) => *k, + None => 0, + }; + trace::gtk_key_event_callback(&key_value, &press); + update_key_state(press, org_key_value, keycode)?; + input::key_event(keycode, press)?; + Ok(()) +} + +fn da_event_callback(gs: &Rc>, event: &gdk::Event) -> Result<()> { + // Cursor movement. + if event.event_type() == gdk::EventType::MotionNotify { + gd_cursor_move_event(gs, event).unwrap_or_else(|e| error!("Cursor movement: {:?}", e)); + } + Ok(()) +} + +/// Cursor Movement. 
+fn gd_cursor_move_event(gs: &Rc>, event: &gdk::Event) -> Result<()> { + let mut borrowed_gs = gs.borrow_mut(); + let (width, height) = match &borrowed_gs.cairo_image { + Some(image) => (f64::from(image.width()), f64::from(image.height())), + None => return Ok(()), + }; + + let (x, y) = match event.coords() { + Some(value) => value, + None => return Ok(()), + }; + trace::gtk_cursor_move_event(&x, &y); + let (real_x, real_y) = borrowed_gs.convert_coord(x, y)?; + let standard_x = ((real_x * (ABS_MAX as f64)) / width) as u16; + let standard_y = ((real_y * (ABS_MAX as f64)) / height) as u16; + + input_move_abs(Axis::X, u32::from(standard_x))?; + input_move_abs(Axis::Y, u32::from(standard_y))?; + input_point_sync() +} + +fn da_pointer_callback(button_event: &gdk::EventButton) -> Result<()> { + let button_mask = match button_event.button() { + GTK_INPUT_BUTTON_LEFT => INPUT_POINT_LEFT, + GTK_INPUT_BUTTON_RIGHT => INPUT_POINT_RIGHT, + GTK_INPUT_BUTTON_MIDDLE => INPUT_POINT_MIDDLE, + GTK_INPUT_BUTTON_BACK => INPUT_POINT_BACK, + GTK_INPUT_BUTTON_FORWARD => INPUT_POINT_FORWARD, + _ => return Ok(()), + }; + trace::gtk_pointer_callback(&button_mask); + + match button_event.event_type() { + gdk::EventType::ButtonRelease => { + input_button(button_mask, false)?; + input_point_sync() + } + gdk::EventType::ButtonPress => { + input_button(button_mask, true)?; + input_point_sync() + } + gdk::EventType::DoubleButtonPress => { + press_mouse(button_mask)?; + press_mouse(button_mask) + } + _ => Ok(()), + } +} + +fn da_scroll_callback(scroll_event: &gdk::EventScroll) -> Result<()> { + trace::gtk_scroll_callback(&scroll_event.direction()); + + match scroll_event.direction() { + ScrollDirection::Up => press_mouse(INPUT_BUTTON_WHEEL_UP), + ScrollDirection::Down => press_mouse(INPUT_BUTTON_WHEEL_DOWN), + ScrollDirection::Left => press_mouse(INPUT_BUTTON_WHEEL_LEFT), + ScrollDirection::Right => press_mouse(INPUT_BUTTON_WHEEL_RIGHT), + ScrollDirection::Smooth => match scroll_event.scroll_deltas() { + Some((delta_x, delta_y)) => { + if delta_x.eq(&0.0) && delta_y.eq(&0.0) { + return Ok(()); + } + + // Horizontal scrolling. + if delta_x.gt(&0.0) { + press_mouse(INPUT_BUTTON_WHEEL_RIGHT)?; + } else if delta_x.lt(&0.0) { + press_mouse(INPUT_BUTTON_WHEEL_LEFT)?; + } + + // Vertical scrolling. + if delta_y.gt(&0.0) { + press_mouse(INPUT_BUTTON_WHEEL_DOWN)?; + } else if delta_y.lt(&0.0) { + press_mouse(INPUT_BUTTON_WHEEL_UP)?; + } + Ok(()) + } + None => Ok(()), + }, + _ => Ok(()), + } +} + +/// Draw_area callback func for draw signal. 
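gd_cursor_move_event above rescales window coordinates into the fixed [0, ABS_MAX] range that the absolute pointing device expects, so the guest sees the same relative position regardless of the window size. The same arithmetic as a standalone helper (to_abs_coord is illustrative only and assumes ABS_MAX from ui::input is in scope):

// Map a coordinate inside the image (0..extent) onto the 0..ABS_MAX range
// consumed by input_move_abs(); mirrors the math in gd_cursor_move_event.
fn to_abs_coord(real: f64, image_extent: f64) -> u32 {
    u32::from(((real * ABS_MAX as f64) / image_extent) as u16)
}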
+fn da_draw_callback(gs: &Rc>, cr: &cairo::Context) -> Result<()> { + let mut borrowed_gs = gs.borrow_mut(); + let scale_mode = borrowed_gs.scale_mode.clone(); + let (mut surface_width, mut surface_height) = match &borrowed_gs.cairo_image { + Some(image) => (f64::from(image.width()), f64::from(image.height())), + None => return Ok(()), + }; + + if surface_width.le(&0.0) || surface_height.le(&0.0) { + return Ok(()); + } + + let (window_width, window_height); + match borrowed_gs.get_window_size() { + Some((w, h)) => (window_width, window_height) = (w, h), + None => return Ok(()), + }; + + if scale_mode.borrow().is_full_screen() || scale_mode.borrow().is_free_scale() { + borrowed_gs.scale_x = window_width / surface_width; + borrowed_gs.scale_y = window_height / surface_height; + } + surface_width *= borrowed_gs.scale_x; + surface_height *= borrowed_gs.scale_y; + + let mut mx: f64 = 0.0; + let mut my: f64 = 0.0; + if window_width.gt(&surface_width) { + mx = (window_width - surface_width) / (2.0); + } + if window_height.gt(&surface_height) { + my = (window_height - surface_height) / (2.0); + } + + cr.rectangle(0.0, 0.0, window_width, window_height); + cr.rectangle(mx + surface_width, my, surface_width * -1.0, surface_height); + cr.fill()?; + cr.scale(borrowed_gs.scale_x, borrowed_gs.scale_y); + if let Some(image) = &borrowed_gs.cairo_image { + cr.set_source_surface(image, mx / borrowed_gs.scale_x, my / borrowed_gs.scale_y)?; + } + cr.paint()?; + + Ok(()) +} diff --git a/ui/src/gtk/menu.rs b/ui/src/gtk/menu.rs new file mode 100644 index 0000000000000000000000000000000000000000..c1e4b6b64d13223939c46dca48057624bab061af --- /dev/null +++ b/ui/src/gtk/menu.rs @@ -0,0 +1,425 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
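da_draw_callback above scales the guest surface by (scale_x, scale_y) and centers it in the window, filling the uncovered border. The margin computation, restated as a small standalone helper for clarity (center_margins is illustrative only, not part of the patch):

// Return the (mx, my) offsets that center a scaled surface inside the window;
// margins collapse to zero once the scaled surface fills the window.
fn center_margins(window: (f64, f64), surface: (f64, f64), scale: (f64, f64)) -> (f64, f64) {
    let scaled = (surface.0 * scale.0, surface.1 * scale.1);
    (
        ((window.0 - scaled.0) / 2.0).max(0.0),
        ((window.1 - scaled.1) / 2.0).max(0.0),
    )
}

convert_coord, defined later in gtk/mod.rs, inverts exactly this mapping when translating pointer positions back onto the image.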
+ +use std::{cell::RefCell, rc::Rc}; + +use anyhow::{bail, Result}; +use gettextrs::gettext; +use gtk::{ + ffi::{gtk_button_set_label, GtkButton, GtkWidget}, + gdk::{ + ffi::{GDK_KEY_equal, GDK_KEY_minus, GDK_KEY_B, GDK_KEY_F, GDK_KEY_M, GDK_KEY_S}, + ModifierType, + }, + glib::{self, gobject_ffi::GObject, translate::ToGlibPtr}, + prelude::{AccelGroupExtManual, NotebookExtManual, ObjectType, WidgetExtManual}, + traits::{ + BoxExt, CheckMenuItemExt, ContainerExt, DialogExt, GtkMenuExt, GtkMenuItemExt, + GtkWindowExt, MenuShellExt, NotebookExt, WidgetExt, + }, + AccelFlags, AccelGroup, ApplicationWindow, ButtonsType, CheckMenuItem, DialogFlags, Inhibit, + Menu, MenuBar, MenuItem, MessageDialog, MessageType, Orientation, RadioMenuItem, +}; +use log::error; + +use super::ScaleMode; +use crate::{ + console::{get_run_stage, VmRunningStage}, + gtk::{renew_image, update_window_size, GtkDisplay, ZoomOperate, GTK_SCALE_MIN, GTK_ZOOM_STEP}, +}; + +#[derive(Clone)] +pub(crate) struct GtkMenu { + pub(crate) window: ApplicationWindow, + container: gtk::Box, + pub(crate) note_book: gtk::Notebook, + pub(crate) radio_group: Rc>>, + accel_group: AccelGroup, + menu_bar: MenuBar, + machine_menu: Menu, + machine_item: MenuItem, + shutdown_item: MenuItem, + pub(crate) view_menu: Menu, + view_item: MenuItem, + full_screen_item: MenuItem, + zoom_in_item: MenuItem, + zoom_out_item: MenuItem, + zoom_fit: CheckMenuItem, + best_fit_item: MenuItem, + show_menu_bar: CheckMenuItem, +} + +impl GtkMenu { + pub(crate) fn new(window: ApplicationWindow) -> Self { + Self { + window, + container: gtk::Box::new(Orientation::Vertical, 0), + note_book: gtk::Notebook::default(), + radio_group: Rc::new(RefCell::new(vec![])), + accel_group: AccelGroup::default(), + menu_bar: MenuBar::new(), + machine_menu: Menu::new(), + machine_item: MenuItem::with_mnemonic(&gettext("_Machine")), + shutdown_item: MenuItem::with_mnemonic(&gettext("Power _Down")), + view_menu: Menu::new(), + view_item: MenuItem::with_mnemonic(&gettext("_View")), + full_screen_item: MenuItem::with_mnemonic(&gettext("_Fullscreen")), + zoom_in_item: MenuItem::with_mnemonic(&gettext("Zoom _In")), + zoom_out_item: MenuItem::with_mnemonic(&gettext("Zoom _Out")), + zoom_fit: CheckMenuItem::with_mnemonic(&gettext("Zoom To _Fit")), + best_fit_item: MenuItem::with_mnemonic(&gettext("Best _Fit")), + show_menu_bar: CheckMenuItem::with_mnemonic(&gettext("Show Menubar")), + } + } + + /// 1. Setting callback function for button. + /// 2. Set shortcut keys for buttons. + /// Button shortcut key + /// shutdown_item: Ctrl + Alt + S. 
+ /// full_screen_item Ctrl + Alt + F + /// zoom_in_item Ctrl + Alt + + + /// zoom_out_item Ctrl + Alt + - + /// best_fit_item Ctrl + Alt + B + /// show_menu_bar Ctrl + Alt + M + pub(crate) fn set_signal(&mut self, gd: &Rc>) { + let modifier = ModifierType::CONTROL_MASK | ModifierType::MOD1_MASK; + let accel_flags = AccelFlags::VISIBLE; + + self.shutdown_item + .connect_activate(glib::clone!(@weak gd => move |_| { + power_down_callback(&gd).unwrap_or_else(|e| error!("Gtk shutdown failed: {:?}", e)); + })); + self.shutdown_item.add_accelerator( + "activate", + &self.accel_group, + GDK_KEY_S as u32, + modifier, + accel_flags, + ); + + self.full_screen_item + .connect_activate(glib::clone!(@weak gd => move |_| { + full_screen_callback(&gd).unwrap_or_else(|e| error!("Full Screen Item: {:?}", e)); + })); + self.full_screen_item.add_accelerator( + "activate", + &self.accel_group, + GDK_KEY_F as u32, + modifier, + accel_flags, + ); + let full_screen_item = self.full_screen_item.clone(); + self.accel_group.connect_accel_group( + GDK_KEY_F as u32, + modifier, + accel_flags, + glib::clone!(@weak full_screen_item => @default-return false, move |_, _, _, _| { + full_screen_item.activate(); + false + }), + ); + + self.zoom_in_item + .connect_activate(glib::clone!(@weak gd => move |_| { + menu_zoom_callback(&gd, ZoomOperate::ZoomIn).unwrap_or_else(|e| error!("Zoom In Item: {:?}", e)); + })); + self.zoom_in_item.add_accelerator( + "activate", + &self.accel_group, + GDK_KEY_equal as u32, + modifier, + accel_flags, + ); + + self.zoom_out_item + .connect_activate(glib::clone!(@weak gd => move |_| { + menu_zoom_callback(&gd, ZoomOperate::ZoomOut).unwrap_or_else(|e| error!("Zoom Out Item: {:?}", e)); + })); + self.zoom_out_item.add_accelerator( + "activate", + &self.accel_group, + GDK_KEY_minus as u32, + modifier, + accel_flags, + ); + + self.best_fit_item + .connect_activate(glib::clone!(@weak gd => move |_| { + menu_zoom_callback(&gd, ZoomOperate::BestFit).unwrap_or_else(|e| error!("Best Fit Item: {:?}", e)); + })); + self.best_fit_item.add_accelerator( + "activate", + &self.accel_group, + GDK_KEY_B as u32, + modifier, + accel_flags, + ); + + // Set the hiding of menu_bar. + self.show_menu_bar + .connect_activate(glib::clone!(@weak gd => move |_| { + show_menubar_callback(&gd).unwrap_or_else(|e| error!("Shoe Menu Bar: {:?}", e)); + })); + let show_menu_bar = self.show_menu_bar.clone(); + self.show_menu_bar.add_accelerator( + "activate", + &self.accel_group, + GDK_KEY_M as u32, + modifier, + accel_flags, + ); + self.accel_group.connect_accel_group( + GDK_KEY_M as u32, + modifier, + accel_flags, + move |_, _, _, _| { + if !show_menu_bar.is_active() { + show_menu_bar.activate(); + } + true + }, + ); + + // Connect delete for window. + self.window.connect_delete_event( + glib::clone!(@weak gd => @default-return Inhibit(false), move |_, _| { + window_close_callback(&gd).unwrap_or_else(|e| error!("Standard vm shut down failed: {:?}", e)); + Inhibit(true) + }), + ); + + // By confirmation this button, the size of window is fixed and + // can not be changed. + self.zoom_fit + .connect_activate(glib::clone!(@weak gd => move |_| { + zoom_fit_callback(&gd).unwrap_or_else(|e| error!("Zoom fit: {:?}", e)); + })); + } + + pub(crate) fn set_menu(&mut self) { + // Machine menu. + self.machine_menu.set_accel_group(Some(&self.accel_group)); + self.machine_menu.append(&self.shutdown_item); + self.machine_item.set_submenu(Some(&self.machine_menu)); + + // View menu. 
+ self.view_menu.set_accel_group(Some(&self.accel_group)); + self.view_menu.append(&self.full_screen_item); + self.view_menu.append(&self.zoom_in_item); + self.view_menu.append(&self.zoom_out_item); + self.view_menu.append(&self.zoom_fit); + self.view_menu.append(&self.best_fit_item); + self.view_menu.append(&self.show_menu_bar); + self.view_item.set_submenu(Some(&self.view_menu)); + + self.menu_bar.append(&self.machine_item); + self.menu_bar.append(&self.view_item); + + // Set the visible of note_book. + self.note_book.set_show_tabs(false); + self.note_book.set_show_border(false); + + self.window.add_accel_group(&self.accel_group); + self.container.pack_start(&self.menu_bar, false, false, 0); + self.container.pack_start(&self.note_book, true, true, 0); + self.window.add(&self.container); + + // Disable the default F10 menu shortcut. + if let Some(setting) = self.window.settings() { + // SAFETY: self.windows can be guaranteed to be legal. + unsafe { + gtk::glib::gobject_ffi::g_object_set_property( + setting.as_ptr() as *mut GObject, + "gtk-menu-bar-accel".to_glib_none().0, + glib::Value::from("").to_glib_none().0, + ); + } + } + } + + /// Show window. + pub(crate) fn show_window(&self, scale_mode: Rc>, full_screen: bool) { + self.window.show_all(); + + if full_screen { + self.full_screen_item.activate(); + } + + if scale_mode.borrow().free_scale { + self.zoom_fit.activate(); + } + + if let Some(page_num) = self.note_book.current_page() { + let radio_item = &self.radio_group.borrow()[page_num as usize]; + radio_item.activate(); + } + + self.menu_bar.hide(); + } +} + +/// Fixed the window size. +fn power_down_callback(gd: &Rc>) -> Result<()> { + let borrowed_gd = gd.borrow(); + if borrowed_gd.powerdown_button.is_some() { + borrowed_gd.vm_powerdown(); + } else { + drop(borrowed_gd); + window_close_callback(gd)?; + } + Ok(()) +} + +/// Hid/show title bar. +fn show_menubar_callback(gd: &Rc>) -> Result<()> { + let borrowed_gd = gd.borrow(); + let gtk_menu = borrowed_gd.gtk_menu.clone(); + if borrowed_gd.scale_mode.borrow().is_full_screen() { + return Ok(()); + } + if gtk_menu.show_menu_bar.is_active() { + gtk_menu.menu_bar.show(); + } else { + gtk_menu.menu_bar.hide(); + } + drop(gtk_menu); + + let active_gs = borrowed_gd.get_current_display()?; + drop(borrowed_gd); + update_window_size(&active_gs) +} + +/// Make the window to fill the entir desktop. +fn full_screen_callback(gd: &Rc>) -> Result<()> { + let borrowed_gd = gd.borrow(); + let gtk_menu = borrowed_gd.gtk_menu.clone(); + let gs = borrowed_gd.get_current_display()?; + let scale_mode = borrowed_gd.scale_mode.clone(); + let mut borrowed_scale = scale_mode.borrow_mut(); + drop(borrowed_gd); + if !borrowed_scale.is_full_screen() { + gtk_menu.note_book.set_show_tabs(false); + gtk_menu.menu_bar.hide(); + gs.borrow().draw_area.set_size_request(-1, -1); + gtk_menu.window.fullscreen(); + borrowed_scale.full_screen = true; + } else { + gtk_menu.window.unfullscreen(); + if gtk_menu.show_menu_bar.is_active() { + gtk_menu.menu_bar.show(); + } + borrowed_scale.full_screen = false; + gs.borrow_mut().scale_x = 1.0; + gs.borrow_mut().scale_y = 1.0; + drop(borrowed_scale); + update_window_size(&gs)?; + }; + + Ok(()) +} + +/// Zoom in/out the display. 
+fn menu_zoom_callback(gd: &Rc>, zoom_opt: ZoomOperate) -> Result<()> { + let borrowed_gd = gd.borrow(); + let page_num = borrowed_gd.gtk_menu.note_book.current_page(); + let gs = match borrowed_gd.get_ds_by_pagenum(page_num) { + Some(ds) => ds, + None => bail!("Display Can not found."), + }; + drop(borrowed_gd); + let mut borrowed_gs = gs.borrow_mut(); + match zoom_opt { + ZoomOperate::ZoomIn => { + borrowed_gs.scale_x += GTK_ZOOM_STEP; + borrowed_gs.scale_y += GTK_ZOOM_STEP; + } + ZoomOperate::ZoomOut => { + borrowed_gs.scale_x -= GTK_ZOOM_STEP; + borrowed_gs.scale_y -= GTK_ZOOM_STEP; + borrowed_gs.scale_x = borrowed_gs.scale_x.max(GTK_SCALE_MIN); + borrowed_gs.scale_y = borrowed_gs.scale_y.max(GTK_SCALE_MIN); + } + ZoomOperate::BestFit => { + borrowed_gs.scale_x = 1.0; + borrowed_gs.scale_y = 1.0; + } + } + drop(borrowed_gs); + update_window_size(&gs) +} + +/// Fixed the window size. +fn zoom_fit_callback(gd: &Rc>) -> Result<()> { + let gtk_menu = gd.borrow().gtk_menu.clone(); + let gs = gd.borrow().get_current_display()?; + if gtk_menu.zoom_fit.is_active() { + gd.borrow_mut().scale_mode.borrow_mut().free_scale = true; + } else { + gd.borrow_mut().scale_mode.borrow_mut().free_scale = false; + gs.borrow_mut().scale_x = 1.0; + gs.borrow_mut().scale_y = 1.0; + } + + update_window_size(&gs)?; + renew_image(&gs) +} + +/// Close window. +fn window_close_callback(gd: &Rc>) -> Result<()> { + let borrowed_gd = gd.borrow(); + if get_run_stage() != VmRunningStage::Os || borrowed_gd.powerdown_button.is_none() { + let dialog = MessageDialog::new( + Some(&borrowed_gd.gtk_menu.window), + DialogFlags::DESTROY_WITH_PARENT, + MessageType::Question, + ButtonsType::YesNo, + &gettext("Forced shutdown may cause installation failure, blue screen, unusable and other abnormalities."), + ); + dialog.set_title(&gettext( + "Please confirm whether to exit the virtual machine", + )); + if let Some(button_yes) = &dialog.widget_for_response(gtk::ResponseType::Yes) { + let label: &str = &gettext("Yes"); + // SAFETY: Tt can be ensure that the pointer is not empty. + unsafe { + let button: *mut GtkWidget = button_yes.as_ptr(); + gtk_button_set_label(button as *mut GtkButton, label.to_glib_none().0); + } + } + if let Some(button_no) = dialog.widget_for_response(gtk::ResponseType::No) { + let label: &str = &gettext("No"); + // SAFETY: Tt can be ensure that the pointer is not empty. + unsafe { + let button: *mut GtkWidget = button_no.as_ptr(); + gtk_button_set_label(button as *mut GtkButton, label.to_glib_none().0); + } + } + + borrowed_gd.vm_pause(); + let answer = dialog.run(); + // SAFETY: Dialog is created in the current function and can be guaranteed not to be empty. + unsafe { dialog.destroy() }; + + if answer != gtk::ResponseType::Yes { + borrowed_gd.vm_resume(); + return Ok(()); + } + } + + if get_run_stage() == VmRunningStage::Os && borrowed_gd.powerdown_button.is_some() { + borrowed_gd.vm_powerdown(); + } else { + borrowed_gd.vm_shutdown(); + } + + Ok(()) +} diff --git a/ui/src/gtk/mod.rs b/ui/src/gtk/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..2b055adc973012ca69fca7a3ed10534ad352c784 --- /dev/null +++ b/ui/src/gtk/mod.rs @@ -0,0 +1,1109 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +mod draw; +mod menu; + +use std::{ + cell::RefCell, + cmp, + collections::HashMap, + env, fs, + path::Path, + ptr, + rc::Rc, + sync::{Arc, Mutex, Weak}, + thread, +}; + +use anyhow::{bail, Context, Result}; +use gettextrs::LocaleCategory; +use gtk::{ + cairo::{Format, ImageSurface}, + gdk::{self, Geometry, Gravity, WindowHints}, + gdk_pixbuf::Colorspace, + glib::{self, Priority, SyncSender}, + prelude::{ApplicationExt, ApplicationExtManual, Continue, NotebookExtManual}, + traits::{ + CheckMenuItemExt, GtkMenuItemExt, GtkWindowExt, HeaderBarExt, LabelExt, MenuShellExt, + RadioMenuItemExt, WidgetExt, + }, + Application, ApplicationWindow, DrawingArea, HeaderBar, Label, RadioMenuItem, +}; +use log::error; +use vmm_sys_util::eventfd::EventFd; + +use crate::{ + console::{ + create_msg_surface, get_active_console, get_run_stage, graphic_hardware_update, + register_display, DisplayChangeListener, DisplayChangeListenerOperations, DisplayConsole, + DisplayMouse, DisplaySurface, VmRunningStage, DEFAULT_SURFACE_HEIGHT, + DEFAULT_SURFACE_WIDTH, DISPLAY_UPDATE_INTERVAL_DEFAULT, + }, + gtk::{draw::set_callback_for_draw_area, menu::GtkMenu}, + keycode::{DpyMod, KeyCode}, + pixman::{ + create_pixman_image, get_image_data, get_image_height, get_image_width, ref_pixman_image, + unref_pixman_image, + }, +}; +use machine_manager::config::{DisplayConfig, UiContext}; +use machine_manager::qmp::qmp_schema::GpuInfo; +use util::pixman::{pixman_format_code_t, pixman_image_composite, pixman_op_t}; +use util::time::gettime; + +const CHANNEL_BOUND: usize = 1024; +/// Width of default window. +const DEFAULT_WINDOW_WIDTH: i32 = 1024; +/// Height of default window. +const DEFAULT_WINDOW_HEIGHT: i32 = 768; +pub(crate) const GTK_SCALE_MIN: f64 = 0.25; +pub(crate) const GTK_ZOOM_STEP: f64 = 0.25; +/// Domain name. +const DOMAIN_NAME: &str = "desktop-app-engine"; +/// The path of message information is located. +const LOCALE_PATH: &str = "/usr/share/locale"; + +/// Gtk window display mode. +#[derive(Clone, Eq, PartialEq)] +pub struct ScaleMode { + /// Display fill desktop. + full_screen: bool, + /// Scaling operation does not change the aspect ratio. + free_scale: bool, +} + +impl ScaleMode { + fn is_full_screen(&self) -> bool { + self.full_screen + } + + fn is_free_scale(&self) -> bool { + self.free_scale + } +} + +/// Display zoom operation. +/// Zoom in the display. +/// Zoom out the display. +/// Window adapt to display. 
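menu_zoom_callback in menu.rs adjusts the per-screen scale factors in GTK_ZOOM_STEP increments, never lets them fall below GTK_SCALE_MIN, and snaps back to 1:1 for BestFit. The same rule as a pure function (apply_zoom is a sketch, assuming the ZoomOperate enum declared just below):

// One zoom step applied to a single scale factor; mirrors menu_zoom_callback.
fn apply_zoom(scale: f64, op: &ZoomOperate) -> f64 {
    match op {
        ZoomOperate::ZoomIn => scale + GTK_ZOOM_STEP,
        ZoomOperate::ZoomOut => (scale - GTK_ZOOM_STEP).max(GTK_SCALE_MIN),
        ZoomOperate::BestFit => 1.0,
    }
}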
+#[derive(Eq, PartialEq)] +pub enum ZoomOperate { + ZoomIn, + ZoomOut, + BestFit, +} + +#[derive(Debug, PartialEq)] +enum DisplayEventType { + DisplaySwitch, + DisplayUpdate, + CursorDefine, + DisplayRefresh, + DisplaySetMajor, +} + +impl Default for DisplayEventType { + fn default() -> Self { + Self::DisplayRefresh + } +} + +#[derive(Default)] +struct DisplayChangeEvent { + dev_name: String, + event_type: DisplayEventType, + x: i32, + y: i32, + w: i32, + h: i32, + cursor: Option, +} + +impl DisplayChangeEvent { + fn new(dev_name: String, event_type: DisplayEventType) -> Self { + Self { + dev_name, + event_type, + ..Default::default() + } + } +} + +struct GtkInterface { + dev_name: String, + dce_sender: SyncSender, +} + +impl GtkInterface { + fn new(dev_name: String, dce_sender: SyncSender) -> Self { + Self { + dev_name, + dce_sender, + } + } +} + +impl DisplayChangeListenerOperations for GtkInterface { + fn dpy_switch(&self, _surface: &crate::console::DisplaySurface) -> Result<()> { + trace::gtk_dyp_channel_switch(&self.dev_name.clone()); + + let event = DisplayChangeEvent::new(self.dev_name.clone(), DisplayEventType::DisplaySwitch); + self.dce_sender.send(event)?; + Ok(()) + } + + fn dpy_refresh( + &self, + dcl: &std::sync::Arc>, + ) -> Result<()> { + trace::gtk_dyp_channel_refresh(&self.dev_name.clone()); + + // The way virtio-gpu devices are used in phase OS and others is different. + if self.dev_name.starts_with("virtio-gpu") { + if get_run_stage() == VmRunningStage::Os { + dcl.lock().unwrap().update_interval = 0; + } else { + dcl.lock().unwrap().update_interval = DISPLAY_UPDATE_INTERVAL_DEFAULT; + } + } + + let event = + DisplayChangeEvent::new(self.dev_name.clone(), DisplayEventType::DisplayRefresh); + let con_id = dcl.lock().unwrap().con_id; + graphic_hardware_update(con_id); + self.dce_sender.send(event)?; + Ok(()) + } + + fn dpy_image_update(&self, x: i32, y: i32, w: i32, h: i32) -> Result<()> { + trace::gtk_dyp_channel_image_update(&self.dev_name.clone(), &x, &y, &w, &h); + + let mut event = + DisplayChangeEvent::new(self.dev_name.clone(), DisplayEventType::DisplayUpdate); + event.x = x; + event.y = y; + event.w = w; + event.h = h; + self.dce_sender.send(event)?; + Ok(()) + } + + fn dpy_cursor_update(&self, cursor_data: &DisplayMouse) -> Result<()> { + trace::gtk_dyp_channel_cursor_update(&self.dev_name.clone()); + + let mut event = + DisplayChangeEvent::new(self.dev_name.clone(), DisplayEventType::CursorDefine); + event.cursor = Some(cursor_data.clone()); + self.dce_sender.send(event)?; + Ok(()) + } + + fn dpy_set_major(&self) -> Result<()> { + let event = + DisplayChangeEvent::new(self.dev_name.clone(), DisplayEventType::DisplaySetMajor); + self.dce_sender.send(event)?; + Ok(()) + } +} + +pub(crate) struct GtkDisplay { + gtk_menu: GtkMenu, + scale_mode: Rc>, + pagenum2ds: HashMap>>, + powerdown_button: Option>, + shutdown_button: Option>, + pause_button: Option>, + resume_button: Option>, + keysym2keycode: Rc>>, +} + +impl GtkDisplay { + fn create(gtk_menu: GtkMenu, gtk_cfg: &GtkConfig) -> Self { + // Window scale mode. + let scale_mode = Rc::new(RefCell::new(ScaleMode { + full_screen: false, + free_scale: true, + })); + // Mapping ASCII to keycode. 
+ let keysym2keycode = Rc::new(RefCell::new(KeyCode::keysym_to_qkeycode(DpyMod::Gtk))); + Self { + gtk_menu, + scale_mode, + pagenum2ds: HashMap::new(), + powerdown_button: gtk_cfg.powerdown_button.clone(), + shutdown_button: gtk_cfg.shutdown_button.clone(), + pause_button: gtk_cfg.pause_button.clone(), + resume_button: gtk_cfg.resume_button.clone(), + keysym2keycode, + } + } + + // Get the current active drawing_area in note_book. + fn get_current_display(&self) -> Result>> { + let page_num = self.gtk_menu.note_book.current_page(); + let gs = match page_num { + Some(num) if self.pagenum2ds.get(&num).is_some() => self.pagenum2ds.get(&num).unwrap(), + _ => bail!("No active display"), + }; + Ok(gs.clone()) + } + + // Get the displays based on device name. + fn get_ds_by_pagenum(&self, page_num: Option) -> Option>> { + let ds = self.pagenum2ds.get(&page_num?)?; + Some(ds.clone()) + } + + // Get the display base the page number in notebook. + fn get_ds_by_devname(&self, dev_name: &str) -> Option>> { + for ds in self.pagenum2ds.values() { + if ds.borrow().dev_name.eq(dev_name) { + return Some(ds.clone()); + } + } + None + } + + fn set_draw_area(&mut self, gs: Rc>) -> Result<()> { + let draw_area = DrawingArea::new(); + draw_area.set_size_request(DEFAULT_SURFACE_WIDTH, DEFAULT_SURFACE_HEIGHT); + draw_area.set_can_focus(true); + set_callback_for_draw_area(&draw_area, gs.clone())?; + + // Add notebook page. + let active_con = gs.borrow().con.upgrade(); + let con = match active_con { + Some(con) => con, + None => bail!("No active console!"), + }; + let label_name = con.lock().unwrap().dev_name.clone(); + let label = gtk::Label::new(Some(&label_name)); + let page_num = self + .gtk_menu + .note_book + .append_page(&draw_area, Some(&label)); + self.pagenum2ds.insert(page_num, gs.clone()); + draw_area.grab_focus(); + + // Create a radio button. + // Only one screen can be displayed at a time. + let gs_show_menu = RadioMenuItem::with_label(&label_name); + let note_book = self.gtk_menu.note_book.clone(); + gs_show_menu.connect_activate(glib::clone!(@weak gs, @weak note_book => move |show_menu| { + gs_show_menu_callback(&gs, note_book, show_menu).unwrap_or_else(|e| error!("Display show menu: {:?}", e)); + })); + self.gtk_menu.view_menu.append(&gs_show_menu); + + if !self.gtk_menu.radio_group.borrow().is_empty() { + let first_radio = &self.gtk_menu.radio_group.borrow()[0]; + gs_show_menu.join_group(Some(first_radio)); + } else { + note_book.set_current_page(Some(page_num)); + } + + self.gtk_menu + .radio_group + .borrow_mut() + .push(gs_show_menu.clone()); + gs.borrow_mut().show_menu = gs_show_menu; + gs.borrow_mut().draw_area = draw_area; + + Ok(()) + } + + /// Gracefully Shutdown. + pub(crate) fn vm_powerdown(&self) { + if let Some(button) = &self.powerdown_button { + button + .write(1) + .unwrap_or_else(|e| error!("Vm power down failed: {:?}", e)); + } + } + + /// Forced Shutdown. + pub(crate) fn vm_shutdown(&self) { + if let Some(button) = &self.shutdown_button { + button + .write(1) + .unwrap_or_else(|e| error!("Vm shut down failed: {:?}", e)); + } + } + + /// Pause Virtual Machine. + pub(crate) fn vm_pause(&self) { + if let Some(button) = &self.pause_button { + button + .write(1) + .unwrap_or_else(|e| error!("Vm pause failed: {:?}", e)); + } + } + + /// Resume Virtual Machine. 
+ pub(crate) fn vm_resume(&self) { + if let Some(button) = &self.resume_button { + button + .write(1) + .unwrap_or_else(|e| error!("Vm resume failed: {:?}", e)); + } + } +} + +pub struct GtkDisplayScreen { + window: ApplicationWindow, + dev_name: String, + show_menu: RadioMenuItem, + draw_area: DrawingArea, + cursor_trsp: bool, // GTK own default cursor transparent or not + source_surface: DisplaySurface, + transfer_surface: Option, + cairo_image: Option, + con: Weak>, + dcl: Weak>, + scale_mode: Rc>, + scale_x: f64, + scale_y: f64, + keysym2keycode: Rc>>, +} + +/// A displayscreen corresponds to a display area. +impl GtkDisplayScreen { + fn create( + window: ApplicationWindow, + con: Weak>, + dcl: Weak>, + keysym2keycode: Rc>>, + scale_mode: Rc>, + ) -> Self { + let surface = create_msg_surface( + DEFAULT_SURFACE_WIDTH, + DEFAULT_SURFACE_HEIGHT, + "Please wait a moment".to_string(), + ) + .map_or(DisplaySurface::default(), |s| s); + + // SAFETY: The image is created within the function, it can be ensure + // that the data ptr is not nullptr and the image size matches the image data. + let cairo_image = unsafe { + ImageSurface::create_for_data_unsafe( + surface.data() as *mut u8, + Format::Rgb24, + surface.width(), + surface.height(), + surface.stride(), + ) + } + .ok(); + + let dev_name = match con.upgrade() { + Some(c) => c.lock().unwrap().dev_name.clone(), + None => "default".to_string(), + }; + + Self { + window, + dev_name, + draw_area: DrawingArea::default(), + cursor_trsp: false, + show_menu: RadioMenuItem::default(), + source_surface: surface, + transfer_surface: None, + cairo_image, + con, + dcl, + scale_mode, + scale_x: 1.0, + scale_y: 1.0, + keysym2keycode, + } + } + + fn get_window_size(&self) -> Option<(f64, f64)> { + if let Some(win) = self.draw_area.window() { + let w_width = f64::from(win.width()); + let w_height = f64::from(win.height()); + + if w_width.ne(&0.0) && w_height.ne(&0.0) { + return Some((w_width, w_height)); + } + }; + + None + } + + /// Convert coordinates of the window to relative coordinates of the image. + /// In some situation: + /// 1. Image is scaled. + /// 2. There may be unfilled areas between the window and the image. + /// Input: relative coordinates of window. + /// Output: relative coordinates of images. + fn convert_coord(&mut self, mut x: f64, mut y: f64) -> Result<(f64, f64)> { + let (surface_width, surface_height) = match &self.cairo_image { + Some(image) => (image.width(), image.height()), + None => bail!("No display image."), + }; + let (scale_width, scale_height) = ( + f64::from(surface_width) * self.scale_x, + f64::from(surface_height) * self.scale_y, + ); + + let (mut window_width, mut window_height) = (0.0, 0.0); + if let Some((w, h)) = self.get_window_size() { + (window_width, window_height) = (w, h); + }; + let scale_factor = match self.draw_area.window() { + Some(window) => f64::from(window.scale_factor()), + None => bail!("No display window."), + }; + + x = x.max(0.0); + x = x.min(window_width); + y = y.max(0.0); + y = y.min(window_height); + + // There may be unfilled areas between the window and the image. + let (mut mx, mut my) = (0.0, 0.0); + if window_width > scale_width { + mx = (window_width - scale_width) / (2.0); + } + if window_height > scale_height { + my = (window_height - scale_height) / (2.0); + } + let real_x = ((x - mx) / self.scale_x) * scale_factor; + let real_y = ((y - my) / self.scale_y) * scale_factor; + + Ok((real_x, real_y)) + } +} + +/// Args for creating gtk thread. 
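The vm_powerdown/vm_shutdown/vm_pause/vm_resume helpers above all follow the same pattern: the GTK thread never touches VM state directly, it only writes 1 to an EventFd that the machine side polls (these fds arrive through the GtkConfig declared just below). A minimal sketch of the signalling half, assuming a power_button EventFd handed over at startup:

use vmm_sys_util::eventfd::EventFd;

// Illustrative only: the UI "presses the button" by writing 1; the VM event
// loop owns the other reference and treats a read as a power-down request.
fn press_power_button(power_button: &EventFd) {
    if let Err(e) = power_button.write(1) {
        log::error!("power button press was lost: {:?}", e);
    }
}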
+#[derive(Clone)] +struct GtkConfig { + full_screen: bool, + app_name: Option, + vm_name: String, + /// Gracefully Shutdown. + powerdown_button: Option>, + /// Forced Shutdown. + shutdown_button: Option>, + /// Pause Virtual Machine. + pause_button: Option>, + /// Resume Virtual Machine. + resume_button: Option>, + gtk_args: Vec, +} + +/// Gtk display init. +pub fn gtk_display_init(ds_cfg: &DisplayConfig, ui_context: UiContext) -> Result<()> { + let mut gtk_args: Vec = vec![]; + if let Some(app_name) = &ds_cfg.app_name { + gtk_args.push(app_name.clone()); + } + let gtk_cfg = GtkConfig { + full_screen: ds_cfg.full_screen, + app_name: ds_cfg.app_name.clone(), + vm_name: ui_context.vm_name, + powerdown_button: ui_context.power_button, + shutdown_button: ui_context.shutdown_req, + pause_button: ui_context.pause_req, + resume_button: ui_context.resume_req, + gtk_args, + }; + let _handle = thread::Builder::new() + .name("gtk display".to_string()) + .spawn(move || create_gtk_thread(>k_cfg)) + .with_context(|| "Fail to create gtk display thread!")?; + Ok(()) +} + +/// Create a gtk thread. +fn create_gtk_thread(gtk_cfg: &GtkConfig) { + let application = Application::builder() + .application_id("stratovirt.gtk") + .build(); + let gtk_cfg_clone = gtk_cfg.clone(); + + application.connect_activate(move |app| build_ui(app, >k_cfg_clone)); + application.run_with_args(>k_cfg.gtk_args); +} + +// Create window. +fn build_ui(app: &Application, gtk_cfg: &GtkConfig) { + let window = ApplicationWindow::builder() + .application(app) + .default_width(DEFAULT_WINDOW_WIDTH) + .default_height(DEFAULT_WINDOW_HEIGHT) + .build(); + + set_program_attribute(gtk_cfg, &window) + .with_context(|| "Failed to set properties for program") + .unwrap(); + + // Create menu. + let mut gtk_menu = GtkMenu::new(window); + let gd = Rc::new(RefCell::new(GtkDisplay::create(gtk_menu.clone(), gtk_cfg))); + gtk_menu.set_menu(); + gtk_menu.set_signal(&gd); + + let scale_mode = gd.borrow().scale_mode.clone(); + // Gtk display init. + graphic_display_init(gd) + .with_context(|| "Gtk display init failed!") + .unwrap(); + + gtk_menu.show_window(scale_mode, gtk_cfg.full_screen); +} + +fn set_program_attribute(gtk_cfg: &GtkConfig, window: &ApplicationWindow) -> Result<()> { + // Set title bar. + let header = HeaderBar::new(); + header.set_show_close_button(true); + header.set_decoration_layout(Some("menu:minimize,maximize,close")); + + let label: Label = Label::new(Some(>k_cfg.vm_name)); + label.set_markup( + &("".to_string() + >k_cfg.vm_name + ""), + ); + header.set_custom_title(Some(&label)); + window.set_titlebar(Some(&header)); + + // Set default icon. + if let Some(app_name) = >k_cfg.app_name { + window.set_icon_name(Some(app_name)); + } + + // Set text attributes for the program. + gettextrs::setlocale(LocaleCategory::LcMessages, ""); + gettextrs::setlocale(LocaleCategory::LcCType, "C.UTF-8"); + gettextrs::bindtextdomain(DOMAIN_NAME, LOCALE_PATH)?; + gettextrs::bind_textdomain_codeset(DOMAIN_NAME, "UTF-8")?; + gettextrs::textdomain(DOMAIN_NAME)?; + + Ok(()) +} + +fn graphic_display_init(gd: Rc>) -> Result<()> { + let console_list = get_active_console(); + let mut borrowed_gd = gd.borrow_mut(); + let keysym2keycode = borrowed_gd.keysym2keycode.clone(); + let window = borrowed_gd.gtk_menu.window.clone(); + let scale_mode = borrowed_gd.scale_mode.clone(); + let (dce_sender, dce_receiver) = + glib::MainContext::sync_channel::(Priority::default(), CHANNEL_BOUND); + // Create a display area for each console. 
+ for con in console_list { + let c = match con.upgrade() { + Some(c) => c, + None => continue, + }; + let locked_con = c.lock().unwrap(); + let dev_name = locked_con.dev_name.clone(); + let con_id = locked_con.con_id; + drop(locked_con); + // Register displaychangelistener in the console. + let gtk_opts = Arc::new(GtkInterface::new(dev_name, dce_sender.clone())); + let dcl = Arc::new(Mutex::new(DisplayChangeListener::new( + Some(con_id), + gtk_opts, + ))); + register_display(&dcl)?; + let gs = Rc::new(RefCell::new(GtkDisplayScreen::create( + window.clone(), + con.clone(), + Arc::downgrade(&dcl), + keysym2keycode.clone(), + scale_mode.clone(), + ))); + borrowed_gd.set_draw_area(gs)?; + } + drop(borrowed_gd); + + dce_receiver.attach( + None, + glib::clone!(@strong gd => @default-return Continue(true), move |event| { + gd_handle_event(&gd, event).unwrap_or_else(|e| error!("gd_handle_event: {:?}", e)); + Continue(true) + }), + ); + + Ok(()) +} + +/// Receive display update events from the mainloop of Stratovirt , +/// assigns the event to the corresponding draw display by the field +/// of device name. And then update the specific gtk display. +fn gd_handle_event(gd: &Rc>, event: DisplayChangeEvent) -> Result<()> { + let ds = match gd.borrow().get_ds_by_devname(&event.dev_name) { + Some(display) => display, + None => return Ok(()), + }; + match event.event_type { + DisplayEventType::DisplaySwitch => do_switch_event(&ds), + DisplayEventType::DisplayUpdate => do_update_event(&ds, event), + DisplayEventType::CursorDefine => do_cursor_define(&ds, event), + DisplayEventType::DisplayRefresh => do_refresh_event(&ds), + DisplayEventType::DisplaySetMajor => do_set_major_event(&ds), + } +} + +// Select the specified display area. +fn gs_show_menu_callback( + gs: &Rc>, + note_book: gtk::Notebook, + show_menu: &RadioMenuItem, +) -> Result<()> { + let borrowed_gs = gs.borrow(); + let page_num = note_book.page_num(&borrowed_gs.draw_area); + note_book.set_current_page(page_num); + + if borrowed_gs.dev_name == "ramfb" { + match borrowed_gs.dcl.upgrade() { + Some(dcl) if show_menu.is_active() => dcl.lock().unwrap().update_interval = 30, + Some(dcl) if !show_menu.is_active() => dcl.lock().unwrap().update_interval = 0, + _ => {} + } + } + + borrowed_gs.draw_area.grab_focus(); + drop(borrowed_gs); + update_window_size(gs) +} + +/// Refresh image. +/// There is a situation: +/// 1. Switch operation 1, the gtk display should change the image from a to b. +/// 2. Switch operation 2, the gtk display should change the image from b to c, but +/// the channel between stratovirt mainloop and gtk mainloop lost the event. +/// 3. The gtk display always show the image. +/// So, the refresh operation will always check if the image has been switched, if +/// the result is yes, then use the switch operation to switch the latest image. 
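graphic_display_init above is where the two event loops meet: StratoVirt's main loop keeps the SyncSender (via the GtkInterface callbacks) while the GTK main context keeps the Receiver, whose attach() closure runs gd_handle_event on the GTK thread. The pattern in isolation, with the payload simplified to a u32 (a sketch, assuming it runs on a thread that services a glib main loop):

fn channel_sketch() {
    // Bounded channel whose receiver is serviced by the glib main loop.
    let (tx, rx) = glib::MainContext::sync_channel::<u32>(glib::Priority::default(), 16);
    rx.attach(None, |n| {
        println!("event {} handled on the UI thread", n);
        glib::Continue(true) // keep the source alive
    });
    // Any other thread can now queue work for the UI without touching GTK.
    std::thread::spawn(move || {
        let _ = tx.send(1);
    });
}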
+fn do_refresh_event(gs: &Rc>) -> Result<()> { + trace::gtk_dyp_refresh(); + + let borrowed_gs = gs.borrow(); + let active_con = borrowed_gs.con.upgrade(); + let con = match active_con { + Some(con) => con, + None => return Ok(()), + }; + let locked_con = con.lock().unwrap(); + let surface = match locked_con.surface { + Some(s) => s, + None => return Ok(()), + }; + + let width = borrowed_gs.source_surface.width(); + let height = borrowed_gs.source_surface.height(); + let surface_width = surface.width(); + let surface_height = surface.height(); + if width == 0 || height == 0 || width != surface_width || height != surface_height { + drop(locked_con); + drop(borrowed_gs); + do_switch_event(gs)?; + } + Ok(()) +} + +/// Update cursor image. +fn do_cursor_define(gs: &Rc>, event: DisplayChangeEvent) -> Result<()> { + let c: DisplayMouse = match event.cursor { + Some(c) => c, + None => bail!("Invalid Cursor image"), + }; + + trace::gtk_dyp_cursor_define(&c.width, &c.height, &c.hot_x, &c.hot_y, &c.data.len()); + + if c.data.len() < ((c.width * c.height) as usize) * 4 { + bail!("Invalid Cursor image"); + } + + let borrowed_gs = gs.borrow(); + if !borrowed_gs.draw_area.is_realized() { + bail!("The draw_area is not realized"); + } + let display = borrowed_gs.draw_area.display(); + + let pixbuf = gdk::gdk_pixbuf::Pixbuf::from_mut_slice( + c.data, + Colorspace::Rgb, + true, + 8, + c.width as i32, + c.height as i32, + (c.width as i32) * 4, + ); + let gtk_cursor = gdk::Cursor::from_pixbuf(&display, &pixbuf, c.hot_x as i32, c.hot_y as i32); + if let Some(win) = &borrowed_gs.draw_area.window() { + win.set_cursor(Some(>k_cursor)); + } + Ok(()) +} + +// Update dirty area of image. +fn do_update_event(gs: &Rc>, event: DisplayChangeEvent) -> Result<()> { + trace::gtk_dyp_update(&event.x, &event.y, &event.w, &event.h); + + let borrowed_gs = gs.borrow(); + let active_con = borrowed_gs.con.upgrade(); + let con = match active_con { + Some(con) => con, + None => return Ok(()), + }; + let locked_con = con.lock().unwrap(); + let surface = match locked_con.surface { + Some(s) => s, + None => return Ok(()), + }; + + // drea_area is hidden behind the screen. + if !borrowed_gs.draw_area.is_realized() { + return Ok(()); + } + + if surface.image.is_null() { + bail!("Image is null"); + } + + let src_width = get_image_width(surface.image); + let src_height = get_image_height(surface.image); + let dest_width = get_image_width(borrowed_gs.source_surface.image); + let dest_height = get_image_height(borrowed_gs.source_surface.image); + + let surface_width = cmp::min(src_width, dest_width); + let surface_height = cmp::min(src_height, dest_height); + + let (x, y) = (event.x, event.y); + let x1 = cmp::min(x + event.w, surface_width); + let y1 = cmp::min(y + event.h, surface_height); + let w = (x1 - x).abs(); + let h = (y1 - y).abs(); + + match borrowed_gs.transfer_surface { + Some(s) if borrowed_gs.source_surface.format != pixman_format_code_t::PIXMAN_x8r8g8b8 => { + if src_width != s.width() || src_height != s.height() { + bail!("Wrong format of image format."); + } + // SAFETY: Verified that the pointer of source image and dest image + // is not empty, and the copied data will not exceed the image area + unsafe { + pixman_image_composite( + pixman_op_t::PIXMAN_OP_SRC, + surface.image, + ptr::null_mut(), + s.image, + x as i16, + y as i16, + 0, + 0, + x as i16, + y as i16, + w as u16, + h as u16, + ) + }; + } + _ => {} + }; + drop(locked_con); + + // Image scalling. 
+ let x1 = (f64::from(x) * borrowed_gs.scale_x).floor(); + let y1 = (f64::from(y) * borrowed_gs.scale_y).floor(); + let x2 = (f64::from(x) * borrowed_gs.scale_x + f64::from(w) * borrowed_gs.scale_x).ceil(); + let y2 = (f64::from(y) * borrowed_gs.scale_y + f64::from(h) * borrowed_gs.scale_y).ceil(); + + let scale_width = f64::from(surface_width) * borrowed_gs.scale_x; + let scale_height = f64::from(surface_height) * borrowed_gs.scale_y; + let (window_width, window_height); + match borrowed_gs.get_window_size() { + Some((w, h)) => (window_width, window_height) = (w, h), + None => return Ok(()), + }; + + let mut mx: f64 = 0.0; + let mut my: f64 = 0.0; + if window_width > scale_width { + mx = (window_width - scale_width) / (2.0); + } + if window_height > scale_height { + my = (window_height - scale_height) / (2.0); + } + + borrowed_gs.draw_area.queue_draw_area( + (mx + x1) as i32, + (my + y1) as i32, + (x2 - x1) as i32, + (y2 - y1) as i32, + ); + + Ok(()) +} + +/// Switch display image. +fn do_switch_event(gs: &Rc>) -> Result<()> { + let mut borrowed_gs = gs.borrow_mut(); + let scale_mode = borrowed_gs.scale_mode.clone(); + let active_con = borrowed_gs.con.upgrade(); + let con = match active_con { + Some(con) => con, + None => return Ok(()), + }; + let locked_con = con.lock().unwrap(); + let surface = match locked_con.surface { + Some(s) => s, + None => return Ok(()), + }; + + let mut need_resize: bool = true; + + let width = borrowed_gs.source_surface.width(); + let height = borrowed_gs.source_surface.height(); + let surface_width = surface.width(); + let surface_height = surface.height(); + let surface_stride = surface.stride(); + trace::gtk_dyp_switch(&width, &height, &surface_width, &surface_height); + + if width != 0 && height != 0 && width == surface_width && height == surface_height { + need_resize = false; + } + + if surface.image.is_null() { + bail!("Image data is invalid."); + } + + let source_surface = DisplaySurface { + format: surface.format, + image: ref_pixman_image(surface.image), + }; + unref_pixman_image(borrowed_gs.source_surface.image); + borrowed_gs.source_surface = source_surface; + if let Some(s) = borrowed_gs.transfer_surface { + unref_pixman_image(s.image); + borrowed_gs.transfer_surface = None; + } + drop(locked_con); + + if borrowed_gs.source_surface.format == pixman_format_code_t::PIXMAN_x8r8g8b8 { + let data = get_image_data(borrowed_gs.source_surface.image) as *mut u8; + borrowed_gs.cairo_image = + // SAFETY: + // 1. It can be sure that the ptr of data is not nullptr. + // 2. The copy range will not exceed the image data. + unsafe { + ImageSurface::create_for_data_unsafe( + data as *mut u8, + Format::Rgb24, + surface_width, + surface_height, + surface_stride, + ) + } + .ok() + } else { + let transfer_image = create_pixman_image( + pixman_format_code_t::PIXMAN_x8r8g8b8, + surface_width, + surface_height, + ptr::null_mut(), + surface_stride, + ); + + let data = get_image_data(transfer_image) as *mut u8; + borrowed_gs.cairo_image = + // SAFETY: + // 1. It can be sure that the ptr of data is not nullptr. + // 2. The copy range will not exceed the image data. + unsafe { + ImageSurface::create_for_data_unsafe( + data as *mut u8, + Format::Rgb24, + surface_width, + surface_height, + surface_stride, + ) + } + .ok(); + + // SAFETY: + // 1. It can be sure that source ptr and dest ptr is not nullptr. + // 2. The copy range will not exceed the image area. 
+ unsafe { + pixman_image_composite( + pixman_op_t::PIXMAN_OP_SRC, + borrowed_gs.source_surface.image, + ptr::null_mut(), + transfer_image, + 0, + 0, + 0, + 0, + 0, + 0, + surface_width as u16, + surface_height as u16, + ) + }; + borrowed_gs.transfer_surface = Some(DisplaySurface { + format: pixman_format_code_t::PIXMAN_x8r8g8b8, + image: transfer_image, + }); + }; + + let (window_width, window_height); + match borrowed_gs.get_window_size() { + Some((w, h)) => (window_width, window_height) = (w, h), + None => return Ok(()), + }; + if scale_mode.borrow().is_full_screen() || scale_mode.borrow().is_free_scale() { + borrowed_gs.scale_x = window_width / f64::from(surface_width); + borrowed_gs.scale_y = window_height / f64::from(surface_height); + } + + // Vm desktop manage its own cursor, gtk cursor need to be trsp firstly. + if !borrowed_gs.cursor_trsp { + if let Some(win) = borrowed_gs.draw_area.window() { + let dpy = borrowed_gs.window.display(); + let gtk_cursor = gdk::Cursor::for_display(&dpy, gdk::CursorType::BlankCursor); + win.set_cursor(gtk_cursor.as_ref()); + } + borrowed_gs.cursor_trsp = true; + } + + drop(borrowed_gs); + + if need_resize { + update_window_size(gs) + } else { + renew_image(gs) + } +} + +/// Activate the current screen. +fn do_set_major_event(gs: &Rc>) -> Result<()> { + let borrowed_gs = gs.borrow(); + if borrowed_gs.show_menu.is_active() { + return Ok(()); + } + borrowed_gs.show_menu.activate(); + Ok(()) +} + +pub(crate) fn update_window_size(gs: &Rc>) -> Result<()> { + let borrowed_gs = gs.borrow(); + let scale_mode = borrowed_gs.scale_mode.borrow().clone(); + let (width, height) = match &borrowed_gs.cairo_image { + Some(image) => (f64::from(image.width()), f64::from(image.height())), + None => (0.0, 0.0), + }; + let (mut scale_width, mut scale_height) = if scale_mode.is_free_scale() { + (width * GTK_SCALE_MIN, height * GTK_SCALE_MIN) + } else { + (width * borrowed_gs.scale_x, height * borrowed_gs.scale_y) + }; + scale_width = scale_width.max(f64::from(DEFAULT_SURFACE_WIDTH)); + scale_height = scale_height.max(f64::from(DEFAULT_SURFACE_HEIGHT)); + + let geo: Geometry = Geometry::new( + scale_width as i32, + scale_height as i32, + 0, + 0, + 0, + 0, + 0, + 0, + 0.0, + 0.0, + Gravity::Center, + ); + + let geo_mask = WindowHints::MIN_SIZE; + + borrowed_gs + .draw_area + .set_size_request(geo.min_width(), geo.min_height()); + if let Some(window) = borrowed_gs.draw_area.window() { + window.set_geometry_hints(&geo, geo_mask) + } + + if !scale_mode.is_full_screen() && !scale_mode.is_free_scale() { + borrowed_gs + .window + .resize(DEFAULT_SURFACE_WIDTH, DEFAULT_SURFACE_HEIGHT); + } + Ok(()) +} + +/// Ask the gtk display to update the display. 
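update_window_size above derives the minimum size requested from GTK: in free-scale mode the drawing area may shrink down to GTK_SCALE_MIN of the image, otherwise it follows the current scale factors, and in both cases the result is clamped to the default surface size. Restated as a helper for illustration (min_size_request is not part of the patch):

// Minimum width/height passed to set_size_request(); mirrors update_window_size.
fn min_size_request(image: (f64, f64), scale: (f64, f64), free_scale: bool) -> (i32, i32) {
    let (w, h) = if free_scale {
        (image.0 * GTK_SCALE_MIN, image.1 * GTK_SCALE_MIN)
    } else {
        (image.0 * scale.0, image.1 * scale.1)
    };
    (
        w.max(f64::from(DEFAULT_SURFACE_WIDTH)) as i32,
        h.max(f64::from(DEFAULT_SURFACE_HEIGHT)) as i32,
    )
}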
+pub(crate) fn renew_image(gs: &Rc>) -> Result<()> { + let borrowed_gs = gs.borrow(); + let (width, height); + match borrowed_gs.get_window_size() { + Some((w, h)) => (width, height) = (w, h), + None => return Ok(()), + }; + + borrowed_gs + .draw_area + .queue_draw_area(0, 0, width as i32, height as i32); + Ok(()) +} + +pub fn qmp_query_display_image() -> Result { + let mut gpu_info = GpuInfo::default(); + let console_list = get_active_console(); + for con in console_list { + let c = match con.upgrade() { + Some(c) => c, + None => continue, + }; + let mut locked_con = c.lock().unwrap(); + if !locked_con.active { + continue; + } + let dev_name = &locked_con.dev_name.clone(); + + if let Some(surface) = &mut locked_con.surface { + // SAFETY: The image is created within the function, it can be ensure + // that the data ptr is not nullptr and the image size matches the image data. + let cairo_image = unsafe { + ImageSurface::create_for_data_unsafe( + surface.data() as *mut u8, + Format::Rgb24, + surface.width(), + surface.height(), + surface.stride(), + ) + }?; + let mut file = create_file(&mut gpu_info, dev_name)?; + cairo_image.write_to_png(&mut file)?; + }; + } + gpu_info.isSuccess = true; + Ok(gpu_info) +} + +fn create_file(gpu_info: &mut GpuInfo, dev_name: &String) -> Result { + let temp_dir = env::temp_dir().display().to_string(); + let binding = temp_dir + "/stratovirt-images"; + let path = Path::new(&binding); + + if !path.exists() { + fs::create_dir(path)?; + } + let file_dir = path.display().to_string(); + gpu_info.fileDir = file_dir.clone(); + let nsec = gettime()?.1; + let file_name = file_dir + "/stratovirt-display-" + dev_name + "-" + &nsec.to_string() + ".png"; + let file = fs::File::create(file_name)?; + Ok(file) +} diff --git a/ui/src/input.rs b/ui/src/input.rs new file mode 100644 index 0000000000000000000000000000000000000000..f72119cf5a721afe6b2b14c009173fa318fcaca8 --- /dev/null +++ b/ui/src/input.rs @@ -0,0 +1,674 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + collections::{HashMap, HashSet}, + sync::{Arc, Mutex}, +}; + +use anyhow::Result; +use log::debug; +use once_cell::sync::Lazy; + +use util::bitmap::Bitmap; + +// Logical window size for mouse. +pub const ABS_MAX: u64 = 0x7fff; +// Event type of Point. +pub const INPUT_POINT_LEFT: u32 = 0x01; +pub const INPUT_POINT_RIGHT: u32 = 0x02; +pub const INPUT_POINT_MIDDLE: u32 = 0x04; +pub const INPUT_POINT_BACK: u32 = 0x08; +pub const INPUT_POINT_FORWARD: u32 = 0x10; +pub const INPUT_BUTTON_WHEEL_UP: u32 = 0x20; +pub const INPUT_BUTTON_WHEEL_DOWN: u32 = 0x40; +pub const INPUT_BUTTON_WHEEL_LEFT: u32 = 0x80; +pub const INPUT_BUTTON_WHEEL_RIGHT: u32 = 0x100; +pub const INPUT_BUTTON_MASK: u32 = 0x1f; + +// ASCII value. +pub const ASCII_A: i32 = 65; +pub const ASCII_Z: i32 = 90; +pub const UPPERCASE_TO_LOWERCASE: i32 = 32; +const ASCII_A_LOWERCASE: i32 = 97; +const ASCII_Z_LOWERCASE: i32 = 122; +const BIT_PER_BYTE: u32 = 8; + +// Keycode. 
+pub const KEYCODE_1: u16 = 2; +pub const KEYCODE_9: u16 = 10; +const KEYCODE_CTRL: u16 = 29; +pub const KEYCODE_RET: u16 = 38; +const KEYCODE_SHIFT: u16 = 42; +const KEYCODE_SHIFT_R: u16 = 54; +const KEYCODE_ALT: u16 = 56; +pub const KEYCODE_CAPS_LOCK: u16 = 58; +pub const KEYCODE_NUM_LOCK: u16 = 69; +pub const KEYCODE_SCR_LOCK: u16 = 70; +const KEYCODE_CTRL_R: u16 = 157; +const KEYCODE_ALT_R: u16 = 184; +const KEYPAD_1: u16 = 0xffb0; +const KEYPAD_9: u16 = 0xffb9; +const KEYPAD_SEPARATOR: u16 = 0xffac; +const KEYPAD_DECIMAL: u16 = 0xffae; +const KEYCODE_KP_7: u16 = 0x47; +const KEYCODE_KP_DECIMAL: u16 = 0x53; +// Led (HID) +pub const NUM_LOCK_LED: u8 = 0x1; +pub const CAPS_LOCK_LED: u8 = 0x2; +pub const SCROLL_LOCK_LED: u8 = 0x4; + +static INPUTS: Lazy>> = Lazy::new(|| Arc::new(Mutex::new(Inputs::default()))); + +static LED_STATE: Lazy>> = + Lazy::new(|| Arc::new(Mutex::new(LedState::default()))); + +#[derive(Debug)] +pub enum InputType { + KeyEvent, + MoveEvent, + ButtonEvent, +} + +#[derive(Default)] +pub enum Axis { + #[default] + X, + Y, +} + +#[derive(Default)] +pub struct MoveEvent { + pub axis: Axis, + pub data: u32, +} + +#[derive(Default)] +pub struct ButtonEvent { + pub button: u32, + pub down: bool, +} + +#[derive(Default)] +pub struct KeyEvent { + pub keycode: u16, + pub down: bool, +} + +pub struct InputEvent { + pub input_type: InputType, + pub move_event: MoveEvent, + pub button_event: ButtonEvent, + pub key_event: KeyEvent, +} + +impl InputEvent { + fn new(input_type: InputType) -> Self { + Self { + input_type, + move_event: MoveEvent::default(), + button_event: ButtonEvent::default(), + key_event: KeyEvent::default(), + } + } +} + +// Keyboard Modifier State +pub enum KeyboardModifier { + KeyModNone = 0, + KeyModShift = 1, + KeyModCtrl = 2, + KeyModAlt = 3, + KeyModAltgr = 4, + KeyModNumlock = 5, + KeyModCapslock = 6, + KeyModMax = 7, +} + +/// Record the keyboard status, +/// Including the press information of keys, +/// and some status information. +pub struct KeyBoardState { + /// Keyboard state. + pub keystate: HashSet, + /// Key Modifier states. + pub keymods: Bitmap, +} + +impl Default for KeyBoardState { + fn default() -> Self { + Self { + keystate: HashSet::new(), + keymods: Bitmap::new( + KeyboardModifier::KeyModMax as usize / (BIT_PER_BYTE as usize) + 1, + ), + } + } +} + +impl KeyBoardState { + /// Get the corresponding keyboard modifier. + fn keyboard_modifier_get(&self, key_mod: KeyboardModifier) -> bool { + match self.keymods.contain(key_mod as usize) { + Ok(res) => res, + Err(_e) => false, + } + } + + /// Reset all keyboard modifier state. + fn keyboard_state_reset(&mut self) { + self.keymods.clear_all(); + } + + /// Record the press and up state in the keyboard. + fn keyboard_state_update(&mut self, keycode: u16, down: bool) -> Result<()> { + // Key is not pressed and the incoming key action is up. + if !down && !self.keystate.contains(&keycode) { + return Ok(()); + } + + // Update Keyboard key modifier state. + if down { + self.keystate.insert(keycode); + } else { + self.keystate.remove(&keycode); + } + + // Update Keyboard modifier state. 
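+ // Shift and Ctrl follow the physical key state: the modifier stays set while
+ // either the left or the right key is held. Alt and AltGr are tracked through
+ // their own keycodes, while Caps Lock and Num Lock toggle on every press and
+ // are left untouched on release.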
+ match keycode { + KEYCODE_SHIFT | KEYCODE_SHIFT_R => { + self.keyboard_modstate_update( + KEYCODE_SHIFT, + KEYCODE_SHIFT_R, + KeyboardModifier::KeyModShift, + )?; + } + KEYCODE_CTRL | KEYCODE_CTRL_R => { + self.keyboard_modstate_update( + KEYCODE_CTRL, + KEYCODE_CTRL_R, + KeyboardModifier::KeyModCtrl, + )?; + } + KEYCODE_ALT => { + self.keyboard_modstate_update( + KEYCODE_ALT, + KEYCODE_ALT, + KeyboardModifier::KeyModAlt, + )?; + } + KEYCODE_ALT_R => { + self.keyboard_modstate_update( + KEYCODE_ALT_R, + KEYCODE_ALT_R, + KeyboardModifier::KeyModAltgr, + )?; + } + KEYCODE_CAPS_LOCK => { + if down { + self.keymods + .change(KeyboardModifier::KeyModCapslock as usize)?; + } + } + KEYCODE_NUM_LOCK => { + if down { + self.keymods + .change(KeyboardModifier::KeyModNumlock as usize)?; + } + } + _ => {} + } + + Ok(()) + } + + /// If one of the keys keycode_1 and keycode_2 is pressed, + /// Then the corresponding keyboard modifier state will be set. + /// Otherwise, it will be clear. + fn keyboard_modstate_update( + &mut self, + keycode_1: u16, + keycode_2: u16, + mod_state: KeyboardModifier, + ) -> Result<()> { + if self.keystate.contains(&keycode_1) | self.keystate.contains(&keycode_2) { + self.keymods.set(mod_state as usize)?; + } else { + self.keymods.clear(mod_state as usize)?; + } + Ok(()) + } +} + +#[derive(Default)] +struct LedState { + kbd_led: u8, +} + +#[derive(Default)] +struct Inputs { + kbd_ids: Vec, + kbd_lists: HashMap>>, + tablet_ids: Vec, + tablet_lists: HashMap>>, + keyboard_state: KeyBoardState, +} + +impl Inputs { + fn register_kbd(&mut self, device: &str, kbd: Arc>) { + self.kbd_ids.insert(0, device.to_string()); + self.kbd_lists.insert(device.to_string(), kbd); + } + + fn unregister_kbd(&mut self, device: &str) { + self.kbd_lists.remove(&device.to_string()); + let len = self.kbd_ids.len(); + for i in 0..len { + if self.kbd_ids[i] == device { + self.kbd_ids.remove(i); + break; + } + } + } + + fn register_mouse(&mut self, device: &str, tablet: Arc>) { + self.tablet_ids.insert(0, device.to_string()); + self.tablet_lists.insert(device.to_string(), tablet); + } + + fn unregister_mouse(&mut self, device: &str) { + self.tablet_lists.remove(&device.to_string()); + let len = self.tablet_ids.len(); + for i in 0..len { + if self.tablet_ids[i] == device { + self.tablet_ids.remove(i); + break; + } + } + } + + fn get_active_kbd(&mut self) -> Option>> { + if !self.kbd_ids.is_empty() { + let kbd = self.kbd_lists.get(&self.kbd_ids[0])?.clone(); + Some(kbd) + } else { + None + } + } + + fn get_active_mouse(&mut self) -> Option>> { + if !self.tablet_ids.is_empty() { + let mouse = self.tablet_lists.get(&self.tablet_ids[0])?.clone(); + Some(mouse) + } else { + None + } + } + + fn press_key(&mut self, keycode: u16) -> Result<()> { + self.keyboard_state.keyboard_state_update(keycode, true)?; + let kbd = self.get_active_kbd(); + if let Some(k) = kbd.as_ref() { + k.lock().unwrap().do_key_event(keycode, true)?; + } + self.keyboard_state.keyboard_state_update(keycode, false)?; + if let Some(k) = kbd.as_ref() { + k.lock().unwrap().do_key_event(keycode, false)?; + } + Ok(()) + } +} + +pub fn register_keyboard(device: &str, kbd: Arc>) { + INPUTS.lock().unwrap().register_kbd(device, kbd); +} + +pub fn unregister_keyboard(device: &str) { + INPUTS.lock().unwrap().unregister_kbd(device); +} + +pub fn register_pointer(device: &str, tablet: Arc>) { + INPUTS.lock().unwrap().register_mouse(device, tablet); +} + +pub fn unregister_pointer(device: &str) { + INPUTS.lock().unwrap().unregister_mouse(device); +} + +pub 
fn input_move_abs(axis: Axis, data: u32) -> Result<()> { + let mut input_event = InputEvent::new(InputType::MoveEvent); + let move_event = MoveEvent { axis, data }; + input_event.move_event = move_event; + + let mouse = INPUTS.lock().unwrap().get_active_mouse(); + if let Some(m) = mouse { + m.lock().unwrap().update_point_state(input_event)?; + } + + Ok(()) +} + +pub fn input_button(button: u32, down: bool) -> Result<()> { + let mut input_event = InputEvent::new(InputType::ButtonEvent); + let button_event = ButtonEvent { button, down }; + input_event.button_event = button_event; + + let mouse = INPUTS.lock().unwrap().get_active_mouse(); + if let Some(m) = mouse { + m.lock().unwrap().update_point_state(input_event)?; + } + + Ok(()) +} + +pub fn input_point_sync() -> Result<()> { + let mouse = INPUTS.lock().unwrap().get_active_mouse(); + if let Some(m) = mouse { + m.lock().unwrap().sync()?; + } + Ok(()) +} + +pub fn key_event(keycode: u16, down: bool) -> Result<()> { + let kbd = INPUTS.lock().unwrap().get_active_kbd(); + if let Some(k) = kbd { + k.lock().unwrap().do_key_event(keycode, down)?; + } + Ok(()) +} + +pub fn trigger_key(keycode: u16) -> Result<()> { + key_event(keycode, true)?; + key_event(keycode, false) +} + +/// A complete mouse click event. +pub fn press_mouse(button: u32) -> Result<()> { + input_button(button, true)?; + input_point_sync()?; + input_button(button, false)?; + input_point_sync() +} + +/// 1. Keep the key state in keyboard_state. +/// 2. Sync the caps lock and num lock state to guest. +pub fn update_key_state(down: bool, keysym: i32, keycode: u16) -> Result<()> { + let mut locked_input = INPUTS.lock().unwrap(); + let upper = (ASCII_A..=ASCII_Z).contains(&keysym); + let is_letter = upper || (ASCII_A_LOWERCASE..=ASCII_Z_LOWERCASE).contains(&keysym); + let in_keypad = (KEYCODE_KP_7..=KEYCODE_KP_DECIMAL).contains(&keycode); + + if down && is_letter { + let shift = locked_input + .keyboard_state + .keyboard_modifier_get(KeyboardModifier::KeyModShift); + let in_upper = check_kbd_led_state(CAPS_LOCK_LED); + if (shift && upper == in_upper) || (!shift && upper != in_upper) { + debug!("Correct caps lock {} inside {}", upper, in_upper); + locked_input.press_key(KEYCODE_CAPS_LOCK)?; + } + } else if down && in_keypad { + let numlock = keysym_is_num_lock(keysym); + let in_numlock = check_kbd_led_state(NUM_LOCK_LED); + if in_numlock != numlock { + debug!("Correct num lock {} inside {}", numlock, in_numlock); + locked_input.press_key(KEYCODE_NUM_LOCK)?; + } + } + + locked_input + .keyboard_state + .keyboard_state_update(keycode, down) +} + +pub fn keyboard_update(down: bool, keycode: u16) -> Result<()> { + let mut locked_input = INPUTS.lock().unwrap(); + locked_input + .keyboard_state + .keyboard_state_update(keycode, down) +} + +/// Release all pressed key. 
+pub fn release_all_key() -> Result<()> { + let mut locked_input = INPUTS.lock().unwrap(); + let mut keycode_lists: Vec = Vec::new(); + for keycode in locked_input.keyboard_state.keystate.iter() { + keycode_lists.push(*keycode); + } + for keycode in keycode_lists.iter() { + locked_input + .keyboard_state + .keyboard_state_update(*keycode, false)?; + if let Some(k) = locked_input.get_active_kbd().as_ref() { + k.lock().unwrap().do_key_event(*keycode, false)?; + } + } + Ok(()) +} + +pub fn check_kbd_led_state(state: u8) -> bool { + LED_STATE.lock().unwrap().kbd_led & state == state +} + +pub fn get_kbd_led_state() -> u8 { + LED_STATE.lock().unwrap().kbd_led +} + +pub fn set_kbd_led_state(state: u8) { + LED_STATE.lock().unwrap().kbd_led = state; +} + +pub fn keyboard_modifier_get(key_mod: KeyboardModifier) -> bool { + INPUTS + .lock() + .unwrap() + .keyboard_state + .keyboard_modifier_get(key_mod) +} + +pub fn keyboard_state_reset() { + INPUTS.lock().unwrap().keyboard_state.keyboard_state_reset(); +} + +fn keysym_is_num_lock(sym: i32) -> bool { + matches!( + (sym & 0xffff) as u16, + KEYPAD_1..=KEYPAD_9 | KEYPAD_SEPARATOR | KEYPAD_DECIMAL + ) +} + +pub trait KeyboardOpts: Send { + fn do_key_event(&mut self, keycode: u16, down: bool) -> Result<()>; +} + +pub trait PointerOpts: Send { + fn update_point_state(&mut self, input_event: InputEvent) -> Result<()>; + fn sync(&mut self) -> Result<()>; +} + +#[cfg(test)] +mod tests { + use anyhow::bail; + + #[cfg(feature = "keycode")] + use crate::keycode::{DpyMod, KeyCode}; + static TEST_INPUT: Lazy>> = + Lazy::new(|| Arc::new(Mutex::new(TestInput::default()))); + + use super::*; + + pub struct TestInput { + kbd: Arc>, + tablet: Arc>, + } + + impl Default for TestInput { + fn default() -> Self { + Self { + kbd: Arc::new(Mutex::new(TestKbd { + keycode: 0, + down: false, + })), + tablet: Arc::new(Mutex::new(TestTablet { + button: 0, + x: 0, + y: 0, + })), + } + } + } + + impl TestInput { + fn register_input(&self) { + register_keyboard("TestKeyboard", self.kbd.clone()); + register_pointer("TestPointer", self.tablet.clone()); + } + + fn unregister_input(&self) { + unregister_keyboard("TestKeyboard"); + unregister_pointer("TestPointer"); + self.kbd.lock().unwrap().keycode = 0; + self.kbd.lock().unwrap().down = false; + self.tablet.lock().unwrap().x = 0; + self.tablet.lock().unwrap().y = 0; + self.tablet.lock().unwrap().button = 0; + } + } + + #[derive(Default)] + pub struct TestKbd { + keycode: u16, + down: bool, + } + + impl KeyboardOpts for TestKbd { + fn do_key_event(&mut self, keycode: u16, down: bool) -> Result<()> { + self.keycode = keycode; + self.down = down; + Ok(()) + } + } + + #[derive(Default)] + pub struct TestTablet { + pub button: u32, + x: u32, + y: u32, + } + + impl PointerOpts for TestTablet { + fn update_point_state(&mut self, input_event: InputEvent) -> Result<()> { + match input_event.input_type { + InputType::MoveEvent => match input_event.move_event.axis { + Axis::X => self.x = input_event.move_event.data, + Axis::Y => self.y = input_event.move_event.data, + }, + InputType::ButtonEvent => { + if input_event.button_event.down { + self.button |= input_event.button_event.button; + } else { + self.button &= !(input_event.button_event.button & 0x7); + } + } + _ => bail!("Input type: {:?} is unsupported", input_event.input_type), + } + Ok(()) + } + + fn sync(&mut self) -> Result<()> { + Ok(()) + } + } + + #[test] + fn test_input_basic() { + let test_input = TEST_INPUT.lock().unwrap(); + test_input.register_input(); + let test_kdb = 
test_input.kbd.clone(); + let test_mouse = test_input.tablet.clone(); + + assert!(key_event(12, true).is_ok()); + assert_eq!(test_kdb.lock().unwrap().keycode, 12); + assert!(test_kdb.lock().unwrap().down); + + // Test point event. + assert_eq!(test_mouse.lock().unwrap().button, 0); + assert_eq!(test_mouse.lock().unwrap().x, 0); + assert_eq!(test_mouse.lock().unwrap().y, 0); + register_pointer("TestPointer", test_mouse.clone()); + + assert!(input_move_abs(Axis::X, 54).is_ok()); + assert!(input_move_abs(Axis::Y, 12).is_ok()); + assert!(input_button(1, true).is_ok()); + assert!(input_point_sync().is_ok()); + + assert_eq!(test_mouse.lock().unwrap().button, 1); + assert_eq!(test_mouse.lock().unwrap().x, 54); + assert_eq!(test_mouse.lock().unwrap().y, 12); + + test_input.unregister_input(); + } + + #[cfg(feature = "keycode")] + #[test] + fn test_release_all_key() { + fn do_key_event(press: bool, keysym: i32, keycode: u16) -> Result<()> { + update_key_state(press, keysym, keycode)?; + key_event(keycode, press) + } + + // Test keyboard event. + let test_input = TEST_INPUT.lock().unwrap(); + test_input.register_input(); + let test_kdb = test_input.kbd.clone(); + + #[cfg(not(all(target_env = "ohos", feature = "ohui_srv")))] + let keysym2qkeycode = KeyCode::keysym_to_qkeycode(DpyMod::Gtk); + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + let keysym2qkeycode = KeyCode::keysym_to_qkeycode(DpyMod::Ohui); + // ["0", "a", "space"] + #[cfg(not(all(target_env = "ohos", feature = "ohui_srv")))] + let keysym_lists: Vec = vec![0x0030, 0x0061, 0x0020]; + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + let keysym_lists: Vec = vec![0x07D0, 0x07E1, 0x0802]; + let keycode_lists: Vec = keysym_lists + .iter() + .map(|x| *keysym2qkeycode.get(x).unwrap()) + .collect(); + for idx in 0..keysym_lists.len() { + let keysym = keycode_lists[idx]; + let keycode = keycode_lists[idx]; + assert!(do_key_event(true, i32::from(keysym), keycode).is_ok()); + assert_eq!(test_kdb.lock().unwrap().keycode, keycode); + assert!(test_kdb.lock().unwrap().down); + } + + let locked_input = INPUTS.lock().unwrap(); + for keycode in &keycode_lists { + assert!(locked_input.keyboard_state.keystate.contains(keycode)); + assert!(locked_input.keyboard_state.keystate.contains(keycode)); + } + drop(locked_input); + + // Release all keys + assert!(release_all_key().is_ok()); + + let locked_input = INPUTS.lock().unwrap(); + for keycode in &keycode_lists { + assert!(!locked_input.keyboard_state.keystate.contains(keycode)); + assert!(!locked_input.keyboard_state.keystate.contains(keycode)); + } + drop(locked_input); + + test_input.unregister_input(); + } +} diff --git a/ui/src/keycode.rs b/ui/src/keycode.rs new file mode 100644 index 0000000000000000000000000000000000000000..16db2dbd431fa95fb2293a96c4d2c3c69ef9d73a --- /dev/null +++ b/ui/src/keycode.rs @@ -0,0 +1,683 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
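The tests above exercise the path a display frontend is expected to follow: translate the toolkit keysym into a QEMU-style keycode once via the table in keycode.rs, then feed press/release pairs through update_key_state() and key_event(). The sketch below is not part of this patch; forward_gtk_key() and press_and_release_a() are hypothetical helpers assumed to live inside the ui crate with the gtk and keycode features enabled, and the map is assumed to be a HashMap<u16, u16>, matching how the tests index it. It shows that flow for the GTK keysym 0x0061 ("a") used by the test:

use std::collections::HashMap;

use anyhow::Result;

use crate::input::{key_event, update_key_state};
use crate::keycode::{DpyMod, KeyCode};

/// Forward one toolkit key event to the guest, ignoring keysyms with no mapping.
fn forward_gtk_key(map: &HashMap<u16, u16>, keysym: u16, down: bool) -> Result<()> {
    if let Some(&keycode) = map.get(&keysym) {
        // Keep caps/num lock and the modifier bitmap consistent first ...
        update_key_state(down, i32::from(keysym), keycode)?;
        // ... then inject the key into the active keyboard device.
        key_event(keycode, down)?;
    }
    Ok(())
}

fn press_and_release_a() -> Result<()> {
    // Build the keysym -> keycode table once and reuse it for every event.
    let map = KeyCode::keysym_to_qkeycode(DpyMod::Gtk);
    forward_gtk_key(&map, 0x0061, true)?;
    forward_gtk_key(&map, 0x0061, false)
}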
+ +use std::collections::HashMap; + +pub enum DpyMod { + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + Ohui, + #[cfg(feature = "gtk")] + Gtk, + #[cfg(feature = "vnc")] + Vnc, +} + +#[allow(unused)] +#[derive(Clone, Copy, Debug)] +// Some of KeyCodes are not used on OHOS. +pub enum KeyCode { + Escape, + Key1, + Key2, + Key3, + Key4, + Key5, + Key6, + Key7, + Key8, + Key9, + Key0, + Minus, + Equal, + BackSpace, + Tab, + Keyq, + Keyw, + Keye, + Keyr, + Keyt, + Keyy, + Keyu, + Keyi, + Keyo, + Keyp, + BracketLeft, + BracketRight, + Return, + ControlL, + Keya, + Keys, + Keyd, + Keyf, + Keyg, + Keyh, + Keyj, + Keyk, + Keyl, + Semicolon, + Apostrophe, + Grave, + ShiftL, + BackSlash, + Keyz, + Keyx, + Keyc, + Keyv, + Keyb, + Keyn, + Keym, + Comma, + Period, + Slash, + ShiftR, + KPMultiply, + AltL, + Space, + CapsLock, + F1, + F2, + F3, + F4, + F5, + F6, + F7, + F8, + F9, + F10, + NumLock, + ScrollLock, + KPHome, + KP7, + KPUp, + KP8, + KPPrior, + KP9, + KPSubtract, + KPLeft, + KP4, + KPBegin, + KP5, + KPRight, + KP6, + KPAdd, + KPEnd, + KP1, + KPDown, + KP2, + KPNext, + KP3, + KPInsert, + KP0, + KPDelete, + KPSeparator, + KPDecimal, + Execute, + Print, + SysReq, + F11, + F12, + KPEqual, + HenkanMode, + Muhenkan, + KPEnter, + ControlR, + KPDivide, + AltR, + ISOLevel3Shift, + ModeSwitch, + Pause, + Home, + Up, + Prior, + Left, + Right, + End, + Down, + Next, + Insert, + Delete, + SuperL, + SuperR, + Menu, + Exclam, + At, + Numbersign, + Dollar, + Percent, + Asciicircum, + Ampersand, + Asterisk, + Parenleft, + Parenright, + Underscore, + Plus, + KeyQ, + KeyW, + KeyE, + KeyR, + KeyT, + KeyY, + KeyU, + KeyI, + KeyO, + KeyP, + Braceleft, + Braceright, + KeyA, + KeyS, + KeyD, + KeyF, + KeyG, + KeyH, + KeyJ, + KeyK, + KeyL, + Colon, + Quotedbl, + Asciitilde, + Bar, + KeyZ, + KeyX, + KeyC, + KeyV, + KeyB, + KeyN, + KeyM, + Less, + Greater, + Question, + MetaL, + MetaR, + Brokenbar, +} + +impl KeyCode { + fn to_key_num(self) -> u16 { + match self { + KeyCode::Escape => 0x0001, + KeyCode::Key1 => 0x0002, + KeyCode::Key2 => 0x0003, + KeyCode::Key3 => 0x0004, + KeyCode::Key4 => 0x0005, + KeyCode::Key5 => 0x0006, + KeyCode::Key6 => 0x0007, + KeyCode::Key7 => 0x0008, + KeyCode::Key8 => 0x0009, + KeyCode::Key9 => 0x000A, + KeyCode::Key0 => 0x000B, + KeyCode::Minus => 0x000C, + KeyCode::Equal => 0x000D, + KeyCode::BackSpace => 0x000E, + KeyCode::Tab => 0x000F, + KeyCode::Keyq => 0x0010, + KeyCode::Keyw => 0x0011, + KeyCode::Keye => 0x0012, + KeyCode::Keyr => 0x0013, + KeyCode::Keyt => 0x0014, + KeyCode::Keyy => 0x0015, + KeyCode::Keyu => 0x0016, + KeyCode::Keyi => 0x0017, + KeyCode::Keyo => 0x0018, + KeyCode::Keyp => 0x0019, + KeyCode::BracketLeft => 0x001A, + KeyCode::BracketRight => 0x001B, + KeyCode::Return => 0x001C, + KeyCode::ControlL => 0x001D, + KeyCode::Keya => 0x001E, + KeyCode::Keys => 0x001F, + KeyCode::Keyd => 0x0020, + KeyCode::Keyf => 0x0021, + KeyCode::Keyg => 0x0022, + KeyCode::Keyh => 0x0023, + KeyCode::Keyj => 0x0024, + KeyCode::Keyk => 0x0025, + KeyCode::Keyl => 0x0026, + KeyCode::Semicolon => 0x0027, + KeyCode::Apostrophe => 0x0028, + KeyCode::Grave => 0x0029, + KeyCode::ShiftL => 0x002A, + KeyCode::BackSlash => 0x002B, + KeyCode::Keyz => 0x002C, + KeyCode::Keyx => 0x002D, + KeyCode::Keyc => 0x002E, + KeyCode::Keyv => 0x002F, + KeyCode::Keyb => 0x0030, + KeyCode::Keyn => 0x0031, + KeyCode::Keym => 0x0032, + KeyCode::Comma => 0x0033, + KeyCode::Period => 0x0034, + KeyCode::Slash => 0x0035, + KeyCode::ShiftR => 0x0036, + KeyCode::KPMultiply => 0x0037, + KeyCode::AltL => 0x0038, + 
KeyCode::Space => 0x0039, + KeyCode::CapsLock => 0x003A, + KeyCode::F1 => 0x003B, + KeyCode::F2 => 0x003C, + KeyCode::F3 => 0x003D, + KeyCode::F4 => 0x003E, + KeyCode::F5 => 0x003F, + KeyCode::F6 => 0x0040, + KeyCode::F7 => 0x0041, + KeyCode::F8 => 0x0042, + KeyCode::F9 => 0x0043, + KeyCode::F10 => 0x0044, + KeyCode::NumLock => 0x0045, + KeyCode::ScrollLock => 0x0046, + KeyCode::KPHome => 0x0047, + KeyCode::KPUp => 0x0048, + KeyCode::KPPrior => 0x0049, + KeyCode::KPSubtract => 0x004A, + KeyCode::KPLeft => 0x004B, + KeyCode::KPBegin => 0x004C, + KeyCode::KPRight => 0x004D, + KeyCode::KPAdd => 0x004E, + KeyCode::KPEnd => 0x004F, + KeyCode::KPDown => 0x0050, + KeyCode::KPNext => 0x0051, + KeyCode::KPInsert => 0x0052, + KeyCode::KPDelete => 0x0053, + KeyCode::KPSeparator => 0x0053, + KeyCode::KP1 => 0x004F, // Numlock KPEnd + KeyCode::KP2 => 0x0050, // Numlock KPDown + KeyCode::KP3 => 0x0051, // Numlock KPNext + KeyCode::KP4 => 0x004B, // Numlock KPLeft + KeyCode::KP5 => 0x004C, // Numlock KPBegin + KeyCode::KP6 => 0x004D, // Numlock KPRight + KeyCode::KP7 => 0x0047, // Numlock KPHome + KeyCode::KP8 => 0x0048, // Numlock KPUp + KeyCode::KP9 => 0x0049, // Numlock KPPrior + KeyCode::KP0 => 0x0052, // Numlock KPInsert + KeyCode::KPDecimal => 0x0053, // Numlock KPDelete + KeyCode::Execute => 0x0054, + KeyCode::Print => 0x0054, + KeyCode::SysReq => 0x0054, + KeyCode::F11 => 0x0057, + KeyCode::F12 => 0x0058, + KeyCode::KPEqual => 0x0059, + KeyCode::HenkanMode => 0x0079, + KeyCode::Muhenkan => 0x007B, + KeyCode::KPEnter => 0x009C, + KeyCode::ControlR => 0x009D, + KeyCode::KPDivide => 0x00B5, + KeyCode::AltR => 0x00B8, + KeyCode::ISOLevel3Shift => 0x00B8, + KeyCode::ModeSwitch => 0x00B8, + KeyCode::Pause => 0x00C6, + KeyCode::Home => 0x00C7, + KeyCode::Up => 0x00C8, + KeyCode::Prior => 0x00C9, + KeyCode::Left => 0x00CB, + KeyCode::Right => 0x00CD, + KeyCode::End => 0x00CF, + KeyCode::Down => 0x00D0, + KeyCode::Next => 0x00D1, + KeyCode::Insert => 0x00D2, + KeyCode::Delete => 0x00D3, + KeyCode::SuperL => 0x00DB, + KeyCode::SuperR => 0x00DC, + KeyCode::Menu => 0x00DD, + KeyCode::Exclam => 0x0102, // Shift 1 + KeyCode::At => 0x0103, // Shift 2 + KeyCode::Numbersign => 0x0104, // Shift 3 + KeyCode::Dollar => 0x0105, // Shift 4 + KeyCode::Percent => 0x0106, // Shift 5 + KeyCode::Asciicircum => 0x0107, // Shift 6 + KeyCode::Ampersand => 0x0108, // Shift 7 + KeyCode::Asterisk => 0x0109, // Shift 8 + KeyCode::Parenleft => 0x010A, // Shift 9 + KeyCode::Parenright => 0x010B, // Shift 0 + KeyCode::Underscore => 0x010C, // Shift Minus + KeyCode::Plus => 0x010D, // Shift Equal + KeyCode::KeyQ => 0x0110, // Shift q + KeyCode::KeyW => 0x0111, // Shift w + KeyCode::KeyE => 0x0112, // Shift e + KeyCode::KeyR => 0x0113, // Shift r + KeyCode::KeyT => 0x0114, // Shift t + KeyCode::KeyY => 0x0115, // Shift y + KeyCode::KeyU => 0x0116, // Shift u + KeyCode::KeyI => 0x0117, // Shift i + KeyCode::KeyO => 0x0118, // Shift o + KeyCode::KeyP => 0x0119, // Shift p + KeyCode::Braceleft => 0x011A, // Shift Bracketleft + KeyCode::Braceright => 0x011B, // Shift Bracketright + KeyCode::KeyA => 0x011E, // Shift a + KeyCode::KeyS => 0x011F, // Shift s + KeyCode::KeyD => 0x0120, // Shift d + KeyCode::KeyF => 0x0121, // Shift f + KeyCode::KeyG => 0x0122, // Shift g + KeyCode::KeyH => 0x0123, // Shift h + KeyCode::KeyJ => 0x0124, // Shift j + KeyCode::KeyK => 0x0125, // Shift k + KeyCode::KeyL => 0x0126, // Shift l + KeyCode::Colon => 0x0127, // Shift Semicolon + KeyCode::Quotedbl => 0x0128, // Shift Apostrophe + KeyCode::Asciitilde => 
0x0129, // Shift Grave + KeyCode::Bar => 0x012B, // Shift Backslash + KeyCode::KeyZ => 0x012C, // Shift z + KeyCode::KeyX => 0x012D, // Shift x + KeyCode::KeyC => 0x012E, // Shift c + KeyCode::KeyV => 0x012F, // Shift v + KeyCode::KeyB => 0x0130, // Shift b + KeyCode::KeyN => 0x0131, // Shift n + KeyCode::KeyM => 0x0132, // Shift m + KeyCode::Less => 0x0133, // Shift Comma + KeyCode::Greater => 0x0134, // Shift Period + KeyCode::Question => 0x0135, // Shift Slash + KeyCode::MetaL => 0x0138, // Shift AltL + KeyCode::MetaR => 0x01B8, // Shift AltR + KeyCode::Brokenbar => 0x0956, // Shift Altgr Less + } + } + + pub fn keysym_to_qkeycode(mode: DpyMod) -> HashMap { + let mut keysym2qkeycode: HashMap = HashMap::new(); + let keycodes = match mode { + #[cfg(all(target_env = "ohos", feature = "ohui_srv"))] + DpyMod::Ohui => KEY_CODE_OH.as_ref(), + #[cfg(any(feature = "gtk", feature = "vnc"))] + _ => KEY_CODE_ASCLL.as_ref(), + }; + // Mapping ASCII to keycode. + for &(keycode, keysym) in keycodes.iter() { + let qkeycode = keycode.to_key_num(); + keysym2qkeycode.insert(keysym, qkeycode); + } + keysym2qkeycode + } +} + +#[cfg(all(target_env = "ohos", feature = "ohui_srv"))] +const KEY_CODE_OH: [(KeyCode, u16); 105] = [ + (KeyCode::Key0, 0x07D0), + (KeyCode::Key1, 0x07D1), + (KeyCode::Key2, 0x07D2), + (KeyCode::Key3, 0x07D3), + (KeyCode::Key4, 0x07D4), + (KeyCode::Key5, 0x07D5), + (KeyCode::Key6, 0x07D6), + (KeyCode::Key7, 0x07D7), + (KeyCode::Key8, 0x07D8), + (KeyCode::Key9, 0x07D9), + (KeyCode::Minus, 0x0809), + (KeyCode::Equal, 0x080A), + (KeyCode::BackSpace, 0x0807), + (KeyCode::Tab, 0x0801), + (KeyCode::Keya, 0x07E1), + (KeyCode::Keyb, 0x07E2), + (KeyCode::Keyc, 0x07E3), + (KeyCode::Keyd, 0x07E4), + (KeyCode::Keye, 0x07E5), + (KeyCode::Keyf, 0x07E6), + (KeyCode::Keyg, 0x07E7), + (KeyCode::Keyh, 0x07E8), + (KeyCode::Keyi, 0x07E9), + (KeyCode::Keyj, 0x07EA), + (KeyCode::Keyk, 0x07EB), + (KeyCode::Keyl, 0x07EC), + (KeyCode::Keym, 0x07ED), + (KeyCode::Keyn, 0x07EE), + (KeyCode::Keyo, 0x07EF), + (KeyCode::Keyp, 0x07F0), + (KeyCode::Keyq, 0x07F1), + (KeyCode::Keyr, 0x07F2), + (KeyCode::Keys, 0x07F3), + (KeyCode::Keyt, 0x07F4), + (KeyCode::Keyu, 0x07F5), + (KeyCode::Keyv, 0x07F6), + (KeyCode::Keyw, 0x07F7), + (KeyCode::Keyx, 0x07F8), + (KeyCode::Keyy, 0x07F9), + (KeyCode::Keyz, 0x07FA), + (KeyCode::Space, 0x0802), + (KeyCode::Comma, 0x07FB), + (KeyCode::Slash, 0x0810), + (KeyCode::Down, 0x07DD), + (KeyCode::Left, 0x07DE), + (KeyCode::End, 0x0822), + (KeyCode::Escape, 0x0816), + (KeyCode::Period, 0x07FC), + (KeyCode::Up, 0x07DC), + (KeyCode::Apostrophe, 0x080F), + (KeyCode::Semicolon, 0x080E), + (KeyCode::BackSlash, 0x080D), + (KeyCode::Braceleft, 0x080B), + (KeyCode::Braceright, 0x080C), + (KeyCode::AltR, 0x07FE), + (KeyCode::Return, 0x0806), + (KeyCode::Grave, 0x0808), + (KeyCode::Home, 0x0821), + (KeyCode::SysReq, 0x081F), + (KeyCode::Right, 0x07DF), + (KeyCode::Menu, 0x0813), + (KeyCode::Prior, 0x0814), + (KeyCode::Insert, 0x0823), + (KeyCode::NumLock, 0x0836), + (KeyCode::Next, 0x0815), + (KeyCode::KPAdd, 0x0844), + (KeyCode::KPMultiply, 0x0842), + (KeyCode::KPEnter, 0x0847), + (KeyCode::Pause, 0x0820), + (KeyCode::ScrollLock, 0x081B), + (KeyCode::SuperL, 0x081C), + (KeyCode::SuperR, 0x081D), + (KeyCode::KPDecimal, 0x0845), + (KeyCode::KPSubtract, 0x0843), + (KeyCode::KPDivide, 0x0841), + (KeyCode::KP0, 0x0837), + (KeyCode::KP1, 0x0838), + (KeyCode::KP2, 0x0839), + (KeyCode::KP3, 0x083A), + (KeyCode::KP4, 0x083B), + (KeyCode::KP5, 0x083C), + (KeyCode::KP6, 0x083D), + (KeyCode::KP7, 0x083E), + 
(KeyCode::KP8, 0x083F), + (KeyCode::KP9, 0x0840), + (KeyCode::KPEqual, 0x0848), + (KeyCode::F1, 0x082A), + (KeyCode::F2, 0x082B), + (KeyCode::F3, 0x082C), + (KeyCode::F4, 0x082D), + (KeyCode::F5, 0x082E), + (KeyCode::F6, 0x082F), + (KeyCode::F7, 0x0830), + (KeyCode::F8, 0x0831), + (KeyCode::F9, 0x0832), + (KeyCode::F10, 0x0833), + (KeyCode::F11, 0x0834), + (KeyCode::F12, 0x0835), + (KeyCode::ShiftL, 0x7FF), + (KeyCode::ShiftR, 0x0800), + (KeyCode::ControlL, 0x0818), + (KeyCode::ControlR, 0x0819), + (KeyCode::CapsLock, 0x081A), + (KeyCode::AltL, 0x07FD), + (KeyCode::Delete, 0x0817), +]; + +#[cfg(any(feature = "gtk", feature = "vnc"))] +const KEY_CODE_ASCLL: [(KeyCode, u16); 173] = [ + (KeyCode::Space, 0x0020), + (KeyCode::Exclam, 0x0021), + (KeyCode::Quotedbl, 0x0022), + (KeyCode::Numbersign, 0x0023), + (KeyCode::Dollar, 0x0024), + (KeyCode::Percent, 0x0025), + (KeyCode::Ampersand, 0x0026), + (KeyCode::Apostrophe, 0x0027), + (KeyCode::Parenleft, 0x0028), + (KeyCode::Parenright, 0x0029), + (KeyCode::Asterisk, 0x002A), + (KeyCode::Plus, 0x002B), + (KeyCode::Comma, 0x002C), + (KeyCode::Minus, 0x002D), + (KeyCode::Period, 0x002E), + (KeyCode::Slash, 0x002F), + (KeyCode::Key0, 0x0030), + (KeyCode::Key1, 0x0031), + (KeyCode::Key2, 0x0032), + (KeyCode::Key3, 0x0033), + (KeyCode::Key4, 0x0034), + (KeyCode::Key5, 0x0035), + (KeyCode::Key6, 0x0036), + (KeyCode::Key7, 0x0037), + (KeyCode::Key8, 0x0038), + (KeyCode::Key9, 0x0039), + (KeyCode::Colon, 0x003A), + (KeyCode::Semicolon, 0x003B), + (KeyCode::Less, 0x003C), + (KeyCode::Equal, 0x003D), + (KeyCode::Greater, 0x003E), + (KeyCode::Question, 0x003F), + (KeyCode::At, 0x0040), + (KeyCode::KeyA, 0x0041), + (KeyCode::KeyB, 0x0042), + (KeyCode::KeyC, 0x0043), + (KeyCode::KeyD, 0x0044), + (KeyCode::KeyE, 0x0045), + (KeyCode::KeyF, 0x0046), + (KeyCode::KeyG, 0x0047), + (KeyCode::KeyH, 0x0048), + (KeyCode::KeyI, 0x0049), + (KeyCode::KeyJ, 0x004A), + (KeyCode::KeyK, 0x004B), + (KeyCode::KeyL, 0x004C), + (KeyCode::KeyM, 0x004D), + (KeyCode::KeyN, 0x004E), + (KeyCode::KeyO, 0x004F), + (KeyCode::KeyP, 0x0050), + (KeyCode::KeyQ, 0x0051), + (KeyCode::KeyR, 0x0052), + (KeyCode::KeyS, 0x0053), + (KeyCode::KeyT, 0x0054), + (KeyCode::KeyU, 0x0055), + (KeyCode::KeyV, 0x0056), + (KeyCode::KeyW, 0x0057), + (KeyCode::KeyX, 0x0058), + (KeyCode::KeyY, 0x0059), + (KeyCode::KeyZ, 0x005A), + (KeyCode::BracketLeft, 0x005B), + (KeyCode::BackSlash, 0x005C), + (KeyCode::BracketRight, 0x005D), + (KeyCode::Asciicircum, 0x005E), + (KeyCode::Underscore, 0x005F), + (KeyCode::Grave, 0x0060), + (KeyCode::Keya, 0x0061), + (KeyCode::Keyb, 0x0062), + (KeyCode::Keyc, 0x0063), + (KeyCode::Keyd, 0x0064), + (KeyCode::Keye, 0x0065), + (KeyCode::Keyf, 0x0066), + (KeyCode::Keyg, 0x0067), + (KeyCode::Keyh, 0x0068), + (KeyCode::Keyi, 0x0069), + (KeyCode::Keyj, 0x006A), + (KeyCode::Keyk, 0x006B), + (KeyCode::Keyl, 0x006C), + (KeyCode::Keym, 0x006D), + (KeyCode::Keyn, 0x006E), + (KeyCode::Keyo, 0x006F), + (KeyCode::Keyp, 0x0070), + (KeyCode::Keyq, 0x0071), + (KeyCode::Keyr, 0x0072), + (KeyCode::Keys, 0x0073), + (KeyCode::Keyt, 0x0074), + (KeyCode::Keyu, 0x0075), + (KeyCode::Keyv, 0x0076), + (KeyCode::Keyw, 0x0077), + (KeyCode::Keyx, 0x0078), + (KeyCode::Keyy, 0x0079), + (KeyCode::Keyz, 0x007A), + (KeyCode::Braceleft, 0x007B), + (KeyCode::Bar, 0x007C), + (KeyCode::Braceright, 0x007D), + (KeyCode::Asciitilde, 0x007E), + (KeyCode::Brokenbar, 0x00A6), + (KeyCode::ISOLevel3Shift, 0xFE03), + (KeyCode::BackSpace, 0xFF08), + (KeyCode::Tab, 0xFF09), + (KeyCode::Return, 0xFF0D), + (KeyCode::Pause, 0xFF13), + 
(KeyCode::ScrollLock, 0xFF14), + (KeyCode::SysReq, 0xFF15), + (KeyCode::Escape, 0xFF1B), + (KeyCode::Muhenkan, 0xFF22), + (KeyCode::HenkanMode, 0xFF23), + (KeyCode::Home, 0xFF50), + (KeyCode::Left, 0xFF51), + (KeyCode::Up, 0xFF52), + (KeyCode::Right, 0xFF53), + (KeyCode::Down, 0xFF54), + (KeyCode::Prior, 0xFF55), + (KeyCode::Next, 0xFF56), + (KeyCode::End, 0xFF57), + (KeyCode::Print, 0xFF61), + (KeyCode::Execute, 0xFF62), + (KeyCode::Insert, 0xFF63), + (KeyCode::Menu, 0xFF67), + (KeyCode::ModeSwitch, 0xFF7E), + (KeyCode::NumLock, 0xFF7F), + (KeyCode::KPEnter, 0xFF8D), + (KeyCode::KPHome, 0xFF95), + (KeyCode::KPLeft, 0xFF96), + (KeyCode::KPUp, 0xFF97), + (KeyCode::KPRight, 0xFF98), + (KeyCode::KPDown, 0xFF99), + (KeyCode::KPPrior, 0xFF9A), + (KeyCode::KPNext, 0xFF9B), + (KeyCode::KPEnd, 0xFF9C), + (KeyCode::KPBegin, 0xFF9D), + (KeyCode::KPInsert, 0xFF9E), + (KeyCode::KPDelete, 0xFF9F), + (KeyCode::KPMultiply, 0xFFAA), + (KeyCode::KPAdd, 0xFFAB), + (KeyCode::KPSeparator, 0xFFAC), + (KeyCode::KPSubtract, 0xFFAD), + (KeyCode::KPDecimal, 0xFFAE), + (KeyCode::KPDivide, 0xFFAF), + (KeyCode::KP0, 0xFFB0), + (KeyCode::KP1, 0xFFB1), + (KeyCode::KP2, 0xFFB2), + (KeyCode::KP3, 0xFFB3), + (KeyCode::KP4, 0xFFB4), + (KeyCode::KP5, 0xFFB5), + (KeyCode::KP6, 0xFFB6), + (KeyCode::KP7, 0xFFB7), + (KeyCode::KP8, 0xFFB8), + (KeyCode::KP9, 0xFFB9), + (KeyCode::KPEqual, 0xFFBD), + (KeyCode::F1, 0xFFBE), + (KeyCode::F2, 0xFFBF), + (KeyCode::F3, 0xFFC0), + (KeyCode::F4, 0xFFC1), + (KeyCode::F5, 0xFFC2), + (KeyCode::F6, 0xFFC3), + (KeyCode::F7, 0xFFC4), + (KeyCode::F8, 0xFFC5), + (KeyCode::F9, 0xFFC6), + (KeyCode::F10, 0xFFC7), + (KeyCode::F11, 0xFFC8), + (KeyCode::F12, 0xFFC9), + (KeyCode::ShiftL, 0xFFE1), + (KeyCode::ShiftR, 0xFFE2), + (KeyCode::ControlL, 0xFFE3), + (KeyCode::ControlR, 0xFFE4), + (KeyCode::CapsLock, 0xFFE5), + (KeyCode::MetaL, 0xFFE7), + (KeyCode::MetaR, 0xFFE8), + (KeyCode::AltL, 0xFFE9), + (KeyCode::AltR, 0xFFEA), + (KeyCode::SuperL, 0xFFEB), + (KeyCode::SuperR, 0xFFEC), + (KeyCode::Delete, 0xFFFF), +]; diff --git a/ui/src/lib.rs b/ui/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..fdc805610902d9acfdf42668845ca7378b6edd7d --- /dev/null +++ b/ui/src/lib.rs @@ -0,0 +1,27 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +#[cfg(feature = "console")] +pub mod console; +pub mod error; +#[cfg(feature = "gtk")] +pub mod gtk; +pub mod input; +#[cfg(feature = "keycode")] +mod keycode; +#[cfg(all(target_env = "ohos", feature = "ohui_srv"))] +pub mod ohui_srv; +#[cfg(feature = "pixman")] +pub mod pixman; +pub mod utils; +#[cfg(feature = "vnc")] +pub mod vnc; diff --git a/ui/src/ohui_srv/channel.rs b/ui/src/ohui_srv/channel.rs new file mode 100755 index 0000000000000000000000000000000000000000..e55279c99852accc9da5cc8f0d69cffbfa07495f --- /dev/null +++ b/ui/src/ohui_srv/channel.rs @@ -0,0 +1,107 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// Stratovirt is licensed under Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::io::{ErrorKind, Read, Write}; +use std::os::fd::AsRawFd; +use std::os::unix::io::RawFd; + +use anyhow::{bail, Result}; +use log::error; + +use util::byte_code::ByteCode; +use util::socket::{SocketListener, SocketStream}; +use util::unix::limit_permission; + +pub struct OhUiChannel { + listener: SocketListener, + stream: Option, +} + +impl OhUiChannel { + pub fn new(path: &str) -> Result { + let listener = match SocketListener::bind_by_uds(path) { + Ok(l) => l, + Err(e) => bail!("Failed to create listener with path {}, {:?}", path, e), + }; + limit_permission(path).unwrap_or_else(|e| { + error!( + "Failed to limit permission for ohui-sock {}, err: {:?}", + path, e + ); + }); + + Ok(OhUiChannel { + listener, + stream: None, + }) + } + + pub fn get_listener_raw_fd(&self) -> RawFd { + self.listener.as_raw_fd() + } + + pub fn get_stream_raw_fd(&self) -> Option { + self.stream.as_ref().map(|s| s.as_raw_fd()) + } + + pub fn accept(&mut self) -> Result<()> { + self.stream = Some(self.listener.accept()?); + Ok(()) + } + + pub fn disconnect(&mut self) { + self.stream = None; + } +} + +pub fn recv_slice(stream: &mut dyn Read, data: &mut [u8]) -> Result { + let len = data.len(); + let mut ret = 0_usize; + + while ret < len { + match stream.read(&mut data[ret..len]) { + Ok(0) => break, + Ok(n) => ret += n, + Err(e) => { + let ek = e.kind(); + if ek != ErrorKind::WouldBlock && ek != ErrorKind::Interrupted { + bail!("recv_slice: error occurred: {:?}", e); + } + break; + } + } + } + Ok(ret) +} + +pub fn send_obj(stream: &mut dyn Write, obj: &T) -> Result<()> { + let slice = obj.as_bytes(); + let mut left = slice.len(); + let mut count = 0_usize; + + while left > 0 { + match stream.write(&slice[count..]) { + Ok(n) => { + left -= n; + count += n; + } + Err(e) => { + let ek = e.kind(); + if ek == ErrorKind::WouldBlock || ek == ErrorKind::Interrupted { + continue; + } + bail!(e); + } + } + } + Ok(()) +} diff --git a/ui/src/ohui_srv/mod.rs b/ui/src/ohui_srv/mod.rs new file mode 100755 index 0000000000000000000000000000000000000000..92e22ea82588fb53b4b820730f64cff82e05c3ed --- /dev/null +++ b/ui/src/ohui_srv/mod.rs @@ -0,0 +1,570 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// Stratovirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
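recv_slice() and send_obj() above loop over partial reads and writes and treat WouldBlock/Interrupted as non-fatal, so callers deal only in whole objects. Below is a minimal sketch of pairing them, not part of this patch: PingMsg is a hypothetical message declared the same way as the events in msg.rs further below, relying on the empty `impl ByteCode` pattern to provide as_bytes() for serialization, and the code is assumed to sit next to the channel module inside the ui crate.

use std::mem::size_of;
use std::os::unix::net::UnixStream;

use anyhow::{bail, Result};
use util::byte_code::ByteCode;

use super::channel::{recv_slice, send_obj};

// Hypothetical fixed-size message, declared like the events in msg.rs.
#[repr(C, packed)]
#[derive(Debug, Default, Copy, Clone)]
struct PingMsg {
    seq: u32,
}

impl ByteCode for PingMsg {}

fn ping(stream: &mut UnixStream, seq: u32) -> Result<()> {
    // send_obj() keeps writing until the whole struct is on the wire.
    send_obj(&mut *stream, &PingMsg { seq })?;

    // recv_slice() reports how many bytes actually arrived; a short count
    // means the peer closed the socket or the read would block.
    let mut buf = [0u8; size_of::<PingMsg>()];
    if recv_slice(&mut *stream, &mut buf)? != buf.len() {
        bail!("short read from OHUI client");
    }
    Ok(())
}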
+ +pub mod channel; +pub mod msg; +pub mod msg_handle; + +use std::os::unix::io::RawFd; +use std::path::Path; +use std::ptr; +use std::rc::Rc; +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Mutex, RwLock, +}; + +use anyhow::{anyhow, bail, Context, Result}; +use log::{error, info}; +use once_cell::sync::OnceCell; +use vmm_sys_util::epoll::EventSet; + +use crate::{ + console::{ + graphic_hardware_update, register_display, DisplayChangeListener, + DisplayChangeListenerOperations, DisplayMouse, DisplaySurface, + DISPLAY_UPDATE_INTERVAL_DEFAULT, + }, + pixman::{bytes_per_pixel, get_image_data, ref_pixman_image, unref_pixman_image}, +}; +use address_space::FileBackend; +use channel::*; +use machine_manager::{ + config::{DisplayConfig, VIRTIO_GPU_ENABLE_BAR0_SIZE}, + event_loop::register_event_helper, + temp_cleaner::TempCleaner, +}; +use msg_handle::*; +use util::{ + loop_context::{ + gen_delete_notifiers, EventNotifier, EventNotifierHelper, NotifierCallback, + NotifierOperation, + }, + pixman::{pixman_format_code_t, pixman_image_t}, + unix::{do_mmap, limit_permission}, +}; + +#[derive(Debug, Clone)] +struct GuestSurface { + /// Image from display device. + guest_image: *mut pixman_image_t, + /// Image format of pixman. + guest_format: pixman_format_code_t, + stride: i32, + width: i32, + height: i32, +} + +// SAFETY: Send and Sync is not auto-implemented for `*mut pixman_image_t` type. +// implementing them is safe because GuestSurface will be protected by +// RwLock +unsafe impl Sync for GuestSurface {} +// SAFETY: Same as 'Sync for GuestSurface' +unsafe impl Send for GuestSurface {} + +impl GuestSurface { + fn new() -> GuestSurface { + GuestSurface { + guest_image: ptr::null_mut::(), + guest_format: pixman_format_code_t::PIXMAN_x8r8g8b8, + stride: 0, + width: 0, + height: 0, + } + } +} + +const CURSOR_SIZE: u64 = 16 * 1024; +const DEFAULT_CURSOR_WIDTH: u32 = 128; +const DEFAULT_CURSOR_HEIGHT: u32 = 128; + +pub struct OhUiServer { + // framebuffer passthru to the guest + passthru: OnceCell, + // guest surface for framebuffer + surface: RwLock, + // transfer channel via unix sock + channel: Arc>, + // message handler + msg_handler: OhUiMsgHandler, + // connected or not + connected: AtomicBool, + // iothread processing unix socket + iothread: OnceCell>, + // address of cursor buffer + cursorbuffer: u64, + //address of framebuffer + framebuffer: u64, + // framebuffer file backend + fb_file: Option, + // tokenID of OHUI client + pub token_id: Arc>, +} + +impl OhUiServer { + fn init_channel(path: &String) -> Result>> { + let file_path = Path::new(path.as_str()).join("ohui.sock"); + let sock_file = file_path + .to_str() + .ok_or_else(|| anyhow!("init_channel: Failed to get str from {}", path))?; + TempCleaner::add_path(sock_file.to_string()); + Ok(Arc::new(Mutex::new(OhUiChannel::new(sock_file)?))) + } + + fn init_fb_file(path: &String) -> Result<(Option, u64)> { + let file_path = Path::new(path.as_str()).join("ohui-fb"); + let fb_file = file_path + .to_str() + .ok_or_else(|| anyhow!("init_fb_file: Failed to get str from {}", path))?; + let fb_backend = FileBackend::new_mem(fb_file, VIRTIO_GPU_ENABLE_BAR0_SIZE)?; + TempCleaner::add_path(fb_file.to_string()); + limit_permission(fb_file).unwrap_or_else(|e| { + error!( + "Failed to limit permission for ohui-fb {}, err: {:?}", + fb_file, e + ); + }); + + let host_addr = do_mmap( + &Some(fb_backend.file.as_ref()), + VIRTIO_GPU_ENABLE_BAR0_SIZE, + 0, + false, + true, + false, + )?; + + Ok((Some(fb_backend), host_addr)) + } + + fn 
init_cursor_file(path: &String) -> Result { + let file_path = Path::new(path.as_str()).join("ohui-cursor"); + let cursor_file = file_path + .to_str() + .ok_or_else(|| anyhow!("init_cursor_file: Failed to get str from {}", path))?; + let cursor_backend = FileBackend::new_mem(cursor_file, CURSOR_SIZE)?; + TempCleaner::add_path(cursor_file.to_string()); + limit_permission(cursor_file).unwrap_or_else(|e| { + error!( + "Failed to limit permission for ohui-cursor {}, err: {:?}", + cursor_file, e + ); + }); + + let cursorbuffer = do_mmap( + &Some(cursor_backend.file.as_ref()), + CURSOR_SIZE, + 0, + false, + true, + false, + )?; + + Ok(cursorbuffer) + } + + pub fn new(ui_path: String, sock_path: String) -> Result { + let channel = Self::init_channel(&sock_path)?; + let (fb_file, framebuffer) = Self::init_fb_file(&ui_path)?; + let cursorbuffer = Self::init_cursor_file(&ui_path)?; + + Ok(OhUiServer { + passthru: OnceCell::new(), + surface: RwLock::new(GuestSurface::new()), + channel, + msg_handler: OhUiMsgHandler::new(), + connected: AtomicBool::new(false), + iothread: OnceCell::new(), + cursorbuffer, + framebuffer, + fb_file, + token_id: Arc::new(RwLock::new(0)), + }) + } + + pub fn set_passthru(&self, passthru: bool) { + self.passthru + .set(passthru) + .unwrap_or_else(|_| error!("Failed to initialize passthru of OHUI Server.")); + } + + #[inline(always)] + fn get_channel(&self) -> Arc> { + self.channel.clone() + } + + #[inline(always)] + pub fn get_ohui_fb(&self) -> Option { + self.fb_file.clone() + } + + fn handle_recv(&self) -> Result<()> { + if !self.connected() { + return Err(anyhow!("connection has not establish".to_string())); + } + self.msg_handler.handle_msg(self.token_id.clone()) + } + + // check dirty area data before call it. + unsafe fn raw_update_dirty_area( + &self, + surface_data: *mut u32, + stride: i32, + pos: (i32, i32), + size: (i32, i32), + force_copy: bool, + ) { + let (x, y) = pos; + let (w, h) = size; + + if self.framebuffer == 0 + || surface_data.is_null() + || (!force_copy && *self.passthru.get_or_init(|| false)) + { + return; + } + + let offset = (x * bytes_per_pixel() as i32 + y * stride) as u64; + let mut src_ptr = surface_data as u64 + offset; + let mut dst_ptr = self.framebuffer + offset; + + for _ in 0..h { + // SAFETY: it can be ensure the raw pointer will not exceed the range. + unsafe { + ptr::copy_nonoverlapping( + src_ptr as *const u8, + dst_ptr as *mut u8, + w as usize * bytes_per_pixel(), + ); + } + src_ptr += stride as u64; + dst_ptr += stride as u64; + } + } + + fn send_window_info(&self) { + let locked_surface = self.surface.read().unwrap(); + + if locked_surface.guest_image.is_null() { + return; + } + + self.msg_handler + .send_windowinfo(locked_surface.width as u32, locked_surface.height as u32); + } + + #[inline(always)] + fn connected(&self) -> bool { + self.connected.load(Ordering::Relaxed) + } + + #[inline(always)] + fn set_connect(&self, conn: bool) { + self.connected.store(conn, Ordering::Relaxed); + if conn { + self.msg_handler.update_sock(self.channel.clone()); + } else { + self.channel.lock().unwrap().disconnect(); + self.msg_handler.reset(); + } + } + + fn set_iothread(&self, iothread: Option) { + if self.iothread.set(iothread).is_err() { + error!("Failed to initialize iothread of OHUI Server."); + } + } + + fn clear_cursor_buffer(&self) { + if self.cursorbuffer == 0 { + error!("Cursor buffer is invalid."); + return; + } + //SAFETY: we make sure that buffer info is valid. 
+ unsafe { + ptr::write_bytes(self.cursorbuffer as *mut u8, 0, CURSOR_SIZE as usize); + } + } +} + +impl DisplayChangeListenerOperations for OhUiServer { + fn dpy_switch(&self, surface: &DisplaySurface) -> Result<()> { + let mut locked_surface = self.surface.write().unwrap(); + + unref_pixman_image(locked_surface.guest_image); + + locked_surface.guest_image = ref_pixman_image(surface.image); + locked_surface.guest_format = surface.format; + locked_surface.stride = surface.stride(); + locked_surface.width = surface.width(); + locked_surface.height = surface.height(); + drop(locked_surface); + let locked_surface = self.surface.read().unwrap(); + // SAFETY: Dirty area does not exceed surface buffer. + unsafe { + self.raw_update_dirty_area( + get_image_data(locked_surface.guest_image), + locked_surface.stride, + (0, 0), + (locked_surface.width, locked_surface.height), + true, + ) + }; + self.clear_cursor_buffer(); + + if !self.connected() { + return Ok(()); + } + self.msg_handler + .send_windowinfo(locked_surface.width as u32, locked_surface.height as u32); + self.msg_handler.handle_cursor_define( + DEFAULT_CURSOR_WIDTH, + DEFAULT_CURSOR_HEIGHT, + 0, + 0, + bytes_per_pixel().try_into()?, + ); + Ok(()) + } + + fn dpy_refresh(&self, dcl: &Arc>) -> Result<()> { + let con_id = dcl.lock().unwrap().con_id; + graphic_hardware_update(con_id); + Ok(()) + } + + fn dpy_image_update(&self, x: i32, y: i32, w: i32, h: i32) -> Result<()> { + if !self.connected() { + return Ok(()); + } + + let locked_surface = self.surface.read().unwrap(); + if locked_surface.guest_image.is_null() { + return Ok(()); + } + + if locked_surface.width < x + || locked_surface.height < y + || locked_surface.width < x.saturating_add(w) + || locked_surface.height < y.saturating_add(h) + { + bail!("dpy_image_update: invalid dirty area"); + } + + // SAFETY: We checked dirty area data before. + unsafe { + self.raw_update_dirty_area( + get_image_data(locked_surface.guest_image), + locked_surface.stride, + (x, y), + (w, h), + false, + ) + }; + + self.msg_handler + .handle_dirty_area(x as u32, y as u32, w as u32, h as u32); + Ok(()) + } + + fn dpy_cursor_update(&self, cursor: &DisplayMouse) -> Result<()> { + if self.cursorbuffer == 0 { + error!("Hwcursor not set."); + // No need to return Err for this situation is not fatal + return Ok(()); + } + + let len = cursor + .width + .checked_mul(cursor.height) + .with_context(|| "Invalid cursor width * height")? + .checked_mul(bytes_per_pixel() as u32) + .with_context(|| "Invalid cursor size")?; + if len > CURSOR_SIZE as u32 || len > cursor.data.len().try_into()? { + error!("Too large cursor length {}.", len); + // No need to return Err for this situation is not fatal + return Ok(()); + } + + // SAFETY: len is checked before copying, it's safe to do this. 
+ unsafe { + ptr::copy_nonoverlapping( + cursor.data.as_ptr(), + self.cursorbuffer as *mut u8, + len as usize, + ); + } + + self.msg_handler.handle_cursor_define( + cursor.width, + cursor.height, + cursor.hot_x, + cursor.hot_y, + bytes_per_pixel() as u32, + ); + Ok(()) + } +} + +pub fn ohui_init(ohui_srv: Arc, cfg: &DisplayConfig) -> Result<()> { + // set iothread + ohui_srv.set_iothread(cfg.iothread.clone()); + // Register ohui interface + let dcl = Arc::new(Mutex::new(DisplayChangeListener::new( + None, + ohui_srv.clone(), + ))); + dcl.lock().unwrap().update_interval = DISPLAY_UPDATE_INTERVAL_DEFAULT; + register_display(&dcl)?; + // start listener + ohui_start_listener(ohui_srv) +} + +struct OhUiTrans { + server: Arc, +} + +impl OhUiTrans { + pub fn new(server: Arc) -> Self { + OhUiTrans { server } + } + + fn handle_disconnect(&self) { + self.server.set_connect(false); + if let Err(e) = ohui_start_listener(self.server.clone()) { + error!("Failed to restart listener: {:?}.", e) + } + } + + fn handle_recv(&self) -> Result<()> { + self.server.handle_recv() + } + + fn get_fd(&self) -> RawFd { + self.server + .get_channel() + .lock() + .unwrap() + .get_stream_raw_fd() + .unwrap() + } +} + +impl EventNotifierHelper for OhUiTrans { + fn internal_notifiers(trans: Arc>) -> Vec { + let trans_ref = trans.clone(); + let handler: Rc = Rc::new(move |event: EventSet, fd: RawFd| { + if event & EventSet::HANG_UP == EventSet::HANG_UP { + error!("OhUiTrans: disconnected."); + trans_ref.lock().unwrap().handle_disconnect(); + // Delete stream notifiers + return Some(gen_delete_notifiers(&[fd])); + } else if event & EventSet::IN == EventSet::IN { + let locked_trans = trans_ref.lock().unwrap(); + // Handle incoming data + if let Err(e) = locked_trans.handle_recv() { + error!("{}.", e); + locked_trans.handle_disconnect(); + return Some(gen_delete_notifiers(&[fd])); + } + } + None + }); + + vec![EventNotifier::new( + NotifierOperation::AddShared, + trans.lock().unwrap().get_fd(), + None, + EventSet::IN | EventSet::HANG_UP, + vec![handler], + )] + } +} + +struct OhUiListener { + server: Arc, +} + +impl OhUiListener { + fn new(server: Arc) -> Self { + OhUiListener { server } + } + + fn handle_connection(&self) -> Result<()> { + // Register OhUiTrans read notifier + ohui_register_event(OhUiTrans::new(self.server.clone()), self.server.clone())?; + self.server.set_connect(true); + // Send window info to the client + self.server.send_window_info(); + Ok(()) + } + + fn accept(&self) -> Result<()> { + self.server.get_channel().lock().unwrap().accept() + } + + fn get_fd(&self) -> RawFd { + self.server + .get_channel() + .lock() + .unwrap() + .get_listener_raw_fd() + } +} + +impl EventNotifierHelper for OhUiListener { + fn internal_notifiers(listener: Arc>) -> Vec { + let listener_ref = listener.clone(); + let handler: Rc = Rc::new(move |_event: EventSet, fd: RawFd| { + let locked_listener = listener_ref.lock().unwrap(); + match locked_listener.accept() { + Ok(()) => match locked_listener.handle_connection() { + Ok(()) => info!("New connection accepted."), + Err(e) => { + error!("Failed to start connection and going to restart listening {e}."); + return None; + } + }, + Err(e) => { + error!("Accept failed: {:?}.", e); + return None; + } + } + // Only support one connection so remove listener + Some(gen_delete_notifiers(&[fd])) + }); + + vec![EventNotifier::new( + NotifierOperation::AddShared, + listener.lock().unwrap().get_fd(), + None, + EventSet::IN, + vec![handler], + )] + } +} + +fn ohui_register_event(e: T, srv: Arc) 
-> Result<()> { + let notifiers = EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(e))); + let mut evts: Vec = Vec::new(); + register_event_helper( + notifiers, + srv.iothread.get_or_init(|| None).as_ref(), + &mut evts, + ) +} + +fn ohui_start_listener(server: Arc) -> Result<()> { + ohui_register_event(OhUiListener::new(server.clone()), server)?; + info!("Successfully start listener."); + Ok(()) +} diff --git a/ui/src/ohui_srv/msg.rs b/ui/src/ohui_srv/msg.rs new file mode 100755 index 0000000000000000000000000000000000000000..1ce2d4465610e4509ee1edbc2f4c2b57249dabc7 --- /dev/null +++ b/ui/src/ohui_srv/msg.rs @@ -0,0 +1,205 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// Stratovirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::mem::size_of; + +use util::byte_code::ByteCode; + +pub const CLIENT_FOCUSOUT_EVENT: u32 = 0x1; +pub const CLIENT_PRESS_BTN: u32 = 0x1; +pub const CLIENT_RELEASE_BTN: u32 = 0x0; +pub const CLIENT_WHEEL_UP: u32 = 0x1; +pub const CLIENT_WHEEL_DOWN: u32 = 0x2; +pub const CLIENT_WHEEL_LEFT: u32 = 0x3; +pub const CLIENT_WHEEL_RIGHT: u32 = 0x4; +pub const CLIENT_MOUSE_BUTTON_LEFT: u32 = 0x0; +pub const CLIENT_MOUSE_BUTTON_RIGHT: u32 = 0x1; +pub const CLIENT_MOUSE_BUTTON_MIDDLE: u32 = 0x2; +pub const CLIENT_MOUSE_BUTTON_BACK: u32 = 0x3; +pub const CLIENT_MOUSE_BUTTON_FORWARD: u32 = 0x4; +pub const EVENT_MSG_HDR_SIZE: u32 = size_of::() as u32; + +#[repr(C)] +#[derive(Debug, Copy, Clone, Default)] +pub enum EventType { + WindowInfo, + MouseButton, + MouseMotion, + Keyboard, + Scroll, + Ledstate, + FrameBufferDirty, + Greet, + CursorDefine, + Focus, + VmCtrlInfo, + #[default] + Max, +} + +impl ByteCode for EventType {} + +#[repr(C, packed)] +#[derive(Debug, Default, Copy, Clone)] +pub struct WindowInfoEvent { + pub width: u32, + pub height: u32, +} + +impl ByteCode for WindowInfoEvent {} + +impl WindowInfoEvent { + pub fn new(width: u32, height: u32) -> Self { + WindowInfoEvent { width, height } + } +} + +#[repr(C, packed)] +#[derive(Debug, Default, Copy, Clone)] +pub struct HWCursorEvent { + pub w: u32, + pub h: u32, + pub hot_x: u32, + pub hot_y: u32, + pub size_per_pixel: u32, +} + +impl HWCursorEvent { + pub fn new(w: u32, h: u32, hot_x: u32, hot_y: u32, size_per_pixel: u32) -> Self { + HWCursorEvent { + w, + h, + hot_x, + hot_y, + size_per_pixel, + } + } +} + +impl ByteCode for HWCursorEvent {} + +#[repr(C, packed)] +#[derive(Debug, Default, Copy, Clone)] +pub struct MouseButtonEvent { + pub button: u32, + pub btn_action: u32, +} + +impl ByteCode for MouseButtonEvent {} + +#[repr(C, packed)] +#[derive(Debug, Default, Copy, Clone)] +pub struct MouseMotionEvent { + pub x: f64, + pub y: f64, +} + +impl ByteCode for MouseMotionEvent {} + +#[repr(C, packed)] +#[derive(Debug, Default, Copy, Clone)] +pub struct KeyboardEvent { + pub key_action: u16, + pub keycode: u16, + pub led_state: u8, + pad: [u8; 3], +} + +impl ByteCode for KeyboardEvent {} + +#[repr(C, packed)] +#[derive(Debug, Default, Copy, Clone)] +pub struct ScrollEvent { + pub direction: u32, + pub delta_x: f64, + pub delta_y: f64, +} + +impl 
ByteCode for ScrollEvent {} + +#[repr(C, packed)] +#[derive(Debug, Default, Copy, Clone)] +pub struct LedstateEvent { + pub state: u32, +} + +impl ByteCode for LedstateEvent {} + +#[repr(C, packed)] +#[derive(Debug, Default, Copy, Clone)] +pub struct GreetEvent { + pad: [u32; 6], + pub token_id: u64, +} + +impl ByteCode for GreetEvent {} + +#[repr(C, packed)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FocusEvent { + pub state: u32, +} + +impl ByteCode for FocusEvent {} + +#[repr(C, packed)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FrameBufferDirtyEvent { + x: u32, + y: u32, + w: u32, + h: u32, +} + +impl ByteCode for FrameBufferDirtyEvent {} + +impl FrameBufferDirtyEvent { + pub fn new(x: u32, y: u32, w: u32, h: u32) -> Self { + FrameBufferDirtyEvent { x, y, w, h } + } +} + +#[repr(C, packed)] +#[derive(Debug, Default, Copy, Clone)] +pub struct EventMsgHdr { + pub magic: u32, + pub size: u32, + pub event_type: EventType, +} + +impl ByteCode for EventMsgHdr {} + +impl EventMsgHdr { + pub fn new(event_type: EventType) -> EventMsgHdr { + EventMsgHdr { + magic: 0, + size: event_msg_data_len(event_type) as u32, + event_type, + } + } +} + +pub fn event_msg_data_len(event_type: EventType) -> usize { + match event_type { + EventType::WindowInfo => size_of::(), + EventType::MouseButton => size_of::(), + EventType::MouseMotion => size_of::(), + EventType::Keyboard => size_of::(), + EventType::Scroll => size_of::(), + EventType::Focus => size_of::(), + EventType::FrameBufferDirty => size_of::(), + EventType::CursorDefine => size_of::(), + EventType::Ledstate => size_of::(), + EventType::Greet => size_of::(), + _ => 0, + } +} diff --git a/ui/src/ohui_srv/msg_handle.rs b/ui/src/ohui_srv/msg_handle.rs new file mode 100755 index 0000000000000000000000000000000000000000..21a11637e5e301771e0a7a001019a6d124fa6cc3 --- /dev/null +++ b/ui/src/ohui_srv/msg_handle.rs @@ -0,0 +1,432 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// Stratovirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
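On the wire every message is a fixed-size EventMsgHdr whose size field announces the length of the typed body that follows, with event_msg_data_len() keeping the two in step. The sketch below frames one complete WindowInfo message with the channel helpers; it is not part of this patch, and the MsgWriter::send_message() used further below presumably does the equivalent internally.

use std::io::Write;

use anyhow::Result;

use super::channel::send_obj;
use super::msg::{EventMsgHdr, EventType, WindowInfoEvent};

/// Write the header, then exactly `hdr.size` bytes of body.
fn send_window_info(stream: &mut dyn Write, width: u32, height: u32) -> Result<()> {
    // EventMsgHdr::new() fills `size` from event_msg_data_len(WindowInfo),
    // i.e. size_of::<WindowInfoEvent>().
    send_obj(stream, &EventMsgHdr::new(EventType::WindowInfo))?;
    send_obj(stream, &WindowInfoEvent::new(width, height))
}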
+ +use std::collections::HashMap; +use std::os::fd::{FromRawFd, RawFd}; +use std::os::unix::net::UnixStream; +use std::sync::{Arc, Mutex, RwLock}; + +use anyhow::{anyhow, bail, Context, Result}; +use log::error; +use util::byte_code::ByteCode; + +use super::{ + channel::{recv_slice, send_obj, OhUiChannel}, + msg::*, +}; +use crate::{ + console::{get_active_console, graphic_hardware_ui_info}, + input::{ + self, get_kbd_led_state, input_button, input_move_abs, input_point_sync, keyboard_update, + release_all_key, trigger_key, Axis, ABS_MAX, CAPS_LOCK_LED, INPUT_BUTTON_WHEEL_DOWN, + INPUT_BUTTON_WHEEL_LEFT, INPUT_BUTTON_WHEEL_RIGHT, INPUT_BUTTON_WHEEL_UP, INPUT_POINT_BACK, + INPUT_POINT_FORWARD, INPUT_POINT_LEFT, INPUT_POINT_MIDDLE, INPUT_POINT_RIGHT, + KEYCODE_CAPS_LOCK, KEYCODE_NUM_LOCK, KEYCODE_SCR_LOCK, NUM_LOCK_LED, SCROLL_LOCK_LED, + }, + keycode::{DpyMod, KeyCode}, +}; + +fn trans_mouse_pos(x: f64, y: f64, w: f64, h: f64) -> (u32, u32) { + if x < 0.0 || y < 0.0 || x > w || y > h { + error!("incorrect mouse pos info, ({}, {}) of {} * {}", x, y, w, h); + return (0, 0); + } + // TODO: we don't take the situation that image is scaled into consideration + // + // The horizontal and vertical directions of the USB tablet are mapped as follows: + // Horizontal: [0, ABS_MAX] in the tablet is mapped to screan pixels [0, PIXELMAX_W] linearly; + // Vertical: [0, ABS_MAX] in the tablet is mapped linearly to [0, PIXELMAX_H] on the screen; + // For example, if the mouse position is (x, y) and the screen size is wh, + // the data converted to the USB tablet is as follows: ((x/w) ABS_MAX, (y/h) * ABS_MAX). + ( + (x * (ABS_MAX as f64) / w) as u32, + (y * (ABS_MAX as f64) / h) as u32, + ) +} + +#[derive(Default)] +struct WindowState { + width: u32, + height: u32, +} + +impl WindowState { + fn update_window_info(&mut self, w: u32, h: u32) { + self.width = w; + self.height = h; + } + + fn press_btn(&mut self, btn: u32) -> Result<()> { + input_button(btn, true)?; + input_point_sync() + } + + fn release_btn(&mut self, btn: u32) -> Result<()> { + input_button(btn, false)?; + input_point_sync() + } + + fn do_key_action(&self, keycode: u16, action: u16) -> Result<()> { + let press = action != 0; + keyboard_update(press, keycode)?; + input::key_event(keycode, press).map_err(|e| { + anyhow!( + "do key event failed: code: {}, action: {}, {:?}", + keycode, + press, + e + ) + }) + } + + fn move_pointer(&mut self, x: f64, y: f64) -> Result<()> { + let (pos_x, pos_y) = trans_mouse_pos(x, y, f64::from(self.width), f64::from(self.height)); + input_move_abs(Axis::X, pos_x)?; + input_move_abs(Axis::Y, pos_y)?; + input_point_sync() + } + + fn sync_kbd_led_state(&mut self, led: u8) -> Result<()> { + let guest_stat = get_kbd_led_state(); + if led != guest_stat { + let sync_bits = led ^ guest_stat; + if (sync_bits & CAPS_LOCK_LED) != 0 { + trigger_key(KEYCODE_CAPS_LOCK)?; + } + if (sync_bits & NUM_LOCK_LED) != 0 { + trigger_key(KEYCODE_NUM_LOCK)?; + } + if (sync_bits & SCROLL_LOCK_LED) != 0 { + trigger_key(KEYCODE_SCR_LOCK)?; + } + } + Ok(()) + } +} + +#[derive(Default)] +pub struct OhUiMsgHandler { + state: Mutex, + hmcode2svcode: HashMap, + reader: Mutex>, + writer: Mutex>, +} + +impl OhUiMsgHandler { + pub fn new() -> Self { + OhUiMsgHandler { + state: Mutex::new(WindowState::default()), + hmcode2svcode: KeyCode::keysym_to_qkeycode(DpyMod::Ohui), + reader: Mutex::new(None), + writer: Mutex::new(None), + } + } + + pub fn update_sock(&self, channel: Arc>) { + let fd = channel.lock().unwrap().get_stream_raw_fd().unwrap(); + 
+        *self.reader.lock().unwrap() = Some(MsgReader::new(fd));
+        *self.writer.lock().unwrap() = Some(MsgWriter::new(fd));
+    }
+
+    pub fn handle_msg(&self, token_id: Arc<RwLock<u64>>) -> Result<()> {
+        let mut locked_reader = self.reader.lock().unwrap();
+        let reader = locked_reader
+            .as_mut()
+            .with_context(|| "handle_msg: no connection established")?;
+        if !reader.recv()? {
+            return Ok(());
+        }
+
+        let hdr = &reader.header;
+        let event_type = hdr.event_type;
+        let body_size = hdr.size as usize;
+        trace::trace_scope_start!(handle_msg, args = (&event_type));
+
+        let body_bytes = reader.body.as_ref().unwrap();
+        if let Err(e) = match event_type {
+            EventType::MouseButton => {
+                let body = MouseButtonEvent::from_bytes(&body_bytes[..]).unwrap();
+                self.handle_mouse_button(body)
+            }
+            EventType::MouseMotion => {
+                let body = MouseMotionEvent::from_bytes(&body_bytes[..]).unwrap();
+                self.handle_mouse_motion(body)
+            }
+            EventType::Keyboard => {
+                let body = KeyboardEvent::from_bytes(&body_bytes[..]).unwrap();
+                self.handle_keyboard(body)
+            }
+            EventType::WindowInfo => {
+                let body = WindowInfoEvent::from_bytes(&body_bytes[..]).unwrap();
+                self.handle_windowinfo(body);
+                Ok(())
+            }
+            EventType::Scroll => {
+                let body = ScrollEvent::from_bytes(&body_bytes[..]).unwrap();
+                self.handle_scroll(body)
+            }
+            EventType::Focus => {
+                let body = FocusEvent::from_bytes(&body_bytes[..]).unwrap();
+                trace::oh_event_focus(body.state);
+                if body.state == CLIENT_FOCUSOUT_EVENT {
+                    reader.clear();
+                    release_all_key()?;
+                }
+                Ok(())
+            }
+            EventType::Ledstate => Ok(()),
+            EventType::Greet => {
+                let body = GreetEvent::from_bytes(&body_bytes[..]).unwrap();
+                trace::oh_event_greet(body.token_id);
+                *token_id.write().unwrap() = body.token_id;
+                Ok(())
+            }
+            _ => {
+                error!(
+                    "unsupported type {:?} and body size {}",
+                    event_type, body_size
+                );
+                trace::oh_event_unsupported_type(&event_type, body_size.try_into().unwrap());
+                Ok(())
+            }
+        } {
+            error!("handle_msg: error: {e}");
+        }
+        reader.clear();
+        Ok(())
+    }
+
+    fn handle_mouse_button(&self, mb: &MouseButtonEvent) -> Result<()> {
+        let (msg_btn, action) = (mb.button, mb.btn_action);
+        trace::oh_event_mouse_button(msg_btn, action);
+        let btn = match msg_btn {
+            CLIENT_MOUSE_BUTTON_LEFT => INPUT_POINT_LEFT,
+            CLIENT_MOUSE_BUTTON_RIGHT => INPUT_POINT_RIGHT,
+            CLIENT_MOUSE_BUTTON_MIDDLE => INPUT_POINT_MIDDLE,
+            CLIENT_MOUSE_BUTTON_FORWARD => INPUT_POINT_FORWARD,
+            CLIENT_MOUSE_BUTTON_BACK => INPUT_POINT_BACK,
+            _ => bail!("Invalid mouse button number {}", msg_btn),
+        };
+        match action {
+            CLIENT_PRESS_BTN => self.state.lock().unwrap().press_btn(btn),
+            CLIENT_RELEASE_BTN => self.state.lock().unwrap().release_btn(btn),
+            _ => bail!("Invalid mouse event number {}", action),
+        }
+    }
+
+    pub fn handle_cursor_define(
+        &self,
+        w: u32,
+        h: u32,
+        hot_x: u32,
+        hot_y: u32,
+        size_per_pixel: u32,
+    ) {
+        if let Some(writer) = self.writer.lock().unwrap().as_mut() {
+            let body = HWCursorEvent::new(w, h, hot_x, hot_y, size_per_pixel);
+            if let Err(e) = writer.send_message(EventType::CursorDefine, &body) {
+                error!("handle_cursor_define: failed to send message with error {e}");
+            }
+        }
+    }
+
+    // NOTE: we only support absolute position info now, that means usb-mouse does not work.
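+    // As a worked example of the mapping done by trans_mouse_pos() (window size assumed for
+    // illustration only): in a 1920 x 1080 window, a pointer at (960.0, 540.0) is halfway
+    // along both axes and is reported to the guest as roughly (ABS_MAX / 2, ABS_MAX / 2),
+    // while (0.0, 0.0) maps to (0, 0) and (1920.0, 1080.0) maps to (ABS_MAX, ABS_MAX).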
+    fn handle_mouse_motion(&self, mm: &MouseMotionEvent) -> Result<()> {
+        trace::oh_event_mouse_motion(mm.x, mm.y);
+        self.state.lock().unwrap().move_pointer(mm.x, mm.y)
+    }
+
+    fn handle_keyboard(&self, ke: &KeyboardEvent) -> Result<()> {
+        self.state
+            .lock()
+            .unwrap()
+            .sync_kbd_led_state(ke.led_state)?;
+        let hmkey = ke.keycode;
+        let keycode = match self.hmcode2svcode.get(&hmkey) {
+            Some(k) => *k,
+            None => {
+                bail!("not supported keycode {}", hmkey);
+            }
+        };
+        trace::oh_event_keyboard(keycode, ke.key_action);
+        self.state
+            .lock()
+            .unwrap()
+            .do_key_action(keycode, ke.key_action)
+    }
+
+    fn handle_scroll(&self, se: &ScrollEvent) -> Result<()> {
+        let org_dir = se.direction;
+        let dir = match org_dir {
+            CLIENT_WHEEL_UP => INPUT_BUTTON_WHEEL_UP,
+            CLIENT_WHEEL_DOWN => INPUT_BUTTON_WHEEL_DOWN,
+            CLIENT_WHEEL_LEFT => INPUT_BUTTON_WHEEL_LEFT,
+            CLIENT_WHEEL_RIGHT => INPUT_BUTTON_WHEEL_RIGHT,
+            _ => bail!("Invalid mouse scroll number {}", org_dir),
+        };
+        self.state.lock().unwrap().press_btn(dir)?;
+        self.state.lock().unwrap().release_btn(dir)?;
+        trace::oh_event_scroll(dir);
+        Ok(())
+    }
+
+    fn handle_windowinfo(&self, wi: &WindowInfoEvent) {
+        let cons = get_active_console();
+
+        for con in cons {
+            let c = match con.upgrade() {
+                Some(c) => c,
+                None => continue,
+            };
+            if let Err(e) = graphic_hardware_ui_info(c, wi.width, wi.height) {
+                error!("handle_windowinfo failed with error {e}");
+            }
+        }
+        trace::oh_event_windowinfo(wi.width, wi.height);
+    }
+
+    pub fn send_windowinfo(&self, w: u32, h: u32) {
+        self.state.lock().unwrap().update_window_info(w, h);
+        if let Some(writer) = self.writer.lock().unwrap().as_mut() {
+            let body = WindowInfoEvent::new(w, h);
+            if let Err(e) = writer.send_message(EventType::WindowInfo, &body) {
+                error!("send_windowinfo: failed to send message with error {e}");
+            }
+        }
+    }
+
+    pub fn handle_dirty_area(&self, x: u32, y: u32, w: u32, h: u32) {
+        if let Some(writer) = self.writer.lock().unwrap().as_mut() {
+            let body = FrameBufferDirtyEvent::new(x, y, w, h);
+            if let Err(e) = writer.send_message(EventType::FrameBufferDirty, &body) {
+                error!("handle_dirty_area: failed to send message with error {e}");
+            }
+        }
+    }
+
+    pub fn reset(&self) {
+        *self.reader.lock().unwrap() = None;
+        *self.writer.lock().unwrap() = None;
+    }
+}
+
+struct MsgReader {
+    /// Cache for the header.
+    header: EventMsgHdr,
+    /// Received byte size of the header.
+    header_ready: usize,
+    /// Cache for the body.
+    body: Option<Vec<u8>>,
+    /// Received byte size of the body.
+    body_ready: usize,
+    /// UnixStream to read from.
+    sock: UnixStream,
+}
+
+impl MsgReader {
+    pub fn new(fd: RawFd) -> Self {
+        MsgReader {
+            header: EventMsgHdr::default(),
+            header_ready: 0,
+            body: None,
+            body_ready: 0,
+            // SAFETY: The fd is valid only when the new connection has been established
+            // and the MsgReader instance would be destroyed when disconnected.
+            sock: unsafe { UnixStream::from_raw_fd(fd) },
+        }
+    }
+
+    pub fn recv(&mut self) -> Result<bool> {
+        if self.recv_header()? {
+            self.check_header()?;
+            return self.recv_body();
+        }
+        Ok(false)
+    }
+
+    fn clear(&mut self) {
+        self.header_ready = 0;
+        self.body_ready = 0;
+        self.body = None;
+    }
+
+    fn check_header(&mut self) -> Result<()> {
+        let expected_size = event_msg_data_len(self.header.event_type);
+        if expected_size != self.header.size as usize {
+            self.clear();
+            bail!(
+                "{:?} data len is wrong, we want {}, but receive {}",
+                self.header.event_type,
+                expected_size,
+                self.header.size as usize,
+            );
+        }
+        Ok(())
+    }
+
+    fn recv_header(&mut self) -> Result<bool> {
+        if self.header_ready == EVENT_MSG_HDR_SIZE as usize {
+            return Ok(true);
+        }
+
+        let buf = self.header.as_mut_bytes();
+        self.header_ready += recv_slice(&mut self.sock, &mut buf[self.header_ready..])?;
+        Ok(self.header_ready == EVENT_MSG_HDR_SIZE as usize)
+    }
+
+    fn recv_body(&mut self) -> Result<bool> {
+        let body_size = self.header.size as usize;
+        if body_size == self.body_ready {
+            return Ok(true);
+        }
+
+        // The caller makes sure that self.clear() is
+        // called after a complete message has been received.
+        if self.body.is_none() {
+            self.body = Some(Vec::with_capacity(body_size));
+        }
+        let buf = self.body.as_mut().unwrap();
+        // SAFETY: 1. we guarantee a new message has a new body, so
+        // buf's capacity is equal to body_size. 2. buf has 'u8'
+        // type elements, it will be initialized by zero.
+        unsafe {
+            buf.set_len(body_size);
+        }
+        self.body_ready += recv_slice(&mut self.sock, &mut buf[self.body_ready..])?;
+
+        Ok(self.body_ready == body_size)
+    }
+}
+
+struct MsgWriter {
+    sock: UnixStream,
+}
+
+impl MsgWriter {
+    fn new(fd: RawFd) -> Self {
+        Self {
+            // SAFETY: The fd is valid only when the new connection has been established
+            // and the MsgWriter instance would be destroyed when disconnected.
+            sock: unsafe { UnixStream::from_raw_fd(fd) },
+        }
+    }
+
+    fn send_message<T: ByteCode>(&mut self, t: EventType, body: &T) -> Result<()> {
+        let hdr = EventMsgHdr::new(t);
+        send_obj(&mut self.sock, &hdr)?;
+        send_obj(&mut self.sock, body)
+    }
+}
diff --git a/ui/src/pixman.rs b/ui/src/pixman.rs
new file mode 100644
index 0000000000000000000000000000000000000000..2163210659e497e4f07030548a0c09a0df7ff268
--- /dev/null
+++ b/ui/src/pixman.rs
@@ -0,0 +1,717 @@
+// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
+
+use bitintr::Popcnt;
+use std::ptr;
+use util::pixman::{
+    pixman_color_t, pixman_format_a, pixman_format_b, pixman_format_bpp, pixman_format_code_t,
+    pixman_format_depth, pixman_format_g, pixman_format_r, pixman_image_composite,
+    pixman_image_create_bits, pixman_image_create_solid_fill, pixman_image_get_data,
+    pixman_image_get_format, pixman_image_get_height, pixman_image_get_stride,
+    pixman_image_get_width, pixman_image_ref, pixman_image_t, pixman_image_unref, pixman_op_t,
+};
+
+const MAX_IMAGE_SIZE: i32 = 65535;
+
+#[derive(Clone, Default)]
+pub struct ColorInfo {
+    /// Mask color.
+    pub mask: u32,
+    /// Shift to the lowest bit.
+    pub shift: u8,
+    /// Max value.
+    pub max: u8,
+    /// Color bits.
+ pub bits: u8, +} + +impl ColorInfo { + pub fn set_color_info(&mut self, shift: u8, max: u16) { + self.mask = u32::from(max) << u32::from(shift); + self.shift = shift; + self.max = if max == 0 { 0xFF } else { max as u8 }; + self.bits = max.popcnt() as u8; + } +} + +#[derive(Clone, Default)] +pub struct PixelFormat { + /// Bits per pixel. + pub pixel_bits: u8, + /// Bytes per pixel. + pub pixel_bytes: u8, + /// Color depth. + pub depth: u8, + /// Red info. + pub red: ColorInfo, + /// Green info. + pub green: ColorInfo, + /// Blue info. + pub blue: ColorInfo, + /// Alpha channel. + pub alpha_chl: ColorInfo, +} + +impl PixelFormat { + // Pixelformat_from_pixman. + pub fn init_pixelformat(&mut self) { + let fmt = pixman_format_code_t::PIXMAN_x8r8g8b8 as u32; + self.pixel_bits = pixman_format_bpp(fmt); + self.pixel_bytes = self.pixel_bits / 8; + self.depth = pixman_format_depth(fmt); + + self.alpha_chl.bits = pixman_format_a(fmt); + self.red.bits = pixman_format_r(fmt); + self.green.bits = pixman_format_g(fmt); + self.blue.bits = pixman_format_b(fmt); + + self.alpha_chl.shift = self.blue.bits + self.green.bits + self.red.bits; + self.red.shift = self.blue.bits + self.green.bits; + self.green.shift = self.blue.bits; + self.blue.shift = 0; + + self.alpha_chl.max = ((1 << self.alpha_chl.bits) - 1) as u8; + self.red.max = ((1 << self.red.bits) - 1) as u8; + self.green.max = ((1 << self.green.bits) - 1) as u8; + self.blue.max = ((1 << self.blue.bits) - 1) as u8; + + self.alpha_chl.mask = u32::from( + self.alpha_chl + .max + .wrapping_shl(u32::from(self.alpha_chl.shift)), + ); + self.red.mask = u32::from(self.red.max.wrapping_shl(u32::from(self.red.shift))); + self.green.mask = u32::from(self.green.max.wrapping_shl(u32::from(self.green.shift))); + self.blue.mask = u32::from(self.blue.max.wrapping_shl(u32::from(self.blue.shift))); + } + + pub fn is_default_pixel_format(&self) -> bool { + // Check if type is PIXMAN_TYPE_ARGB. + if self.red.shift <= self.green.shift + || self.green.shift <= self.blue.shift + || self.blue.shift != 0 + { + return false; + } + + // Check if format is PIXMAN_x8r8g8b8. + if self.pixel_bits != 32 + || self.alpha_chl.bits != 0 + || self.red.bits != 8 + || self.green.bits != 8 + || self.blue.bits != 8 + { + return false; + } + + true + } +} + +// SAFETY: Before calling the c function of pixman. All +// parameters passed of the function have been checked. +pub fn get_image_width(image: *mut pixman_image_t) -> i32 { + if image.is_null() { + return 0; + } + + // SAFETY: The reason is the same as above. + unsafe { pixman_image_get_width(image as *mut pixman_image_t) } +} + +pub fn get_image_height(image: *mut pixman_image_t) -> i32 { + if image.is_null() { + return 0; + } + + // SAFETY: The reason is the same as above. + unsafe { pixman_image_get_height(image as *mut pixman_image_t) } +} + +pub fn get_image_stride(image: *mut pixman_image_t) -> i32 { + if image.is_null() { + return 0; + } + + // SAFETY: The reason is the same as above. + unsafe { pixman_image_get_stride(image as *mut pixman_image_t) } +} + +pub fn get_image_data(image: *mut pixman_image_t) -> *mut u32 { + if image.is_null() { + return ptr::null_mut() as *mut u32; + } + + // SAFETY: The reason is the same as above. + unsafe { pixman_image_get_data(image as *mut pixman_image_t) } +} + +pub fn get_image_format(image: *mut pixman_image_t) -> pixman_format_code_t { + if image.is_null() { + return pixman_format_code_t::PIXMAN_x8r8g8b8; + } + + // SAFETY: The reason is the same as above. 
+ unsafe { pixman_image_get_format(image as *mut pixman_image_t) } +} + +pub fn create_pixman_image( + image_format: pixman_format_code_t, + width: i32, + height: i32, + image_data: *mut u32, + stride: i32, +) -> *mut pixman_image_t { + if !(0..MAX_IMAGE_SIZE).contains(&width) || !(0..MAX_IMAGE_SIZE).contains(&height) { + return ptr::null_mut() as *mut pixman_image_t; + } + + // SAFETY: The reason is the same as above. + unsafe { pixman_image_create_bits(image_format, width, height, image_data as *mut u32, stride) } +} + +/// Bpp: bit per pixel +pub fn bytes_per_pixel() -> usize { + ((pixman_format_bpp(pixman_format_code_t::PIXMAN_x8r8g8b8 as u32) + 7) / 8) as usize +} + +/// Decrease the reference of image +/// # Arguments +/// +/// * `image` - the pointer to image in pixman +pub fn unref_pixman_image(image: *mut pixman_image_t) { + if image.is_null() { + return; + } + // SAFETY: The reason is the same as above. + unsafe { pixman_image_unref(image as *mut pixman_image_t) }; +} + +/// Increase the reference of image +/// # Arguments +/// +/// * `image` - the pointer to image in pixman +pub fn ref_pixman_image(image: *mut pixman_image_t) -> *mut pixman_image_t { + if image.is_null() { + return ptr::null_mut() as *mut pixman_image_t; + } + // SAFETY: The reason is the same as above. + unsafe { pixman_image_ref(image as *mut pixman_image_t) } +} + +/// Create a pixman image with a height of 1 +pub fn pixman_image_linebuf_create( + image_format: pixman_format_code_t, + width: i32, +) -> *mut pixman_image_t { + if !(0..MAX_IMAGE_SIZE).contains(&width) { + return ptr::null_mut() as *mut pixman_image_t; + } + // SAFETY: The reason is the same as above. + unsafe { pixman_image_create_bits(image_format, width, 1, ptr::null_mut(), 0) } +} + +pub fn pixman_image_linebuf_fill( + line_buf: *mut pixman_image_t, + fb: *mut pixman_image_t, + width: i32, + x: i32, + y: i32, +) { + if line_buf.is_null() + || fb.is_null() + || !(0..MAX_IMAGE_SIZE).contains(&width) + || !(0..MAX_IMAGE_SIZE).contains(&x) + || !(0..MAX_IMAGE_SIZE).contains(&y) + { + return; + }; + // SAFETY: The reason is the same as above. 
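+    // Note on the call below: pixman_image_composite() takes (op, src, mask, dst, src_x, src_y,
+    // mask_x, mask_y, dst_x, dst_y, width, height), so this copies a `width` x 1 strip starting
+    // at (x, y) in `fb` into the top-left corner of `line_buf`.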
+ unsafe { + pixman_image_composite( + pixman_op_t::PIXMAN_OP_SRC, + fb as *mut pixman_image_t, + ptr::null_mut(), + line_buf as *mut pixman_image_t, + x as i16, + y as i16, + 0, + 0, + 0, + 0, + width as u16, + 1, + ); + }; +} + +pub enum ColorNames { + ColorBlack = 0, + ColorBlue = 1, + ColorGreen = 2, + ColorCyan = 3, + ColorRed = 4, + ColorMagenta = 5, + ColorYellow = 6, + ColorWhite = 7, +} + +pub const COLOR_TABLE_RGB: [[pixman_color_t; 8]; 2] = [ + [ + pixman_color_t { + red: 0x00 << 8, + green: 0x00 << 8, + blue: 0x00 << 8, + alpha: 0xffff, + }, // black + pixman_color_t { + red: 0x00 << 8, + green: 0x00 << 8, + blue: 0xaa << 8, + alpha: 0xffff, + }, // blue + pixman_color_t { + red: 0x00 << 8, + green: 0xaa << 8, + blue: 0x00 << 8, + alpha: 0xffff, + }, // green + pixman_color_t { + red: 0x00 << 8, + green: 0xaa << 8, + blue: 0xaa << 8, + alpha: 0xffff, + }, // cyan + pixman_color_t { + red: 0xaa << 8, + green: 0x00 << 8, + blue: 0x00 << 8, + alpha: 0xffff, + }, // red + pixman_color_t { + red: 0xaa << 8, + green: 0x00 << 8, + blue: 0xaa << 8, + alpha: 0xffff, + }, // magenta + pixman_color_t { + red: 0xaa << 8, + green: 0xaa << 8, + blue: 0x00 << 8, + alpha: 0xffff, + }, // yellow + pixman_color_t { + red: 0xaa << 8, + green: 0xaa << 8, + blue: 0xaa << 8, + alpha: 0xffff, + }, // white + ], + [ + pixman_color_t { + red: 0x00 << 8, + green: 0x00 << 8, + blue: 0x00 << 8, + alpha: 0xffff, + }, // black + pixman_color_t { + red: 0x00 << 8, + green: 0x00 << 8, + blue: 0xff << 8, + alpha: 0xffff, + }, // blue + pixman_color_t { + red: 0x00 << 8, + green: 0xff << 8, + blue: 0x00 << 8, + alpha: 0xffff, + }, // green + pixman_color_t { + red: 0x00 << 8, + green: 0xff << 8, + blue: 0xff << 8, + alpha: 0xffff, + }, // cyan + pixman_color_t { + red: 0xff << 8, + green: 0x00 << 8, + blue: 0x00 << 8, + alpha: 0xffff, + }, // red + pixman_color_t { + red: 0xff << 8, + green: 0x00 << 8, + blue: 0xff << 8, + alpha: 0xffff, + }, // magenta + pixman_color_t { + red: 0xff << 8, + green: 0xff << 8, + blue: 0x00 << 8, + alpha: 0xffff, + }, // yellow + pixman_color_t { + red: 0xff << 8, + green: 0xff << 8, + blue: 0xff << 8, + alpha: 0xffff, + }, // white + ], +]; + +const VGA_FONTS: [u16; 256 * 16] = [ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, 0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7e, 0xff, 0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, 0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x18, 0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd, 0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x1e, 0x0e, 0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30, 
0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7f, 0x63, 0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x02, 0x06, 0x0e, 0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7f, 0xdb, 0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c, 0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c, 0x18, 0x18, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18, 0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x38, 0x6c, 0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x30, 0x18, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3c, 0x66, 0xc3, 0xc3, 0xdb, 0xdb, 0xc3, 0xc3, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x7e, 
0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7c, 0xc6, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, 0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xfe, 0xc0, 0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xde, 0xde, 0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xf8, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x1e, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xc3, 0xe7, 0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c, 0x0c, 0x0e, 0x00, 0x00, + 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, 0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, + 0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xe0, 0x60, 0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, + 0x00, 0x00, 0xe0, 0x60, 0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, + 0x00, 0x00, 0xe0, 0x60, 0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60, 0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x10, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, + 0x00, 0x00, 0x00, 0x00, 
0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x70, 0x18, 0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00, + 0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x10, 0x38, 0x6c, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x38, 0x6c, 0x38, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06, 0x3c, 0x00, 0x00, 0x00, + 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x18, 0x3c, 0x66, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, + 0x38, 0x6c, 0x38, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b, 0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x6c, 0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xc6, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00, + 0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x18, 0x18, 0x7e, 0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xfc, 0x66, 0x66, 0x7c, 0x62, 0x66, 0x6f, 
0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0xd8, 0x70, 0x00, 0x00, + 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x0c, 0x18, 0x30, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x76, 0xdc, 0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, + 0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x38, 0x6c, 0x6c, 0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06, 0x0c, 0x1f, 0x00, 0x00, + 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36, 0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, + 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, + 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x36, 0x36, 0x36, 0x36, 0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 
0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, + 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60, 0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x38, 0x6c, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x0f, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +]; + +pub fn pixman_glyph_from_vgafont(height: u32, ch: u32) -> *mut pixman_image_t { + let glyph = create_pixman_image( + pixman_format_code_t::PIXMAN_a8, + 8, + height as i32, + ptr::null_mut(), + 0, + ); + let data = get_image_data(glyph) as *mut u8; + if data.is_null() { + return glyph; + } + + let mut data_index: usize = 0; + let mut font_index: usize = height as usize * ch as usize; + let slice = + // SAFETY: The pointer of data can be guaranteed not null. 
+        unsafe {
+            std::slice::from_raw_parts_mut(data, height as usize * 8)
+        };
+
+    for _ in 0..height {
+        for x in 0..8 {
+            if VGA_FONTS[font_index] & (1 << (7 - x)) > 0 {
+                slice[data_index] = 0xff;
+            } else {
+                slice[data_index] = 0x00;
+            };
+            data_index += 1;
+        }
+        font_index += 1;
+    }
+    glyph
+}
+
+pub fn pixman_glyph_render(
+    glyph: *mut pixman_image_t,
+    surface: *mut pixman_image_t,
+    fgcolor: *const pixman_color_t,
+    bgcolor: *const pixman_color_t,
+    rec: (i32, i32),
+    cw: i32,
+    ch: i32,
+) {
+    let glyph = glyph as *mut pixman_image_t;
+    let surface = surface as *mut pixman_image_t;
+    let fgcolor = fgcolor as *const pixman_color_t;
+    let bgcolor = bgcolor as *const pixman_color_t;
+    let (x, y) = rec;
+
+    // SAFETY: All pointers can be guaranteed not to be null.
+    unsafe {
+        let ifg = pixman_image_create_solid_fill(fgcolor);
+        let ibg = pixman_image_create_solid_fill(bgcolor);
+
+        pixman_image_composite(
+            pixman_op_t::PIXMAN_OP_SRC,
+            ibg,
+            ptr::null_mut(),
+            surface,
+            0,
+            0,
+            0,
+            0,
+            (cw * x) as i16,
+            (ch * y) as i16,
+            cw as u16,
+            ch as u16,
+        );
+
+        pixman_image_composite(
+            pixman_op_t::PIXMAN_OP_OVER,
+            ifg,
+            glyph,
+            surface,
+            0,
+            0,
+            0,
+            0,
+            (cw * x) as i16,
+            (ch * y) as i16,
+            cw as u16,
+            ch as u16,
+        );
+        unref_pixman_image(ifg);
+        unref_pixman_image(ibg);
+    }
+}
diff --git a/ui/src/utils.rs b/ui/src/utils.rs
new file mode 100644
index 0000000000000000000000000000000000000000..c61ae8552d74edbe50b90f5de815653c945d5222
--- /dev/null
+++ b/ui/src/utils.rs
@@ -0,0 +1,203 @@
+// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
+
+use std::collections::LinkedList;
+use std::io::Read;
+
+/// Byte buffers linked in a list, to avoid the
+/// extra copies when appending a new byte buffer.
+pub struct BuffPool {
+    /// Cache received data.
+    buf_list: LinkedList<Vec<u8>>,
+    /// Limit size of the buffpool.
+    limit: Option<usize>,
+    /// Total length of the buffers.
+    len: usize,
+}
+
+impl Default for BuffPool {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl BuffPool {
+    pub fn new() -> Self {
+        Self {
+            buf_list: LinkedList::new(),
+            limit: None,
+            len: 0,
+        }
+    }
+
+    /// Update the length of buf_list.
+    fn update_len(&mut self) {
+        let mut len: usize = 0;
+        for bytes in &self.buf_list {
+            len += bytes.len();
+        }
+        self.len = len;
+    }
+
+    /// Return the length of the pool.
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Whether the pool is empty.
+    pub fn is_empty(&self) -> bool {
+        self.buf_list.is_empty()
+    }
+
+    /// For a given length of buffer data, check whether there is
+    /// enough space left to store it.
+    pub fn is_enough(&self, require: usize) -> bool {
+        if let Some(limit) = self.limit {
+            if self.len() + require > limit {
+                return false;
+            }
+        }
+        true
+    }
+
+    /// Set the limitation for the bufferpool.
+    ///
+    /// # Example
+    /// ```rust
+    /// use ui::utils::BuffPool;
+    ///
+    /// let mut buffpool = BuffPool::new();
+    /// buffpool.set_limit(Some(1));
+    /// assert!(!buffpool.is_enough(2));
+    /// ```
+    pub fn set_limit(&mut self, limit: Option<usize>) {
+        self.limit = limit;
+    }
+
+    /// Add data to the bufferpool. If the remaining
+    /// free space is not enough, the data will not be appended. So it is
+    /// recommended to call is_enough() before this function.
+    ///
+    /// # Example
+    /// ```rust
+    /// use ui::utils::BuffPool;
+    ///
+    /// let mut buffpool = BuffPool::new();
+    /// buffpool.append_limit((0_u8).to_be_bytes().to_vec());
+    /// ```
+    pub fn append_limit(&mut self, buf: Vec<u8>) {
+        let len = buf.len();
+        if len == 0 {
+            return;
+        }
+        if self.is_enough(len) {
+            self.buf_list.push_back(buf);
+        }
+        self.update_len();
+    }
+
+    /// Read the first n bytes.
+    ///
+    /// # Example
+    /// ```rust
+    /// use ui::utils::BuffPool;
+    ///
+    /// let mut buffpool = BuffPool::new();
+    /// buffpool.append_limit((0x12345678 as u32).to_be_bytes().to_vec());
+    /// let mut buf: Vec<u8> = vec![0_u8; 4];
+    /// buffpool.read_front(&mut buf, 4);
+    /// assert_eq!(buf, vec![18, 52, 86, 120]);
+    /// ```
+    pub fn read_front(&mut self, buf: &mut [u8], len: usize) -> usize {
+        if buf.len() < len {
+            return 0_usize;
+        }
+
+        let mut offset: usize = 0;
+        for bytes in &self.buf_list {
+            if let Ok(n) = bytes.as_slice().read(&mut buf[offset..]) {
+                offset += n;
+            } else {
+                return 0_usize;
+            }
+            if offset >= len {
+                break;
+            }
+        }
+        offset
+    }
+
+    /// Remove the first n bytes.
+    ///
+    /// # Example
+    /// ```rust
+    /// use ui::utils::BuffPool;
+    ///
+    /// let mut buffpool = BuffPool::new();
+    /// buffpool.append_limit((0x12345678 as u32).to_be_bytes().to_vec());
+    /// buffpool.remove_front(1);
+    /// let mut buf: Vec<u8> = vec![0_u8; 3];
+    /// buffpool.read_front(&mut buf, 3);
+    /// assert_eq!(buf, vec![52, 86, 120]);
+    /// ```
+    pub fn remove_front(&mut self, mut len: usize) {
+        while let Some(mut bytes) = self.buf_list.pop_front() {
+            if len < bytes.len() {
+                self.buf_list.push_front(bytes.split_off(len));
+                break;
+            } else {
+                len -= bytes.len();
+            }
+        }
+        self.update_len();
+    }
+
+    /// Read the first chunk of bytes in the linked list.
+    pub fn read_front_chunk(&mut self) -> Option<&Vec<u8>> {
+        self.buf_list.front()
+    }
+
+    /// Remove the first chunk of bytes in the linked list.
+    pub fn remove_front_chunk(&mut self) {
+        if !self.is_empty() {
+            self.buf_list.pop_front();
+        }
+        self.update_len();
+    }
+}
+#[cfg(test)]
+mod tests {
+    use crate::utils::BuffPool;
+
+    #[test]
+    fn test_buffpool_base() {
+        let mut buffpool = BuffPool::new();
+        buffpool.set_limit(Some(7));
+        buffpool.append_limit(0x12345678_u32.to_be_bytes().to_vec());
+        buffpool.append_limit(0x12_u8.to_be_bytes().to_vec());
+        buffpool.append_limit(0x1234_u16.to_be_bytes().to_vec());
+        assert!(buffpool.len() == 7_usize);
+        buffpool.remove_front(1);
+        assert!(buffpool.len() == 6_usize);
+        let mut buf: Vec<u8> = vec![0_u8; 4];
+        buffpool.read_front(&mut buf, 4);
+        assert!(buf == vec![52, 86, 120, 18]);
+
+        let ans: Vec<Vec<u8>> = vec![vec![52, 86, 120], vec![18], vec![18, 52]];
+        let mut idx: usize = 0;
+        while let Some(buf) = buffpool.read_front_chunk() {
+            assert_eq!(ans[idx], buf.to_vec());
+            idx += 1;
+            buffpool.remove_front_chunk();
+        }
+    }
+}
diff --git a/ui/src/vnc/auth_sasl.rs b/ui/src/vnc/auth_sasl.rs
new file mode 100644
index 0000000000000000000000000000000000000000..6465c012c7c1b3da2b31a8662661e05c70659eac
--- /dev/null
+++ b/ui/src/vnc/auth_sasl.rs
@@ -0,0 +1,530 @@
+// Copyright (c) 2022 Huawei Technologies Co.,Ltd.
All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::ffi::{CStr, CString}; +use std::ptr; + +use anyhow::{anyhow, Result}; +use libc::{c_char, c_int, c_uint, c_void}; +use log::info; +use sasl2_sys::prelude::{ + sasl_conn_t, sasl_dispose, sasl_getprop, sasl_listmech, sasl_security_properties_t, + sasl_server_init, sasl_server_new, sasl_server_start, sasl_server_step, sasl_setprop, + sasl_ssf_t, SASL_CONTINUE, SASL_OK, SASL_SEC_PROPS, SASL_SSF, SASL_SSF_EXTERNAL, + SASL_SUCCESS_DATA, +}; +use sasl2_sys::sasl::SASL_USERNAME; + +use crate::{ + error::VncError, + vnc::client_io::{vnc_flush, vnc_write, ClientIoHandler, APP_NAME}, +}; +use util::byte_code::ByteCode; + +/// Vnc Service. +const SERVICE: &str = "vnc"; +const MECHNAME_MAX_LEN: u32 = 100; +const MECHNAME_MIN_LEN: u32 = 1; +const SASL_DATA_MAX_LEN: u32 = 1024 * 1024; +/// Minimum supported encryption length of ssf layer in sasl. +const MIN_SSF_LENGTH: usize = 56; + +/// Authentication and encryption method. +#[derive(Debug, Clone, Copy)] +pub enum SubAuthState { + /// Send plain Message + no auth. + VncAuthVencryptPlain = 256, + /// Tls vencry with anon + no auth. + VncAuthVencryptTlNone = 257, + /// Tls vencrypt with x509 + no auth. + VncAuthVencryptX509None = 260, + /// Tls vencrypt with x509 + sasl. + VncAuthVencryptX509Sasl = 263, + /// Tls vencrypt + sasl. + VncAuthVencryptTlssasl = 264, +} + +/// Configuration for authentication. +/// Identity: authentication user. +#[derive(Debug, Clone)] +pub struct SaslAuth { + pub identity: String, +} + +impl SaslAuth { + pub fn new(identity: String) -> Self { + SaslAuth { identity } + } +} + +/// Struct of sasl authentication. +#[derive(Debug, Clone)] +pub struct SaslConfig { + /// State of sasl connection . + sasl_conn: *mut sasl_conn_t, + /// Mech list server support. + mech_list: String, + /// Authentication mechanism currently in use. + mech_name: String, + /// State of auth. + sasl_stage: SaslStage, + /// Security layer in sasl. + want_ssf: bool, + /// Strength of ssf. + run_ssf: u32, +} + +impl Default for SaslConfig { + fn default() -> Self { + SaslConfig { + sasl_conn: ptr::null_mut() as *mut sasl_conn_t, + mech_list: String::new(), + mech_name: String::new(), + sasl_stage: SaslStage::SaslServerStart, + want_ssf: false, + run_ssf: 0, + } + } +} + +/// Authentication stage. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum SaslStage { + SaslServerStart, + SaslServerStep, +} + +impl ClientIoHandler { + /// Get length of mechname send form client. + pub fn get_mechname_length(&mut self) -> Result<()> { + let buf = self.read_incoming_msg(); + let len = u32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]]); + trace::vnc_client_get_mechname_length(&len); + if !(MECHNAME_MIN_LEN..MECHNAME_MAX_LEN).contains(&len) { + return Err(anyhow!(VncError::AuthFailed( + "get_mechname_length".to_string(), + "SASL mechname too short or too long".to_string() + ))); + } + + self.update_event_handler(len as usize, ClientIoHandler::get_sasl_mechname); + Ok(()) + } + + /// Start sasl authentication. + /// 1. Sals server init. + /// 2. 
Get the mechlist support by Sasl server. + /// 3. Send the mechlist to client. + pub fn start_sasl_auth(&mut self) -> Result<()> { + self.sasl_server_init()?; + + self.set_ssf_for_sasl()?; + + self.send_mech_list()?; + + Ok(()) + } + + /// Get authentication mechanism supported by client. + pub fn get_sasl_mechname(&mut self) -> Result<()> { + let buf = self.read_incoming_msg(); + let mech_name = String::from_utf8_lossy(&buf).to_string(); + trace::vnc_client_get_mechname(&mech_name); + + let mut security = self.server.security_type.borrow_mut(); + let mech_list: Vec<&str> = security.saslconfig.mech_list.split(',').collect(); + for mech in mech_list { + if mech_name == *mech { + security.saslconfig.mech_name = mech_name; + break; + } + } + // Unsupported mechanism. + if security.saslconfig.mech_name.is_empty() { + return Err(anyhow!(VncError::AuthFailed( + "get_sasl_mechname".to_string(), + "Unsupported mechanism".to_string() + ))); + } + drop(security); + + self.update_event_handler(4, ClientIoHandler::get_authmessage_length); + Ok(()) + } + + /// Length of client authentication message. + fn get_authmessage_length(&mut self) -> Result<()> { + let buf = self.read_incoming_msg(); + let buf = [buf[0], buf[1], buf[2], buf[3]]; + let len = u32::from_be_bytes(buf); + trace::vnc_client_get_authmessage_length(&len); + + if len > SASL_DATA_MAX_LEN { + return Err(anyhow!(VncError::AuthFailed( + "get_authmessage_length".to_string(), + "SASL start len too large".to_string() + ))); + } + + if len == 0 { + return self.client_sasl_auth(); + } + self.update_event_handler(len as usize, ClientIoHandler::client_sasl_auth); + Ok(()) + } + + /// Receive the authentication information from client and return the result. + fn client_sasl_auth(&mut self) -> Result<()> { + let buf = self.read_incoming_msg(); + + let mut client_data = buf.to_vec(); + let mut client_len: c_uint = 0; + if self.expect > 0 { + client_len = (self.expect - 1) as c_uint; + client_data[self.expect - 1] = 0_u8; + } + + let server = self.server.clone(); + let client = self.client.clone(); + let mut security = server.security_type.borrow_mut(); + let mut serverout: *const c_char = ptr::null_mut(); + let mut serverout_len: c_uint = 0; + let mech_name = CString::new(security.saslconfig.mech_name.as_str())?; + + // Start authentication. + let err: c_int = match security.saslconfig.sasl_stage { + // SAFETY: sasl_server_start() and sasl_server_step() is C function. All parameters + // passed of the function have been checked. Memory will be allocated for the incoming + // pointer inside the function. + SaslStage::SaslServerStart => unsafe { + sasl_server_start( + security.saslconfig.sasl_conn, + mech_name.as_ptr(), + client_data.as_ptr() as *const c_char, + client_len, + &mut serverout, + &mut serverout_len, + ) + }, + // SAFETY: The reason is same as above. + SaslStage::SaslServerStep => unsafe { + sasl_server_step( + security.saslconfig.sasl_conn, + client_data.as_ptr() as *const c_char, + client_len, + &mut serverout, + &mut serverout_len, + ) + }, + }; + + trace::vnc_client_sasl_auth(&err, &serverout_len); + + if err != SASL_OK && err != SASL_CONTINUE { + // SAFETY: sasl_dispose() is C function. All parameters passed of the + // function have been checked. + unsafe { sasl_dispose(&mut security.saslconfig.sasl_conn) } + return Err(anyhow!(VncError::AuthFailed( + "client_sasl_auth".to_string(), + "Auth failed!".to_string() + ))); + } + if serverout_len > SASL_DATA_MAX_LEN { + // SAFETY: The reason is same as above. 
+ unsafe { sasl_dispose(&mut security.saslconfig.sasl_conn) } + return Err(anyhow!(VncError::AuthFailed( + "client_sasl_auth".to_string(), + "SASL data too long".to_string() + ))); + } + + let mut buf = Vec::new(); + if serverout_len > 0 { + // Authentication related information. + // SAFETY: pointer of serverout can be guaranteed not null. + let serverout = unsafe { CStr::from_ptr(serverout as *const c_char) }; + let auth_message = String::from(serverout.to_str().unwrap_or("")); + buf.append(&mut (serverout_len + 1).to_be_bytes().to_vec()); + buf.append(&mut auth_message.as_bytes().to_vec()); + } else { + buf.append(&mut (0_u32).to_be_bytes().to_vec()); + } + + if err == SASL_OK { + buf.append(&mut (1_u8).as_bytes().to_vec()); + } else if err == SASL_CONTINUE { + buf.append(&mut (0_u8).as_bytes().to_vec()); + } + drop(security); + + if err == SASL_CONTINUE { + // Authentication continue. + let mut security = server.security_type.borrow_mut(); + security.saslconfig.sasl_stage = SaslStage::SaslServerStep; + self.update_event_handler(4, ClientIoHandler::get_authmessage_length); + drop(security); + return Ok(()); + } else { + if let Err(err) = self.sasl_check_ssf() { + // Reject auth: the strength of ssf is too weak. + auth_reject(&mut buf); + vnc_write(&client, buf); + vnc_flush(&client); + return Err(err); + } + + if let Err(err) = self.sasl_check_authz() { + // Reject auth: wrong sasl username. + auth_reject(&mut buf); + vnc_write(&client, buf); + vnc_flush(&client); + return Err(err); + } + // Accept auth. + buf.append(&mut (0_u32).as_bytes().to_vec()); + } + + vnc_write(&client, buf); + vnc_flush(&client); + self.update_event_handler(1, ClientIoHandler::handle_client_init); + Ok(()) + } + + /// Sasl server init. + fn sasl_server_init(&mut self) -> Result<()> { + let mut err: c_int; + let service = CString::new(SERVICE)?; + let appname = CString::new(APP_NAME)?; + let local_addr = self.stream.local_addr()?.to_string().replace(':', ";"); + let remote_addr = self.stream.peer_addr()?.to_string().replace(':', ";"); + info!("local_addr: {} remote_addr: {}", local_addr, remote_addr); + let local_addr = CString::new(local_addr)?; + let remote_addr = CString::new(remote_addr)?; + // SAFETY: sasl_server_init() and sasl_server_new() is C function. All parameters passed of + // the function have been checked. Memory will be allocated for the incoming pointer inside + // the function. Sasl server init. + unsafe { + err = sasl_server_init(ptr::null_mut(), appname.as_ptr()); + } + if err != SASL_OK { + return Err(anyhow!(VncError::AuthFailed( + "sasl_server_init".to_string(), + format!("SASL_FAIL error code {}", err) + ))); + } + let mut saslconfig = SaslConfig::default(); + // SAFETY: The reason is same as above. + unsafe { + err = sasl_server_new( + service.as_ptr(), + ptr::null_mut(), + ptr::null_mut(), + local_addr.as_ptr(), + remote_addr.as_ptr(), + ptr::null_mut(), + SASL_SUCCESS_DATA, + &mut saslconfig.sasl_conn, + ); + } + if err != SASL_OK { + return Err(anyhow!(VncError::AuthFailed( + "sasl_server_init".to_string(), + format!("SASL_FAIL error code {}", err) + ))); + } + self.server.security_type.borrow_mut().saslconfig = saslconfig; + + Ok(()) + } + + /// Set properties for sasl. + fn set_ssf_for_sasl(&mut self) -> Result<()> { + // Set the relevant properties of sasl. + let mut err: c_int; + let ssf: sasl_ssf_t = 256; + let ssf = &ssf as *const sasl_ssf_t; + let security = self.server.security_type.borrow_mut(); + // SAFETY: sasl_setprop() and sasl_server_new() is C function. 
It can be ensure + // that security.saslconfig.sasl_conn is not null. + unsafe { + err = sasl_setprop( + security.saslconfig.sasl_conn, + SASL_SSF_EXTERNAL as i32, + ssf as *const c_void, + ); + } + if err != SASL_OK { + return Err(anyhow!(VncError::AuthFailed( + "set_ssf_for_sasl".to_string(), + format!("SASL_FAIL error code {}", err) + ))); + } + + // Already using tls, disable ssf in sasl. + let props_name = ptr::null_mut() as *mut *const c_char; + let props_value = ptr::null_mut() as *mut *const c_char; + let saslprops = sasl_security_properties_t { + min_ssf: 0, + max_ssf: 0, + maxbufsize: 8192, + security_flags: 0, + property_names: props_name, + property_values: props_value, + }; + + let props = &saslprops as *const sasl_security_properties_t; + // SAFETY: sasl_setprop() and sasl_server_new() is C function. It can be ensure + // that security.saslconfig.sasl_conn is not null. + unsafe { + err = sasl_setprop( + security.saslconfig.sasl_conn, + SASL_SEC_PROPS as i32, + props as *const c_void, + ); + } + if err != SASL_OK { + return Err(anyhow!(VncError::AuthFailed( + "set_ssf_for_sasl".to_string(), + format!("SASL_FAIL error code {}", err) + ))); + } + + Ok(()) + } + + /// Get the mechlist support by Sasl server. + /// Send the mechlist to client. + fn send_mech_list(&mut self) -> Result<()> { + let err: c_int; + let prefix = CString::new("")?; + let sep = CString::new(",")?; + let suffix = CString::new("")?; + let mut mechlist: *const c_char = ptr::null_mut(); + let mut security = self.server.security_type.borrow_mut(); + let client = self.client.clone(); + // SAFETY: sasl_listmech() is C function. It can be ensure + // that security.saslconfig.sasl_conn is not null. + unsafe { + err = sasl_listmech( + security.saslconfig.sasl_conn, + ptr::null_mut(), + prefix.as_ptr(), + sep.as_ptr(), + suffix.as_ptr(), + &mut mechlist, + ptr::null_mut(), + ptr::null_mut(), + ); + } + if err != SASL_OK || mechlist.is_null() { + return Err(anyhow!(VncError::AuthFailed( + "send_mech_list".to_string(), + "SASL_FAIL: no support sasl mechlist".to_string() + ))); + } + // SAFETY: It can be ensure that the pointer of mechlist is not null. + let mech_list = unsafe { CStr::from_ptr(mechlist as *const c_char) }; + trace::vnc_server_send_mech_list(&mech_list); + security.saslconfig.mech_list = String::from(mech_list.to_str()?); + let mut buf = Vec::new(); + let len = security.saslconfig.mech_list.len(); + buf.append(&mut (len as u32).to_be_bytes().to_vec()); + buf.append(&mut security.saslconfig.mech_list.as_bytes().to_vec()); + drop(security); + vnc_write(&client, buf); + vnc_flush(&client); + + Ok(()) + } + + /// Check whether the ssf layer of sasl meets the strength requirements. + fn sasl_check_ssf(&mut self) -> Result<()> { + let server = self.server.clone(); + let mut security = server.security_type.borrow_mut(); + if !security.saslconfig.want_ssf { + return Ok(()); + } + let err: c_int; + let mut val: *const c_void = ptr::null_mut(); + // SAFETY: sasl_getprop() is C function. It can be ensure + // that security.saslconfig.sasl_conn is not null. + unsafe { err = sasl_getprop(security.saslconfig.sasl_conn, SASL_SSF as c_int, &mut val) } + if err != SASL_OK { + return Err(anyhow!(VncError::AuthFailed( + "sasl_check_ssf".to_string(), + "sasl_getprop: internal error".to_string() + ))); + } + + // SAFETY: It can be ensure that the ptr of val is not null. 
+ let ssf: usize = unsafe { *(val as *const usize) }; + if ssf < MIN_SSF_LENGTH { + return Err(anyhow!(VncError::AuthFailed( + "sasl_check_ssf".to_string(), + "SASL SSF too weak".to_string() + ))); + } + + security.saslconfig.run_ssf = 1; + drop(security); + Ok(()) + } + + /// Check username. + fn sasl_check_authz(&mut self) -> Result<()> { + let security = self.server.security_type.borrow_mut(); + let mut val: *const c_void = ptr::null_mut(); + // SAFETY: sasl_getprop() is C function. It can be ensure + // that security.saslconfig.sasl_conn is not null. + let err = unsafe { + sasl_getprop( + security.saslconfig.sasl_conn, + SASL_USERNAME as c_int, + &mut val, + ) + }; + drop(security); + if err != SASL_OK { + return Err(anyhow!(VncError::AuthFailed( + "sasl_check_authz".to_string(), + "Cannot fetch SASL username".to_string() + ))); + } + if val.is_null() { + return Err(anyhow!(VncError::AuthFailed( + "sasl_check_authz".to_string(), + "No SASL username set".to_string() + ))); + } + // SAFETY: It can ensure that the pointer val is not null. + let username = unsafe { CStr::from_ptr(val as *const c_char) }; + let username = String::from(username.to_str()?); + + let server = self.server.clone(); + let security = server.security_type.borrow_mut(); + match &security.saslauth { + Some(saslauth) if saslauth.identity == username => Ok(()), + _ => Err(anyhow!(VncError::AuthFailed( + "sasl_check_authz".to_string(), + "No SASL username set".to_string() + ))), + } + } +} + +/// Auth reject. +fn auth_reject(buf: &mut Vec) { + let reason = String::from("Authentication failed"); + buf.append(&mut (1_u32).to_be_bytes().to_vec()); + buf.append(&mut (reason.len() as u32).to_be_bytes().to_vec()); + buf.append(&mut reason.as_bytes().to_vec()); +} diff --git a/ui/src/vnc/auth_vencrypt.rs b/ui/src/vnc/auth_vencrypt.rs new file mode 100644 index 0000000000000000000000000000000000000000..fc5164f5d8a44b2e23f85efced821bc45a785ca8 --- /dev/null +++ b/ui/src/vnc/auth_vencrypt.rs @@ -0,0 +1,434 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
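+
+// Overview of this module: the server advertises VeNCrypt version 0.2,
+// validates the sub-auth type picked by the client, and then wraps the TCP
+// stream in a rustls `ServerConnection` so the rest of the RFB session runs
+// over TLS (optionally followed by SASL). make_vencrypt_config() reads
+// servercert.pem and serverkey.pem from the configured creds directory, and
+// cacert.pem as well when peer verification is enabled.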
+ +use std::{ + cell::RefCell, + fs::File, + io::{BufReader, ErrorKind, Read, Write}, + net::TcpStream, + os::unix::prelude::{AsRawFd, RawFd}, + rc::Rc, + sync::Arc, +}; + +use anyhow::{anyhow, bail, Result}; +use log::error; +use rustls::{ + self, + cipher_suite::{ + TLS13_AES_128_GCM_SHA256, TLS13_AES_256_GCM_SHA384, TLS13_CHACHA20_POLY1305_SHA256, + TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, + TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256, + }, + kx_group::{SECP256R1, SECP384R1, X25519}, + server::{ + AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth, + ServerSessionMemoryCache, + }, + version::{TLS12, TLS13}, + Certificate, KeyLogFile, PrivateKey, RootCertStore, ServerConfig, ServerConnection, + SupportedCipherSuite, SupportedKxGroup, SupportedProtocolVersion, Ticketer, +}; +use vmm_sys_util::epoll::EventSet; + +use super::client_io::vnc_disconnect_start; +use crate::{ + error::VncError, + vnc::{ + auth_sasl::SubAuthState, + client_io::{vnc_flush, vnc_write, ClientIoHandler, IoOperations}, + }, +}; +use machine_manager::event_loop::EventLoop; +use util::loop_context::{EventNotifier, NotifierCallback, NotifierOperation}; + +const TLS_CREDS_SERVER_CACERT: &str = "cacert.pem"; +const TLS_CREDS_SERVERCERT: &str = "servercert.pem"; +const TLS_CREDS_SERVERKEY: &str = "serverkey.pem"; +pub const X509_CERT: &str = "x509"; +pub const ANON_CERT: &str = "anon"; +const CLIENT_REQUIRE_AUTH: bool = true; +/// Number of stored sessions. +const MAXIMUM_SESSION_STORAGE: usize = 256; + +/// Cipher suites supported by server. +static TLS_CIPHER_SUITES: &[SupportedCipherSuite] = &[ + TLS13_AES_128_GCM_SHA256, + TLS13_AES_256_GCM_SHA384, + TLS13_CHACHA20_POLY1305_SHA256, + TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, + TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256, +]; +/// Tls version supported by server. +static TLS_VERSIONS: &[&SupportedProtocolVersion] = &[&TLS13, &TLS12]; +/// Key exchange groups supported by server. +static TLS_KX_GROUPS: [&SupportedKxGroup; 3] = [&X25519, &SECP256R1, &SECP384R1]; + +/// Configuration for tls. +#[derive(Debug, Clone, Default)] +pub struct TlsCreds { + /// X509 or anon. + pub cred_type: String, + /// Path of cred file. + pub dir: String, + /// Server of client. + pub endpoint: Option, + /// Verify peer. + pub verifypeer: bool, +} + +impl ClientIoHandler { + /// Exchange auth version with client + pub fn client_vencrypt_init(&mut self) -> Result<()> { + trace::vnc_client_vencrypt_init(); + + let buf = self.read_incoming_msg(); + let client = self.client.clone(); + let subauth = self.server.security_type.borrow().subauth; + // VeNCrypt version 0.2. + if buf[0] != 0 || buf[1] != 2 { + let mut buf = Vec::new(); + // Reject version. + buf.append(&mut (0_u8).to_be_bytes().to_vec()); + vnc_write(&client, buf); + vnc_flush(&client); + return Err(anyhow!(VncError::UnsupportedRFBProtocolVersion)); + } else { + let mut buf = Vec::new(); + // Accept version. + buf.append(&mut (0_u8).to_be_bytes().to_vec()); + // Number of sub-auths. + buf.append(&mut (1_u8).to_be_bytes().to_vec()); + // The supported auth. + buf.append(&mut (subauth as u32).to_be_bytes().to_vec()); + vnc_write(&client, buf); + } + + vnc_flush(&client); + self.update_event_handler(4, ClientIoHandler::client_vencrypt_auth); + Ok(()) + } + + /// Encrypted Channel Initialize. 
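+    /// Reads the sub-auth type chosen by the client, rejects it if it does
+    /// not match the server's configured sub-auth, and otherwise wraps the
+    /// TCP stream in a rustls `ServerConnection` and registers a handler
+    /// that drives the TLS handshake.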
+ pub fn client_vencrypt_auth(&mut self) -> Result<()> { + let buf = self.read_incoming_msg(); + let buf = [buf[0], buf[1], buf[2], buf[3]]; + let auth = u32::from_be_bytes(buf); + let client = self.client.clone(); + let subauth = self.server.security_type.borrow().subauth; + trace::vnc_client_vencrypt_auth(&auth, &subauth); + + if auth != subauth as u32 { + let mut buf = Vec::new(); + // Reject auth. + buf.append(&mut (0_u8).to_be_bytes().to_vec()); + vnc_write(&client, buf); + vnc_flush(&client); + return Err(anyhow!(VncError::AuthFailed( + "client_vencrypt_auth".to_string(), + "sub auth is not supported".to_string() + ))); + } + + let mut buf = Vec::new(); + // Accept auth. + buf.append(&mut (1_u8).to_be_bytes().to_vec()); + vnc_write(&client, buf); + vnc_flush(&client); + + let tls_config = self + .server + .security_type + .borrow() + .tls_config + .clone() + .unwrap(); + let tls_conn = ServerConnection::new(tls_config)?; + let tls_io_channel = Rc::new(RefCell::new(TlsIoChannel::new( + self.stream.try_clone().unwrap(), + tls_conn, + ))); + + let handler: Rc = Rc::new(move |event, _fd: RawFd| { + let mut dis_conn = false; + if event & EventSet::READ_HANG_UP == EventSet::READ_HANG_UP { + dis_conn = true; + } else if event & EventSet::IN == EventSet::IN { + if let Err(e) = tls_io_channel.borrow_mut().tls_handshake() { + error!("Tls handle shake error: {:?}", e); + dis_conn = true; + } + } + + if !dis_conn && !tls_io_channel.borrow().tls_conn.is_handshaking() { + let client_io = client.conn_state.lock().unwrap().client_io.clone(); + let client_io = client_io.and_then(|c| c.upgrade()).unwrap(); + let mut locked_client = client_io.lock().unwrap(); + locked_client.io_channel = tls_io_channel.clone(); + if let Err(_e) = locked_client.tls_handshake_done() { + dis_conn = true; + } + } + if dis_conn { + client.conn_state.lock().unwrap().dis_conn = true; + vnc_disconnect_start(&client); + } + None + }); + self.handlers + .insert("vnc_tls_io".to_string(), handler.clone()); + let handlers = vec![handler]; + EventLoop::update_event( + vec![EventNotifier::new( + NotifierOperation::Modify, + self.stream.as_raw_fd(), + None, + EventSet::empty(), + handlers, + )], + None, + )?; + + self.client + .in_buffer + .lock() + .unwrap() + .remove_front(self.expect); + self.expect = 0; + Ok(()) + } + + fn tls_handshake_done(&mut self) -> Result<()> { + trace::vnc_client_tls_handshake_done(); + + let handler = self.handlers.get("vnc_client_io").unwrap().clone(); + let handlers = vec![handler]; + EventLoop::update_event( + vec![EventNotifier::new( + NotifierOperation::Modify, + self.stream.as_raw_fd(), + None, + EventSet::empty(), + handlers, + )], + None, + )?; + self.handle_vencrypt_subauth()?; + Ok(()) + } + + fn handle_vencrypt_subauth(&mut self) -> Result<()> { + let subauth = self.server.security_type.borrow().subauth; + let client = self.client.clone(); + match subauth { + SubAuthState::VncAuthVencryptX509Sasl => { + self.expect = 4; + self.msg_handler = ClientIoHandler::get_mechname_length; + self.start_sasl_auth()?; + } + SubAuthState::VncAuthVencryptX509None => { + let buf = [0u8; 4]; + vnc_write(&client, buf.to_vec()); + vnc_flush(&client); + self.expect = 1; + self.msg_handler = ClientIoHandler::handle_client_init; + } + _ => { + let mut buf: Vec = Vec::new(); + buf.append(&mut (0_u8).to_be_bytes().to_vec()); + let version = self.client.conn_state.lock().unwrap().version.clone(); + if version.minor >= 8 { + let err_msg: String = "Unsupported subauth type".to_string(); + buf.append(&mut (err_msg.len() as 
u32).to_be_bytes().to_vec()); + buf.append(&mut err_msg.as_bytes().to_vec()); + vnc_write(&client, buf); + vnc_flush(&client); + } + + return Err(anyhow!(VncError::MakeTlsConnectionFailed(String::from( + "Unsupported subauth type", + )))); + } + } + Ok(()) + } +} + +/// Config encrypted channel. +/// +/// # Arguments +/// +/// * `args` - tls configuration. +pub fn make_vencrypt_config(args: &TlsCreds) -> Result> { + let server_cacert = args.dir.clone() + "/" + TLS_CREDS_SERVER_CACERT; + let server_cert = args.dir.clone() + "/" + TLS_CREDS_SERVERCERT; + let server_key = args.dir.clone() + "/" + TLS_CREDS_SERVERKEY; + + // Load cacert.pem and provide verification for certificate chain + let client_auth = if args.verifypeer { + let roots = load_certs(server_cacert.as_str())?; + let mut client_auth_roots = RootCertStore::empty(); + for root in roots { + client_auth_roots.add(&root)?; + } + if CLIENT_REQUIRE_AUTH { + AllowAnyAuthenticatedClient::new(client_auth_roots).boxed() + } else { + AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots).boxed() + } + } else { + NoClientAuth::boxed() + }; + + // Cipher suiter. + let suites = TLS_CIPHER_SUITES.to_vec(); + // Tls protocol version supported by server. + let versions = TLS_VERSIONS.to_vec(); + // Server certificate. + let certs: Vec = load_certs(server_cert.as_str())?; + // Server private key. + let privkey: PrivateKey = load_private_key(server_key.as_str())?; + + let mut config = ServerConfig::builder() + .with_cipher_suites(&suites) + .with_kx_groups(&TLS_KX_GROUPS) + .with_protocol_versions(&versions) + .expect("Unsupported cipher-suite/version") + .with_client_cert_verifier(client_auth) + .with_single_cert_with_ocsp_and_sct(certs, privkey, vec![], vec![]) + .expect("Invalid Certificate format"); + + // SSLKEYLOGFILE=path configure key log path. + config.key_log = Arc::new(KeyLogFile::new()); + // Limit data size in one time. + config.session_storage = ServerSessionMemoryCache::new(MAXIMUM_SESSION_STORAGE); + // Tickets. + config.ticketer = Ticketer::new()?; + config.alpn_protocols = Vec::new(); + + Ok(Arc::new(config)) +} + +/// load private key +/// +/// # Arguments +/// +/// * `filepath` - the path private key. +fn load_private_key(filepath: &str) -> Result { + let file = File::open(filepath)?; + + let mut reader = BufReader::new(file); + loop { + match rustls_pemfile::read_one(&mut reader).expect("Cannot parse .pem file") { + Some(rustls_pemfile::Item::RSAKey(ras)) => return Ok(PrivateKey(ras)), + Some(rustls_pemfile::Item::PKCS8Key(pkcs8)) => return Ok(PrivateKey(pkcs8)), + Some(rustls_pemfile::Item::ECKey(ec)) => return Ok(PrivateKey(ec)), + None => break, + _ => {} + } + } + + Err(anyhow!(VncError::MakeTlsConnectionFailed( + "Load private key failed!".to_string() + ))) +} + +/// Load certificate. +/// +/// # Arguments +/// +/// * `filepath` - the file path of certificate. +fn load_certs(filepath: &str) -> Result> { + let certfile = File::open(filepath)?; + let mut reader = BufReader::new(certfile); + let certs = rustls_pemfile::certs(&mut reader)? + .iter() + .map(|v| Certificate(v.clone())) + .collect(); + Ok(certs) +} + +struct TlsIoChannel { + /// TcpStream connected with client. + stream: TcpStream, + /// Tls server connection. + tls_conn: ServerConnection, +} + +impl TlsIoChannel { + fn new(stream: TcpStream, tls_conn: ServerConnection) -> Self { + Self { stream, tls_conn } + } + + fn tls_handshake(&mut self) -> Result<()> { + if self.tls_conn.read_tls(&mut self.stream)? 
== 0 { + bail!("Tls hand shake failed: EOF"); + } + self.tls_conn.process_new_packets()?; + if self.tls_conn.wants_write() { + self.tls_conn.write_tls(&mut self.stream)?; + } + Ok(()) + } +} + +impl IoOperations for TlsIoChannel { + fn channel_write(&mut self, buf: &[u8]) -> Result { + let buf_size = buf.len(); + let mut offset = 0; + while offset < buf_size { + let tmp_buf = &buf[offset..]; + match self.tls_conn.writer().write(tmp_buf) { + Ok(0) => { + bail!("Failed to write tls message!"); + } + Ok(n) => offset += n, + Err(ref e) if e.kind() == ErrorKind::Interrupted => {} + Err(e) => { + bail!("Internal error: {}", e); + } + } + + while self.tls_conn.wants_write() { + match self.tls_conn.write_tls(&mut self.stream) { + Ok(_) => {} + Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => { + continue; + } + Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => { + continue; + } + Err(e) => { + bail!("Unable to write msg on tls socket: {:?}", e); + } + } + } + } + + Ok(buf_size) + } + + fn channel_read(&mut self, buf: &mut Vec) -> Result { + let mut len = 0_usize; + self.tls_conn.read_tls(&mut self.stream)?; + + let io_state = self.tls_conn.process_new_packets()?; + if io_state.plaintext_bytes_to_read() > 0 { + len = io_state.plaintext_bytes_to_read(); + // FIXME: Split len to avoid possible OOM. + buf.resize(len, 0u8); + self.tls_conn.reader().read_exact(buf)?; + } + Ok(len) + } +} diff --git a/ui/src/vnc/client_io.rs b/ui/src/vnc/client_io.rs new file mode 100644 index 0000000000000000000000000000000000000000..7ffa8996833d8a66ef0bc3d6a399519df943ab91 --- /dev/null +++ b/ui/src/vnc/client_io.rs @@ -0,0 +1,1474 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights r&eserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
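+
+// Client I/O is organised as a small state machine: `expect` holds the number
+// of bytes the next step needs and `msg_handler` points to the callback that
+// consumes them once the receive buffer is long enough. Outgoing data is
+// queued in `out_buffer` and flushed by writing to a per-client eventfd, and
+// disconnection is triggered through a separate eventfd.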
+ +use std::{ + cell::RefCell, + cmp, + collections::HashMap, + io::{Read, Write}, + net::{Shutdown, TcpStream}, + os::unix::prelude::{AsRawFd, RawFd}, + rc::Rc, + sync::{Arc, Mutex, Weak}, +}; + +use anyhow::{anyhow, bail, Result}; +use log::error; +use sscanf::scanf; +use vmm_sys_util::{epoll::EventSet, eventfd::EventFd}; + +use crate::{ + console::console_select, + error::VncError, + input::{ + input_button, input_move_abs, input_point_sync, key_event, keyboard_modifier_get, + keyboard_state_reset, update_key_state, Axis, KeyboardModifier, ABS_MAX, ASCII_A, ASCII_Z, + INPUT_BUTTON_WHEEL_DOWN, INPUT_BUTTON_WHEEL_LEFT, INPUT_BUTTON_WHEEL_RIGHT, + INPUT_BUTTON_WHEEL_UP, INPUT_POINT_BACK, INPUT_POINT_LEFT, INPUT_POINT_MIDDLE, + INPUT_POINT_RIGHT, KEYCODE_1, KEYCODE_9, UPPERCASE_TO_LOWERCASE, + }, + pixman::{bytes_per_pixel, get_image_height, get_image_width, PixelFormat}, + utils::BuffPool, + vnc::{ + framebuffer_update, round_up_div, server_io::VncServer, set_area_dirty, write_pixel, + AuthState, BIT_PER_BYTE, DIRTY_PIXELS_NUM, DIRTY_WIDTH_BITS, MAX_IMAGE_SIZE, + MAX_WINDOW_HEIGHT, MIN_OUTPUT_LIMIT, OUTPUT_THROTTLE_SCALE, + }, +}; +use util::{ + bitmap::Bitmap, + loop_context::{ + create_new_eventfd, gen_delete_notifiers, read_fd, EventNotifier, EventNotifierHelper, + NotifierCallback, NotifierOperation, + }, +}; + +pub const APP_NAME: &str = "stratovirt"; +const MAX_RECVBUF_LEN: usize = 1024; +const NUM_OF_COLORMAP: u16 = 256; + +// VNC encodings types. +pub const ENCODING_RAW: i32 = 0; +pub const ENCODING_HEXTILE: i32 = 5; +const ENCODING_ZLIB: i32 = 6; +const ENCODING_TIGHT: i32 = 7; +const ENCODING_ZRLE: i32 = 16; +const ENCODING_ZYWRLE: i32 = 17; +const ENCODING_DESKTOPRESIZE: i32 = -223; +const ENCODING_RICH_CURSOR: i32 = -239; +const ENCODING_POINTER_TYPE_CHANGE: i32 = -257; +const ENCODING_LED_STATE: i32 = -261; +const ENCODING_DESKTOP_RESIZE_EXT: i32 = -308; +const ENCODING_ALPHA_CURSOR: i32 = -314; +const ENCODING_WMVI: i32 = 1464686185; + +const VNC_INPUT_BUTTON_LEFT: u8 = 0x01; +const VNC_INPUT_BUTTON_MIDDLE: u8 = 0x02; +const VNC_INPUT_BUTTON_RIGHT: u8 = 0x04; +const VNC_INPUT_BUTTON_WHEEL_UP: u8 = 0x08; +const VNC_INPUT_BUTTON_WHEEL_DOWN: u8 = 0x10; +const VNC_INPUT_BUTTON_WHEEL_LEFT: u8 = 0x20; +const VNC_INPUT_BUTTON_WHEEL_RIGHT: u8 = 0x40; +// NOTE: VNC only affords 8 bits for mouse button. Button "back" +// occupies bit7, while there is no bit for button "forward". +// Here just support side button "back" only. +const VNC_INPUT_BUTTON_BACK: u8 = 0x80; +const VNC_INPUT_BUTTON_MAX_NUM: u32 = 8; + +/// This trait is used to send bytes, +/// the return is the total number of bytes sented. +pub trait IoOperations { + fn channel_write(&mut self, buf: &[u8]) -> Result; + fn channel_read(&mut self, buf: &mut Vec) -> Result; +} + +/// Image display feature. +enum VncFeatures { + VncFeatureResize, + VncFeatureResizeExt, + VncFeatureHextile, + VncFeaturePointerTypeChange, + VncFeatureWmvi, + VncFeatureTight, + VncFeatureZlib, + VncFeatureRichCursor, + VncFeatureAlphaCursor, + _VncFeatureTightPng, + VncFeatureZrle, + VncFeatureZywrle, + VncFeatureLedState, + _VncFeatureXvp, + _VncFeatureClipboardExt, +} + +/// Client to server message in Remote Framebuffer Protocol. +enum ClientMsg { + SetPixelFormat = 0, + SetEncodings = 2, + FramebufferUpdateRequest = 3, + KeyEvent = 4, + PointerEvent = 5, + ClientCutText = 6, + InvalidMsg, +} + +/// Server to client message in Remote Framebuffer Protocol. 
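+/// The discriminants are the message type numbers defined by the RFB
+/// protocol (RFC 6143): 0 for FramebufferUpdate and 1 for
+/// SetColourMapEntries.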
+pub enum ServerMsg { + FramebufferUpdate = 0, + SetColourMapEntries = 1, +} + +impl From for ClientMsg { + fn from(v: u8) -> Self { + match v { + 0 => ClientMsg::SetPixelFormat, + 2 => ClientMsg::SetEncodings, + 3 => ClientMsg::FramebufferUpdateRequest, + 4 => ClientMsg::KeyEvent, + 5 => ClientMsg::PointerEvent, + 6 => ClientMsg::ClientCutText, + _ => ClientMsg::InvalidMsg, + } + } +} + +/// RFB protocol version. +#[derive(Clone)] +pub struct VncVersion { + pub major: u16, + pub minor: u16, +} + +impl VncVersion { + fn new(major: u16, minor: u16) -> Self { + VncVersion { major, minor } + } +} + +impl Default for VncVersion { + fn default() -> Self { + Self::new(0, 0) + } +} + +#[derive(PartialEq, Eq)] +pub enum UpdateState { + No, + Incremental, + Force, +} + +/// Dirty area of image +#[derive(Clone)] +pub struct Rectangle { + pub x: i32, + pub y: i32, + pub w: i32, + pub h: i32, +} + +impl Rectangle { + pub fn new(x: i32, y: i32, w: i32, h: i32) -> Self { + Rectangle { x, y, w, h } + } +} + +/// Display Output mode information of client. +#[derive(Clone)] +pub struct DisplayMode { + /// Vnc display feature. + feature: i32, + /// Width of client display. + pub client_width: i32, + /// Height of client display. + pub client_height: i32, + /// Encoding type. + pub enc: i32, + /// Data storage type for client. + pub client_be: bool, + /// The pixel need to convert. + pub convert: bool, + /// Last button state. + pub last_button: u8, + /// Image pixel format in pixman. + pub pf: PixelFormat, +} + +impl DisplayMode { + pub fn new(enc: i32, client_be: bool, convert: bool, pf: PixelFormat) -> Self { + DisplayMode { + feature: 0, + client_width: 0, + client_height: 0, + enc, + client_be, + convert, + last_button: 0, + pf, + } + } + + fn has_feature(&self, feature: VncFeatures) -> bool { + self.feature & (1 << feature as usize) != 0 + } +} + +impl Default for DisplayMode { + fn default() -> Self { + Self::new(0, false, false, PixelFormat::default()) + } +} + +pub struct RectInfo { + /// Vnc client state. + pub client: Arc, + /// Dirty area of image. 
+ pub rects: Vec, +} + +impl RectInfo { + pub fn new(client: &Arc, rects: Vec) -> Self { + RectInfo { + client: client.clone(), + rects, + } + } +} + +impl Clone for RectInfo { + fn clone(&self) -> Self { + let mut rects = Vec::new(); + for rect in &self.rects { + rects.push(rect.clone()); + } + Self { + client: self.client.clone(), + rects, + } + } +} + +pub struct IoChannel { + stream: TcpStream, +} + +impl IoChannel { + pub fn new(stream: TcpStream) -> Self { + Self { stream } + } +} + +impl IoOperations for IoChannel { + fn channel_write(&mut self, buf: &[u8]) -> Result { + let buf_size = buf.len(); + let mut offset = 0; + while offset < buf_size { + let tmp_buf = &buf[offset..]; + match self.stream.write(tmp_buf) { + Ok(ret) => { + offset += ret; + } + Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => { + return Ok(offset); + } + Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => { + continue; + } + Err(e) => { + bail!("Unable to write msg on socket: {:?}", e); + } + } + } + + Ok(buf_size) + } + + fn channel_read(&mut self, buf: &mut Vec) -> Result { + let mut len = 0_usize; + loop { + let mut bytes = vec![0_u8; MAX_RECVBUF_LEN]; + match self.stream.read(&mut bytes) { + Ok(ret) => { + buf.append(&mut bytes[..ret].to_vec()); + len += ret; + } + Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => { + return Ok(len); + } + Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => { + continue; + } + Err(e) => { + bail!("Unable to read msg from socket: {:?}", e); + } + } + break; + } + + Ok(len) + } +} + +/// The connection state of vnc client. +pub struct ConnState { + /// Dirty number need to update. + dirty_num: i32, + /// Connection status. + pub dis_conn: bool, + /// State flags whether the image needs to be updated for the client. + update_state: UpdateState, + /// RFB protocol version. + pub version: VncVersion, + /// Point to Client Io handler. + pub client_io: Option>>, +} + +impl Default for ConnState { + fn default() -> Self { + ConnState { + dirty_num: 0, + dis_conn: false, + update_state: UpdateState::No, + version: VncVersion::default(), + client_io: None, + } + } +} + +impl ConnState { + fn is_disconnect(&mut self) -> bool { + self.dis_conn + } + + /// Whether the client's image data needs to be updated. + fn is_need_update(&mut self) -> bool { + if self.is_disconnect() { + return false; + } + + match self.update_state { + UpdateState::No => false, + UpdateState::Incremental => self.dirty_num > 0, + UpdateState::Force => true, + } + } + + fn clear_update_state(&mut self) { + self.dirty_num = 0; + self.update_state = UpdateState::No; + } +} + +/// Struct to record the state with the vnc client. +pub struct ClientState { + /// Tcp listening address. + pub addr: String, + /// Disconnect event fd. + pub disconn_evt: Arc>, + /// Write event fd. + write_fd: Arc>, + /// TcpStream receive buffer. + pub in_buffer: Arc>, + /// TcpStream write buffer. + pub out_buffer: Arc>, + /// Output mode information of client display. + pub client_dpm: Arc>, + /// The connection state of vnc client. + pub conn_state: Arc>, + /// Identify the image update area. 
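+    /// Each set bit marks a `DIRTY_PIXELS_NUM`-pixel wide strip of one
+    /// scanline; `get_rects()` converts runs of set bits back into
+    /// rectangles.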
+ pub dirty_bitmap: Arc>>, +} + +impl ClientState { + pub fn new(addr: String) -> Self { + ClientState { + addr, + disconn_evt: Arc::new(Mutex::new(create_new_eventfd().unwrap())), + write_fd: Arc::new(Mutex::new(create_new_eventfd().unwrap())), + in_buffer: Arc::new(Mutex::new(BuffPool::new())), + out_buffer: Arc::new(Mutex::new(BuffPool::new())), + client_dpm: Arc::new(Mutex::new(DisplayMode::default())), + conn_state: Arc::new(Mutex::new(ConnState::default())), + dirty_bitmap: Arc::new(Mutex::new(Bitmap::::new( + MAX_WINDOW_HEIGHT as usize + * round_up_div(u64::from(DIRTY_WIDTH_BITS), u64::from(u64::BITS)) as usize, + ))), + } + } +} + +/// Handle the message with vnc client. +pub struct ClientIoHandler { + /// TcpStream connected with client. + pub stream: TcpStream, + /// Io channel to handle read or write. + pub io_channel: Rc>, + /// Vnc client io handler. + pub handlers: HashMap>, + /// Tls server connection. + #[cfg(feature = "vnc_auth")] + pub tls_conn: Option, + /// Message handler. + pub msg_handler: fn(&mut ClientIoHandler) -> Result<()>, + /// Size of buff in next handle. + pub expect: usize, + /// State with vnc client. + pub client: Arc, + /// Configure for vnc server. + pub server: Arc, +} + +impl ClientIoHandler { + pub fn new( + stream: TcpStream, + io_channel: Rc>, + client: Arc, + server: Arc, + ) -> Self { + ClientIoHandler { + stream, + io_channel, + handlers: HashMap::new(), + #[cfg(feature = "vnc_auth")] + tls_conn: None, + msg_handler: ClientIoHandler::handle_version, + expect: 12, + client, + server, + } + } +} + +impl ClientIoHandler { + /// This function interacts with the client interface, it includs several + /// steps: Read the data stream from the fd, save the data in buffer, + /// and then process the data by io handle function. + fn client_handle_read(&mut self) -> Result<(), anyhow::Error> { + self.read_msg()?; + + let client = self.client.clone(); + while client.in_buffer.lock().unwrap().len() >= self.expect { + (self.msg_handler)(self)?; + + if self.client.conn_state.lock().unwrap().dis_conn { + return Err(anyhow!(VncError::Disconnection)); + } + + if self.expect == 0 { + break; + } + } + + Ok(()) + } + + /// Write a chunk of data to client socket. If there is some + /// error in io channel, then return and break the connection. + fn client_handle_write(&mut self) { + let client = self.client.clone(); + if client.conn_state.lock().unwrap().dis_conn { + return; + } + + let mut locked_buffer = client.out_buffer.lock().unwrap(); + while let Some(bytes) = locked_buffer.read_front_chunk() { + let message_len = bytes.len(); + let send_len = match self.write_msg(bytes) { + Ok(ret) => ret, + Err(_e) => { + self.client.conn_state.lock().unwrap().dis_conn = true; + return; + } + }; + + locked_buffer.remove_front(send_len); + if send_len != message_len { + break; + } + } + + if !locked_buffer.is_empty() { + vnc_flush(&client); + } + + drop(locked_buffer); + } + + /// Read buf from stream, return the size. + fn read_msg(&mut self) -> Result { + let mut buf: Vec = vec![]; + let len = self.io_channel.borrow_mut().channel_read(&mut buf)?; + if len > 0 { + buf = buf[..len].to_vec(); + self.client.in_buffer.lock().unwrap().append_limit(buf); + } + Ok(len) + } + + /// Write buf to stream + /// Choose different channel according to whether or not to encrypt + pub fn write_msg(&mut self, buf: &[u8]) -> Result { + self.io_channel.borrow_mut().channel_write(buf) + } + + /// Exchange RFB protocol version with client. 
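+    /// The client answers the server greeting with a 12-byte string of the
+    /// form "RFB xxx.yyy\n". Only major version 3 with minor 3, 4, 5, 7 or 8
+    /// is accepted, minors 4 and 5 are treated as 3.3, and 3.3 clients are
+    /// sent a single u32 security type instead of the security-type list.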
+ fn handle_version(&mut self) -> Result<()> { + let client = self.client.clone(); + let mut buf = self.read_incoming_msg(); + // The last character should be '\n' + let lf_char = buf.pop().ok_or(VncError::UnsupportedRFBProtocolVersion)?; + if !lf_char.eq(&10) { + return Err(anyhow!(VncError::UnsupportedRFBProtocolVersion)); + } + let ver_str = String::from_utf8_lossy(&buf).to_string(); + let ver = match scanf!(ver_str, "RFB {usize:/\\d{3}/}.{usize:/\\d{3}/}") { + Ok(v) => v, + Err(_e) => { + return Err(anyhow!(VncError::UnsupportedRFBProtocolVersion)); + } + }; + + trace::vnc_client_handle_version(&ver.0, &ver.1); + let mut version = VncVersion::new(ver.0 as u16, ver.1 as u16); + if version.major != 3 || ![3, 4, 5, 7, 8].contains(&version.minor) { + let mut buf = Vec::new(); + buf.append(&mut (AuthState::Invalid as u32).to_be_bytes().to_vec()); + vnc_write(&client, buf); + vnc_flush(&client); + return Err(anyhow!(VncError::UnsupportedRFBProtocolVersion)); + } + + if [4, 5].contains(&version.minor) { + version.minor = 3; + } + self.client.conn_state.lock().unwrap().version = version; + #[cfg(feature = "vnc_auth")] + let auth = self.server.security_type.borrow().auth; + #[cfg(not(feature = "vnc_auth"))] + let auth = AuthState::No; + + if self.client.conn_state.lock().unwrap().version.minor == 3 { + match auth { + AuthState::No => { + let mut buf = Vec::new(); + buf.append(&mut (AuthState::No as u32).to_be_bytes().to_vec()); + vnc_write(&client, buf); + self.update_event_handler(1, ClientIoHandler::handle_client_init); + } + _ => { + self.auth_failed("Unsupported auth method"); + return Err(anyhow!(VncError::AuthFailed( + "handle_version".to_string(), + "Unsupported auth method".to_string() + ))); + } + } + } else { + let mut buf = [0u8; 2]; + buf[0] = 1; // Number of security types. + buf[1] = auth as u8; + vnc_write(&client, buf.to_vec()); + self.update_event_handler(1, ClientIoHandler::handle_auth); + } + vnc_flush(&client); + Ok(()) + } + + /// Initialize the connection of vnc client. + pub fn handle_client_init(&mut self) -> Result<()> { + let mut buf = Vec::new(); + // If the total number of connection exceeds the limit, + // then the old client will be disconnected. + let server = self.server.clone(); + let client = self.client.clone(); + let addr = client.addr.clone(); + let mut locked_clients = server.client_handlers.lock().unwrap(); + let mut len = locked_clients.len() as i32; + trace::vnc_client_handle_init(&len, &server.conn_limits); + for client in locked_clients.values_mut() { + if len <= server.conn_limits as i32 { + break; + } + if client.addr != addr { + vnc_disconnect_start(client); + len -= 1; + } + } + drop(locked_clients); + + // Send server framebuffer info. 
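+        // ServerInit layout: u16 width, u16 height, a 16-byte pixel format,
+        // a u32 name length and then the server name itself.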
+ let locked_surface = self.server.vnc_surface.lock().unwrap(); + let width = get_image_width(locked_surface.server_image); + let height = get_image_height(locked_surface.server_image); + drop(locked_surface); + if !(0..=MAX_IMAGE_SIZE).contains(&width) || !(0..=MAX_IMAGE_SIZE).contains(&height) { + return Err(anyhow!(VncError::InvalidImageSize(width, height))); + } + let mut locked_dpm = client.client_dpm.lock().unwrap(); + locked_dpm.client_width = width; + locked_dpm.client_height = height; + drop(locked_dpm); + + buf.append(&mut (width as u16).to_be_bytes().to_vec()); + buf.append(&mut (height as u16).to_be_bytes().to_vec()); + pixel_format_message(&client, &mut buf); + + buf.append(&mut (APP_NAME.to_string().len() as u32).to_be_bytes().to_vec()); + buf.append(&mut APP_NAME.to_string().as_bytes().to_vec()); + vnc_write(&client, buf); + vnc_flush(&client); + self.update_event_handler(1, ClientIoHandler::handle_protocol_msg); + Ok(()) + } + + /// Authentication + fn handle_auth(&mut self) -> Result<()> { + let buf = self.read_incoming_msg(); + trace::vnc_client_handle_auth(&buf[0]); + #[cfg(feature = "vnc_auth")] + let auth = self.server.security_type.borrow().auth; + #[cfg(not(feature = "vnc_auth"))] + let auth = AuthState::No; + let client = self.client.clone(); + let version = client.conn_state.lock().unwrap().version.clone(); + + if buf[0] != auth as u8 { + self.auth_failed("Authentication failed"); + return Err(anyhow!(VncError::AuthFailed( + "handle_auth".to_string(), + "auth type is not supported".to_string() + ))); + } + + match auth { + AuthState::No => { + if version.minor >= 8 { + let buf = [0u8; 4]; + vnc_write(&client, buf.to_vec()); + } + self.update_event_handler(1, ClientIoHandler::handle_client_init); + } + #[cfg(feature = "vnc_auth")] + AuthState::Vencrypt => { + // Send VeNCrypt version 0.2. + let mut buf = [0u8; 2]; + buf[0] = 0_u8; + buf[1] = 2_u8; + + vnc_write(&client, buf.to_vec()); + self.update_event_handler(2, ClientIoHandler::client_vencrypt_init); + } + _ => { + self.auth_failed("Unhandled auth method"); + return Err(anyhow!(VncError::AuthFailed( + "handle_auth".to_string(), + "auth type is not supported".to_string() + ))); + } + } + vnc_flush(&client); + Ok(()) + } + + /// Process the data sent by the client + pub fn handle_protocol_msg(&mut self) -> Result<()> { + // According to RFB protocol, first byte identifies the event type. + let buf = self.read_incoming_msg(); + match ClientMsg::from(buf[0]) { + ClientMsg::SetPixelFormat => { + self.set_pixel_format()?; + } + ClientMsg::SetEncodings => { + self.set_encodings()?; + } + ClientMsg::FramebufferUpdateRequest => { + self.update_frame_buff()?; + } + ClientMsg::KeyEvent => { + self.key_envent() + .unwrap_or_else(|e| error!("Key event error: {:?}", e)); + } + ClientMsg::PointerEvent => { + self.point_event() + .unwrap_or_else(|e| error!("Point event error: {:?}", e)); + } + ClientMsg::ClientCutText => { + self.client_cut_event(); + } + _ => { + self.update_event_handler(1, ClientIoHandler::handle_protocol_msg); + } + } + Ok(()) + } + + /// Tell the client that the specified pixel values should be + /// mapped to the given RGB intensities. + fn send_color_map(&mut self) { + let mut buf: Vec = Vec::new(); + buf.append( + &mut (ServerMsg::SetColourMapEntries as u8) + .to_be_bytes() + .to_vec(), + ); + buf.append(&mut (0_u8).to_be_bytes().to_vec()); + // First color. + buf.append(&mut (0_u16).to_be_bytes().to_vec()); + // Number of colors. 
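+        // A full 256-entry colour map is sent; each entry is a 16-bit R, G, B
+        // triple derived from the client's pixel format.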
+ buf.append(&mut NUM_OF_COLORMAP.to_be_bytes().to_vec()); + + let pf = self.client.client_dpm.lock().unwrap().pf.clone(); + for i in 0..NUM_OF_COLORMAP { + let r = ((i >> pf.red.shift) & u16::from(pf.red.max)) << (16 - pf.red.bits); + let g = ((i >> pf.green.shift) & u16::from(pf.green.max)) << (16 - pf.green.bits); + let b = ((i >> pf.blue.shift) & u16::from(pf.blue.max)) << (16 - pf.blue.bits); + buf.append(&mut r.to_be_bytes().to_vec()); + buf.append(&mut g.to_be_bytes().to_vec()); + buf.append(&mut b.to_be_bytes().to_vec()); + } + + let client = self.client.clone(); + vnc_write(&client, buf); + vnc_flush(&client); + } + + /// Set image format. + fn set_pixel_format(&mut self) -> Result<()> { + if self.expect == 1 { + self.expect = 20; + return Ok(()); + } + + let buf = self.read_incoming_msg(); + let mut bit_per_pixel: u8 = buf[4]; + let big_endian_flag = buf[6]; + let true_color_flag: u8 = buf[7]; + let mut red_max: u16 = u16::from_be_bytes([buf[8], buf[9]]); + let mut green_max: u16 = u16::from_be_bytes([buf[10], buf[11]]); + let mut blue_max: u16 = u16::from_be_bytes([buf[12], buf[13]]); + let mut red_shift: u8 = buf[14]; + let mut green_shift: u8 = buf[15]; + let mut blue_shift: u8 = buf[16]; + if true_color_flag == 0 { + bit_per_pixel = 8; + red_max = 7; + green_max = 7; + blue_max = 3; + red_shift = 0; + green_shift = 3; + blue_shift = 6; + } + + // Verify the validity of pixel format. + // bit_per_pixel: Bits occupied by each pixel. + if ![8, 16, 32].contains(&bit_per_pixel) { + self.client.conn_state.lock().unwrap().dis_conn = true; + return Err(anyhow!(VncError::ProtocolMessageFailed(String::from( + "set pixel format" + )))); + } + + let mut locked_dpm = self.client.client_dpm.lock().unwrap(); + locked_dpm.pf.red.set_color_info(red_shift, red_max); + locked_dpm.pf.green.set_color_info(green_shift, green_max); + locked_dpm.pf.blue.set_color_info(blue_shift, blue_max); + locked_dpm.pf.pixel_bits = bit_per_pixel; + locked_dpm.pf.pixel_bytes = bit_per_pixel / BIT_PER_BYTE as u8; + // Standard pixel format, depth is equal to 24. + locked_dpm.pf.depth = if bit_per_pixel == 32 { + 24 + } else { + bit_per_pixel + }; + locked_dpm.client_be = big_endian_flag != 0; + + if !locked_dpm.pf.is_default_pixel_format() { + locked_dpm.convert = true; + } + drop(locked_dpm); + if true_color_flag == 0 { + self.send_color_map(); + } + + self.server.rect_jobs.lock().unwrap().clear(); + self.update_event_handler(1, ClientIoHandler::handle_protocol_msg); + Ok(()) + } + + /// Set encoding. 
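+    /// The SetEncodings message carries a padding byte, a u16 count and then
+    /// that many i32 encoding numbers; supported encodings are folded into
+    /// the client's feature bitmap and the preferred encoding is recorded.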
+ fn set_encodings(&mut self) -> Result<()> { + let client = self.client.clone(); + let server = self.server.clone(); + let buf = self.read_incoming_msg(); + if self.expect == 1 { + self.expect = 4; + return Ok(()); + } + + let mut num_encoding: u16; + if self.expect == 4 { + num_encoding = u16::from_be_bytes([buf[2], buf[3]]); + if num_encoding > 0 { + self.expect = 4 + (num_encoding as usize) * 4; + return Ok(()); + } + } else { + num_encoding = u16::from_be_bytes([buf[2], buf[3]]); + } + + let mut locked_dpm = self.client.client_dpm.lock().unwrap(); + locked_dpm.feature = 0; + locked_dpm.enc = 0; + num_encoding = cmp::min(num_encoding as usize, (buf.len() - 4) / 4) as u16; + while num_encoding > 0 { + let offset = (4 * num_encoding) as usize; + let enc = i32::from_be_bytes([ + buf[offset], + buf[offset + 1], + buf[offset + 2], + buf[offset + 3], + ]); + match enc { + ENCODING_RAW => { + locked_dpm.enc = enc; + } + ENCODING_HEXTILE => { + locked_dpm.feature |= 1 << VncFeatures::VncFeatureHextile as usize; + locked_dpm.enc = enc; + } + ENCODING_TIGHT => { + locked_dpm.feature |= 1 << VncFeatures::VncFeatureTight as usize; + locked_dpm.enc = enc; + } + ENCODING_ZLIB => { + // ZRLE compress better than ZLIB, so prioritize ZRLE. + if locked_dpm.feature & (1 << VncFeatures::VncFeatureZrle as usize) == 0 { + locked_dpm.feature |= 1 << VncFeatures::VncFeatureZlib as usize; + locked_dpm.enc = enc; + } + } + ENCODING_ZRLE => { + locked_dpm.feature |= 1 << VncFeatures::VncFeatureZrle as usize; + locked_dpm.enc = enc; + } + ENCODING_ZYWRLE => { + locked_dpm.feature |= 1 << VncFeatures::VncFeatureZywrle as usize; + locked_dpm.enc = enc; + } + ENCODING_DESKTOPRESIZE => { + locked_dpm.feature |= 1 << VncFeatures::VncFeatureResize as usize; + } + ENCODING_DESKTOP_RESIZE_EXT => { + locked_dpm.feature |= 1 << VncFeatures::VncFeatureResizeExt as usize; + } + ENCODING_POINTER_TYPE_CHANGE => { + locked_dpm.feature |= 1 << VncFeatures::VncFeaturePointerTypeChange as usize; + } + ENCODING_RICH_CURSOR => { + locked_dpm.feature |= 1 << VncFeatures::VncFeatureRichCursor as usize; + } + ENCODING_ALPHA_CURSOR => { + locked_dpm.feature |= 1 << VncFeatures::VncFeatureAlphaCursor as usize; + } + ENCODING_WMVI => { + locked_dpm.feature |= 1 << VncFeatures::VncFeatureWmvi as usize; + } + ENCODING_LED_STATE => { + locked_dpm.feature |= 1 << VncFeatures::VncFeatureLedState as usize; + } + _ => {} + } + + num_encoding -= 1; + } + + drop(locked_dpm); + let mut buf: Vec = Vec::new(); + // VNC desktop resize. + desktop_resize(&client, &server, &mut buf)?; + // VNC display cursor define. + display_cursor_define(&client, &server, &mut buf); + vnc_write(&client, buf); + vnc_flush(&client); + self.update_event_handler(1, ClientIoHandler::handle_protocol_msg); + Ok(()) + } + + /// Update image for client. 
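+    /// Handles FramebufferUpdateRequest: byte 1 is the incremental flag,
+    /// followed by the x, y, width and height of the requested area. A
+    /// non-incremental request marks the whole area dirty and forces a
+    /// refresh.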
+ fn update_frame_buff(&mut self) -> Result<()> { + if self.expect == 1 { + self.expect = 10; + return Ok(()); + } + let buf = self.read_incoming_msg(); + let locked_dpm = self.client.client_dpm.lock().unwrap(); + let width = locked_dpm.client_width; + let height = locked_dpm.client_height; + drop(locked_dpm); + let client = self.client.clone(); + let mut locked_state = client.conn_state.lock().unwrap(); + if buf[1] != 0 { + if locked_state.update_state != UpdateState::Force { + locked_state.update_state = UpdateState::Incremental; + } + } else { + locked_state.update_state = UpdateState::Force; + let x = i32::from(u16::from_be_bytes([buf[2], buf[3]])); + let y = i32::from(u16::from_be_bytes([buf[4], buf[5]])); + let w = i32::from(u16::from_be_bytes([buf[6], buf[7]])); + let h = i32::from(u16::from_be_bytes([buf[8], buf[9]])); + set_area_dirty( + &mut client.dirty_bitmap.lock().unwrap(), + x, + y, + w, + h, + width, + height, + )?; + } + drop(locked_state); + self.update_event_handler(1, ClientIoHandler::handle_protocol_msg); + Ok(()) + } + + /// Keyboard event. + fn key_envent(&mut self) -> Result<()> { + if self.expect == 1 { + self.expect = 8; + return Ok(()); + } + let buf = self.read_incoming_msg(); + let down: bool = buf[1] != 0; + let org_keysym = i32::from_be_bytes([buf[4], buf[5], buf[6], buf[7]]); + let mut keysym = org_keysym; + + trace::vnc_client_key_event(&keysym, &down); + let server = self.server.clone(); + + // Uppercase -> Lowercase. + if (ASCII_A..=ASCII_Z).contains(&keysym) { + keysym += UPPERCASE_TO_LOWERCASE; + } + + let keycode: u16 = match server.keysym2keycode.get(&(keysym as u16)) { + Some(k) => *k, + None => 0, + }; + + // Ctr + Alt + Num(1~9) + // Switch to the corresponding display device. + if (KEYCODE_1..KEYCODE_9 + 1).contains(&keycode) + && down + && self.server.display_listener.is_some() + && keyboard_modifier_get(KeyboardModifier::KeyModCtrl) + && keyboard_modifier_get(KeyboardModifier::KeyModAlt) + { + keyboard_state_reset(); + console_select(Some((keycode - KEYCODE_1) as usize))?; + } + + update_key_state(down, org_keysym, keycode)?; + key_event(keycode, down)?; + + self.update_event_handler(1, ClientIoHandler::handle_protocol_msg); + Ok(()) + } + + // Mouse event. + pub fn point_event(&mut self) -> Result<()> { + if self.expect == 1 { + self.expect = 6; + return Ok(()); + } + + let buf = self.read_incoming_msg(); + let mut x = (u16::from(buf[2]) << 8) + u16::from(buf[3]); + let mut y = (u16::from(buf[4]) << 8) + u16::from(buf[5]); + trace::vnc_client_point_event(&buf[1], &x, &y); + + // Window size alignment. + let locked_surface = self.server.vnc_surface.lock().unwrap(); + let width = get_image_width(locked_surface.server_image); + let height = get_image_height(locked_surface.server_image); + drop(locked_surface); + x = ((u64::from(x) * ABS_MAX) / width as u64) as u16; + y = ((u64::from(y) * ABS_MAX) / height as u64) as u16; + + // ASCII -> HidCode. 
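+        // Compare the new button mask with the previous one and only forward
+        // the buttons whose state actually changed.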
+ let new_button = buf[1]; + let last_button = self.client.client_dpm.lock().unwrap().last_button; + if last_button != new_button { + for bit in 0..VNC_INPUT_BUTTON_MAX_NUM { + let button_mask = 1 << bit; + if last_button & button_mask == new_button & button_mask { + continue; + } + + let button = match button_mask { + VNC_INPUT_BUTTON_LEFT => INPUT_POINT_LEFT, + VNC_INPUT_BUTTON_RIGHT => INPUT_POINT_RIGHT, + VNC_INPUT_BUTTON_MIDDLE => INPUT_POINT_MIDDLE, + VNC_INPUT_BUTTON_WHEEL_UP => INPUT_BUTTON_WHEEL_UP, + VNC_INPUT_BUTTON_WHEEL_DOWN => INPUT_BUTTON_WHEEL_DOWN, + VNC_INPUT_BUTTON_WHEEL_RIGHT => INPUT_BUTTON_WHEEL_RIGHT, + VNC_INPUT_BUTTON_WHEEL_LEFT => INPUT_BUTTON_WHEEL_LEFT, + VNC_INPUT_BUTTON_BACK => INPUT_POINT_BACK, + _ => u32::from(button_mask), + }; + input_button(button, new_button & button_mask != 0)?; + } + self.client.client_dpm.lock().unwrap().last_button = new_button; + } + + input_move_abs(Axis::X, u32::from(x))?; + input_move_abs(Axis::Y, u32::from(y))?; + input_point_sync()?; + + self.update_event_handler(1, ClientIoHandler::handle_protocol_msg); + Ok(()) + } + + /// Client cut text. + fn client_cut_event(&mut self) { + let buf = self.read_incoming_msg(); + if self.expect == 1 { + self.expect = 8; + return; + } + if self.expect == 8 { + let buf = [buf[4], buf[5], buf[6], buf[7]]; + let len = u32::from_be_bytes(buf); + if len > 0 { + self.expect += len as usize; + return; + } + } + + self.update_event_handler(1, ClientIoHandler::handle_protocol_msg); + } + + /// Invalid authentication, send 1 to reject. + fn auth_failed(&mut self, msg: &str) { + let auth_rej: u8 = 1; + let mut buf: Vec = vec![1u8]; + buf.append(&mut u32::from(auth_rej).to_be_bytes().to_vec()); + // If the RFB protocol version is above 3.8, an error reason will be returned. + if self.client.conn_state.lock().unwrap().version.minor >= 8 { + let err_msg = msg; + buf.append(&mut (err_msg.len() as u32).to_be_bytes().to_vec()); + buf.append(&mut err_msg.as_bytes().to_vec()); + } + let client = self.client.clone(); + vnc_write(&client, buf); + vnc_flush(&client); + } + + /// Read the data from the receiver buffer. + pub fn read_incoming_msg(&mut self) -> Vec { + let mut buf: Vec = vec![0_u8; self.expect]; + let mut locked_in_buffer = self.client.in_buffer.lock().unwrap(); + let _size: usize = locked_in_buffer.read_front(&mut buf, self.expect); + buf + } + + /// Action token after the event. + /// + /// # Arguments + /// + /// * `expect` - the size of bytes of next callback function. + /// * `msg_handler` - callback function of the next event. + pub fn update_event_handler( + &mut self, + expect: usize, + msg_handler: fn(&mut ClientIoHandler) -> Result<()>, + ) { + self.client + .in_buffer + .lock() + .unwrap() + .remove_front(self.expect); + self.expect = expect; + self.msg_handler = msg_handler; + } + + fn disconn_evt_handler(&mut self) -> Vec { + let notifiers_fds = vec![ + self.stream.as_raw_fd(), + self.client.write_fd.lock().unwrap().as_raw_fd(), + self.client.disconn_evt.lock().unwrap().as_raw_fd(), + ]; + gen_delete_notifiers(¬ifiers_fds) + } +} + +/// Internal notifiers for Client message. +impl EventNotifierHelper for ClientIoHandler { + fn internal_notifiers(client_io_handler: Arc>) -> Vec { + let mut notifiers: Vec = Vec::new(); + + // Register event notifier for read. 
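+        // A READ_HANG_UP event or a failed read marks the connection for
+        // teardown; otherwise incoming bytes are handed to
+        // client_handle_read().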
+ let client_io = client_io_handler.clone(); + let handler: Rc = Rc::new(move |event, _fd: RawFd| { + let mut locked_client_io = client_io.lock().unwrap(); + let client = locked_client_io.client.clone(); + if event & EventSet::READ_HANG_UP == EventSet::READ_HANG_UP { + client.conn_state.lock().unwrap().dis_conn = true; + } else if event & EventSet::IN == EventSet::IN { + if let Err(e) = locked_client_io.client_handle_read() { + error!("{:?}", e); + client.conn_state.lock().unwrap().dis_conn = true; + } + } + // Do disconnection event. + if client.conn_state.lock().unwrap().is_disconnect() { + vnc_disconnect_start(&client); + } + drop(locked_client_io); + None + }); + let client_io = client_io_handler.clone(); + client_io + .lock() + .unwrap() + .handlers + .insert("vnc_client_io".to_string(), handler.clone()); + notifiers.push(EventNotifier::new( + NotifierOperation::AddShared, + client_io.lock().unwrap().stream.as_raw_fd(), + None, + EventSet::IN | EventSet::READ_HANG_UP, + vec![handler], + )); + + // Register event notifier for write. + let client_io = client_io_handler.clone(); + let client = client_io.lock().unwrap().client.clone(); + let handler: Rc = Rc::new(move |_event, fd| { + read_fd(fd); + let mut locked_client_io = client_io.lock().unwrap(); + let client = locked_client_io.client.clone(); + locked_client_io.client_handle_write(); + // do disconnection event. + if client.conn_state.lock().unwrap().is_disconnect() { + vnc_disconnect_start(&client); + } + drop(locked_client_io); + None + }); + notifiers.push(EventNotifier::new( + NotifierOperation::AddShared, + client.write_fd.lock().unwrap().as_raw_fd(), + None, + EventSet::IN, + vec![handler], + )); + + // Register event for disconnect. + let client_io = client_io_handler.clone(); + let handler: Rc = Rc::new(move |_event, fd| { + read_fd(fd); + // Drop client info from vnc server. + let mut locked_client_io = client_io.lock().unwrap(); + let client = locked_client_io.client.clone(); + let addr = client.addr.clone(); + let server = locked_client_io.server.clone(); + let notifiers = locked_client_io.disconn_evt_handler(); + // Shutdown stream. + if let Err(e) = locked_client_io.stream.shutdown(Shutdown::Both) { + error!("Shutdown stream failed: {:?}", e); + } + drop(locked_client_io); + server.client_handlers.lock().unwrap().remove(&addr); + Some(notifiers) + }); + let client = client_io_handler.lock().unwrap().client.clone(); + notifiers.push(EventNotifier::new( + NotifierOperation::AddShared, + client.disconn_evt.lock().unwrap().as_raw_fd(), + None, + EventSet::IN, + vec![handler], + )); + + notifiers + } +} + +/// Generate the data that needs to be sent. +/// Add to send queue +pub fn get_rects(client: &Arc, server: &Arc, dirty_num: i32) -> Result<()> { + let mut locked_state = client.conn_state.lock().unwrap(); + let num = locked_state.dirty_num; + locked_state.dirty_num = num.checked_add(dirty_num).unwrap_or(0); + if !locked_state.is_need_update() { + return Ok(()); + } + drop(locked_state); + + let mut x: u64; + let mut y: u64 = 0; + let mut h: u64; + let mut x2: u64; + let mut rects = Vec::new(); + let locked_dpm = client.client_dpm.lock().unwrap(); + let height = locked_dpm.client_height as u64; + let width = locked_dpm.client_width as u64; + drop(locked_dpm); + let mut locked_dirty = client.dirty_bitmap.lock().unwrap(); + let bpl = locked_dirty.vol() / MAX_WINDOW_HEIGHT as usize; + + loop { + // Find the first non-zero bit in dirty bitmap. 
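+        // The bit offset is then split into a row (y) and a bit column (x),
+        // where each column stands for DIRTY_PIXELS_NUM pixels.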
+ let offset = locked_dirty.find_next_bit(y as usize * bpl).unwrap() as u64; + if offset >= height * bpl as u64 { + break; + } + + x = offset % bpl as u64; + y = offset / bpl as u64; + // Find value in one line to the end. + x2 = locked_dirty.find_next_zero(offset as usize).unwrap() as u64 % bpl as u64; + let mut i = y; + while i < height { + if !locked_dirty.contain((i * bpl as u64 + x) as usize).unwrap() { + break; + } + let start = (i * bpl as u64 + x) as usize; + let len = (x2 - x) as usize; + locked_dirty.clear_range(start, len)?; + i += 1; + } + + h = i - y; + x2 = cmp::min(x2, width / u64::from(DIRTY_PIXELS_NUM)); + if x2 > x { + rects.push(Rectangle::new( + (x * u64::from(DIRTY_PIXELS_NUM)) as i32, + y as i32, + ((x2 - x) * u64::from(DIRTY_PIXELS_NUM)) as i32, + h as i32, + )); + } + + if x == 0 && x2 == width / u64::from(DIRTY_PIXELS_NUM) { + y += h; + if y == height { + break; + } + } + } + + drop(locked_dirty); + + server + .rect_jobs + .lock() + .unwrap() + .push(RectInfo::new(client, rects)); + + client.conn_state.lock().unwrap().clear_update_state(); + Ok(()) +} + +/// Set pixformat for client. +fn pixel_format_message(client: &Arc, buf: &mut Vec) { + let mut locked_dpm = client.client_dpm.lock().unwrap(); + locked_dpm.pf.init_pixelformat(); + let big_endian: u8 = u8::from(cfg!(target_endian = "big")); + buf.append(&mut locked_dpm.pf.pixel_bits.to_be_bytes().to_vec()); // Bit per pixel. + buf.append(&mut locked_dpm.pf.depth.to_be_bytes().to_vec()); // Depth. + buf.append(&mut big_endian.to_be_bytes().to_vec()); // Big-endian flag. + buf.append(&mut (1_u8).to_be_bytes().to_vec()); // True-color flag. + buf.append(&mut u16::from(locked_dpm.pf.red.max).to_be_bytes().to_vec()); // Red max. + buf.append(&mut u16::from(locked_dpm.pf.green.max).to_be_bytes().to_vec()); // Green max. + buf.append(&mut u16::from(locked_dpm.pf.blue.max).to_be_bytes().to_vec()); // Blue max. + buf.append(&mut locked_dpm.pf.red.shift.to_be_bytes().to_vec()); // Red shift. + buf.append(&mut locked_dpm.pf.green.shift.to_be_bytes().to_vec()); // Green shift. + buf.append(&mut locked_dpm.pf.blue.shift.to_be_bytes().to_vec()); // Blue shift. + buf.append(&mut [0; 3].to_vec()); // Padding. + drop(locked_dpm); +} + +/// Set Desktop Size. +pub fn desktop_resize( + client: &Arc, + server: &Arc, + buf: &mut Vec, +) -> Result<()> { + let locked_surface = server.vnc_surface.lock().unwrap(); + let width = get_image_width(locked_surface.server_image); + let height = get_image_height(locked_surface.server_image); + trace::vnc_server_desktop_resize(&width, &height); + + if !(0..=MAX_IMAGE_SIZE).contains(&width) || !(0..=MAX_IMAGE_SIZE).contains(&height) { + return Err(anyhow!(VncError::InvalidImageSize(width, height))); + } + drop(locked_surface); + let mut locked_dpm = client.client_dpm.lock().unwrap(); + if (!locked_dpm.has_feature(VncFeatures::VncFeatureResizeExt) + && !locked_dpm.has_feature(VncFeatures::VncFeatureResize)) + || (locked_dpm.client_width == width && locked_dpm.client_height == height) + { + return Ok(()); + } + locked_dpm.client_width = width; + locked_dpm.client_height = height; + drop(locked_dpm); + + buf.append(&mut (ServerMsg::FramebufferUpdate as u8).to_be_bytes().to_vec()); + buf.append(&mut (0_u8).to_be_bytes().to_vec()); + buf.append(&mut (1_u16).to_be_bytes().to_vec()); + framebuffer_update(0, 0, width, height, ENCODING_DESKTOPRESIZE, buf); + Ok(()) +} + +/// Set color depth for client. 
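+/// If the client advertised the WMVI pseudo-encoding, the server pushes its
+/// current pixel format; otherwise pixel conversion is enabled for clients
+/// with a non-default format.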
+pub fn set_color_depth(client: &Arc, buf: &mut Vec) { + let mut locked_dpm = client.client_dpm.lock().unwrap(); + if locked_dpm.has_feature(VncFeatures::VncFeatureWmvi) { + let client_width = locked_dpm.client_width; + let client_height = locked_dpm.client_height; + drop(locked_dpm); + buf.append(&mut (ServerMsg::FramebufferUpdate as u8).to_be_bytes().to_vec()); + buf.append(&mut (0_u8).to_be_bytes().to_vec()); + buf.append(&mut (1_u16).to_be_bytes().to_vec()); + framebuffer_update(0, 0, client_width, client_height, ENCODING_WMVI, buf); + pixel_format_message(client, buf); + } else if !locked_dpm.pf.is_default_pixel_format() { + locked_dpm.convert = true; + } +} + +/// Send framebuf of mouse to the client. +pub fn display_cursor_define( + client: &Arc, + server: &Arc, + buf: &mut Vec, +) { + let locked_cursor = server.vnc_cursor.lock().unwrap(); + let mut cursor = match &locked_cursor.cursor { + Some(c) => c.clone(), + None => { + return; + } + }; + let mut mask = match &locked_cursor.mask { + Some(m) => m.clone(), + None => { + return; + } + }; + drop(locked_cursor); + if cursor.data.is_empty() + || cursor.data.len() != ((cursor.width * cursor.height) as usize) * bytes_per_pixel() + { + return; + } + if client + .client_dpm + .lock() + .unwrap() + .has_feature(VncFeatures::VncFeatureAlphaCursor) + { + buf.append(&mut (ServerMsg::FramebufferUpdate as u8).to_be_bytes().to_vec()); + buf.append(&mut (0_u8).to_be_bytes().to_vec()); // padding + buf.append(&mut (1_u16).to_be_bytes().to_vec()); // number of rects + + framebuffer_update( + cursor.hot_x as i32, + cursor.hot_y as i32, + cursor.width as i32, + cursor.height as i32, + ENCODING_ALPHA_CURSOR, + buf, + ); + buf.append(&mut (ENCODING_RAW as u32).to_be_bytes().to_vec()); + buf.append(&mut cursor.data); + return; + } + + if client + .client_dpm + .lock() + .unwrap() + .has_feature(VncFeatures::VncFeatureRichCursor) + { + buf.append(&mut (ServerMsg::FramebufferUpdate as u8).to_be_bytes().to_vec()); + buf.append(&mut (0_u8).to_be_bytes().to_vec()); // padding + buf.append(&mut (1_u16).to_be_bytes().to_vec()); // number of rects + + framebuffer_update( + cursor.hot_x as i32, + cursor.hot_y as i32, + cursor.width as i32, + cursor.height as i32, + ENCODING_RICH_CURSOR, + buf, + ); + let dpm = client.client_dpm.lock().unwrap().clone(); + let data_size = cursor.width * cursor.height * u32::from(dpm.pf.pixel_bytes); + let data_ptr = cursor.data.as_ptr() as *mut u8; + write_pixel(data_ptr, data_size as usize, &dpm, buf); + buf.append(&mut mask); + } +} + +pub fn vnc_write(client: &Arc, buf: Vec) { + if client.conn_state.lock().unwrap().dis_conn { + return; + } + let mut locked_buffer = client.out_buffer.lock().unwrap(); + if !locked_buffer.is_enough(buf.len()) { + client.conn_state.lock().unwrap().dis_conn = true; + return; + } + locked_buffer.append_limit(buf); +} + +/// Set the limit size of the output buffer to prevent the client +/// from stopping receiving data. +pub fn vnc_update_output_throttle(client: &Arc) { + let locked_dpm = client.client_dpm.lock().unwrap(); + let width = locked_dpm.client_width; + let height = locked_dpm.client_height; + let bytes_per_pixel = locked_dpm.pf.pixel_bytes; + let mut offset = width * height * i32::from(bytes_per_pixel) * OUTPUT_THROTTLE_SCALE; + drop(locked_dpm); + + offset = cmp::max(offset, MIN_OUTPUT_LIMIT); + client + .out_buffer + .lock() + .unwrap() + .set_limit(Some(offset as usize)); +} + +/// Flush the output buffer. 
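+///
+/// This only signals `write_fd`; the write notifier registered for the connection is
+/// what actually drains `out_buffer` through `client_handle_write`.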
+pub fn vnc_flush(client: &Arc) { + client + .write_fd + .lock() + .unwrap() + .write(1) + .unwrap_or_else(|e| error!("Error occurs during data flush:{:?}", e)); +} + +/// Disconnect for vnc client. +pub fn vnc_disconnect_start(client: &Arc) { + client + .disconn_evt + .lock() + .unwrap() + .write(1) + .unwrap_or_else(|e| error!("Error occurs during disconnection: {:?}", e)); +} diff --git a/ui/src/vnc/encoding/enc_hextile.rs b/ui/src/vnc/encoding/enc_hextile.rs new file mode 100644 index 0000000000000000000000000000000000000000..f41a4d8c1c273870d97edcce0b63d134dd5bf062 --- /dev/null +++ b/ui/src/vnc/encoding/enc_hextile.rs @@ -0,0 +1,477 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{cmp, mem}; + +use crate::{ + pixman::{bytes_per_pixel, get_image_data, get_image_stride}, + vnc::{ + client_io::{DisplayMode, Rectangle}, + write_pixel, + }, +}; +use util::pixman::pixman_image_t; + +/// Size of subrectangle. +const HEXTILE_BLOCK_SIZE: usize = 16; +/// SubEncoding type of hextile. +const RAW: u8 = 0x01; +const BACKGROUND_SPECIFIC: u8 = 0x02; +const FOREGROUND_SPECIFIC: u8 = 0x04; +const ANY_SUBRECTS: u8 = 0x08; +const SUBRECTS_COLOURED: u8 = 0x10; + +/// Compress data by hextile algorithm before sending. +/// Rectangles are split up into 16 * 16 tiles. +/// +/// # Arguments +/// +/// * `image` - pointer to the data need to be send. +/// * `rect` - dirty area of image. +/// * `client_dpm` - Output mode information of client display. +/// * `buf` - send buffer. +pub fn hextile_send_framebuffer_update( + image: *mut pixman_image_t, + rect: &Rectangle, + client_dpm: &DisplayMode, + buf: &mut Vec, +) -> i32 { + let mut last_bg: Option = None; + let mut last_fg: Option = None; + for j in (0..rect.h).step_by(HEXTILE_BLOCK_SIZE) { + for i in (0..rect.w).step_by(HEXTILE_BLOCK_SIZE) { + let sub_rect = Rectangle::new( + rect.x + i, + rect.y + j, + cmp::min(HEXTILE_BLOCK_SIZE as i32, rect.w - i), + cmp::min(HEXTILE_BLOCK_SIZE as i32, rect.h - j), + ); + compress_each_tile( + image, + &sub_rect, + client_dpm, + buf, + &mut last_bg, + &mut last_fg, + ); + } + } + 1 +} + +/// Compress each tiles by hextile algorithm. +/// +/// # Arguments +/// +/// * `image` - pointer to the data need to be send. +/// * `sub_rect` - area of tile. +/// * `client_dpm` - Output mode information of client display. +/// * `buf` - send buffer. +/// * `last_bg` - background of last tile. +/// * `last_fg` - foreground of last tile. +fn compress_each_tile<'a>( + image: *mut pixman_image_t, + sub_rect: &Rectangle, + client_dpm: &DisplayMode, + buf: &mut Vec, + last_bg: &'a mut Option, + last_fg: &'a mut Option, +) { + let stride = get_image_stride(image); + let mut data_ptr = get_image_data(image) as *mut u8; + data_ptr = (data_ptr as usize + + (sub_rect.y * stride) as usize + + sub_rect.x as usize * bytes_per_pixel()) as *mut u8; + let mut flag: u8 = 0; // Subencoding mask. + let mut bg: u32 = 0; // Pixel value of background. + let mut fg: u32 = 0; // Pixel value of foreground. 
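+    // `flag` collects the hextile subencoding bits (BACKGROUND_SPECIFIC,
+    // FOREGROUND_SPECIFIC, ANY_SUBRECTS, SUBRECTS_COLOURED) depending on how many
+    // distinct colours the tile holds; RAW replaces them when the compressed form
+    // would end up larger than the raw tile.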
+ let n_colors = pixel_statistical(data_ptr, stride, sub_rect, &mut bg, &mut fg); + let mut n_subtiles = 0; // Number of subrectangle. + let mut tmp_buf: Vec = Vec::new(); + + if last_bg.is_none() || Some(bg) != *last_bg { + flag |= BACKGROUND_SPECIFIC; + *last_bg = Some(bg); + } + if n_colors < 3 && (last_fg.is_none() || Some(fg) != *last_fg) { + flag |= FOREGROUND_SPECIFIC; + *last_fg = Some(fg); + } + + match n_colors { + 2 => { + flag |= ANY_SUBRECTS; + n_subtiles = + subrectangle_of_foreground(sub_rect, data_ptr, bg, fg, stride, &mut tmp_buf); + } + 3 => { + flag |= ANY_SUBRECTS | SUBRECTS_COLOURED; + if last_bg.is_none() || Some(bg) != *last_bg { + flag |= BACKGROUND_SPECIFIC; + } + n_subtiles = subrectangle_with_pixel_value( + sub_rect, + data_ptr, + bg, + stride, + client_dpm, + &mut tmp_buf, + ); + // If the length becomes longer after compression, give up compression. + if tmp_buf.len() + > (sub_rect.h * sub_rect.w * i32::from(client_dpm.pf.pixel_bytes)) as usize + { + flag = RAW; + *last_bg = None; + } + *last_fg = None; + } + _ => {} + } + + buf.append(&mut flag.to_be_bytes().to_vec()); // SubEncoding-mask. + if flag & RAW == 0 { + if flag & BACKGROUND_SPECIFIC != 0 { + write_pixel( + bg.to_ne_bytes().as_ptr() as *mut u8, + bytes_per_pixel(), + client_dpm, + buf, + ); + } + if flag & FOREGROUND_SPECIFIC != 0 { + write_pixel( + fg.to_ne_bytes().as_ptr() as *mut u8, + bytes_per_pixel(), + client_dpm, + buf, + ); + } + if n_subtiles != 0 { + buf.append(&mut (n_subtiles as u8).to_be_bytes().to_vec()); // Num of SubRectangles. + buf.append(&mut tmp_buf); // SubrectsColoured. + } + } else { + // Send data directly without compression. + for j in 0..sub_rect.h { + let ptr = (data_ptr as usize + (j * stride) as usize) as *mut u8; + write_pixel(ptr, (sub_rect.w * 4) as usize, client_dpm, buf); + } + } +} + +/// Specifies all subrectangles of foreground colour in this tile. +/// +/// # Arguments +/// +/// * `sub_rect` - area of tile. +/// * `data_ptr` - pointer to the data of image. +/// * `bg` - background of current tile. +/// * `fg` - foreground of current tile. +/// * `stride` - stride of image. +/// * `buf` - send buffer. +fn subrectangle_of_foreground( + sub_rect: &Rectangle, + data_ptr: *mut u8, + bg: u32, + fg: u32, + stride: i32, + buf: &mut Vec, +) -> i32 { + let mut n_subtiles = 0; + for j in 0..sub_rect.h { + let ptr = (data_ptr as usize + (j * stride) as usize) as *mut u32; + let mut x_begin = -1; + for i in 0..sub_rect.w { + // SAFETY: it can be ensure the raw pointer will not exceed the range. + let value = unsafe { *ptr.add(i as usize) }; + if value == fg && x_begin == -1 { + x_begin = i; + } else if value == bg && x_begin != -1 { + hextile_enc_sub_coloured(buf, x_begin, j, i - x_begin, 1); + n_subtiles += 1; + x_begin = -1; + } + } + if x_begin != -1 { + hextile_enc_sub_coloured(buf, x_begin, j, sub_rect.w - x_begin, 1); + n_subtiles += 1; + } + } + n_subtiles +} + +/// Specifies all subrectangles with pixel value. +/// +/// # Arguments +/// +/// * `sub_rect` - area of tile. +/// * `data_ptr` - pointer to the data of image. +/// * `bg` - background of current tile. +/// * `stride` - stride of image. +/// * `client_dpm` - Output mode information of client display. +/// * `buf` - send buffer. 
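+///
+/// Returns the number of coloured subrectangles appended to `buf`.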
+fn subrectangle_with_pixel_value( + sub_rect: &Rectangle, + data_ptr: *mut u8, + bg: u32, + stride: i32, + client_dpm: &DisplayMode, + buf: &mut Vec, +) -> i32 { + let mut n_subtiles = 0; + for j in 0..sub_rect.h { + let mut x_begin = -1; + let mut last_color: Option = None; + let ptr = (data_ptr as usize + (j * stride) as usize) as *mut u32; + for i in 0..sub_rect.w { + // SAFETY: it can be ensure the raw pointer will not exceed the range. + let value = unsafe { *ptr.offset(i as isize) }; + match last_color { + Some(color) => { + if color != value { + last_color = None; + write_pixel( + color.to_ne_bytes().as_ptr() as *mut u8, + bytes_per_pixel(), + client_dpm, + buf, + ); + hextile_enc_sub_coloured(buf, x_begin, j, i - x_begin, 1); + n_subtiles += 1; + x_begin = -1; + if value != bg { + last_color = Some(value); + x_begin = i; + } + } + } + None => { + if value == bg { + continue; + } + last_color = Some(value); + x_begin = i; + } + } + } + if let Some(color) = last_color { + write_pixel( + color.to_ne_bytes().as_ptr() as *mut u8, + bytes_per_pixel(), + client_dpm, + buf, + ); + n_subtiles += 1; + hextile_enc_sub_coloured(buf, x_begin, j, sub_rect.w - x_begin, 1) + } + } + + n_subtiles +} + +/// Encode SubrectsColoured. +/// First Byte: x-and-y-position +/// Second Byte: width-and-height-position. +fn hextile_enc_sub_coloured(buf: &mut Vec, x: i32, y: i32, w: i32, h: i32) { + buf.append( + &mut (((x & 0x0f) << 4 | (y & 0x0f)) as u8) + .to_be_bytes() + .to_vec(), + ); + buf.append( + &mut ((((w - 1) & 0x0f) << 4 | ((h - 1) & 0x0f)) as u8) + .to_be_bytes() + .to_vec(), + ); +} + +/// Count the total number of different pixels in rectangle. +/// +/// # Arguments +/// +/// * `data_ptr` - pointer to the data. +/// * `stride` - number of bytes for one line of image data. +/// * `sub_rect` - subrectangle. +/// * `bg` - background. +/// * `fg` - foreground. +fn pixel_statistical<'a>( + data_ptr: *mut u8, + stride: i32, + sub_rect: &Rectangle, + bg: &'a mut u32, + fg: &'a mut u32, +) -> usize { + let mut n_colors = 0; + let mut bg_count = 0; // Number of background. + let mut fg_count = 0; // Number of foreground. + + for j in 0..sub_rect.h { + let ptr = (data_ptr as usize + (j * stride) as usize) as *mut u32; + for i in 0..sub_rect.w { + // SAFETY: it can be ensure the raw pointer will not exceed the range. 
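+            // (`j < sub_rect.h` and `i < sub_rect.w`, and the tile dimensions were
+            // clamped to the dirty rectangle when it was split into 16 x 16 blocks.)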
+ let value = unsafe { *ptr.offset(i as isize) }; + match n_colors { + 0 => { + *bg = value; + n_colors = 1; + } + 1 => { + if *bg != value { + *fg = value; + n_colors = 2; + } + } + 2 => { + if value == *bg { + bg_count += 1; + } else if value == *fg { + fg_count += 1; + } else { + n_colors = 3; + } + } + _ => { + break; + } + } + } + if n_colors > 2 { + break; + } + } + + if n_colors > 1 && fg_count > bg_count { + mem::swap(bg, fg); + } + + n_colors +} + +#[cfg(test)] +mod tests { + use super::hextile_send_framebuffer_update; + use crate::{ + pixman::{create_pixman_image, PixelFormat}, + vnc::{ + client_io::{DisplayMode, Rectangle, ENCODING_HEXTILE}, + encoding::test_hextile_image_data::{ + IMAGE_DATA_MULTI_PIXELS, IMAGE_DATA_SINGLE_PIXEL, IMAGE_DATA_TWO_PIXEL, + TARGET_DATA_MULTI_PIXELS, TARGET_DATA_SINGLE_PIXEL, TARGET_DATA_TWO_PIXEL, + }, + }, + }; + use util::pixman::pixman_format_code_t; + fn color_init() -> PixelFormat { + let mut pf = PixelFormat::default(); + pf.red.set_color_info(16, 255); + pf.green.set_color_info(8, 255); + pf.blue.set_color_info(0, 255); + pf.pixel_bits = 32; + pf.pixel_bytes = 4; + pf.depth = 24; + pf + } + + #[test] + fn test_hextile_send_framebuffer_single_pixel() { + let pf = color_init(); + let convert = false; + let client_be = false; + let enc = ENCODING_HEXTILE; + let client_dpm = DisplayMode::new(enc, client_be, convert, pf); + let image_data = IMAGE_DATA_SINGLE_PIXEL; + let target_data = TARGET_DATA_SINGLE_PIXEL; + let image_width: i32 = 32; + let image_height: i32 = 32; + let image_stride: i32 = 128; + + let image = create_pixman_image( + pixman_format_code_t::PIXMAN_x8r8g8b8, + image_width, + image_height, + image_data.as_ptr() as *mut u32, + image_stride, + ); + let mut buf: Vec = Vec::new(); + let rect = Rectangle { + x: 0, + y: 0, + w: image_width, + h: image_height, + }; + hextile_send_framebuffer_update(image, &rect, &client_dpm, &mut buf); + assert_eq!(buf, target_data); + } + + #[test] + fn test_hextile_send_framebuffer_two_pixels() { + let pf = color_init(); + let convert = false; + let client_be = false; + let enc = ENCODING_HEXTILE; + let client_dpm = DisplayMode::new(enc, client_be, convert, pf); + let image_data = IMAGE_DATA_TWO_PIXEL; + let target_data = TARGET_DATA_TWO_PIXEL; + let image_width: i32 = 40; + let image_height: i32 = 40; + let image_stride: i32 = 160; + + let image = create_pixman_image( + pixman_format_code_t::PIXMAN_x8r8g8b8, + image_width, + image_height, + image_data.as_ptr() as *mut u32, + image_stride, + ); + let mut buf: Vec = Vec::new(); + let rect = Rectangle { + x: 0, + y: 0, + w: image_width, + h: image_height, + }; + hextile_send_framebuffer_update(image, &rect, &client_dpm, &mut buf); + assert_eq!(buf, target_data); + } + + #[test] + fn test_hextile_send_framebuffer_multi_pixels() { + let pf = color_init(); + let convert = false; + let client_be = false; + let enc = ENCODING_HEXTILE; + let client_dpm = DisplayMode::new(enc, client_be, convert, pf); + let image_data = IMAGE_DATA_MULTI_PIXELS; + let target_data = TARGET_DATA_MULTI_PIXELS; + let image_width: i32 = 40; + let image_height: i32 = 40; + let image_stride: i32 = 160; + + let image = create_pixman_image( + pixman_format_code_t::PIXMAN_x8r8g8b8, + image_width, + image_height, + image_data.as_ptr() as *mut u32, + image_stride, + ); + let mut buf: Vec = Vec::new(); + let rect = Rectangle { + x: 0, + y: 0, + w: image_width, + h: image_height, + }; + hextile_send_framebuffer_update(image, &rect, &client_dpm, &mut buf); + assert_eq!(buf, target_data); + } 
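+
+    // A minimal sanity check of the SubrectsColoured geometry packing used by the
+    // cases above: x/y go into the high/low nibbles of the first byte and
+    // (w - 1)/(h - 1) into the second.
+    #[test]
+    fn test_hextile_enc_sub_coloured_packing() {
+        let mut buf: Vec<u8> = Vec::new();
+        super::hextile_enc_sub_coloured(&mut buf, 3, 2, 4, 1);
+        assert_eq!(buf, vec![0x32u8, 0x30u8]);
+    }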
+} diff --git a/ui/src/vnc/encoding/mod.rs b/ui/src/vnc/encoding/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..1cb6fe98da8134f983a1655bf8257bdac919c46f --- /dev/null +++ b/ui/src/vnc/encoding/mod.rs @@ -0,0 +1,16 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod enc_hextile; + +#[cfg(test)] +mod test_hextile_image_data; diff --git a/ui/src/vnc/encoding/test_hextile_image_data.rs b/ui/src/vnc/encoding/test_hextile_image_data.rs new file mode 100644 index 0000000000000000000000000000000000000000..4aeadf1e4bdb2d148ec673c047a8ad376af88b6e --- /dev/null +++ b/ui/src/vnc/encoding/test_hextile_image_data.rs @@ -0,0 +1,1160 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +/// Image data: Each tile contains only one pixel. +/// Width of image = 32 +/// Height of image = 32 +/// Stride of image = 128 +/// Total length is 4096 Byte. 
+pub const IMAGE_DATA_SINGLE_PIXEL: [u8; 4096] = [ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +]; + +/// The data stream obtained after the IMAGE_DATA_2 is compressed using the Hextile algorithm. +/// Total length is equal to 12. +pub const TARGET_DATA_SINGLE_PIXEL: [u8; 12] = [ + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +]; + +/// Image data: Each tile contains two different pixels. +/// Width of image = 40 +/// Height of image = 40 +/// Stride of image = 160 +/// Total length is 6400 Byte. +pub const IMAGE_DATA_TWO_PIXEL: [u8; 6400] = [ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 
0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 
0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0xaa, 0xaa, 0xaa, 0x00, +]; + +pub const TARGET_DATA_TWO_PIXEL: [u8; 348] = [ + 0x0e, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0x00, 0x19, 0x02, 0x70, 0x03, 0x10, 0x33, 0x10, + 0x63, 0x10, 0x04, 0x00, 0x34, 0x10, 0x74, 0x00, 0x35, 0x10, 0x95, 0x30, 0x36, 0x10, 0xc6, 0x10, + 0x37, 0x10, 0x97, 0x40, 0x38, 0x10, 0x88, 0x10, 0xc8, 0x10, 0x39, 0x10, 0x89, 0x10, 0xc9, 0x10, + 0x3a, 0x10, 0x8a, 0x10, 0xca, 0x10, 0x2b, 0x30, 0x9b, 0x20, 0xdb, 0x10, 0x08, 0x18, 0x82, 0x20, + 0x93, 0x10, 0x94, 0x10, 0x15, 0x40, 0x95, 0x10, 0xd5, 0x10, 0x06, 0x10, 0x56, 0x10, 0x96, 0x10, + 0xc6, 0x10, 0x17, 0x10, 0x97, 0x30, 0x28, 0x20, 0x98, 0x30, 0x49, 0x10, 0x99, 0x10, 0xc9, 0x10, + 0x0a, 0x10, 0x5a, 0x10, 0x9a, 0x10, 0xda, 0x10, 0x1b, 0x40, 0x8b, 0x20, 0xdb, 0x10, 0x08, 0x09, + 0x15, 0x40, 0x06, 0x10, 0x56, 0x10, 0x17, 0x10, 0x28, 0x20, 0x49, 0x10, 0x0a, 0x10, 0x5a, 0x10, + 0x1b, 0x40, 0x08, 0x1a, 0xa2, 0x30, 0x93, 0x10, 0xd3, 0x10, 0x04, 0x10, 0x64, 0x00, 0x84, 0x10, + 0xe4, 0x00, 0x05, 0x10, 0x55, 0x10, 0x85, 0x10, 0x46, 0x10, 0x86, 0x10, 0x37, 0x10, 0x87, 0x10, + 0x28, 0x10, 0x88, 0x10, 0x19, 0x10, 0x89, 0x10, 0xe9, 0x00, 0x0a, 0x10, 0x5a, 0x10, 0x9a, 0x10, + 0xda, 0x10, 0x0b, 0x00, 0x5b, 0x10, 0xab, 0x30, 0x08, 0x1e, 0x05, 0x10, 0x35, 0x20, 0x85, 0x10, + 0xc5, 0x10, 0x16, 0x10, 0x56, 0x10, 0x86, 0x10, 0xc6, 0x10, 0x17, 0x10, 0x57, 0x10, 0x87, 0x10, + 0xc7, 0x10, 0x18, 0x10, 0x58, 0x10, 0x88, 0x10, 0xc8, 0x10, 0x19, 0x10, 0x59, 0x10, 0x89, 0x10, + 0xc9, 0x10, 0x1a, 0x10, 0x5a, 0x10, 0x8a, 0x10, 0xca, 0x10, 0x1b, 0x40, 0x9b, 0x20, 0xdb, 0x10, + 0x1c, 0x10, 0x1d, 0x10, 0x0e, 0x30, 0x08, 0x0a, 0x42, 0x10, 0x33, 0x10, 0x24, 0x10, 0x25, 0x10, + 0x26, 0x10, 0x27, 0x10, 0x28, 0x10, 0x29, 0x10, 0x3a, 0x10, 0x4b, 0x10, 0x08, 0x10, 0x02, 0x10, + 0x62, 0x10, 0xb2, 0x10, 0x03, 0x20, 0x53, 0x20, 0xb3, 0x10, 0x04, 0x70, 0x05, 0x70, 0xa5, 0x20, + 0x06, 0x10, 0x36, 0x10, 0x66, 0x10, 0xb6, 0x10, 0x07, 0x10, 0x67, 0x10, 0xb7, 0x10, 0x08, 0x0a, + 0x02, 0x50, 0x13, 0x10, 0x53, 0x10, 0x14, 0x10, 0x54, 0x10, 0x15, 0x10, 0x55, 0x10, 0x16, 0x40, + 0x17, 0x10, 0x57, 0x10, 0x0e, 0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x70, + 0x01, 0x70, 0x22, 0x30, 0x33, 0x10, 0x26, 0x00, 0x56, 0x00, 0x27, 0x30, +]; + +/// Image data: Each tile contains multi pixels. +/// Width of image = 40 +/// Height of image = 40 +/// Stride of image = 160 +/// Total length is 6400 Byte. 
+pub const IMAGE_DATA_MULTI_PIXELS: [u8; 6400] = [ + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 
0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 
0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 
0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 
0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 
0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 
0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7b, 0xff, + 0x8a, 0x71, 0x7b, 0xff, 
0x8a, 0x71, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 
0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 
0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 
0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, + 0x8a, 0x71, 0x7a, 0xff, 
0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, +]; + +pub const TARGET_DATA_MULTI_PIXELS: [u8; 557] = [ + 0x1a, 0x8a, 0x71, 0x7b, 0xff, 0x20, 0x8a, 0x72, 0x7b, 0xff, 0x00, 0x50, 0x8a, 0x71, 0x7a, 0xff, + 0xe0, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x01, 0x50, 0x8a, 0x71, 0x7a, 0xff, 0xe1, 0x10, 0x8a, 0x72, + 0x7b, 0xff, 0x02, 0x50, 0x8a, 0x71, 0x7a, 0xff, 0xe2, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x03, 0x50, + 0x8a, 0x71, 0x7a, 0xff, 0xe3, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x04, 0x50, 0x8a, 0x71, 0x7a, 0xff, + 0xe4, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x05, 0x50, 0x8a, 0x71, 0x7a, 0xff, 0xe5, 0x10, 0x8a, 0x72, + 0x7b, 0xff, 0x06, 0x10, 0x8a, 0x71, 0x7a, 0xff, 0xe6, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x07, 0x10, + 0x8a, 0x71, 0x7a, 0xff, 0xe7, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x08, 0x10, 0x8a, 0x71, 0x7a, 0xff, + 0xe8, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x09, 0x10, 0x8a, 0x71, 0x7a, 0xff, 0xe9, 0x10, 0x8a, 0x72, + 0x7b, 0xff, 0x0a, 0x10, 0x8a, 0x71, 0x7a, 0xff, 0xea, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x0b, 0x10, + 0x8a, 0x71, 0x7a, 0xff, 0xeb, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x0c, 0x10, 0x8a, 0x71, 0x7a, 0xff, + 0xec, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x0d, 0x10, 0x8a, 0x71, 0x7a, 0xff, 0xed, 0x10, 0x8a, 0x72, + 0x7b, 0xff, 0x0e, 0x10, 0x8a, 0x71, 0x7a, 0xff, 0xee, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x0f, 0x10, + 0x8a, 0x71, 0x7a, 0xff, 0xef, 0x10, 0x06, 0x8a, 0x71, 0x7a, 0xff, 0x00, 0x00, 0x00, 0x00, 0x0e, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x10, 0x00, 0x10, 0x01, 0x10, 0x02, 0x10, 0x03, + 0x10, 0x04, 0x10, 0x05, 0x10, 0x06, 0x10, 0x07, 0x10, 0x08, 0x10, 0x09, 0x10, 0x0a, 0x10, 0x0b, + 0x10, 0x0c, 0x10, 0x0d, 0x10, 0x0e, 0x10, 0x0f, 0x10, 0x1a, 0x8a, 0x71, 0x7b, 0xff, 0x20, 0x8a, + 0x72, 0x7b, 0xff, 0x00, 0x10, 0x8a, 0x71, 0x7a, 0xff, 0xe0, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x01, + 0x10, 0x8a, 0x71, 0x7a, 0xff, 0xe1, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x02, 0x10, 0x8a, 0x71, 0x7a, + 0xff, 0xe2, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x03, 0x10, 0x8a, 0x71, 0x7a, 0xff, 0xe3, 0x10, 0x8a, + 0x72, 0x7b, 0xff, 0x04, 0x10, 0x8a, 0x71, 0x7a, 0xff, 0xe4, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x05, + 0x10, 0x8a, 0x71, 0x7a, 0xff, 0xe5, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x06, 0x10, 0x8a, 0x71, 0x7a, + 0xff, 0xe6, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x07, 0x10, 0x8a, 0x71, 0x7a, 0xff, 0xe7, 0x10, 0x8a, + 0x72, 0x7b, 0xff, 0x08, 0x10, 0x8a, 0x71, 0x7a, 0xff, 0xe8, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x09, + 0x10, 0x8a, 0x71, 0x7a, 0xff, 0xe9, 0x10, 0x8a, 0x72, 0x7b, 0xff, 0x0a, 0x10, 0x8a, 0x71, 0x7a, + 0xff, 0x2a, 0xd0, 0x8a, 0x72, 0x7b, 0xff, 0x0b, 0x10, 0x8a, 0x71, 0x7a, 0xff, 0x2b, 0xd0, 0x8a, + 0x72, 0x7b, 0xff, 0x0c, 0x10, 0x8a, 0x71, 0x7a, 0xff, 0x2c, 0xd0, 0x8a, 0x72, 0x7b, 0xff, 0x0d, + 0x10, 0x8a, 0x71, 0x7a, 0xff, 0x2d, 0xd0, 0x8a, 0x72, 0x7b, 0xff, 0x0e, 0x10, 0x8a, 0x71, 0x7a, + 0xff, 0x2e, 0xd0, 0x8a, 0x72, 0x7b, 0xff, 0x0f, 0x10, 0x8a, 0x71, 0x7a, 0xff, 0x2f, 0xd0, 0x0e, + 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x70, 0x7a, 0xff, 0x0a, 0xe6, 0x10, 0xe7, 0x10, 0xe8, 0x10, 0xe9, + 0x10, 0xea, 0x10, 0xeb, 0x10, 0xec, 0x10, 0xed, 0x10, 0xee, 0x10, 0xef, 0x10, 0x0e, 0x8a, 0x70, + 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x06, 0x00, 0x10, 0x01, 0x10, 0x02, 0x10, 0x03, 0x10, 0x04, + 0x10, 0x05, 0x10, 0x0e, 0x8a, 0x71, 0x7a, 0xff, 0x8a, 0x72, 0x7b, 0xff, 0x08, 0x00, 0x10, 0x01, + 0x10, 0x02, 0x10, 0x03, 0x10, 0x04, 0x10, 0x05, 0x10, 0x06, 0x10, 0x07, 0x10, 0x0c, 0x8a, 0x70, + 0x7a, 0xff, 0x06, 0xe0, 0x10, 0xe1, 0x10, 0xe2, 0x10, 0xe3, 0x10, 0xe4, 0x10, 0xe5, 0x10, 
0x0e, + 0x8a, 0x70, 0x7a, 0xff, 0x8a, 0x71, 0x7a, 0xff, 0x02, 0x06, 0x10, 0x07, 0x10, +]; diff --git a/ui/src/vnc/mod.rs b/ui/src/vnc/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..a760ae3b331efd8fb69a28c9a0efb0f1af549071 --- /dev/null +++ b/ui/src/vnc/mod.rs @@ -0,0 +1,692 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +#[cfg(feature = "vnc_auth")] +pub mod auth_sasl; +#[cfg(feature = "vnc_auth")] +pub mod auth_vencrypt; +pub mod client_io; +pub mod encoding; +pub mod server_io; + +use std::{ + cmp, + net::TcpListener, + ptr, + sync::{Arc, Mutex}, + thread, +}; + +use anyhow::{anyhow, Result}; +use core::time; +use once_cell::sync::Lazy; + +use crate::{ + console::{ + graphic_hardware_update, register_display, DisplayChangeListener, + DisplayChangeListenerOperations, DisplayMouse, DisplaySurface, + DISPLAY_UPDATE_INTERVAL_DEFAULT, DISPLAY_UPDATE_INTERVAL_INC, DISPLAY_UPDATE_INTERVAL_MAX, + }, + error::VncError, + keycode::{DpyMod, KeyCode}, + pixman::{ + bytes_per_pixel, create_pixman_image, get_image_data, get_image_height, get_image_stride, + get_image_width, ref_pixman_image, unref_pixman_image, + }, + vnc::{ + client_io::{ + desktop_resize, display_cursor_define, get_rects, set_color_depth, vnc_flush, + vnc_update_output_throttle, vnc_write, DisplayMode, Rectangle, ServerMsg, + ENCODING_HEXTILE, ENCODING_RAW, + }, + encoding::enc_hextile::hextile_send_framebuffer_update, + server_io::{make_server_config, VncConnHandler, VncServer, VncSurface}, + }, +}; +use machine_manager::{ + config::{ObjectConfig, VncConfig}, + event_loop::EventLoop, + qmp::qmp_schema::{VncClientInfo, VncInfo}, +}; +use util::{ + bitmap::Bitmap, + loop_context::EventNotifierHelper, + pixman::{pixman_format_code_t, pixman_image_t}, +}; + +/// The number of dirty pixels represented bt one bit in dirty bitmap. +pub const DIRTY_PIXELS_NUM: u16 = 16; +/// The default max window width. +pub const MAX_WINDOW_WIDTH: u16 = round_up(2560, DIRTY_PIXELS_NUM as u64) as u16; +/// The default max window height. +pub const MAX_WINDOW_HEIGHT: u16 = 2048; +pub const DIRTY_WIDTH_BITS: u16 = MAX_WINDOW_WIDTH / DIRTY_PIXELS_NUM; +pub const VNC_BITMAP_WIDTH: u64 = + round_up_div(DIRTY_WIDTH_BITS as u64, u64::BITS as u64) * u64::BITS as u64; +pub const MAX_IMAGE_SIZE: i32 = 65535; + +/// Output throttle scale. +pub const OUTPUT_THROTTLE_SCALE: i32 = 5; +/// Min size of output buffer. 
+pub const MIN_OUTPUT_LIMIT: i32 = 1024 * 1024 * OUTPUT_THROTTLE_SCALE; +const DEFAULT_REFRESH_INTERVAL: u64 = 30; +pub const BIT_PER_BYTE: u32 = 8; + +pub const fn round_up_div(n: u64, d: u64) -> u64 { + (n + d - 1) / d +} + +pub const fn round_up(n: u64, d: u64) -> u64 { + round_up_div(n, d) * d +} + +/// Authentication type +#[derive(Clone, Copy)] +pub enum AuthState { + Invalid = 0, + No = 1, + Vnc = 2, + Vencrypt = 19, + Sasl = 20, +} + +#[derive(Default)] +pub struct VncInterface {} +impl DisplayChangeListenerOperations for VncInterface { + /// Update guest_image + /// Send a resize command to the client based on whether the image size has changed + fn dpy_switch(&self, surface: &DisplaySurface) -> Result<()> { + if VNC_SERVERS.lock().unwrap().is_empty() { + return Ok(()); + } + let server = VNC_SERVERS.lock().unwrap()[0].clone(); + let mut locked_vnc_surface = server.vnc_surface.lock().unwrap(); + let need_resize = check_surface(&mut locked_vnc_surface, surface); + unref_pixman_image(locked_vnc_surface.guest_image); + + // Vnc_pixman_image_ref + locked_vnc_surface.guest_image = ref_pixman_image(surface.image); + locked_vnc_surface.guest_format = surface.format; + + let guest_width: i32 = get_image_width(locked_vnc_surface.guest_image); + let guest_height: i32 = get_image_height(locked_vnc_surface.guest_image); + trace::vnc_dpy_switch( + &guest_width, + &guest_height, + &surface.width(), + &surface.height(), + ); + if !need_resize { + trace::vnc_dpy_pageflip(&guest_width, &guest_height, &surface.format); + set_area_dirty( + &mut locked_vnc_surface.guest_dirty_bitmap, + 0, + 0, + guest_width, + guest_height, + guest_width, + guest_height, + )?; + return Ok(()); + } + drop(locked_vnc_surface); + + update_server_surface(&server)?; + + let mut locked_handlers = server.client_handlers.lock().unwrap(); + for client in locked_handlers.values_mut() { + let width = vnc_width(guest_width); + let height = vnc_height(guest_height); + let mut buf: Vec = Vec::new(); + // Set Color depth. + set_color_depth(client, &mut buf); + // Desktop_resize. + desktop_resize(client, &server, &mut buf)?; + // Cursor define. + display_cursor_define(client, &server, &mut buf); + vnc_write(client, buf); + vnc_flush(client); + client.dirty_bitmap.lock().unwrap().clear_all(); + set_area_dirty( + &mut client.dirty_bitmap.lock().unwrap(), + 0, + 0, + width, + height, + guest_width, + guest_height, + )?; + vnc_update_output_throttle(client); + } + Ok(()) + } + + /// Refresh server_image to guest_image. + fn dpy_refresh(&self, dcl: &Arc>) -> Result<()> { + if VNC_SERVERS.lock().unwrap().is_empty() { + return Ok(()); + } + let server = VNC_SERVERS.lock().unwrap()[0].clone(); + if server.client_handlers.lock().unwrap().is_empty() { + return Ok(()); + } + let con_id = dcl.lock().unwrap().con_id; + graphic_hardware_update(con_id); + + // Update refresh interval. 
+ let mut update_interval = dcl.lock().unwrap().update_interval; + let dirty_num = server.vnc_surface.lock().unwrap().update_server_image()?; + if dirty_num != 0 { + update_interval /= 2; + if update_interval < DISPLAY_UPDATE_INTERVAL_DEFAULT { + update_interval = DISPLAY_UPDATE_INTERVAL_DEFAULT + } + } else { + update_interval += DISPLAY_UPDATE_INTERVAL_INC; + if update_interval > DISPLAY_UPDATE_INTERVAL_MAX { + update_interval = DISPLAY_UPDATE_INTERVAL_MAX; + } + } + dcl.lock().unwrap().update_interval = update_interval; + trace::vnc_dpy_refresh(&dirty_num, &update_interval); + + let mut locked_handlers = server.client_handlers.lock().unwrap(); + for client in locked_handlers.values_mut() { + get_rects(client, &server, dirty_num)?; + } + Ok(()) + } + + fn dpy_image_update(&self, x: i32, y: i32, w: i32, h: i32) -> Result<()> { + if VNC_SERVERS.lock().unwrap().is_empty() { + return Ok(()); + } + + trace::vnc_dpy_image_update(&x, &y, &w, &h); + let server = VNC_SERVERS.lock().unwrap()[0].clone(); + let mut locked_vnc_surface = server.vnc_surface.lock().unwrap(); + let g_w = get_image_width(locked_vnc_surface.guest_image); + let g_h = get_image_height(locked_vnc_surface.guest_image); + set_area_dirty( + &mut locked_vnc_surface.guest_dirty_bitmap, + x, + y, + w, + h, + g_w, + g_h, + )?; + drop(locked_vnc_surface); + Ok(()) + } + + fn dpy_cursor_update(&self, cursor: &DisplayMouse) -> Result<()> { + if VNC_SERVERS.lock().unwrap().is_empty() { + return Ok(()); + } + let server = VNC_SERVERS.lock().unwrap()[0].clone(); + let width = u64::from(cursor.width); + let height = u64::from(cursor.height); + trace::vnc_dpy_cursor_update(&width, &height); + let bpl = round_up_div(width, u64::from(BIT_PER_BYTE)); + // Set the bit for mask. + let bit_mask: u8 = 0x80; + + let mut mask: Vec<u8> = vec![0; (bpl * height) as usize]; + let first_bit = if cfg!(target_endian = "big") { + 0_usize + } else { + bytes_per_pixel() - 1 + }; + + for j in 0..height { + let mut bit = bit_mask; + for i in 0..width { + let idx = ((i + j * width) as usize) * bytes_per_pixel() + first_bit; + if let Some(n) = cursor.data.get(idx) { + if *n == 0xff { + mask[(j * bpl + i / u64::from(BIT_PER_BYTE)) as usize] |= bit; + } + } + bit >>= 1; + if bit == 0 { + bit = bit_mask; + } + } + } + + server.vnc_cursor.lock().unwrap().cursor = Some(cursor.clone()); + server.vnc_cursor.lock().unwrap().mask = Some(mask.clone()); + + let mut locked_handler = server.client_handlers.lock().unwrap(); + // Send the framebuffer for each client. + for client in locked_handler.values_mut() { + let mut buf: Vec<u8> = Vec::new(); + display_cursor_define(client, &server, &mut buf); + vnc_write(client, buf); + vnc_flush(client); + } + Ok(()) + } +} + +/// Initialization function of vnc +/// +/// # Arguments +/// +/// * `VncConfig` `object` - vnc related parameters +pub fn vnc_init(vnc: &Option<VncConfig>, object: &ObjectConfig) -> Result<()> { + let vnc_cfg = match vnc { + Some(cfg) => cfg, + None => return Ok(()), + }; + + let addr = format!("{}:{}", vnc_cfg.addr.0, vnc_cfg.addr.1); + let listener: TcpListener = match TcpListener::bind(addr.as_str()) { + Ok(l) => l, + Err(e) => { + let msg = format!("Bind {} failed {}", addr, e); + return Err(anyhow!(VncError::TcpBindFailed(msg))); + } + }; + + listener + .set_nonblocking(true) + .expect("Set nonblocking for vnc socket failed"); + + // Mapping ASCII to keycode.
+ let keysym2keycode = KeyCode::keysym_to_qkeycode(DpyMod::Vnc); + + let vnc_opts = Arc::new(VncInterface::default()); + let dcl = Arc::new(Mutex::new(DisplayChangeListener::new(None, vnc_opts))); + + let server = Arc::new(VncServer::new( + get_client_image(), + keysym2keycode, + Some(Arc::downgrade(&dcl)), + )); + + // Parameter configuration for VncServer. + make_server_config(&server, vnc_cfg, object)?; + + // Add a VncServer. + add_vnc_server(server.clone()); + + // Register in display console. + register_display(&dcl)?; + + // Register the event to listen for client's connection. + let vnc_io = Arc::new(Mutex::new(VncConnHandler::new(listener, server))); + + // Vnc_thread: a thread to send the framebuffer + start_vnc_thread()?; + + EventLoop::update_event(EventNotifierHelper::internal_notifiers(vnc_io), None)?; + Ok(()) +} + +fn start_vnc_thread() -> Result<()> { + let interval = DEFAULT_REFRESH_INTERVAL; + let server = VNC_SERVERS.lock().unwrap()[0].clone(); + let _handle = thread::Builder::new() + .name("vnc_worker".to_string()) + .spawn(move || loop { + let rect_jobs = server.rect_jobs.clone(); + if rect_jobs.lock().unwrap().is_empty() { + thread::sleep(time::Duration::from_millis(interval)); + continue; + } + + let mut rect_info = match rect_jobs.lock().unwrap().get_mut(0) { + Some(rect) => rect.clone(), + None => { + thread::sleep(time::Duration::from_millis(interval)); + continue; + } + }; + rect_jobs.lock().unwrap().remove(0); + + let mut num_rects: i32 = 0; + let mut buf = Vec::new(); + buf.append(&mut (ServerMsg::FramebufferUpdate as u8).to_be_bytes().to_vec()); + buf.append(&mut (0_u8).to_be_bytes().to_vec()); + buf.append(&mut [0_u8; 2].to_vec()); + + for rect in rect_info.rects.iter_mut() { + let locked_surface = server.vnc_surface.lock().unwrap(); + let dpm = rect_info.client.client_dpm.lock().unwrap().clone(); + let width = dpm.client_width; + let height = dpm.client_height; + if check_rect(rect, width, height) { + let n = + send_framebuffer_update(locked_surface.server_image, rect, &dpm, &mut buf); + if n >= 0 { + num_rects += n; + } + } + } + buf[2] = (num_rects >> 8) as u8; + buf[3] = num_rects as u8; + + let client = rect_info.client; + vnc_write(&client, buf); + vnc_flush(&client); + })?; + Ok(()) +} + +/// Add a vnc server during initialization. +fn add_vnc_server(server: Arc<VncServer>) { + VNC_SERVERS.lock().unwrap().push(server); +} + +/// Qmp: return the information about current VNC server. +pub fn qmp_query_vnc() -> Option<VncInfo> { + let mut vnc_info = VncInfo::default(); + if VNC_SERVERS.lock().unwrap().is_empty() { + vnc_info.enabled = false; + return Some(vnc_info); + } + vnc_info.enabled = true; + let server = VNC_SERVERS.lock().unwrap()[0].clone(); + vnc_info.family = "ipv4".to_string(); + + let mut locked_handler = server.client_handlers.lock().unwrap(); + for client in locked_handler.values_mut() { + let mut client_info = VncClientInfo { + host: client.addr.clone(), + ..Default::default() + }; + client_info.family = "ipv4".to_string(); + vnc_info.clients.push(client_info); + } + + Some(vnc_info) +} + +/// Set dirty in bitmap.
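// Illustrative sketch, not part of this patch: the bitmap indexing that set_area_dirty()
// below relies on. Each bit of the dirty bitmap covers DIRTY_PIXELS_NUM (16) horizontal
// pixels, and each row of the bitmap is VNC_BITMAP_WIDTH bits wide; with the constants
// defined earlier, VNC_BITMAP_WIDTH = round_up_div(2560 / 16, 64) * 64 = 192. The helper
// name here is hypothetical and only restates those definitions.
fn dirty_bit_index(x: i32, y: i32) -> usize {
    const DIRTY_PIXELS_NUM: i32 = 16;
    const VNC_BITMAP_WIDTH: i32 = 192;
    (y * VNC_BITMAP_WIDTH + x / DIRTY_PIXELS_NUM) as usize
}

fn main() {
    // A pixel at (x = 100, y = 3) is tracked by bit 3 * 192 + 100 / 16 = 582.
    assert_eq!(dirty_bit_index(100, 3), 582);
}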
+pub fn set_area_dirty( + dirty: &mut Bitmap, + mut x: i32, + mut y: i32, + mut w: i32, + mut h: i32, + g_w: i32, + g_h: i32, +) -> Result<()> { + let width: i32 = vnc_width(g_w); + let height: i32 = vnc_height(g_h); + + w += x % i32::from(DIRTY_PIXELS_NUM); + x -= x % i32::from(DIRTY_PIXELS_NUM); + + x = cmp::min(x, width); + y = cmp::min(y, height); + w = cmp::min(x + w, width) - x; + h = cmp::min(y + h, height); + while y < h { + let pos = (y * VNC_BITMAP_WIDTH as i32 + x / i32::from(DIRTY_PIXELS_NUM)) as usize; + let len = round_up_div(w as u64, u64::from(DIRTY_PIXELS_NUM)) as usize; + dirty.set_range(pos, len)?; + y += 1; + } + Ok(()) +} + +/// Get the width of image. +fn vnc_width(width: i32) -> i32 { + cmp::min( + i32::from(MAX_WINDOW_WIDTH), + round_up(width as u64, u64::from(DIRTY_PIXELS_NUM)) as i32, + ) +} + +/// Get the height of image. +fn vnc_height(height: i32) -> i32 { + cmp::min(i32::from(MAX_WINDOW_HEIGHT), height) +} + +/// Update server image +pub fn update_server_surface(server: &Arc) -> Result<()> { + let mut locked_vnc_surface = server.vnc_surface.lock().unwrap(); + unref_pixman_image(locked_vnc_surface.server_image); + locked_vnc_surface.server_image = ptr::null_mut(); + // Server image changes, clear the task queue. + server.rect_jobs.lock().unwrap().clear(); + if server.client_handlers.lock().unwrap().is_empty() { + return Ok(()); + } + + let g_width = get_image_width(locked_vnc_surface.guest_image); + let g_height = get_image_height(locked_vnc_surface.guest_image); + let width = vnc_width(g_width); + let height = vnc_height(g_height); + locked_vnc_surface.server_image = create_pixman_image( + pixman_format_code_t::PIXMAN_x8r8g8b8, + width, + height, + ptr::null_mut(), + 0, + ); + + locked_vnc_surface.guest_dirty_bitmap.clear_all(); + set_area_dirty( + &mut locked_vnc_surface.guest_dirty_bitmap, + 0, + 0, + width, + height, + g_width, + g_height, + ) +} + +/// Check if the surface for VncClient is need update +fn check_surface(locked_vnc_surface: &mut VncSurface, surface: &DisplaySurface) -> bool { + let guest_width = get_image_width(surface.image); + let guest_height = get_image_height(surface.image); + let server_width = get_image_width(locked_vnc_surface.server_image); + let server_height = get_image_height(locked_vnc_surface.server_image); + if !(0..=MAX_IMAGE_SIZE).contains(&guest_width) || !(0..=MAX_IMAGE_SIZE).contains(&guest_height) + { + return false; + } + + if surface.image.is_null() + || locked_vnc_surface.server_image.is_null() + || locked_vnc_surface.guest_format != surface.format + || guest_width != server_width + || guest_height != server_height + { + return true; + } + + false +} + +/// Check if rectangle is in spec +fn check_rect(rect: &mut Rectangle, width: i32, height: i32) -> bool { + if rect.x >= width || rect.y >= height { + return false; + } + + rect.w = cmp::min(width - rect.x, rect.w); + rect.h = cmp::min(height - rect.y, rect.h); + if rect.w <= 0 || rect.h <= 0 { + return false; + } + + true +} + +/// Send updated pixel information to client +/// +/// # Arguments +/// +/// * `x` `y` `w` `h` - coordinate, width, height +/// * `buf` - send buffer +pub fn framebuffer_update(x: i32, y: i32, w: i32, h: i32, encoding: i32, buf: &mut Vec) { + buf.append(&mut (x as u16).to_be_bytes().to_vec()); + buf.append(&mut (y as u16).to_be_bytes().to_vec()); + buf.append(&mut (w as u16).to_be_bytes().to_vec()); + buf.append(&mut (h as u16).to_be_bytes().to_vec()); + buf.append(&mut encoding.to_be_bytes().to_vec()); +} + +/// Write pixel to client. 
+/// +/// # Arguments +/// +/// * `data_ptr` - pointer to the data needed. +/// * `copy_bytes` - total bytes to write. +/// * `client_dpm` - Output mode of client display. +/// * `buf` - send buffer. +pub fn write_pixel( + data_ptr: *mut u8, + copy_bytes: usize, + client_dpm: &DisplayMode, + buf: &mut Vec<u8>, +) { + if !client_dpm.convert { + let mut con = vec![0; copy_bytes]; + // SAFETY: It can be ensured that the raw pointer will not exceed the range. + unsafe { + ptr::copy(data_ptr as *mut u8, con.as_mut_ptr(), copy_bytes); + } + buf.append(&mut con); + } else if client_dpm.convert && bytes_per_pixel() == 4 { + let num = copy_bytes >> 2; + let ptr = data_ptr as *mut u32; + for i in 0..num { + // SAFETY: It can be ensured that the raw pointer will not exceed the range. + let color = unsafe { *ptr.add(i) }; + convert_pixel(client_dpm, buf, color); + } + } +} + +/// Convert the sent information to a format supported +/// by the client depending on byte arrangement +/// +/// # Arguments +/// +/// * `client_dpm` - Output mode of client display. +/// * `buf` - send buffer. +/// * `color` - the pixel value that needs to be converted. +fn convert_pixel(client_dpm: &DisplayMode, buf: &mut Vec<u8>, color: u32) { + let mut ret = [0u8; 4]; + let r = ((color & 0x00ff0000) >> 16) << client_dpm.pf.red.bits >> 8; + let g = ((color & 0x0000ff00) >> 8) << client_dpm.pf.green.bits >> 8; + let b = (color & 0x000000ff) << client_dpm.pf.blue.bits >> 8; + let v = (r << client_dpm.pf.red.shift) + | (g << client_dpm.pf.green.shift) + | (b << client_dpm.pf.blue.shift); + match client_dpm.pf.pixel_bytes { + 1 => { + ret[0] = v as u8; + } + 2 => { + if client_dpm.client_be { + ret[0] = (v >> 8) as u8; + ret[1] = v as u8; + } else { + ret[1] = (v >> 8) as u8; + ret[0] = v as u8; + } + } + 4 => { + if client_dpm.client_be { + ret = v.to_be_bytes(); + } else { + ret = v.to_le_bytes(); + } + } + _ => { + if client_dpm.client_be { + ret = v.to_be_bytes(); + } else { + ret = v.to_le_bytes(); + } + } + } + buf.append(&mut ret[..client_dpm.pf.pixel_bytes as usize].to_vec()); +} + +/// Send raw data directly without compression +/// +/// # Arguments +/// +/// * `image` - pointer to the data that needs to be sent. +/// * `rect` - dirty area of image. +/// * `client_dpm` - Output mode information of client display. +/// * `buf` - send buffer. +fn raw_send_framebuffer_update( + image: *mut pixman_image_t, + rect: &Rectangle, + client_dpm: &DisplayMode, + buf: &mut Vec<u8>, +) -> i32 { + let mut data_ptr = get_image_data(image) as *mut u8; + let stride = get_image_stride(image); + data_ptr = (data_ptr as usize + + (rect.y * stride) as usize + + rect.x as usize * bytes_per_pixel()) as *mut u8; + + let copy_bytes = rect.w as usize * bytes_per_pixel(); + + for _i in 0..rect.h { + write_pixel(data_ptr, copy_bytes, client_dpm, buf); + data_ptr = (data_ptr as usize + stride as usize) as *mut u8; + } + + 1 +} + +/// Send data according to compression algorithm +/// +/// # Arguments +/// +/// * `image` - pointer to the data that needs to be sent. +/// * `rect` - dirty area of image. +/// * `client_dpm` - Output mode information of client display. +/// * `buf` - send buffer.
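// Illustrative sketch, not part of this patch: the per-channel narrowing that
// convert_pixel() above performs, specialized to a hypothetical RGB565 client format
// (5/6/5 bits with shifts 11/5/0). A 32-bit 0x00RRGGBB pixel is truncated per channel
// and repacked, which is what the bits/shift fields of the client pixel format drive.
fn rgb888_to_rgb565(color: u32) -> u16 {
    let r = ((color >> 16) & 0xff) >> 3; // keep the top 5 bits of red
    let g = ((color >> 8) & 0xff) >> 2; // keep the top 6 bits of green
    let b = (color & 0xff) >> 3; // keep the top 5 bits of blue
    ((r << 11) | (g << 5) | b) as u16
}

fn main() {
    assert_eq!(rgb888_to_rgb565(0x00ff_ffff), 0xffff); // white keeps every field saturated
    assert_eq!(rgb888_to_rgb565(0x00ff_0000), 0xf800); // pure red keeps only the red field
}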
+fn send_framebuffer_update( + image: *mut pixman_image_t, + rect: &Rectangle, + client_dpm: &DisplayMode, + buf: &mut Vec, +) -> i32 { + match client_dpm.enc { + ENCODING_HEXTILE => { + framebuffer_update(rect.x, rect.y, rect.w, rect.h, ENCODING_HEXTILE, buf); + hextile_send_framebuffer_update(image, rect, client_dpm, buf) + } + _ => { + framebuffer_update(rect.x, rect.y, rect.w, rect.h, ENCODING_RAW, buf); + raw_send_framebuffer_update(image, rect, client_dpm, buf) + } + } +} + +/// Initialize a default image +/// Default: width is 640, height is 480, stride is 640 * 4 +fn get_client_image() -> *mut pixman_image_t { + create_pixman_image( + pixman_format_code_t::PIXMAN_x8r8g8b8, + 640, + 480, + ptr::null_mut(), + 640 * 4, + ) +} + +pub static VNC_SERVERS: Lazy>>> = Lazy::new(|| Mutex::new(Vec::new())); diff --git a/ui/src/vnc/server_io.rs b/ui/src/vnc/server_io.rs new file mode 100644 index 0000000000000000000000000000000000000000..2b29035593a4b765fe01e5309e9473df26bfbe8f --- /dev/null +++ b/ui/src/vnc/server_io.rs @@ -0,0 +1,555 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights r&eserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{ + cell::RefCell, + cmp, + collections::HashMap, + net::{SocketAddr, TcpListener, TcpStream}, + os::unix::prelude::{AsRawFd, RawFd}, + ptr, + rc::Rc, + sync::{Arc, Mutex, Weak}, +}; + +#[cfg(feature = "vnc_auth")] +use anyhow::anyhow; +use anyhow::Result; +use log::error; +use vmm_sys_util::epoll::EventSet; + +use crate::{ + console::{DisplayChangeListener, DisplayMouse}, + pixman::{ + bytes_per_pixel, get_image_data, get_image_format, get_image_height, get_image_stride, + get_image_width, pixman_image_linebuf_create, pixman_image_linebuf_fill, + unref_pixman_image, + }, + vnc::{ + client_io::{vnc_flush, vnc_write, ClientIoHandler, ClientState, IoChannel, RectInfo}, + round_up_div, update_server_surface, DIRTY_PIXELS_NUM, MAX_WINDOW_HEIGHT, MAX_WINDOW_WIDTH, + VNC_BITMAP_WIDTH, VNC_SERVERS, + }, +}; +#[cfg(feature = "vnc_auth")] +use crate::{ + error::VncError, + vnc::{ + auth_sasl::{SaslAuth, SaslConfig, SubAuthState}, + auth_vencrypt::{make_vencrypt_config, TlsCreds, ANON_CERT, X509_CERT}, + AuthState, + }, +}; +use machine_manager::{ + config::{ObjectConfig, VncConfig}, + event_loop::EventLoop, +}; +use util::{ + bitmap::Bitmap, + loop_context::{ + read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, + }, + pixman::{pixman_format_bpp, pixman_format_code_t, pixman_image_t}, +}; + +const CONNECTION_LIMIT: usize = 1; + +/// Information of VncServer. +pub struct VncServer { + /// Client io handler. + pub client_handlers: Arc>>>, + /// Security Type for connection. + #[cfg(feature = "vnc_auth")] + pub security_type: Rc>, + /// Mapping ASCII to keycode. + pub keysym2keycode: HashMap, + /// Image data of surface. + pub vnc_surface: Arc>, + /// Data for cursor image. + pub vnc_cursor: Arc>, + /// Display Change Listener. + pub display_listener: Option>>, + /// Saves all image regions that need to be updated. 
+ /// It will be sent to vnc_worker thread, and be transferred into byte stream, + /// which will be sent to vnc client in main loop. + pub rect_jobs: Arc>>, + /// Connection limit. + pub conn_limits: usize, +} + +// SAFETY: The raw pointer in rust doesn't impl Send, the target thread can only read the +// memory of image by this pointer. +unsafe impl Send for VncServer {} +// SAFETY: Tt can be guaranteed that Rc> and Rc> +// are used only in single thread. +unsafe impl Sync for VncServer {} + +impl VncServer { + /// Create a new VncServer. + pub fn new( + guest_image: *mut pixman_image_t, + keysym2keycode: HashMap, + display_listener: Option>>, + ) -> Self { + VncServer { + client_handlers: Arc::new(Mutex::new(HashMap::new())), + #[cfg(feature = "vnc_auth")] + security_type: Rc::new(RefCell::new(SecurityType::default())), + keysym2keycode, + vnc_surface: Arc::new(Mutex::new(VncSurface::new(guest_image))), + vnc_cursor: Arc::new(Mutex::new(VncCursor::default())), + display_listener, + rect_jobs: Arc::new(Mutex::new(Vec::new())), + conn_limits: CONNECTION_LIMIT, + } + } +} + +pub struct VncConnHandler { + /// Tcp connection listened by server. + listener: TcpListener, + /// VncServer. + server: Arc, +} + +impl VncConnHandler { + pub fn new(listener: TcpListener, server: Arc) -> Self { + VncConnHandler { listener, server } + } +} + +/// Internal_notifiers for VncServer. +impl EventNotifierHelper for VncConnHandler { + fn internal_notifiers(vnc_io: Arc>) -> Vec { + let vnc_io_clone = vnc_io.clone(); + let server = vnc_io.lock().unwrap().server.clone(); + // Register event notifier for connection. + let handler: Rc = Rc::new(move |_event, fd: RawFd| { + read_fd(fd); + match vnc_io_clone.clone().lock().unwrap().listener.accept() { + Ok((stream, addr)) => { + if let Err(e) = handle_connection(&server, stream, addr) { + error!("{:?}", e); + } + } + Err(e) => { + error!("Connect failed: {:?}", e); + } + } + None + }); + vec![EventNotifier::new( + NotifierOperation::AddShared, + vnc_io.lock().unwrap().listener.as_raw_fd(), + None, + EventSet::IN, + vec![handler], + )] + } +} + +/// Info of image. +/// stride is not always equal to stride because of memory alignment. +struct ImageInfo { + /// The start pointer to image. + data: *mut u8, + /// The memory size of each line for image. + stride: i32, + /// The memory size of each line to store pixel for image + length: i32, + /// Middle pointer. + ptr: *mut u8, +} + +impl ImageInfo { + fn new(image: *mut pixman_image_t) -> Self { + let bpp = pixman_format_bpp(get_image_format(image) as u32); + let length = get_image_width(image) * round_up_div(u64::from(bpp), 8) as i32; + ImageInfo { + data: get_image_data(image) as *mut u8, + stride: get_image_stride(image), + length, + ptr: ptr::null_mut(), + } + } +} + +/// Security type for connection and transport. +#[cfg(feature = "vnc_auth")] +pub struct SecurityType { + /// Configuration for tls connection. + pub tlscreds: Option, + /// Authentication for connection + pub saslauth: Option, + /// Configuration for sasl Authentication. + pub saslconfig: SaslConfig, + /// Configuration to make tls channel. + pub tls_config: Option>, + /// Auth type. + pub auth: AuthState, + /// Subauth type. 
+ pub subauth: SubAuthState, +} + +#[cfg(feature = "vnc_auth")] +impl Default for SecurityType { + fn default() -> Self { + SecurityType { + tlscreds: None, + saslauth: None, + saslconfig: SaslConfig::default(), + tls_config: None, + auth: AuthState::No, + subauth: SubAuthState::VncAuthVencryptPlain, + } + } +} + +#[cfg(feature = "vnc_auth")] +impl SecurityType { + // Set security config. + fn set_security_config(&mut self, vnc_cfg: &VncConfig, object: &ObjectConfig) -> Result<()> { + // Tls configuration. + if let Some(tls_cred) = object.tls_object.get(&vnc_cfg.tls_creds) { + let tlscred = TlsCreds { + cred_type: "x509".to_string(), + dir: tls_cred.dir.clone(), + endpoint: tls_cred.endpoint.clone(), + verifypeer: tls_cred.verifypeer, + }; + + match make_vencrypt_config(&tlscred) { + Ok(tls_config) => { + self.tls_config = Some(tls_config); + } + Err(e) => { + return Err(e); + } + } + self.tlscreds = Some(tlscred); + } + + // Sasl configuration. + if let Some(sasl_auth) = object.sasl_object.get(&vnc_cfg.sasl_authz) { + self.saslauth = Some(SaslAuth::new(sasl_auth.identity.clone())); + } + + Ok(()) + } + + /// Encryption configuration. + fn set_auth(&mut self) -> Result<()> { + let is_x509: bool; + let is_anon: bool; + let is_sasl: bool = self.saslauth.is_some(); + + if let Some(tlscred) = self.tlscreds.clone() { + is_x509 = tlscred.cred_type == *X509_CERT; + is_anon = tlscred.cred_type == *ANON_CERT; + self.auth = AuthState::Vencrypt; + } else { + self.auth = AuthState::No; + self.subauth = SubAuthState::VncAuthVencryptPlain; + return Ok(()); + } + + if !is_x509 && !is_anon { + return Err(anyhow!(VncError::MakeTlsConnectionFailed(String::from( + "Unsupported tls cred type", + )))); + } + if is_sasl { + if is_x509 { + self.subauth = SubAuthState::VncAuthVencryptX509Sasl; + } else { + self.subauth = SubAuthState::VncAuthVencryptTlssasl; + } + } else if is_x509 { + self.subauth = SubAuthState::VncAuthVencryptX509None; + } else { + self.subauth = SubAuthState::VncAuthVencryptTlNone; + } + Ok(()) + } +} + +/// Image data of cursor. +#[derive(Default)] +pub struct VncCursor { + /// Cursor property. + pub cursor: Option<DisplayMouse>, + /// Identify the area that needs updating for the cursor. + pub mask: Option<Vec<u8>>, +} + +/// The image data for vnc display surface. +pub struct VncSurface { + /// Image from display device. + pub guest_image: *mut pixman_image_t, + /// Identify the image update area for guest image. + pub guest_dirty_bitmap: Bitmap<u64>, + /// Image refresh to vnc client. + pub server_image: *mut pixman_image_t, + /// Image format of pixman. + pub guest_format: pixman_format_code_t, +} + +impl VncSurface { + fn new(guest_image: *mut pixman_image_t) -> Self { + VncSurface { + guest_image, + guest_dirty_bitmap: Bitmap::<u64>::new( + MAX_WINDOW_HEIGHT as usize + * round_up_div( + u64::from(MAX_WINDOW_WIDTH / DIRTY_PIXELS_NUM), + u64::from(u64::BITS), + ) as usize, + ), + server_image: ptr::null_mut(), + guest_format: pixman_format_code_t::PIXMAN_x8r8g8b8, + } + } + + /// Get min width. + fn get_min_width(&self) -> i32 { + cmp::min( + get_image_width(self.server_image), + get_image_width(self.guest_image), + ) + } + + /// Get min height. + fn get_min_height(&self) -> i32 { + cmp::min( + get_image_height(self.server_image), + get_image_height(self.guest_image), + ) + } + + /// Flush dirty data from guest_image to server_image. + /// Return the number of dirty areas.
+ pub fn update_server_image(&mut self) -> Result { + let mut dirty_num = 0; + let height = self.get_min_height() as usize; + let g_bpl = self.guest_dirty_bitmap.vol() / MAX_WINDOW_HEIGHT as usize; + let total_dirty_bits = height.checked_mul(g_bpl).unwrap_or(0); + let mut offset = self + .guest_dirty_bitmap + .find_next_bit(0) + .unwrap_or(total_dirty_bits); + + if offset >= total_dirty_bits { + return Ok(dirty_num); + } + + let mut s_info = ImageInfo::new(self.server_image); + let mut g_info = ImageInfo::new(self.guest_image); + + // The guset image is not changed, so there is no + // need to update the server image. + let cmp_bytes = cmp::min( + DIRTY_PIXELS_NUM as usize * bytes_per_pixel(), + s_info.stride as usize, + ); + + let mut line_buf = ptr::null_mut(); + if self.guest_format != pixman_format_code_t::PIXMAN_x8r8g8b8 { + line_buf = pixman_image_linebuf_create( + pixman_format_code_t::PIXMAN_x8r8g8b8, + get_image_width(self.server_image), + ); + g_info.stride = s_info.stride; + g_info.length = g_info.stride; + } + + loop { + let mut y = offset / g_bpl; + let x = offset % g_bpl; + s_info.ptr = + (s_info.data as usize + y * s_info.stride as usize + x * cmp_bytes) as *mut u8; + + if self.guest_format != pixman_format_code_t::PIXMAN_x8r8g8b8 { + pixman_image_linebuf_fill( + line_buf, + self.guest_image, + self.get_min_width(), + 0_i32, + y as i32, + ); + g_info.ptr = get_image_data(line_buf) as *mut u8; + } else { + g_info.ptr = (g_info.data as usize + y * g_info.stride as usize) as *mut u8; + } + g_info.ptr = (g_info.ptr as usize + x * cmp_bytes) as *mut u8; + dirty_num += self.update_one_line(x, y, &mut s_info, &mut g_info, cmp_bytes)?; + y += 1; + offset = self + .guest_dirty_bitmap + .find_next_bit(y * g_bpl) + .unwrap_or(total_dirty_bits); + if offset >= total_dirty_bits { + break; + } + } + + unref_pixman_image(line_buf); + Ok(dirty_num) + } + + /// Update each line + /// + /// # Arguments + /// + /// * `x` `y` - start coordinate in image to refresh + /// * `s_info` - Info of Server image. + /// * `g_info` - Info of Guest image. + fn update_one_line( + &mut self, + mut x: usize, + y: usize, + s_info: &mut ImageInfo, + g_info: &mut ImageInfo, + cmp_bytes: usize, + ) -> Result { + let mut count = 0; + let width = self.get_min_width(); + let line_bytes = cmp::min(s_info.stride, g_info.length); + + while x < round_up_div(width as u64, u64::from(DIRTY_PIXELS_NUM)) as usize { + if !self + .guest_dirty_bitmap + .contain(x + y * VNC_BITMAP_WIDTH as usize) + .unwrap_or(false) + { + x += 1; + g_info.ptr = (g_info.ptr as usize + cmp_bytes) as *mut u8; + s_info.ptr = (s_info.ptr as usize + cmp_bytes) as *mut u8; + continue; + } + self.guest_dirty_bitmap + .clear(x + y * VNC_BITMAP_WIDTH as usize)?; + let mut _cmp_bytes = cmp_bytes; + if (x + 1) * cmp_bytes > line_bytes as usize { + _cmp_bytes = line_bytes as usize - x * cmp_bytes; + } + + // SAFETY: Tt can be ensure the raw pointer will not exceed the range. + unsafe { + if libc::memcmp( + s_info.ptr as *mut libc::c_void, + g_info.ptr as *mut libc::c_void, + _cmp_bytes, + ) == 0 + { + x += 1; + g_info.ptr = (g_info.ptr as usize + cmp_bytes) as *mut u8; + s_info.ptr = (s_info.ptr as usize + cmp_bytes) as *mut u8; + continue; + } + + ptr::copy(g_info.ptr, s_info.ptr, _cmp_bytes); + }; + + set_dirty_for_each_clients(x, y)?; + count += 1; + + x += 1; + g_info.ptr = (g_info.ptr as usize + cmp_bytes) as *mut u8; + s_info.ptr = (s_info.ptr as usize + cmp_bytes) as *mut u8; + } + + Ok(count) + } +} + +/// Set diry for each client. 
+/// +/// # Arguments +/// +/// * `x` `y`- coordinates of dirty area. +fn set_dirty_for_each_clients(x: usize, y: usize) -> Result<()> { + let server = VNC_SERVERS.lock().unwrap()[0].clone(); + let mut locked_handlers = server.client_handlers.lock().unwrap(); + for client in locked_handlers.values_mut() { + client + .dirty_bitmap + .lock() + .unwrap() + .set(x + y * VNC_BITMAP_WIDTH as usize)?; + } + Ok(()) +} + +/// Accept client's connection. +/// +/// # Arguments +/// +/// * `stream` - TcpStream. +/// * `addr`- SocketAddr. +pub fn handle_connection( + server: &Arc, + stream: TcpStream, + addr: SocketAddr, +) -> Result<()> { + trace::vnc_client_connect(&stream); + + stream + .set_nonblocking(true) + .expect("set nonblocking failed"); + + let io_channel = Rc::new(RefCell::new(IoChannel::new(stream.try_clone().unwrap()))); + // Register event notifier for vnc client. + let client = Arc::new(ClientState::new(addr.to_string())); + let client_io = Arc::new(Mutex::new(ClientIoHandler::new( + stream, + io_channel, + client.clone(), + server.clone(), + ))); + client.conn_state.lock().unwrap().client_io = Some(Arc::downgrade(&client_io)); + vnc_write(&client, "RFB 003.008\n".as_bytes().to_vec()); + vnc_flush(&client); + server + .client_handlers + .lock() + .unwrap() + .insert(addr.to_string(), client); + + EventLoop::update_event(EventNotifierHelper::internal_notifiers(client_io), None)?; + + update_server_surface(server) +} + +/// make configuration for VncServer +/// +/// # Arguments +/// +/// * `vnc_cfg` - configure of vnc. +/// * `object` - configure of sasl and tls. +#[allow(unused_variables)] +pub fn make_server_config( + server: &Arc, + vnc_cfg: &VncConfig, + object: &ObjectConfig, +) -> Result<()> { + #[cfg(feature = "vnc_auth")] + { + // Set security config. + server + .security_type + .borrow_mut() + .set_security_config(vnc_cfg, object)?; + // Set auth type. 
+ server.security_type.borrow_mut().set_auth()?; + } + Ok(()) +} diff --git a/util/Cargo.toml b/util/Cargo.toml index 2bbc9e27f802612416231682cbcc4c4277a3d70e..806fe935e14fe015275892b2b9982bbd9479cf5f 100644 --- a/util/Cargo.toml +++ b/util/Cargo.toml @@ -1,19 +1,33 @@ [package] name = "util" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" license = "Mulan PSL v2" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -arc-swap = "0.4.8" -error-chain = "0.12.4" -kvm-bindings = ">=0.3.0" -kvm-ioctls = "0.6.0" -libc = ">=0.2.71" -log = { version = "0.4.8", features = ["std"]} -vmm-sys-util = ">=0.7.0" -byteorder = "1.3.4" -once_cell = "1.9.0" +arc-swap = "1.6.0" +thiserror = "1.0" +anyhow = "1.0" +kvm-bindings = { version = "0.7.0", features = ["fam-wrappers"] } +nix = { version = "0.26.2", default-features = false, features = ["poll", "term", "time", "signal", "fs", "feature"] } +libc = "0.2" +libloading = "0.7.4" +log = { version = "0.4", features = ["std"]} +vmm-sys-util = "0.12.1" +byteorder = "1.4.3" +once_cell = "1.18.0" +io-uring = "0.6.0" +serde = { version = "1.0", features = ["derive"] } +v4l2-sys-mit = { version = "0.3.0", optional = true } +trace = {path = "../trace"} + +[features] +default = [] +usb_camera_v4l2 = ["dep:v4l2-sys-mit"] +usb_camera_oh = [] +usb_host = [] +scream_ohaudio = [] +pixman = [] diff --git a/util/build.rs b/util/build.rs new file mode 100644 index 0000000000000000000000000000000000000000..8bc2d6095cb896b46a8a82b6359b75322ee96125 --- /dev/null +++ b/util/build.rs @@ -0,0 +1,44 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::{env, fs::File, io::Write, path::Path, process::Command}; + +fn get_git_commit() -> String { + println!("cargo:rerun-if-changed=../.git/HEAD"); + println!("cargo:rerun-if-changed=../.git/refs"); + println!("cargo:rerun-if-changed=build.rs"); + + let output = Command::new("git") + .args(["rev-parse", "--short", "HEAD"]) + .output(); + match output { + Ok(o) if o.status.success() => { + String::from_utf8(o.stdout).expect("Failed to read git commit id") + } + Ok(o) => { + println!("Get git commit id failed with status: {}", o.status); + String::from("unknown") + } + Err(e) => { + println!("Get git commit id failed: {:?}", e); + String::from("unknown") + } + } +} + +fn main() { + let commit = get_git_commit(); + // Save commit id to pkg build out directory. + let path = Path::new(&env::var("OUT_DIR").unwrap()).join("GIT_COMMIT"); + let mut file = File::create(path).unwrap(); + file.write_all(commit.as_bytes()).unwrap(); +} diff --git a/util/src/aio/libaio.rs b/util/src/aio/libaio.rs index eaac9059e75c9f434a1d0ef549400b01d9b89ed5..46e1d32f54fb5f4d5cbac5e87142c4ddc309860a 100644 --- a/util/src/aio/libaio.rs +++ b/util/src/aio/libaio.rs @@ -10,75 +10,77 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
-use super::Result; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::sync::atomic::{fence, Ordering}; + +use anyhow::bail; use kvm_bindings::__IncompleteArrayField; +use vmm_sys_util::eventfd::EventFd; + +use super::threads::ThreadsAioContext; +use super::{AioCb, AioContext, AioEvent, OpCode, Result}; -pub const IOCB_FLAG_RESFD: u32 = 1; -pub const IOCB_FLAG_IOPRIO: u32 = 1 << 1; +const IOCB_FLAG_RESFD: u32 = 1; -#[derive(Debug, Clone)] -pub struct Iovec { - pub iov_base: u64, - pub iov_len: u64, +#[repr(C)] +#[allow(non_camel_case_types)] +#[derive(Default, Clone)] +struct IoEvent { + data: u64, + obj: u64, + res: i64, + res2: i64, } #[repr(C)] #[allow(non_camel_case_types)] #[derive(Default)] -pub struct IoCb { - pub data: u64, - pub key: u32, - pub aio_reserved1: u32, - pub aio_lio_opcode: u16, - pub aio_reqprio: u16, - pub aio_fildes: u32, - pub aio_buf: u64, - pub aio_nbytes: u64, - pub aio_offset: u64, - pub aio_reserved2: u64, - pub aio_flags: u32, - pub aio_resfd: u32, +struct IoCb { + data: u64, + key: u32, + aio_reserved1: u32, + aio_lio_opcode: u16, + aio_reqprio: u16, + aio_fildes: u32, + aio_buf: u64, + aio_nbytes: u64, + aio_offset: u64, + aio_reserved2: u64, + aio_flags: u32, + aio_resfd: u32, } #[repr(C)] #[allow(non_camel_case_types)] #[derive(Copy, Clone)] -pub enum IoCmd { - Pread = 0, - Pwrite = 1, - Fsync = 2, +enum IoCmd { Fdsync = 3, - Noop = 6, Preadv = 7, Pwritev = 8, } -#[repr(C)] -#[allow(non_camel_case_types)] -#[derive(Default)] -pub struct IoEvent { - pub data: u64, - pub obj: u64, - pub res: i64, - pub res2: i64, -} - #[allow(non_camel_case_types)] -pub enum IoContext {} +pub(crate) enum IoContext {} -pub struct EventResult { - pub events: Vec, - pub nr: usize, +pub(crate) struct LibaioContext { + ctx: *mut IoContext, + threads_aio_ctx: ThreadsAioContext, + resfd: RawFd, + events: Vec, } -pub struct LibaioContext { - pub ctx: *mut IoContext, - pub max_size: i32, +impl Drop for LibaioContext { + fn drop(&mut self) { + if !self.ctx.is_null() { + // SAFETY: self.ctx is generated by SYS_io_setup. + unsafe { libc::syscall(libc::SYS_io_destroy, self.ctx) }; + } + } } #[repr(C)] #[derive(Default)] -pub struct AioRing { +struct AioRing { id: u32, nr: u32, head: u32, @@ -93,41 +95,120 @@ pub struct AioRing { } impl LibaioContext { - pub fn new(max_size: i32) -> Result { + pub fn probe(max_size: u32) -> Result<*mut IoContext> { let mut ctx = std::ptr::null_mut(); - + // SAFETY: ctx is a valid ptr. let ret = unsafe { libc::syscall(libc::SYS_io_setup, max_size, &mut ctx) }; if ret < 0 { - bail!("Failed to setup aio context, return {}.", ret); + bail!("Failed to setup linux native aio context, return {}.", ret); } + Ok(ctx) + } - Ok(LibaioContext { ctx, max_size }) + pub fn new( + max_size: u32, + threads_aio_ctx: ThreadsAioContext, + eventfd: &EventFd, + ) -> Result { + let ctx = Self::probe(max_size)?; + Ok(LibaioContext { + ctx, + threads_aio_ctx, + resfd: eventfd.as_raw_fd(), + events: Vec::with_capacity(max_size as usize), + }) } +} - pub fn submit(&self, nr: i64, iocbp: &mut Vec<*mut IoCb>) -> Result<()> { - let ret = unsafe { libc::syscall(libc::SYS_io_submit, self.ctx, nr, iocbp.as_ptr()) }; - if ret < 0 { +/// Implements the AioContext for libaio. +impl AioContext for LibaioContext { + fn submit(&mut self, iocbp: &[*const AioCb]) -> Result { + let mut iocbs = Vec::with_capacity(iocbp.len()); + for iocb in iocbp { + // SAFETY: iocb is valid until request is finished. 
+ let cb = unsafe { &*(*iocb) }; + let opcode = match cb.opcode { + OpCode::Preadv => IoCmd::Preadv, + OpCode::Pwritev => IoCmd::Pwritev, + OpCode::Fdsync => IoCmd::Fdsync, + _ => bail!("Failed to submit aio, opcode is not supported."), + }; + let aio_buf = match cb.opcode { + OpCode::Fdsync => 0, + _ => cb.iovec.as_ptr() as u64, + }; + iocbs.push(IoCb { + data: cb.user_data, + aio_lio_opcode: opcode as u16, + aio_fildes: cb.file_fd as u32, + aio_buf, + aio_nbytes: cb.iovec.len() as u64, + aio_offset: cb.offset as u64, + aio_flags: IOCB_FLAG_RESFD, + aio_resfd: self.resfd as u32, + ..Default::default() + }); + } + + // SYS_io_submit needs vec of references. + let mut iocbp = Vec::with_capacity(iocbs.len()); + for iocb in iocbs.iter() { + iocbp.push(iocb); + } + + let ret = + // SAFETY: self.ctx is generated by SYS_io_setup. + unsafe { libc::syscall(libc::SYS_io_submit, self.ctx, iocbp.len(), iocbp.as_ptr()) }; + if ret >= 0 { + return Ok(ret as usize); + } + if nix::errno::errno() != libc::EAGAIN { bail!("Failed to submit aio, return {}.", ret); } + Ok(0) + } - Ok(()) + fn submit_threads_pool(&mut self, iocbp: &[*const AioCb]) -> Result { + self.threads_aio_ctx.submit(iocbp) } - #[allow(clippy::zero_ptr)] - pub fn get_events(&self) -> (&[IoEvent], u32, u32) { + fn get_events(&mut self) -> &[AioEvent] { + let mut locked_list = self.threads_aio_ctx.complete_list.lock().unwrap(); + self.events = locked_list.drain(0..).collect(); + drop(locked_list); + let ring = self.ctx as *mut AioRing; + // SAFETY: self.ctx is generated by SYS_io_setup. let head = unsafe { (*ring).head }; + // SAFETY: self.ctx is generated by SYS_io_setup. let tail = unsafe { (*ring).tail }; + // SAFETY: self.ctx is generated by SYS_io_setup. let ring_nr = unsafe { (*ring).nr }; + // SAFETY: self.ctx is generated by SYS_io_setup. + let io_events: &[IoEvent] = unsafe { (*ring).io_events.as_slice(ring_nr as usize) }; + let nr = if tail >= head { tail - head } else { - ring_nr - head + ring_nr - head + tail }; - unsafe { (*ring).head = (head + nr) % ring_nr }; - let io_events: &[IoEvent] = unsafe { (*ring).io_events.as_slice(ring_nr as usize) }; + // Avoid speculatively loading ring.io_events before observing tail. + fence(Ordering::Acquire); + for i in head..(head + nr) { + let io_event = &io_events[(i % ring_nr) as usize]; + self.events.push(AioEvent { + user_data: io_event.data, + status: io_event.res2, + res: io_event.res, + }) + } + + // Avoid head is updated before we consume all io_events. + fence(Ordering::Release); + // SAFETY: self.ctx is generated by SYS_io_setup. 
+ unsafe { (*ring).head = tail }; - (io_events, head, head + nr) + &self.events } } diff --git a/util/src/aio/mod.rs b/util/src/aio/mod.rs index d7dafdcb0258a780e71c893d983e2d5bd87a668e..100dd01802093e82ac4f945b111e479c8d7425ef 100644 --- a/util/src/aio/mod.rs +++ b/util/src/aio/mod.rs @@ -12,247 +12,1051 @@ mod libaio; mod raw; +mod threads; +mod uring; + +pub use raw::*; use std::clone::Clone; -use std::marker::{Send, Sync}; -use std::os::unix::io::{AsRawFd, RawFd}; +use std::fmt::Display; +use std::io::Write; +use std::os::unix::io::RawFd; +use std::sync::atomic::{AtomicI64, AtomicU32, AtomicU64, Ordering}; use std::sync::Arc; +use std::{cmp, str::FromStr}; +use anyhow::{anyhow, bail, Context, Result}; +use libc::c_void; +use log::{error, warn}; +use serde::{Deserialize, Serialize}; +use uring::IoUringContext; use vmm_sys_util::eventfd::EventFd; -use super::errors::Result; use super::link_list::{List, Node}; -pub use libaio::*; -pub use raw::*; +use crate::loop_context::create_new_eventfd; +use crate::num_ops::{round_down, round_up}; +use crate::thread_pool::ThreadPool; +use crate::unix::host_page_size; +use libaio::LibaioContext; +use threads::ThreadsAioContext; type CbList = List>; type CbNode = Node>; -pub type AioCompleteFunc = Box, i64) + Sync + Send>; +/// None aio type. +const AIO_OFF: &str = "off"; +/// Native aio type. +const AIO_NATIVE: &str = "native"; +/// Io-uring aio type. +const AIO_IOURING: &str = "io_uring"; +/// Aio implemented by thread pool. +const AIO_THREADS: &str = "threads"; +/// Max bytes of bounce buffer for IO. +const MAX_LEN_BOUNCE_BUFF: u64 = 1 << 20; + +#[derive(Default, Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Copy)] +pub enum AioEngine { + #[serde(alias = "off")] + #[default] + Off = 0, + #[serde(alias = "native")] + Native = 1, + #[serde(alias = "iouring")] + IoUring = 2, + #[serde(alias = "threads")] + Threads = 3, +} + +impl FromStr for AioEngine { + type Err = anyhow::Error; + + fn from_str(s: &str) -> std::result::Result { + match s { + AIO_OFF => Ok(AioEngine::Off), + AIO_NATIVE => Ok(AioEngine::Native), + AIO_IOURING => Ok(AioEngine::IoUring), + AIO_THREADS => Ok(AioEngine::Threads), + _ => Err(anyhow!("Unknown aio type")), + } + } +} + +impl Display for AioEngine { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + AioEngine::Off => write!(f, "off"), + AioEngine::Native => write!(f, "native"), + AioEngine::IoUring => write!(f, "io_uring"), + AioEngine::Threads => write!(f, "threads"), + } + } +} + +#[derive(Default, Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +pub enum WriteZeroesState { + #[default] + Off, + On, + Unmap, +} + +impl FromStr for WriteZeroesState { + type Err = anyhow::Error; + + fn from_str(s: &str) -> std::result::Result { + match s { + "off" => Ok(WriteZeroesState::Off), + "on" => Ok(WriteZeroesState::On), + "unmap" => Ok(WriteZeroesState::Unmap), + _ => Err(anyhow!("Unknown write zeroes state {}", s)), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Iovec { + pub iov_base: u64, + pub iov_len: u64, +} + +impl Iovec { + pub fn new(base: u64, len: u64) -> Self { + Iovec { + iov_base: base, + iov_len: len, + } + } + + pub fn is_none(&self) -> bool { + self.iov_base == 0 && self.iov_len == 0 + } +} + +pub fn get_iov_size(iovecs: &[Iovec]) -> u64 { + let mut sum: u64 = 0; + for iov in iovecs { + sum += iov.iov_len; + } + sum +} + +/// The trait for Asynchronous IO operation. 
+trait AioContext { + /// Submit IO requests to the OS, the nr submitted is returned. + fn submit(&mut self, iocbp: &[*const AioCb]) -> Result; + /// Submit Io requests to the thread pool, the nr submitted is returned. + fn submit_threads_pool(&mut self, iocbp: &[*const AioCb]) -> Result; + /// Get the IO events of the requests submitted earlier. + fn get_events(&mut self) -> &[AioEvent]; +} + +#[derive(Clone)] +pub struct AioEvent { + pub user_data: u64, + pub status: i64, + pub res: i64, +} + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum OpCode { + Noop = 0, + Preadv = 1, + Pwritev = 2, + Fdsync = 3, + Discard = 4, + WriteZeroes = 5, + WriteZeroesUnmap = 6, +} pub struct AioCb { - pub last_aio: bool, + pub direct: bool, + pub req_align: u32, + pub buf_align: u32, + pub discard: bool, + pub write_zeroes: WriteZeroesState, pub file_fd: RawFd, - pub opcode: IoCmd, + pub opcode: OpCode, pub iovec: Vec, pub offset: usize, - pub process: bool, - pub iocb: Option>, + pub nbytes: u64, + pub user_data: u64, pub iocompletecb: T, + pub combine_req: Option<(Arc, Arc)>, +} + +pub enum AioReqResult { + Inflight, + Error(i64), + Done, } impl AioCb { - pub fn new(cb: T) -> Self { - AioCb { - last_aio: true, - file_fd: 0, - opcode: IoCmd::Noop, - iovec: Vec::new(), - offset: 0, - process: false, - iocb: None, - iocompletecb: cb, + pub fn req_is_completed(&self, ret: i64) -> AioReqResult { + if let Some((cnt, res)) = self.combine_req.as_ref() { + if ret < 0 { + // Store error code in res. + if let Err(v) = res.compare_exchange(0, ret, Ordering::SeqCst, Ordering::SeqCst) { + warn!("Error already existed, old {} new {}", v, ret); + } + } + if cnt.fetch_sub(1, Ordering::SeqCst) > 1 { + // Request is not completed. + return AioReqResult::Inflight; + } + let v = res.load(Ordering::SeqCst); + if v < 0 { + return AioReqResult::Error(v); + } + } + AioReqResult::Done + } + + pub fn rw_sync(&self) -> i32 { + let mut ret = match self.opcode { + // SAFETY: iovec of aiocb is valid. + OpCode::Preadv => unsafe { raw_readv(self.file_fd, &self.iovec, self.offset) }, + // SAFETY: iovec of aiocb is valid. + OpCode::Pwritev => unsafe { raw_writev(self.file_fd, &self.iovec, self.offset) }, + _ => -1, + }; + if ret < 0 { + error!("Failed to do sync read/write."); + } else if ret as u64 != self.nbytes { + error!("Incomplete sync read/write."); + ret = -1; + } + ret as i32 + } + + fn flush_sync(&self) -> i32 { + let ret = raw_datasync(self.file_fd); + if ret < 0 { + error!("Failed to do sync flush."); + } + ret as i32 + } + + fn discard_sync(&self) -> i32 { + let ret = raw_discard(self.file_fd, self.offset, self.nbytes); + if ret < 0 && ret != -libc::ENOTSUP { + error!("Failed to do sync discard."); + } + ret + } + + fn write_zeroes_sync(&mut self) -> i32 { + let mut ret; + if self.opcode == OpCode::WriteZeroesUnmap { + ret = raw_discard(self.file_fd, self.offset, self.nbytes); + if ret == 0 { + return ret; + } + } + ret = raw_write_zeroes(self.file_fd, self.offset, self.nbytes); + if ret == -libc::ENOTSUP && !self.iovec.is_empty() { + self.opcode = OpCode::Pwritev; + return self.rw_sync(); + } + + if ret < 0 { + error!("Failed to do sync write zeroes."); + } + + ret + } + + // If the buffer is full with zero and the operation is Pwritev, + // It's equal to write zero operation. + fn try_convert_to_write_zero(&mut self) { + if self.opcode == OpCode::Pwritev && + self.write_zeroes != WriteZeroesState::Off && + // SAFETY: iovec is generated by address_space. 
+ unsafe { iovec_is_zero(&self.iovec) } + { + self.opcode = OpCode::WriteZeroes; + if self.write_zeroes == WriteZeroesState::Unmap && self.discard { + self.opcode = OpCode::WriteZeroesUnmap; + } + } + } + + pub fn is_misaligned(&self) -> bool { + if self.direct && (self.opcode == OpCode::Preadv || self.opcode == OpCode::Pwritev) { + if (self.offset as u64) & (u64::from(self.req_align) - 1) != 0 { + return true; + } + for iov in self.iovec.iter() { + if iov.iov_base & (u64::from(self.buf_align) - 1) != 0 { + return true; + } + if iov.iov_len & (u64::from(self.req_align) - 1) != 0 { + return true; + } + } + } + false + } + + pub fn handle_misaligned(&mut self) -> Result { + let max_len = round_down( + self.nbytes + u64::from(self.req_align) * 2, + u64::from(self.req_align), + ) + .with_context(|| "Failed to round down request length.")?; + // Set upper limit of buffer length to avoid OOM. + let buff_len = std::cmp::min(max_len, MAX_LEN_BOUNCE_BUFF); + let bounce_buffer = + // SAFETY: We allocate aligned memory and free it later. Alignment is set to + // host page size to decrease the count of allocated pages. + unsafe { libc::memalign(host_page_size() as usize, buff_len as usize) }; + if bounce_buffer.is_null() { + bail!("Failed to alloc memory for misaligned read/write."); + } + + let res = match self.handle_misaligned_rw(bounce_buffer, buff_len) { + Ok(()) => self.nbytes as i32, + Err(e) => { + error!("{:?}", e); + -1 + } + }; + + // SAFETY: the memory is allocated by us and will not be used anymore. + unsafe { libc::free(bounce_buffer) }; + Ok(res) + } + + pub fn handle_misaligned_rw( + &mut self, + bounce_buffer: *mut c_void, + buffer_len: u64, + ) -> Result<()> { + let offset_align = round_down(self.offset as u64, u64::from(self.req_align)) + .with_context(|| "Failed to round down request offset.")?; + let high = self.offset as u64 + self.nbytes; + let high_align = round_up(high, u64::from(self.req_align)) + .with_context(|| "Failed to round up request high edge.")?; + + match self.opcode { + OpCode::Preadv => { + let mut offset = offset_align; + let mut iovecs = &mut self.iovec[..]; + loop { + // Step1: Read file to bounce buffer. + let nbytes = cmp::min(high_align - offset, buffer_len); + // SAFETY: bounce_buffer is valid and large enough. + let len = unsafe { + raw_read( + self.file_fd, + bounce_buffer as u64, + nbytes as usize, + offset as usize, + ) + }; + if len < 0 { + bail!("Failed to do raw read for misaligned read."); + } + + let real_offset = cmp::max(offset, self.offset as u64); + let real_high = cmp::min(offset + nbytes, high); + let real_nbytes = real_high - real_offset; + if (len as u64) < real_high - offset { + bail!( + "misaligned read len {} less than the nbytes {}", + len, + real_high - offset + ); + } + // SAFETY: the memory is allocated by us. + let src = unsafe { + std::slice::from_raw_parts( + (bounce_buffer as u64 + real_offset - offset) as *const u8, + real_nbytes as usize, + ) + }; + + // Step2: Copy bounce buffer to iovec. + // SAFETY: iovecs is generated by address_space. + unsafe { iov_from_buf_direct(iovecs, src) }.and_then(|v| { + if v == real_nbytes as usize { + Ok(()) + } else { + Err(anyhow!("Failed to copy iovs to buff for misaligned read")) + } + })?; + + // Step3: Adjust offset and iovec for next loop. 
+ offset += nbytes; + if offset >= high_align { + break; + } + iovecs = iov_discard_front_direct(iovecs, real_nbytes) + .with_context(|| "Failed to adjust iovec for misaligned read")?; + } + Ok(()) + } + OpCode::Pwritev => { + // Load the head from file before fill iovec to buffer. + let mut head_loaded = false; + if self.offset as u64 > offset_align { + // SAFETY: bounce_buffer is valid and large enough. + let len = unsafe { + raw_read( + self.file_fd, + bounce_buffer as u64, + self.req_align as usize, + offset_align as usize, + ) + }; + if len < 0 || len as u32 != self.req_align { + bail!("Failed to load head for misaligned write."); + } + head_loaded = true; + } + // Is head and tail in the same alignment section? + let same_section = (offset_align + u64::from(self.req_align)) >= high; + let need_tail = !(same_section && head_loaded) && (high_align > high); + + let mut offset = offset_align; + let mut iovecs = &mut self.iovec[..]; + loop { + // Step1: Load iovec to bounce buffer. + let nbytes = cmp::min(high_align - offset, buffer_len); + + let real_offset = cmp::max(offset, self.offset as u64); + let real_high = cmp::min(offset + nbytes, high); + let real_nbytes = real_high - real_offset; + + if real_high == high && need_tail { + // SAFETY: bounce_buffer is valid and large enough. + let len = unsafe { + raw_read( + self.file_fd, + bounce_buffer as u64 + nbytes - u64::from(self.req_align), + self.req_align as usize, + (offset + nbytes) as usize - self.req_align as usize, + ) + }; + if len < 0 || len as u32 != self.req_align { + bail!("Failed to load tail for misaligned write."); + } + } + + // SAFETY: the memory is allocated by us. + let dst = unsafe { + std::slice::from_raw_parts_mut( + (bounce_buffer as u64 + real_offset - offset) as *mut u8, + real_nbytes as usize, + ) + }; + // SAFETY: iovecs is generated by address_space. + unsafe { iov_to_buf_direct(iovecs, 0, dst) }.and_then(|v| { + if v == real_nbytes as usize { + Ok(()) + } else { + Err(anyhow!("Failed to copy iovs to buff for misaligned write")) + } + })?; + + // Step2: Write bounce buffer to file. + // SAFETY: bounce_buffer is valid and large enough. + let len = unsafe { + raw_write( + self.file_fd, + bounce_buffer as u64, + nbytes as usize, + offset as usize, + ) + }; + if len < 0 || len as u64 != nbytes { + bail!("Failed to do raw write for misaligned write."); + } + + // Step3: Adjuest offset and iovec for next loop. + offset += nbytes; + if offset >= high_align { + break; + } + iovecs = iov_discard_front_direct(iovecs, real_nbytes) + .with_context(|| "Failed to adjust iovec for misaligned write")?; + } + Ok(()) + } + _ => bail!("Failed to do misaligned rw: unknown cmd type"), } } } +pub type AioCompleteFunc = fn(&AioCb, i64) -> Result<()>; + pub struct Aio { - pub ctx: Arc, + ctx: Option>>, + engine: AioEngine, pub fd: EventFd, - pub aio_in_queue: CbList, - pub aio_in_flight: CbList, + aio_in_queue: CbList, + aio_in_flight: CbList, + /// IO in aio_in_queue and aio_in_flight. + pub incomplete_cnt: Arc, max_events: usize, - complete_func: Arc>, + pub complete_func: Arc>, +} + +pub fn aio_probe(engine: AioEngine) -> Result<()> { + match engine { + AioEngine::Native => { + let ctx = LibaioContext::probe(1)?; + // SAFETY: if no err, ctx is valid. 
+ unsafe { libc::syscall(libc::SYS_io_destroy, ctx) }; + } + AioEngine::IoUring => { + IoUringContext::probe(1)?; + } + _ => {} + } + Ok(()) } impl Aio { - pub fn new(func: Arc>) -> Result { - let max_events = 128; + pub fn new( + func: Arc>, + engine: AioEngine, + thread_pool: Option>, + ) -> Result { + let max_events: usize = 128; + let fd = create_new_eventfd()?; + let ctx: Option>> = if let Some(pool) = thread_pool { + let threads_aio_ctx = ThreadsAioContext::new(max_events as u32, &fd, pool); + match engine { + AioEngine::Native => Some(Box::new(LibaioContext::new( + max_events as u32, + threads_aio_ctx, + &fd, + )?)), + AioEngine::IoUring => Some(Box::new(IoUringContext::new( + max_events as u32, + threads_aio_ctx, + &fd, + )?)), + AioEngine::Threads => Some(Box::new(threads_aio_ctx)), + _ => bail!("Aio type {:?} does not support thread pools", engine), + } + } else if engine == AioEngine::Off { + None + } else { + bail!("Aio type {:?} is lack of thread pool context", engine); + }; Ok(Aio { - ctx: Arc::new(LibaioContext::new(max_events as i32)?), - fd: EventFd::new(libc::EFD_NONBLOCK).unwrap(), + ctx, + engine, + fd, aio_in_queue: List::new(), aio_in_flight: List::new(), + incomplete_cnt: Arc::new(AtomicU64::new(0)), max_events, complete_func: func, }) } - pub fn handle(&mut self) -> Result { - let (evts, start, end) = self.ctx.get_events(); - let mut done = false; - for e in start..end { - if evts[e as usize].res2 == 0 { - unsafe { - done = true; - let node = evts[e as usize].data as *mut CbNode; + pub fn get_engine(&self) -> AioEngine { + self.engine + } + + pub fn submit_request(&mut self, mut cb: AioCb) -> Result<()> { + trace::aio_submit_request(cb.file_fd, &cb.opcode, cb.offset, cb.nbytes); + if self.ctx.is_none() + || [ + OpCode::Discard, + OpCode::WriteZeroes, + OpCode::WriteZeroesUnmap, + ] + .contains(&cb.opcode) + { + return self.handle_sync_request(cb); + } + + if cb.is_misaligned() { + return self.submit_thread_pool_async(cb); + } - (self.complete_func)(&(*node).value, evts[e as usize].res); - self.aio_in_flight.unlink(&(*node)); + cb.try_convert_to_write_zero(); - // free mem - if let Some(i) = (*node).value.iocb { - libc::free((*node).value.iovec.as_ptr() as *mut libc::c_void); - libc::free(i.as_ptr() as *mut libc::c_void); - }; - libc::free(node as *mut libc::c_void); + if self.engine != AioEngine::Threads + && [OpCode::Preadv, OpCode::Pwritev, OpCode::Fdsync].contains(&cb.opcode) + { + return self.submit_async(cb); + } + + self.submit_thread_pool_async(cb) + } + + fn handle_sync_request(&mut self, mut cb: AioCb) -> Result<()> { + if cb.is_misaligned() { + let ret = match cb.handle_misaligned() { + Ok(ret) => ret, + Err(e) => { + error!("{:?}", e); + -1 } + }; + return (self.complete_func)(&cb, i64::from(ret)); + } + + cb.try_convert_to_write_zero(); + + let ret = match cb.opcode { + OpCode::Preadv | OpCode::Pwritev => cb.rw_sync(), + OpCode::Fdsync => cb.flush_sync(), + OpCode::Discard => cb.discard_sync(), + OpCode::WriteZeroes | OpCode::WriteZeroesUnmap => cb.write_zeroes_sync(), + OpCode::Noop => return Err(anyhow!("Aio opcode is not specified.")), + }; + (self.complete_func)(&cb, i64::from(ret)) + } + + pub fn flush_request(&mut self) -> Result<()> { + if self.ctx.is_some() { + self.process_list() + } else { + Ok(()) + } + } + + pub fn handle_complete(&mut self) -> Result { + let mut done = false; + if self.ctx.is_none() { + warn!("Can not handle aio complete with invalid ctx."); + return Ok(done); + } + for evt in self.ctx.as_mut().unwrap().get_events() { + // 
SAFETY: evt.data is specified by submit and not dropped at other place. + unsafe { + let node = evt.user_data as *mut CbNode; + let res = if (evt.status == 0) && (evt.res == (*node).value.nbytes as i64) { + done = true; + evt.res + } else { + error!( + "Async IO request failed, opcode {:?} status {} res {} expect {}", + (*node).value.opcode, + evt.status, + evt.res, + (*node).value.nbytes + ); + -1 + }; + + let res = (self.complete_func)(&(*node).value, res); + self.aio_in_flight.unlink(&(*node)); + self.incomplete_cnt.fetch_sub(1, Ordering::SeqCst); + // Construct Box to free mem automatically. + drop(Box::from_raw(node)); + res?; } } - self.process_list().map(|_v| Ok(done))? + self.process_list()?; + Ok(done) } fn process_list(&mut self) -> Result<()> { - if self.aio_in_queue.len > 0 && self.aio_in_flight.len < self.max_events { + if self.ctx.is_none() { + warn!("Can not process aio list with invalid ctx."); + return Ok(()); + } + while !self.aio_in_queue.is_empty() && self.aio_in_flight.len() < self.max_events { let mut iocbs = Vec::new(); - for _ in self.aio_in_flight.len..self.max_events { + for _ in self.aio_in_flight.len()..self.max_events { match self.aio_in_queue.pop_tail() { Some(node) => { - iocbs.push(node.value.iocb.unwrap().as_ptr()); + iocbs.push(&node.value as *const AioCb); self.aio_in_flight.add_head(node); } None => break, } } - if !iocbs.is_empty() { - return self.ctx.submit(iocbs.len() as i64, &mut iocbs); - } - } + // The iocbs must not be empty. + let (nr, is_err) = match self.ctx.as_mut().unwrap().submit(&iocbs) { + Ok(nr) => (nr, false), + Err(e) => { + error!("{:?}", e); + (0, true) + } + }; - Ok(()) - } + // Push back unsubmitted requests. This should rarely happen, so the + // trade off is acceptable. + let mut index = nr; + while index < iocbs.len() { + if let Some(node) = self.aio_in_flight.pop_head() { + self.aio_in_queue.add_tail(node); + } + index += 1; + } - pub fn rw_aio(&mut self, cb: AioCb, sector_size: u64) -> Result<()> { - let mut misaligned = false; - for iov in cb.iovec.iter() { - if iov.iov_base % sector_size != 0 || iov.iov_len % sector_size != 0 { - misaligned = true; + if is_err { + // Fail one request, retry the rest. + if let Some(node) = self.aio_in_queue.pop_tail() { + self.incomplete_cnt.fetch_sub(1, Ordering::SeqCst); + (self.complete_func)(&(node).value, -1)?; + } + } else if nr == 0 { + // If can't submit any request, break the loop + // and the method handle() will try again. 
break; } } - if misaligned { - return self.handle_misaligned_aio(cb); - } + Ok(()) + } + + fn submit_thread_pool_async(&mut self, cb: AioCb) -> Result<()> { + let mut node = Box::new(Node::new(cb)); + node.value.user_data = (&mut (*node) as *mut CbNode) as u64; - let last_aio = cb.last_aio; - let opcode = cb.opcode; - let file_fd = cb.file_fd; - let iovec = (&*cb.iovec).as_ptr() as u64; - let sg_size = cb.iovec.len(); - let offset = cb.offset; + self.ctx + .as_mut() + .unwrap() + .submit_threads_pool(&[&node.value as *const AioCb])?; + self.aio_in_flight.add_head(node); + self.incomplete_cnt.fetch_add(1, Ordering::SeqCst); + Ok(()) + } + fn submit_async(&mut self, cb: AioCb) -> Result<()> { let mut node = Box::new(Node::new(cb)); - let iocb = IoCb { - aio_lio_opcode: opcode as u16, - aio_fildes: file_fd as u32, - aio_buf: iovec, - aio_nbytes: sg_size as u64, - aio_offset: offset as u64, - aio_flags: IOCB_FLAG_RESFD, - aio_resfd: self.fd.as_raw_fd() as u32, - data: (&mut (*node) as *mut CbNode) as u64, - ..Default::default() - }; - node.value.iocb = std::ptr::NonNull::new(Box::into_raw(Box::new(iocb))); + node.value.user_data = (&mut (*node) as *mut CbNode) as u64; self.aio_in_queue.add_head(node); - if last_aio || self.aio_in_queue.len + self.aio_in_flight.len >= self.max_events { - return self.process_list(); + self.incomplete_cnt.fetch_add(1, Ordering::SeqCst); + if self.aio_in_queue.len() + self.aio_in_flight.len() >= self.max_events { + self.process_list()?; } Ok(()) } +} - pub fn rw_sync(&mut self, cb: AioCb) -> Result<()> { - let ret = match cb.opcode { - IoCmd::Preadv => { - let mut r = 0; - let mut off = cb.offset; - for iov in cb.iovec.iter() { - r = raw_read(cb.file_fd, iov.iov_base, iov.iov_len as usize, off)?; - off += iov.iov_len as usize; - } - r - } - IoCmd::Pwritev => { - let mut r = 0; - let mut off = cb.offset; - for iov in cb.iovec.iter() { - r = raw_write(cb.file_fd, iov.iov_base, iov.iov_len as usize, off)?; - off += iov.iov_len as usize; - } - r - } - IoCmd::Fdsync => raw_datasync(cb.file_fd)?, - _ => -1, - }; - (self.complete_func)(&cb, ret); +/// # Safety +/// +/// Caller should has valid hva address. +pub unsafe fn mem_from_buf(buf: &[u8], hva: u64) -> Result<()> { + let mut slice = std::slice::from_raw_parts_mut(hva as *mut u8, buf.len()); + slice + .write_all(buf) + .with_context(|| format!("Failed to write buf to hva:{})", hva))?; + Ok(()) +} - Ok(()) +/// Write buf to iovec and return the written number of bytes. +/// # Safety +/// +/// Caller should has valid iovec. +pub unsafe fn iov_from_buf_direct(iovec: &[Iovec], buf: &[u8]) -> Result { + let mut start: usize = 0; + let mut end: usize = 0; + + for iov in iovec.iter() { + end = cmp::min(start + iov.iov_len as usize, buf.len()); + // iov len is not less than buf's. + mem_from_buf(&buf[start..end], iov.iov_base)?; + if end >= buf.len() { + break; + } + start = end; } + Ok(end) +} - fn handle_misaligned_aio(&mut self, cb: AioCb) -> Result<()> { - // Safe because we only get the host page size. - let host_page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as u64; - let mut ret = 0_i64; +/// # Safety +/// +/// Caller should has valid hva address. 
+pub unsafe fn mem_to_buf(mut buf: &mut [u8], hva: u64) -> Result<()> { + let slice = std::slice::from_raw_parts(hva as *const u8, buf.len()); + buf.write_all(slice) + .with_context(|| format!("Failed to read buf from hva:{})", hva))?; + Ok(()) +} - match cb.opcode { - IoCmd::Preadv => { - let mut off = cb.offset; - for iov in cb.iovec.iter() { - // Safe because we allocate aligned memory and free it later. - // Alignment is set to host page size to decrease the count of allocated pages. - let aligned_buffer = - unsafe { libc::memalign(host_page_size as usize, iov.iov_len as usize) }; - ret = raw_read(cb.file_fd, aligned_buffer as u64, iov.iov_len as usize, off)?; - off += iov.iov_len as usize; +/// Read iovec to buf and return the read number of bytes. +/// # Safety +/// +/// Caller should has valid iovec. +pub unsafe fn iov_to_buf_direct(iovec: &[Iovec], offset: u64, buf: &mut [u8]) -> Result { + let mut iovec2: Option<&[Iovec]> = None; + let mut start: usize = 0; + let mut end: usize = 0; - let dst = unsafe { - std::slice::from_raw_parts_mut( - iov.iov_base as *mut u8, - iov.iov_len as usize, - ) - }; - let src = unsafe { - std::slice::from_raw_parts( - aligned_buffer as *const u8, - iov.iov_len as usize, - ) - }; - dst.copy_from_slice(src); - // Safe because the memory is allocated by us and will not be used anymore. - unsafe { libc::free(aligned_buffer) }; + if offset == 0 { + iovec2 = Some(iovec); + } else { + let mut offset = offset; + for (index, iov) in iovec.iter().enumerate() { + if iov.iov_len > offset { + end = cmp::min((iov.iov_len - offset) as usize, buf.len()); + // iov len is not less than buf's. + mem_to_buf(&mut buf[..end], iov.iov_base + offset)?; + if end >= buf.len() || index >= (iovec.len() - 1) { + return Ok(end); } + start = end; + iovec2 = Some(&iovec[index + 1..]); + break; } - IoCmd::Pwritev => { - let mut off = cb.offset; - for iov in cb.iovec.iter() { - let aligned_buffer = - unsafe { libc::memalign(host_page_size as usize, iov.iov_len as usize) }; - let dst = unsafe { - std::slice::from_raw_parts_mut( - aligned_buffer as *mut u8, - iov.iov_len as usize, - ) - }; - let src = unsafe { - std::slice::from_raw_parts(iov.iov_base as *const u8, iov.iov_len as usize) - }; - dst.copy_from_slice(src); + offset -= iov.iov_len; + } + if iovec2.is_none() { + return Ok(0); + } + } - ret = raw_write(cb.file_fd, aligned_buffer as u64, iov.iov_len as usize, off)?; - off += iov.iov_len as usize; - unsafe { libc::free(aligned_buffer) }; - } + for iov in iovec2.unwrap() { + end = cmp::min(start + iov.iov_len as usize, buf.len()); + // iov len is not less than buf's. + mem_to_buf(&mut buf[start..end], iov.iov_base)?; + if end >= buf.len() { + break; + } + start = end; + } + Ok(end) +} + +/// Discard "size" bytes of the front of iovec. +pub fn iov_discard_front_direct(iovec: &mut [Iovec], mut size: u64) -> Option<&mut [Iovec]> { + for (index, iov) in iovec.iter_mut().enumerate() { + if iov.iov_len > size { + iov.iov_base += size; + iov.iov_len -= size; + return Some(&mut iovec[index..]); + } + size -= iov.iov_len; + } + None +} + +// Caller should have valid hva iovec. +unsafe fn iovec_is_zero(iovecs: &[Iovec]) -> bool { + let size = std::mem::size_of::() as u64; + for iov in iovecs { + if iov.iov_len % size != 0 { + return false; + } + // SAFETY: iov_base and iov_len has been checked in pop_avail(). 
+ let slice = + std::slice::from_raw_parts(iov.iov_base as *const u64, (iov.iov_len / size) as usize); + for val in slice.iter() { + if *val != 0 { + return false; } - IoCmd::Fdsync => ret = raw_datasync(cb.file_fd)?, - _ => {} + } + } + true +} + +pub fn iovecs_split(iovecs: Vec, mut size: u64) -> (Vec, Vec) { + let len = iovecs.len(); + let mut begin: Vec = Vec::with_capacity(len); + let mut end: Vec = Vec::with_capacity(len); + for iov in iovecs { + if size == 0 { + end.push(iov); + continue; + } + if iov.iov_len > size { + begin.push(Iovec::new(iov.iov_base, size)); + end.push(Iovec::new(iov.iov_base + size, iov.iov_len - size)); + size = 0; + } else { + size -= iov.iov_len; + begin.push(iov); + } + } + (begin, end) +} + +/// # Safety +/// +/// Caller should has valid iovec. +pub unsafe fn iovec_write_zero(iovec: &[Iovec]) { + for iov in iovec.iter() { + std::ptr::write_bytes(iov.iov_base as *mut u8, 0, iov.iov_len as usize); + } +} + +#[cfg(test)] +mod tests { + use std::os::unix::prelude::AsRawFd; + + use vmm_sys_util::tempfile::TempFile; + + use super::*; + + fn perform_sync_rw( + fsize: usize, + offset: usize, + nbytes: u64, + opcode: OpCode, + direct: bool, + align: u32, + ) { + assert!(opcode == OpCode::Preadv || opcode == OpCode::Pwritev); + // Init a file with special content. + let mut content = vec![0u8; fsize]; + for (index, elem) in content.as_mut_slice().iter_mut().enumerate() { + *elem = index as u8; + } + let tmp_file = TempFile::new().unwrap(); + let mut file = tmp_file.into_file(); + file.write_all(&content).unwrap(); + + // Prepare rw buf. + let mut buf = vec![0xEF; nbytes as usize / 3]; + let mut buf2 = vec![0xFE; nbytes as usize - buf.len()]; + let iovec = vec![ + Iovec { + iov_base: buf.as_mut_ptr() as u64, + iov_len: buf.len() as u64, + }, + Iovec { + iov_base: buf2.as_mut_ptr() as u64, + iov_len: buf2.len() as u64, + }, + ]; + + // Perform aio rw. + let file_fd = file.as_raw_fd(); + let aiocb = AioCb { + direct, + req_align: align, + buf_align: align, + discard: false, + write_zeroes: WriteZeroesState::Off, + file_fd, + opcode, + iovec, + offset, + nbytes, + user_data: 0, + iocompletecb: 0, + combine_req: None, }; - (self.complete_func)(&cb, ret); + let mut aio = Aio::new( + Arc::new(|_: &AioCb, _: i64| -> Result<()> { Ok(()) }), + AioEngine::Off, + None, + ) + .unwrap(); + aio.submit_request(aiocb).unwrap(); - Ok(()) + // Get actual file content. + let mut new_content = vec![0u8; fsize]; + // SAFETY: new_content is valid. + let ret = unsafe { + raw_read( + file_fd, + new_content.as_mut_ptr() as u64, + new_content.len(), + 0, + ) + }; + assert_eq!(ret, fsize as i64); + if opcode == OpCode::Pwritev { + // The expected file content. + let ret = (&mut content[offset..]).write(&buf).unwrap(); + assert_eq!(ret, buf.len()); + let ret = (&mut content[offset + buf.len()..]).write(&buf2).unwrap(); + assert_eq!(ret, buf2.len()); + for index in 0..fsize { + assert_eq!(new_content[index], content[index]); + } + } else { + for index in 0..buf.len() { + assert_eq!(buf[index], new_content[offset + index]); + } + for index in 0..buf2.len() { + assert_eq!(buf2[index], new_content[offset + buf.len() + index]); + } + } + } + + fn test_sync_rw(opcode: OpCode, direct: bool, align: u32) { + assert!(align >= 512); + let fsize: usize = 2 << 20; + + // perform sync rw in the same alignment section. 
+ let minor_align = u64::from(align) - 100; + perform_sync_rw(fsize, 0, minor_align, opcode, direct, align); + perform_sync_rw(fsize, 50, minor_align, opcode, direct, align); + perform_sync_rw(fsize, 100, minor_align, opcode, direct, align); + + // perform sync rw across alignment sections. + let minor_size = fsize as u64 - 100; + perform_sync_rw(fsize, 0, minor_size, opcode, direct, align); + perform_sync_rw(fsize, 50, minor_size, opcode, direct, align); + perform_sync_rw(fsize, 100, minor_size, opcode, direct, align); + } + + fn test_sync_rw_all_align(opcode: OpCode, direct: bool) { + let basic_align = 512; + test_sync_rw(opcode, direct, basic_align); + test_sync_rw(opcode, direct, basic_align << 1); + test_sync_rw(opcode, direct, basic_align << 2); + test_sync_rw(opcode, direct, basic_align << 3); + } + + #[test] + fn test_direct_sync_rw() { + test_sync_rw_all_align(OpCode::Preadv, true); + test_sync_rw_all_align(OpCode::Pwritev, true); + } + + #[test] + fn test_indirect_sync_rw() { + test_sync_rw_all_align(OpCode::Preadv, false); + test_sync_rw_all_align(OpCode::Pwritev, false); + } + + #[test] + fn test_iovecs_split() { + let iovecs = vec![Iovec::new(0, 100), Iovec::new(200, 100)]; + let (left, right) = iovecs_split(iovecs, 0); + assert_eq!(left, vec![]); + assert_eq!(right, vec![Iovec::new(0, 100), Iovec::new(200, 100)]); + + let iovecs = vec![Iovec::new(0, 100), Iovec::new(200, 100)]; + let (left, right) = iovecs_split(iovecs, 50); + assert_eq!(left, vec![Iovec::new(0, 50)]); + assert_eq!(right, vec![Iovec::new(50, 50), Iovec::new(200, 100)]); + + let iovecs = vec![Iovec::new(0, 100), Iovec::new(200, 100)]; + let (left, right) = iovecs_split(iovecs, 100); + assert_eq!(left, vec![Iovec::new(0, 100)]); + assert_eq!(right, vec![Iovec::new(200, 100)]); + + let iovecs = vec![Iovec::new(0, 100), Iovec::new(200, 100)]; + let (left, right) = iovecs_split(iovecs, 150); + assert_eq!(left, vec![Iovec::new(0, 100), Iovec::new(200, 50)]); + assert_eq!(right, vec![Iovec::new(250, 50)]); + + let iovecs = vec![Iovec::new(0, 100), Iovec::new(200, 100)]; + let (left, right) = iovecs_split(iovecs, 300); + assert_eq!(left, vec![Iovec::new(0, 100), Iovec::new(200, 100)]); + assert_eq!(right, vec![]); + } + + #[test] + fn test_iovec_write_zero() { + let buf1 = vec![0x1_u8; 100]; + let buf2 = vec![0x1_u8; 40]; + let iovecs = vec![ + Iovec::new(buf1.as_ptr() as u64, buf1.len() as u64), + Iovec::new(buf2.as_ptr() as u64, buf2.len() as u64), + ]; + + // SAFETY: iovecs has valid hva address. + unsafe { iovec_write_zero(&iovecs) }; + assert_eq!(buf1, vec![0_u8; 100]); + assert_eq!(buf2, vec![0_u8; 40]); } } diff --git a/util/src/aio/raw.rs b/util/src/aio/raw.rs index 27f6f0392cbea01b5a5dc753f760ddfd1067ec02..159740009137e5a41409ae7dc9a928d237955d6a 100644 --- a/util/src/aio/raw.rs +++ b/util/src/aio/raw.rs @@ -10,33 +10,191 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use super::Result; -use libc::{c_void, fdatasync, pread, pwrite}; use std::os::unix::io::RawFd; -pub fn raw_read(fd: RawFd, buf: u64, size: usize, offset: usize) -> Result { - let ret = unsafe { pread(fd, buf as *mut c_void, size, offset as i64) as i64 }; +use libc::{c_int, c_void, fdatasync, iovec, off_t, pread, preadv, pwrite, pwritev, size_t}; +use log::error; +use vmm_sys_util::fallocate::{fallocate, FallocateMode}; + +use super::Iovec; + +/// # Safety +/// +/// Caller should has valid buf. 
+pub unsafe fn raw_read(fd: RawFd, buf: u64, size: usize, offset: usize) -> i64 { + let mut ret; + loop { + ret = pread( + fd as c_int, + buf as *mut c_void, + size as size_t, + offset as off_t, + ) as i64; + if !(ret < 0 && (nix::errno::errno() == libc::EINTR || nix::errno::errno() == libc::EAGAIN)) + { + break; + } + } if ret < 0 { - bail!("Failed to pread for {}, return {}.", fd, ret); + error!( + "Failed to pread: fd {} buf {:#x}, size {}, offset{:#x}, errno {}.", + fd, + buf, + size, + offset, + nix::errno::errno() + ); } + ret +} - Ok(ret) +/// # Safety +/// +/// Caller should has valid iovec. +pub unsafe fn raw_readv(fd: RawFd, iovec: &[Iovec], offset: usize) -> i64 { + let mut ret; + loop { + ret = preadv( + fd as c_int, + iovec.as_ptr() as *const iovec, + iovec.len() as c_int, + offset as off_t, + ) as i64; + if !(ret < 0 && (nix::errno::errno() == libc::EINTR || nix::errno::errno() == libc::EAGAIN)) + { + break; + } + } + if ret < 0 { + error!( + "Failed to preadv: fd {} offset {:#x}, errno {}.", + fd, + offset, + nix::errno::errno(), + ); + } + ret } -pub fn raw_write(fd: RawFd, buf: u64, size: usize, offset: usize) -> Result { - let ret = unsafe { pwrite(fd, buf as *mut c_void, size, offset as i64) as i64 }; +/// # Safety +/// +/// Caller should has valid buf. +pub unsafe fn raw_write(fd: RawFd, buf: u64, size: usize, offset: usize) -> i64 { + let mut ret; + loop { + ret = pwrite( + fd as c_int, + buf as *mut c_void, + size as size_t, + offset as off_t, + ) as i64; + if !(ret < 0 && (nix::errno::errno() == libc::EINTR || nix::errno::errno() == libc::EAGAIN)) + { + break; + } + } if ret < 0 { - bail!("Failed to pwrite for {}, return {}.", fd, ret); + error!( + "Failed to pwrite: fd {} buf {:#x}, size{}, offset {:#x}, errno {}.", + fd, + buf, + size, + offset, + nix::errno::errno(), + ); } + ret +} - Ok(ret) +/// # Safety +/// +/// Caller should has valid iovec. +pub unsafe fn raw_writev(fd: RawFd, iovec: &[Iovec], offset: usize) -> i64 { + let mut ret; + loop { + // Caller should has valid iovec. + ret = pwritev( + fd as c_int, + iovec.as_ptr() as *const iovec, + iovec.len() as c_int, + offset as off_t, + ) as i64; + if !(ret < 0 && (nix::errno::errno() == libc::EINTR || nix::errno::errno() == libc::EAGAIN)) + { + break; + } + } + if ret < 0 { + error!( + "Failed to pwritev: fd {} offset {:#x}, errno {}.", + fd, + offset, + nix::errno::errno(), + ); + } + ret } -pub fn raw_datasync(fd: RawFd) -> Result { +pub fn raw_datasync(fd: RawFd) -> i64 { + // SAFETY: fd is valid. 
let ret = unsafe { i64::from(fdatasync(fd)) }; if ret < 0 { - bail!("Failed to fdatasync for {}, return {}.", fd, ret); + error!("Failed to fdatasync: errno {}.", nix::errno::errno()); + } + ret +} + +pub fn raw_discard(fd: RawFd, offset: usize, size: u64) -> i32 { + let ret = do_fallocate(fd, FallocateMode::PunchHole, true, offset as u64, size); + + if ret < 0 && ret != -libc::ENOTSUP { + error!("Failed to fallocate for fd {}, errno {}.", fd, ret); + } + ret +} + +pub fn raw_write_zeroes(fd: RawFd, offset: usize, size: u64) -> i32 { + let ret = do_fallocate(fd, FallocateMode::ZeroRange, false, offset as u64, size); + + if ret < 0 && ret != -libc::ENOTSUP { + error!( + "Failed to fallocate zero range for fd {}, errno {}.", + fd, ret, + ); + } + ret +} + +fn do_fallocate( + fd: RawFd, + fallocate_mode: FallocateMode, + keep_size: bool, + offset: u64, + size: u64, +) -> i32 { + let mut ret: i32 = 0; + loop { + let mode = match &fallocate_mode { + FallocateMode::PunchHole => FallocateMode::PunchHole, + FallocateMode::ZeroRange => FallocateMode::ZeroRange, + }; + + if let Err(e) = fallocate(&fd, mode, keep_size, offset, size) { + ret = e.errno() + }; + + if ret == 0 { + return ret; + } + + if ret != libc::EINTR { + break; + } + } + + if [libc::ENODEV, libc::ENOSYS, libc::EOPNOTSUPP, libc::ENOTTY].contains(&ret) { + ret = libc::ENOTSUP; } - Ok(ret) + -ret } diff --git a/util/src/aio/threads.rs b/util/src/aio/threads.rs new file mode 100644 index 0000000000000000000000000000000000000000..1aecf948ea676c711ec6f50f4fc3a2e725e06dcf --- /dev/null +++ b/util/src/aio/threads.rs @@ -0,0 +1,178 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use std::sync::{Arc, Mutex}; + +use log::error; +use vmm_sys_util::eventfd::EventFd; + +use crate::aio::{AioCb, AioContext, AioEvent, Iovec, OpCode, Result, WriteZeroesState}; +use crate::thread_pool::{TaskOperation, ThreadPool}; + +pub struct ThreadsTasks { + io_data: IoData, + pub complete_lists: Arc>>, + notify_event: Arc>, +} + +pub struct IoData { + pub direct: bool, + pub req_align: u32, + pub buf_align: u32, + pub discard: bool, + pub write_zeroes: WriteZeroesState, + pub file_fd: i32, + pub opcode: OpCode, + pub iovec: Vec, + pub offset: usize, + pub nbytes: u64, + pub user_data: u64, +} + +impl IoData { + fn package_aiocb(&self) -> AioCb<()> { + AioCb { + direct: self.direct, + req_align: self.req_align, + buf_align: self.buf_align, + discard: self.discard, + write_zeroes: self.write_zeroes, + file_fd: self.file_fd, + opcode: self.opcode, + iovec: self.iovec.clone(), + offset: self.offset, + nbytes: self.nbytes, + user_data: self.user_data, + iocompletecb: (), + combine_req: None, + } + } +} + +impl ThreadsTasks { + fn complete_func(&self, task: &IoData, res: i32) { + let aio_event = AioEvent { + user_data: task.user_data, + status: 0, + res: i64::from(res), + }; + self.complete_lists.lock().unwrap().push(aio_event); + self.notify_event + .lock() + .unwrap() + .write(1) + .unwrap_or_else(|e| error!("{:?}", e)); + } +} + +impl TaskOperation for ThreadsTasks { + fn run(&mut self) { + let mut cb = self.io_data.package_aiocb(); + + // Direct io needs to be aligned before io operation. + if cb.is_misaligned() { + let ret = match cb.handle_misaligned() { + Ok(ret) => ret, + Err(e) => { + error!("{:?}", e); + -1 + } + }; + self.complete_func(&self.io_data, ret); + return; + } + + let mut ret = match cb.opcode { + OpCode::Preadv | OpCode::Pwritev => cb.rw_sync(), + OpCode::Discard => cb.discard_sync(), + OpCode::WriteZeroes | OpCode::WriteZeroesUnmap => cb.write_zeroes_sync(), + OpCode::Fdsync => cb.flush_sync(), + _ => -1, + }; + + if [ + OpCode::Discard, + OpCode::WriteZeroes, + OpCode::WriteZeroesUnmap, + OpCode::Fdsync, + ] + .contains(&cb.opcode) + && ret == 0 + { + ret = self.io_data.nbytes as i32; + } + + self.complete_func(&self.io_data, ret); + } +} + +pub struct ThreadsAioContext { + pool: Arc, + events: Vec, + pub complete_list: Arc>>, + notify_event: Arc>, +} + +impl ThreadsAioContext { + pub fn new(max_size: u32, eventfd: &EventFd, thread_pool: Arc) -> Self { + Self { + pool: thread_pool, + complete_list: Arc::new(Mutex::new(Vec::new())), + notify_event: Arc::new(Mutex::new((*eventfd).try_clone().unwrap())), + events: Vec::with_capacity(max_size as usize), + } + } +} + +impl AioContext for ThreadsAioContext { + fn submit(&mut self, iocbp: &[*const AioCb]) -> Result { + for iocb in iocbp { + // SAFETY: iocb is valid until request is finished. 
+ let cb = unsafe { &*(*iocb) }; + + let io_data = IoData { + opcode: cb.opcode, + file_fd: cb.file_fd, + offset: cb.offset, + nbytes: cb.nbytes, + iovec: cb.iovec.clone(), + direct: cb.direct, + buf_align: cb.buf_align, + req_align: cb.req_align, + discard: cb.discard, + write_zeroes: cb.write_zeroes, + user_data: cb.user_data, + }; + let task = ThreadsTasks { + io_data, + complete_lists: self.complete_list.clone(), + notify_event: self.notify_event.clone(), + }; + + ThreadPool::submit_task(self.pool.clone(), Box::new(task))?; + } + + Ok(iocbp.len()) + } + + fn submit_threads_pool(&mut self, iocbp: &[*const AioCb]) -> Result { + self.submit(iocbp) + } + + fn get_events(&mut self) -> &[AioEvent] { + let mut locked_list = self.complete_list.lock().unwrap(); + self.events = locked_list.drain(0..).collect(); + drop(locked_list); + + &self.events + } +} diff --git a/util/src/aio/uring.rs b/util/src/aio/uring.rs new file mode 100644 index 0000000000000000000000000000000000000000..f1d373a65f7d6fb793bb60acbf059b3d9a590ad4 --- /dev/null +++ b/util/src/aio/uring.rs @@ -0,0 +1,113 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::os::unix::io::AsRawFd; + +use anyhow::{bail, Context}; +use io_uring::{opcode, types, IoUring}; +use libc; +use vmm_sys_util::eventfd::EventFd; + +use super::threads::ThreadsAioContext; +use super::{AioCb, AioContext, AioEvent, OpCode, Result}; + +/// The io-uring context. +pub(crate) struct IoUringContext { + ring: IoUring, + threads_aio_ctx: ThreadsAioContext, + events: Vec, +} + +impl IoUringContext { + pub fn probe(entries: u32) -> Result { + IoUring::new(entries).with_context(|| "Failed to create io_uring instance.") + } + + pub fn new( + entries: u32, + threads_aio_ctx: ThreadsAioContext, + eventfd: &EventFd, + ) -> Result { + let tmp_entries = entries as i32; + // Ensure the power of 2. + if (tmp_entries & -tmp_entries) != tmp_entries || tmp_entries == 0 { + bail!("Entries must be the power of 2 and larger than 0"); + } + let ring = Self::probe(entries)?; + + ring.submitter() + .register_eventfd(eventfd.as_raw_fd()) + .with_context(|| "Failed to register event fd")?; + let events = Vec::with_capacity(entries as usize); + Ok(IoUringContext { + ring, + threads_aio_ctx, + events, + }) + } +} + +impl AioContext for IoUringContext { + fn submit(&mut self, iocbp: &[*const AioCb]) -> Result { + for iocb in iocbp.iter() { + // SAFETY: iocb is valid until request is finished. 
+ let cb = unsafe { &*(*iocb) }; + let offset = cb.offset as u64; + let data = cb.user_data; + let len = cb.iovec.len(); + let iovs = cb.iovec.as_ptr(); + let fd = types::Fd(cb.file_fd); + let entry = match cb.opcode { + OpCode::Preadv => opcode::Readv::new(fd, iovs as *const libc::iovec, len as u32) + .offset(offset) + .build() + .user_data(data), + OpCode::Pwritev => opcode::Writev::new(fd, iovs as *const libc::iovec, len as u32) + .offset(offset) + .build() + .user_data(data), + OpCode::Fdsync => opcode::Fsync::new(fd).build().user_data(data), + _ => { + bail!("Invalid entry code"); + } + }; + // SAFETY: parameters of the entry are valid until request is finished. + unsafe { + self.ring + .submission() + .push(&entry) + .with_context(|| "Failed to push entry")?; + } + } + self.ring.submit().with_context(|| "Failed to submit sqe") + } + + fn submit_threads_pool(&mut self, iocbp: &[*const AioCb]) -> Result { + self.threads_aio_ctx.submit(iocbp) + } + + fn get_events(&mut self) -> &[AioEvent] { + let mut locked_list = self.threads_aio_ctx.complete_list.lock().unwrap(); + self.events = locked_list.drain(0..).collect(); + drop(locked_list); + + let queue = self.ring.completion(); + for cqe in queue { + self.events.push(AioEvent { + user_data: cqe.user_data(), + status: 0, + res: i64::from(cqe.result()), + }); + } + &self.events + } +} diff --git a/util/src/arg_parser.rs b/util/src/arg_parser.rs index 48ba49370f1f355928b7b55fa0fdfc410ef59a6c..1d6ddf91018c72d1dfdcb6919f8939948360bdfb 100644 --- a/util/src/arg_parser.rs +++ b/util/src/arg_parser.rs @@ -11,15 +11,19 @@ // See the Mulan PSL v2 for more details. use std::cmp::PartialEq; +use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::env; use std::io::Write; use std::process; -use crate::errors::{ErrorKind, Result}; +use anyhow::{anyhow, bail, Result}; + +use crate::UtilError; const PREFIX_CHARS_SHORT: &str = "-"; const PREFIX_CHARS_LONG: &str = "-"; +const PREFIX_OPT_LONG: &str = "--"; const ARG_SEPARATOR: &str = "--"; const HELP_SHORT: &str = "h"; const HELP_LONG: &str = "help"; @@ -32,7 +36,7 @@ const TWENTY_FOUT_BLANK: &str = " "; type ArgsMap = BTreeMap>; /// Format help type. -#[derive(PartialEq, Debug)] +#[derive(PartialEq, Eq, Debug)] pub enum HelpType { /// Argument as a Flag. Flags, @@ -55,9 +59,7 @@ pub enum HelpType { /// .author("example") /// .version("0.0.1") /// .about("Description for application") -/// .arg( -/// Arg::with_name("arg_name") -/// ) +/// .arg(Arg::with_name("arg_name")) /// .get_matches(); /// ``` #[derive(Clone, Debug, Default)] @@ -97,6 +99,7 @@ pub struct Arg<'a> { name: &'a str, long: Option<&'a str>, short: Option<&'a str>, + opt_long: Option<&'a str>, help: Option<&'a str>, value_name: Option<&'a str>, value: Option, @@ -173,6 +176,10 @@ impl<'a> ArgParser<'a> { self.allow_list .push(format!("{}{}", PREFIX_CHARS_SHORT, arg.short.unwrap())); } + if arg.opt_long.is_some() { + self.allow_list + .push(format!("{}{}", PREFIX_OPT_LONG, arg.opt_long.unwrap())); + } self.args.insert(arg.name, arg); self } @@ -296,6 +303,12 @@ impl<'a> Arg<'a> { self } + /// Set opt long argument for arg. + pub fn opt_long(mut self, opt_long: &'a str) -> Self { + self.opt_long = Some(opt_long); + self + } + /// Set help message for arg. pub fn help(mut self, help: &'a str) -> Self { self.help = Some(help); @@ -371,62 +384,63 @@ impl<'a> Arg<'a> { /// Parse argument from a hashset. 
fn parse_from_hash(&mut self, arg_hash: &ArgsMap, multi_vec: &[String]) -> Result<()> { - let long_name = self.long.unwrap().to_string(); + let name = if let Some(long) = self.long { + long.to_string() + } else if let Some(opt_long) = self.opt_long { + opt_long.to_string() + } else { + bail!("Invalid argument, long and opt_long are None") + }; - if arg_hash.contains_key(&long_name) { - if !self.multiple && multi_vec.contains(&long_name) { - return Err(ErrorKind::DuplicateArgument(long_name).into()); + if arg_hash.contains_key(&name) { + if !self.multiple && multi_vec.contains(&name) { + return Err(anyhow!(UtilError::DuplicateArgument(name))); } - if self.value.is_some() && (arg_hash[&long_name].len() > 1) && !self.multiple { - return Err(ErrorKind::DuplicateValue(long_name).into()); + if self.value.is_some() && (arg_hash[&name].len() > 1) && !self.multiple { + return Err(anyhow!(UtilError::DuplicateValue(name))); } - if (self.value.is_some() || self.values.is_some()) && (arg_hash[&long_name].is_empty()) - { + if (self.value.is_some() || self.values.is_some()) && (arg_hash[&name].is_empty()) { if self.can_no_value { self.value = Some(Default::default()); self.presented = true; return Ok(()); } else { - return Err(ErrorKind::MissingValue(long_name).into()); + return Err(anyhow!(UtilError::MissingValue(name))); } } - if (self.value.is_none() && self.values.is_none()) && (!arg_hash[&long_name].is_empty()) - { - return Err(ErrorKind::IllegelValue( - arg_hash[&long_name][0].to_string(), - long_name.to_string(), - ) - .into()); + if (self.value.is_none() && self.values.is_none()) && (!arg_hash[&name].is_empty()) { + return Err(anyhow!(UtilError::IllegelValue( + arg_hash[&name][0].to_string(), + name.to_string(), + ))); } if self.value.is_some() { - if self.possible_value_check(&arg_hash[&long_name][0]) { - self.value = Some(arg_hash[&long_name][0].clone()); + if self.possible_value_check(&arg_hash[&name][0]) { + self.value = Some(arg_hash[&name][0].clone()); } else { - return Err(ErrorKind::ValueOutOfPossible( - long_name, + return Err(anyhow!(UtilError::ValueOutOfPossible( + name, format!("{:?}", self.possible_values), - ) - .into()); + ))); } } else if self.values.is_some() { - if self.possible_values_check(arg_hash[&long_name].clone()) { - self.values = Some(arg_hash[&long_name].clone()); + if self.possible_values_check(arg_hash[&name].clone()) { + self.values = Some(arg_hash[&name].clone()); } else { - return Err(ErrorKind::ValueOutOfPossible( - long_name, + return Err(anyhow!(UtilError::ValueOutOfPossible( + name, format!("{:?}", self.possible_values), - ) - .into()); + ))); } } self.presented = true; } else if self.required { - return Err(ErrorKind::MissingArgument(long_name).into()); + return Err(anyhow!(UtilError::MissingArgument(name))); } if self.short.is_some() { @@ -435,16 +449,15 @@ impl<'a> Arg<'a> { if (self.value.is_none() && self.values.is_none()) && (!arg_hash[short_name].is_empty()) { - return Err(ErrorKind::IllegelValue( + return Err(anyhow!(UtilError::IllegelValue( arg_hash[short_name][0].to_string(), short_name.to_string(), - ) - .into()); + ))); } self.presented = true; } else if self.required { - return Err(ErrorKind::MissingArgument(short_name.to_string()).into()); + return Err(anyhow!(UtilError::MissingArgument(short_name.to_string()))); } } @@ -453,9 +466,13 @@ impl<'a> Arg<'a> { /// Produce help message for argument. 
fn help_message(&self) -> (String, HelpType) { + let mut help_str; + if self.hiddable { - (String::new(), HelpType::Hidden) - } else if self.short.is_some() { + return (String::new(), HelpType::Hidden); + } + + if self.short.is_some() { let font_str = format!( "{}{}{}, {}{}", FOUR_BLANK, @@ -464,29 +481,41 @@ impl<'a> Arg<'a> { PREFIX_CHARS_LONG, self.long.unwrap_or("") ); - let mut help_str = format!("{}{}", TWENTY_FOUT_BLANK, self.help.unwrap_or("")); + help_str = format!("{}{}", TWENTY_FOUT_BLANK, self.help.unwrap_or("")); let font_offset = font_str.len(); help_str.replace_range(..font_offset, &font_str); - (help_str, HelpType::Flags) - } else { - let font_str = if self.values.is_some() { - format!( - "{}{}{} <{}>...", - EIGHT_BLANK, - PREFIX_CHARS_LONG, - self.long.unwrap(), - self.value_name.unwrap_or(self.name) - ) + return (help_str, HelpType::Flags); + } + + if self.long.is_some() || self.opt_long.is_some() { + let font_str = if self.long.is_some() { + if self.values.is_some() { + format!( + "{}{}{} {}...", + EIGHT_BLANK, + PREFIX_CHARS_LONG, + self.long.unwrap(), + self.value_name.unwrap_or(self.name) + ) + } else { + format!( + "{}{}{} {}", + EIGHT_BLANK, + PREFIX_CHARS_LONG, + self.long.unwrap(), + self.value_name.unwrap_or(self.name) + ) + } } else { format!( - "{}{}{} <{}>", + "{}{}{}={}", EIGHT_BLANK, - PREFIX_CHARS_LONG, - self.long.unwrap(), + PREFIX_OPT_LONG, + self.opt_long.unwrap(), self.value_name.unwrap_or(self.name) ) }; - let mut help_str = format!( + help_str = format!( "{}{}{}{}", TWENTY_FOUT_BLANK, TWENTY_FOUT_BLANK, @@ -499,8 +528,10 @@ impl<'a> Arg<'a> { } else { help_str.replace_range(..font_offset, &font_str); } - (help_str, HelpType::Optional) + return (help_str, HelpType::Optional); } + + (String::new(), HelpType::Hidden) } fn possible_value_check(&self, value: &'a str) -> bool { @@ -577,6 +608,9 @@ impl<'a> ArgMatches<'a> { fn split_arg(args: &[String]) -> (&[String], &[String]) { if let Some(index) = args.iter().position(|arg| arg == ARG_SEPARATOR) { + if index == args.len() - 1 { + return (&args[..index], &[]); + } return (&args[..index], &args[index + 1..]); } (args, &[]) @@ -587,7 +621,6 @@ impl<'a> ArgMatches<'a> { } } -#[allow(clippy::map_entry)] fn parse_cmdline( cmd_args: &[String], allow_list: &[String], @@ -596,35 +629,59 @@ fn parse_cmdline( let mut arg_map: BTreeMap> = BTreeMap::new(); let mut multi_vec: Vec = Vec::new(); - let mut i = (0, ""); - let mut j = 1; + let mut i: (usize, &str) = (0, ""); + let mut j: usize = 1; for cmd_arg in &cmd_args[1..] { - if !allow_list.contains(cmd_arg) && cmd_arg.starts_with(PREFIX_CHARS_SHORT) { - return Err(ErrorKind::UnexpectedArguments(cmd_arg.to_string()).into()); + if !allow_list.contains(cmd_arg) + && cmd_arg.starts_with(PREFIX_CHARS_SHORT) + && !cmd_arg.starts_with(PREFIX_OPT_LONG) + { + return Err(anyhow!(UtilError::UnexpectedArguments(cmd_arg.to_string()))); } - if cmd_arg.starts_with(PREFIX_CHARS_LONG) { + if cmd_arg.starts_with(PREFIX_OPT_LONG) { + let splits = cmd_arg.split('=').collect::>(); + // It has two arguments. e.g. "--modcaps=+sys_admin". 
+ if splits.len() != 2 { + return Err(anyhow!(UtilError::UnexpectedArguments(cmd_arg.to_string()))); + } + if !allow_list.contains(&splits[0].to_string()) { + return Err(anyhow!(UtilError::UnexpectedArguments(cmd_arg.to_string()))); + } + let arg_str = split_arg(splits[0], PREFIX_OPT_LONG); + if let Entry::Vacant(e) = arg_map.entry(arg_str.to_string()) { + e.insert(Vec::new()); + } else { + multi_vec.push(arg_str.to_string()); + } + arg_map + .get_mut(arg_str.as_str()) + .unwrap() + .push(splits[1].to_string()); + } else if cmd_arg.starts_with(PREFIX_CHARS_LONG) { let arg_str = split_arg(cmd_arg, PREFIX_CHARS_LONG); - if arg_map.contains_key(&arg_str) { - multi_vec.push(arg_str); + + if let Entry::Vacant(e) = arg_map.entry(arg_str.clone()) { + e.insert(Vec::new()); } else { - arg_map.insert(arg_str, Vec::new()); + multi_vec.push(arg_str); } i = (j, PREFIX_CHARS_LONG); } else if cmd_arg.starts_with(PREFIX_CHARS_SHORT) { let arg_str = split_arg(cmd_arg, PREFIX_CHARS_SHORT); - if arg_map.contains_key(&arg_str) { - multi_vec.push(arg_str); + + if let Entry::Vacant(e) = arg_map.entry(arg_str.clone()) { + e.insert(Vec::new()); } else { - arg_map.insert(arg_str, Vec::new()); + multi_vec.push(arg_str); } i = (j, PREFIX_CHARS_SHORT); } else { let arg_str = match i.1 { PREFIX_CHARS_LONG => split_arg(&cmd_args[i.0], PREFIX_CHARS_LONG), &_ => { - return Err(ErrorKind::UnexpectedArguments(cmd_arg.to_string()).into()); + return Err(anyhow!(UtilError::UnexpectedArguments(cmd_arg.to_string()))); } }; arg_map @@ -650,9 +707,10 @@ fn split_arg(arg: &str, prefix_chars: &str) -> String { #[cfg(test)] mod tests { - use super::*; use std::io::{Cursor, Read, Seek, SeekFrom}; + use super::*; + #[derive(Default)] struct TestBuffer { inner: Cursor>, @@ -745,7 +803,7 @@ mod tests { arg_parser.output_help(&mut buffer.inner); let help_str = buffer.get_msg_vec(); - let help_msg = help_str.split("\n").collect::>(); + let help_msg = help_str.split('\n').collect::>(); assert_eq!(help_msg[0], "StratoVirt 1.0.0"); assert_eq!(help_msg[1], "Huawei Technologies Co., Ltd"); assert_eq!(help_msg[2], "A light kvm-based hypervisor."); @@ -777,10 +835,10 @@ mod tests { arg.possible_values.as_ref().unwrap(), &vec!["vm1", "vm2", "vm3"] ); - assert_eq!(arg.required, false); - assert_eq!(arg.presented, true); - assert_eq!(arg.hiddable, false); - assert_eq!(arg.can_no_value, false); + assert!(!arg.required); + assert!(arg.presented); + assert!(!arg.hiddable); + assert!(!arg.can_no_value); assert_eq!(arg.value.as_ref().unwrap(), "vm1"); let (help_msg, help_type) = arg.help_message(); diff --git a/util/src/bitmap.rs b/util/src/bitmap.rs index 8b0336887e3be75da2354797b6d924eeba4365f9..3d8111c676451b57ff46aaaf95dfb7be8ff5e4f2 100644 --- a/util/src/bitmap.rs +++ b/util/src/bitmap.rs @@ -13,7 +13,9 @@ use std::cmp::Ord; use std::mem::size_of; -use crate::errors::{ErrorKind, Result, ResultExt}; +use anyhow::{anyhow, Context, Result}; + +use crate::UtilError; /// This struct is used to offer bitmap. pub struct Bitmap { @@ -52,12 +54,71 @@ impl Bitmap { pub fn set(&mut self, num: usize) -> Result<()> { let index = self.bit_index(num); if index >= self.size() { - return Err(ErrorKind::OutOfBound(index as u64, self.vol() as u64).into()); + return Err(anyhow!(UtilError::OutOfBound( + index as u64, + self.vol() as u64 + ))); } self.data[index] = T::bit_or(self.data[index], T::one().rhs(self.bit_pos(num))); Ok(()) } + /// Set the range of bitmap. + /// + /// # Arguments + /// + /// * `start` - the begin bit. + /// * `len` - the end bit. 
+ /// + /// # Example + /// + /// ```rust + /// use util::bitmap::Bitmap; + /// let mut bitmap = Bitmap::::new(4); + /// assert!(bitmap.set_range(65, 10).is_ok()); + /// assert_eq!(bitmap.contain(64).unwrap(), false); + /// assert_eq!(bitmap.contain(65).unwrap(), true); + /// assert_eq!(bitmap.contain(70).unwrap(), true); + /// assert_eq!(bitmap.contain(74).unwrap(), true); + /// assert_eq!(bitmap.contain(75).unwrap(), false); + /// ``` + pub fn set_range(&mut self, start: usize, len: usize) -> Result<()> { + if len == 0 { + return Ok(()); + } + + let mut index = self.bit_index(start); + let mut bits_to_set: usize = T::len() - self.bit_pos(start); + let mut mask_to_set: T = T::full().rhs(self.bit_pos(start)); + let mut length: usize = len; + while length >= bits_to_set { + if index >= self.size() { + return Err(anyhow!(UtilError::OutOfBound( + index as u64, + self.vol() as u64 + ))); + } + length -= bits_to_set; + self.data[index] = T::bit_or(self.data[index], mask_to_set); + bits_to_set = T::len(); + mask_to_set = T::full(); + index += 1; + } + if length > 0 { + if index >= self.size() { + return Err(anyhow!(UtilError::OutOfBound( + index as u64, + self.vol() as u64 + ))); + } + bits_to_set = T::len() - self.bit_pos(start + len); + let mask_to_set_end: T = T::full().lhs(self.bit_pos(bits_to_set)); + mask_to_set = T::bit_and(mask_to_set, mask_to_set_end); + self.data[index] = T::bit_or(self.data[index], mask_to_set); + } + Ok(()) + } + /// Clear the bit of bitmap. /// /// # Arguments @@ -66,7 +127,10 @@ impl Bitmap { pub fn clear(&mut self, num: usize) -> Result<()> { let index = self.bit_index(num); if index >= self.size() { - return Err(ErrorKind::OutOfBound(index as u64, self.vol() as u64).into()); + return Err(anyhow!(UtilError::OutOfBound( + index as u64, + self.vol() as u64 + ))); } self.data[index] = T::bit_and( self.data[index], @@ -75,18 +139,102 @@ impl Bitmap { Ok(()) } + /// Clear the range of bitmap. + /// + /// # Arguments + /// + /// * `start` - the begin bit. + /// * `len` - the end bit. 
+ /// + /// # Example + /// + /// ```rust + /// use util::bitmap::Bitmap; + /// let mut bitmap = Bitmap::::new(4); + /// assert!(bitmap.set_range(0, 256).is_ok()); + /// assert!(bitmap.clear_range(65, 10).is_ok()); + /// + /// assert_eq!(bitmap.contain(64).unwrap(), true); + /// assert_eq!(bitmap.contain(65).unwrap(), false); + /// assert_eq!(bitmap.contain(70).unwrap(), false); + /// assert_eq!(bitmap.contain(74).unwrap(), false); + /// assert_eq!(bitmap.contain(75).unwrap(), true); + /// ``` + pub fn clear_range(&mut self, start: usize, len: usize) -> Result<()> { + if len == 0 { + return Ok(()); + } + + let mut index = self.bit_index(start); + let mut bits_to_clear: usize = T::len() - self.bit_pos(start); + let mut mask_to_clear: T = T::bit_not(T::full().rhs(self.bit_pos(start))); + let mut length: usize = len; + while length >= bits_to_clear { + if index >= self.size() { + return Err(anyhow!(UtilError::OutOfBound( + index as u64, + self.vol() as u64 + ))); + } + length -= bits_to_clear; + self.data[index] = T::bit_and(self.data[index], mask_to_clear); + bits_to_clear = T::len(); + mask_to_clear = T::zero(); + index += 1; + } + if length > 0 { + if index >= self.size() { + return Err(anyhow!(UtilError::OutOfBound( + index as u64, + self.vol() as u64 + ))); + } + bits_to_clear = T::len() - self.bit_pos(start + len); + let mask_to_clear_end: T = T::bit_not(T::full().lhs(self.bit_pos(bits_to_clear))); + mask_to_clear = T::bit_or(mask_to_clear, mask_to_clear_end); + self.data[index] = T::bit_and(self.data[index], mask_to_clear); + } + Ok(()) + } + + /// Change the bit of bitmap. + /// + /// # Arguments + /// + /// * `num` - the input number + /// # Example + /// + /// ```rust + /// use util::bitmap::Bitmap; + /// let mut bitmap = Bitmap::::new(1); + /// assert!(bitmap.change(15).is_ok()); + /// assert_eq!(bitmap.contain(15).unwrap(), true); + /// assert!(bitmap.change(15).is_ok()); + /// assert_eq!(bitmap.contain(15).unwrap(), false); + /// ``` + pub fn change(&mut self, num: usize) -> Result<()> { + let index = self.bit_index(num); + if index >= self.size() { + return Err(anyhow!(UtilError::OutOfBound( + index as u64, + self.vol() as u64 + ))); + } + self.data[index] = T::bit_xor(self.data[index], T::one().rhs(self.bit_pos(num))); + Ok(()) + } + /// Query bitmap if contains input number or not. /// /// # Arguments /// /// * `num` - the input number. pub fn contain(&self, num: usize) -> Result { - if num > self.vol() { - return Err(ErrorKind::OutOfBound( + if num >= self.vol() { + return Err(anyhow!(UtilError::OutOfBound( num as u64, - (self.size() as u64 * T::len() as u64) as u64, - ) - .into()); + self.size() as u64 * T::len() as u64, + ))); } Ok(T::bit_and( self.data[self.bit_index(num)], @@ -102,13 +250,16 @@ impl Bitmap { /// * `offset` - the input offset as the query's start. pub fn count_front_bits(&self, offset: usize) -> Result { if offset > self.vol() { - return Err(ErrorKind::OutOfBound(offset as u64, self.size() as u64).into()); + return Err(anyhow!(UtilError::OutOfBound( + offset as u64, + self.size() as u64 + ))); } - let mut num = 0; + let mut num: usize = 0; for i in 0..self.bit_index(offset) + 1 { if i == self.bit_index(offset) { for j in i * T::len()..offset { - let ret = self.contain(j).chain_err(|| "count front bits failed")?; + let ret = self.contain(j).with_context(|| "count front bits failed")?; if ret { num += 1; } @@ -132,12 +283,62 @@ impl Bitmap { /// /// * `offset` - the input offset as the query's start. 
pub fn find_next_zero(&self, offset: usize) -> Result { - for i in offset + 1..self.vol() { - if !self.contain(i)? { - return Ok(i); + let size = self.size(); + let idx = offset / T::len(); + let mut offset = offset % T::len(); + for i in idx..size { + if self.data[i] == T::full() { + continue; } + for j in offset..T::len() { + if !self.contain(i * T::len() + j)? { + return Ok(i * T::len() + j); + } + } + offset = 0; + } + Ok(self.vol()) + } + + /// Return a new offset to get next nonzero bit from input offset. + /// + /// # Arguments + /// + /// * `offset` - the input offset as the query's start. + pub fn find_next_bit(&self, offset: usize) -> Result { + let size = self.size(); + let idx = offset / T::len(); + let mut offset = offset % T::len(); + for i in idx..size { + if self.data[i] == T::zero() { + continue; + } + for j in offset..T::len() { + if self.contain(i * T::len() + j)? { + return Ok(i * T::len() + j); + } + } + offset = 0; + } + + Ok(self.vol()) + } + + /// Get the inner data from bitmap. + /// + /// # Arguments + /// + /// * `buf` - the cache to receive the data. + pub fn get_data(&mut self, buf: &mut Vec) { + buf.clear(); + buf.append(&mut self.data); + } + + /// clear all the data in bitmap + pub fn clear_all(&mut self) { + for i in 0..self.size() { + self.data[i] = T::zero(); } - bail!("Failed to get new zero bit") } fn bit_index(&self, num: usize) -> usize { @@ -233,7 +434,119 @@ mod tests { let mut bitmap = Bitmap::::new(1); assert!(bitmap.set(15).is_ok()); assert!(bitmap.set(16).is_err()); + assert!(bitmap.contain(15).unwrap()); assert_eq!(bitmap.count_front_bits(16).unwrap(), 1); assert_eq!(bitmap.count_front_bits(15).unwrap(), 0); + assert!(bitmap.change(15).is_ok()); + assert!(bitmap.change(16).is_err()); + assert!(!bitmap.contain(15).unwrap()); + } + + #[test] + fn test_bitmap_set_range() { + let mut bitmap = Bitmap::::new(4); + assert!(bitmap.set_range(256, 1).is_err()); + assert!(bitmap.set_range(0, 257).is_err()); + assert!(bitmap.set_range(0, 256).is_ok()); + bitmap.clear_all(); + + assert!(bitmap.set_range(65, 10).is_ok()); + assert!(!bitmap.contain(64).unwrap()); + assert!(bitmap.contain(65).unwrap()); + assert!(bitmap.contain(70).unwrap()); + assert!(bitmap.contain(74).unwrap()); + assert!(!bitmap.contain(75).unwrap()); + bitmap.clear_all(); + + assert!(bitmap.set_range(63, 1).is_ok()); + assert!(!bitmap.contain(62).unwrap()); + assert!(bitmap.contain(63).unwrap()); + assert!(!bitmap.contain(64).unwrap()); + bitmap.clear_all(); + + assert!(bitmap.set_range(63, 66).is_ok()); + assert!(!bitmap.contain(62).unwrap()); + assert!(bitmap.contain(63).unwrap()); + assert!(bitmap.contain(67).unwrap()); + assert!(bitmap.contain(128).unwrap()); + assert!(!bitmap.contain(129).unwrap()); + bitmap.clear_all(); + } + + #[test] + fn test_bitmap_clear_range() { + let mut bitmap = Bitmap::::new(4); + assert!(bitmap.set_range(0, 256).is_ok()); + assert!(bitmap.clear_range(256, 1).is_err()); + assert!(bitmap.clear_range(0, 0).is_ok()); + assert!(bitmap.clear_range(0, 257).is_err()); + + assert!(bitmap.set_range(0, 256).is_ok()); + assert!(bitmap.clear_range(65, 10).is_ok()); + assert!(bitmap.contain(64).unwrap()); + assert!(!bitmap.contain(65).unwrap()); + assert!(!bitmap.contain(70).unwrap()); + assert!(!bitmap.contain(74).unwrap()); + assert!(bitmap.contain(75).unwrap()); + + assert!(bitmap.set_range(0, 256).is_ok()); + assert!(bitmap.clear_range(63, 1).is_ok()); + assert!(bitmap.contain(62).unwrap()); + assert!(!bitmap.contain(63).unwrap()); + 
assert!(bitmap.contain(64).unwrap()); + + assert!(bitmap.set_range(0, 256).is_ok()); + assert!(bitmap.clear_range(63, 66).is_ok()); + assert!(bitmap.contain(62).unwrap()); + assert!(!bitmap.contain(63).unwrap()); + assert!(!bitmap.contain(67).unwrap()); + assert!(!bitmap.contain(128).unwrap()); + assert!(bitmap.contain(129).unwrap()); + + assert!(bitmap.clear_range(0, 256).is_ok()); + } + + #[test] + fn test_bitmap_find_next_zero() { + let mut bitmap = Bitmap::::new(4); + assert!(bitmap.set_range(0, 256).is_ok()); + assert!(bitmap.clear(0).is_ok()); + assert!(bitmap.clear(32).is_ok()); + assert!(bitmap.clear(64).is_ok()); + assert!(bitmap.clear(128).is_ok()); + + let mut offset = 0_usize; + offset = bitmap.find_next_zero(offset).unwrap(); + assert_eq!(offset, 0); + offset = bitmap.find_next_zero(offset + 1).unwrap(); + assert_eq!(offset, 32); + offset = bitmap.find_next_zero(offset + 1).unwrap(); + assert_eq!(offset, 64); + offset = bitmap.find_next_zero(offset + 1).unwrap(); + assert_eq!(offset, 128); + offset = bitmap.find_next_zero(offset + 1).unwrap(); + assert_eq!(offset, 256); + } + + #[test] + fn test_bitmap_find_next_bit() { + let mut bitmap = Bitmap::::new(4); + bitmap.clear_all(); + assert!(bitmap.set(0).is_ok()); + assert!(bitmap.set(32).is_ok()); + assert!(bitmap.set(64).is_ok()); + assert!(bitmap.set(128).is_ok()); + + let mut offset = 0_usize; + offset = bitmap.find_next_bit(offset).unwrap(); + assert_eq!(offset, 0); + offset = bitmap.find_next_bit(offset + 1).unwrap(); + assert_eq!(offset, 32); + offset = bitmap.find_next_bit(offset + 1).unwrap(); + assert_eq!(offset, 64); + offset = bitmap.find_next_bit(offset + 1).unwrap(); + assert_eq!(offset, 128); + offset = bitmap.find_next_bit(offset + 1).unwrap(); + assert_eq!(offset, 256); } } diff --git a/util/src/byte_code.rs b/util/src/byte_code.rs index 0c607b1ff5cef68d6101bdf048015ada74f667ee..29d82c68ae508117dc68a1d7c45f63b01faaeb9d 100644 --- a/util/src/byte_code.rs +++ b/util/src/byte_code.rs @@ -15,16 +15,18 @@ use std::slice::{from_raw_parts, from_raw_parts_mut}; /// A trait bound defined for types which are safe to convert to a byte slice and /// to create from a byte slice. -pub trait ByteCode: Default + Copy + Send + Sync { +pub trait ByteCode: Clone + Default + Send + Sync { /// Return the contents of an object (impl trait `ByteCode`) as a slice of bytes. /// the inverse of this function is "from_bytes" fn as_bytes(&self) -> &[u8] { + // SAFETY: The object is guaranteed been initialized already. unsafe { from_raw_parts(self as *const Self as *const u8, size_of::()) } } /// Return the contents of a mutable object (impl trait `ByteCode`) to a mutable slice of bytes. /// the inverse of this function is "from_bytes_mut" fn as_mut_bytes(&mut self) -> &mut [u8] { + // SAFETY: The object is guaranteed been initialized already. unsafe { from_raw_parts_mut(self as *mut Self as *mut u8, size_of::()) } } @@ -37,8 +39,9 @@ pub trait ByteCode: Default + Copy + Send + Sync { if data.len() != size_of::() { return None; } - let obj_array = unsafe { from_raw_parts::(data.as_ptr() as *const _, data.len()) }; - Some(&obj_array[0]) + + // SAFETY: The pointer is properly aligned and point to an initialized instance of T. 
+ unsafe { data.as_ptr().cast::().as_ref() } } /// Creates an mutable object (impl trait `ByteCode`) from a mutable slice of bytes @@ -50,9 +53,9 @@ pub trait ByteCode: Default + Copy + Send + Sync { if data.len() != size_of::() { return None; } - let obj_array = - unsafe { from_raw_parts_mut::(data.as_mut_ptr() as *mut _, data.len()) }; - Some(&mut obj_array[0]) + + // SAFETY: The pointer is properly aligned and point to an initialized instance of T. + unsafe { data.as_mut_ptr().cast::().as_mut() } } } @@ -62,17 +65,19 @@ impl ByteCode for u8 {} impl ByteCode for u16 {} impl ByteCode for u32 {} impl ByteCode for u64 {} +impl ByteCode for u128 {} impl ByteCode for isize {} impl ByteCode for i8 {} impl ByteCode for i16 {} impl ByteCode for i32 {} impl ByteCode for i64 {} +impl ByteCode for i128 {} #[cfg(test)] mod test { use super::*; - #[allow(dead_code)] + #[repr(C)] #[derive(Copy, Clone, Default)] struct TestData { type_id: [u8; 8], @@ -90,8 +95,8 @@ mod test { assert_eq!(*u32::from_bytes(&bytes).unwrap(), 0x0512_5634); // Convert failed because byte stream's length is not equal to u32's size - let mis_bytes = [0x0_u8, 0x0, 0x12]; - assert!(u32::from_bytes(&mis_bytes).is_none()); + let miss_bytes = [0x0_u8, 0x0, 0x12]; + assert!(u32::from_bytes(&miss_bytes).is_none()); } #[test] @@ -102,9 +107,9 @@ mod test { }; let mut target = Vec::new(); + target.extend_from_slice(b"bytecode"); target.extend_from_slice(&[0x79, 0x56, 0x34, 0x12]); target.extend_from_slice(&[0_u8; 4]); - target.extend_from_slice(b"bytecode"); assert_eq!(data.as_bytes().to_vec(), target); // Convert failed because byte stream's length is not equal to size of struct. diff --git a/util/src/clock.rs b/util/src/clock.rs new file mode 100644 index 0000000000000000000000000000000000000000..68a043e1c8234b8ff7ad389212700873150d36d2 --- /dev/null +++ b/util/src/clock.rs @@ -0,0 +1,115 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::time::{Duration, Instant}; + +use crate::{ + loop_context::EventLoopContext, + test_helper::{get_test_time, is_test_enabled}, +}; + +pub fn get_current_time() -> Instant { + if is_test_enabled() { + get_test_time() + } else { + Instant::now() + } +} + +/// Recording VM timer state. +pub struct ClockState { + enable: bool, + offset: Instant, + paused: Duration, + elapsed: Duration, +} + +impl Default for ClockState { + fn default() -> Self { + Self { + enable: false, + offset: Instant::now(), + paused: Duration::default(), + elapsed: Duration::default(), + } + } +} + +impl ClockState { + pub fn get_virtual_clock(&mut self) -> Duration { + let mut time = self.paused; + if self.enable { + time = self.offset.elapsed() - self.elapsed; + } + time + } + + pub fn enable(&mut self) { + self.elapsed = self.offset.elapsed() - self.paused; + self.enable = true; + } + + pub fn disable(&mut self) { + self.paused = self.offset.elapsed() - self.elapsed; + self.enable = false; + } +} + +impl EventLoopContext { + /// Returns the clock based on the type. 
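The `ByteCode` trait reworked in util/src/byte_code.rs above gives any plain-data, `#[repr(C)]`, `Default` struct a length-checked byte-slice view. A small round-trip sketch (the `Frame` struct is made up for illustration):

```rust
use util::byte_code::ByteCode;

#[repr(C)]
#[derive(Clone, Copy, Default)]
struct Frame {
    id: u32,
    len: u32,
}

impl ByteCode for Frame {}

fn roundtrip() {
    let frame = Frame { id: 1, len: 512 };
    let bytes = frame.as_bytes().to_vec(); // 8 bytes, native endianness
    let parsed = Frame::from_bytes(&bytes).unwrap();
    assert_eq!(parsed.id, 1);
    // from_bytes returns None when the slice length differs from size_of::<Frame>().
    assert!(Frame::from_bytes(&bytes[..4]).is_none());
}
```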
+ pub fn get_virtual_clock(&self) -> Duration { + self.clock_state.lock().unwrap().get_virtual_clock() + } + + /// The clock running when VCPU in running. + pub fn enable_clock(&self) { + self.clock_state.lock().unwrap().enable(); + } + + /// The clock is stopped when VCPU in paused. + pub fn disable_clock(&self) { + self.clock_state.lock().unwrap().disable(); + } +} + +#[cfg(test)] +mod test { + use std::{thread, time::Duration}; + + use super::ClockState; + + #[test] + fn test_virtual_clock() { + let mut clock = ClockState::default(); + clock.enable(); + thread::sleep(Duration::from_secs(5)); + let virtual_clock = clock.get_virtual_clock(); + assert_eq!(virtual_clock.as_secs(), 5); + clock.disable(); + thread::sleep(Duration::from_secs(10)); + let virtual_clock = clock.get_virtual_clock(); + assert_eq!(virtual_clock.as_secs(), 5); + clock.enable(); + thread::sleep(Duration::from_secs(5)); + let virtual_clock = clock.get_virtual_clock(); + assert_eq!(virtual_clock.as_secs(), 10); + + clock.disable(); + thread::sleep(Duration::from_secs(10)); + let virtual_clock = clock.get_virtual_clock(); + assert_eq!(virtual_clock.as_secs(), 10); + clock.enable(); + thread::sleep(Duration::from_secs(5)); + let virtual_clock = clock.get_virtual_clock(); + assert_eq!(virtual_clock.as_secs(), 15); + } +} diff --git a/util/src/daemonize.rs b/util/src/daemonize.rs index a057ebec44ec66b3af8fa0faa0b6e2e579160f30..f693abb4e612d457816e500a4b1db942ce574d32 100644 --- a/util/src/daemonize.rs +++ b/util/src/daemonize.rs @@ -39,7 +39,9 @@ use std::os::unix::io::RawFd; use std::path::Path; use std::process::exit; -use crate::errors::{ErrorKind, Result}; +use anyhow::{anyhow, Result}; + +use crate::UtilError; /// Write process id to pid file. fn create_pid_file(path: &str) -> Result<()> { @@ -48,6 +50,7 @@ fn create_pid_file(path: &str) -> Result<()> { let mut pid_file: File = OpenOptions::new() .write(true) .create(true) + .truncate(true) .mode(0o600) .open(path)?; write!(pid_file, "{}", pid)?; @@ -68,10 +71,12 @@ fn create_pid_file(path: &str) -> Result<()> { /// /// `DaemonFork` Error, the ret of `libc::fork()` is less than zero. fn fork() -> Result<()> { + // SAFETY: No input parameters in this system call. + // and the return value have been verified later. let ret = unsafe { libc::fork() }; match ret.cmp(&0) { - Ordering::Less => Err(ErrorKind::DaemonFork.into()), + Ordering::Less => Err(anyhow!(UtilError::DaemonFork)), Ordering::Greater => exit(0), Ordering::Equal => Ok(()), } @@ -83,16 +88,18 @@ fn fork() -> Result<()> { /// process also becomes the process group leader or a new process group in the /// session. /// The calling process will be the only process in the new process group and in -/// the new session. New session has no controlling termimal. +/// the new session. New session has no controlling terminal. /// /// # Errors /// /// `DaemonSetsid` Error, the ret of `libc::setsid()` is -1 fn set_sid() -> Result<()> { + // SAFETY: No input parameters in this system call. + // and the return value have been verified later. let ret = unsafe { libc::setsid() }; if ret == -1 { - Err(ErrorKind::DaemonSetsid.into()) + Err(anyhow!(UtilError::DaemonSetsid)) } else { Ok(()) } @@ -111,19 +118,21 @@ fn set_sid() -> Result<()> { /// `DaemonRedirectStdio` Error, the ret of `libc::open()`, `libc::dup2()`, /// `libc::close()`is -1 fn redirect_stdio(fd: RawFd) -> Result<()> { + // SAFETY: the input parameter for systemctl are constant,and the return + // value have been verified later. 
unsafe { let devnull_fd = libc::open(b"/dev/null\0" as *const [u8; 10] as _, libc::O_RDWR); if devnull_fd == -1 { - return Err(ErrorKind::DaemonRedirectStdio.into()); + return Err(anyhow!(UtilError::DaemonRedirectStdio)); } if libc::dup2(devnull_fd, fd) == -1 { - return Err(ErrorKind::DaemonRedirectStdio.into()); + return Err(anyhow!(UtilError::DaemonRedirectStdio)); } if libc::close(devnull_fd) == -1 { - return Err(ErrorKind::DaemonRedirectStdio.into()); + return Err(anyhow!(UtilError::DaemonRedirectStdio)); } } @@ -146,7 +155,7 @@ fn redirect_stdio(fd: RawFd) -> Result<()> { pub fn daemonize(pid_file: Option) -> Result<()> { if let Some(path) = pid_file.as_ref() { if Path::new(path).exists() { - return Err(ErrorKind::PidFileExist.into()); + return Err(anyhow!(UtilError::PidFileExist)); } } diff --git a/util/src/device_tree.rs b/util/src/device_tree.rs index d47b7f5e79227be3cf9395a81813ef1f63abd3e0..db590271de9f7a650a4e32c8a12e6cf3ff964322 100644 --- a/util/src/device_tree.rs +++ b/util/src/device_tree.rs @@ -10,14 +10,16 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use std::mem::size_of; - -use crate::errors::{ErrorKind, Result, ResultExt}; +use anyhow::{anyhow, bail, Context, Result}; use byteorder::{BigEndian, ByteOrder}; +use crate::UtilError; + pub const CLK_PHANDLE: u32 = 1; pub const GIC_PHANDLE: u32 = 2; pub const GIC_ITS_PHANDLE: u32 = 3; +pub const PPI_CLUSTER_PHANDLE: u32 = 4; +pub const FIRST_VCPU_PHANDLE: u32 = 6; pub const CPU_PHANDLE_START: u32 = 10; pub const GIC_FDT_IRQ_TYPE_SPI: u32 = 0; @@ -43,6 +45,10 @@ const MEM_RESERVE_ALIGNMENT: usize = 8; // Structure block alignment. const STRUCTURE_BLOCK_ALIGNMENT: usize = 4; +pub const FDT_PCI_RANGE_IOPORT: u32 = 0x0100_0000; +pub const FDT_PCI_RANGE_MMIO: u32 = 0x0200_0000; +pub const FDT_PCI_RANGE_MMIO_64BIT: u32 = 0x0300_0000; + /// FdtBuilder structure. pub struct FdtBuilder { /// The header of flattened device tree. 
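The hunks around here adjust `FdtBuilder`'s error handling; the intended usage is unchanged: every `begin_node` returns a depth that the matching `end_node` must receive, and `finish` serializes the blob. A usage sketch (node and property names are illustrative, and `FdtBuilder::new()` is assumed from the unchanged part of the file):

```rust
use util::device_tree::FdtBuilder;

fn build_minimal_fdt() -> anyhow::Result<Vec<u8>> {
    let mut fdt = FdtBuilder::new();
    let root = fdt.begin_node("")?; // root node uses an empty name
    fdt.set_property_string("compatible", "linux,dummy-virt")?;
    fdt.set_property_u32("#address-cells", 0x2)?;

    let chosen = fdt.begin_node("chosen")?;
    fdt.set_property_string("bootargs", "console=ttyAMA0")?;
    fdt.end_node(chosen)?;

    fdt.end_node(root)?;
    fdt.finish() // returns the flattened device tree bytes
}
```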
@@ -117,7 +123,7 @@ impl FdtBuilder { pub fn finish(mut self) -> Result> { if self.subnode_depth > 0 { - return Err(ErrorKind::NodeUnclosed(self.subnode_depth).into()); + return Err(anyhow!(UtilError::NodeUnclosed(self.subnode_depth))); } self.structure_blk .extend_from_slice(&FDT_END.to_be_bytes()[..]); @@ -136,6 +142,9 @@ impl FdtBuilder { let off_dt_strings = FDT_HEADER_SIZE + self.mem_reserve.len() + self.structure_blk.len(); let off_mem_rsvmap = FDT_HEADER_SIZE; + if self.fdt_header.len() < FDT_HEADER_SIZE { + bail!("fdt header size too small"); + } BigEndian::write_u32(&mut self.fdt_header[0..4], FDT_MAGIC); BigEndian::write_u32(&mut self.fdt_header[4..8], total_size as u32); BigEndian::write_u32(&mut self.fdt_header[8..12], off_dt_struct as u32); @@ -158,7 +167,7 @@ impl FdtBuilder { pub fn add_mem_reserve(&mut self, mem_reservations: &[FdtReserveEntry]) -> Result<()> { if !check_mem_reserve_overlap(mem_reservations) { - return Err(ErrorKind::MemReserveOverlap.into()); + return Err(anyhow!(UtilError::MemReserveOverlap)); } for mem_reser in mem_reservations { @@ -174,7 +183,7 @@ impl FdtBuilder { pub fn begin_node(&mut self, node_name: &str) -> Result { if !check_string_legality(node_name) { - return Err(ErrorKind::IllegalString(node_name.to_string()).into()); + return Err(anyhow!(UtilError::IllegalString(node_name.to_string()))); } self.structure_blk @@ -196,7 +205,10 @@ impl FdtBuilder { pub fn end_node(&mut self, begin_node_depth: u32) -> Result<()> { if begin_node_depth != self.subnode_depth { - return Err(ErrorKind::NodeDepthMismatch(begin_node_depth, self.subnode_depth).into()); + return Err(anyhow!(UtilError::NodeDepthMismatch( + begin_node_depth, + self.subnode_depth + ))); } self.structure_blk @@ -215,44 +227,44 @@ impl FdtBuilder { // The string property should end with null('\0'). 
val_array.push(0x0_u8); self.set_property(prop, &val_array) - .chain_err(|| ErrorKind::SetPropertyErr("string".to_string())) + .with_context(|| UtilError::SetPropertyErr("string".to_string())) } pub fn set_property_u32(&mut self, prop: &str, val: u32) -> Result<()> { self.set_property(prop, &val.to_be_bytes()[..]) - .chain_err(|| ErrorKind::SetPropertyErr("u32".to_string())) + .with_context(|| UtilError::SetPropertyErr("u32".to_string())) } pub fn set_property_u64(&mut self, prop: &str, val: u64) -> Result<()> { self.set_property(prop, &val.to_be_bytes()[..]) - .chain_err(|| ErrorKind::SetPropertyErr("u64".to_string())) + .with_context(|| UtilError::SetPropertyErr("u64".to_string())) } pub fn set_property_array_u32(&mut self, prop: &str, array: &[u32]) -> Result<()> { - let mut prop_array = Vec::with_capacity(array.len() * size_of::()); + let mut prop_array = Vec::with_capacity(std::mem::size_of_val(array)); for element in array { prop_array.extend_from_slice(&element.to_be_bytes()[..]); } self.set_property(prop, &prop_array) - .chain_err(|| ErrorKind::SetPropertyErr("u32 array".to_string())) + .with_context(|| UtilError::SetPropertyErr("u32 array".to_string())) } pub fn set_property_array_u64(&mut self, prop: &str, array: &[u64]) -> Result<()> { - let mut prop_array = Vec::with_capacity(array.len() * size_of::()); + let mut prop_array = Vec::with_capacity(std::mem::size_of_val(array)); for element in array { prop_array.extend_from_slice(&element.to_be_bytes()[..]); } self.set_property(prop, &prop_array) - .chain_err(|| ErrorKind::SetPropertyErr("u64 array".to_string())) + .with_context(|| UtilError::SetPropertyErr("u64 array".to_string())) } pub fn set_property(&mut self, property_name: &str, property_val: &[u8]) -> Result<()> { if !check_string_legality(property_name) { - return Err(ErrorKind::IllegalString(property_name.to_string()).into()); + return Err(anyhow!(UtilError::IllegalString(property_name.to_string()))); } if !self.begin_node { - return Err(ErrorKind::IllegelPropertyPos.into()); + return Err(anyhow!(UtilError::IllegelPropertyPos)); } let len = property_val.len() as u32; @@ -275,8 +287,7 @@ impl FdtBuilder { fn align_structure_blk(&mut self, alignment: usize) { let remainder = self.structure_blk.len() % alignment; if remainder != 0 { - self.structure_blk - .extend(vec![0_u8; (alignment - remainder) as usize]); + self.structure_blk.extend(vec![0_u8; alignment - remainder]); } } } @@ -292,13 +303,6 @@ pub trait CompileFDT { fn generate_fdt_node(&self, fdt: &mut FdtBuilder) -> Result<()>; } -pub fn dump_dtb(fdt: &[u8], file_path: &str) { - use std::fs::File; - use std::io::Write; - let mut f = File::create(file_path).unwrap(); - f.write_all(fdt).expect("Unable to write data"); -} - #[cfg(test)] mod tests { use super::*; diff --git a/util/src/edid.rs b/util/src/edid.rs new file mode 100644 index 0000000000000000000000000000000000000000..f124d6d68fd98043fb83d27642064324bcca9476 --- /dev/null +++ b/util/src/edid.rs @@ -0,0 +1,505 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+// See the Mulan PSL v2 for more details. + +/// We use Leaky Bucket Algorithm to limit iops of block device and qmp. +use byteorder::{BigEndian, ByteOrder, LittleEndian}; +use log::warn; + +#[derive(Debug, Default)] +struct EdidMode { + xres: u32, + yres: u32, + byte: u32, + xtra3: u32, + bit: u32, + dta: u32, +} + +#[derive(Debug, Default)] +pub struct EdidInfo { + vendor: Vec, + name: Vec, + serial: u32, + dpi: u32, + prefx: u32, + prefy: u32, + maxx: u32, + maxy: u32, +} + +impl EdidInfo { + pub fn new(vendor: &str, name: &str, dpi: u32, x: u32, y: u32) -> Self { + EdidInfo { + vendor: vendor.chars().collect(), + name: name.chars().collect(), + serial: 0, + dpi, + prefx: x, + prefy: y, + maxx: x, + maxy: y, + } + } + + pub fn edid_array_fulfill(&mut self, edid_array: &mut [u8; 1024]) { + // The format follows VESA ENHANCED EXTENDED DISPLAY IDENTIFICATION DATA STANDARD + if self.vendor.len() != 3 { + // HWV for 'HUAWEI TECHNOLOGIES CO., INC.' + self.vendor = "HWV".chars().collect(); + } + if self.name.is_empty() { + self.name = "STRA Monitor".chars().collect(); + } + if self.dpi == 0 { + self.dpi = 100; + } + if self.prefx == 0 { + self.prefx = 1024; + } + if self.prefy == 0 { + self.prefy = 768; + } + + let mut offset: usize = 54; + let mut xtra3_offset: usize = 0; + let mut dta_offset: usize = 0; + if edid_array.len() >= 256 { + dta_offset = 128; + edid_array[126] += 1; + self.fullfill_ext_dta(edid_array, dta_offset); + } + + // Fixed header + let header: u64 = 0x00FF_FFFF_FFFF_FF00; + LittleEndian::write_u64(&mut edid_array[0..8], header); + // ID Manufacturer Name + let vendor_id: u16 = (((self.vendor[0] as u16 - '@' as u16) & 0x1f) << 10) + | (((self.vendor[1] as u16 - '@' as u16) & 0x1f) << 5) + | ((self.vendor[2] as u16 - '@' as u16) & 0x1f); + BigEndian::write_u16(&mut edid_array[8..10], vendor_id); + // ID Product Code + LittleEndian::write_u16(&mut edid_array[10..12], 0x1234); + // ID Serial Number + LittleEndian::write_u32(&mut edid_array[12..16], self.serial); + // Week of Manufacture + edid_array[16] = 42; + // Year of Manufacture or Model Year + edid_array[17] = (2022 - 1990) as u8; + // Version Number: defines EDID Structure Version 1, Revision 4. 
+ edid_array[18] = 0x01; + // Revision Number + edid_array[19] = 0x04; + + // Video Input Definition: digital, 8bpc, displayport + edid_array[20] = 0xa5; + // Horizontal Screen Size or Aspect Ratio + edid_array[21] = (self.prefx * self.dpi / 2540) as u8; + // Vertical Screen Size or Aspect Ratio + edid_array[22] = (self.prefy * self.dpi / 2540) as u8; + // Display Transfer Characteristic: display gamma is 2.2 + edid_array[23] = 220 - 100; + // Feature Support: std sRGB, preferred timing + edid_array[24] = 0x06; + + let temp: [f32; 8] = [ + 0.6400, 0.3300, 0.3000, 0.6000, 0.1500, 0.0600, 0.3127, 0.3290, + ]; + // Color Characteristics: 10 bytes + self.fullfill_color_space(edid_array, temp); + + // 18 Byte Data Blocks: 72 bytes + self.fullfill_desc_timing(edid_array, offset); + offset += 18; + + self.fullfill_desc_range(edid_array, offset, 0xfd); + offset += 18; + + if !self.name.is_empty() { + self.fullfill_desc_text(edid_array, offset, 0xfc); + offset += 18; + } + + if self.serial != 0 { + self.fullfill_desc_text(edid_array, offset, 0xff); + offset += 18; + } + + if offset < 126 { + xtra3_offset = offset; + self.fullfill_desc_xtra3_std(edid_array, xtra3_offset); + offset += 18; + } + + while offset < 126 { + self.fullfill_desc_dummy(edid_array, offset); + offset += 18; + } + + // Established Timings: 3 bytes + // Standard Timings: 16 bytes + self.fullfill_modes(edid_array, xtra3_offset, dta_offset); + + // EXTENSION Flag and Checksum + self.fullfill_checksum(edid_array) + } + + fn fullfill_ext_dta(&mut self, edid_array: &mut [u8], offset: usize) { + edid_array[offset] = 0x02; + edid_array[offset + 1] = 0x03; + edid_array[offset + 2] = 0x05; + edid_array[offset + 3] = 0x00; + // video data block + edid_array[offset + 4] = 0x40; + } + + fn fullfill_color_space(&mut self, edid_array: &mut [u8], arr: [f32; 8]) { + let red_x: u32 = (arr[0] * 1024_f32 + 0.5) as u32; + let red_y: u32 = (arr[0] * 1024_f32 + 0.5) as u32; + let green_x: u32 = (arr[0] * 1024_f32 + 0.5) as u32; + let green_y: u32 = (arr[0] * 1024_f32 + 0.5) as u32; + let blue_x: u32 = (arr[0] * 1024_f32 + 0.5) as u32; + let blue_y: u32 = (arr[0] * 1024_f32 + 0.5) as u32; + let white_x: u32 = (arr[0] * 1024_f32 + 0.5) as u32; + let white_y: u32 = (arr[0] * 1024_f32 + 0.5) as u32; + + edid_array[25] = (((red_x & 0x03) << 6) + | ((red_y & 0x03) << 4) + | ((green_x & 0x03) << 2) + | (green_y & 0x03)) as u8; + edid_array[26] = (((blue_x & 0x03) << 6) + | ((blue_y & 0x03) << 4) + | ((white_x & 0x03) << 2) + | (white_y & 0x03)) as u8; + edid_array[27] = (red_x >> 2) as u8; + edid_array[28] = (red_y >> 2) as u8; + edid_array[29] = (green_x >> 2) as u8; + edid_array[30] = (green_y >> 2) as u8; + edid_array[31] = (blue_x >> 2) as u8; + edid_array[32] = (blue_y >> 2) as u8; + edid_array[33] = (white_x >> 2) as u8; + edid_array[34] = (white_y >> 2) as u8; + } + + fn fullfill_desc_timing(&mut self, edid_array: &mut [u8], offset: usize) { + // physical display size + let xmm: u32 = self.prefx * self.dpi / 254; + let ymm: u32 = self.prefy * self.dpi / 254; + let xfront: u32 = self.prefx * 25 / 100; + let xsync: u32 = self.prefx * 3 / 100; + let xblank: u32 = self.prefx * 35 / 100; + let yfront: u32 = self.prefy * 5 / 1000; + let ysync: u32 = self.prefy * 5 / 1000; + let yblank: u32 = self.prefy * 35 / 1000; + let clock: u32 = 75 * (self.prefx + xblank) * (self.prefy + yblank); + + LittleEndian::write_u16(&mut edid_array[offset..offset + 2], clock as u16); + edid_array[offset + 2] = (self.prefx & 0xff) as u8; + edid_array[offset + 3] = (xblank & 
0xff) as u8; + edid_array[offset + 4] = (((self.prefx & 0xf00) >> 4) | ((xblank & 0xf00) >> 8)) as u8; + edid_array[offset + 5] = (self.prefy & 0xff) as u8; + edid_array[offset + 6] = (yblank & 0xff) as u8; + edid_array[offset + 7] = (((self.prefy & 0xf00) >> 4) | ((yblank & 0xf00) >> 8)) as u8; + edid_array[offset + 8] = (xfront & 0xff) as u8; + edid_array[offset + 9] = (xsync & 0xff) as u8; + edid_array[offset + 10] = (((yfront & 0x00f) << 4) | (ysync & 0x00f)) as u8; + edid_array[offset + 11] = (((xfront & 0x300) >> 2) + | ((xsync & 0x300) >> 4) + | ((yfront & 0x030) >> 2) + | ((ysync & 0x030) >> 4)) as u8; + edid_array[offset + 12] = (xmm & 0xff) as u8; + edid_array[offset + 13] = (ymm & 0xff) as u8; + edid_array[offset + 14] = (((xmm & 0xf00) >> 4) | ((ymm & 0xf00) >> 8)) as u8; + edid_array[offset + 17] = 0x18; + } + + fn fullfill_desc_range(&mut self, edid_array: &mut [u8], offset: usize, desc_type: u8) { + self.fullfill_desc_type(edid_array, offset, desc_type); + // vertical (50 -> 125 Hz) + edid_array[offset + 5] = 50; + edid_array[offset + 6] = 125; + // horizontal (30 -> 160 kHz) + edid_array[offset + 7] = 30; + edid_array[offset + 8] = 160; + // max dot clock (1200 MHz) + edid_array[offset + 9] = (1200 / 10) as u8; + // no extended timing information + edid_array[offset + 10] = 0x01; + // padding + edid_array[offset + 11] = b'\n'; + for i in 12..18 { + edid_array[offset + i] = b' '; + } + } + + fn fullfill_desc_text(&mut self, edid_array: &mut [u8], offset: usize, desc_type: u8) { + self.fullfill_desc_type(edid_array, offset, desc_type); + for i in 5..18 { + edid_array[offset + i] = b' '; + } + if desc_type == 0xfc { + // name + for (index, c) in self.name.iter().enumerate() { + edid_array[offset + 5 + index] = (*c) as u8; + } + } else if desc_type == 0xff { + // serial + LittleEndian::write_u32(&mut edid_array[offset + 5..offset + 9], self.serial); + } else { + warn!("Unexpected desc type"); + } + } + + fn fullfill_desc_xtra3_std(&mut self, edid_array: &mut [u8], offset: usize) { + // additional standard timings 3 + self.fullfill_desc_type(edid_array, offset, 0xf7); + edid_array[offset + 4] = 10; + } + + fn fullfill_desc_dummy(&mut self, edid_array: &mut [u8], offset: usize) { + self.fullfill_desc_type(edid_array, offset, 0x10); + } + + fn fullfill_desc_type(&mut self, edid_array: &mut [u8], offset: usize, desc_type: u8) { + edid_array[offset] = 0; + edid_array[offset + 1] = 0; + edid_array[offset + 2] = 0; + edid_array[offset + 3] = desc_type; + edid_array[offset + 4] = 0; + } + + fn fullfill_modes(&mut self, edid_array: &mut [u8], xtra3_offset: usize, dta_offset: usize) { + let edid_modes = vec![ + // dea/dta extension timings (all @ 50 Hz) + EdidMode { + xres: 5120, + yres: 2160, + dta: 125, + ..Default::default() + }, + EdidMode { + xres: 4096, + yres: 2160, + dta: 101, + ..Default::default() + }, + EdidMode { + xres: 3840, + yres: 2160, + dta: 96, + ..Default::default() + }, + EdidMode { + xres: 2560, + yres: 1080, + dta: 89, + ..Default::default() + }, + EdidMode { + xres: 2048, + yres: 1152, + ..Default::default() + }, + EdidMode { + xres: 1920, + yres: 1080, + dta: 31, + ..Default::default() + }, + // additional standard timings 3 (all @ 60Hz) + EdidMode { + xres: 1920, + yres: 1440, + xtra3: 11, + bit: 5, + ..Default::default() + }, + EdidMode { + xres: 1920, + yres: 1200, + xtra3: 10, + bit: 0, + ..Default::default() + }, + EdidMode { + xres: 1856, + yres: 1392, + xtra3: 10, + bit: 3, + ..Default::default() + }, + EdidMode { + xres: 1792, + yres: 1344, + xtra3: 10, + 
bit: 5, + ..Default::default() + }, + EdidMode { + xres: 1600, + yres: 1200, + xtra3: 9, + bit: 2, + ..Default::default() + }, + EdidMode { + xres: 1680, + yres: 1050, + xtra3: 9, + bit: 5, + ..Default::default() + }, + EdidMode { + xres: 1440, + yres: 1050, + xtra3: 8, + bit: 1, + ..Default::default() + }, + EdidMode { + xres: 1440, + yres: 900, + xtra3: 8, + bit: 5, + ..Default::default() + }, + EdidMode { + xres: 1360, + yres: 768, + xtra3: 8, + bit: 7, + ..Default::default() + }, + EdidMode { + xres: 1280, + yres: 1024, + xtra3: 7, + bit: 1, + ..Default::default() + }, + EdidMode { + xres: 1280, + yres: 960, + xtra3: 7, + bit: 3, + ..Default::default() + }, + EdidMode { + xres: 1280, + yres: 768, + xtra3: 7, + bit: 6, + ..Default::default() + }, + // established timings (all @ 60Hz) + EdidMode { + xres: 1024, + yres: 768, + byte: 36, + bit: 3, + ..Default::default() + }, + EdidMode { + xres: 800, + yres: 600, + byte: 35, + bit: 0, + ..Default::default() + }, + EdidMode { + xres: 640, + yres: 480, + byte: 35, + bit: 5, + ..Default::default() + }, + ]; + let mut std_offset: usize = 38; + + for mode in edid_modes { + if (self.maxx != 0 && mode.xres > self.maxx) + || (self.maxy != 0 && mode.yres > self.maxy) + { + continue; + } + + if mode.byte != 0 { + edid_array[mode.byte as usize] |= (1 << mode.bit) as u8; + } else if mode.xtra3 != 0 && xtra3_offset != 0 { + edid_array[xtra3_offset] |= (1 << mode.bit) as u8; + } else if std_offset < 54 + && self.fullfill_std_mode(edid_array, std_offset, mode.xres, mode.yres) == 0 + { + std_offset += 2; + } + + if dta_offset != 0 && mode.dta != 0 { + self.fullfill_ext_dta_mode(edid_array, dta_offset, mode.dta); + } + } + + while std_offset < 54 { + self.fullfill_std_mode(edid_array, std_offset, 0, 0); + std_offset += 2; + } + } + + fn fullfill_std_mode( + &mut self, + edid_array: &mut [u8], + std_offset: usize, + xres: u32, + yres: u32, + ) -> i32 { + let aspect: u32; + + if xres == 0 || yres == 0 { + edid_array[std_offset] = 0x01; + edid_array[std_offset + 1] = 0x01; + return 0; + } else if xres * 10 == yres * 16 { + aspect = 0; + } else if xres * 3 == yres * 4 { + aspect = 1; + } else if xres * 4 == yres * 5 { + aspect = 2; + } else if xres * 9 == yres * 16 { + aspect = 3; + } else { + return -1; + } + + if (xres / 8) - 31 > 255 { + return -1; + } + edid_array[std_offset] = ((xres / 8) - 31) as u8; + edid_array[std_offset + 1] = (aspect << 6) as u8; + 0 + } + + fn fullfill_ext_dta_mode(&mut self, edid_array: &mut [u8], dta_offset: usize, dta: u32) { + let index = edid_array[dta_offset + 2] as usize; + edid_array[index] = dta as u8; + edid_array[dta_offset + 2] += 1; + edid_array[dta_offset + 4] += 1; + } + + fn fullfill_checksum(&mut self, edid_array: &mut [u8]) { + let mut sum: u32 = 0; + for elem in edid_array.iter() { + sum += u32::from(*elem); + } + sum &= 0xff; + if sum != 0 { + edid_array[127] = (0x100 - sum) as u8; + } + } +} diff --git a/util/src/error.rs b/util/src/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..e59c1ebd7440c7323ff115d938c90b9771057094 --- /dev/null +++ b/util/src/error.rs @@ -0,0 +1,79 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
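The new `UtilError` enum below replaces the `error_chain` block that used to live in util/src/lib.rs, which is why call sites throughout this patch move from `ErrorKind::X.into()` to `anyhow!(UtilError::X)` and from `.chain_err(..)` to `.with_context(..)`. A sketch of the call-site pattern (the `check_index`/`do_work` functions are hypothetical):

```rust
use anyhow::{anyhow, Context, Result};
use util::UtilError;

fn do_work(_idx: u64) -> Result<()> {
    Ok(())
}

fn check_index(idx: u64, max: u64) -> Result<()> {
    if idx >= max {
        // Before: Err(ErrorKind::OutOfBound(idx, max).into())
        return Err(anyhow!(UtilError::OutOfBound(idx, max)));
    }
    // Before: .chain_err(|| "handling index failed")
    do_work(idx).with_context(|| format!("handling index {} failed", idx))?;
    Ok(())
}
```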
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum UtilError { + #[error("Nul")] + Nul { + #[from] + source: std::ffi::NulError, + }, + // arg_parser submodule error + #[error("Argument '{0}' required, but not found. Use \'-h\' or \'-help\' to get usage.")] + MissingArgument(String), + #[error("The argument '{0}' requires a value, but none was supplied. Use \'-h\' or \'-help\' to get usage.")] + MissingValue(String), + #[error( + "The value '{0}' is illegel for argument '{1}'. Use \'-h\' or \'-help\' to get usage." + )] + IllegelValue(String, String), + #[error("The value of argument '{0}' must be in '{1}'. Use \'-h\' or \'-help\' to get usage.")] + ValueOutOfPossible(String, String), + #[error("Found argument '{0}' which wasn't expected, or isn't valid in the context. Use \'-h\' or \'-help\' to get usage.")] + UnexpectedArguments(String), + #[error( + "The argument '{0}' was provided more than once. Use \'-h\' or \'-help\' to get usage." + )] + DuplicateArgument(String), + #[error("The argument '{0}' only need one value. Use \'-h\' or \'-help\' to get usage.")] + DuplicateValue(String), + // daemonize submodule error + #[error("Unable to fork.")] + DaemonFork, + #[error("Unable to create new session.")] + DaemonSetsid, + #[error("Unable to redirect standard streams to /dev/null.")] + DaemonRedirectStdio, + #[error("Pidfile path is existed yet.")] + PidFileExist, + // epoll_context error + #[error("Found bad syscall, error is {0} .")] + BadSyscall(std::io::Error), + #[error("Unsupported Epoll notifier operation type.")] + UnExpectedOperationType, + #[error("Failed to execute epoll_wait syscall: {0} .")] + EpollWait(std::io::Error), + #[error("The fd {0} is not registered in epoll.")] + NoRegisterFd(i32), + #[error("Found no parked fd {0}.")] + NoParkedFd(i32), + #[error("Notifier Operation non allowed.")] + BadNotifierOperation, + #[error("Chmod command failed, os error {0}")] + ChmodFailed(i32), + #[error("Index :{0} out of bound :{1}")] + OutOfBound(u64, u64), + #[error("Desired node depth :{0}, current node depth :{1}")] + NodeDepthMismatch(u32, u32), + #[error("Still have {0} node open when terminating the fdt")] + NodeUnclosed(u32), + #[error("Failed to add property because there is no open node")] + IllegelPropertyPos, + #[error("Failed to add string to fdt because of null character inside \"{0}\"")] + IllegalString(String), + #[error("Failed to add overlapped mem reserve entries to fdt")] + MemReserveOverlap, + #[error("Failed to set {0} property")] + SetPropertyErr(String), +} diff --git a/util/src/file.rs b/util/src/file.rs new file mode 100644 index 0000000000000000000000000000000000000000..c1efa02b21d1fef5142a48364f720cd3694cc5f4 --- /dev/null +++ b/util/src/file.rs @@ -0,0 +1,188 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
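The new util/src/file.rs below serializes access to an image by locking individual bytes with `fcntl(F_SETLK)` rather than locking the whole file; a conflicting lock held by another process makes the `F_SETLK` call fail. A minimal sketch of that pattern, with the offsets and error handling simplified:

```rust
use std::fs::File;
use std::os::unix::io::AsRawFd;

use nix::fcntl::{fcntl, FcntlArg};

// Try to take a write lock on a single byte at `offset`; an error means another
// process already holds a conflicting lock on that byte.
fn try_lock_byte(file: &File, offset: i64) -> nix::Result<libc::c_int> {
    let flock = libc::flock {
        l_type: libc::F_WRLCK as i16,
        l_whence: libc::SEEK_SET as i16,
        l_start: offset,
        l_len: 1,
        l_pid: 0,
    };
    fcntl(file.as_raw_fd(), FcntlArg::F_SETLK(&flock))
}
```

The real code probes several well-known byte offsets (the permission and shared-lock bases) so that readers and writers conflict only in the intended combinations.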
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::fs::{remove_file, File, OpenOptions}; +use std::os::unix::fs::OpenOptionsExt; +use std::os::unix::io::AsRawFd; +use std::path::Path; + +use anyhow::{bail, Context, Ok, Result}; +use nix::fcntl::{fcntl, FcntlArg}; +use nix::unistd::getpid; + +const MIN_FILE_ALIGN: u32 = 512; +pub const MAX_FILE_ALIGN: u32 = 4096; +/// Permission to read +const FILE_LOCK_READ: u64 = 0x01; +/// Permission to write +const FILE_LOCK_WRITE: u64 = 0x02; +/// All permissions +const FILE_LOCK_ALL: [u64; 2] = [FILE_LOCK_READ, FILE_LOCK_WRITE]; +/// Permission lock base address, consistent with qemu +const LOCK_PERM_BASE: u64 = 100; +/// Shared lock base address, consistent with qemu +const LOCK_SHARED_BASE: u64 = 200; + +pub fn open_file(path: &str, read_only: bool, direct: bool) -> Result { + let mut options = OpenOptions::new(); + options.read(true).write(!read_only); + if direct { + options.custom_flags(libc::O_DIRECT); + } + let file = options.open(path).with_context(|| { + format!( + "failed to open the file for block {}. Error: {}", + path, + std::io::Error::last_os_error(), + ) + })?; + + Ok(file) +} + +fn is_io_aligned(file: &File, buf: u64, size: usize) -> bool { + // SAFETY: file and buf is valid. + let ret = unsafe { + libc::pread( + file.as_raw_fd() as libc::c_int, + buf as *mut libc::c_void, + size as libc::size_t, + 0, + ) + }; + ret >= 0 || nix::errno::errno() != libc::EINVAL +} + +pub fn get_file_alignment(file: &File, direct: bool) -> (u32, u32) { + if !direct { + return (1, 1); + } + + let mut req_align: u32 = 0; + let mut buf_align: u32 = 0; + // SAFETY: we allocate aligned memory and free it later. + let aligned_buffer = unsafe { + libc::memalign( + MAX_FILE_ALIGN as libc::size_t, + (MAX_FILE_ALIGN * 2) as libc::size_t, + ) + }; + if aligned_buffer.is_null() { + log::warn!("OOM occurs when get file alignment, assume max alignment"); + return (MAX_FILE_ALIGN, MAX_FILE_ALIGN); + } + + // Guess alignment requirement of request. + let mut align = MIN_FILE_ALIGN; + while align <= MAX_FILE_ALIGN { + if is_io_aligned(file, aligned_buffer as u64, align as usize) { + req_align = align; + break; + } + align <<= 1; + } + + // Guess alignment requirement of buffer. + let mut align = MIN_FILE_ALIGN; + while align <= MAX_FILE_ALIGN { + if is_io_aligned( + file, + aligned_buffer as u64 + u64::from(align), + MAX_FILE_ALIGN as usize, + ) { + buf_align = align; + break; + } + align <<= 1; + } + + // SAFETY: the memory is allocated by us and will not be used anymore. + unsafe { libc::free(aligned_buffer) }; + (req_align, buf_align) +} + +fn do_fcntl_lock( + file: &File, + path: &str, + lockname: &str, + flock: libc::flock, + is_lock: bool, +) -> Result<()> { + let err = match fcntl(file.as_raw_fd(), FcntlArg::F_SETLK(&flock)) { + Err(e) => e, + _ => return Ok(()), + }; + + if is_lock { + bail!( + "Failed to get {} on file: {}. Is it used more than once or \ + another process using the same file? Error: {}", + lockname, + path, + err as i32, + ); + } else { + bail!( + "Failed to release lock on file: {}. 
Error: {}", + path, + err as i32, + ); + } +} + +fn lock_or_unlock_file( + file: &File, + path: &str, + lock_op: i16, + lock_name: &str, + is_lock: bool, +) -> Result<()> { + let pid = getpid().as_raw(); + let mut flock = libc::flock { + l_whence: libc::SEEK_SET as i16, + l_len: 1, + l_pid: pid, + l_type: lock_op, + l_start: 0, + }; + + for lock in FILE_LOCK_ALL { + flock.l_start = (LOCK_PERM_BASE + lock) as i64; + do_fcntl_lock(file, path, lock_name, flock, is_lock)?; + } + flock.l_start = (LOCK_SHARED_BASE + FILE_LOCK_WRITE) as i64; + do_fcntl_lock(file, path, lock_name, flock, is_lock)?; + + Ok(()) +} + +pub fn lock_file(file: &File, path: &str, read_only: bool) -> Result<()> { + let (lock_op, lock_name) = if read_only { + (libc::F_RDLCK, "read lock") + } else { + (libc::F_WRLCK, "write lock") + }; + lock_or_unlock_file(file, path, lock_op as i16, lock_name, true) +} + +pub fn unlock_file(file: &File, path: &str) -> Result<()> { + lock_or_unlock_file(file, path, libc::F_UNLCK as i16, "", false) +} + +pub fn clear_file(path: String) -> Result<()> { + if Path::new(&path).exists() { + remove_file(&path) + .with_context(|| format!("File {} exists, but failed to remove it.", &path))?; + } + + Ok(()) +} diff --git a/util/src/leak_bucket.rs b/util/src/leak_bucket.rs index d67d750a665120d0ccef6fa98b774a79bc209a61..23a1d9109784a3ef1b9ef6bf478acfab966419b0 100644 --- a/util/src/leak_bucket.rs +++ b/util/src/leak_bucket.rs @@ -12,16 +12,19 @@ /// We use Leaky Bucket Algorithm to limit iops of block device and qmp. use std::os::unix::io::{AsRawFd, RawFd}; -use std::time::Instant; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use anyhow::{Context, Result}; +use log::error; use vmm_sys_util::eventfd::EventFd; -use crate::loop_context::EventLoopContext; +use crate::clock::get_current_time; +use crate::loop_context::{create_new_eventfd, EventLoopContext}; +use crate::time::NANOSECONDS_PER_SECOND; /// Used to improve the accuracy of bucket level. const ACCURACY_SCALE: u64 = 1000; -/// Nanoseconds per second. -const NANOS_PER_SEC: u64 = 1_000_000_000; /// Structure used to describe a Leaky Bucket. pub struct LeakBucket { @@ -33,9 +36,9 @@ pub struct LeakBucket { prev_time: Instant, /// Indicate whether the timer started. timer_started: bool, - /// When bucket is ready for allowing more IO operation, the internal callback will write this FD. - /// This FD should be listened by IO thread. - timer_wakeup: EventFd, + /// When bucket is ready for allowing more IO operation, the internal callback will write this + /// FD. This FD should be listened by IO thread. + timer_wakeup: Arc, } impl LeakBucket { @@ -44,14 +47,16 @@ impl LeakBucket { /// # Arguments /// /// * `units_ps` - units per second. - pub fn new(units_ps: u64) -> Self { - LeakBucket { - capacity: units_ps * ACCURACY_SCALE, + pub fn new(units_ps: u64) -> Result { + Ok(LeakBucket { + capacity: units_ps + .checked_mul(ACCURACY_SCALE) + .with_context(|| "capacity overflow")?, level: 0, - prev_time: Instant::now(), + prev_time: get_current_time(), timer_started: false, - timer_wakeup: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - } + timer_wakeup: Arc::new(create_new_eventfd()?), + }) } /// Return true if the bucket is full, and caller must return directly instead of launching IO. @@ -60,7 +65,7 @@ impl LeakBucket { /// # Arguments /// /// * `loop_context` - used for delay function call. 
- pub fn throttled(&mut self, loop_context: &mut EventLoopContext, need_units: u64) -> bool { + pub fn throttled(&mut self, loop_context: &mut EventLoopContext, need_units: u32) -> bool { // capacity value is zero, indicating that there is no need to limit if self.capacity == 0 { return false; @@ -70,36 +75,39 @@ impl LeakBucket { } // update the water level - let now = Instant::now(); + let now = get_current_time(); let nanos = (now - self.prev_time).as_nanos(); - if nanos > (self.level * NANOS_PER_SEC / self.capacity) as u128 { + let throttle_timeout = + u128::from(self.level) * u128::from(NANOSECONDS_PER_SECOND) / u128::from(self.capacity); + if nanos > throttle_timeout { self.level = 0; } else { - self.level -= nanos as u64 * self.capacity / NANOS_PER_SEC; + self.level -= + (nanos * u128::from(self.capacity) / u128::from(NANOSECONDS_PER_SECOND)) as u64; } self.prev_time = now; // need to be throttled if self.level > self.capacity { - let wakeup_clone = self.timer_wakeup.try_clone().unwrap(); + let wakeup_clone = self.timer_wakeup.clone(); let func = Box::new(move || { wakeup_clone .write(1) - .unwrap_or_else(|e| error!("LeakBucket send event to device failed {}", e)); + .unwrap_or_else(|e| error!("LeakBucket send event to device failed {:?}", e)); }); - loop_context.delay_call( - func, - (self.level - self.capacity) * NANOS_PER_SEC / self.capacity, - ); + let timeout = + (self.level - self.capacity).saturating_mul(NANOSECONDS_PER_SECOND) / self.capacity; + loop_context.timer_add(func, Duration::from_nanos(timeout)); self.timer_started = true; return true; } - self.level += need_units * ACCURACY_SCALE; + let scaled_need = u64::from(need_units) * ACCURACY_SCALE; + self.level = self.level.saturating_add(scaled_need); false } diff --git a/util/src/lib.rs b/util/src/lib.rs index 2e2deae962ac3bbdecb37eabb810a0a48e48c616..60fa43d2ffd55bedbf2605dc7854629b4c2c645b 100644 --- a/util/src/lib.rs +++ b/util/src/lib.rs @@ -10,177 +10,71 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -extern crate libc; -#[macro_use] -extern crate error_chain; -#[macro_use] -extern crate vmm_sys_util; -#[macro_use] -extern crate log; - pub mod aio; pub mod arg_parser; pub mod bitmap; pub mod byte_code; pub mod checksum; +pub mod clock; pub mod daemonize; #[cfg(target_arch = "aarch64")] pub mod device_tree; +pub mod edid; +pub mod error; +pub mod file; pub mod leak_bucket; -mod link_list; +pub mod link_list; +pub mod logger; pub mod loop_context; pub mod num_ops; -pub mod reader; +pub mod offsetof; +#[cfg(target_env = "ohos")] +pub mod ohos_binding; +#[cfg(feature = "pixman")] +pub mod pixman; pub mod seccomp; +pub mod socket; pub mod tap; +pub mod test_helper; +pub mod thread_pool; +pub mod time; pub mod unix; -#[macro_use] -pub mod logger; -#[macro_use] -pub mod offsetof; -pub mod trace; - -pub mod errors { - error_chain! { - foreign_links { - KvmIoctl(kvm_ioctls::Error); - Io(std::io::Error); - Nul(std::ffi::NulError); - } - errors { - // arg_parser submodule error - MissingArgument(t: String) { - description("The required argument was not provided.") - display("Argument '{}' required, but not found. Use \'-h\' or \'-help\' to get usage.", t) - } - MissingValue(t: String) { - description("A value for args was not provided.") - display("The argument '{}' requires a value, but none was supplied. 
Use \'-h\' or \'-help\' to get usage.", t) - } - IllegelValue(t1: String, t2: String) { - description("A value is illegel for args.") - display("The value '{}' is illegel for argument '{}'. Use \'-h\' or \'-help\' to get usage.", t1, t2) - } - ValueOutOfPossible(t1: String, t2: String) { - description("A value for args is out of possile values.") - display("The value of argument '{}' must be in '{}'. Use \'-h\' or \'-help\' to get usage.", t1, t2) - } - UnexpectedArguments(t: String) { - description("The provided argument was not expected.") - display("Found argument '{}' which wasn't expected, or isn't valid in the context. Use \'-h\' or \'-help\' to get usage.", t) - } - DuplicateArgument(t: String) { - description("The argument was provided more than once.") - display("The argument '{}' was provided more than once. Use \'-h\' or \'-help\' to get usage.", t) - } - DuplicateValue(t: String) { - description("The argument value was provided more than once.") - display("The argument '{}' only need one value. Use \'-h\' or \'-help\' to get usage.", t) - } - // daemonize submodule error - DaemonFork { - description("Unable to fork.") - display("Unable to fork.") - } - DaemonSetsid { - description("Unable to create new session.") - display("Unable to create new session.") - } - DaemonRedirectStdio { - description("Unable to redirect standard streams to /dev/null.") - display("Unable to redirect standard streams to /dev/null.") - } - PidFileExist { - description("Pidfile path is existed yet.") - display("Pidfile path is existed yet.") - } - // epoll_context error - BadSyscall(err: std::io::Error) { - description("Return a bad syscall.") - display("Found bad syscall, error is {} .", err) - } - UnExpectedOperationType { - description("Unsupported notifier operation type.") - display("Unsupported Epoll notifier operation type.") - } - EpollWait(err: std::io::Error) { - description("Failed to execute epoll_wait syscall.") - display("Failed to execute epoll_wait syscall: {} .", err) - } - NoRegisterFd(t: i32) { - description("The fd is not registered in epoll.") - display("The fd {} is not registered in epoll.", t) - } - NoParkedFd(t: i32) { - description("Found no parked fd in registered.") - display("Found no parked fd {}.", t) - } - BadNotifierOperation { - description("Bad Notifier Operation.") - display("Notifier Operation non allowed.") - } - ChmodFailed(e: i32) { - description("Chmod command failed.") - display("Chmod command failed, os error {}", e) - } - OutOfBound(index: u64, bound: u64) { - description("Index out of bound of array") - display("Index :{} out of bound :{}", index, bound) - } - NodeDepthMismatch(target_dep: u32, real_dep: u32) { - description("Fdt structure nested node depth mismatch") - display("Desired node depth :{}, current node depth :{}", target_dep, real_dep) - } - NodeUnclosed(unclose: u32) { - description("Fdt structure block node unclose") - display("Still have {} node open when terminating the fdt", unclose) - } - IllegelPropertyPos { - description("Cann't add property outside the node") - display("Failed to add property because there is no open node") - } - IllegalString(s: String) { - description("The string for fdt should not contain null") - display("Failed to add string to fdt because of null character inside \"{}\"", s) - } - MemReserveOverlap { - description("The mem reserve entry should not overlap") - display("Failed to add overlapped mem reserve entries to fdt") - } - SetPropertyErr(s: String) { - description("Cann't set property for fdt node") - 
display("Failed to set {} property", s) - } - } - } -} +#[cfg(feature = "usb_camera_v4l2")] +pub mod v4l2; + +pub use error::UtilError; -use libc::{tcgetattr, tcsetattr, termios, OPOST, TCSANOW}; +use std::{any::Any, sync::Mutex}; + +use log::debug; +use nix::sys::termios::{cfmakeraw, tcgetattr, tcsetattr, OutputFlags, SetArg, Termios}; use once_cell::sync::Lazy; -use std::sync::Mutex; use vmm_sys_util::terminal::Terminal; -pub static TERMINAL_MODE: Lazy>> = Lazy::new(|| Mutex::new(None)); +/// Read the program version in `Cargo.toml` and concat with git commit id. +pub const VERSION: &str = concat!( + env!("CARGO_PKG_VERSION"), + " commit-id ", + include_str!(concat!(env!("OUT_DIR"), "/GIT_COMMIT")) +); + +pub static TERMINAL_MODE: Lazy>> = Lazy::new(|| Mutex::new(None)); pub fn set_termi_raw_mode() -> std::io::Result<()> { let tty_fd = std::io::stdin().lock().tty_fd(); - // Safe because this only set the `old_term_mode` struct to zero. - let mut old_term_mode: termios = unsafe { std::mem::zeroed() }; - // Safe because this only get stdin's current mode and save it. - let ret = unsafe { tcgetattr(tty_fd, &mut old_term_mode as *mut _) }; - if ret < 0 { - return Err(std::io::Error::last_os_error()); - } - *TERMINAL_MODE.lock().unwrap() = Some(old_term_mode); - - let mut new_term_mode: termios = old_term_mode; - // Safe because this function only change the `new_term_mode` argument. - unsafe { libc::cfmakeraw(&mut new_term_mode as *mut _) }; - new_term_mode.c_oflag |= OPOST; - // Safe because this function only set the stdin to raw mode. - let ret = unsafe { tcsetattr(tty_fd, TCSANOW, &new_term_mode as *const _) }; - if ret < 0 { + let old_term_mode = match tcgetattr(tty_fd) { + Ok(tm) => tm, + Err(_) => return Err(std::io::Error::last_os_error()), + }; + + *TERMINAL_MODE.lock().unwrap() = Some(old_term_mode.clone()); + + let mut new_term_mode = old_term_mode; + cfmakeraw(&mut new_term_mode); + new_term_mode.output_flags = new_term_mode.output_flags.union(OutputFlags::OPOST); + + if tcsetattr(tty_fd, SetArg::TCSANOW, &new_term_mode).is_err() { return Err(std::io::Error::last_os_error()); } @@ -190,9 +84,7 @@ pub fn set_termi_raw_mode() -> std::io::Result<()> { pub fn set_termi_canon_mode() -> std::io::Result<()> { let tty_fd = std::io::stdin().lock().tty_fd(); if let Some(old_term_mode) = TERMINAL_MODE.lock().unwrap().as_ref() { - // Safe because this only recover the stdin's mode. - let ret = unsafe { tcsetattr(tty_fd, TCSANOW, old_term_mode as *const _) }; - if ret < 0 { + if tcsetattr(tty_fd, SetArg::TCSANOW, old_term_mode).is_err() { return Err(std::io::Error::last_os_error()); } } else { @@ -201,3 +93,73 @@ pub fn set_termi_canon_mode() -> std::io::Result<()> { Ok(()) } + +/// Macro: Generate base getting function. +/// +/// # Arguments +/// +/// * `get_func` - Name of getting `&base` function. +/// * `get_mut_func` - Name of getting `&mut base` function. +/// * `base_type` - Type of `base`. +/// * `base` - `base` in self. 
+/// +/// # Examples +/// +/// ```rust +/// use util::gen_base_func; +/// struct TestBase(u8); +/// struct Test { +/// base: TestBase, +/// } +/// +/// impl Test { +/// gen_base_func!(test_base, test_base_mut, TestBase, base); +/// } +/// ``` +/// +/// This is equivalent to: +/// +/// ```rust +/// struct TestBase(u8); +/// struct Test { +/// base: TestBase, +/// } +/// +/// impl Test { +/// fn test_base(&self) -> &TestBase { +/// &self.base +/// } +/// +/// fn test_base_mut(&mut self) -> &mut TestBase { +/// &mut self.base +/// } +/// } +/// ``` +#[macro_export] +macro_rules! gen_base_func { + ($get_func: ident, $get_mut_func: ident, $base_type: ty, $($base: tt).*) => { + fn $get_func(&self) -> &$base_type { + &self.$($base).* + } + + fn $get_mut_func(&mut self) -> &mut $base_type { + &mut self.$($base).* + } + }; +} + +/// This trait is to cast trait object to struct. +pub trait AsAny { + fn as_any(&self) -> &dyn Any; + fn as_any_mut(&mut self) -> &mut dyn Any; +} + +impl AsAny for T { + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } +} diff --git a/util/src/link_list.rs b/util/src/link_list.rs index 982fe3828c8308d0b490acf6cb8bb4ad88f66aa9..a779bce17724c480e46d003c0ca66c28b4916b6e 100644 --- a/util/src/link_list.rs +++ b/util/src/link_list.rs @@ -23,10 +23,16 @@ pub struct Node { pub struct List { head: Option>>, tail: Option>>, - pub len: usize, + len: usize, marker: PhantomData>>, } +impl Drop for List { + fn drop(&mut self) { + while self.pop_head().is_some() {} + } +} + impl Node { pub fn new(value: T) -> Self { Node { @@ -51,63 +57,64 @@ impl List { pub fn add_tail(&mut self, mut node: Box>) { node.prev = self.tail; node.next = None; - unsafe { - let node = NonNull::new(Box::into_raw(node)); - if let Some(mut t) = self.tail { - t.as_mut().next = node; - } else { - self.head = node; - self.tail = node; - } + let node = NonNull::new(Box::into_raw(node)); + if let Some(mut t) = self.tail { + // SAFETY: t is guaranteed not to be null. + unsafe { t.as_mut() }.next = node; + } else { + self.head = node; self.tail = node; - self.len += 1; } + + self.tail = node; + self.len += 1; } #[inline] pub fn add_head(&mut self, mut node: Box>) { node.prev = None; node.next = self.head; - unsafe { - let node = NonNull::new(Box::into_raw(node)); - if let Some(mut h) = self.head { - h.as_mut().prev = node; - } else { - self.head = node; - self.tail = node; - } - + let node = NonNull::new(Box::into_raw(node)); + if let Some(mut h) = self.head { + // SAFETY: h is guaranteed not to be null. + unsafe { h.as_mut() }.prev = node; + } else { self.head = node; - self.len += 1; + self.tail = node; } + + self.head = node; + self.len += 1; } #[inline] pub fn unlink(&mut self, node: &Node) { - unsafe { - match node.prev { - Some(mut p) => p.as_mut().next = node.next, - None => self.head = node.next, - } + match node.prev { + // SAFETY: p is guaranteed not to be null. + Some(mut p) => unsafe { p.as_mut() }.next = node.next, + None => self.head = node.next, + } - match node.next { - Some(mut n) => n.as_mut().prev = node.prev, - None => self.tail = node.prev, - } + match node.next { + // SAFETY: n is guaranteed not to be null. + Some(mut n) => unsafe { n.as_mut() }.prev = node.prev, + None => self.tail = node.prev, } self.len -= 1; } #[inline] pub fn pop_tail(&mut self) -> Option>> { - self.tail.map(|node| unsafe { - let node = Box::from_raw(node.as_ptr()); + self.tail.map(|node| { + // SAFETY: node is guaranteed not to be null. 
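With the iterators added below, the intrusive `List<T>` can be used much like a standard deque, as long as the caller boxes the nodes itself. A usage sketch (assuming `List::new()` from the unchanged part of the file):

```rust
use util::link_list::{List, Node};

fn demo() {
    let mut list: List<u32> = List::new();
    list.add_tail(Box::new(Node::new(1)));
    list.add_head(Box::new(Node::new(0)));
    assert_eq!(list.len(), 2);

    // The iterator walks nodes from head to tail without consuming them.
    let values: Vec<u32> = list.iter().copied().collect();
    assert_eq!(values, vec![0, 1]);
}
```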
+ let node = unsafe { Box::from_raw(node.as_ptr()) }; self.tail = node.prev; match self.tail { None => self.head = None, - Some(mut t) => t.as_mut().next = None, + // SAFETY: t is guaranteed not to be null. + Some(mut t) => unsafe { t.as_mut() }.next = None, } self.len -= 1; @@ -117,17 +124,93 @@ impl List { #[inline] pub fn pop_head(&mut self) -> Option>> { - self.head.map(|node| unsafe { - let node = Box::from_raw(node.as_ptr()); + self.head.map(|node| { + // SAFETY: node is guaranteed not to be null. + let node = unsafe { Box::from_raw(node.as_ptr()) }; self.head = node.next; match self.head { - None => self.head = None, - Some(mut h) => h.as_mut().prev = None, + None => self.tail = None, + // SAFETY: h is guaranteed not to be null. + Some(mut h) => unsafe { h.as_mut() }.prev = None, } self.len -= 1; node }) } + + #[inline(always)] + pub fn len(&self) -> usize { + self.len + } + + #[inline(always)] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + #[inline(always)] + pub fn iter(&'_ self) -> impl Iterator { + Iter::new(self) + } + + #[inline(always)] + pub fn iter_mut(&'_ mut self) -> impl Iterator { + IterMut::new(self) + } +} + +struct Iter<'a, T> { + curr: Option>>, + list: PhantomData<&'a List>, +} + +impl<'a, T> Iter<'a, T> { + fn new(list: &'a List) -> Self { + Self { + curr: list.head, + list: PhantomData, + } + } +} + +impl<'a, T> Iterator for Iter<'a, T> { + type Item = &'a T; + + fn next(&mut self) -> Option { + self.curr.map(|node| { + // SAFETY: node is guaranteed not to be null. + let node = unsafe { node.as_ref() }; + self.curr = node.next; + &node.value + }) + } +} + +struct IterMut<'a, T> { + curr: Option>>, + list: PhantomData<&'a mut List>, +} + +impl<'a, T> IterMut<'a, T> { + fn new(list: &'a mut List) -> Self { + Self { + curr: list.head, + list: PhantomData, + } + } +} + +impl<'a, T> Iterator for IterMut<'a, T> { + type Item = &'a mut T; + + fn next(&mut self) -> Option { + self.curr.map(|mut node| { + // SAFETY: node is guaranteed not to be null. + let node = unsafe { node.as_mut() }; + self.curr = node.next; + &mut node.value + }) + } } diff --git a/util/src/logger.rs b/util/src/logger.rs index ae562cb72b7171067fab700029dec6511c5669db..de35d83f6dcad5e8b49b9754279c7f3b37877dd8 100644 --- a/util/src/logger.rs +++ b/util/src/logger.rs @@ -10,104 +10,202 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use std::io::prelude::*; +use std::fs::File; +use std::io::Write; +use std::num::Wrapping; +use std::os::unix::fs::OpenOptionsExt; +use std::path::Path; use std::sync::Mutex; +use std::time::UNIX_EPOCH; -use crate::unix::gettid; -use log::{Level, LevelFilter, Log, Metadata, Record, SetLoggerError}; +use anyhow::{Context, Result}; +use log::{Level, LevelFilter, Log, Metadata, Record}; +use nix::unistd::{getpid, gettid}; -fn format_now() -> String { - let mut ts = libc::timespec { - tv_sec: 0, - tv_nsec: 0, - }; +use crate::time::{get_format_time, gettime}; - let mut ti: libc::tm = unsafe { std::mem::zeroed() }; - unsafe { - libc::clock_gettime(libc::CLOCK_REALTIME, &mut ts); - libc::localtime_r(&ts.tv_sec, &mut ti); - } +// Max size of the log file is 100MB. +const LOG_ROTATE_SIZE_MAX: usize = 100 * 1024 * 1024; +// Logs are retained for seven days. 
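+// More precisely, at most LOG_ROTATE_COUNT_MAX rotated files are kept; rotation is
+// triggered whenever the size limit above is exceeded or the day changes
+// (see FileRotate::rotate_file below).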
+const LOG_ROTATE_COUNT_MAX: u32 = 7; + +fn format_now() -> String { + let (sec, nsec) = gettime().unwrap_or_else(|e| { + println!("{:?}", e); + (0, 0) + }); + let format_time = get_format_time(sec); format!( "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:09}", - ti.tm_year + 1900, - ti.tm_mon + 1, - ti.tm_mday, - ti.tm_hour, - ti.tm_min, - ti.tm_sec, - ts.tv_nsec + format_time[0], + format_time[1], + format_time[2], + format_time[3], + format_time[4], + format_time[5], + nsec ) } +struct FileRotate { + handler: Box, + path: String, + current_size: Wrapping, + create_day: i32, +} + +impl FileRotate { + fn rotate_file(&mut self, size_inc: usize) -> Result<()> { + if self.path.is_empty() { + return Ok(()); + } + + self.current_size += Wrapping(size_inc); + let sec = gettime()?.0; + let today = get_format_time(sec)[2]; + if self.current_size < Wrapping(LOG_ROTATE_SIZE_MAX) && self.create_day == today { + return Ok(()); + } + + // Remove the oldest log file. + let mut rotate_count = LOG_ROTATE_COUNT_MAX - 1; + let old_name = format!("{}{}", self.path, rotate_count); + if Path::new(&old_name).exists() { + std::fs::remove_file(&old_name) + .with_context(|| format! {"Failed to remove log file {}", old_name})?; + } + + // Rename files to older file name. + let mut path_from; + let mut path_to = old_name; + while rotate_count != 0 { + rotate_count -= 1; + path_from = self.path.clone(); + if rotate_count != 0 { + path_from += &rotate_count.to_string(); + } + if Path::new(&path_from).exists() { + std::fs::rename(&path_from, &path_to).with_context( + || format! {"Failed to rename log file from {} to {}", path_from, path_to}, + )?; + } + path_to = path_from; + } + + // Update log file. + self.handler = Box::new(open_log_file(&self.path)?); + self.current_size = Wrapping(0); + self.create_day = today; + Ok(()) + } +} + /// Format like "%year-%mon-%dayT%hour:%min:%sec.%nsec struct VmLogger { - handler: Option>>, + rotate: Mutex, level: Level, } impl Log for VmLogger { fn enabled(&self, metadata: &Metadata) -> bool { - self.handler.is_some() && metadata.level() <= self.level + metadata.level() <= self.level } fn log(&self, record: &Record) { - if self.enabled(record.metadata()) { - let pid = unsafe { libc::getpid() }; - let tid = gettid(); - - self.handler.as_ref().map(|writer| match record.level() { - Level::Error => writer.lock().unwrap().write_fmt(format_args!( - "{:<5}: [{}][{}][{}: {}]:{}: {}\n", - format_now(), - pid, - tid, - record.file().unwrap_or(""), - record.line().unwrap_or(0), - record.level(), - record.args() - )), - _ => writer.lock().unwrap().write_fmt(format_args!( - "{:<5}: [{}][{}]:{}: {}\n", - format_now(), - pid, - tid, - record.level(), - record.args() - )), - }); + if !self.enabled(record.metadata()) { + return; + } + + let pid = getpid().as_raw(); + let tid = gettid().as_raw(); + let formatmsg = format_args!( + "{:<5}: [{}][{}][{}: {}]:{}: {}\n", + format_now(), + pid, + tid, + record.file().unwrap_or(""), + record.line().unwrap_or(0), + record.level(), + record.args() + ) + .to_string(); + + let mut rotate = self.rotate.lock().unwrap(); + if let Err(e) = rotate.handler.write_all(formatmsg.as_bytes()) { + println!("Failed to log message {:?}", e); + return; + } + if let Err(e) = rotate.rotate_file(formatmsg.as_bytes().len()) { + println!("Failed to rotate log files {:?}", e); } } fn flush(&self) {} } -pub fn init_vm_logger( - level: Option, - logfile: Option>, -) -> Result<(), log::SetLoggerError> { - let buffer = logfile.map(Mutex::new); - let logger = VmLogger { - level: 
level.unwrap_or(Level::Info), - handler: buffer, +fn init_vm_logger( + level: Level, + logfile: Box, + logfile_path: String, +) -> Result<()> { + let current_size; + let create_day; + if logfile_path.is_empty() { + current_size = Wrapping(0); + create_day = 0; + } else { + let metadata = File::open(&logfile_path)?.metadata()?; + current_size = Wrapping(metadata.len() as usize); + let mod_time = metadata.modified()?; + let sec = mod_time.duration_since(UNIX_EPOCH)?.as_secs(); + create_day = get_format_time(i64::try_from(sec)?)[2]; }; + let rotate = Mutex::new(FileRotate { + handler: logfile, + path: logfile_path, + current_size, + create_day, + }); - log::set_boxed_logger(Box::new(logger)).map(|()| log::set_max_level(LevelFilter::Trace)) + let logger = VmLogger { rotate, level }; + log::set_boxed_logger(Box::new(logger)).map(|()| log::set_max_level(LevelFilter::Trace))?; + Ok(()) } -pub fn init_logger_with_env(logfile: Option>) -> Result<(), SetLoggerError> { +fn init_logger_with_env(logfile: Box, logfile_path: String) -> Result<()> { let level = match std::env::var("STRATOVIRT_LOG_LEVEL") { Ok(l) => match l.to_lowercase().as_str() { - "trace" => Level::Trace, - "debug" => Level::Debug, - "info" => Level::Info, + "error" => Level::Error, "warn" => Level::Warn, - _ => Level::Error, + "info" => Level::Info, + "debug" => Level::Debug, + "trace" => Level::Trace, + _ => Level::Info, }, - _ => Level::Error, + _ => Level::Info, }; - init_vm_logger(Some(level), logfile)?; - + init_vm_logger(level, logfile, logfile_path)?; Ok(()) } + +fn open_log_file(path: &str) -> Result { + std::fs::OpenOptions::new() + .read(false) + .append(true) + .create(true) + .mode(0o640) + .open(path) + .with_context(|| format!("Failed to open log file {}", path)) +} + +pub fn init_log(path: String) -> Result<()> { + let logfile: Box = if path.is_empty() { + Box::new(std::io::stderr()) + } else { + Box::new(open_log_file(&path)?) + }; + init_logger_with_env(logfile, path.clone()) + .with_context(|| format!("Failed to init logger: {}", path)) +} diff --git a/util/src/loop_context.rs b/util/src/loop_context.rs index 2c9c8c0af9c4d5a7e2a023e75889ddc272fdd757..8e95698289b2bdafc3c43dfe73324526d453ecdc 100644 --- a/util/src/loop_context.rs +++ b/util/src/loop_context.rs @@ -10,17 +10,30 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
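// A minimal usage sketch for the logging setup introduced in util/src/logger.rs above
// (illustrative only; the log path is a hypothetical example and the `util::logger`
// module path is assumed to be public). An empty path sends output to stderr;
// otherwise the file is opened append-only with mode 0o640 and rotated by size/day.
// The level comes from STRATOVIRT_LOG_LEVEL (error, warn, info, debug, trace; default info).
//
//     util::logger::init_log(String::from("/tmp/stratovirt.log"))?;
//     log::info!("logger ready");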
-extern crate vmm_sys_util; - use std::collections::BTreeMap; -use std::os::unix::io::RawFd; -use std::sync::{Arc, Mutex, RwLock}; +use std::fmt::Debug; +use std::io::Error; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::rc::Rc; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::{Arc, Barrier, Mutex, RwLock}; use std::time::{Duration, Instant}; - -use libc::{c_void, read}; +use std::{fmt, i32}; + +use anyhow::{anyhow, Context, Result}; +use libc::{c_void, read, EFD_CLOEXEC, EFD_NONBLOCK}; +use log::{error, warn}; +use nix::errno::Errno; +use nix::{ + poll::{ppoll, PollFd, PollFlags}, + sys::time::TimeSpec, +}; use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet}; +use vmm_sys_util::eventfd::EventFd; -use crate::errors::{ErrorKind, Result, ResultExt}; +use crate::clock::{get_current_time, ClockState}; +use crate::thread_pool::ThreadPool; +use crate::UtilError; const READY_EVENT_MAX: usize = 256; const AIO_PRFETCH_CYCLE_TIME: usize = 100; @@ -42,35 +55,55 @@ pub enum NotifierOperation { Park = 16, /// Resume a file descriptor from the event table Resume = 32, + /// Add events to current event table for a file descriptor + AddEvents = 64, + /// Delete events from current event table for a file descriptor + DeleteEvents = 128, } +#[derive(Debug, PartialEq)] enum EventStatus { /// Event is currently monitored in epoll. Alive = 0, /// Event is parked, temporarily not monitored. Parked = 1, - /// Event is removed. + /// Event is removed, thus not monitored. Removed = 2, } +// The NotifierCallback must NOT update notifier status of itself, otherwise causes +// deadlock. Instead it should return notifiers and let caller to do so. pub type NotifierCallback = dyn Fn(EventSet, RawFd) -> Option>; /// Epoll Event Notifier Entry. 
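/// One raw fd has at most one entry in the event table: registering the same fd again
/// with `AddShared` merges the handlers and ORs the event sets, while `AddExclusion`
/// on an already registered fd returns an error (see `add_event`).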
pub struct EventNotifier { /// Raw file descriptor - pub raw_fd: i32, + raw_fd: i32, /// Notifier operation - pub op: NotifierOperation, + op: NotifierOperation, /// Parked fd, temporarily removed from epoll - pub parked_fd: Option, + parked_fd: Option, /// The types of events for which we use this fd - pub event: EventSet, + event: EventSet, /// Event Handler List, one fd event may have many handlers - pub handlers: Vec>>>, + handlers: Vec>, + /// Pre-polling handler + pub handler_poll: Option>, /// Event status - status: EventStatus, - /// The flag representing whether pre polling is required - pub io_poll: bool, + status: Arc>, +} + +impl fmt::Debug for EventNotifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("EventNotifier") + .field("raw_fd", &self.raw_fd) + .field("op", &self.op) + .field("parked_fd", &self.parked_fd) + .field("event", &self.event) + .field("status", &self.status) + .field("io_poll", &self.handler_poll.is_some()) + .finish() + } } impl EventNotifier { @@ -80,7 +113,7 @@ impl EventNotifier { raw_fd: i32, parked_fd: Option, event: EventSet, - handlers: Vec>>>, + handlers: Vec>, ) -> Self { EventNotifier { raw_fd, @@ -88,8 +121,8 @@ impl EventNotifier { parked_fd, event, handlers, - status: EventStatus::Alive, - io_poll: false, + handler_poll: None, + status: Arc::new(Mutex::new(EventStatus::Alive)), } } } @@ -104,6 +137,32 @@ pub trait EventNotifierHelper { fn internal_notifiers(_: Arc>) -> Vec; } +pub fn get_notifiers_fds(notifiers: &[EventNotifier]) -> Vec { + let mut fds = Vec::with_capacity(notifiers.len()); + for notifier in notifiers { + fds.push(notifier.raw_fd); + } + fds +} + +pub fn gen_delete_notifiers(fds: &[RawFd]) -> Vec { + let mut notifiers = Vec::with_capacity(fds.len()); + for fd in fds { + notifiers.push(EventNotifier::new( + NotifierOperation::Delete, + *fd, + None, + EventSet::IN, + Vec::new(), + )); + } + notifiers +} + +pub fn create_new_eventfd() -> Result { + EventFd::new(EFD_NONBLOCK | EFD_CLOEXEC) +} + /// EventLoop manager, advise continue running or stop running pub trait EventLoopManager: Send + Sync { fn loop_should_exit(&self) -> bool; @@ -116,6 +175,8 @@ struct Timer { func: Box, /// Given the real time when the `func` will be called. expire_time: Instant, + /// Timer id. + id: u64, } impl Timer { @@ -124,11 +185,13 @@ impl Timer { /// # Arguments /// /// * `func` - the function will be called later. - /// * `nsec` - delay time in nanosecond. - pub fn new(func: Box, nsec: u64) -> Self { + /// * `delay` - delay time to call the function. + pub fn new(func: Box, delay: Duration, id: u64) -> Self { + let expire_time = get_current_time() + delay; Timer { func, - expire_time: Instant::now() + Duration::new(0, nsec as u32), + expire_time, + id, } } } @@ -140,6 +203,13 @@ pub struct EventLoopContext { epoll: Epoll, /// Control epoll loop running. manager: Option>>, + /// Used to wakeup epoll to re-evaluate events or timers. + kick_event: EventFd, + /// Used to avoid unnecessary kick operation when the + /// next re-evaluation is performed before next epoll. + kick_me: AtomicBool, + /// Used to identify that a kick operation occurred. + kicked: AtomicBool, /// Fds registered to the `EventLoop`. events: Arc>>>, /// Events abandoned are stored in garbage collector. @@ -147,22 +217,76 @@ pub struct EventLoopContext { /// Temp events vector, store wait returned events. ready_events: Vec, /// Timer list - timers: Vec, + timers: Arc>>>, + /// The next timer id to be used. 
+ timer_next_id: AtomicU64, + /// The context for thread pool. + pub thread_pool: Arc, + /// Record VM clock state. + pub clock_state: Arc>, + /// The io thread barrier. + pub thread_exit_barrier: Arc, } -unsafe impl Sync for EventLoopContext {} +impl Drop for EventLoopContext { + fn drop(&mut self) { + self.thread_pool + .cancel() + .unwrap_or_else(|e| error!("Thread pool cancel error: {:?}", e)); + } +} + +// SAFETY: The closure in EventNotifier and Timer doesn't impl Send, they're +// not sent between threads actually. unsafe impl Send for EventLoopContext {} impl EventLoopContext { /// Constructs a new `EventLoopContext`. - pub fn new() -> Self { - EventLoopContext { + pub fn new(thread_exit_barrier: Arc) -> Self { + let mut ctx = EventLoopContext { epoll: Epoll::new().unwrap(), manager: None, + kick_event: create_new_eventfd().unwrap(), + kick_me: AtomicBool::new(false), + kicked: AtomicBool::new(false), events: Arc::new(RwLock::new(BTreeMap::new())), gc: Arc::new(RwLock::new(Vec::new())), ready_events: vec![EpollEvent::default(); READY_EVENT_MAX], - timers: Vec::new(), + timers: Arc::new(Mutex::new(Vec::new())), + timer_next_id: AtomicU64::new(0), + thread_pool: Arc::new(ThreadPool::default()), + clock_state: Arc::new(Mutex::new(ClockState::default())), + thread_exit_barrier, + }; + ctx.init_kick(); + ctx + } + + fn init_kick(&mut self) { + let kick_handler: Rc = Rc::new(|_, fd| { + read_fd(fd); + None + }); + self.add_event(EventNotifier::new( + NotifierOperation::AddExclusion, + self.kick_event.as_raw_fd(), + None, + EventSet::IN, + vec![kick_handler], + )) + .unwrap(); + } + + // Force epoll.wait to exit to re-evaluate events and timers. + pub fn kick(&mut self) { + self.kicked.store(true, Ordering::SeqCst); + if self.kick_me.load(Ordering::SeqCst) { + if let Err(e) = self.kick_event.write(1) { + // Rarely fails when event is full, even if this + // occurs, no need to add event again, so log is + // enough for error handling. + warn!("Failed to kick eventloop, {:?}", e); + } } } @@ -171,44 +295,63 @@ impl EventLoopContext { } fn clear_gc(&mut self) { - let mut gc = self.gc.write().unwrap(); - gc.clear(); + let max_cnt = self.gc.write().unwrap().len(); + let mut pop_cnt: usize = 0; + + loop { + // Loop to avoid hold lock for long time. + if pop_cnt >= max_cnt { + break; + } + // SAFETY: We will stop removing when reach max_cnt and no other place + // removes element of gc. This is to avoid infinite popping if other + // thread continuously adds element to gc. + self.gc.write().unwrap().remove(0); + pop_cnt += 1; + } } - fn add_event(&mut self, event: EventNotifier) -> Result<()> { - // If there is one same alive event monitored, update the handlers. - // If there is one same parked event, update the handlers but warn. + fn add_event(&mut self, mut event: EventNotifier) -> Result<()> { + // If there is one same alive event monitored, update the handlers and eventset. + // If there is one same parked event, update the handlers and eventset but warn. // If there is no event in the map, insert the event and park the related. 
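        // For an already registered fd, the epoll entry is updated with ControlOperation::Modify
        // and the union of the old and new event sets, so several users can share one fd.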
let mut events_map = self.events.write().unwrap(); if let Some(notifier) = events_map.get_mut(&event.raw_fd) { if let NotifierOperation::AddExclusion = event.op { - return Err(ErrorKind::BadNotifierOperation.into()); + return Err(anyhow!(UtilError::BadNotifierOperation)); } - let mut event = event; + if notifier.event != event.event { + self.epoll.ctl( + ControlOperation::Modify, + notifier.raw_fd, + EpollEvent::new(notifier.event | event.event, &**notifier as *const _ as u64), + )?; + notifier.event |= event.event; + } notifier.handlers.append(&mut event.handlers); - if let EventStatus::Parked = notifier.status { + if *notifier.status.lock().unwrap() == EventStatus::Parked { warn!("Parked event updated!"); } return Ok(()); } - let raw_fd = event.raw_fd; - events_map.insert(raw_fd, Box::new(event)); - let event = events_map.get(&raw_fd).unwrap(); + let event = Box::new(event); self.epoll.ctl( ControlOperation::Add, event.raw_fd, - EpollEvent::new(event.event, &**event as *const _ as u64), + EpollEvent::new(event.event, &*event as *const _ as u64), )?; + let parked_fd = event.parked_fd; + events_map.insert(event.raw_fd, event); - if let Some(parked_fd) = event.parked_fd { + if let Some(parked_fd) = parked_fd { if let Some(parked) = events_map.get_mut(&parked_fd) { self.epoll .ctl(ControlOperation::Delete, parked_fd, EpollEvent::default())?; - parked.status = EventStatus::Parked; + *parked.status.lock().unwrap() = EventStatus::Parked; } else { - return Err(ErrorKind::NoParkedFd(parked_fd).into()); + return Err(anyhow!(UtilError::NoParkedFd(parked_fd))); } } @@ -216,52 +359,81 @@ impl EventLoopContext { } fn rm_event(&mut self, event: &EventNotifier) -> Result<()> { - // If there is one same parked event, return Ok. // If there is no event in the map, return Error. - // If there is one same alive event monitored, put the event in gc and reactivate the parked event. + // Else put the event in gc and reactivate the parked event. let mut events_map = self.events.write().unwrap(); - match events_map.get_mut(&event.raw_fd) { + match events_map.get(&event.raw_fd) { Some(notifier) => { - if let EventStatus::Alive = notifier.status { - // No need to delete fd if status is Parked, it's done in park_event. + // No need to delete fd if status is Parked, it's done in park_event. 
+ if *notifier.status.lock().unwrap() == EventStatus::Alive { if let Err(error) = self.epoll.ctl( ControlOperation::Delete, notifier.raw_fd, EpollEvent::default(), ) { let error_num = error.raw_os_error().unwrap(); - if error_num != libc::EBADF && error_num != libc::ENOENT { - return Err(ErrorKind::BadSyscall(error).into()); + if error_num != libc::EBADF + && error_num != libc::ENOENT + && error_num != libc::EPERM + { + return Err(anyhow!(UtilError::BadSyscall(error))); + } else { + warn!("epoll ctl failed: {}", error); } } } + let parked_fd = notifier.parked_fd; + let event = events_map.remove(&event.raw_fd).unwrap(); + *event.status.lock().unwrap() = EventStatus::Removed; + self.gc.write().unwrap().push(event); - notifier.status = EventStatus::Removed; - - if let Some(parked_fd) = notifier.parked_fd { + if let Some(parked_fd) = parked_fd { if let Some(parked) = events_map.get_mut(&parked_fd) { self.epoll.ctl( ControlOperation::Add, parked_fd, EpollEvent::new(parked.event, &**parked as *const _ as u64), )?; - parked.status = EventStatus::Alive; + *parked.status.lock().unwrap() = EventStatus::Alive; } else { - return Err(ErrorKind::NoParkedFd(parked_fd).into()); + return Err(anyhow!(UtilError::NoParkedFd(parked_fd))); } } - - let event = events_map.remove(&event.raw_fd).unwrap(); - self.gc.write().unwrap().push(event); } _ => { - return Err(ErrorKind::NoRegisterFd(event.raw_fd).into()); + return Err(anyhow!(UtilError::NoRegisterFd(event.raw_fd))); } } Ok(()) } + fn modify_event(&mut self, mut event: EventNotifier) -> Result<()> { + let mut events_map = self.events.write().unwrap(); + match events_map.get_mut(&event.raw_fd) { + Some(notifier) => { + let events_specified = !event.event.is_empty(); + if events_specified && event.event != notifier.event { + self.epoll.ctl( + ControlOperation::Modify, + notifier.raw_fd, + EpollEvent::new(event.event, &**notifier as *const _ as u64), + )?; + notifier.event = event.event; + } + let handlers_specified = !event.handlers.is_empty(); + if handlers_specified { + notifier.handlers.clear(); + notifier.handlers.append(&mut event.handlers); + } + } + _ => { + return Err(anyhow!(UtilError::NoRegisterFd(event.raw_fd))); + } + } + Ok(()) + } + fn park_event(&mut self, event: &EventNotifier) -> Result<()> { let mut events_map = self.events.write().unwrap(); match events_map.get_mut(&event.raw_fd) { @@ -272,11 +444,13 @@ impl EventLoopContext { notifier.raw_fd, EpollEvent::default(), ) - .chain_err(|| format!("Failed to park event, event fd:{}", notifier.raw_fd))?; - notifier.status = EventStatus::Parked; + .with_context(|| { + format!("Failed to park event, event fd:{}", notifier.raw_fd) + })?; + *notifier.status.lock().unwrap() = EventStatus::Parked; } _ => { - return Err(ErrorKind::NoRegisterFd(event.raw_fd).into()); + return Err(anyhow!(UtilError::NoRegisterFd(event.raw_fd))); } } Ok(()) @@ -292,13 +466,42 @@ impl EventLoopContext { notifier.raw_fd, EpollEvent::new(notifier.event, &**notifier as *const _ as u64), ) - .chain_err(|| { + .with_context(|| { format!("Failed to resume event, event fd: {}", notifier.raw_fd) })?; - notifier.status = EventStatus::Alive; + *notifier.status.lock().unwrap() = EventStatus::Alive; + } + _ => { + return Err(anyhow!(UtilError::NoRegisterFd(event.raw_fd))); + } + } + Ok(()) + } + + fn update_events_for_fd(&mut self, event: &EventNotifier, add: bool) -> Result<()> { + let mut events_map = self.events.write().unwrap(); + match events_map.get_mut(&event.raw_fd) { + Some(notifier) => { + let new_events = if add { + event.event 
| notifier.event + } else { + !event.event & notifier.event + }; + if new_events != notifier.event { + self.epoll + .ctl( + ControlOperation::Modify, + notifier.raw_fd, + EpollEvent::new(new_events, &**notifier as *const _ as u64), + ) + .with_context(|| { + format!("Failed to add events, event fd: {}", notifier.raw_fd) + })?; + notifier.event = new_events; + } } _ => { - return Err(ErrorKind::NoRegisterFd(event.raw_fd).into()); + return Err(anyhow!(UtilError::NoRegisterFd(event.raw_fd))); } } Ok(()) @@ -311,10 +514,14 @@ impl EventLoopContext { /// * `notifiers` - event notifiers wanted to add to or remove from `EventLoop`. pub fn update_events(&mut self, notifiers: Vec) -> Result<()> { for en in notifiers { + trace::update_event(&en.raw_fd, &en.op); match en.op { NotifierOperation::AddExclusion | NotifierOperation::AddShared => { self.add_event(en)?; } + NotifierOperation::Modify => { + self.modify_event(en)?; + } NotifierOperation::Delete => { self.rm_event(&en)?; } @@ -324,11 +531,15 @@ impl EventLoopContext { NotifierOperation::Resume => { self.resume_event(&en)?; } - _ => { - return Err(ErrorKind::UnExpectedOperationType.into()); + NotifierOperation::AddEvents => { + self.update_events_for_fd(&en, true)?; + } + NotifierOperation::DeleteEvents => { + self.update_events_for_fd(&en, false)?; } } } + self.kick(); Ok(()) } @@ -342,7 +553,7 @@ impl EventLoopContext { } } - self.epoll_wait_manager(self.timers_min_timeout()) + self.epoll_wait_manager(self.timers_min_duration()) } pub fn iothread_run(&mut self) -> Result { @@ -352,113 +563,157 @@ impl EventLoopContext { return Ok(false); } } - let timeout = self.timers_min_timeout(); - if timeout == -1 { + let min_timeout_ns = self.timers_min_duration(); + if min_timeout_ns.is_none() { for _i in 0..AIO_PRFETCH_CYCLE_TIME { - for (_fd, notifer) in self.events.read().unwrap().iter() { - if notifer.io_poll { - if let EventStatus::Alive = notifer.status { - let handle = notifer.handlers[1].lock().unwrap(); - match handle(self.ready_events[1].event_set(), notifer.raw_fd) { - None => {} - Some(_) => { - break; - } - } - } + for notifier in self.events.read().unwrap().values() { + let status_locked = notifier.status.lock().unwrap(); + if *status_locked != EventStatus::Alive || notifier.handler_poll.is_none() { + continue; + } + let handler_poll = notifier.handler_poll.as_ref().unwrap(); + if handler_poll(EventSet::empty(), notifier.raw_fd).is_some() { + break; } } } } - - self.epoll_wait_manager(timeout) + self.epoll_wait_manager(min_timeout_ns) } - /// Call the function given by `func` after `nsec` nanoseconds. + /// Call the function given by `func` after `delay` time. /// /// # Arguments /// /// * `func` - the function will be called later. - /// * `nsec` - delay time in nanoseconds. - pub fn delay_call(&mut self, func: Box, nsec: u64) { - let timer = Timer::new(func, nsec); - + /// * `delay` - delay time. 
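+    ///
+    /// # Examples
+    ///
+    /// A minimal sketch (not compiled as a doctest; the barrier size and delay are
+    /// arbitrary values chosen for illustration):
+    ///
+    /// ```ignore
+    /// let mut ctx = EventLoopContext::new(Arc::new(Barrier::new(1)));
+    /// // Schedule the closure to run after roughly 5 ms; keep the id to cancel it later.
+    /// let id = ctx.timer_add(Box::new(|| println!("timer expired")), Duration::from_millis(5));
+    /// ctx.timer_del(id);
+    /// ```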
+ pub fn timer_add(&mut self, func: Box, delay: Duration) -> u64 { // insert in order of expire_time - let mut index = self.timers.len(); - for (i, t) in self.timers.iter().enumerate() { + let mut timers = self.timers.lock().unwrap(); + + let timer_id = self.timer_next_id.fetch_add(1, Ordering::SeqCst); + let timer = Box::new(Timer::new(func, delay, timer_id)); + + let mut index = timers.len(); + for (i, t) in timers.iter().enumerate() { if timer.expire_time < t.expire_time { index = i; break; } } - self.timers.insert(index, timer); + trace::timer_add(&timer.id, &timer.expire_time); + timers.insert(index, timer); + drop(timers); + self.kick(); + timer_id } - /// Get the expire_time of the soonest Timer, and then translate it to timeout. - fn timers_min_timeout(&self) -> i32 { - if self.timers.is_empty() { - return -1; + /// Remove timer with specific timer id. + pub fn timer_del(&mut self, timer_id: u64) { + let mut timers = self.timers.lock().unwrap(); + for (i, t) in timers.iter().enumerate() { + if timer_id == t.id { + trace::timer_del(&t.id, &t.expire_time); + timers.remove(i); + break; + } } + } - let now = Instant::now(); - if self.timers[0].expire_time <= now { - return 0; + /// Get the expire_time of the soonest Timer, and then translate it to duration. + pub fn timers_min_duration(&self) -> Option { + // The kick event happens before re-evaluate can be ignored. + self.kicked.store(false, Ordering::SeqCst); + let timers = self.timers.lock().unwrap(); + if timers.is_empty() { + return None; } - let timeout = (self.timers[0].expire_time - now).as_millis(); - if timeout >= i32::MAX as u128 { - i32::MAX - 1 - } else { - timeout as i32 - } + Some( + timers[0] + .expire_time + .saturating_duration_since(get_current_time()), + ) } /// Call function of the timers which have already expired. - fn run_timers(&mut self) { - let now = Instant::now(); - let mut expired_nr = 0; + pub fn run_timers(&mut self) { + let now = get_current_time(); + let mut expired_nr: usize = 0; - for timer in &self.timers { + let mut timers = self.timers.lock().unwrap(); + for timer in timers.iter() { if timer.expire_time > now { break; } - expired_nr += 1; - (timer.func)(); } - self.timers.drain(0..expired_nr); + let expired_timers: Vec> = timers.drain(0..expired_nr).collect(); + drop(timers); + for timer in expired_timers { + trace::timer_run(&timer.id); + (timer.func)(); + } } - fn epoll_wait_manager(&mut self, time_out: i32) -> Result { - let ev_count = match self - .epoll - .wait(READY_EVENT_MAX, time_out, &mut self.ready_events[..]) - { + fn epoll_wait_manager(&mut self, mut time_out: Option) -> Result { + let need_kick = !(time_out.is_some() && *time_out.as_ref().unwrap() == Duration::ZERO); + if need_kick { + self.kick_me.store(true, Ordering::SeqCst); + if self.kicked.load(Ordering::SeqCst) { + time_out = Some(Duration::ZERO); + } + } + + // When time_out greater then zero, use ppoll as a more precise timer. 
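+        // ppoll() returns either when the epoll fd becomes readable or when the timeout
+        // expires; in both cases time_out is forced to zero so that the epoll_wait below
+        // only collects already-pending events instead of blocking a second time.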
+ if time_out.is_some() && *time_out.as_ref().unwrap() != Duration::ZERO { + let time_out_spec = Some(TimeSpec::from_duration(*time_out.as_ref().unwrap())); + let pollflags = PollFlags::POLLIN | PollFlags::POLLOUT | PollFlags::POLLHUP; + let mut pollfds: [PollFd; 1] = [PollFd::new(self.epoll.as_raw_fd(), pollflags)]; + + match ppoll(&mut pollfds, time_out_spec, None) { + Ok(_) => time_out = Some(Duration::ZERO), + Err(Errno::EINTR) => time_out = Some(Duration::ZERO), + Err(e) => return Err(anyhow!(UtilError::EpollWait(e.into()))), + }; + } + + let time_out_ms = match time_out { + Some(t) => i32::try_from(t.as_millis()).unwrap_or(i32::MAX), + None => -1, + }; + let ev_count = match self.epoll.wait(time_out_ms, &mut self.ready_events[..]) { Ok(ev_count) => ev_count, Err(e) if e.raw_os_error() == Some(libc::EINTR) => 0, - Err(e) => return Err(ErrorKind::EpollWait(e).into()), + Err(e) => return Err(anyhow!(UtilError::EpollWait(e))), }; + if need_kick { + self.kick_me.store(false, Ordering::SeqCst); + } for i in 0..ev_count { - // It`s safe because elements in self.events_map never get released in other functions + // SAFETY: elements in self.events_map never get released in other functions let event = unsafe { let event_ptr = self.ready_events[i].data() as *const EventNotifier; &*event_ptr as &EventNotifier }; - if let EventStatus::Alive = event.status { - let mut notifiers = Vec::new(); - for i in 0..event.handlers.len() { - let handle = event.handlers[i].lock().unwrap(); - match handle(self.ready_events[i].event_set(), event.raw_fd) { + let mut notifiers = Vec::new(); + let status_locked = event.status.lock().unwrap(); + if *status_locked == EventStatus::Alive { + for handler in event.handlers.iter() { + match handler(self.ready_events[i].event_set(), event.raw_fd) { None => {} Some(mut notifier) => { notifiers.append(&mut notifier); } } } - self.update_events(notifiers)?; + } + drop(status_locked); + if let Err(e) = self.update_events(notifiers) { + error!("update event failed: {}", e); } } @@ -468,25 +723,16 @@ impl EventLoopContext { } } -impl Default for EventLoopContext { - fn default() -> Self { - Self::new() - } -} - pub fn read_fd(fd: RawFd) -> u64 { let mut value: u64 = 0; - let ret = unsafe { - read( - fd, - &mut value as *mut u64 as *mut c_void, - std::mem::size_of::(), - ) - }; + let buf = &mut value as *mut u64 as *mut c_void; + let count = std::mem::size_of::(); + // SAFETY: The buf refers to local value and count equals to value size. 
+ let ret = unsafe { read(fd, buf, count) }; if ret == -1 { - error!("Failed to read fd"); + warn!("Failed to read fd"); } value @@ -494,26 +740,19 @@ pub fn read_fd(fd: RawFd) -> u64 { #[cfg(test)] mod test { - use super::*; - use libc::*; use std::os::unix::io::{AsRawFd, RawFd}; + use std::sync::Barrier; + use vmm_sys_util::{epoll::EventSet, eventfd::EventFd}; + use super::*; + impl EventLoopContext { fn check_existence(&self, fd: RawFd) -> Option { let events_map = self.events.read().unwrap(); - match events_map.get(&fd) { - None => { - return None; - } - Some(notifier) => { - if let EventStatus::Alive = notifier.status { - Some(true) - } else { - Some(false) - } - } - } + events_map + .get(&fd) + .map(|notifier| *notifier.status.lock().unwrap() == EventStatus::Alive) } fn create_event(&mut self) -> i32 { @@ -531,8 +770,8 @@ mod test { } } - fn generate_handler(related_fd: i32) -> Box { - Box::new(move |_, _| { + fn generate_handler(related_fd: i32) -> Rc { + Rc::new(move |_, _| { let mut notifiers = Vec::new(); let event = EventNotifier::new( NotifierOperation::AddShared, @@ -548,20 +787,19 @@ mod test { #[test] fn basic_test() { - let mut mainloop = EventLoopContext::new(); + let mut mainloop = EventLoopContext::new(Arc::new(Barrier::new(1))); let mut notifiers = Vec::new(); let fd1 = EventFd::new(EFD_NONBLOCK).unwrap(); let fd1_related = EventFd::new(EFD_NONBLOCK).unwrap(); let handler1 = generate_handler(fd1_related.as_raw_fd()); - let mut handlers = Vec::new(); - handlers.push(Arc::new(Mutex::new(handler1))); + let handlers = vec![handler1]; let event1 = EventNotifier::new( NotifierOperation::AddShared, fd1.as_raw_fd(), None, EventSet::OUT, - handlers.clone(), + handlers, ); notifiers.push(event1); @@ -576,7 +814,7 @@ mod test { #[test] fn parked_event_test() { - let mut mainloop = EventLoopContext::new(); + let mut mainloop = EventLoopContext::new(Arc::new(Barrier::new(1))); let mut notifiers = Vec::new(); let fd1 = EventFd::new(EFD_NONBLOCK).unwrap(); let fd2 = EventFd::new(EFD_NONBLOCK).unwrap(); @@ -623,7 +861,7 @@ mod test { #[test] fn event_handler_test() { - let mut mainloop = EventLoopContext::new(); + let mut mainloop = EventLoopContext::new(Arc::new(Barrier::new(1))); let mut notifiers = Vec::new(); let fd1 = EventFd::new(EFD_NONBLOCK).unwrap(); let fd1_related = EventFd::new(EFD_NONBLOCK).unwrap(); @@ -636,7 +874,7 @@ mod test { fd1.as_raw_fd(), None, EventSet::OUT, - vec![Arc::new(Mutex::new(handler1))], + vec![handler1], ); let event1_update = EventNotifier::new( @@ -644,7 +882,7 @@ mod test { fd1.as_raw_fd(), None, EventSet::OUT, - vec![Arc::new(Mutex::new(handler1_update))], + vec![handler1_update], ); notifiers.push(event1); @@ -653,7 +891,7 @@ mod test { mainloop.run().unwrap(); // Firstly, event1 with handler1 would be added. Then, event1's handlers would append - // handler1_update, which would register fd1_related_update in mainloop. + // handler1_update, which would register fd1_related_update in mainloop. 
assert!(mainloop.check_existence(fd1_related.as_raw_fd()).unwrap()); assert!(mainloop .check_existence(fd1_related_update.as_raw_fd()) @@ -662,7 +900,7 @@ mod test { #[test] fn error_operation_test() { - let mut mainloop = EventLoopContext::new(); + let mut mainloop = EventLoopContext::new(Arc::new(Barrier::new(1))); let fd1 = EventFd::new(EFD_NONBLOCK).unwrap(); let leisure_fd = EventFd::new(EFD_NONBLOCK).unwrap(); @@ -699,7 +937,7 @@ mod test { #[test] fn error_parked_operation_test() { - let mut mainloop = EventLoopContext::new(); + let mut mainloop = EventLoopContext::new(Arc::new(Barrier::new(1))); let fd1 = EventFd::new(EFD_NONBLOCK).unwrap(); let fd2 = EventFd::new(EFD_NONBLOCK).unwrap(); @@ -734,7 +972,7 @@ mod test { #[test] fn fd_released_test() { - let mut mainloop = EventLoopContext::new(); + let mut mainloop = EventLoopContext::new(Arc::new(Barrier::new(1))); let fd = mainloop.create_event(); // In this case, fd is already closed. But program was wrote to ignore the error. diff --git a/util/src/num_ops.rs b/util/src/num_ops.rs index fd902eeedb3abd2df10c4c40147544b3b4b4aa35..f5ea59be220b5323a490857dc203d3d8ac00fc76 100644 --- a/util/src/num_ops.rs +++ b/util/src/num_ops.rs @@ -10,7 +10,13 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -//! This module implements some operations of Rust primitive types. +use std::num::ParseIntError; + +use anyhow::{Context, Result}; +use byteorder::{ByteOrder, LittleEndian}; +use log::error; + +// This module implements some operations of Rust primitive types. /// Calculate the aligned-up u64 value. /// @@ -29,7 +35,7 @@ /// assert!(value == Some(1004)); /// ``` pub fn round_up(origin: u64, align: u64) -> Option { - match origin % align { + match origin.checked_rem(align)? { 0 => Some(origin), diff => origin.checked_add(align - diff), } @@ -52,12 +58,37 @@ pub fn round_up(origin: u64, align: u64) -> Option { /// assert!(value == Some(1000)); /// ``` pub fn round_down(origin: u64, align: u64) -> Option { - match origin % align { + match origin.checked_rem(align)? { 0 => Some(origin), diff => origin.checked_sub(diff), } } +/// Division rounded up. +/// +/// # Arguments +/// +/// * `dividend` - dividend. +/// * `divisor` - divisor. +/// +/// # Examples +/// +/// ```rust +/// extern crate util; +/// use util::num_ops::div_round_up; +/// +/// let value = div_round_up(10 as u64, 4 as u64); +/// assert!(value == Some(3)); +/// ``` +pub fn div_round_up(dividend: u64, divisor: u64) -> Option { + let res = dividend.checked_div(divisor)?; + if dividend.checked_rem(divisor)? == 0 { + Some(res) + } else { + Some(res + 1) + } +} + /// Get the first half or second half of u64. /// /// # Arguments @@ -107,6 +138,48 @@ pub fn write_u32(value: u32, page: u32) -> u64 { } } +/// Write the given u32 to the low bits in u64, keep the high bits, +/// returns the u64 value. +/// +/// # Arguments +/// +/// * `origin` - The origin u64 value. +/// * `value` - The set u32 value. +/// +/// # Examples +/// +/// ```rust +/// extern crate util; +/// use util::num_ops::write_u64_low; +/// +/// let value = write_u64_low(0x1000_0000_0000_0000, 0x1000_0000); +/// assert!(value == 0x1000_0000_1000_0000); +/// ``` +pub fn write_u64_low(origin: u64, value: u32) -> u64 { + origin & 0xFFFF_FFFF_0000_0000_u64 | u64::from(value) +} + +/// Write the given u32 to the high bits in u64, keep the low bits, +/// returns the u64 value. +/// +/// # Arguments +/// +/// * `origin` - The origin u64 value. 
+/// * `value` - The set u32 value. +/// +/// # Examples +/// +/// ```rust +/// extern crate util; +/// use util::num_ops::write_u64_high; +/// +/// let value = write_u64_high(0x0000_0000_1000_0000, 0x1000_0000); +/// assert!(value == 0x1000_0000_1000_0000); +/// ``` +pub fn write_u64_high(origin: u64, value: u32) -> u64 { + u64::from(value) << 32 | (origin & 0x0000_0000_FFFF_FFFF_u64) +} + /// Extract from the 32 bit input @value the bit field specified by the /// @start and @length parameters, and return it. The bit field must /// lie entirely within the 32 bit word. It is valid to request that @@ -128,7 +201,7 @@ pub fn write_u32(value: u32, page: u32) -> u64 { /// assert!(value == 0xfa); /// ``` pub fn extract_u32(value: u32, start: u32, length: u32) -> Option { - if length > 32 - start { + if length > 32_u32.checked_sub(start)? { error!( "extract_u32: ( start {} length {} ) is out of range", start, length @@ -160,7 +233,7 @@ pub fn extract_u32(value: u32, start: u32, length: u32) -> Option { /// assert!(value == 0xffff); /// ``` pub fn extract_u64(value: u64, start: u32, length: u32) -> Option { - if length > 64 - start { + if length > 64_u32.checked_sub(start)? { error!( "extract_u64: ( start {} length {} ) is out of range", start, length @@ -168,7 +241,7 @@ pub fn extract_u64(value: u64, start: u32, length: u32) -> Option { return None; } - Some((value >> start as u64) & (!(0_u64) >> (64 - length) as u64)) + Some((value >> u64::from(start)) & (!(0_u64) >> u64::from(64 - length))) } /// Deposit @fieldval into the 32 bit @value at the bit field specified @@ -196,7 +269,7 @@ pub fn extract_u64(value: u64, start: u32, length: u32) -> Option { /// assert!(value == 0xffba); /// ``` pub fn deposit_u32(value: u32, start: u32, length: u32, fieldval: u32) -> Option { - if length > 32 - start { + if length > 32_u32.checked_sub(start)? { error!( "deposit_u32: ( start {} length {} ) is out of range", start, length @@ -208,22 +281,243 @@ pub fn deposit_u32(value: u32, start: u32, length: u32, fieldval: u32) -> Option Some((value & !mask) | ((fieldval << start) & mask)) } +/// Write the given u16 to an array, returns the bool. +/// +/// # Arguments +/// +/// * `data` - The array of u8. +/// * `value` - The u16 value +/// +/// # Examples +/// +/// ```rust +/// extern crate util; +/// use util::num_ops::write_data_u16; +/// +/// let mut data: [u8; 2] = [0; 2]; +/// let ret = write_data_u16(&mut data, 0x1234); +/// assert!(ret && data[0] == 0x34 && data[1] == 0x12); +/// ``` +pub fn write_data_u16(data: &mut [u8], value: u16) -> bool { + match data.len() { + 1 => data[0] = value as u8, + 2 => { + LittleEndian::write_u16(data, value); + } + n => { + error!("Invalid data length {} for reading value {}", n, value); + return false; + } + }; + true +} + +/// Write the given u32 to an array, returns the bool. +/// +/// # Arguments +/// +/// * `data` - The array of u8. 
+/// * `value` - The u32 value +/// +/// # Examples +/// +/// ```rust +/// extern crate util; +/// use util::num_ops::write_data_u32; +/// +/// let mut data: [u8; 4] = [0; 4]; +/// let ret = write_data_u32(&mut data, 0x12345678); +/// assert!(ret && data[0] == 0x78 && data[1] == 0x56 && data[2] == 0x34 && data[3] == 0x12); +/// ``` +pub fn write_data_u32(data: &mut [u8], value: u32) -> bool { + match data.len() { + 1 => data[0] = value as u8, + 2 => { + LittleEndian::write_u16(data, value as u16); + } + 4 => { + LittleEndian::write_u32(data, value); + } + _ => { + error!( + "Invalid data length: value {}, data len {}", + value, + data.len() + ); + return false; + } + }; + true +} + +/// Read the given array to an u32, returns the bool. +/// +/// # Arguments +/// +/// * `data` - The array of u8. +/// * `value` - The u32 value +/// +/// # Examples +/// +/// ```rust +/// extern crate util; +/// use util::num_ops::read_data_u32; +/// +/// let mut value = 0; +/// let ret = read_data_u32(&[0x11, 0x22, 0x33, 0x44], &mut value); +/// assert!(ret && value == 0x44332211); +/// ``` +pub fn read_data_u32(data: &[u8], value: &mut u32) -> bool { + *value = match data.len() { + 1 => u32::from(data[0]), + 2 => u32::from(LittleEndian::read_u16(data)), + 4 => LittleEndian::read_u32(data), + _ => { + error!("Invalid data length: data len {}", data.len()); + return false; + } + }; + true +} + +/// Read the given array to an u16, returns the bool. +/// +/// # Arguments +/// +/// * `data` - The array of u8. +/// * `value` - The u16 value +/// +/// # Examples +/// +/// ```rust +/// extern crate util; +/// use util::num_ops::read_data_u16; +/// +/// let mut value = 0; +/// let ret = read_data_u16(&[0x11, 0x22], &mut value); +/// assert!(ret && value == 0x2211); +/// ``` +pub fn read_data_u16(data: &[u8], value: &mut u16) -> bool { + *value = match data.len() { + 1 => u16::from(data[0]), + 2 => LittleEndian::read_u16(data), + _ => { + error!("Invalid data length: data len {}", data.len()); + return false; + } + }; + true +} + +pub trait Num { + type ParseIntError; + fn from_str_radix(s: &str, radix: u32) -> Result + where + Self: Sized; +} + +macro_rules! int_trait_impl { + ($name:ident for $($t:ty)*) => ($( + impl $name for $t { + type ParseIntError = ::core::num::ParseIntError; + fn from_str_radix(s: &str, radix: u32) -> Result { + <$t>::from_str_radix(s, radix) + } + } + )*) +} + +int_trait_impl!(Num for u8 u16 usize); + +/// Parse a string to a number, decimal and hexadecimal numbers supported now. +/// +/// # Arguments +/// +/// * `string_in` - The string that means a number, eg. "18", "0x1c". +/// +/// # Examples +/// +/// ```rust +/// extern crate util; +/// use util::num_ops::str_to_num; +/// +/// let value = str_to_num::("0x17").unwrap(); +/// assert!(value == 0x17); +/// let value = str_to_num::("0X17").unwrap(); +/// assert!(value == 0x17); +/// let value = str_to_num::("17").unwrap(); +/// assert!(value == 17); +/// ``` +pub fn str_to_num(s: &str) -> Result { + let mut base: u32 = 10; + if s.starts_with("0x") || s.starts_with("0X") { + base = 16; + } + let without_prefix = s.trim().trim_start_matches("0x").trim_start_matches("0X"); + let num = + T::from_str_radix(without_prefix, base).with_context(|| format!("Invalid num: {}", s))?; + Ok(num) +} + +/// Check whether two regions overlap with each other. +/// +/// # Arguments +/// +/// * `start1` - Start address of the first region. +/// * `size1` - Size of the first region. +/// * `start2` - Start address of the second region. 
+/// * `size2` - Size of the second region. +/// +/// # Examples +/// +/// ```rust +/// extern crate util; +/// use util::num_ops::ranges_overlap; +/// +/// let value = ranges_overlap(100, 100, 150, 100).unwrap(); +/// assert!(value == true); +/// ``` +pub fn ranges_overlap(start1: usize, size1: usize, start2: usize, size2: usize) -> Result { + let end1 = start1 + .checked_add(size1) + .with_context(|| format!("range 1 overflows: start {}, size {}", start1, size1))?; + let end2 = start2 + .checked_add(size2) + .with_context(|| format!("range 2 overflows: start {}, size {}", start1, size1))?; + + Ok(!(start1 >= end2 || start2 >= end1)) +} + #[cfg(test)] mod test { use super::*; #[test] fn round_up_test() { - let result = round_up(10001 as u64, 100 as u64); + let result = round_up(10001_u64, 100_u64); assert_eq!(result, Some(10100)); } #[test] fn round_down_test() { - let result = round_down(10001 as u64, 100 as u64); + let result = round_down(10001_u64, 100_u64); assert_eq!(result, Some(10000)); } + #[test] + fn test_div_round_up() { + let res = div_round_up(10, 4); + assert_eq!(res, Some(3)); + let res = div_round_up(10, 2); + assert_eq!(res, Some(5)); + let res = div_round_up(2, 10); + assert_eq!(res, Some(1)); + let res = div_round_up(10, 0); + assert_eq!(res, None); + let res = div_round_up(0xffff_ffff_ffff_ffff_u64, 1); + assert_eq!(res, Some(0xffff_ffff_ffff_ffff_u64)); + } + #[test] fn test_read_u32_from_u64() { let value = 0x1234_5678_9012_3456u64; @@ -239,6 +533,30 @@ mod test { assert_eq!(write_u32(0x1234_5678, 2), 0); } + #[test] + fn test_write_u64_low() { + assert_eq!( + write_u64_low(0x0000_0000_FFFF_FFFF_u64, 0x1234_5678), + 0x0000_0000_1234_5678_u64 + ); + assert_eq!( + write_u64_low(0xFFFF_FFFF_0000_0000_u64, 0x1234_5678), + 0xFFFF_FFFF_1234_5678_u64 + ); + } + + #[test] + fn test_write_u64_high() { + assert_eq!( + write_u64_high(0x0000_0000_FFFF_FFFF_u64, 0x1234_5678), + 0x1234_5678_FFFF_FFFF_u64 + ); + assert_eq!( + write_u64_high(0xFFFF_FFFF_0000_0000_u64, 0x1234_5678), + 0x1234_5678_0000_0000_u64 + ); + } + #[test] fn test_extract_u32() { assert_eq!(extract_u32(0xfefbfffa, 0, 33), None); @@ -283,4 +601,78 @@ mod test { assert_eq!(deposit_u32(0xfdfcfbfa, 8, 24, 0xbdbcbbba), Some(0xbcbbbafa)); assert_eq!(deposit_u32(0xfdfcfbfa, 0, 32, 0xbdbcbbba), Some(0xbdbcbbba)); } + + #[test] + fn test_write_data_u16() { + let mut data: [u8; 1] = [0; 1]; + let ret = write_data_u16(&mut data, 0x11); + assert!(ret && data[0] == 0x11); + let mut data: [u8; 2] = [0; 2]; + let ret = write_data_u16(&mut data, 0x1122); + assert!(ret && data[0] == 0x22 && data[1] == 0x11); + let mut data: [u8; 3] = [0; 3]; + let ret = write_data_u16(&mut data, 0x1122); + assert!(!ret); + } + + #[test] + fn test_write_data_u32() { + let mut data: [u8; 1] = [0; 1]; + let ret = write_data_u32(&mut data, 0x11); + assert!(ret && data[0] == 0x11); + let mut data: [u8; 2] = [0; 2]; + let ret = write_data_u32(&mut data, 0x1122); + assert!(ret && data[0] == 0x22 && data[1] == 0x11); + let mut data: [u8; 3] = [0; 3]; + let ret = write_data_u32(&mut data, 0x112233); + assert!(!ret); + let mut data: [u8; 4] = [0; 4]; + let ret = write_data_u32(&mut data, 0x11223344); + assert!(ret && data[0] == 0x44 && data[1] == 0x33 && data[2] == 0x22 && data[3] == 0x11); + } + + #[test] + fn test_read_data_u16() { + let mut value = 0; + let ret = read_data_u16(&[0x11], &mut value); + assert!(ret && value == 0x11); + let ret = read_data_u16(&[0x11, 0x22], &mut value); + assert!(ret && value == 0x2211); + let ret = read_data_u16(&[0x11, 
0x22, 0x33], &mut value); + assert!(!ret); + } + + #[test] + fn test_read_data_u32() { + let mut value = 0; + let ret = read_data_u32(&[0x11], &mut value); + assert!(ret && value == 0x11); + let ret = read_data_u32(&[0x11, 0x22], &mut value); + assert!(ret && value == 0x2211); + let ret = read_data_u32(&[0x11, 0x22, 0x33], &mut value); + assert!(!ret); + let ret = read_data_u32(&[0x11, 0x22, 0x33, 0x44], &mut value); + assert!(ret && value == 0x44332211); + } + + #[test] + fn test_str_to_num() { + let value = str_to_num::("0x17").unwrap(); + assert!(value == 0x17); + let value = str_to_num::("0X17").unwrap(); + assert!(value == 0x17); + let value = str_to_num::("17").unwrap(); + assert!(value == 17); + } + + #[test] + fn test_ranges_overlap() { + assert!(ranges_overlap(100, 100, 150, 100).unwrap()); + assert!(ranges_overlap(100, 100, 150, 50).unwrap()); + assert!(!ranges_overlap(100, 100, 200, 50).unwrap()); + assert!(ranges_overlap(100, 100, 100, 50).unwrap()); + assert!(!ranges_overlap(100, 100, 50, 50).unwrap()); + assert!(ranges_overlap(100, 100, 50, 100).unwrap()); + assert!(ranges_overlap(usize::MAX, 1, 100, 50).is_err()) + } } diff --git a/util/src/offsetof.rs b/util/src/offsetof.rs index a6b55d8c0d328cf1b4ac8e786a44ac54dec95ea6..055b24e26fb40dca6233ce48b879f3e96e35e7f2 100644 --- a/util/src/offsetof.rs +++ b/util/src/offsetof.rs @@ -10,19 +10,20 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. +// Note: This can be replaced with std::mem::offset_of! within higher rust version. + /// Macro: Calculate offset of specified field in a type. #[macro_export] macro_rules! __offset_of { ($type_name:ty, $field:ident) => {{ let tmp = core::mem::MaybeUninit::<$type_name>::uninit(); let outer = tmp.as_ptr(); - // Safe because the pointer is valid and aligned, just not initialised; `addr_of` ensures that - // we don't actually read from `outer` (which would be UB) nor create an intermediate reference. + // SAFETY: The pointer is valid and aligned, just not initialised; `addr_of` ensures + // that we don't actually read from `outer` (which would be UB) nor create an + // intermediate reference. let inner = unsafe { core::ptr::addr_of!((*outer).$field) } as *const u8; - // Safe because the two pointers are within the same allocation block. - unsafe { - inner.offset_from(outer as *const u8) as usize - } + // SAFETY: Two pointers are within the same allocation block. + unsafe { inner.offset_from(outer as *const u8) as usize } }}; } diff --git a/util/src/ohos_binding/audio/mod.rs b/util/src/ohos_binding/audio/mod.rs new file mode 100755 index 0000000000000000000000000000000000000000..07b2cb09afd4a22e35cb6c59ec34e4e5234b7010 --- /dev/null +++ b/util/src/ohos_binding/audio/mod.rs @@ -0,0 +1,569 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
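// A minimal usage sketch for the `__offset_of!` macro from util/src/offsetof.rs above
// (illustrative only; `Demo` is a hypothetical type, not part of this patch):
//
//     #[repr(C)]
//     struct Demo { a: u8, b: u32 }
//     assert_eq!(util::__offset_of!(Demo, b), 4);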
+ +pub mod sys; + +use std::os::raw::c_void; +use std::ptr; +use std::sync::{Arc, RwLock}; + +use log::error; +use once_cell::sync::Lazy; + +use super::hwf_adapter::{hwf_adapter_volume_api, volume::VolumeFuncTable}; +pub use sys as capi; + +const AUDIO_SAMPLE_RATE_44KHZ: u32 = 44100; +const AUDIO_SAMPLE_RATE_48KHZ: u32 = 48000; + +macro_rules! call_capi { + ( $f: ident ( $($x: expr),* ) ) => { + { + // SAFETY: OH Audio FrameWork's APIs guarantee safety. + let r = unsafe { capi::$f( $($x),* ) }; + if r != capi::OH_AUDIO_STREAM_RESULT_AUDIOSTREAM_SUCCESS { + error!("ohauadio_rapi: failed at {:?}", stringify!($f)); + Err(OAErr::from(r)) + } else { + Ok(()) + } + } + }; +} + +macro_rules! call_capi_nocheck { + ( $f: ident ( $($x: expr),* ) ) => { + // SAFETY: OH Audio FrameWork's APIs guarantee safety. + unsafe { capi::$f( $($x),* ) } + }; +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum OAErr { + Ok, + InvalidParam, + IllegalState, + SysErr, + UnknownErr, +} + +impl std::error::Error for OAErr {} + +impl std::fmt::Display for OAErr { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "{}", + match self { + OAErr::Ok => "Ok", + OAErr::InvalidParam => "InvalidParam", + OAErr::IllegalState => "IllegalState", + OAErr::SysErr => "SysErr", + OAErr::UnknownErr => "UnknownErr", + } + ) + } +} + +impl From for OAErr { + #[inline] + fn from(c: capi::OhAudioStreamResult) -> Self { + match c { + capi::OH_AUDIO_STREAM_RESULT_AUDIOSTREAM_SUCCESS => Self::Ok, + capi::OH_AUDIO_STREAM_RESULT_AUDIOSTREAM_ERROR_INVALID_PARAM => Self::InvalidParam, + capi::OH_AUDIO_STREAM_RESULT_AUDIOSTREAM_ERROR_ILLEGAL_STATE => Self::IllegalState, + capi::OH_AUDIO_STREAM_RESULT_AUDIOSTREAM_ERROR_SYSTEM => Self::SysErr, + _ => Self::UnknownErr, + } + } +} + +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)] +struct SampleSize(pub capi::OhAudioStreamType); + +impl TryFrom for SampleSize { + type Error = OAErr; + + #[inline] + fn try_from(s: u8) -> Result { + match s { + 16 => Ok(SampleSize( + capi::OH_AUDIO_STREAM_SAMPLE_FORMAT_AUDIOSTREAM_SAMPLE_S16_LE, + )), + 24 => Ok(SampleSize( + capi::OH_AUDIO_STREAM_SAMPLE_FORMAT_AUDIOSTREAM_SAMPLE_S24_LE, + )), + 32 => Ok(SampleSize( + capi::OH_AUDIO_STREAM_SAMPLE_FORMAT_AUDIOSTREAM_SAMPLE_S32_LE, + )), + _ => Err(OAErr::InvalidParam), + } + } +} + +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +struct SampleRate(pub i32); + +impl TryFrom for SampleRate { + type Error = OAErr; + + #[inline] + fn try_from(value: u32) -> Result { + match value { + AUDIO_SAMPLE_RATE_44KHZ => Ok(SampleRate(value as i32)), + AUDIO_SAMPLE_RATE_48KHZ => Ok(SampleRate(value as i32)), + _ => Err(OAErr::InvalidParam), + } + } +} + +impl Default for SampleRate { + fn default() -> Self { + Self(AUDIO_SAMPLE_RATE_44KHZ as i32) + } +} + +#[repr(C)] +#[derive(Debug, Copy, Clone, Eq, Default)] +struct AudioSpec { + size: SampleSize, + rate: SampleRate, + channels: u8, +} + +impl PartialEq for AudioSpec { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.size == other.size && self.rate == other.rate && self.channels == other.channels + } +} + +impl AudioSpec { + fn set(&mut self, size: u8, rate: u32, channels: u8) -> Result<(), OAErr> { + self.size = SampleSize::try_from(size)?; + self.rate = SampleRate::try_from(rate)?; + self.channels = channels; + Ok(()) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum AudioStreamType { + Render, + Capturer, +} + +#[allow(clippy::from_over_into)] +impl Into for 
AudioStreamType { + fn into(self) -> capi::OhAudioStreamType { + match self { + AudioStreamType::Render => capi::OH_AUDIO_STREAM_TYPE_AUDIOSTREAM_TYPE_RERNDERER, + AudioStreamType::Capturer => capi::OH_AUDIO_STREAM_TYPE_AUDIOSTREAM_TYPE_CAPTURER, + } + } +} + +pub type OhAudioCapturer = sys::OhAudioCapturer; +pub type OhAudioRenderer = sys::OhAudioRenderer; + +pub enum AudioProcessCb { + CapturerCb( + Option< + extern "C" fn( + capturer: *mut capi::OhAudioCapturer, + userData: *mut c_void, + buffer: *mut c_void, + length: i32, + ) -> i32, + >, + Option< + extern "C" fn( + capturer: *mut capi::OhAudioCapturer, + userData: *mut c_void, + source_type: capi::OHAudioInterruptSourceType, + hint: capi::OHAudioInterruptHint, + ) -> i32, + >, + ), + RendererCb( + Option< + extern "C" fn( + renderer: *mut capi::OhAudioRenderer, + userData: *mut c_void, + buffer: *mut c_void, + length: i32, + ) -> i32, + >, + Option< + extern "C" fn( + capturer: *mut capi::OhAudioRenderer, + userData: *mut c_void, + source_type: capi::OHAudioInterruptSourceType, + hint: capi::OHAudioInterruptHint, + ) -> i32, + >, + ), +} + +#[derive(Debug)] +pub struct AudioContext { + stream_type: AudioStreamType, + spec: AudioSpec, + builder: *mut capi::OhAudioStreamBuilder, + capturer: *mut capi::OhAudioCapturer, + renderer: *mut capi::OhAudioRenderer, + userdata: *mut c_void, +} + +impl Drop for AudioContext { + fn drop(&mut self) { + if !self.capturer.is_null() || !self.renderer.is_null() { + self.stop(); + } + if !self.builder.is_null() { + call_capi_nocheck!(OH_AudioStreamBuilder_Destroy(self.builder)); + self.builder = ptr::null_mut(); + } + } +} + +impl AudioContext { + #[inline(always)] + fn set_userdata(&mut self, userdata: *mut c_void) { + self.userdata = userdata; + } + + fn create_builder(&mut self) -> Result<(), OAErr> { + call_capi!(OH_AudioStreamBuilder_Create( + &mut self.builder, + self.stream_type.into() + )) + } + + fn set_sample_rate(&self) -> Result<(), OAErr> { + call_capi!(OH_AudioStreamBuilder_SetSamplingRate( + self.builder, + self.spec.rate.0 + )) + } + + fn set_sample_format(&self) -> Result<(), OAErr> { + call_capi!(OH_AudioStreamBuilder_SetSampleFormat( + self.builder, + self.spec.size.0 + )) + } + + fn set_latency_mode(&self) -> Result<(), OAErr> { + call_capi!(OH_AudioStreamBuilder_SetLatencyMode( + self.builder, + capi::OH_AUDIO_STREAM_LATENCY_MODE_FAST + )) + } + + fn create_renderer(&mut self, cb: AudioProcessCb) -> Result<(), OAErr> { + let mut cbs = capi::OhAudioRendererCallbacks::default(); + if let AudioProcessCb::RendererCb(data_cb, interrupt_cb) = cb { + cbs.oh_audio_renderer_on_write_data = data_cb; + cbs.oh_audio_renderer_on_interrupt_event = interrupt_cb; + } + call_capi!(OH_AudioStreamBuilder_SetRendererCallback( + self.builder, + cbs, + self.userdata + ))?; + call_capi!(OH_AudioStreamBuilder_GenerateRenderer( + self.builder, + &mut self.renderer + )) + } + + fn create_capturer(&mut self, cb: AudioProcessCb) -> Result<(), OAErr> { + let mut cbs = capi::OhAudioCapturerCallbacks::default(); + if let AudioProcessCb::CapturerCb(data_cb, interrupt_cb) = cb { + cbs.oh_audio_capturer_on_read_data = data_cb; + cbs.oh_audio_capturer_on_interrupt_event = interrupt_cb; + } + call_capi!(OH_AudioStreamBuilder_SetCapturerCallback( + self.builder, + cbs, + self.userdata + ))?; + call_capi!(OH_AudioStreamBuilder_GenerateCapturer( + self.builder, + &mut self.capturer + )) + } + + fn create_processor(&mut self, cb: AudioProcessCb) -> Result<(), OAErr> { + match self.stream_type { + 
AudioStreamType::Capturer => self.create_capturer(cb),
+            AudioStreamType::Render => self.create_renderer(cb),
+        }
+    }
+
+    fn start_capturer(&self) -> Result<(), OAErr> {
+        call_capi!(OH_AudioCapturer_Start(self.capturer))
+    }
+
+    fn start_renderer(&self) -> Result<(), OAErr> {
+        call_capi!(OH_AudioRenderer_Start(self.renderer))
+    }
+
+    pub fn flush_renderer(&self) -> Result<(), OAErr> {
+        call_capi!(OH_AudioRenderer_Flush(self.renderer))
+    }
+
+    pub fn new(stream_type: AudioStreamType) -> Self {
+        Self {
+            stream_type,
+            spec: AudioSpec::default(),
+            builder: ptr::null_mut(),
+            capturer: ptr::null_mut(),
+            renderer: ptr::null_mut(),
+            userdata: std::ptr::null_mut::<c_void>(),
+        }
+    }
+
+    pub fn init(
+        &mut self,
+        size: u8,
+        rate: u32,
+        channels: u8,
+        cb: AudioProcessCb,
+        userdata: *mut c_void,
+    ) -> Result<(), OAErr> {
+        self.set_userdata(userdata);
+        self.create_builder()?;
+        self.set_fmt(size, rate, channels)?;
+        self.set_sample_rate()?;
+        self.set_sample_format()?;
+        self.set_latency_mode()?;
+        self.create_processor(cb)
+    }
+
+    pub fn start(&self) -> Result<(), OAErr> {
+        match self.stream_type {
+            AudioStreamType::Capturer => self.start_capturer(),
+            AudioStreamType::Render => self.start_renderer(),
+        }
+    }
+
+    pub fn stop(&mut self) {
+        match self.stream_type {
+            AudioStreamType::Capturer => {
+                call_capi_nocheck!(OH_AudioCapturer_Stop(self.capturer));
+                call_capi_nocheck!(OH_AudioCapturer_Release(self.capturer));
+                self.capturer = ptr::null_mut();
+            }
+            AudioStreamType::Render => {
+                call_capi_nocheck!(OH_AudioRenderer_Stop(self.renderer));
+                call_capi_nocheck!(OH_AudioRenderer_Release(self.renderer));
+                self.renderer = ptr::null_mut();
+            }
+        }
+    }
+
+    pub fn set_fmt(&mut self, size: u8, rate: u32, channels: u8) -> Result<(), OAErr> {
+        self.spec.set(size, rate, channels)
+    }
+
+    pub fn check_fmt(&self, size: u8, rate: u32, channels: u8) -> bool {
+        let mut other = AudioSpec::default();
+        other
+            .set(size, rate, channels)
+            .map_or(false, |_| (self.spec == other))
+    }
+}
+
+// From here, the code is related to ohaudio volume.
+static OH_VOLUME_ADAPTER: Lazy<RwLock<OhVolume>> = Lazy::new(|| RwLock::new(OhVolume::new()));
+
+pub trait GuestVolumeNotifier: Send + Sync {
+    fn notify(&self, vol: u32);
+}
+
+struct OhVolume {
+    capi: Arc<VolumeFuncTable>,
+    notifiers: Vec<Arc<dyn GuestVolumeNotifier>>,
+}
+
+impl OhVolume {
+    fn new() -> Self {
+        let capi = hwf_adapter_volume_api();
+        // SAFETY: We call related API sequentially for specified ctx.
+        unsafe { (*capi.register_volume_change)(on_ohos_volume_changed) };
+        Self {
+            capi,
+            notifiers: Vec::new(),
+        }
+    }
+
+    fn get_ohos_volume(&self) -> u32 {
+        // SAFETY: We call related API sequentially for specified ctx.
+        unsafe { (self.capi.get_volume)() as u32 }
+    }
+
+    fn get_max_volume(&self) -> u32 {
+        // SAFETY: We call related API sequentially for specified ctx.
+        unsafe { (self.capi.get_max_volume)() as u32 }
+    }
+
+    fn get_min_volume(&self) -> u32 {
+        // SAFETY: We call related API sequentially for specified ctx.
+        unsafe { (self.capi.get_min_volume)() as u32 }
+    }
+
+    fn set_ohos_volume(&self, volume: i32) {
+        // SAFETY: We call related API sequentially for specified ctx.
+        unsafe { (self.capi.set_volume)(volume) };
+    }
+
+    fn notify_volume_change(&self, volume: i32) {
+        for notifier in self.notifiers.iter() {
+            notifier.notify(volume as u32);
+        }
+    }
+
+    fn register_guest_notifier(&mut self, notifier: Arc<dyn GuestVolumeNotifier>) {
+        self.notifiers.push(notifier);
+    }
+}
+
+// SAFETY: use RW lock to ensure the security of resources.
+unsafe extern "C" fn on_ohos_volume_changed(volume: i32) { + OH_VOLUME_ADAPTER + .read() + .unwrap() + .notify_volume_change(volume); +} + +pub fn register_guest_volume_notifier(notifier: Arc) { + OH_VOLUME_ADAPTER + .write() + .unwrap() + .register_guest_notifier(notifier); +} + +pub fn get_ohos_volume_max() -> u32 { + OH_VOLUME_ADAPTER.read().unwrap().get_max_volume() +} + +pub fn get_ohos_volume_min() -> u32 { + OH_VOLUME_ADAPTER.read().unwrap().get_min_volume() +} + +pub fn get_ohos_volume() -> u32 { + OH_VOLUME_ADAPTER.read().unwrap().get_ohos_volume() +} + +pub fn set_ohos_volume(vol: u32) { + OH_VOLUME_ADAPTER + .read() + .unwrap() + .set_ohos_volume(vol as i32); +} + +#[cfg(test)] +mod tests { + use crate::ohos_binding::audio::sys as capi; + use crate::ohos_binding::audio::{AudioSpec, AudioStreamType, OAErr, SampleRate, SampleSize}; + + #[test] + fn test_err() { + assert_eq!("OK", format!("{}", OAErr::Ok)); + assert_eq!("InvalidParam", format!("{}", OAErr::InvalidParam)); + assert_eq!("IllegalState", format!("{}", OAErr::IllegalState)); + assert_eq!("SysErr", format!("{}", OAErr::SysErr)); + assert_eq!("UnknownErr", format!("{}", OAErr::UnknownErr)); + + assert_eq!( + OAErr::Ok, + OAErr::from(capi::OH_AUDIO_STREAM_RESULT_AUDIOSTREAM_SUCCESS) + ); + assert_eq!( + OAErr::InvalidParam, + OAErr::from(capi::OH_AUDIO_STREAM_RESULT_AUDIOSTREAM_ERROR_INVALID_PARAM) + ); + assert_eq!( + OAErr::IllegalState, + OAErr::from(capi::OH_AUDIO_STREAM_RESULT_AUDIOSTREAM_ERROR_ILLEGAL_STATE) + ); + assert_eq!( + OAErr::SysErr, + OAErr::from(capi::OH_AUDIO_STREAM_RESULT_AUDIOSTREAM_ERROR_SYSTEM) + ); + assert_eq!( + OAErr::UnknownErr, + OAErr::from(capi::OH_AUDIO_STREAM_SAMPLE_FORMAT_AUDIOSTREAM_SAMPLE_F32_LE) + ); + } + + #[test] + fn test_sample_size() { + assert_eq!( + Ok(SampleSize( + capi::OH_AUDIO_STREAM_SAMPLE_FORMAT_AUDIOSTREAM_SAMPLE_S16_LE + )), + SampleSize::try_from(16) + ); + assert_eq!( + Ok(SampleSize( + capi::OH_AUDIO_STREAM_SAMPLE_FORMAT_AUDIOSTREAM_SAMPLE_S24_LE + )), + SampleSize::try_from(24) + ); + assert_eq!( + Ok(SampleSize( + capi::OH_AUDIO_STREAM_SAMPLE_FORMAT_AUDIOSTREAM_SAMPLE_S32_LE + )), + SampleSize::try_from(32) + ); + assert_eq!(Err(OAErr::InvalidParam), SampleSize::try_from(18)); + } + + #[test] + fn test_sample_rate() { + assert_eq!(SampleRate(44100), SampleRate::default()); + assert_eq!(Ok(SampleRate(44100)), SampleRate::try_from(44100)); + assert_eq!(Ok(SampleRate(48000)), SampleRate::try_from(48000)); + assert_eq!(Err(OAErr::InvalidParam), SampleRate::try_from(54321)); + } + + #[test] + fn test_audio_spec() { + let mut spec1 = AudioSpec::default(); + let spec2 = AudioSpec::default(); + assert_eq!(spec1, spec2); + assert_eq!(Err(OAErr::InvalidParam), spec1.set(15, 16, 3)); + assert_eq!(Err(OAErr::InvalidParam), spec1.set(16, 16, 3)); + assert_eq!(Ok(()), spec1.set(32, 48000, 4)); + assert_ne!(spec1, spec2); + } + + #[test] + fn test_audio_stream_type() { + let oh_audio_stream_type_render: capi::OhAudioStreamType = AudioStreamType::Render.into(); + assert_eq!( + capi::OH_AUDIO_STREAM_TYPE_AUDIOSTREAM_TYPE_RERNDERER, + oh_audio_stream_type_render + ); + let oh_audio_stream_type_capturer: capi::OhAudioStreamType = + AudioStreamType::Capturer.into(); + assert_eq!( + capi::OH_AUDIO_STREAM_TYPE_AUDIOSTREAM_TYPE_CAPTURER, + oh_audio_stream_type_capturer + ); + } +} diff --git a/util/src/ohos_binding/audio/sys.rs b/util/src/ohos_binding/audio/sys.rs new file mode 100755 index 0000000000000000000000000000000000000000..56dcee2e4879607ee87f5e523ad0b24a5032ef8c --- /dev/null 
+++ b/util/src/ohos_binding/audio/sys.rs @@ -0,0 +1,364 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +/// The call was successful. +pub const OH_AUDIO_STREAM_RESULT_AUDIOSTREAM_SUCCESS: OhAudioStreamResult = 0; +#[allow(unused)] +/// This means that the function was executed with an invalid input parameter. +pub const OH_AUDIO_STREAM_RESULT_AUDIOSTREAM_ERROR_INVALID_PARAM: OhAudioStreamResult = 1; +#[allow(unused)] +/// Execution status exception +pub const OH_AUDIO_STREAM_RESULT_AUDIOSTREAM_ERROR_ILLEGAL_STATE: OhAudioStreamResult = 2; +#[allow(unused)] +/// An system error has occurred +pub const OH_AUDIO_STREAM_RESULT_AUDIOSTREAM_ERROR_SYSTEM: OhAudioStreamResult = 3; +/// Define the result of the function execution. +/// +/// @since 10 +pub type OhAudioStreamResult = ::std::os::raw::c_uint; + +/// The type for audio stream is renderer. +pub const OH_AUDIO_STREAM_TYPE_AUDIOSTREAM_TYPE_RERNDERER: OhAudioStreamType = 1; +/// The type for audio stream is capturer. +pub const OH_AUDIO_STREAM_TYPE_AUDIOSTREAM_TYPE_CAPTURER: OhAudioStreamType = 2; +/// Define the audio stream type. +/// +/// @since 10 +pub type OhAudioStreamType = ::std::os::raw::c_uint; + +#[allow(unused)] +pub const OH_AUDIO_STREAM_SAMPLE_FORMAT_AUDIOSTREAM_SAMPLE_U8: OhAudioStreamSampleFormat = 0; +pub const OH_AUDIO_STREAM_SAMPLE_FORMAT_AUDIOSTREAM_SAMPLE_S16_LE: OhAudioStreamSampleFormat = 1; +pub const OH_AUDIO_STREAM_SAMPLE_FORMAT_AUDIOSTREAM_SAMPLE_S24_LE: OhAudioStreamSampleFormat = 2; +pub const OH_AUDIO_STREAM_SAMPLE_FORMAT_AUDIOSTREAM_SAMPLE_S32_LE: OhAudioStreamSampleFormat = 3; +#[allow(unused)] +pub const OH_AUDIO_STREAM_SAMPLE_FORMAT_AUDIOSTREAM_SAMPLE_F32_LE: OhAudioStreamSampleFormat = 4; +/// Define the audio stream sample format. +/// +/// @since 10 +pub type OhAudioStreamSampleFormat = ::std::os::raw::c_uint; + +#[allow(unused)] +pub const OH_AUDIO_STREAM_ENCODING_TYPE_AUDIOSTREAM_ENCODING_TYPE_RAW: OhAudioStreamEncodingType = + 0; +/// Define the audio encoding type. +/// +/// @since 10 +pub type OhAudioStreamEncodingType = ::std::os::raw::c_uint; + +#[allow(unused)] +pub const OH_AUDIO_STREAM_USAGE_AUDIOSTREAM_USAGE_UNKNOWN: OhAudioStreamUsage = 0; +#[allow(unused)] +pub const OH_AUDIO_STREAM_USAGE_AUDIOSTREAM_USAGE_MEDIA: OhAudioStreamUsage = 1; +#[allow(unused)] +pub const OH_AUDIO_STREAM_USAGE_AUDIOSTREAM_USAGE_COMMUNICATION: OhAudioStreamUsage = 2; +/// Define the audio stream usage. +/// Audio stream usage is used to describe what work scenario +/// the current stream is used for. 
+/// +/// @since 10 +pub type OhAudioStreamUsage = ::std::os::raw::c_uint; + +#[allow(unused)] +pub const OH_AUDIO_STREAM_CONTENT_AUDIOSTREAM_CONTENT_TYPE_UNKNOWN: OhAudioStreamContent = 0; +#[allow(unused)] +pub const OH_AUDIO_STREAM_CONTENT_AUDIOSTREAM_CONTENT_TYPE_SPEECH: OhAudioStreamContent = 1; +#[allow(unused)] +pub const OH_AUDIO_STREAM_CONTENT_AUDIOSTREAM_CONTENT_TYPE_MUSIC: OhAudioStreamContent = 2; +#[allow(unused)] +pub const OH_AUDIO_STREAM_CONTENT_AUDIOSTREAM_CONTENT_TYPE_MOVIE: OhAudioStreamContent = 3; +/// Define the audio stream content. +/// Audio stream content is used to describe the stream data type. +/// +/// @since 10 +pub type OhAudioStreamContent = ::std::os::raw::c_uint; + +#[allow(unused)] +/// This is a normal audio scene. +pub const OH_AUDIO_STREAM_LATENCY_MODE_NORMAL: OhAudioStreamLatencyMode = 0; +#[allow(unused)] +pub const OH_AUDIO_STREAM_LATENCY_MODE_FAST: OhAudioStreamLatencyMode = 1; +/// Define the audio latency mode. +/// +/// @since 10 +pub type OhAudioStreamLatencyMode = ::std::os::raw::c_uint; + +#[allow(unused)] +/// The invalid state. +pub const OH_AUDIO_STREAM_STATE_AUDIOSTREAM_STATE_INVALID: OhAudioStreamState = -1; +#[allow(unused)] +/// The prepared state. +pub const OH_AUDIO_STREAM_STATE_AUDIOSTREAM_STATE_PREPARED: OhAudioStreamState = 0; +#[allow(unused)] +/// The stream is running. +pub const OH_AUDIO_STREAM_STATE_AUDIOSTREAM_STATE_RUNNING: OhAudioStreamState = 1; +#[allow(unused)] +/// The stream is stopped. +pub const OH_AUDIO_STREAM_STATE_AUDIOSTREAM_STATE_STOPPED: OhAudioStreamState = 2; +#[allow(unused)] +/// The stream is paused. +pub const OH_AUDIO_STREAM_STATE_AUDIOSTREAM_STATE_PAUSED: OhAudioStreamState = 3; +#[allow(unused)] +/// The stream is released. +pub const OH_AUDIO_STREAM_STATE_AUDIOSTREAM_STATE_RELEASED: OhAudioStreamState = 4; +#[allow(unused)] +/// The audio stream states +/// +/// @since 10" +pub type OhAudioStreamState = ::std::os::raw::c_int; + +#[allow(unused)] +pub const OH_AUDIO_STREAM_SOURCE_TYPE_AUDIOSTREAM_SOURCE_TYPE_INVALID: OHAudioStreamSourceType = -1; +#[allow(unused)] +pub const OH_AUDIO_STREAM_SOURCE_TYPE_AUDIOSTREAM_SOURCE_TYPE_MIC: OHAudioStreamSourceType = 0; +#[allow(unused)] +pub const OH_AUDIO_STREAM_SOURCE_TYPE_AUDIOSTREAM_SOURCE_TYPE_VOICE_RECOGNITION: + OHAudioStreamSourceType = 1; +#[allow(unused)] +pub const OH_AUDIO_STREAM_SOURCE_TYPE_AUDIOSTREAM_SOURCE_TYPE_VOICE_COMMUNICATION: + OHAudioStreamSourceType = 7; +/// Defines the audio source type. +/// +/// @since 10 +pub type OHAudioStreamSourceType = ::std::os::raw::c_int; + +#[allow(unused)] +pub const AUDIOSTREAM_INTERRUPT_FORCE: OHAudioInterruptSourceType = 0; +#[allow(unused)] +pub const AUDIOSTREAM_INTERRUPT_SHARE: OHAudioInterruptSourceType = 1; + +/// Defines the audio interrupt source type. +/// +/// @since 10 +pub type OHAudioInterruptSourceType = ::std::os::raw::c_int; + +#[allow(unused)] +pub const AUDIOSTREAM_INTERRUPT_HINT_RESUME: OHAudioInterruptHint = 1; +pub const AUDIOSTREAM_INTERRUPT_HINT_PAUSE: OHAudioInterruptHint = 2; +#[allow(unused)] +pub const AUDIOSTREAM_INTERRUPT_HINT_STOP: OHAudioInterruptHint = 3; +#[allow(unused)] +pub const AUDIOSTREAM_INTERRUPT_HINT_DUCK: OHAudioInterruptHint = 4; +#[allow(unused)] +pub const AUDIOSTREAM_INTERRUPT_HINT_UNDUCK: OHAudioInterruptHint = 5; + +/// Defines the audio interrupt hint type. 
+/// +/// @since 10 +pub type OHAudioInterruptHint = ::std::os::raw::c_int; + +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct OH_AudioStreamBuilderStruct { + _unused: [u8; 0], +} +/// Declaring the audio stream builder. +/// The instance of builder is used for creating audio stream. +/// +/// @since 10 +pub type OhAudioStreamBuilder = OH_AudioStreamBuilderStruct; + +#[repr(C)] +#[derive(Debug, Copy, Clone, Default)] +pub struct OH_AudioRendererStruct { + _unused: [u8; 0], +} +/// Declaring the audio renderer stream. +/// The instance of renderer stream is used for playing audio data. +/// +/// @since 10 +pub type OhAudioRenderer = OH_AudioRendererStruct; + +#[repr(C)] +#[derive(Debug, Copy, Clone, Default)] +pub struct OH_AudioCapturerStruct { + _unused: [u8; 0], +} +/// Declaring the audio capturer stream. +/// The instance of renderer stream is used for capturing audio data. +/// +/// @since 10 +pub type OhAudioCapturer = OH_AudioCapturerStruct; + +type PlaceHolderFn = std::option::Option i32>; + +/// Declaring the callback struct for renderer stream. +/// +/// @since 10 +#[repr(C)] +#[derive(Debug, Copy, Clone, Default)] +pub struct OhAudioRendererCallbacks { + /// This function pointer will point to the callback function that + /// is used to write audio data + pub oh_audio_renderer_on_write_data: ::std::option::Option< + extern "C" fn( + renderer: *mut OhAudioRenderer, + userData: *mut ::std::os::raw::c_void, + buffer: *mut ::std::os::raw::c_void, + length: i32, + ) -> i32, + >, + pub oh_audio_renderer_on_stream_event: PlaceHolderFn, + pub oh_audio_renderer_on_interrupt_event: ::std::option::Option< + extern "C" fn( + renderer: *mut OhAudioRenderer, + userData: *mut ::std::os::raw::c_void, + source_type: OHAudioInterruptSourceType, + hint: OHAudioInterruptHint, + ) -> i32, + >, + pub oh_audio_renderer_on_error: PlaceHolderFn, +} + +/// Declaring the callback struct for capturer stream. +/// +/// @since 10 +#[repr(C)] +#[derive(Debug, Copy, Clone, Default)] +pub struct OhAudioCapturerCallbacks { + /// This function pointer will point to the callback function that + /// is used to read audio data. 
+ pub oh_audio_capturer_on_read_data: ::std::option::Option< + extern "C" fn( + capturer: *mut OhAudioCapturer, + userData: *mut ::std::os::raw::c_void, + buffer: *mut ::std::os::raw::c_void, + length: i32, + ) -> i32, + >, + pub oh_audio_capturer_on_stream_event: PlaceHolderFn, + pub oh_audio_capturer_on_interrupt_event: ::std::option::Option< + extern "C" fn( + capturer: *mut OhAudioCapturer, + userData: *mut ::std::os::raw::c_void, + source_type: OHAudioInterruptSourceType, + hint: OHAudioInterruptHint, + ) -> i32, + >, + pub oh_audio_capturer_on_error: PlaceHolderFn, +} + +#[allow(unused)] +#[link(name = "ohaudio")] +extern "C" { + pub fn OH_AudioRenderer_Release(renderer: *mut OhAudioRenderer) -> OhAudioStreamResult; + pub fn OH_AudioRenderer_Start(renderer: *mut OhAudioRenderer) -> OhAudioStreamResult; + pub fn OH_AudioRenderer_Pause(renderer: *mut OhAudioRenderer) -> OhAudioStreamResult; + pub fn OH_AudioRenderer_Stop(renderer: *mut OhAudioRenderer) -> OhAudioStreamResult; + pub fn OH_AudioRenderer_Flush(renderer: *mut OhAudioRenderer) -> OhAudioStreamResult; + pub fn OH_AudioRenderer_GetCurrentState( + renderer: *mut OhAudioRenderer, + state: *mut OhAudioStreamState, + ) -> OhAudioStreamResult; + pub fn OH_AudioRenderer_GetSamplingRate( + renderer: *mut OhAudioRenderer, + rate: *mut i32, + ) -> OhAudioStreamResult; + pub fn OH_AudioRenderer_GetStreamId( + renderer: *mut OhAudioRenderer, + streamId: *mut u32, + ) -> OhAudioStreamResult; + pub fn OH_AudioRenderer_GetChannelCount( + renderer: *mut OhAudioRenderer, + channelCount: *mut i32, + ) -> OhAudioStreamResult; + pub fn OH_AudioRenderer_GetSampleFormat( + renderer: *mut OhAudioRenderer, + sampleFormat: *mut OhAudioStreamSampleFormat, + ) -> OhAudioStreamResult; + pub fn OH_AudioRenderer_GetLatencyMode( + renderer: *mut OhAudioRenderer, + latencyMode: *mut OhAudioStreamLatencyMode, + ) -> OhAudioStreamResult; + pub fn OH_AudioRenderer_GetRendererInfo( + renderer: *mut OhAudioRenderer, + usage: *mut OhAudioStreamUsage, + content: *mut OhAudioStreamContent, + ) -> OhAudioStreamResult; + pub fn OH_AudioRenderer_GetEncodingType( + renderer: *mut OhAudioRenderer, + encodingType: *mut OhAudioStreamEncodingType, + ) -> OhAudioStreamResult; + /// Create a streamBuilder can be used to open a renderer or capturer client. + /// + /// OH_AudioStreamBuilder_Destroy() must be called when you are done using the builder. + /// + /// @since 10 + /// + /// @param builder The builder reference to the created result. + /// @param type The stream type to be created. {@link #AUDIOSTREAM_TYPE_RERNDERER} or {@link #AUDIOSTREAM_TYPE_CAPTURER} + /// @return {@link #AUDIOSTREAM_SUCCESS} or an undesired error. + pub fn OH_AudioStreamBuilder_Create( + builder: *mut *mut OhAudioStreamBuilder, + type_: OhAudioStreamType, + ) -> OhAudioStreamResult; + /// Destroy a streamBulder. + /// + /// This function must be called when you are done using the builder. + /// + /// @since 10 + /// + /// @param builder Reference provided by OH_AudioStreamBuilder_Create() + /// @return {@link #AUDIOSTREAM_SUCCESS} or au undesired error. 
+ pub fn OH_AudioStreamBuilder_Destroy(builder: *mut OhAudioStreamBuilder) + -> OhAudioStreamResult; + pub fn OH_AudioStreamBuilder_SetSamplingRate( + builder: *mut OhAudioStreamBuilder, + rate: i32, + ) -> OhAudioStreamResult; + pub fn OH_AudioStreamBuilder_SetChannelCount( + builder: *mut OhAudioStreamBuilder, + channelCount: i32, + ) -> OhAudioStreamResult; + pub fn OH_AudioStreamBuilder_SetSampleFormat( + builder: *mut OhAudioStreamBuilder, + format: OhAudioStreamSampleFormat, + ) -> OhAudioStreamResult; + pub fn OH_AudioStreamBuilder_SetEncodingType( + builder: *mut OhAudioStreamBuilder, + encodingType: OhAudioStreamEncodingType, + ) -> OhAudioStreamResult; + pub fn OH_AudioStreamBuilder_SetLatencyMode( + builder: *mut OhAudioStreamBuilder, + latencyMode: OhAudioStreamLatencyMode, + ) -> OhAudioStreamResult; + pub fn OH_AudioStreamBuilder_SetRendererInfo( + builder: *mut OhAudioStreamBuilder, + usage: OhAudioStreamUsage, + content: OhAudioStreamContent, + ) -> OhAudioStreamResult; + pub fn OH_AudioStreamBuilder_SetCapturerInfo( + builder: *mut OhAudioStreamBuilder, + sourceType: OHAudioStreamSourceType, + ) -> OhAudioStreamResult; + pub fn OH_AudioStreamBuilder_SetRendererCallback( + builder: *mut OhAudioStreamBuilder, + callbacks: OhAudioRendererCallbacks, + userData: *mut ::std::os::raw::c_void, + ) -> OhAudioStreamResult; + pub fn OH_AudioStreamBuilder_SetCapturerCallback( + builder: *mut OhAudioStreamBuilder, + callbacks: OhAudioCapturerCallbacks, + userdata: *mut ::std::os::raw::c_void, + ) -> OhAudioStreamResult; + pub fn OH_AudioStreamBuilder_GenerateRenderer( + builder: *mut OhAudioStreamBuilder, + audioRenderer: *mut *mut OhAudioRenderer, + ) -> OhAudioStreamResult; + pub fn OH_AudioStreamBuilder_GenerateCapturer( + builder: *mut OhAudioStreamBuilder, + audioCapturer: *mut *mut OhAudioCapturer, + ) -> OhAudioStreamResult; + pub fn OH_AudioCapturer_Start(capturer: *mut OhAudioCapturer) -> OhAudioStreamResult; + pub fn OH_AudioCapturer_Release(capturer: *mut OhAudioCapturer) -> OhAudioStreamResult; + pub fn OH_AudioCapturer_Stop(capturer: *mut OhAudioCapturer) -> OhAudioStreamResult; +} diff --git a/util/src/ohos_binding/camera.rs b/util/src/ohos_binding/camera.rs new file mode 100644 index 0000000000000000000000000000000000000000..4121fcf328635c2b9941ca119a0f3b835880df59 --- /dev/null +++ b/util/src/ohos_binding/camera.rs @@ -0,0 +1,158 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
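
For illustration only (not part of this patch): the OH audio binding above is driven through AudioContext in util/src/ohos_binding/audio/mod.rs, roughly in the sequence sketched below. The callback body, format values, and the names on_write_data / start_playback are hypothetical, and the audio module's items are assumed to be in scope.

    // Data callback matching AudioProcessCb::RendererCb's first parameter.
    extern "C" fn on_write_data(
        _renderer: *mut capi::OhAudioRenderer,
        _user_data: *mut std::os::raw::c_void,
        _buffer: *mut std::os::raw::c_void,
        _length: i32,
    ) -> i32 {
        // Fill `_buffer` with up to `_length` bytes of PCM data here.
        0
    }

    fn start_playback() -> Result<(), OAErr> {
        // 16-bit samples, 48 kHz, stereo; init() builds the stream and
        // registers the callback, start() begins rendering.
        let mut ctx = AudioContext::new(AudioStreamType::Render);
        ctx.init(
            16,
            48000,
            2,
            AudioProcessCb::RendererCb(Some(on_write_data), None),
            std::ptr::null_mut(),
        )?;
        ctx.start()
    }
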
+ +use std::ffi::CString; +use std::os::raw::{c_int, c_void}; +use std::ptr; +use std::sync::Arc; + +use anyhow::{bail, Context, Result}; + +use super::hwf_adapter::camera::{ + BrokenProcessFn, BufferProcessFn, CamFuncTable, OhCameraCtx, ProfileRecorder, +}; +use super::hwf_adapter::hwf_adapter_camera_api; + +// OH camera framework's related definitions +#[allow(unused)] +pub const CAMERA_FORMAT_YCBCR420: i32 = 2; +#[allow(unused)] +pub const CAMERA_FORMAT_RGB18888: i32 = 3; +pub const CAMERA_FORMAT_YUV420SP: i32 = 1003; +pub const CAMERA_FORMAT_NV12: i32 = 1004; +pub const CAMERA_FORMAT_YUYV422: i32 = 1005; +pub const CAMERA_FORMAT_MJPEG: i32 = 2000; + +#[derive(Clone)] +pub struct OhCamera { + ctx: *mut OhCameraCtx, + capi: Arc, +} + +impl Drop for OhCamera { + fn drop(&mut self) { + self.release_camera(); + self.destroy_ctx(); + } +} + +impl OhCamera { + pub fn new(id: String) -> Result<(OhCamera, i32)> { + let capi = hwf_adapter_camera_api(); + // SAFETY: We call related API sequentially for specified ctx. + let mut ctx = unsafe { (capi.create_ctx)() }; + if ctx.is_null() { + bail!("OH Camera: failed to create camera ctx"); + } + let id_c = CString::new(id).with_context(|| "failed to create CString id")?; + let fmt_cnt; + // SAFETY: We call related API sequentially for specified ctx. + unsafe { + let n = (capi.init_camera)(ctx, id_c.as_ptr()); + if n < 0 { + (capi.destroy_ctx)(ptr::addr_of_mut!(ctx)); + bail!("OH Camera: failed to init cameras"); + } + + fmt_cnt = (capi.init_profiles)(ctx); + if fmt_cnt < 0 { + (capi.destroy_ctx)(ptr::addr_of_mut!(ctx)); + bail!("OH Camera: failed to init profiles"); + } + } + if fmt_cnt > i32::from(u8::MAX) { + bail!("Invalid format counts: {fmt_cnt}"); + } + Ok((Self { ctx, capi }, fmt_cnt)) + } + + pub fn release_camera(&self) { + // SAFETY: We call related API sequentially for specified ctx. + unsafe { (self.capi.release)(self.ctx) } + } + + pub fn destroy_ctx(&mut self) { + // SAFETY: We call related API sequentially for specified ctx. + unsafe { (self.capi.destroy_ctx)(ptr::addr_of_mut!(self.ctx)) } + } + + pub fn set_fmt(&self, profile_idx: i32) -> Result<()> { + let ret = + // SAFETY: We call related API sequentially for specified ctx. + unsafe { (self.capi.set_profile)(self.ctx, profile_idx as c_int) }; + if ret < 0 { + bail!("OH Camera: failed to get camera profile"); + } + Ok(()) + } + + pub fn start_stream( + &self, + buffer_proc: BufferProcessFn, + broken_proc: BrokenProcessFn, + ) -> Result<()> { + // SAFETY: We call related API sequentially for specified ctx. + unsafe { + if (self.capi.create_session)(self.ctx) != 0 { + bail!("OH Camera: failed to create session"); + } + if (self.capi.pre_start)(self.ctx, buffer_proc, broken_proc) != 0 { + bail!("OH Camera: failed to prestart camera stream"); + } + if (self.capi.start)(self.ctx) != 0 { + bail!("OH Camera: failed to start camera stream"); + } + } + Ok(()) + } + + pub fn reset_camera(&self, id: String) -> Result<()> { + let id_cstr = CString::new(id).with_context(|| "failed to create CString id")?; + // SAFETY: We call related API sequentially for specified ctx. + unsafe { + (self.capi.init_camera)(self.ctx, id_cstr.as_ptr()); + (self.capi.init_profiles)(self.ctx); + } + Ok(()) + } + + pub fn stop_stream(&self) { + // SAFETY: We call related API sequentially for specified ctx. 
+        unsafe {
+            (self.capi.stop_output)(self.ctx);
+            (self.capi.release_session)(self.ctx);
+        }
+    }
+
+    pub fn get_profile(&self, profile_idx: i32) -> Result<(i32, i32, i32, i32)> {
+        let pr = ProfileRecorder::default();
+        // SAFETY: We call related API sequentially for specified ctx.
+        unsafe {
+            let ret = (self.capi.get_profile)(
+                self.ctx,
+                profile_idx as c_int,
+                ptr::addr_of!(pr) as *mut c_void,
+            );
+            if ret < 0 {
+                bail!("OH Camera: failed to get profile {}", profile_idx);
+            }
+        }
+        Ok((pr.fmt, pr.width, pr.height, pr.fps))
+    }
+
+    pub fn next_frame(&self) {
+        // SAFETY: We call related API sequentially for specified ctx.
+        unsafe {
+            (self.capi.allow_next_frame)(self.ctx);
+        }
+    }
+}
diff --git a/util/src/ohos_binding/hwf_adapter/camera.rs b/util/src/ohos_binding/hwf_adapter/camera.rs
new file mode 100644
index 0000000000000000000000000000000000000000..bbb2074279b2a0e774b4c79c07f91af0efaccbe4
--- /dev/null
+++ b/util/src/ohos_binding/hwf_adapter/camera.rs
@@ -0,0 +1,88 @@
+// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
+
+use std::os::raw::{c_char, c_int, c_void};
+
+use anyhow::{Context, Result};
+use libloading::os::unix::Symbol as RawSymbol;
+use libloading::Library;
+
+use crate::get_libfn;
+
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct OhCameraCtx {
+    _unused: [u8; 0],
+}
+
+#[repr(C)]
+#[derive(Default)]
+pub struct ProfileRecorder {
+    pub fmt: i32,
+    pub width: i32,
+    pub height: i32,
+    pub fps: i32,
+}
+
+pub type BufferProcessFn = unsafe extern "C" fn(src_buffer: u64, length: i32, camid: *const c_char);
+pub type BrokenProcessFn = unsafe extern "C" fn(camid: *const c_char);
+
+type OhcamCreateCtxFn = unsafe extern "C" fn() -> *mut OhCameraCtx;
+type OhcamCreateSessionFn = unsafe extern "C" fn(*mut OhCameraCtx) -> c_int;
+type OhcamReleaseSessionFn = unsafe extern "C" fn(*mut OhCameraCtx);
+type OhcamInitCameraFn = unsafe extern "C" fn(*mut OhCameraCtx, *const c_char) -> c_int;
+type OhcamInitProfilesFn = unsafe extern "C" fn(*mut OhCameraCtx) -> c_int;
+type OhcamGetProfileFn = unsafe extern "C" fn(*mut OhCameraCtx, c_int, *mut c_void) -> c_int;
+type OhcamSetProfileFn = unsafe extern "C" fn(*mut OhCameraCtx, c_int) -> c_int;
+type OhcamPreStartFn =
+    unsafe extern "C" fn(*mut OhCameraCtx, BufferProcessFn, BrokenProcessFn) -> c_int;
+type OhcamStartFn = unsafe extern "C" fn(*mut OhCameraCtx) -> c_int;
+type OhcamStopOutputFn = unsafe extern "C" fn(*mut OhCameraCtx);
+type OhcamReleaseFn = unsafe extern "C" fn(*mut OhCameraCtx);
+type OhcamDestroyCtxFn = unsafe extern "C" fn(*mut *mut OhCameraCtx);
+type OhcamAllowNextFrameFn = unsafe extern "C" fn(*mut OhCameraCtx);
+
+pub struct CamFuncTable {
+    pub create_ctx: RawSymbol<OhcamCreateCtxFn>,
+    pub create_session: RawSymbol<OhcamCreateSessionFn>,
+    pub release_session: RawSymbol<OhcamReleaseSessionFn>,
+    pub init_camera: RawSymbol<OhcamInitCameraFn>,
+    pub init_profiles: RawSymbol<OhcamInitProfilesFn>,
+    pub get_profile: RawSymbol<OhcamGetProfileFn>,
+    pub set_profile: RawSymbol<OhcamSetProfileFn>,
+    pub pre_start: RawSymbol<OhcamPreStartFn>,
+    pub start: RawSymbol<OhcamStartFn>,
+    pub stop_output: RawSymbol<OhcamStopOutputFn>,
+    pub release: RawSymbol<OhcamReleaseFn>,
+    pub destroy_ctx: RawSymbol<OhcamDestroyCtxFn>,
+    pub allow_next_frame: RawSymbol<OhcamAllowNextFrameFn>,
+}
+
+impl CamFuncTable {
+    pub unsafe fn new(library: &Library) -> Result<Self> {
+        Ok(Self {
+            create_ctx: get_libfn!(library, OhcamCreateCtxFn, OhcamCreateCtx),
+            create_session: get_libfn!(library, OhcamCreateSessionFn, OhcamCreateSession),
+            release_session: get_libfn!(library, OhcamReleaseSessionFn, OhcamReleaseSession),
+            init_camera: get_libfn!(library, OhcamInitCameraFn, OhcamInitCamera),
+            init_profiles: get_libfn!(library, OhcamInitProfilesFn, OhcamInitProfiles),
+            get_profile: get_libfn!(library, OhcamGetProfileFn, OhcamGetProfile),
+            set_profile: get_libfn!(library, OhcamSetProfileFn, OhcamSetProfile),
+            pre_start: get_libfn!(library, OhcamPreStartFn, OhcamPreStart),
+            start: get_libfn!(library, OhcamStartFn, OhcamStart),
+            stop_output: get_libfn!(library, OhcamStopOutputFn, OhcamStopOutput),
+            release: get_libfn!(library, OhcamReleaseFn, OhcamRelease),
+            destroy_ctx: get_libfn!(library, OhcamDestroyCtxFn, OhcamDestroyCtx),
+            allow_next_frame: get_libfn!(library, OhcamAllowNextFrameFn, OhcamAllowNextFrame),
+        })
+    }
+}
diff --git a/util/src/ohos_binding/hwf_adapter/mod.rs b/util/src/ohos_binding/hwf_adapter/mod.rs
new file mode 100644
index 0000000000000000000000000000000000000000..14e4e0c24657c12779b823239c6f914eab259b55
--- /dev/null
+++ b/util/src/ohos_binding/hwf_adapter/mod.rs
@@ -0,0 +1,119 @@
+// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
+
+#[cfg(feature = "usb_camera_oh")]
+pub mod camera;
+#[cfg(feature = "usb_host")]
+pub mod usb;
+
+#[cfg(feature = "scream_ohaudio")]
+pub mod volume;
+
+use std::ffi::OsStr;
+use std::sync::Arc;
+
+use anyhow::{Context, Result};
+use libloading::Library;
+use log::error;
+use once_cell::sync::Lazy;
+
+#[cfg(feature = "usb_camera_oh")]
+use camera::CamFuncTable;
+#[cfg(feature = "usb_host")]
+use usb::UsbFuncTable;
+#[cfg(feature = "scream_ohaudio")]
+use volume::VolumeFuncTable;
+
+static LIB_HWF_ADAPTER: Lazy<LibHwfAdapter> = Lazy::new(||
+    // SAFETY: The dynamic library should be always existing.
+    unsafe {
+        LibHwfAdapter::new(OsStr::new("/system/lib64/libhwf_adapter.so"))
+            .map_err(|e| {
+                error!("failed to init LibHwfAdapter with error: {:?}", e);
+                e
+            })
+            .unwrap()
+    });
+
+struct LibHwfAdapter {
+    #[allow(unused)]
+    library: Library,
+    #[cfg(feature = "usb_camera_oh")]
+    camera: Arc<CamFuncTable>,
+    #[cfg(feature = "usb_host")]
+    usb: Arc<UsbFuncTable>,
+    #[cfg(feature = "scream_ohaudio")]
+    volume: Arc<VolumeFuncTable>,
+}
+
+impl LibHwfAdapter {
+    unsafe fn new(library_name: &OsStr) -> Result<Self> {
+        let library =
+            Library::new(library_name).with_context(|| "failed to load hwf_adapter library")?;
+
+        #[cfg(feature = "usb_camera_oh")]
+        let camera = Arc::new(
+            CamFuncTable::new(&library).with_context(|| "failed to init camera function table")?,
+        );
+
+        #[cfg(feature = "usb_host")]
+        let usb = Arc::new(
+            UsbFuncTable::new(&library).with_context(|| "failed to init usb function table")?,
+        );
+
+        #[cfg(feature = "scream_ohaudio")]
+        let volume = Arc::new(
+            VolumeFuncTable::new(&library)
+                .with_context(|| "failed to init volume function table")?,
+        );
+
+        Ok(Self {
+            library,
+            #[cfg(feature = "usb_camera_oh")]
+            camera,
+            #[cfg(feature = "usb_host")]
+            usb,
+            #[cfg(feature = "scream_ohaudio")]
+            volume,
+        })
+    }
+
+    #[cfg(feature = "usb_camera_oh")]
+    fn get_camera_api(&self) -> Arc<CamFuncTable> {
+        self.camera.clone()
+    }
+
+    #[cfg(feature = "usb_host")]
+    fn get_usb_api(&self) -> Arc<UsbFuncTable> {
+        self.usb.clone()
+    }
+
+    #[cfg(feature = "scream_ohaudio")]
+    fn get_volume_api(&self) -> Arc<VolumeFuncTable> {
+        self.volume.clone()
+    }
+}
+
+#[cfg(feature = "usb_camera_oh")]
+pub fn hwf_adapter_camera_api() -> Arc<CamFuncTable> {
+    LIB_HWF_ADAPTER.get_camera_api()
+}
+
+#[cfg(feature = "usb_host")]
+pub fn hwf_adapter_usb_api() -> Arc<UsbFuncTable> {
+    LIB_HWF_ADAPTER.get_usb_api()
+}
+
+#[cfg(feature = "scream_ohaudio")]
+pub fn hwf_adapter_volume_api() -> Arc<VolumeFuncTable> {
+    LIB_HWF_ADAPTER.get_volume_api()
+}
diff --git a/util/src/ohos_binding/hwf_adapter/usb.rs b/util/src/ohos_binding/hwf_adapter/usb.rs
new file mode 100644
index 0000000000000000000000000000000000000000..abb3cc7479b914d571d00935c2bee457dfa7380c
--- /dev/null
+++ b/util/src/ohos_binding/hwf_adapter/usb.rs
@@ -0,0 +1,45 @@
+// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
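
A sketch, not part of this patch, of how guest-facing code is expected to reach the volume plumbing loaded above through the notifier API exported by util/src/ohos_binding/audio/mod.rs; the LogNotifier type and watch_system_volume function are hypothetical names, and the audio module's items are assumed to be in scope.

    struct LogNotifier;

    impl GuestVolumeNotifier for LogNotifier {
        fn notify(&self, vol: u32) {
            // Forward the new OH system volume to the guest device here.
            log::info!("OH system volume changed to {}", vol);
        }
    }

    fn watch_system_volume() {
        // Callbacks arrive via on_ohos_volume_changed() once registered.
        register_guest_volume_notifier(std::sync::Arc::new(LogNotifier));
        log::info!(
            "current volume {} (range {}..={})",
            get_ohos_volume(),
            get_ohos_volume_min(),
            get_ohos_volume_max()
        );
    }
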
+
+use std::os::raw::c_int;
+
+use anyhow::{Context, Result};
+use libloading::os::unix::Symbol as RawSymbol;
+use libloading::Library;
+
+use crate::get_libfn;
+
+#[allow(non_snake_case)]
+#[repr(C)]
+#[derive(Eq, PartialEq, Clone, Copy, Debug)]
+pub struct OhusbDevice {
+    pub busNum: u8,
+    pub devAddr: u8,
+    pub fd: c_int,
+}
+
+type OhusbOpenDeviceFn = unsafe extern "C" fn(*mut OhusbDevice) -> c_int;
+type OhusbCloseDeviceFn = unsafe extern "C" fn(*mut OhusbDevice) -> c_int;
+
+pub struct UsbFuncTable {
+    pub open_device: RawSymbol<OhusbOpenDeviceFn>,
+    pub close_device: RawSymbol<OhusbCloseDeviceFn>,
+}
+
+impl UsbFuncTable {
+    pub unsafe fn new(library: &Library) -> Result<Self> {
+        Ok(Self {
+            open_device: get_libfn!(library, OhusbOpenDeviceFn, OhusbOpenDevice),
+            close_device: get_libfn!(library, OhusbCloseDeviceFn, OhusbCloseDevice),
+        })
+    }
+}
diff --git a/util/src/ohos_binding/hwf_adapter/volume.rs b/util/src/ohos_binding/hwf_adapter/volume.rs
new file mode 100644
index 0000000000000000000000000000000000000000..b730e143308c5721b93b0b02e1293559f76e712a
--- /dev/null
+++ b/util/src/ohos_binding/hwf_adapter/volume.rs
@@ -0,0 +1,51 @@
+// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
+
+use std::os::raw::c_int;
+
+use anyhow::{Context, Result};
+use libloading::os::unix::Symbol as RawSymbol;
+use libloading::Library;
+
+use crate::get_libfn;
+
+pub type VolumeChangedCallBack = unsafe extern "C" fn(c_int);
+
+type OhSysAudioGetVolumeFn = unsafe extern "C" fn() -> c_int;
+type OhSysAudioGetMaxVolumeFn = unsafe extern "C" fn() -> c_int;
+type OhSysAudioGetMinVolumeFn = unsafe extern "C" fn() -> c_int;
+type OhSysAudioSetVolumeFn = unsafe extern "C" fn(c_int);
+type OhSysAudioRegisterVolumeChangeFn = unsafe extern "C" fn(VolumeChangedCallBack) -> c_int;
+
+pub struct VolumeFuncTable {
+    pub get_volume: RawSymbol<OhSysAudioGetVolumeFn>,
+    pub get_max_volume: RawSymbol<OhSysAudioGetMaxVolumeFn>,
+    pub get_min_volume: RawSymbol<OhSysAudioGetMinVolumeFn>,
+    pub set_volume: RawSymbol<OhSysAudioSetVolumeFn>,
+    pub register_volume_change: RawSymbol<OhSysAudioRegisterVolumeChangeFn>,
+}
+
+impl VolumeFuncTable {
+    pub unsafe fn new(library: &Library) -> Result<Self> {
+        Ok(Self {
+            get_volume: get_libfn!(library, OhSysAudioGetVolumeFn, OhSysAudioGetVolume),
+            get_max_volume: get_libfn!(library, OhSysAudioGetMaxVolumeFn, OhSysAudioGetMaxVolume),
+            get_min_volume: get_libfn!(library, OhSysAudioGetMinVolumeFn, OhSysAudioGetMinVolume),
+            set_volume: get_libfn!(library, OhSysAudioSetVolumeFn, OhSysAudioSetVolume),
+            register_volume_change: get_libfn!(
+                library,
+                OhSysAudioRegisterVolumeChangeFn,
+                OhSysAudioRegisterVolumeChange
+            ),
+        })
+    }
+}
diff --git a/util/src/ohos_binding/misc.rs b/util/src/ohos_binding/misc.rs
new file mode 100644
index 0000000000000000000000000000000000000000..27b564557e8f96715d1c2e5e78fd6459f14122b4
--- /dev/null
+++ b/util/src/ohos_binding/misc.rs
@@ -0,0 +1,89 @@
+// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::fs::OpenOptions; + +use anyhow::{bail, Context, Result}; +use vmm_sys_util::ioctl::{ioctl_with_mut_ref, ioctl_with_ref}; +use vmm_sys_util::{ioctl_ioc_nr, ioctl_ior_nr, ioctl_iow_nr}; + +const ACCESS_TOKEN_ID_IOCTL_BASE: u32 = b'A' as u32; +const GET_FTOKEN_ID: u32 = 3; +const SET_FTOKEN_ID: u32 = 4; +const ACCESS_TOKEN_ID_DEV: &str = "/dev/access_token_id"; + +ioctl_iow_nr!( + ACCESS_TOKENID_SET_FTOKENID, + ACCESS_TOKEN_ID_IOCTL_BASE, + SET_FTOKEN_ID, + ::std::os::raw::c_ulonglong +); +ioctl_ior_nr!( + ACCESS_TOKENID_GET_FTOKENID, + ACCESS_TOKEN_ID_IOCTL_BASE, + GET_FTOKEN_ID, + ::std::os::raw::c_ulonglong +); + +fn set_firstcaller_tokenid(id: u64) -> Result<()> { + let fd = OpenOptions::new() + .read(true) + .write(true) + .open(ACCESS_TOKEN_ID_DEV) + .with_context(|| { + format!( + "Failed to open {} for set_firstcaller_tokenid.", + ACCESS_TOKEN_ID_DEV + ) + })?; + // SAFETY: ioctl is safe. called file is '/dev/access_token_id' fd and we check the return. + let ret = unsafe { ioctl_with_ref(&fd, ACCESS_TOKENID_SET_FTOKENID(), &id) }; + if ret != 0 { + bail!( + "Failed to set first caller tokenid: {ret}, error info: {}", + std::io::Error::last_os_error() + ); + } + Ok(()) +} + +fn get_firstcaller_tokenid() -> Result { + let fd = OpenOptions::new() + .read(true) + .write(true) + .open(ACCESS_TOKEN_ID_DEV) + .with_context(|| { + format!( + "Failed to open {} for get_firstcaller_tokenid.", + ACCESS_TOKEN_ID_DEV + ) + })?; + let mut id: u64 = 0; + // SAFETY: ioctl is safe. called file is '/dev/access_token_id' fd and we check the return. + let ret = unsafe { ioctl_with_mut_ref(&fd, ACCESS_TOKENID_GET_FTOKENID(), &mut id) }; + if ret != 0 { + bail!( + "Failed to get first caller tokenid: {ret}, error info: {}", + std::io::Error::last_os_error() + ); + } + Ok(id) +} + +pub fn bound_tokenid(token_id: u64) -> Result<()> { + if token_id == 0 { + bail!("UI token ID not passed."); + } else if token_id != get_firstcaller_tokenid()? { + set_firstcaller_tokenid(token_id)?; + } + Ok(()) +} diff --git a/util/src/ohos_binding/mod.rs b/util/src/ohos_binding/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..2e6a3cfcc775e570537c87e7a805151998c465ee --- /dev/null +++ b/util/src/ohos_binding/mod.rs @@ -0,0 +1,31 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +#[cfg(feature = "scream_ohaudio")] +pub mod audio; +#[cfg(feature = "usb_camera_oh")] +pub mod camera; +pub mod misc; +#[cfg(feature = "usb_host")] +pub mod usb; + +#[cfg(feature = "usb_camera_oh")] +mod hwf_adapter; + +#[macro_export] +macro_rules! 
get_libfn {
+    ( $lib: ident, $tname: ident, $fname: ident ) => {
+        $lib.get::<$tname>(stringify!($fname).as_bytes())
+            .with_context(|| format!("failed to get function {}", stringify!($fname)))?
+            .into_raw()
+    };
+}
diff --git a/util/src/ohos_binding/usb.rs b/util/src/ohos_binding/usb.rs
new file mode 100644
index 0000000000000000000000000000000000000000..a8d227bd316b3b9ff6f546f5efc8e26fdd4ef6a7
--- /dev/null
+++ b/util/src/ohos_binding/usb.rs
@@ -0,0 +1,50 @@
+// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
+
+pub use super::hwf_adapter::usb::OhusbDevice;
+
+use std::sync::Arc;
+
+use anyhow::{bail, Result};
+
+use super::hwf_adapter::hwf_adapter_usb_api;
+use super::hwf_adapter::usb::UsbFuncTable;
+
+#[derive(Clone)]
+pub struct OhUsb {
+    capi: Arc<UsbFuncTable>,
+}
+
+impl OhUsb {
+    pub fn new() -> Result<Self> {
+        let capi = hwf_adapter_usb_api();
+        Ok(Self { capi })
+    }
+
+    pub fn open_device(&self, dev_handle: *mut OhusbDevice) -> Result<i32> {
+        // SAFETY: We call related API sequentially for specified ctx.
+        let ret = unsafe { (self.capi.open_device)(dev_handle) };
+        if ret < 0 {
+            bail!("OH USB: open device failed.");
+        }
+        Ok(ret)
+    }
+
+    pub fn close_device(&self, dev_handle: *mut OhusbDevice) -> Result<i32> {
+        // SAFETY: We call related API sequentially for specified ctx.
+        let ret = unsafe { (self.capi.close_device)(dev_handle) };
+        if ret < 0 {
+            bail!("OH USB: close device failed.");
+        }
+        Ok(ret)
+    }
+}
diff --git a/util/src/pixman.rs b/util/src/pixman.rs
new file mode 100644
index 0000000000000000000000000000000000000000..2cd2e27bdc3348c4990e9fba53f11951700d9d76
--- /dev/null
+++ b/util/src/pixman.rs
@@ -0,0 +1,291 @@
+// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved.
+//
+// StratoVirt is licensed under Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan
+// PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PSL v2 for more details.
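
An illustrative sketch, not part of this patch, of the expected call pattern for the OhUsb wrapper defined in util/src/ohos_binding/usb.rs above; the bus/address values and the probe_usb_device name are placeholders, and the fd field is presumably filled in by the adapter library on a successful open.

    fn probe_usb_device() -> anyhow::Result<()> {
        let usb = OhUsb::new()?;
        let mut dev = OhusbDevice {
            busNum: 1,
            devAddr: 2,
            fd: -1,
        };
        // open_device() hands the request to libhwf_adapter.so and bails on a
        // negative return code.
        usb.open_device(std::ptr::addr_of_mut!(dev))?;
        // ... use dev.fd for USB host emulation here ...
        usb.close_device(std::ptr::addr_of_mut!(dev))?;
        Ok(())
    }
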
+#![allow(non_camel_case_types)] +#![allow(non_snake_case)] +use std::ptr; + +pub type pixman_bool_t = libc::c_int; + +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct pixman_box16 { + pub x1: i16, + pub y1: i16, + pub x2: i16, + pub y2: i16, +} +pub type pixman_box16_t = pixman_box16; + +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct pixman_color { + pub red: u16, + pub green: u16, + pub blue: u16, + pub alpha: u16, +} +pub type pixman_color_t = pixman_color; + +#[repr(u32)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum pixman_format_code_t { + PIXMAN_a8r8g8b8 = 537036936, + PIXMAN_x8r8g8b8 = 537004168, + PIXMAN_a8b8g8r8 = 537102472, + PIXMAN_x8b8g8r8 = 537069704, + PIXMAN_b8g8r8a8 = 537430152, + PIXMAN_b8g8r8x8 = 537397384, + PIXMAN_r8g8b8a8 = 537495688, + PIXMAN_r8g8b8x8 = 537462920, + PIXMAN_x14r6g6b6 = 537003622, + PIXMAN_x2r10g10b10 = 537004714, + PIXMAN_a2r10g10b10 = 537012906, + PIXMAN_x2b10g10r10 = 537070250, + PIXMAN_a2b10g10r10 = 537078442, + PIXMAN_a8r8g8b8_sRGB = 537561224, + PIXMAN_r8g8b8 = 402786440, + PIXMAN_b8g8r8 = 402851976, + PIXMAN_r5g6b5 = 268567909, + PIXMAN_b5g6r5 = 268633445, + PIXMAN_a1r5g5b5 = 268571989, + PIXMAN_x1r5g5b5 = 268567893, + PIXMAN_a1b5g5r5 = 268637525, + PIXMAN_x1b5g5r5 = 268633429, + PIXMAN_a4r4g4b4 = 268584004, + PIXMAN_x4r4g4b4 = 268567620, + PIXMAN_a4b4g4r4 = 268649540, + PIXMAN_x4b4g4r4 = 268633156, + PIXMAN_a8 = 134316032, + PIXMAN_r3g3b2 = 134349618, + PIXMAN_b2g3r3 = 134415154, + PIXMAN_a2r2g2b2 = 134357538, + PIXMAN_a2b2g2r2 = 134423074, + PIXMAN_c8 = 134479872, + PIXMAN_g8 = 134545408, + PIXMAN_x4a4 = 134299648, + PIXMAN_a4 = 67190784, + PIXMAN_r1g2b1 = 67240225, + PIXMAN_b1g2r1 = 67305761, + PIXMAN_a1r1g1b1 = 67244305, + PIXMAN_a1b1g1r1 = 67309841, + PIXMAN_c4 = 67371008, + PIXMAN_g4 = 67436544, + PIXMAN_a1 = 16846848, + PIXMAN_g1 = 17104896, + PIXMAN_yuy2 = 268828672, + PIXMAN_yv12 = 201785344, +} + +#[repr(C)] +#[derive(Default, Debug, Copy, Clone)] +pub struct pixman_image { + _unused: [u8; 0], +} +pub type pixman_image_t = pixman_image; + +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct pixman_rectangle16 { + pub x: i16, + pub y: i16, + pub width: u16, + pub height: u16, +} +pub type pixman_rectangle16_t = pixman_rectangle16; + +#[repr(C)] +#[derive(Debug, Default, Clone, Copy)] +pub struct pixman_region16_data { + pub size: libc::c_long, + pub numRects: libc::c_long, +} +pub type pixman_region16_data_t = pixman_region16_data; + +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct pixman_region16 { + pub extents: pixman_box16_t, + pub data: *mut pixman_region16_data_t, +} +pub type pixman_region16_t = pixman_region16; +impl Default for pixman_region16 { + fn default() -> Self { + pixman_region16 { + extents: pixman_box16_t::default(), + data: ptr::null_mut(), + } + } +} + +#[repr(u32)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum pixman_op_t { + PIXMAN_OP_CLEAR = 0, + PIXMAN_OP_SRC = 1, + PIXMAN_OP_DST = 2, + PIXMAN_OP_OVER = 3, + PIXMAN_OP_OVER_REVERSE = 4, + PIXMAN_OP_IN = 5, + PIXMAN_OP_IN_REVERSE = 6, + PIXMAN_OP_OUT = 7, + PIXMAN_OP_OUT_REVERSE = 8, + PIXMAN_OP_ATOP = 9, + PIXMAN_OP_ATOP_REVERSE = 10, + PIXMAN_OP_XOR = 11, + PIXMAN_OP_ADD = 12, + PIXMAN_OP_SATURATE = 13, + PIXMAN_OP_DISJOINT_CLEAR = 16, + PIXMAN_OP_DISJOINT_SRC = 17, + PIXMAN_OP_DISJOINT_DST = 18, + PIXMAN_OP_DISJOINT_OVER = 19, + PIXMAN_OP_DISJOINT_OVER_REVERSE = 20, + PIXMAN_OP_DISJOINT_IN = 21, + PIXMAN_OP_DISJOINT_IN_REVERSE = 22, + PIXMAN_OP_DISJOINT_OUT = 23, + 
PIXMAN_OP_DISJOINT_OUT_REVERSE = 24, + PIXMAN_OP_DISJOINT_ATOP = 25, + PIXMAN_OP_DISJOINT_ATOP_REVERSE = 26, + PIXMAN_OP_DISJOINT_XOR = 27, + PIXMAN_OP_CONJOINT_CLEAR = 32, + PIXMAN_OP_CONJOINT_SRC = 33, + PIXMAN_OP_CONJOINT_DST = 34, + PIXMAN_OP_CONJOINT_OVER = 35, + PIXMAN_OP_CONJOINT_OVER_REVERSE = 36, + PIXMAN_OP_CONJOINT_IN = 37, + PIXMAN_OP_CONJOINT_IN_REVERSE = 38, + PIXMAN_OP_CONJOINT_OUT = 39, + PIXMAN_OP_CONJOINT_OUT_REVERSE = 40, + PIXMAN_OP_CONJOINT_ATOP = 41, + PIXMAN_OP_CONJOINT_ATOP_REVERSE = 42, + PIXMAN_OP_CONJOINT_XOR = 43, + PIXMAN_OP_MULTIPLY = 48, + PIXMAN_OP_SCREEN = 49, + PIXMAN_OP_OVERLAY = 50, + PIXMAN_OP_DARKEN = 51, + PIXMAN_OP_LIGHTEN = 52, + PIXMAN_OP_COLOR_DODGE = 53, + PIXMAN_OP_COLOR_BURN = 54, + PIXMAN_OP_HARD_LIGHT = 55, + PIXMAN_OP_SOFT_LIGHT = 56, + PIXMAN_OP_DIFFERENCE = 57, + PIXMAN_OP_EXCLUSION = 58, + PIXMAN_OP_HSL_HUE = 59, + PIXMAN_OP_HSL_SATURATION = 60, + PIXMAN_OP_HSL_COLOR = 61, + PIXMAN_OP_HSL_LUMINOSITY = 62, +} + +pub type pixman_image_destroy_func_t = + Option; + +/// # Safety +/// +/// Caller should has valid image and data. +pub unsafe extern "C" fn virtio_gpu_unref_resource_callback( + _image: *mut pixman_image_t, + data: *mut libc::c_void, +) { + // The safety of this function is guaranteed by caller. + pixman_image_unref(data.cast()); +} + +fn pixman_format_reshift(val: u32, ofs: u32, num: u32) -> u32 { + ((val >> (ofs)) & ((1 << (num)) - 1)) << ((val >> 22) & 3) +} + +pub fn pixman_format_bpp(val: u32) -> u8 { + pixman_format_reshift(val, 24, 8) as u8 +} + +pub fn pixman_format_a(val: u32) -> u8 { + pixman_format_reshift(val, 12, 4) as u8 +} + +pub fn pixman_format_r(val: u32) -> u8 { + pixman_format_reshift(val, 8, 4) as u8 +} + +pub fn pixman_format_g(val: u32) -> u8 { + pixman_format_reshift(val, 4, 4) as u8 +} + +pub fn pixman_format_b(val: u32) -> u8 { + pixman_format_reshift(val, 0, 4) as u8 +} + +pub fn pixman_format_depth(val: u32) -> u8 { + pixman_format_a(val) + .wrapping_add(pixman_format_r(val)) + .wrapping_add(pixman_format_g(val)) + .wrapping_add(pixman_format_b(val)) +} + +extern "C" { + pub fn pixman_format_supported_source(format: pixman_format_code_t) -> pixman_bool_t; + pub fn pixman_image_composite( + op: pixman_op_t, + src: *mut pixman_image_t, + mask: *mut pixman_image_t, + dest: *mut pixman_image_t, + src_x: i16, + src_y: i16, + mask_x: i16, + mask_y: i16, + dest_x: i16, + dest_y: i16, + width: u16, + height: u16, + ); + pub fn pixman_image_create_bits( + format: pixman_format_code_t, + width: libc::c_int, + height: libc::c_int, + bits: *mut u32, + rowstride_bytes: libc::c_int, + ) -> *mut pixman_image_t; + pub fn pixman_image_create_solid_fill(color: *const pixman_color_t) -> *mut pixman_image_t; + pub fn pixman_image_fill_rectangles( + op: pixman_op_t, + image: *mut pixman_image_t, + color: *const pixman_color_t, + n_rects: libc::c_int, + rects: *const pixman_rectangle16_t, + ) -> pixman_bool_t; + pub fn pixman_image_get_data(image: *mut pixman_image_t) -> *mut u32; + pub fn pixman_image_get_format(image: *mut pixman_image_t) -> pixman_format_code_t; + pub fn pixman_image_get_height(image: *mut pixman_image_t) -> libc::c_int; + pub fn pixman_image_get_stride(image: *mut pixman_image_t) -> libc::c_int; + pub fn pixman_image_get_width(image: *mut pixman_image_t) -> libc::c_int; + pub fn pixman_image_ref(image: *mut pixman_image_t) -> *mut pixman_image_t; + pub fn pixman_image_set_destroy_function( + image: *mut pixman_image_t, + function: pixman_image_destroy_func_t, + data: *mut libc::c_void, + ); + pub fn 
pixman_image_unref(image: *mut pixman_image_t) -> pixman_bool_t; + pub fn pixman_region_extents(region: *mut pixman_region16_t) -> *mut pixman_box16_t; + pub fn pixman_region_fini(region: *mut pixman_region16_t); + pub fn pixman_region_init(region: *mut pixman_region16_t); + pub fn pixman_region_init_rect( + region: *mut pixman_region16_t, + x: libc::c_int, + y: libc::c_int, + width: libc::c_uint, + height: libc::c_uint, + ); + pub fn pixman_region_intersect( + new_reg: *mut pixman_region16_t, + reg1: *mut pixman_region16_t, + reg2: *mut pixman_region16_t, + ) -> pixman_bool_t; + pub fn pixman_region_translate(region: *mut pixman_region16_t, x: libc::c_int, y: libc::c_int); +} diff --git a/util/src/reader.rs b/util/src/reader.rs deleted file mode 100644 index d0b0f9be5fc087a7f30f5af640569fc38d719279..0000000000000000000000000000000000000000 --- a/util/src/reader.rs +++ /dev/null @@ -1,171 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. -// -// StratoVirt is licensed under Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan -// PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. - -use std::cmp::Ordering; -use std::io::{Read, Result}; - -const DEFAULT_BUFFER_SIZE: usize = 8192; - -pub struct BufferReader { - reader: R, - buffer: Vec, - pos: usize, - end: usize, -} - -impl BufferReader { - /// Create a new buffer_reader instance from `Read`. - pub fn new(reader: R) -> Self { - let mut buffer = Vec::::new(); - buffer.resize(DEFAULT_BUFFER_SIZE, 0); - - BufferReader { - reader, - buffer, - pos: 0, - end: 0, - } - } - - pub fn with_capacity(reader: R, capacity: usize) -> Self { - let mut buffer = Vec::::new(); - buffer.resize(capacity, 0_u8); - - BufferReader { - reader, - buffer, - pos: 0, - end: 0, - } - } - - /// Returns the number of bytes the internal buffer can hold at once. - pub fn capacity(&self) -> usize { - self.buffer.len() - } - - /// Read data from `reader` to fill `buffer`, update `pos` and `end`. - pub fn read_buffer(&mut self) -> Result<()> { - let length = self.reader.read(&mut self.buffer)?; - self.pos = 0; - self.end = length; - - Ok(()) - } - - /// Read assigned length bytes from `file` to come out with `Vec`. - /// - /// # Arguments - /// - /// * `bytes_num` - length wanted to read from `file`. - pub fn read_vectored(&mut self, bytes_num: usize) -> Option> { - let mut slice_vec = Vec::::new(); - let mut read_len = bytes_num; - - // Judge the file is read over or not. 
- while self.end != 0 { - match read_len.cmp(&(self.end - self.pos)) { - Ordering::Greater => { - slice_vec.extend(&self.buffer[self.pos..self.end]); - read_len -= self.end - self.pos; - self.read_buffer().unwrap(); - } - Ordering::Equal => { - slice_vec.extend(&self.buffer[self.pos..self.end]); - self.read_buffer().unwrap(); - break; - } - Ordering::Less => { - slice_vec.extend(&self.buffer[self.pos..self.pos + read_len]); - self.pos += read_len; - break; - } - } - } - - if slice_vec.len() == bytes_num { - Some(slice_vec) - } else { - None - } - } -} - -#[cfg(test)] -mod tests { - use std::fs::File; - use std::io::{Result, Write}; - - use super::BufferReader; - - fn mk_tempfile() -> Result<()> { - let module_dir = option_env!("CARGO_MANIFEST_DIR").unwrap(); - let tempfile_path = module_dir.to_string() + "/tempfile"; - let mut file = File::create(tempfile_path)?; - write_test_data(&mut file) - } - - fn open_tempfile() -> Result { - let module_dir = option_env!("CARGO_MANIFEST_DIR").unwrap(); - let tempfile_path = module_dir.to_string() + "/tempfile"; - File::open(tempfile_path) - } - - fn del_tempfile() -> Result<()> { - let module_dir = option_env!("CARGO_MANIFEST_DIR").unwrap(); - let tempfile_path = module_dir.to_string() + "/tempfile"; - std::fs::remove_file(tempfile_path) - } - - fn write_test_data(file: &mut File) -> Result<()> { - let mut test_buffer_vec_01 = [0_u8; 16384]; - for i in 0..16384 { - test_buffer_vec_01[i] = (i % 256) as u8; - } - file.write(&mut test_buffer_vec_01)?; - - let mut test_buffer_vec_02 = [8_u8; 16384]; - file.write(&mut test_buffer_vec_02)?; - - Ok(()) - } - - #[test] - fn test_buffer_reader() -> Result<()> { - assert!(mk_tempfile().is_ok()); - - let file = open_tempfile()?; - let mut buffer_reader = BufferReader::new(&file); - - assert_eq!(buffer_reader.capacity(), 8_192); - assert!(buffer_reader.read_buffer().is_ok()); - assert_eq!(buffer_reader.read_vectored(4), Some(vec![0, 1, 2, 3])); - assert_eq!( - buffer_reader.read_vectored(8), - Some(vec![4, 5, 6, 7, 8, 9, 10, 11]) - ); - assert_eq!( - buffer_reader.read_vectored(243), - Some((12..255).into_iter().collect::>()) - ); - assert!(buffer_reader.read_vectored(16125).is_some()); - assert_eq!( - buffer_reader.read_vectored(8), - Some(vec![252, 253, 254, 255, 8, 8, 8, 8]) - ); - assert!(buffer_reader.read_vectored(16380).is_some()); - assert!(buffer_reader.read_vectored(1).is_none()); - - assert!(del_tempfile().is_ok()); - Ok(()) - } -} diff --git a/util/src/seccomp.rs b/util/src/seccomp.rs index 40ab17eb9f80539fc0722540519de0b5821c196e..cce0facd2b52eda3d8b8e2aa648fcd849f52763b 100644 --- a/util/src/seccomp.rs +++ b/util/src/seccomp.rs @@ -50,19 +50,20 @@ //! let mut seccomp_filter = SyscallFilter::new(SeccompOpt::Trap); //! //! let nr_open = { -//! #[cfg(target_arch="x86_64")] +//! #[cfg(target_arch = "x86_64")] //! let nr = libc::SYS_open; -//! #[cfg(target_arch="aarch64")] +//! #[cfg(target_arch = "aarch64")] //! let nr = libc::SYS_openat; //! nr //! }; //! //! seccomp_filter.push(&mut BpfRule::new(nr_open)); //! seccomp_filter.push(&mut BpfRule::new(libc::SYS_fcntl)); -//! seccomp_filter.push( -//! &mut BpfRule::new(libc::SYS_read) -//! .add_constraint(SeccompCmpOpt::Ne, 2, 1024) -//! ); +//! seccomp_filter.push(&mut BpfRule::new(libc::SYS_read).add_constraint( +//! SeccompCmpOpt::Ne, +//! 2, +//! 1024, +//! )); //! seccomp_filter.push(&mut BpfRule::new(libc::SYS_write)); //! seccomp_filter.push(&mut BpfRule::new(libc::SYS_close)); //! 
seccomp_filter.push(&mut BpfRule::new(libc::SYS_sigaltstack)); @@ -77,7 +78,8 @@ //! ``` //! This programe will be trapped. -use crate::errors::Result; +use anyhow::{bail, Result}; + use crate::offset_of; // BPF Instruction classes @@ -112,6 +114,7 @@ const SECCOMP_RET_TRAP: u32 = 0x0003_0000; const SECCOMP_RET_ERRNO: u32 = 0x0005_0000; const SECCOMP_RET_TRACE: u32 = 0x7ff0_0000; const SECCOMP_RET_ALLOW: u32 = 0x7fff_0000; +const SECCOMP_RET_LOG: u32 = 0x7ffc_0000; /// See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/linux/seccomp.h#L45 const SECCOMP_RET_MASK: u32 = 0x0000_ffff; /// See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/linux/seccomp.h#L16 @@ -134,7 +137,7 @@ const AUDIT_ARCH_X86_64: u32 = EM_X86_64 | __AUDIT_ATCH_64BIT | __AUDIT_ARCH_LE; const AUDIT_ARCH_AARCH64: u32 = EM_AARCH64 | __AUDIT_ATCH_64BIT | __AUDIT_ARCH_LE; /// Compared operator in bpf filter rule. -#[derive(Copy, Clone, PartialEq)] +#[derive(Copy, Clone, PartialEq, Eq)] pub enum SeccompCmpOpt { /// Equal. Eq, @@ -156,7 +159,7 @@ pub enum SeccompCmpOpt { /// These operation one-to-one correspondence with BPF-filter return value: /// `SECCOMP_RET_KILL_PROCESS`, `SECCOMP_RET_KILL_THREAD`, `SECCOMP_RET_TRAP`, /// `SECCOMP_RET_ERRNO`, `SECCOMP_RET_TRACE`, `SECCOMP_RET_ALLOW`, `SECCOMP_RET_LOG`. -#[derive(Copy, Clone, PartialEq, Debug)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum SeccompOpt { /// Kill the task immediately. Kill, @@ -168,6 +171,8 @@ pub enum SeccompOpt { Trace(u32), /// Allow. Allow, + /// The syscall will be logged. + Log, } impl From for u32 { @@ -178,6 +183,7 @@ impl From for u32 { SeccompOpt::Errno(x) => SECCOMP_RET_ERRNO | (x & SECCOMP_RET_MASK), SeccompOpt::Trace(x) => SECCOMP_RET_TRACE | (x & SECCOMP_RET_MASK), SeccompOpt::Allow => SECCOMP_RET_ALLOW, + SeccompOpt::Log => SECCOMP_RET_LOG, } } } @@ -207,18 +213,18 @@ impl SeccompData { offset_of!(SeccompData, arch) as u32 } - fn args(num: u32) -> u32 { + fn args(num: u8) -> u32 { let offset_of_u64 = offset_of!(SeccompData, args) - offset_of!(SeccompData, instruction_pointer); - offset_of!(SeccompData, args) as u32 + num * offset_of_u64 as u32 + offset_of!(SeccompData, args) as u32 + u32::from(num) * offset_of_u64 as u32 } } /// Filter block /// -/// See: https://elixir.bootlin.com/linux/v4.19.123/source/include/uapi/linux/filter.h#L24 +/// See: `` #[repr(C)] -#[derive(Debug, Copy, Clone, PartialEq)] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct SockFilter { /// Actual filter code code: u16, @@ -281,8 +287,12 @@ fn handle_process(opt: SeccompOpt) -> Vec { /// A wrapper structure of a list of bpf_filters for a syscall's rule. #[derive(Debug)] pub struct BpfRule { + /// The staged rules to avoid jump offset overflow. + staged_rules: Vec, /// The first bpf_filter to compare syscall number. header_rule: SockFilter, + /// The last args index. + args_idx_last: Option, /// The inner rules to limit the arguments of syscall. inner_rules: Vec, /// The last bpf_filter to allow syscall. @@ -296,7 +306,9 @@ impl BpfRule { /// * `syscall_num` - the number of system call. pub fn new(syscall_num: i64) -> BpfRule { BpfRule { + staged_rules: Vec::new(), header_rule: bpf_jump(BPF_JMP + BPF_JEQ + BPF_K, syscall_num as u32, 0, 1), + args_idx_last: None, inner_rules: Vec::new(), tail_rule: bpf_stmt(BPF_RET + BPF_K, SECCOMP_RET_ALLOW), } @@ -306,16 +318,23 @@ impl BpfRule { /// /// # Arguments /// * `cmp` - Compare operator for given args_value and the raw args_value. 
- /// * `args_num` - The index number of system call's arguments. - /// * `args_value` - The value of args_num you want to limit. This value - /// used with `cmp` together. - pub fn add_constraint(mut self, cmp: SeccompCmpOpt, args_num: u32, args_value: u32) -> BpfRule { + /// * `args_idx` - The index number of system call's arguments. + /// * `args_value` - The value of args_num you want to limit. This value used with `cmp` + /// together. + pub fn add_constraint(mut self, cmp: SeccompCmpOpt, args_idx: u8, args_value: u32) -> BpfRule { if self.inner_rules.is_empty() { self.tail_rule = bpf_stmt(BPF_LD + BPF_W + BPF_ABS, SeccompData::nr()); } - // Create a bpf_filter to get args in `SeccompData`. - let args_filter = bpf_stmt(BPF_LD + BPF_W + BPF_ABS, SeccompData::args(args_num)); + let mut inner_append = Vec::new(); + + // Reload new args if idx changes. + if self.args_idx_last.ne(&Some(args_idx)) { + // Create a bpf_filter to get args in `SeccompData`. + let args_filter = bpf_stmt(BPF_LD + BPF_W + BPF_ABS, SeccompData::args(args_idx)); + inner_append.push(args_filter); + self.args_idx_last = Some(args_idx); + } // Create a bpf_filter to limit args in syscall. let constraint_filter = match cmp { @@ -326,30 +345,51 @@ impl BpfRule { SeccompCmpOpt::Le => bpf_jump(BPF_JMP + BPF_JGE + BPF_K, args_value, 1, 0), SeccompCmpOpt::Lt => bpf_jump(BPF_JMP + BPF_JGT + BPF_K, args_value, 1, 0), }; - - self.append(&mut vec![ - args_filter, - constraint_filter, - bpf_stmt(BPF_RET + BPF_K, SECCOMP_RET_ALLOW), - ]); - self + inner_append.push(constraint_filter); + inner_append.push(bpf_stmt(BPF_RET + BPF_K, SECCOMP_RET_ALLOW)); + + if !self.append_to_inner(&mut inner_append) { + self.start_new_session(); + self.add_constraint(cmp, args_idx, args_value) + } else { + self + } } /// Change `BpfRules` to a list of `SockFilter`. It will be used when /// seccomp taking effect. - fn as_vec(&mut self) -> Vec { - let mut bpf_filters = vec![self.header_rule]; - bpf_filters.append(&mut self.inner_rules); + fn as_vec(&self) -> Vec { + let mut bpf_filters = self.staged_rules.clone(); + bpf_filters.push(self.header_rule); + bpf_filters.append(&mut self.inner_rules.clone()); bpf_filters.push(self.tail_rule); bpf_filters } + /// Stage current rules and start new session. Used when header rule jump + /// is about to overflow. + fn start_new_session(&mut self) { + // Save current rules to staged. + self.staged_rules.push(self.header_rule); + self.staged_rules.append(&mut self.inner_rules); + self.staged_rules.push(self.tail_rule); + + self.header_rule.jf = 1; + self.args_idx_last = None; + } + /// Add bpf_filters to `inner_rules`. - fn append(&mut self, bpf_filters: &mut Vec) { + fn append_to_inner(&mut self, bpf_filters: &mut Vec) -> bool { + // bpf_filters len is less than u8::MAX. let offset = bpf_filters.len() as u8; - self.header_rule.jf += offset; - self.inner_rules.append(bpf_filters); + if let Some(jf_added) = self.header_rule.jf.checked_add(offset) { + self.header_rule.jf = jf_added; + self.inner_rules.append(bpf_filters); + true + } else { + false + } } } @@ -394,24 +434,26 @@ impl SyscallFilter { /// this structure dropped or not. You can only use this function once in /// a thread. Otherwise you will get an error. pub fn realize(mut self) -> Result<()> { - //Add opt as a bpf_filter to sock_filters + // Add opt as a bpf_filter to sock_filters. self.sock_filters.append(&mut handle_process(self.opt)); let sock_bpf_vec = self.sock_filters; // This operation can guarantee seccomp make use for all users and subprocess. 
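        // Note: PR_SET_NO_NEW_PRIVS is what lets an unprivileged task install the
        // filter below; without it the seccomp(2) call needs CAP_SYS_ADMIN and
        // otherwise fails with EACCES. The flag is inherited by child threads and
        // processes and can never be cleared again, which is what a sandbox wants.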
+ // SAFETY: All input parameters are constants. let ret = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) }; if ret != 0 { bail!("Seccomp: prctl(2) set no new privs failed."); } let prog = SockFProg { - len: sock_bpf_vec.len() as u16, + len: u16::try_from(sock_bpf_vec.len())?, sock_filter: sock_bpf_vec.as_ptr(), }; let bpf_prog_ptr = &prog as *const SockFProg; // Use seccomp(2) to make bpf rules take effect. + // SAFETY: The pointer of bpf_prog_ptr can be guaranteed not null. let ret = unsafe { libc::syscall( libc::SYS_seccomp, @@ -437,52 +479,48 @@ mod tests { // a list of bpf_filter to allow `read` syscall and forbidden others // in x86_64. let bpf_vec = vec![ + // Load arch SockFilter { code: 0x20, jt: 0, jf: 0, k: 4, }, - #[cfg(target_arch = "x86_64")] + // Verify arch SockFilter { code: 0x15, jt: 1, jf: 0, + #[cfg(target_arch = "x86_64")] k: 0xC000_003E, - }, - #[cfg(target_arch = "aarch64")] - SockFilter { - code: 0x15, - jt: 1, - jf: 0, + #[cfg(target_arch = "aarch64")] k: 0xC000_00B7, }, + // Ret kill SockFilter { code: 0x06, jt: 0, jf: 0, k: 0, }, + // Load syscall nr SockFilter { code: 0x20, jt: 0, jf: 0, k: 0, }, - #[cfg(target_arch = "x86_64")] + // Verify syscall nr SockFilter { code: 0x15, jt: 0, jf: 1, + #[cfg(target_arch = "x86_64")] k: 0, - }, - #[cfg(target_arch = "aarch64")] - SockFilter { - code: 0x15, - jt: 0, - jf: 1, + #[cfg(target_arch = "aarch64")] k: 63, }, + // Ret allow SockFilter { code: 0x06, jt: 0, @@ -501,85 +539,139 @@ mod tests { fn test_enable_syscall_extra() { // a list of bpf_filter to allow read `1024` bytes in x86_64 and // forbidden others - let bpf_vec = vec![ + let mut bpf_vec = vec![ + // Load arch SockFilter { code: 0x20, jt: 0, jf: 0, k: 4, }, - #[cfg(target_arch = "x86_64")] + // Verify arch SockFilter { code: 0x15, jt: 1, jf: 0, + #[cfg(target_arch = "x86_64")] k: 0xC000_003E, - }, - #[cfg(target_arch = "aarch64")] - SockFilter { - code: 0x15, - jt: 1, - jf: 0, + #[cfg(target_arch = "aarch64")] k: 0xC000_00B7, }, + // Ret kill SockFilter { code: 0x06, jt: 0, jf: 0, k: 0, }, + // Load syscall nr SockFilter { code: 0x20, jt: 0, jf: 0, k: 0, }, - #[cfg(target_arch = "x86_64")] + // Verify syscall nr SockFilter { code: 0x15, jt: 0, - jf: 4, + jf: 254, + #[cfg(target_arch = "x86_64")] k: 0, + #[cfg(target_arch = "aarch64")] + k: 63, }, - #[cfg(target_arch = "aarch64")] + // Load arg SockFilter { - code: 0x15, + code: 0x20, jt: 0, - jf: 4, - k: 63, + jf: 0, + k: 0x20, }, + ]; + for _ in 0..126 { + bpf_vec.append(&mut vec![ + // Verify arg + SockFilter { + code: 0x15, + jt: 0, + jf: 1, + k: 1024, + }, + // Ret allow + SockFilter { + code: 0x06, + jt: 0, + jf: 0, + k: 0x7fff_0000, + }, + ]); + } + bpf_vec.push( + // Load syscall nr SockFilter { code: 0x20, jt: 0, jf: 0, - k: 0x20, + k: 0, }, + ); + + // Start new session. 
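        // Note on the expected jump offsets: the header rule's jf field is a u8, and
        // every constraint after the first adds 2 filters (the first adds 3 because
        // it also loads the argument). 126 constraints therefore leave the first
        // header at jf = 1 + 3 + 125 * 2 = 254; a 127th would overflow 255, so
        // add_constraint() stages the finished block and opens a second header,
        // which ends up at jf = 1 + 3 + 73 * 2 = 150 for the remaining 74
        // constraints checked below.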
+ bpf_vec.append(&mut vec![ + // Verify syscall nr SockFilter { code: 0x15, jt: 0, - jf: 1, - k: 1024, + jf: 150, + #[cfg(target_arch = "x86_64")] + k: 0, + #[cfg(target_arch = "aarch64")] + k: 63, }, + // Load arg SockFilter { - code: 0x06, + code: 0x20, jt: 0, jf: 0, - k: 0x7fff_0000, + k: 0x20, }, + ]); + for _ in 126..200 { + bpf_vec.append(&mut vec![ + // Verify arg + SockFilter { + code: 0x15, + jt: 0, + jf: 1, + k: 1024, + }, + // Ret allow + SockFilter { + code: 0x06, + jt: 0, + jf: 0, + k: 0x7fff_0000, + }, + ]); + } + bpf_vec.push( + // Load syscall nr SockFilter { code: 0x20, jt: 0, jf: 0, k: 0, }, - ]; + ); let mut seccomp_filter = SyscallFilter::new(SeccompOpt::Trap); - seccomp_filter.push(&mut BpfRule::new(libc::SYS_read).add_constraint( - SeccompCmpOpt::Eq, - 2, - 1024, - )); + let mut read_rules = BpfRule::new(libc::SYS_read); + // Add enough constraint to verify that jump does not overflow. + for _ in 0..200 { + read_rules = read_rules.add_constraint(SeccompCmpOpt::Eq, 2, 1024); + } + seccomp_filter.push(&mut read_rules); assert_eq!(seccomp_filter.sock_filters, bpf_vec); } diff --git a/util/src/socket.rs b/util/src/socket.rs new file mode 100644 index 0000000000000000000000000000000000000000..573b7f4430e8ee7d585fde12b0030aa652166475 --- /dev/null +++ b/util/src/socket.rs @@ -0,0 +1,173 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::io::{IoSlice, IoSliceMut, Result as IoResult}; +use std::net::{TcpListener, TcpStream}; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::os::unix::net::{UnixListener, UnixStream}; + +use anyhow::Result; + +/// Provide socket abstraction for UnixStream and TcpStream. +#[derive(Debug)] +pub enum SocketStream { + Tcp { + link_description: String, + stream: TcpStream, + }, + Unix { + link_description: String, + stream: UnixStream, + }, +} + +impl SocketStream { + pub fn link_description(&self) -> String { + match self { + SocketStream::Tcp { + link_description, .. + } => link_description.clone(), + SocketStream::Unix { + link_description, .. + } => link_description.clone(), + } + } + + pub fn set_nonblocking(&mut self, nonblocking: bool) -> IoResult<()> { + match self { + SocketStream::Tcp { stream, .. } => stream.set_nonblocking(nonblocking), + SocketStream::Unix { stream, .. } => stream.set_nonblocking(nonblocking), + } + } +} + +impl AsRawFd for SocketStream { + fn as_raw_fd(&self) -> RawFd { + match self { + SocketStream::Tcp { stream, .. } => stream.as_raw_fd(), + SocketStream::Unix { stream, .. } => stream.as_raw_fd(), + } + } +} + +impl std::io::Read for SocketStream { + fn read(&mut self, buf: &mut [u8]) -> IoResult { + match self { + SocketStream::Tcp { stream, .. } => stream.read(buf), + SocketStream::Unix { stream, .. } => stream.read(buf), + } + } + + fn read_vectored(&mut self, bufs: &mut [IoSliceMut]) -> IoResult { + match self { + SocketStream::Tcp { stream, .. } => stream.read_vectored(bufs), + SocketStream::Unix { stream, .. 
} => stream.read_vectored(bufs), + } + } +} + +impl std::io::Write for SocketStream { + fn write(&mut self, buf: &[u8]) -> IoResult { + match self { + SocketStream::Tcp { stream, .. } => stream.write(buf), + SocketStream::Unix { stream, .. } => stream.write(buf), + } + } + + fn write_vectored(&mut self, bufs: &[IoSlice]) -> IoResult { + match self { + SocketStream::Tcp { stream, .. } => stream.write_vectored(bufs), + SocketStream::Unix { stream, .. } => stream.write_vectored(bufs), + } + } + + fn flush(&mut self) -> IoResult<()> { + match self { + SocketStream::Tcp { stream, .. } => stream.flush(), + SocketStream::Unix { stream, .. } => stream.flush(), + } + } +} + +/// Provide listener abstraction for UnixListener and TcpListener. +#[derive(Debug)] +pub enum SocketListener { + Tcp { + address: String, + listener: TcpListener, + }, + Unix { + address: String, + listener: UnixListener, + }, +} + +impl SocketListener { + pub fn bind_by_tcp(host: &str, port: u16) -> Result { + let address = format!("{}:{}", &host, &port); + let listener = TcpListener::bind(&address)?; + listener.set_nonblocking(true)?; + Ok(SocketListener::Tcp { address, listener }) + } + + pub fn bind_by_uds(path: &str) -> Result { + let listener = UnixListener::bind(path)?; + listener.set_nonblocking(true)?; + Ok(SocketListener::Unix { + address: String::from(path), + listener, + }) + } + + pub fn address(&self) -> String { + match self { + SocketListener::Tcp { address, .. } => address.clone(), + SocketListener::Unix { address, .. } => address.clone(), + } + } + + pub fn accept(&self) -> Result { + match self { + SocketListener::Tcp { listener, address } => { + let (stream, sock_addr) = listener.accept()?; + stream.set_nonblocking(true)?; + let peer_address = sock_addr.to_string(); + let link_description = format!( + "{{ protocol: tcp, address: {}, peer: {} }}", + address, peer_address + ); + Ok(SocketStream::Tcp { + link_description, + stream, + }) + } + SocketListener::Unix { listener, address } => { + let (stream, _) = listener.accept()?; + stream.set_nonblocking(true)?; + let link_description = format!("{{ protocol: unix, address: {} }}", address); + Ok(SocketStream::Unix { + link_description, + stream, + }) + } + } + } +} + +impl AsRawFd for SocketListener { + fn as_raw_fd(&self) -> RawFd { + match self { + SocketListener::Tcp { listener, .. } => listener.as_raw_fd(), + SocketListener::Unix { listener, .. } => listener.as_raw_fd(), + } + } +} diff --git a/util/src/tap.rs b/util/src/tap.rs index 4154d522ba788b2184c2f6fe64cf9f169b99d9fa..4752cd6796d6389faeb82e0c5474a739ecb10206 100644 --- a/util/src/tap.rs +++ b/util/src/tap.rs @@ -11,16 +11,29 @@ // See the Mulan PSL v2 for more details. 
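The `SocketListener`/`SocketStream` pair introduced in util/src/socket.rs above is consumed elsewhere in the tree, so the patch itself never shows a full call sequence. Below is a minimal sketch of one, assuming the module is exported as `util::socket`; the socket path and the echo loop are illustrative and not taken from the patch. Note that both the listener and the accepted stream come back in non-blocking mode.

    use std::io::{Read, Write};
    use std::thread::sleep;
    use std::time::Duration;

    use anyhow::Result;
    use util::socket::SocketListener;

    fn echo_once() -> Result<()> {
        // Illustrative path; bind_by_uds() also makes the listener non-blocking.
        let listener = SocketListener::bind_by_uds("/tmp/demo.sock")?;
        println!("listening on {}", listener.address());

        // Because the listener is non-blocking, accept() fails until a client is
        // actually pending, so a real caller would register the fd with an event
        // loop instead of polling like this.
        let mut stream = loop {
            match listener.accept() {
                Ok(s) => break s,
                Err(_) => sleep(Duration::from_millis(10)),
            }
        };
        println!("accepted {}", stream.link_description());

        // The accepted stream is non-blocking as well; switch it back for the sketch.
        stream.set_nonblocking(false)?;
        let mut buf = [0u8; 256];
        let n = stream.read(&mut buf)?;
        stream.write_all(&buf[..n])?;
        Ok(())
    }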
use std::fs::{File, OpenOptions}; -use std::io::{Read, Result as IoResult, Write}; +use std::io::{ErrorKind, Read, Result as IoResult, Write}; use std::os::unix::fs::OpenOptionsExt; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; +use std::sync::{ + atomic::{AtomicU64, Ordering}, + Arc, +}; + +use anyhow::{anyhow, bail, Context, Result}; +use log::error; +use nix::fcntl::{fcntl, FcntlArg, OFlag}; use vmm_sys_util::ioctl::{ioctl_with_mut_ref, ioctl_with_ref, ioctl_with_val}; +use vmm_sys_util::{ioctl_ioc_nr, ioctl_ior_nr, ioctl_iow_nr}; + +use crate::aio::Iovec; -use super::errors::{Result, ResultExt}; +const IFF_ATTACH_QUEUE: u16 = 0x0200; +const IFF_DETACH_QUEUE: u16 = 0x0400; pub const TUN_F_CSUM: u32 = 1; pub const TUN_F_TSO4: u32 = 2; pub const TUN_F_TSO6: u32 = 4; +pub const TUN_F_TSO_ECN: u32 = 8; pub const TUN_F_UFO: u32 = 16; pub const TUN_F_VIRTIO: u32 = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO; @@ -29,20 +42,26 @@ pub const IFF_MULTI_QUEUE: u16 = 0x100; const IFF_NO_PI: u16 = 0x1000; const IFF_VNET_HDR: u16 = 0x4000; const TUNTAP_PATH: &str = "/dev/net/tun"; +const IFNAME_SIZE: usize = 16; ioctl_iow_nr!(TUNSETIFF, 84, 202, ::std::os::raw::c_int); ioctl_ior_nr!(TUNGETFEATURES, 84, 207, ::std::os::raw::c_uint); ioctl_iow_nr!(TUNSETOFFLOAD, 84, 208, ::std::os::raw::c_int); ioctl_iow_nr!(TUNSETVNETHDRSZ, 84, 216, ::std::os::raw::c_int); +ioctl_iow_nr!(TUNSETQUEUE, 84, 217, ::std::os::raw::c_int); #[repr(C)] pub struct IfReq { - ifr_name: [u8; 16], + ifr_name: [u8; IFNAME_SIZE], ifr_flags: u16, } +#[derive(Clone)] pub struct Tap { - pub file: File, + pub file: Arc, + pub enabled: bool, + pub upload_stats: Arc, + pub download_stats: Arc, } impl Tap { @@ -50,11 +69,11 @@ impl Tap { let file; if let Some(name) = name { - if name.len() > 15 { - return Err(format!("Open tap {} failed, name too long.", name).into()); + if name.len() > IFNAME_SIZE - 1 { + return Err(anyhow!("Open tap {} failed, name too long.", name)); } - let mut ifr_name = [0_u8; 16]; + let mut ifr_name = [0_u8; IFNAME_SIZE]; let (left, _) = ifr_name.split_at_mut(name.len()); left.copy_from_slice(name.as_bytes()); @@ -72,86 +91,179 @@ impl Tap { .write(true) .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK) .open(TUNTAP_PATH) - .chain_err(|| format!("Open {} failed.", TUNTAP_PATH))?; + .with_context(|| format!("Open {} failed.", TUNTAP_PATH))?; + // SAFETY: The parameter of file can be guaranteed to be legal, and other parameters are constant. let ret = unsafe { ioctl_with_mut_ref(&file_, TUNSETIFF(), &mut if_req) }; if ret < 0 { - return Err(format!( + return Err(anyhow!( "Failed to set tap ifr flags, error is {}", std::io::Error::last_os_error() - ) - .into()); + )); } file = file_; } else if let Some(fd) = fd { - file = unsafe { - libc::fcntl(fd, libc::F_SETFL, libc::O_NONBLOCK); - File::from_raw_fd(fd) - }; + let fcnt_arg = FcntlArg::F_SETFL(OFlag::from_bits(libc::O_NONBLOCK).unwrap()); + fcntl(fd, fcnt_arg)?; + // SAFETY: The fd has been verified. + file = unsafe { File::from_raw_fd(fd) }; } else { - return Err(format!( + return Err(anyhow!( "Open tap failed, unsupported operation, error is {}", std::io::Error::last_os_error() - ) - .into()); + )); } - let mut features = 0; + let mut features: u16 = 0; + // SAFETY: The parameter of file can be guaranteed to be legal, and other parameters are constant. 
let ret = unsafe { ioctl_with_mut_ref(&file, TUNGETFEATURES(), &mut features) }; if ret < 0 { - return Err(format!( + return Err(anyhow!( "Failed to get tap features, error is {}.", std::io::Error::last_os_error() - ) - .into()); + )); } if (features & IFF_MULTI_QUEUE == 0) && queue_pairs > 1 { - bail!( - "Tap device doesn't support mq, but command set queue pairs {}.", - queue_pairs - ); + bail!("Needs multiqueue, but no kernel support for IFF_MULTI_QUEUE available"); } - Ok(Tap { file }) + Ok(Tap { + file: Arc::new(file), + enabled: true, + upload_stats: Arc::new(AtomicU64::new(0)), + download_stats: Arc::new(AtomicU64::new(0)), + }) } pub fn set_offload(&self, flags: u32) -> Result<()> { - let ret = unsafe { ioctl_with_val(&self.file, TUNSETOFFLOAD(), flags as libc::c_ulong) }; + let ret = + // SAFETY: The parameter of file can be guaranteed to be legal, and other parameters are constant. + unsafe { ioctl_with_val(self.file.as_ref(), TUNSETOFFLOAD(), u64::from(flags)) }; if ret < 0 { - return Err("ioctl TUNSETOFFLOAD failed.".to_string().into()); + return Err(anyhow!("ioctl TUNSETOFFLOAD failed.".to_string())); } Ok(()) } pub fn set_hdr_size(&self, len: u32) -> Result<()> { - let ret = unsafe { ioctl_with_ref(&self.file, TUNSETVNETHDRSZ(), &len) }; + // SAFETY: The parameter of file can be guaranteed to be legal, and other parameters are constant. + let ret = unsafe { ioctl_with_ref(self.file.as_ref(), TUNSETVNETHDRSZ(), &len) }; if ret < 0 { - return Err("ioctl TUNSETVNETHDRSZ failed.".to_string().into()); + return Err(anyhow!("ioctl TUNSETVNETHDRSZ failed.".to_string())); } Ok(()) } - pub fn read(&mut self, buf: &mut [u8]) -> IoResult { - self.file.read(buf) + pub fn has_ufo(&self) -> bool { + let flags = TUN_F_CSUM | TUN_F_UFO; + ( + // SAFETY: The parameter of file can be guaranteed to be legal, and other parameters are constant. + unsafe { ioctl_with_val(self.file.as_ref(), TUNSETOFFLOAD(), u64::from(flags)) } + ) >= 0 } - pub fn write(&mut self, buf: &[u8]) -> IoResult { - self.file.write(buf) + pub fn set_queue(&mut self, enable: bool) -> i32 { + if enable == self.enabled { + return 0; + } + let ifr_flags = if enable { + IFF_ATTACH_QUEUE + } else { + IFF_DETACH_QUEUE + }; + let mut if_req = IfReq { + ifr_name: [0_u8; IFNAME_SIZE], + ifr_flags, + }; + + // SAFETY: The parameter of file can be guaranteed to be legal, and other parameters are constant. + let ret = unsafe { ioctl_with_mut_ref(self.file.as_ref(), TUNSETQUEUE(), &mut if_req) }; + if ret == 0 { + self.enabled = enable; + } else { + error!( + "Failed to set queue, flags is {}, error is {}", + ifr_flags, + std::io::Error::last_os_error() + ); + } + ret } - pub fn as_raw_fd(&self) -> RawFd { - self.file.as_raw_fd() + pub fn receive_packets(&self, iovecs: &[Iovec]) -> isize { + // SAFETY: the arguments of readv has been checked and is correct. + let size = unsafe { + libc::readv( + self.as_raw_fd() as libc::c_int, + iovecs.as_ptr() as *const libc::iovec, + iovecs.len() as libc::c_int, + ) + }; + if size < 0 { + let e = std::io::Error::last_os_error(); + if e.kind() == std::io::ErrorKind::WouldBlock { + return size; + } + + // If the backend tap device is removed, readv returns less than 0. + // At this time, the content in the tap needs to be cleaned up. + // Here, read is called to process, otherwise handle_rx may be triggered all the time. 
+ let mut buf = [0; 1024]; + match self.read(&mut buf) { + Ok(cnt) => error!("Failed to call readv but tap read is ok: cnt {}", cnt), + Err(e) => { + // When the backend tap device is abnormally removed, read return EBADFD. + error!("Failed to read tap: {:?}", e); + } + } + error!("Failed to call readv for net handle_rx: {:?}", e); + } else { + self.download_stats.fetch_add(size as u64, Ordering::SeqCst); + } + + size } -} -impl Clone for Tap { - fn clone(&self) -> Self { - Tap { - file: self.file.try_clone().unwrap(), + pub fn send_packets(&self, iovecs: &[Iovec]) -> i8 { + loop { + // SAFETY: the arguments of writev has been checked and is correct. + let size = unsafe { + libc::writev( + self.as_raw_fd(), + iovecs.as_ptr() as *const libc::iovec, + iovecs.len() as libc::c_int, + ) + }; + if size < 0 { + let e = std::io::Error::last_os_error(); + match e.kind() { + ErrorKind::Interrupted => continue, + ErrorKind::WouldBlock => return -1_i8, + // Ignore other errors which can not be handled. + _ => error!("Failed to call writev for net handle_tx: {:?}", e), + } + } else { + self.upload_stats.fetch_add(size as u64, Ordering::SeqCst); + } + + break; } + 0_i8 + } + + pub fn read(&self, buf: &mut [u8]) -> IoResult { + self.file.as_ref().read(buf) + } + + pub fn write(&self, buf: &[u8]) -> IoResult { + self.file.as_ref().write(buf) + } + + pub fn as_raw_fd(&self) -> RawFd { + self.file.as_raw_fd() } } diff --git a/util/src/test_helper.rs b/util/src/test_helper.rs new file mode 100644 index 0000000000000000000000000000000000000000..47cb11b622f3fc9d005347102464640d4cda0652 --- /dev/null +++ b/util/src/test_helper.rs @@ -0,0 +1,168 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::sync::{Arc, Mutex, RwLock}; +use std::time::{Duration, Instant}; + +use once_cell::sync::{Lazy, OnceCell}; + +#[derive(Default, Clone, Copy)] +pub struct MsixMsg { + pub addr: u64, + pub data: u32, +} + +impl MsixMsg { + pub fn new(addr: u64, data: u32) -> Self { + MsixMsg { addr, data } + } +} + +#[derive(Default, Clone, Copy, Debug)] +pub struct IntxInfo { + pub irq: u32, + pub level: i8, +} + +impl IntxInfo { + pub fn new(irq: u32, level: i8) -> Self { + IntxInfo { irq, level } + } +} + +static TEST_ENABLED: OnceCell = OnceCell::new(); +static TEST_BASE_TIME: OnceCell = OnceCell::new(); +static mut TEST_CLOCK: Option>> = None; +pub static TEST_MSIX_LIST: Lazy>> = Lazy::new(|| Mutex::new(Vec::new())); +pub static TEST_INTX_LIST: Lazy>> = Lazy::new(|| Mutex::new(Vec::new())); + +pub fn set_test_enabled() { + if let Err(_e) = TEST_ENABLED.set(true) { + panic!("Failed to enable test server."); + } + if let Err(_e) = TEST_BASE_TIME.set(Instant::now()) { + panic!("Failed to initialize clock"); + } + // SAFETY: This module is only used for test. + unsafe { + if TEST_CLOCK.is_none() { + TEST_CLOCK = Some(Arc::new(RwLock::new(0))); + } + } +} + +pub fn is_test_enabled() -> bool { + *TEST_ENABLED.get_or_init(|| false) +} + +pub fn set_test_clock(value: u64) { + // SAFETY: This module is only used for test. 
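    // Note: the test clock is a nanosecond counter that can only move forward; calls
    // with a value not larger than the current one are ignored below. get_test_time()
    // adds the counter to the fixed Instant captured in set_test_enabled(), so code
    // driven by the test server advances deterministically instead of following
    // wall-clock time.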
+ unsafe { + if TEST_CLOCK.is_none() { + panic!("TEST_CLOCK has not been initialized."); + } + if value <= get_test_clock() { + return; + } + + let mut test_clock = TEST_CLOCK.as_ref().unwrap().write().unwrap(); + *test_clock = value; + } +} + +pub fn get_test_clock() -> u64 { + // SAFETY: This module is only used for test. + unsafe { + if TEST_CLOCK.is_none() { + panic!("TEST_CLOCK has not been initialized."); + } + + *TEST_CLOCK.as_ref().unwrap().read().unwrap() + } +} + +pub fn get_test_time() -> Instant { + // SAFETY: This module is only used for test. + unsafe { + if TEST_CLOCK.is_none() { + panic!("TEST_CLOCK has not been initialized."); + } + + TEST_BASE_TIME + .get() + .unwrap() + .checked_add(Duration::from_nanos(get_test_clock())) + .unwrap() + } +} + +pub fn add_msix_msg(addr: u64, data: u32) { + let new_msg = MsixMsg::new(addr, data); + let mut msix_list_lock = TEST_MSIX_LIST.lock().unwrap(); + + for msg in msix_list_lock.iter() { + if new_msg.addr == msg.addr && new_msg.data == msg.data { + return; + } + } + + msix_list_lock.push(new_msg); +} + +pub fn has_msix_msg(addr: u64, data: u32) -> bool { + let target_msg = MsixMsg::new(addr, data); + let mut target_index: Option = None; + let mut msix_list_lock = TEST_MSIX_LIST.lock().unwrap(); + + for (index, msg) in msix_list_lock.iter().enumerate() { + if target_msg.addr == msg.addr && target_msg.data == msg.data { + target_index = Some(index); + break; + } + } + + match target_index { + Some(i) => { + msix_list_lock.remove(i); + true + } + None => false, + } +} + +pub fn query_intx(irq: u32) -> bool { + let mut intx_list_lock = TEST_INTX_LIST.lock().unwrap(); + for intx in intx_list_lock.iter_mut() { + if intx.irq == irq { + return intx.level > 0; + } + } + + false +} + +pub fn eoi_intx(irq: u32) -> bool { + let mut intx_list_lock = TEST_INTX_LIST.lock().unwrap(); + + for intx in intx_list_lock.iter_mut() { + if intx.irq == irq { + if intx.level == 0 { + return false; + } else { + intx.level -= 1; + return true; + } + } + } + + false +} diff --git a/util/src/thread_pool.rs b/util/src/thread_pool.rs new file mode 100644 index 0000000000000000000000000000000000000000..48ac9af95f5066a1ebd0f24b492dad278195653d --- /dev/null +++ b/util/src/thread_pool.rs @@ -0,0 +1,261 @@ +// Copyright (c) 2024 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::collections::LinkedList; +use std::sync::{Arc, Condvar, Mutex}; +use std::thread; +use std::time::Duration; + +use anyhow::{bail, Context, Result}; +use log::error; + +const MIN_THREADS: u64 = 1; +const MAX_THREADS: u64 = 10; +type PoolTask = Box; + +pub trait TaskOperation: Sync + Send { + fn run(&mut self); +} + +struct PoolState { + /// The total number of current threads in thread pool. + /// Including the number of threads need to be created and the number of running threads. + total_threads: u64, + /// The current number of blocking threads, they will be blocked + /// until awakened by request_cond or timeout. + blocked_threads: u64, + /// The number of threads need to be created. 
It could be created + /// in the main loop or another thread in thread pool later. + new_threads: u64, + /// The number of threads that have been created but + /// have not yet entered the work loop. + pending_threads: u64, + /// The minimum number of threads residing in the thread pool. + min_threads: u64, + /// The maximum number of threads that thread pool can create. + max_threads: u64, + /// List of pending tasks in the thread pool. + req_lists: LinkedList, +} + +/// SAFETY: All the operations on req_lists are protected by the mutex, +/// so there is no synchronization problem. +unsafe impl Send for PoolState {} + +impl PoolState { + fn new() -> Self { + Self { + total_threads: 0, + blocked_threads: 0, + new_threads: 0, + pending_threads: 0, + min_threads: MIN_THREADS, + max_threads: MAX_THREADS, + req_lists: LinkedList::new(), + } + } + + fn spawn_thread_needed(&self) -> bool { + self.blocked_threads == 0 && self.total_threads < self.max_threads + } + + fn is_running(&self) -> bool { + self.total_threads <= self.max_threads + } + + fn spawn_thread(&mut self, pool: Arc) -> Result<()> { + self.total_threads += 1; + self.new_threads += 1; + + if self.pending_threads == 0 { + self.do_spawn_thread(pool)?; + } + Ok(()) + } + + fn do_spawn_thread(&mut self, pool: Arc) -> Result<()> { + if self.new_threads == 0 { + return Ok(()); + } + + self.new_threads -= 1; + self.pending_threads += 1; + trace::thread_pool_spawn_thread( + &self.total_threads, + &self.blocked_threads, + &self.new_threads, + &self.pending_threads, + ); + thread::Builder::new() + .name("thread-pool".to_string()) + .spawn(move || worker_thread(pool)) + .with_context(|| "Failed to create thread in pool!")?; + Ok(()) + } +} + +pub struct ThreadPool { + /// Data shared by all threads in the pool. + pool_state: Arc>, + /// Notify the thread in the pool that there are some work to do. + request_cond: Condvar, + /// Notify threadpool that the current thread has exited. + stop_cond: Condvar, +} + +impl Default for ThreadPool { + fn default() -> Self { + Self { + pool_state: Arc::new(Mutex::new(PoolState::new())), + request_cond: Condvar::new(), + stop_cond: Condvar::new(), + } + } +} + +impl ThreadPool { + /// Submit task to thread pool. + pub fn submit_task(pool: Arc, task: Box) -> Result<()> { + trace::thread_pool_submit_task(); + let mut locked_state = pool.pool_state.lock().unwrap(); + if locked_state.spawn_thread_needed() { + locked_state.spawn_thread(pool.clone())? + } + locked_state.req_lists.push_back(task); + drop(locked_state); + + pool.request_cond.notify_one(); + Ok(()) + } + + /// It should be confirmed that all threads have successfully exited + /// before function return. 
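    // Note: cancellation works by shrinking the pool limits rather than signalling
    // each worker. Threads that were requested but never spawned are subtracted from
    // total_threads, max_threads drops to 0 so is_running() turns false, and
    // request_cond wakes every parked worker; each worker exits once it re-checks
    // is_running() after its current task or wakeup, and cancel() waits on stop_cond
    // until total_threads reaches zero.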
+ pub fn cancel(&self) -> Result<()> { + let mut locked_state = self.pool_state.lock().unwrap(); + locked_state.total_threads -= locked_state.new_threads; + locked_state.new_threads = 0; + locked_state.max_threads = 0; + self.request_cond.notify_all(); + + while locked_state.total_threads > 0 { + match self.stop_cond.wait(locked_state) { + Ok(lock) => locked_state = lock, + Err(e) => bail!("{:?}", e), + } + } + Ok(()) + } +} + +fn worker_thread(pool: Arc) { + let mut locked_state = pool.pool_state.lock().unwrap(); + locked_state.pending_threads -= 1; + locked_state + .do_spawn_thread(pool.clone()) + .unwrap_or_else(|e| error!("Thread pool error: {:?}", e)); + + while locked_state.is_running() { + let result; + + if locked_state.req_lists.is_empty() { + locked_state.blocked_threads += 1; + match pool + .request_cond + .wait_timeout(locked_state, Duration::from_secs(10)) + { + Ok((guard, ret)) => { + locked_state = guard; + result = ret; + } + Err(e) => { + error!("Unknown errors have occurred thread pool: {:?}", e); + locked_state = e.into_inner().0; + break; + } + } + locked_state.blocked_threads -= 1; + + if result.timed_out() + && locked_state.req_lists.is_empty() + && locked_state.total_threads > locked_state.min_threads + { + // If wait time_out and no pending task and current total number + // of threads exceeds the minimum, then exit. + break; + } + + continue; + } + + let mut req = locked_state.req_lists.pop_front().unwrap(); + drop(locked_state); + + req.run(); + + locked_state = pool.pool_state.lock().unwrap(); + } + locked_state.total_threads -= 1; + trace::thread_pool_exit_thread(&locked_state.total_threads, &locked_state.req_lists.len()); + + pool.stop_cond.notify_one(); + pool.request_cond.notify_one(); +} + +#[cfg(test)] +mod test { + use std::sync::atomic::{AtomicU64, Ordering}; + use std::sync::Arc; + use std::{thread, time}; + + use super::{TaskOperation, ThreadPool}; + + struct PoolTask { + count: Arc, + } + + impl TaskOperation for PoolTask { + fn run(&mut self) { + std::thread::sleep(std::time::Duration::from_millis(500)); + self.count.fetch_add(1, Ordering::SeqCst); + } + } + + #[test] + fn test_pool_exit() { + let pool = Arc::new(ThreadPool::default()); + let count = Arc::new(AtomicU64::new(0)); + let begin = time::SystemTime::now(); + for _ in 0..10 { + let task = Box::new(PoolTask { + count: count.clone(), + }); + assert!(ThreadPool::submit_task(pool.clone(), task).is_ok()); + } + + // Waiting for creating. + while !pool.pool_state.lock().unwrap().req_lists.is_empty() { + thread::sleep(time::Duration::from_millis(10)); + + let now = time::SystemTime::now(); + let duration = now.duration_since(begin).unwrap().as_millis(); + assert!(duration < 500 * 10); + } + + assert!(pool.cancel().is_ok()); + let end = time::SystemTime::now(); + let duration = end.duration_since(begin).unwrap().as_millis(); + // All tasks are processed in parallel. + assert!(duration < 500 * 10); + // All the task has been finished. + assert_eq!(count.load(Ordering::SeqCst), 10); + } +} diff --git a/util/src/time.rs b/util/src/time.rs new file mode 100644 index 0000000000000000000000000000000000000000..ed9f4f3383487891fc2949a78108ca9823beb805 --- /dev/null +++ b/util/src/time.rs @@ -0,0 +1,60 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use anyhow::{bail, Result}; +use nix::time::{clock_gettime, ClockId}; + +pub const NANOSECONDS_PER_SECOND: u64 = 1_000_000_000; + +/// Converts date to seconds since 1970-01-01 00:00:00. +pub fn mktime64(year: u64, mon: u64, day: u64, hour: u64, min: u64, sec: u64) -> u64 { + let mut y = year; + let mut m = mon; + + if m <= 2 { + m += 10; + y -= 1; + } else { + m -= 2; + } + + ((((y / 4 - y / 100 + y / 400 + 367 * m / 12 + day) + y * 365 - 719499) * 24 + hour) * 60 + min) + * 60 + + sec +} + +/// Get wall time. +pub fn gettime() -> Result<(i64, i64)> { + match clock_gettime(ClockId::CLOCK_REALTIME) { + Ok(ts) => Ok((ts.tv_sec(), ts.tv_nsec())), + Err(e) => bail!("clock_gettime failed: {:?}", e), + } +} + +/// Convert wall time to year/month/day/hour/minute/second format. +pub fn get_format_time(sec: i64) -> [i32; 6] { + // SAFETY: No input parameter. + let mut ti: libc::tm = unsafe { std::mem::zeroed() }; + // SAFETY: The parameters of sec and ti can be guaranteed not be null. + unsafe { + libc::localtime_r(&sec, &mut ti); + } + + [ + ti.tm_year.saturating_add(1900), + ti.tm_mon.saturating_add(1), + ti.tm_mday, + ti.tm_hour, + ti.tm_min, + ti.tm_sec, + ] +} diff --git a/util/src/trace.rs b/util/src/trace.rs deleted file mode 100644 index 9a267d03947a3cf05440a622c10d02d48c1e8d4a..0000000000000000000000000000000000000000 --- a/util/src/trace.rs +++ /dev/null @@ -1,122 +0,0 @@ -use std::collections::HashSet; -use std::fs::{File, OpenOptions}; -use std::io::{prelude::Write, BufRead, BufReader}; -use std::ops::Deref; -use std::sync::Arc; - -use arc_swap::ArcSwap; -use once_cell::sync::Lazy; - -use crate::errors::{Result, ResultExt}; - -static TRACE_MARKER_FD: Lazy> = Lazy::new(open_trace_marker); -static TRACE_EVENTS: Lazy>> = - Lazy::new(|| ArcSwap::new(Arc::new(HashSet::new()))); - -fn open_trace_marker() -> Option { - let file = "/proc/mounts"; - let proc_mounts_fd = match File::open(file) { - Ok(fd) => fd, - Err(e) => { - error!("Failed to open {}: {}", file, e); - return None; - } - }; - let mut reader = BufReader::new(proc_mounts_fd); - let mut buf: String; - loop { - buf = String::new(); - match reader.read_line(&mut buf) { - Ok(_) => { - if buf.contains("tracefs") { - break; - } - } - Err(e) => { - error!("Read {} error: {}.", &file, e); - return None; - } - } - } - - let fields: Vec<&str> = buf.split(' ').collect(); - let tracefs_mount_point = match fields.get(1) { - Some(s) => s.to_string(), - None => panic!("Failed to get mount point of tracefs."), - }; - - let tracing_on = format!("{}/tracing_on", tracefs_mount_point); - let mut tracing_on_fd = match OpenOptions::new().write(true).open(&tracing_on) { - Ok(fd) => fd, - Err(e) => { - error!("Failed to open {}: {}", tracing_on, e); - return None; - } - }; - if let Err(e) = tracing_on_fd.write(b"1") { - error!("Failed to enable tracing_on: {}", e); - return None; - } - - let trace_marker = format!("{}/trace_marker", tracefs_mount_point); - match OpenOptions::new().write(true).open(&trace_marker) { - Ok(fd) => Some(fd), - Err(e) => { - error!("Failed to open {}: {}", trace_marker, e); - None - } - } -} - -pub fn write_trace_marker(event: &str, msg: &str) { - if !is_trace_event_enabled(event) { - 
return; - } - - let msg = format!("[{}] {}", event, msg); - if let Err(e) = TRACE_MARKER_FD.as_ref().unwrap().write(msg.as_bytes()) { - error!("Write trace_marker error: {}", e); - } -} - -#[macro_export] -macro_rules! ftrace { - ($func: ident) => { - let func = stringify!($func); - let msg = String::new(); - $crate::trace::write_trace_marker(func, &msg); - }; - ($func: ident, $($arg: tt)*) => { - let func = stringify!($func); - let msg = format!("{}", format_args!($($arg)*)); - $crate::trace::write_trace_marker(func, &msg); - }; -} - -pub fn enable_trace_events(file: &str) -> Result<()> { - let fd = File::open(file).chain_err(|| format!("Failed to open {}.", file))?; - let mut reader = BufReader::new(fd); - - loop { - let mut buf = String::new(); - let size = reader - .read_line(&mut buf) - .chain_err(|| format!("Read {} error.", file))?; - - if size == 0 { - return Ok(()); - } - - let mut trace_events = TRACE_EVENTS.load().deref().deref().clone(); - trace_events.insert(buf.trim().to_string()); - TRACE_EVENTS.store(Arc::new(trace_events)); - } -} - -pub fn is_trace_event_enabled(event: &str) -> bool { - if TRACE_EVENTS.load().is_empty() { - return false; - } - - TRACE_EVENTS.load().contains(event) -} diff --git a/util/src/unix.rs b/util/src/unix.rs index c7d177282329ac56c9910f37e5df9988bcb77051..2fcebce41ab4e28c7cbed39411ec00eb9b340391 100644 --- a/util/src/unix.rs +++ b/util/src/unix.rs @@ -14,71 +14,56 @@ use std::fs::File; use std::mem::size_of; use std::os::unix::io::{AsRawFd, RawFd}; use std::os::unix::net::{UnixListener, UnixStream}; +use std::path::Path; use std::ptr::{copy_nonoverlapping, null_mut, write_unaligned}; +use anyhow::{anyhow, bail, Context, Result}; use libc::{ - c_void, cmsghdr, iovec, msghdr, recvmsg, sendmsg, CMSG_LEN, CMSG_SPACE, MSG_NOSIGNAL, - MSG_WAITALL, SCM_RIGHTS, SOL_SOCKET, + c_void, cmsghdr, iovec, msghdr, recvmsg, sendmsg, syscall, SYS_mbind, CMSG_LEN, CMSG_SPACE, + MSG_NOSIGNAL, MSG_WAITALL, SCM_RIGHTS, SOL_SOCKET, }; +use log::error; +use nix::unistd::{sysconf, SysconfVar}; -use super::errors::{ErrorKind, Result, ResultExt}; - -/// This function returns the caller's thread ID(TID). -pub fn gettid() -> u64 { - unsafe { libc::syscall(libc::SYS_gettid) as u64 } -} +use crate::UtilError; /// This function used to remove group and others permission using libc::chmod. pub fn limit_permission(path: &str) -> Result<()> { let file_path = path.as_bytes().to_vec(); let cstr_file_path = std::ffi::CString::new(file_path).unwrap(); + // SAFETY: The file_path can be guaranteed to be legal, and the + // return value have bee verified later. let ret = unsafe { libc::chmod(cstr_file_path.as_ptr(), 0o600) }; if ret == 0 { Ok(()) } else { - Err(ErrorKind::ChmodFailed(ret).into()) + Err(anyhow!(UtilError::ChmodFailed(ret))) } } /// Gets the page size of host. pub fn host_page_size() -> u64 { - unsafe { libc::sysconf(libc::_SC_PAGESIZE) as u64 } -} - -#[derive(PartialEq, Debug)] -/// Three path type in unix. -pub enum UnixPath { - File = 0, - Unix = 1, - Tcp = 2, - Unknown = 3, -} - -impl From<&str> for UnixPath { - fn from(s: &str) -> Self { - match s { - "file" | "File" | "FILE" => UnixPath::File, - "unix" | "Unix" | "UNIX" => UnixPath::Unix, - "tcp" | "Tcp" | "TCP" => UnixPath::Tcp, - _ => UnixPath::Unknown, + let page_size = match sysconf(SysconfVar::PAGE_SIZE) { + Ok(Some(size)) => size, + Ok(None) => 0, + Err(e) => { + error!("Get host page size failed: {:?}", e); + 0 } - } + }; + page_size as u64 } /// Parse unix uri to unix path. 
/// /// # Notions /// -/// Unix uri is the string as `file:/xxx/xxx` or `unix:/xxx/xxx` or `tcp:xxx.xxx.xxx`. -pub fn parse_uri(uri: &str) -> Result<(UnixPath, String)> { +/// Unix uri is the string as `unix:/xxx/xxx`. +pub fn parse_unix_uri(uri: &str) -> Result { let parse_vec: Vec<&str> = uri.split(':').collect(); - if parse_vec.len() == 2 { - match UnixPath::from(parse_vec[0]) { - UnixPath::File => Ok((UnixPath::File, String::from(parse_vec[1]))), - UnixPath::Unix => Ok((UnixPath::Unix, String::from(parse_vec[1]))), - _ => bail!("Unsupported unix path type."), - } + if parse_vec.len() == 2 && parse_vec[0] == "unix" { + Ok(parse_vec[1].to_string()) } else { bail!("Invalid unix uri: {}", uri) } @@ -89,7 +74,7 @@ pub fn parse_uri(uri: &str) -> Result<(UnixPath, String)> { /// # Arguments /// /// * `file` - Backend file. -/// * `len` - Length of maping. +/// * `len` - Length of mapping. /// * `offset` - Offset in the file (or other object). /// * `read_only` - Allow to write or not. /// * `is_share` - Share the mapping or not. @@ -125,10 +110,10 @@ pub fn do_mmap( prot |= libc::PROT_WRITE; } - // Safe because the return value is checked. + // SAFETY: The return value is checked. let hva = unsafe { libc::mmap( - std::ptr::null_mut() as *mut libc::c_void, + std::ptr::null_mut(), len as libc::size_t, prot, flags, @@ -137,26 +122,67 @@ pub fn do_mmap( ) }; if hva == libc::MAP_FAILED { - return Err(std::io::Error::last_os_error()).chain_err(|| "Mmap failed."); + return Err(std::io::Error::last_os_error()).with_context(|| "Mmap failed."); } if !dump_guest_core { - set_memory_undumpable(hva, len); + // SAFETY: The hva and len are mmap-ed above and are verified. + unsafe { set_memory_undumpable(hva, len) }; } Ok(hva as u64) } -fn set_memory_undumpable(host_addr: *mut libc::c_void, size: u64) { - // Safe because host_addr and size are valid and return value is checked. - let ret = unsafe { libc::madvise(host_addr, size as libc::size_t, libc::MADV_DONTDUMP) }; +unsafe fn set_memory_undumpable(host_addr: *mut libc::c_void, size: u64) { + let ret = libc::madvise(host_addr, size as libc::size_t, libc::MADV_DONTDUMP); if ret < 0 { error!( - "Syscall madvise(with MADV_DONTDUMP) failed, OS error is {}", + "Syscall madvise(with MADV_DONTDUMP) failed, OS error is {:?}", std::io::Error::last_os_error() ); } } +/// This function set memory policy for host NUMA node memory range. +/// +/// * Arguments +/// +/// * `addr` - The memory range starting with addr. +/// * `len` - Length of the memory range. +/// * `mode` - Memory policy mode. +/// * `node_mask` - node_mask specifies physical node ID. +/// * `max_node` - The max node. +/// * `flags` - Mode flags. +/// +/// # Safety +/// +/// Caller should has valid params. +pub unsafe fn mbind( + addr: u64, + len: u64, + mode: u32, + node_mask: Vec, + max_node: u64, + flags: u32, +) -> Result<()> { + let res = syscall( + SYS_mbind, + addr as *mut c_void, + len, + mode, + node_mask.as_ptr(), + max_node + 1, + flags, + ); + if res < 0 { + bail!( + "Failed to apply host numa node policy, error is {}", + std::io::Error::last_os_error() + ); + } + + Ok(()) +} + /// Unix socket is a data communication endpoint for exchanging data /// between processes executing on the same host OS. pub struct UnixSock { @@ -178,7 +204,6 @@ impl Clone for UnixSock { } } -#[allow(dead_code)] impl UnixSock { pub fn new(path: &str) -> Self { UnixSock { @@ -189,44 +214,80 @@ impl UnixSock { } /// Bind assigns a unique listener for the socket. 
- fn bind(&mut self, unlink: bool) -> Result<()> { - if unlink { + pub fn bind(&mut self, unlink: bool) -> Result<()> { + if unlink && Path::new(self.path.as_str()).exists() { std::fs::remove_file(self.path.as_str()) - .chain_err(|| format!("Failed to remove socket file {}.", self.path.as_str()))?; + .with_context(|| format!("Failed to remove socket file {}.", self.path.as_str()))?; } let listener = UnixListener::bind(self.path.as_str()) - .chain_err(|| format!("Failed to bind the socket {}", self.path))?; + .with_context(|| format!("Failed to bind the socket {}", self.path))?; self.listener = Some(listener); Ok(()) } /// The listener accepts incoming client connections. - fn accept(&mut self) -> Result<()> { - let (sock, _addr) = self + pub fn accept(&mut self) -> Result<()> { + let listener = self .listener .as_ref() - .unwrap() + .with_context(|| "UnixSock is not bound")?; + let (sock, _addr) = listener .accept() - .chain_err(|| format!("Failed to accept the socket {}", self.path))?; + .with_context(|| format!("Failed to accept the socket {}", self.path))?; self.sock = Some(sock); Ok(()) } - fn is_accepted(&self) -> bool { + pub fn is_accepted(&self) -> bool { self.sock.is_some() } + pub fn server_connection_refuse(&mut self) -> Result<()> { + let listener = self + .listener + .as_ref() + .with_context(|| "UnixSock is not bound")?; + // Refuse connection by finishing life cycle of stream fd from listener fd. + listener.accept().with_context(|| { + format!( + "Failed to accept the socket for refused connection {}", + self.path + ) + })?; + + Ok(()) + } + /// Unix socket stream create a connection for requests. pub fn connect(&mut self) -> Result<()> { let sock = UnixStream::connect(self.path.as_str()) - .chain_err(|| format!("Failed to connect the socket {}", self.path))?; + .with_context(|| format!("Failed to connect the socket {}", self.path))?; self.sock = Some(sock); Ok(()) } + pub fn listen_set_nonblocking(&self, nonblocking: bool) -> Result<()> { + let listener = self + .listener + .as_ref() + .with_context(|| "UnixSock is not bound")?; + listener + .set_nonblocking(nonblocking) + .with_context(|| "couldn't set nonblocking for unix sock listener") + } + + pub fn set_nonblocking(&self, nonblocking: bool) -> Result<()> { + let sock = self + .sock + .as_ref() + .with_context(|| "UnixSock is not connected")?; + sock.set_nonblocking(nonblocking) + .with_context(|| "couldn't set nonblocking") + } + /// Get Stream's fd from `UnixSock`. pub fn get_stream_raw_fd(&self) -> RawFd { self.sock.as_ref().unwrap().as_raw_fd() @@ -238,8 +299,10 @@ impl UnixSock { } fn cmsg_data(&self, cmsg_buffer: *mut cmsghdr) -> *mut RawFd { - // Safe as parameter is zero. - (cmsg_buffer as *mut u8).wrapping_add(unsafe { CMSG_LEN(0) } as usize) as *mut RawFd + (cmsg_buffer as *mut u8).wrapping_add( + // SAFETY: Parameter is zero. + unsafe { CMSG_LEN(0) } as usize, + ) as *mut RawFd } fn get_next_cmsg( @@ -248,17 +311,16 @@ impl UnixSock { cmsg: &cmsghdr, cmsg_ptr: *mut cmsghdr, ) -> *mut cmsghdr { - // Safe to get cmsg_len because the parameter is valid. - let next_cmsg = (cmsg_ptr as *mut u8) - .wrapping_add(unsafe { CMSG_LEN(cmsg.cmsg_len as u32) } as usize) - as *mut cmsghdr; + let next_cmsg = (cmsg_ptr as *mut u8).wrapping_add( + // SAFETY: Safe to get cmsg_len because the parameter is valid. + unsafe { CMSG_LEN(cmsg.cmsg_len as _) } as usize, + ) as *mut cmsghdr; // Safe to get msg_control because the parameter is valid. 
- let nex_cmsg_pos = - (next_cmsg as *mut u8).wrapping_sub(msghdr.msg_control as usize) as usize; + let nex_cmsg_pos = (next_cmsg as *mut u8).wrapping_sub(msghdr.msg_control as usize) as u64; - // Safe as parameter is zero. - if nex_cmsg_pos.wrapping_add(unsafe { CMSG_LEN(0) } as usize) - > msghdr.msg_controllen as usize + // SAFETY: Parameter is constant. + if nex_cmsg_pos.wrapping_add(u64::from(unsafe { CMSG_LEN(0) })) + > msghdr.msg_controllen as u64 { null_mut() } else { @@ -276,45 +338,36 @@ impl UnixSock { /// # Errors /// /// The socket file descriptor is broken. - pub fn send_msg(&self, iovecs: &mut [iovec], out_fds: &[RawFd]) -> std::io::Result { - // It is safe because we check the iovecs lens before. - #[cfg(not(target_env = "musl"))] + pub fn send_msg(&self, iovecs: &mut [iovec], out_fds: &[RawFd]) -> Result { + // SAFETY: We checked the iovecs lens before. let iovecs_len = iovecs.len(); - #[cfg(target_env = "musl")] - let iovecs_len = iovecs.len() as i32; - // It is safe because we check the out_fds lens before. - #[cfg(not(target_env = "musl"))] - let cmsg_len = unsafe { CMSG_LEN((size_of::() * out_fds.len()) as u32) } as usize; - #[cfg(target_env = "musl")] - let cmsg_len = unsafe { CMSG_LEN((size_of::() * out_fds.len()) as u32) } as u32; - // It is safe because we check the out_fds lens before. - #[cfg(not(target_env = "musl"))] - let cmsg_capacity = - unsafe { CMSG_SPACE((size_of::() * out_fds.len()) as u32) } as usize; - #[cfg(target_env = "musl")] - let cmsg_capacity = - unsafe { CMSG_SPACE((size_of::() * out_fds.len()) as u32) } as u32; + // SAFETY: We checked the out_fds lens before. + let cmsg_len = unsafe { CMSG_LEN(u32::try_from(std::mem::size_of_val(out_fds))?) }; + // SAFETY: We checked the out_fds lens before. + let cmsg_capacity = unsafe { CMSG_SPACE(u32::try_from(std::mem::size_of_val(out_fds))?) }; let mut cmsg_buffer = vec![0_u64; cmsg_capacity as usize]; // In `musl` toolchain, msghdr has private member `__pad0` and `__pad1`, it can't be // initialized in normal way. + // SAFETY: The member variable of msg will be assigned value later. let mut msg: msghdr = unsafe { std::mem::zeroed() }; msg.msg_name = null_mut(); msg.msg_namelen = 0; msg.msg_iov = iovecs.as_mut_ptr(); - msg.msg_iovlen = iovecs_len; + msg.msg_iovlen = iovecs_len as _; msg.msg_control = null_mut(); msg.msg_controllen = 0; msg.msg_flags = 0; if !out_fds.is_empty() { let cmsg = cmsghdr { - cmsg_len, - #[cfg(target_env = "musl")] + cmsg_len: cmsg_len as _, + #[cfg(any(target_env = "musl", target_env = "ohos"))] __pad1: 0, cmsg_level: SOL_SOCKET, cmsg_type: SCM_RIGHTS, }; + // SAFETY: cmsg_buffer was created in this function and can be guaranteed not be null. unsafe { write_unaligned(cmsg_buffer.as_mut_ptr() as *mut cmsghdr, cmsg); @@ -326,21 +379,21 @@ impl UnixSock { } msg.msg_control = cmsg_buffer.as_mut_ptr() as *mut c_void; - msg.msg_controllen = cmsg_capacity; + msg.msg_controllen = cmsg_capacity as _; } - // Safe as msg parameters are valid. - let write_count = - unsafe { sendmsg(self.sock.as_ref().unwrap().as_raw_fd(), &msg, MSG_NOSIGNAL) }; - - if write_count == -1 { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!( - "Failed to send msg, err: {}", - std::io::Error::last_os_error() - ), - )); + let sock = self + .sock + .as_ref() + .with_context(|| "UnixSock is not connected")?; + // SAFETY: msg parameters are valid. 
+ let write_count = unsafe { sendmsg(sock.as_raw_fd(), &msg, MSG_NOSIGNAL) }; + + if write_count < 0 { + Err(anyhow!( + "Failed to send msg, err: {}", + std::io::Error::last_os_error() + )) } else { Ok(write_count as usize) } @@ -356,118 +409,111 @@ impl UnixSock { /// # Errors /// /// The socket file descriptor is broken. - pub fn recv_msg( - &self, - iovecs: &mut [iovec], - in_fds: &mut [RawFd], - ) -> std::io::Result<(usize, usize)> { - // It is safe because we check the iovecs lens before. - #[cfg(not(target_env = "musl"))] + pub fn recv_msg(&self, iovecs: &mut [iovec], in_fds: &mut [RawFd]) -> Result<(usize, usize)> { + // SAFETY: We check the iovecs lens before. let iovecs_len = iovecs.len(); - #[cfg(target_env = "musl")] - let iovecs_len = iovecs.len() as i32; - // It is safe because we check the in_fds lens before. - #[cfg(not(target_env = "musl"))] - let cmsg_capacity = - unsafe { CMSG_SPACE((size_of::() * in_fds.len()) as u32) } as usize; - #[cfg(target_env = "musl")] - let cmsg_capacity = - unsafe { CMSG_SPACE((size_of::() * in_fds.len()) as u32) } as u32; + // SAFETY: We check the in_fds lens before. + let cmsg_capacity = unsafe { CMSG_SPACE(u32::try_from(std::mem::size_of_val(in_fds))?) }; let mut cmsg_buffer = vec![0_u64; cmsg_capacity as usize]; // In `musl` toolchain, msghdr has private member `__pad0` and `__pad1`, it can't be // initialized in normal way. + // SAFETY: The member variable of msg will be assigned value later. let mut msg: msghdr = unsafe { std::mem::zeroed() }; msg.msg_name = null_mut(); msg.msg_namelen = 0; msg.msg_iov = iovecs.as_mut_ptr(); - msg.msg_iovlen = iovecs_len; + msg.msg_iovlen = iovecs_len as _; msg.msg_control = null_mut(); msg.msg_controllen = 0; msg.msg_flags = 0; if !in_fds.is_empty() { msg.msg_control = cmsg_buffer.as_mut_ptr() as *mut c_void; - msg.msg_controllen = cmsg_capacity; + msg.msg_controllen = cmsg_capacity as _; } - // Safe as msg parameters are valid. - let total_read = unsafe { - recvmsg( - self.sock.as_ref().unwrap().as_raw_fd(), - &mut msg, - MSG_WAITALL, - ) - }; - - if total_read == -1 { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!( - "Failed to recv msg, err: {}", - std::io::Error::last_os_error() - ), - )); + let sock = self + .sock + .as_ref() + .with_context(|| "UnixSock is not connected")?; + // SAFETY: msg parameters are valid. + let total_read = unsafe { recvmsg(sock.as_raw_fd(), &mut msg, MSG_WAITALL) }; + + if total_read < 0 { + bail!( + "Failed to recv msg, err: {}", + std::io::Error::last_os_error() + ); } - if total_read == 0 && (msg.msg_controllen as usize) < size_of::() { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!( - "The length of control message is invalid, {} {}", - msg.msg_controllen, - size_of::() - ), - )); + if total_read == 0 && (msg.msg_controllen as u64) < size_of::() as u64 { + bail!( + "The length of control message is invalid, {} {}", + msg.msg_controllen, + size_of::() + ); } let mut cmsg_ptr = msg.msg_control as *mut cmsghdr; let mut in_fds_count = 0_usize; while !cmsg_ptr.is_null() { - let cmsg = unsafe { (cmsg_ptr as *mut cmsghdr).read_unaligned() }; + // SAFETY: The pointer of cmsg_ptr was created in this function and + // can be guaranteed not be null. + let cmsg = unsafe { cmsg_ptr.read_unaligned() }; if cmsg.cmsg_level == SOL_SOCKET && cmsg.cmsg_type == SCM_RIGHTS { - let fd_count = - (cmsg.cmsg_len as usize - unsafe { CMSG_LEN(0) } as usize) / size_of::(); + // SAFETY: Input parameter is constant. 
+ let fd_count = (cmsg.cmsg_len as u64 - u64::from(unsafe { CMSG_LEN(0) })) as usize + / size_of::(); + let new_in_fds_count = in_fds_count + .checked_add(fd_count) + .with_context(|| "fds count overflow")?; + if new_in_fds_count > in_fds.len() { + bail!("in_fds is too small"); + } + // SAFETY: + // 1. the pointer of cmsg_ptr was created in this function and can be guaranteed not be null. + // 2. the parameter of in_fds has been checked before. unsafe { copy_nonoverlapping( self.cmsg_data(cmsg_ptr), - in_fds[in_fds_count..(in_fds_count + fd_count)].as_mut_ptr(), + in_fds[in_fds_count..new_in_fds_count].as_mut_ptr(), fd_count, ); } - in_fds_count += fd_count; + in_fds_count = new_in_fds_count; } cmsg_ptr = self.get_next_cmsg(&msg, &cmsg, cmsg_ptr); } - - Ok((total_read as usize, in_fds_count as usize)) + Ok((total_read as usize, in_fds_count)) } } #[cfg(test)] mod tests { + use std::fs; use std::path::Path; use std::time::Duration; use libc::{c_void, iovec}; - use super::{parse_uri, UnixPath, UnixSock}; + use super::{parse_unix_uri, UnixSock}; #[test] fn test_parse_uri() { - let test_uri_01 = "file:/tmp/test_file"; - assert!(parse_uri(test_uri_01).is_ok()); + let test_uri_01 = "unix:/tmp/test_file.sock"; + assert!(parse_unix_uri(test_uri_01).is_ok()); assert_eq!( - parse_uri(test_uri_01).unwrap(), - (UnixPath::File, String::from("/tmp/test_file")) + parse_unix_uri(test_uri_01).unwrap(), + String::from("/tmp/test_file.sock") ); let test_uri_02 = "file:/tmp/test_file:file"; - assert!(parse_uri(test_uri_02).is_err()); + assert!(parse_unix_uri(test_uri_02).is_err()); let test_uri_03 = "tcp:127.0.0.1"; - assert!(parse_uri(test_uri_03).is_err()); + assert!(parse_unix_uri(test_uri_03).is_err()); } #[test] @@ -489,7 +535,11 @@ mod tests { assert_ne!(stream.get_stream_raw_fd(), 0); assert!(listener.accept().is_ok()); - assert_eq!(listener.is_accepted(), true); + assert!(listener.is_accepted()); + + if sock_path.exists() { + fs::remove_file("./test_socket1.sock").unwrap(); + } } #[test] @@ -524,5 +574,9 @@ mod tests { let (data_size, fd_size) = stream.recv_msg(&mut recv, &mut in_fd).unwrap(); assert_eq!(data_size, buff.len()); assert_eq!(fd_size, in_fd.len()); + + if sock_path.exists() { + fs::remove_file("./test_socket2.sock").unwrap(); + } } } diff --git a/util/src/v4l2.rs b/util/src/v4l2.rs new file mode 100644 index 0000000000000000000000000000000000000000..9abcfd22a19d1d0fef56314152edd0a40ea7c774 --- /dev/null +++ b/util/src/v4l2.rs @@ -0,0 +1,303 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
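Stepping back to the reworked send_msg()/recv_msg() helpers above: the unit test drives them over a loopback pair, but the descriptor-passing side is easy to miss. The sketch below shows the intended SCM_RIGHTS flow, assuming a pair of already-connected UnixSock endpoints (one side went through bind() and accept(), the other through connect()) and that the module stays public as `util::unix`; the helper names and the single-byte payload are illustrative, not from the patch.

    use std::os::unix::io::RawFd;

    use anyhow::Result;
    use libc::iovec;
    use util::unix::UnixSock;

    // Ship one data byte plus one file descriptor to the peer.
    fn push_fd(sender: &UnixSock, fd: RawFd) -> Result<()> {
        let mut byte = [1u8];
        let mut iov = [iovec {
            iov_base: byte.as_mut_ptr() as *mut libc::c_void,
            iov_len: byte.len(),
        }];
        // send_msg() builds the SCM_RIGHTS control message from out_fds itself.
        sender.send_msg(&mut iov, &[fd])?;
        Ok(())
    }

    // Receive the byte and the descriptor; the fd that comes back is a new number
    // in this process referring to the same open file description.
    fn pull_fd(receiver: &UnixSock) -> Result<RawFd> {
        let mut byte = [0u8];
        let mut iov = [iovec {
            iov_base: byte.as_mut_ptr() as *mut libc::c_void,
            iov_len: byte.len(),
        }];
        let mut fds: [RawFd; 1] = [-1];
        let (_len, nfds) = receiver.recv_msg(&mut iov, &mut fds)?;
        assert_eq!(nfds, 1);
        Ok(fds[0])
    }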
+ +use std::fs::{File, OpenOptions}; +use std::io::ErrorKind; +use std::os::unix::prelude::{AsRawFd, OpenOptionsExt, RawFd}; +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Context, Result}; +use log::{debug, error}; +use v4l2_sys_mit::{ + v4l2_buffer, v4l2_capability, v4l2_fmtdesc, v4l2_format, v4l2_frmivalenum, v4l2_frmsizeenum, + v4l2_requestbuffers, v4l2_streamparm, +}; +use vmm_sys_util::ioctl::{ioctl_with_mut_ref, ioctl_with_ref}; +use vmm_sys_util::{ioctl_ioc_nr, ioctl_ior_nr, ioctl_iow_nr, ioctl_iowr_nr}; + +use crate::aio::Iovec; + +const VIDEO: u32 = 86; + +ioctl_ior_nr!(VIDIOC_QUERYCAP, VIDEO, 0, v4l2_capability); +ioctl_iowr_nr!(VIDIOC_ENUM_FMT, VIDEO, 2, v4l2_fmtdesc); +ioctl_iowr_nr!(VIDIOC_G_FMT, VIDEO, 4, v4l2_format); +ioctl_iowr_nr!(VIDIOC_S_FMT, VIDEO, 5, v4l2_format); +ioctl_iowr_nr!(VIDIOC_REQBUFS, VIDEO, 8, v4l2_requestbuffers); +ioctl_iowr_nr!(VIDIOC_QUERYBUF, VIDEO, 9, v4l2_buffer); +ioctl_iowr_nr!(VIDIOC_QBUF, VIDEO, 15, v4l2_buffer); +ioctl_iowr_nr!(VIDIOC_DQBUF, VIDEO, 17, v4l2_buffer); +ioctl_iow_nr!(VIDIOC_STREAMON, VIDEO, 18, std::os::raw::c_int); +ioctl_iow_nr!(VIDIOC_STREAMOFF, VIDEO, 19, std::os::raw::c_int); +ioctl_iowr_nr!(VIDIOC_S_PARM, VIDEO, 22, v4l2_streamparm); +ioctl_iowr_nr!(VIDIOC_ENUM_FRAMESIZES, VIDEO, 74, v4l2_frmsizeenum); +ioctl_iowr_nr!(VIDIOC_ENUM_FRAMEINTERVALS, VIDEO, 75, v4l2_frmivalenum); + +pub struct V4l2Backend { + /// V4L2 backend path, such as /dev/video0. + path: String, + /// V4L2 backend device fd. + fd: File, + /// V4L2 image buffer. + pub buffer: Arc>>, +} + +impl Drop for V4l2Backend { + fn drop(&mut self) { + debug!("Drop v4l2 backend fd {}", self.as_raw_fd()); + if let Err(e) = self.release_buffers() { + error!("Failed to release buffer for {}, {:?}", self.path, e); + } + } +} + +impl V4l2Backend { + pub fn new(path: String, buf_cnt: usize) -> Result { + let fd = OpenOptions::new() + .read(true) + .write(true) + .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK) + .open(&path) + .with_context(|| format!("Failed to open v4l2 backend {}.", &path))?; + Ok(Self { + path, + fd, + buffer: Arc::new(Mutex::new(vec![Iovec::new(0, 0); buf_cnt])), + }) + } + + pub fn query_cap(&self) -> Result { + let mut cap = new_init::(); + // SAFETY: self.fd is created in function new(). + let ret = unsafe { ioctl_with_mut_ref(self, VIDIOC_QUERYCAP(), &mut cap) }; + if ret < 0 { + bail!( + "Failed to query cap, error {:?}", + std::io::Error::last_os_error() + ); + } + Ok(cap) + } + + pub fn set_format(&self, fmt: &v4l2_format) -> Result<()> { + // SAFETY: self.fd is created in function new(). + let ret = unsafe { ioctl_with_ref(self, VIDIOC_S_FMT(), fmt) }; + if ret < 0 { + bail!( + "Failed to set format, error {:?}", + std::io::Error::last_os_error() + ); + } + Ok(()) + } + + pub fn request_buffers(&self, bufs: &mut v4l2_requestbuffers) -> Result<()> { + // Ensure that there are no residual buffers. + self.release_buffers()?; + let mut locked_buf = self.buffer.lock().unwrap(); + let cnt = locked_buf.len() as u32; + // Ensure the count is equal to the length of buffer. + bufs.count = cnt; + // SAFETY: self.fd is created in function new(). + let ret = unsafe { ioctl_with_ref(self, VIDIOC_REQBUFS(), bufs) }; + if ret < 0 { + bail!( + "Failed to request buffers, error {:?}", + std::io::Error::last_os_error() + ); + } + + for i in 0..cnt { + let mut buf = new_init::(); + buf.index = i; + buf.type_ = bufs.type_; + buf.memory = bufs.memory; + // SAFETY: self.fd is created in function new(). 
+ let ret = unsafe { ioctl_with_ref(self, VIDIOC_QUERYBUF(), &buf) }; + if ret < 0 { + bail!( + "Failed to query buffer {}, error {:?}", + i, + std::io::Error::last_os_error() + ); + } + + // SAFETY: + // 1. self.fd is created in function new(). + // 2. buf can be guaranteed not be null. + let ret = unsafe { + libc::mmap( + std::ptr::null_mut(), + buf.length as libc::size_t, + libc::PROT_WRITE | libc::PROT_READ, + libc::MAP_SHARED, + self.as_raw_fd(), + buf.m.offset.into(), + ) + }; + if ret == libc::MAP_FAILED { + bail!( + "Failed to mmap for buffer {}, error {:?}", + i, + std::io::Error::last_os_error() + ); + } + locked_buf[i as usize].iov_base = ret as u64; + locked_buf[i as usize].iov_len = u64::from(buf.length); + // Queue buffer to get data. + self.queue_buffer(&buf)?; + } + Ok(()) + } + + pub fn release_buffers(&self) -> Result<()> { + let mut locked_buf = self.buffer.lock().unwrap(); + for buf in locked_buf.iter_mut() { + if buf.is_none() { + continue; + } + // SAFETY: buf can be guaranteed not be null. + let ret = unsafe { + libc::munmap( + buf.iov_base as *mut libc::c_void, + buf.iov_len as libc::size_t, + ) + }; + if ret < 0 { + bail!( + "Failed to release buffers, error {:?}", + std::io::Error::last_os_error() + ); + } + buf.iov_base = 0; + buf.iov_len = 0; + } + Ok(()) + } + + pub fn stream_on(&self, vtype: std::os::raw::c_int) -> Result<()> { + // SAFETY: self.fd is created in function new(). + let ret = unsafe { ioctl_with_ref(self, VIDIOC_STREAMON(), &vtype) }; + if ret < 0 { + bail!( + "Failed to stream on, error {:?}", + std::io::Error::last_os_error() + ); + } + Ok(()) + } + + pub fn stream_off(&self, vtype: std::os::raw::c_int) -> Result<()> { + // SAFETY: self.fd is created in function new(). + let ret = unsafe { ioctl_with_ref(self, VIDIOC_STREAMOFF(), &vtype) }; + if ret < 0 { + bail!( + "Failed to stream off, error {:?}", + std::io::Error::last_os_error() + ); + } + Ok(()) + } + + pub fn queue_buffer(&self, buf: &v4l2_buffer) -> Result<()> { + // SAFETY: self.fd is created in function new(). + let ret = unsafe { ioctl_with_ref(self, VIDIOC_QBUF(), buf) }; + if ret < 0 { + bail!( + "Failed to queue buffer, error {:?}", + std::io::Error::last_os_error() + ); + } + Ok(()) + } + + pub fn dequeue_buffer(&self, buf: &v4l2_buffer) -> Result { + // SAFETY: self.fd is created in function new(). + let ret = unsafe { ioctl_with_ref(self, VIDIOC_DQBUF(), buf) }; + if ret < 0 { + if nix::errno::errno() == libc::EAGAIN { + return Ok(false); + } + bail!( + "Failed to dequeue buffer, error {:?}", + std::io::Error::last_os_error() + ); + } + Ok(true) + } + + pub fn enum_format(&self, desc: &mut v4l2_fmtdesc) -> Result { + // SAFETY: self.fd is created in function new(). + let ret = unsafe { ioctl_with_mut_ref(self, VIDIOC_ENUM_FMT(), desc) }; + if ret < 0 { + let err = std::io::Error::last_os_error(); + if err.kind() == ErrorKind::InvalidInput { + return Ok(true); + } + bail!("Failed to enumerate format, error {:?}", err); + } + Ok(false) + } + + pub fn enum_frame_size(&self, frmsize: &mut v4l2_frmsizeenum) -> Result { + // SAFETY: self.fd is created in function new(). 
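+        // EINVAL (InvalidInput) from the driver means the index is past the last supported frame size; report it as "enumeration finished" (Ok(true)) rather than as an error.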
+ let ret = unsafe { ioctl_with_mut_ref(self, VIDIOC_ENUM_FRAMESIZES(), frmsize) }; + if ret < 0 { + let err = std::io::Error::last_os_error(); + if err.kind() == ErrorKind::InvalidInput { + return Ok(true); + } + bail!("Failed to enumerate frame size, error {:?}", err); + } + Ok(false) + } + + pub fn enum_frame_interval(&self, frame_val: &mut v4l2_frmivalenum) -> Result { + // SAFETY: self.fd is created in function new(). + let ret = unsafe { ioctl_with_mut_ref(self, VIDIOC_ENUM_FRAMEINTERVALS(), frame_val) }; + if ret < 0 { + let err = std::io::Error::last_os_error(); + if err.kind() == ErrorKind::InvalidInput { + return Ok(true); + } + bail!("Failed to enumerate frame interval, error {:?}", err); + } + Ok(false) + } + + pub fn set_stream_parameter(&self, parm: &v4l2_streamparm) -> Result<()> { + // SAFETY: self.fd is created in function new(). + let ret = unsafe { ioctl_with_ref(self, VIDIOC_S_PARM(), parm) }; + if ret < 0 { + bail!( + "Failed to set stream parameter, error {:?}", + std::io::Error::last_os_error() + ); + } + Ok(()) + } +} + +impl AsRawFd for V4l2Backend { + fn as_raw_fd(&self) -> RawFd { + self.fd.as_raw_fd() + } +} + +pub fn new_init() -> T { + let mut s = ::std::mem::MaybeUninit::::uninit(); + // SAFETY: s can be guaranteed not be null. + unsafe { + ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } +} diff --git a/vfio/Cargo.toml b/vfio/Cargo.toml index b8fff9d815f573562677086d198dc206c5b06a47..6b3e135ad6130c2dcae4bf8214f8bed5f9b948ad 100644 --- a/vfio/Cargo.toml +++ b/vfio/Cargo.toml @@ -1,22 +1,25 @@ [package] name = "vfio" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" license = "Mulan PSL v2" description = "Virtual function I/O" [dependencies] -byteorder = "1.3.4" -error-chain = "0.12.4" -kvm-bindings = ">=0.3.0" -kvm-ioctls = "0.6.0" -libc = ">=0.2.71" -log = "0.4.8" -vmm-sys-util = ">=0.7.0" -vfio-bindings = "0.2.0" -once_cell = "1.9.0" +byteorder = "1.4.3" +thiserror = "1.0" +anyhow = "1.0" +kvm-bindings = { version = "0.7.0", features = ["fam-wrappers"] } +kvm-ioctls = "0.16.0" +libc = "0.2" +log = "0.4" +vmm-sys-util = "0.12.1" +vfio-bindings = "0.3" +once_cell = "1.18.0" address_space = { path = "../address_space" } -hypervisor = { path = "../hypervisor" } +hypervisor = { path = "../hypervisor"} +machine_manager = { path = "../machine_manager" } util = { path = "../util" } -pci = { path = "../pci" } +devices = { path = "../devices" } +clap = { version = "=4.1.4", default-features = false, features = ["std", "derive"] } diff --git a/vfio/src/error.rs b/vfio/src/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..7f9ff5ae24603587ca35f49dc51b2606b73e2368 --- /dev/null +++ b/vfio/src/error.rs @@ -0,0 +1,31 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
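The new vfio/src/error.rs below replaces the old error_chain definitions with a thiserror enum. A minimal sketch of how its VfioIoctl variant is consumed together with anyhow — check_ioctl_ret is a hypothetical helper, but the wrapping mirrors the pattern used throughout vfio_dev.rs:

    use anyhow::{anyhow, Result};
    use vfio::VfioError;

    // Hypothetical helper: wrap a failed ioctl return code the way vfio_dev.rs does.
    fn check_ioctl_ret(ret: i32, name: &str) -> Result<()> {
        if ret < 0 {
            // VfioIoctl carries the ioctl name plus the errno captured right after the call.
            return Err(anyhow!(VfioError::VfioIoctl(
                name.to_string(),
                std::io::Error::last_os_error(),
            )));
        }
        Ok(())
    }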
+ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum VfioError { + #[error("PciErr")] + PciErr { + #[from] + source: devices::pci::error::PciError, + }, + #[error("AddressSpace")] + AddressSpace { + #[from] + source: address_space::error::AddressSpaceError, + }, + #[error("Failed to add sub region at the BAR {0} in memory space.")] + AddRegBar(usize), + #[error("Vfio ioctl failed: {0}, error is: {1:?}")] + VfioIoctl(String, std::io::Error), +} diff --git a/vfio/src/lib.rs b/vfio/src/lib.rs index 9787513ce3ade497958fd67215e740232f88e55f..3d2705a399a1ef56a06ba927bf255e0fbe6cf2c9 100644 --- a/vfio/src/lib.rs +++ b/vfio/src/lib.rs @@ -10,75 +10,37 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -#[macro_use] -extern crate error_chain; -#[macro_use] -extern crate log; -#[macro_use] -extern crate vmm_sys_util; - -pub mod errors { - error_chain! { - links { - PciErr(pci::errors::Error, pci::errors::ErrorKind); - AddressSpace(address_space::errors::Error, address_space::errors::ErrorKind); - Hypervisor(hypervisor::errors::Error, hypervisor::errors::ErrorKind); - } - errors { - AddRegBar(id: usize) { - display("Failed to add sub region at the BAR {} in memory space.", id) - } - VfioIoctl(ioctl: String, error: std::io::Error) { - display("Vfio ioctl failed: {}, error is: {:?}", ioctl, error) - } - } - } -} +pub mod error; mod vfio_dev; mod vfio_pci; +pub use error::VfioError; pub use vfio_dev::{ VfioContainer, VfioDevice, VFIO_CHECK_EXTENSION, VFIO_DEVICE_GET_INFO, - VFIO_DEVICE_GET_IRQ_INFO, VFIO_DEVICE_GET_REGION_INFO, VFIO_DEVICE_RESET, VFIO_DEVICE_SET_IRQS, - VFIO_GET_API_VERSION, VFIO_GROUP_GET_DEVICE_FD, VFIO_GROUP_GET_STATUS, - VFIO_GROUP_SET_CONTAINER, VFIO_IOMMU_MAP_DMA, VFIO_IOMMU_UNMAP_DMA, VFIO_SET_IOMMU, + VFIO_DEVICE_GET_REGION_INFO, VFIO_DEVICE_RESET, VFIO_DEVICE_SET_IRQS, VFIO_GET_API_VERSION, + VFIO_GROUP_GET_DEVICE_FD, VFIO_GROUP_GET_STATUS, VFIO_GROUP_SET_CONTAINER, VFIO_IOMMU_MAP_DMA, + VFIO_IOMMU_UNMAP_DMA, VFIO_SET_IOMMU, }; -pub use vfio_pci::VfioPciDevice; +pub use vfio_pci::{VfioConfig, VfioPciDevice}; use std::collections::HashMap; use std::os::unix::io::RawFd; use std::sync::{Arc, Mutex}; -use hypervisor::kvm::KVM_FDS; -use kvm_bindings::{kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO}; +use anyhow::Result; use kvm_ioctls::DeviceFd; use once_cell::sync::Lazy; + +use devices::pci::register_pcidevops_type; use vfio_dev::VfioGroup; -pub static KVM_DEVICE_FD: Lazy> = Lazy::new(create_kvm_vfio_device); +pub static KVM_DEVICE_FD: Lazy>> = Lazy::new(|| Mutex::new(None)); pub static CONTAINERS: Lazy>>>> = Lazy::new(|| Mutex::new(HashMap::new())); pub static GROUPS: Lazy>>> = Lazy::new(|| Mutex::new(HashMap::new())); -fn create_kvm_vfio_device() -> Option { - let mut device = kvm_create_device { - type_: kvm_device_type_KVM_DEV_TYPE_VFIO, - fd: 0, - flags: 0, - }; - match KVM_FDS - .load() - .vm_fd - .as_ref() - .unwrap() - .create_device(&mut device) - { - Ok(fd) => Some(fd), - Err(e) => { - error!("{}", e); - None - } - } +pub fn vfio_register_pcidevops_type() -> Result<()> { + register_pcidevops_type::() } diff --git a/vfio/src/vfio_dev.rs b/vfio/src/vfio_dev.rs index db0c58df6bdf11f9bffea745955e6be3628520e2..5a6fa44478af8048c3b51d5b59900106ba700a31 100644 --- a/vfio/src/vfio_dev.rs +++ b/vfio/src/vfio_dev.rs @@ -19,7 +19,7 @@ use std::os::unix::prelude::FileExt; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex, Weak}; -use address_space::{AddressSpace, FlatRange, Listener, 
ListenerReqType, RegionIoEventFd}; +use anyhow::{anyhow, bail, Context, Result}; use byteorder::{ByteOrder, LittleEndian}; use kvm_bindings::{ kvm_device_attr, KVM_DEV_VFIO_GROUP, KVM_DEV_VFIO_GROUP_ADD, KVM_DEV_VFIO_GROUP_DEL, @@ -28,9 +28,13 @@ use vfio_bindings::bindings::vfio; use vmm_sys_util::ioctl::{ ioctl, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref, ioctl_with_val, }; +use vmm_sys_util::{ioctl_io_nr, ioctl_ioc_nr}; -use super::errors::{ErrorKind, Result, ResultExt}; use super::{CONTAINERS, GROUPS, KVM_DEVICE_FD}; +use crate::VfioError; +use address_space::{ + AddressAttr, AddressSpace, FlatRange, Listener, ListenerReqType, RegionIoEventFd, +}; /// Refer to VFIO in https://github.com/torvalds/linux/blob/master/include/uapi/linux/vfio.h const IOMMU_GROUP: &str = "iommu_group"; @@ -74,11 +78,6 @@ ioctl_io_nr!( vfio::VFIO_TYPE, vfio::VFIO_BASE + 0x08 ); -ioctl_io_nr!( - VFIO_DEVICE_GET_IRQ_INFO, - vfio::VFIO_TYPE, - vfio::VFIO_BASE + 0x09 -); ioctl_io_nr!( VFIO_DEVICE_SET_IRQS, vfio::VFIO_TYPE, @@ -116,27 +115,25 @@ impl VfioContainer { .read(true) .write(true) .open(CONTAINER_PATH) - .chain_err(|| format!("Failed to open {} for VFIO container.", CONTAINER_PATH))?; + .with_context(|| format!("Failed to open {} for VFIO container.", CONTAINER_PATH))?; - // Ioctl is safe. Called file is `/dev/vfio/vfio` fd and we check the return. + // SAFETY: Called file is `/dev/vfio/vfio` fd and we check the return. let v = unsafe { ioctl(&fd, VFIO_GET_API_VERSION()) }; if v as u32 != vfio::VFIO_API_VERSION { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_GET_API_VERSION".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); }; - // Ioctl is safe. Called file is `/dev/vfio/vfio` fd and we check the return. let ret = + // SAFETY: Ioctl is safe. Called file is `/dev/vfio/vfio` fd and we check the return. unsafe { ioctl_with_val(&fd, VFIO_CHECK_EXTENSION(), vfio::VFIO_TYPE1v2_IOMMU.into()) }; if ret != 1 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_CHECK_EXTENSION".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } Ok(VfioContainer { @@ -156,14 +153,13 @@ impl VfioContainer { /// * Fail to match IOMMU type. /// * Fail to set container IOMMU. fn set_iommu(&self, val: u32) -> Result<()> { - // Ioctl is safe. Called container file is `/dev/vfio/vfio` fd and we check the return. + // SAFETY: Called container file is `/dev/vfio/vfio` fd and we check the return. let ret = unsafe { ioctl_with_val(&self.fd, VFIO_SET_IOMMU(), val.into()) }; if ret < 0 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_SET_IOMMU".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } Ok(()) } @@ -187,14 +183,13 @@ impl VfioContainer { size, }; - // Ioctl is safe. Called container file is `/dev/vfio/vfio` fd and we check the return. + // SAFETY: Called container file is `/dev/vfio/vfio` fd and we check the return. let ret = unsafe { ioctl_with_ref(&self.fd, VFIO_IOMMU_MAP_DMA(), &map) }; if ret != 0 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_IOMMU_MAP_DMA".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } Ok(()) } @@ -216,31 +211,31 @@ impl VfioContainer { size, }; - // Ioctl is safe. Called container file is `/dev/vfio/vfio` fd and we check the return. + // SAFETY: Called container file is `/dev/vfio/vfio` fd and we check the return. 
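+        // VFIO_IOMMU_UNMAP_DMA removes the IOVA mapping that vfio_dma_map() installed for this range.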
let ret = unsafe { ioctl_with_ref(&self.fd, VFIO_IOMMU_UNMAP_DMA(), &unmap) }; if ret != 0 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_IOMMU_UNMAP_DMA".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } Ok(()) } - fn add_listener_region(&self, fr: &FlatRange) -> address_space::errors::Result<()> { + fn add_listener_region(&self, fr: &FlatRange) -> Result<()> { if fr.owner.region_type() != address_space::RegionType::Ram { return Ok(()); } let guest_phys_addr = fr.addr_range.base.raw_value(); let memory_size = fr.addr_range.size; - let hva = match fr.owner.get_host_address() { + // SAFETY: memory_size is range's size, so we make sure [hva, hva+size] is in ram range. + let hva = match unsafe { fr.owner.get_host_address(AddressAttr::Ram) } { Some(addr) => addr, None => bail!("Failed to get host address"), }; let userspace_addr = hva + fr.offset_in_region; - address_space::errors::ResultExt::chain_err( + Result::with_context( self.vfio_dma_map(guest_phys_addr, memory_size, userspace_addr), || { format!( @@ -252,22 +247,19 @@ impl VfioContainer { Ok(()) } - fn del_listener_region(&self, fr: &FlatRange) -> address_space::errors::Result<()> { + fn del_listener_region(&self, fr: &FlatRange) -> Result<()> { if fr.owner.region_type() != address_space::RegionType::Ram { return Ok(()); } let guest_phys_addr = fr.addr_range.base.raw_value(); let size = fr.addr_range.size; - address_space::errors::ResultExt::chain_err( - self.vfio_dma_unmap(guest_phys_addr, size), - || { - format!( - "Failed to do dma unmap: gpa 0x{:x}, size 0x{:x}.", - guest_phys_addr, size - ) - }, - )?; + Result::with_context(self.vfio_dma_unmap(guest_phys_addr, size), || { + format!( + "Failed to do dma unmap: gpa 0x{:x}, size 0x{:x}.", + guest_phys_addr, size + ) + })?; Ok(()) } } @@ -294,7 +286,7 @@ impl Listener for VfioContainer { range: Option<&FlatRange>, _evtfd: Option<&RegionIoEventFd>, req_type: ListenerReqType, - ) -> address_space::errors::Result<()> { + ) -> Result<()> { match req_type { ListenerReqType::AddRegion => { self.add_listener_region(range.unwrap())?; @@ -329,7 +321,7 @@ impl VfioGroup { .read(true) .write(true) .open(&group_path) - .chain_err(|| { + .with_context(|| { format!( "Failed to open {} for iommu_group.", group_path.to_str().unwrap() @@ -340,14 +332,13 @@ impl VfioGroup { argsz: size_of::() as u32, flags: 0, }; - // Safe as file is `iommu_group` fd, and we check the return. + // SAFETY: file is `iommu_group` fd, and we check the return. 
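+        // The group is only usable when the kernel reports it viable; the flags check below enforces this before the group is attached to a container.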
let ret = unsafe { ioctl_with_mut_ref(&file, VFIO_GROUP_GET_STATUS(), &mut status) }; if ret < 0 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_GROUP_GET_STATUS".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } if status.flags != vfio::VFIO_GROUP_FLAGS_VIABLE { bail!( @@ -375,11 +366,11 @@ impl VfioGroup { attr: u64::from(KVM_DEV_VFIO_GROUP_ADD), addr: &self.fd.as_raw_fd() as *const i32 as u64, }; - match KVM_DEVICE_FD.as_ref() { + match KVM_DEVICE_FD.lock().unwrap().as_ref() { Some(fd) => fd .set_device_attr(&attr) - .chain_err(|| "Failed to add group to kvm device.")?, - None => bail!("Failed to create kvm device."), + .with_context(|| "Failed to add group to kvm device.")?, + None => bail!("Kvm device hasn't been created."), } Ok(()) } @@ -395,10 +386,10 @@ impl VfioGroup { attr: u64::from(KVM_DEV_VFIO_GROUP_DEL), addr: &self.fd.as_raw_fd() as *const i32 as u64, }; - match KVM_DEVICE_FD.as_ref() { + match KVM_DEVICE_FD.lock().unwrap().as_ref() { Some(fd) => fd .set_device_attr(&attr) - .chain_err(|| "Failed to delete group from kvm device.")?, + .with_context(|| "Failed to delete group from kvm device.")?, None => bail!("Kvm device hasn't been created."), } Ok(()) @@ -406,14 +397,13 @@ impl VfioGroup { fn set_container(&mut self, container: &Arc>) -> Result<()> { let fd = &container.lock().unwrap().fd.as_raw_fd(); - // Safe as group is the owner of file, and we check the return. + // SAFETY: group is the owner of file, and we check the return. let ret = unsafe { ioctl_with_ref(&self.fd, VFIO_GROUP_SET_CONTAINER(), fd) }; if ret < 0 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_GROUP_SET_CONTAINER".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } self.container = Arc::downgrade(container); Ok(()) @@ -422,6 +412,7 @@ impl VfioGroup { fn unset_container(&mut self) { let container = self.container.upgrade().unwrap(); let fd = container.lock().unwrap().fd.as_raw_fd(); + // SAFETY: self.fd was created in function new(). 
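+        // The return value of the ioctl below is not checked; the group drops its container reference either way.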
unsafe { ioctl_with_ref(&self.fd, VFIO_GROUP_UNSET_CONTAINER(), &fd) }; self.container = Weak::new(); } @@ -449,7 +440,7 @@ impl VfioGroup { self.add_to_kvm_device()?; mem_as .register_listener(self.container.upgrade().unwrap()) - .chain_err(|| "Failed to register memory listener.")?; + .with_context(|| "Failed to register memory listener.")?; Ok(()) } } @@ -502,23 +493,17 @@ struct VfioRegionWithCap { cap_info: vfio::__IncompleteArrayField, } -#[allow(dead_code)] -pub struct VfioIrq { - count: u32, - flags: u32, - index: u32, -} - impl VfioDevice { pub fn new(path: &Path, mem_as: &Arc) -> Result>> { if !path.exists() { bail!("No provided host PCI device, use -device vfio-pci,host=DDDD:BB:DD.F"); } - let group = Self::vfio_get_group(path, mem_as).chain_err(|| "Failed to get iommu group")?; + let group = + Self::vfio_get_group(path, mem_as).with_context(|| "Failed to get iommu group")?; let (name, fd) = - Self::vfio_get_device(&group, path).chain_err(|| "Failed to get vfio device")?; - let dev_info = Self::get_dev_info(&fd).chain_err(|| "Failed to get device info")?; + Self::vfio_get_device(&group, path).with_context(|| "Failed to get vfio device")?; + let dev_info = Self::get_dev_info(&fd).with_context(|| "Failed to get device info")?; let vfio_dev = Arc::new(Mutex::new(VfioDevice { fd, name, @@ -540,13 +525,13 @@ impl VfioDevice { .iter() .collect::() .read_link() - .chain_err(|| "Invalid iommu group path")?; + .with_context(|| "Invalid iommu group path")?; let group_name = iommu_group .file_name() - .chain_err(|| "Invalid iommu group name")?; + .with_context(|| "Invalid iommu group name")?; let mut group_id = 0; if let Some(n) = group_name.to_str() { - group_id = n.parse::().chain_err(|| "Invalid iommu group id")?; + group_id = n.parse::().with_context(|| "Invalid iommu group id")?; } if let Some(g) = GROUPS.lock().unwrap().get(&group_id) { @@ -575,7 +560,7 @@ impl VfioDevice { fn vfio_get_device(group: &VfioGroup, name: &Path) -> Result<(String, File)> { let mut dev_name: &str = ""; if let Some(n) = name.file_name() { - dev_name = n.to_str().chain_err(|| "Invalid device path")?; + dev_name = n.to_str().with_context(|| "Invalid device path")?; } for device in group.devices.lock().unwrap().iter() { @@ -585,19 +570,18 @@ impl VfioDevice { } let path: CString = CString::new(dev_name.as_bytes()) - .chain_err(|| "Failed to convert device name to CString type of data")?; + .with_context(|| "Failed to convert device name to CString type of data")?; let ptr = path.as_ptr(); - // Safe as group is the owner of file and make sure ptr is valid. + // SAFETY: group is the owner of file and make sure ptr is valid. let fd = unsafe { ioctl_with_ptr(&group.fd, VFIO_GROUP_GET_DEVICE_FD(), ptr) }; if fd < 0 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_GROUP_GET_DEVICE_FD".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } - // Safe as we have verified that fd is a valid FD. + // SAFETY: We have verified that fd is a valid FD. let device = unsafe { File::from_raw_fd(fd) }; Ok((String::from(dev_name), device)) } @@ -610,18 +594,17 @@ impl VfioDevice { num_irqs: 0, }; - // Safe as device is the owner of file, and we will verify the result is valid. + // SAFETY: Device is the owner of file, and we will verify the result is valid. 
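+        // Beyond the ioctl result itself, the checks below reject devices that are not PCI or that lack the config region and the MSI-X IRQ index.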
let ret = unsafe { ioctl_with_mut_ref(device, VFIO_DEVICE_GET_INFO(), &mut dev_info) }; if ret < 0 || (dev_info.flags & vfio::VFIO_DEVICE_FLAGS_PCI) == 0 || dev_info.num_regions < vfio::VFIO_PCI_CONFIG_REGION_INDEX + 1 || dev_info.num_irqs < vfio::VFIO_PCI_MSIX_IRQ_INDEX + 1 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_DEVICE_GET_INFO".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } Ok(VfioDevInfo { @@ -643,7 +626,7 @@ impl VfioDevice { let cap_size = (info.argsz - argsz) as usize; let mut new_info = array_to_vec::(cap_size); new_info[0].region_info = info; - // Safe as device is the owner of file, and we will verify the result is valid. + // SAFETY: Device is the owner of file, and we will verify the result is valid. let ret = unsafe { ioctl_with_mut_ref( &self.fd, @@ -652,21 +635,23 @@ impl VfioDevice { ) }; if ret < 0 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_DEVICE_GET_REGION_INFO".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } - // Safe as we make sure there is enough memory space to convert cap info into + // SAFETY: We make sure there is enough memory space to convert cap info into // specific structure. let sparse = unsafe { new_info[0].cap_info.as_ptr() as *mut vfio::vfio_region_info_cap_sparse_mmap }; + // SAFETY: sparse was created in this function and can be guaranteed now be null. if unsafe { (*sparse).header.id } == vfio::VFIO_REGION_INFO_CAP_SPARSE_MMAP as u16 { + // SAFETY: The reason is same as above. let nr_areas = unsafe { (*sparse).nr_areas as usize }; let areas: &mut [vfio::vfio_region_sparse_mmap_area] = + // SAFETY: The reason is same as above. unsafe { (*sparse).areas.as_mut_slice(nr_areas) }; mmaps = Vec::with_capacity(nr_areas); for area in areas.iter() { @@ -695,14 +680,13 @@ impl VfioDevice { offset: 0, }; - // Safe as device is the owner of file, and we will verify the result is valid. + // SAFETY: Device is the owner of file, and we will verify the result is valid. let ret = unsafe { ioctl_with_mut_ref(&self.fd, VFIO_DEVICE_GET_REGION_INFO(), &mut info) }; if ret < 0 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_DEVICE_GET_REGION_INFO".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } Ok(info) @@ -713,13 +697,13 @@ impl VfioDevice { for index in vfio::VFIO_PCI_BAR0_REGION_INDEX..vfio::VFIO_PCI_ROM_REGION_INDEX { let info = self .region_info(index) - .chain_err(|| "Fail to get region info")?; + .with_context(|| "Fail to get region info")?; let mut mmaps = Vec::new(); if info.size > 0 { mmaps = self .region_mmap_info(info) - .chain_err(|| "Fail to get region mmap info")?; + .with_context(|| "Fail to get region mmap info")?; } regions.push(VfioRegion { @@ -734,39 +718,6 @@ impl VfioDevice { Ok(regions) } - pub fn get_irqs_info(&self, num_irqs: u32) -> Result> { - let mut irqs: HashMap = HashMap::new(); - - for index in 0..num_irqs { - let mut info = vfio::vfio_irq_info { - argsz: size_of::() as u32, - flags: 0, - index, - count: 0, - }; - - // Safe as device is the owner of file, and we will verify the result is valid. 
- let ret = - unsafe { ioctl_with_mut_ref(&self.fd, VFIO_DEVICE_GET_IRQ_INFO(), &mut info) }; - if ret < 0 { - return Err(ErrorKind::VfioIoctl( - "VFIO_DEVICE_GET_IRQ_INFO".to_string(), - std::io::Error::last_os_error(), - ) - .into()); - } - - let irq = VfioIrq { - flags: info.flags, - count: info.count, - index, - }; - irqs.insert(index, irq); - } - - Ok(irqs) - } - /// Read region information from VFIO device. /// /// # Arguments @@ -777,7 +728,7 @@ impl VfioDevice { pub fn read_region(&self, buf: &mut [u8], region_offset: u64, addr: u64) -> Result<()> { self.fd .read_exact_at(buf, region_offset + addr) - .chain_err(|| "Failed to read vfio region")?; + .with_context(|| "Failed to read vfio region")?; Ok(()) } @@ -792,7 +743,7 @@ impl VfioDevice { pub fn write_region(&self, buf: &[u8], region_offset: u64, addr: u64) -> Result<()> { self.fd .write_all_at(buf, region_offset + addr) - .chain_err(|| "Failed to write vfio region")?; + .with_context(|| "Failed to write vfio region")?; Ok(()) } @@ -802,32 +753,31 @@ impl VfioDevice { /// # Arguments /// /// * `irq_fds` - Irq fds that will be registered to kvm. - pub fn enable_irqs(&mut self, irq_fds: Vec) -> Result<()> { + /// * `start` - The start of subindexes being specified. + pub fn enable_irqs(&mut self, irq_fds: Vec, start: u32) -> Result<()> { let mut irq_set = array_to_vec::(irq_fds.len()); irq_set[0].argsz = (size_of::() + irq_fds.len() * size_of::()) as u32; irq_set[0].flags = vfio::VFIO_IRQ_SET_DATA_EVENTFD | vfio::VFIO_IRQ_SET_ACTION_TRIGGER; irq_set[0].index = vfio::VFIO_PCI_MSIX_IRQ_INDEX; - irq_set[0].start = 0u32; + irq_set[0].start = start; irq_set[0].count = irq_fds.len() as u32; - // It is safe as enough memory space to save irq_set data. + // SAFETY: It is safe as enough memory space to save irq_set data. let data: &mut [u8] = unsafe { irq_set[0] .data .as_mut_slice(irq_fds.len() * size_of::()) }; LittleEndian::write_i32_into(irq_fds.as_slice(), data); - // Safe as device is the owner of file, and we will verify the result is valid. + // SAFETY: Device is the owner of file, and we will verify the result is valid. let ret = unsafe { ioctl_with_ref(&self.fd, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) }; if ret < 0 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_DEVICE_SET_IRQS".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } - self.nr_vectors = irq_fds.len(); Ok(()) } @@ -848,29 +798,27 @@ impl VfioDevice { irq_set[0].start = 0u32; irq_set[0].count = 0u32; - // Safe as device is the owner of file, and we will verify the result is valid. + // SAFETY: Device is the owner of file, and we will verify the result is valid. let ret = unsafe { ioctl_with_ref(&self.fd, VFIO_DEVICE_SET_IRQS(), &irq_set[0]) }; if ret < 0 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_DEVICE_SET_IRQS".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } self.nr_vectors = 0; Ok(()) } pub fn reset(&self) -> Result<()> { - // Safe as device is the owner of file, and we verify the device supports being reset. if self.dev_info.flags & vfio::VFIO_DEVICE_FLAGS_RESET != 0 { + // SAFETY: Device is the owner of file, and we verify the device supports being reset. 
let ret = unsafe { ioctl(&self.fd, VFIO_DEVICE_RESET()) }; if ret < 0 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_DEVICE_RESET".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } } diff --git a/vfio/src/vfio_pci.rs b/vfio/src/vfio_pci.rs index abc8f6c4417e48da76a4b244ed68fc8ac8faad8d..d4c0dfee4f5bcac3c26474b3203dc70108ece3f6 100644 --- a/vfio/src/vfio_pci.rs +++ b/vfio/src/vfio_pci.rs @@ -10,46 +10,69 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use std::collections::HashMap; use std::mem::size_of; use std::os::unix::io::{AsRawFd, RawFd}; use std::sync::atomic::{AtomicU16, Ordering}; use std::sync::{Arc, Mutex, Weak}; -use address_space::{AddressSpace, FileBackend, GuestAddress, HostMemMapping, Region, RegionOps}; +use anyhow::{anyhow, bail, Context, Result}; use byteorder::{ByteOrder, LittleEndian}; -use error_chain::ChainedError; -use hypervisor::kvm::{MsiVector, KVM_FDS}; +use clap::{ArgAction, Parser}; +use log::error; +use vfio_bindings::bindings::vfio; +use vmm_sys_util::eventfd::EventFd; +use vmm_sys_util::ioctl::ioctl_with_mut_ref; + +use crate::vfio_dev::*; +use crate::VfioError; +use crate::{CONTAINERS, GROUPS}; +use address_space::{AddressSpace, FileBackend, GuestAddress, HostMemMapping, Region, RegionOps}; #[cfg(target_arch = "aarch64")] -use pci::config::SECONDARY_BUS_NUM; -use pci::config::{ +use devices::pci::config::SECONDARY_BUS_NUM; +use devices::pci::config::{ PciConfig, RegionType, BAR_0, BAR_5, BAR_IO_SPACE, BAR_MEM_64BIT, BAR_SPACE_UNMAPPED, COMMAND, COMMAND_BUS_MASTER, COMMAND_INTERRUPT_DISABLE, COMMAND_IO_SPACE, COMMAND_MEMORY_SPACE, HEADER_TYPE, IO_BASE_ADDR_MASK, MEM_BASE_ADDR_MASK, PCIE_CONFIG_SPACE_SIZE, PCI_CONFIG_SPACE_SIZE, REG_SIZE, }; -use pci::errors::Result as PciResult; -use pci::msix::{ - is_msix_enabled, update_dev_id, Msix, MSIX_CAP_CONTROL, MSIX_CAP_ENABLE, MSIX_CAP_FUNC_MASK, - MSIX_CAP_ID, MSIX_CAP_SIZE, MSIX_CAP_TABLE, MSIX_TABLE_BIR, MSIX_TABLE_ENTRY_SIZE, - MSIX_TABLE_OFFSET, MSIX_TABLE_SIZE_MAX, +use devices::pci::msix::{ + Msix, MSIX_CAP_CONTROL, MSIX_CAP_ENABLE, MSIX_CAP_FUNC_MASK, MSIX_CAP_ID, MSIX_CAP_SIZE, + MSIX_CAP_TABLE, MSIX_TABLE_BIR, MSIX_TABLE_ENTRY_SIZE, MSIX_TABLE_OFFSET, MSIX_TABLE_SIZE_MAX, }; -use pci::{ +use devices::pci::{ init_multifunction, le_read_u16, le_read_u32, le_write_u16, le_write_u32, pci_ext_cap_id, - pci_ext_cap_next, pci_ext_cap_ver, ranges_overlap, PciBus, PciDevOps, + pci_ext_cap_next, pci_ext_cap_ver, MsiVector, PciBus, PciDevBase, PciDevOps, }; +use devices::{convert_bus_ref, Bus, Device, DeviceBase, PCI_BUS}; +use machine_manager::config::{get_pci_df, parse_bool, valid_id}; +use util::gen_base_func; +use util::loop_context::create_new_eventfd; +use util::num_ops::ranges_overlap; use util::unix::host_page_size; -use vfio_bindings::bindings::vfio; -use vmm_sys_util::eventfd::EventFd; -use vmm_sys_util::ioctl::ioctl_with_mut_ref; - -use super::errors::{ErrorKind, Result, ResultExt}; -use crate::vfio_dev::*; -use crate::{CONTAINERS, GROUPS}; const PCI_NUM_BARS: u8 = 6; const PCI_ROM_SLOT: u8 = 6; +#[derive(Parser, Default, Debug)] +#[command(no_binary_name(true))] +#[clap(group = clap::ArgGroup::new("path").args(&["host", "sysfsdev"]).multiple(false).required(true))] +pub struct VfioConfig { + #[arg(long, value_parser = ["vfio-pci"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long, value_parser = valid_id)] + pub host: Option, + 
#[arg(long)] + pub bus: String, + #[arg(long)] + pub sysfsdev: Option, + #[arg(long, value_parser = get_pci_df)] + pub addr: (u8, u8), + #[arg(long, value_parser = parse_bool, action = ArgAction::Append)] + pub multifunction: Option, +} + struct MsixTable { table_bar: u8, table_offset: u64, @@ -59,11 +82,8 @@ struct MsixTable { struct VfioMsixInfo { // Table bar, table offset and table size info. table: MsixTable, - // Msix enteries. - enteries: u16, - // Vfio device irq info - #[allow(dead_code)] - vfio_irq: HashMap, + // Msix entries. + entries: u16, } struct VfioBar { @@ -73,14 +93,15 @@ struct VfioBar { } struct GsiMsiRoute { - irq_fd: Option, + irq_fd: Option>, gsi: i32, + nr: u32, } /// VfioPciDevice is a VFIO PCI device. It implements PciDevOps trait for a PCI device. /// And it is bound to a VFIO device. pub struct VfioPciDevice { - pci_config: PciConfig, + base: PciDevBase, config_size: u64, // Offset of pci config space region within vfio device fd. config_offset: u64, @@ -92,10 +113,7 @@ pub struct VfioPciDevice { vfio_bars: Arc>>, // Maintains a list of GSI with irqfds that are registered to kvm. gsi_msi_routes: Arc>>, - devfn: u8, dev_id: Arc, - name: String, - parent_bus: Weak>, // Multi-Function flag. multi_func: bool, mem_as: Arc, @@ -107,23 +125,24 @@ impl VfioPciDevice { vfio_device: Arc>, devfn: u8, name: String, - parent_bus: Weak>, + parent_bus: Weak>, multi_func: bool, mem_as: Arc, ) -> Self { Self { // Unknown PCI or PCIe type here, allocate enough space to match the two types. - pci_config: PciConfig::new(PCIE_CONFIG_SPACE_SIZE, PCI_NUM_BARS), + base: PciDevBase { + base: DeviceBase::new(name, true, Some(parent_bus)), + config: PciConfig::new(devfn, PCIE_CONFIG_SPACE_SIZE, PCI_NUM_BARS), + devfn, + }, config_size: 0, config_offset: 0, vfio_device, msix_info: None, vfio_bars: Arc::new(Mutex::new(Vec::with_capacity(PCI_NUM_BARS as usize))), gsi_msi_routes: Arc::new(Mutex::new(Vec::new())), - devfn, dev_id: Arc::new(AtomicU16::new(0)), - name, - parent_bus, multi_func, mem_as, } @@ -141,22 +160,21 @@ impl VfioPciDevice { }; let locked_dev = self.vfio_device.lock().unwrap(); - // Safe as device is the owner of file, and we will verify the result is valid. let ret = + // SAFETY: Device is the owner of file, and we will verify the result is valid. unsafe { ioctl_with_mut_ref(&locked_dev.fd, VFIO_DEVICE_GET_REGION_INFO(), &mut info) }; if ret < 0 { - return Err(ErrorKind::VfioIoctl( + return Err(anyhow!(VfioError::VfioIoctl( "VFIO_GET_PCI_CONFIG_INFO".to_string(), std::io::Error::last_os_error(), - ) - .into()); + ))); } self.config_size = info.size; self.config_offset = info.offset; let mut config_data = vec![0_u8; self.config_size as usize]; locked_dev.read_region(config_data.as_mut_slice(), self.config_offset, 0)?; - self.pci_config.config[..PCI_CONFIG_SPACE_SIZE] + self.base.config.config[..PCI_CONFIG_SPACE_SIZE] .copy_from_slice(&config_data[..PCI_CONFIG_SPACE_SIZE]); // If guest OS can not see extended caps, just ignore them. @@ -164,9 +182,10 @@ impl VfioPciDevice { return Ok(()); } - // Cache the pci config space to avoid overwriting the original config space. Because we will - // parse the chain of extended caps in cache config and insert them into original config space. - let mut config = PciConfig::new(PCIE_CONFIG_SPACE_SIZE, PCI_NUM_BARS); + // Cache the pci config space to avoid overwriting the original config space. Because we + // will parse the chain of extended caps in cache config and insert them into original + // config space. 
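+        // The cached copy is only used to walk the extended capability chain; each capability found is re-inserted into self.base.config.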
+ let mut config = PciConfig::new(self.base.devfn, PCIE_CONFIG_SPACE_SIZE, PCI_NUM_BARS); config.config = config_data; let mut next = PCI_CONFIG_SPACE_SIZE; while (PCI_CONFIG_SPACE_SIZE..PCIE_CONFIG_SPACE_SIZE).contains(&next) { @@ -186,9 +205,10 @@ impl VfioPciDevice { continue; } let offset = self - .pci_config + .base + .config .add_pcie_ext_cap(cap_id, size, cap_version)?; - self.pci_config.config[offset..offset + size] + self.base.config.config[offset..offset + size] .copy_from_slice(&config.config[old_next..old_next + size]); } @@ -198,29 +218,29 @@ impl VfioPciDevice { /// Disable I/O, MMIO, bus master and INTx states, And clear host device bar size information. /// Guest OS can get residual addresses from the host if not clear bar size. fn pci_config_reset(&mut self) -> Result<()> { - let mut cmd = le_read_u16(&self.pci_config.config, COMMAND as usize)?; + let mut cmd = le_read_u16(&self.base.config.config, COMMAND as usize)?; cmd &= !(COMMAND_IO_SPACE | COMMAND_MEMORY_SPACE | COMMAND_BUS_MASTER | COMMAND_INTERRUPT_DISABLE); - le_write_u16(&mut self.pci_config.config, COMMAND as usize, cmd)?; + le_write_u16(&mut self.base.config.config, COMMAND as usize, cmd)?; let mut data = vec![0u8; 2]; LittleEndian::write_u16(&mut data, cmd); self.vfio_device.lock().unwrap().write_region( data.as_slice(), self.config_offset, - COMMAND as u64, + u64::from(COMMAND), )?; for i in 0..PCI_ROM_SLOT { let offset = BAR_0 as usize + REG_SIZE * i as usize; - let v = le_read_u32(&self.pci_config.config, offset)?; - if v & BAR_IO_SPACE as u32 != 0 { - le_write_u32(&mut self.pci_config.config, offset, v & !IO_BASE_ADDR_MASK)?; + let v = le_read_u32(&self.base.config.config, offset)?; + if v & u32::from(BAR_IO_SPACE) != 0 { + le_write_u32(&mut self.base.config.config, offset, v & !IO_BASE_ADDR_MASK)?; } else { le_write_u32( - &mut self.pci_config.config, + &mut self.base.config.config, offset, v & !MEM_BASE_ADDR_MASK as u32, )?; @@ -230,41 +250,34 @@ impl VfioPciDevice { Ok(()) } - /// Get MSI-X table, vfio_irq and entry information from vfio device. + /// Get MSI-X table and entry information from vfio device. fn get_msix_info(&mut self) -> Result { - let locked_dev = self.vfio_device.lock().unwrap(); - let n = locked_dev.dev_info.num_irqs; - let vfio_irq = locked_dev - .get_irqs_info(n) - .chain_err(|| "Failed to get vfio irqs info")?; - - let cap_offset = self.pci_config.find_pci_cap(MSIX_CAP_ID); + let cap_offset = self.base.config.find_pci_cap(MSIX_CAP_ID); let table = le_read_u32( - &self.pci_config.config, + &self.base.config.config, cap_offset + MSIX_CAP_TABLE as usize, )?; let ctrl = le_read_u16( - &self.pci_config.config, + &self.base.config.config, cap_offset + MSIX_CAP_CONTROL as usize, )?; - let enteries = (ctrl & MSIX_TABLE_SIZE_MAX) + 1; - // Make sure that if enteries less than 1 or greater than (0x7ff + 1) is error value. - if !(1..=(MSIX_TABLE_SIZE_MAX + 1)).contains(&enteries) { + let entries = (ctrl & MSIX_TABLE_SIZE_MAX) + 1; + // Make sure that if entries less than 1 or greater than (0x7ff + 1) is error value. 
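+        // MSIX_TABLE_SIZE_MAX is the 11-bit table-size field mask (0x7ff), so the valid number of vectors is 1..=2048.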
+ if !(1..=(MSIX_TABLE_SIZE_MAX + 1)).contains(&entries) { bail!( "The number of MSI-X vectors is invalid, MSI-X vectors are {}", - enteries, + entries, ); } Ok(VfioMsixInfo { table: MsixTable { table_bar: (table as u16 & MSIX_TABLE_BIR) as u8, - table_offset: (table & MSIX_TABLE_OFFSET) as u64, - table_size: (enteries * MSIX_TABLE_ENTRY_SIZE) as u64, + table_offset: u64::from(table & MSIX_TABLE_OFFSET), + table_size: u64::from(entries * MSIX_TABLE_ENTRY_SIZE), }, - enteries: enteries as u16, - vfio_irq, + entries, }) } @@ -275,20 +288,20 @@ impl VfioPciDevice { let locked_dev = self.vfio_device.lock().unwrap(); let mut infos = locked_dev .get_regions_info() - .chain_err(|| "Failed get vfio device regions info")?; + .with_context(|| "Failed get vfio device regions info")?; for i in 0..PCI_ROM_SLOT { let mut data = vec![0_u8; 4]; locked_dev.read_region( data.as_mut_slice(), self.config_offset, - (BAR_0 + (REG_SIZE as u8) * i) as u64, + u64::from(BAR_0 + (REG_SIZE as u8) * i), )?; let mut region_type = RegionType::Mem32Bit; let pci_bar = LittleEndian::read_u32(&data); - if pci_bar & BAR_IO_SPACE as u32 != 0 { + if pci_bar & u32::from(BAR_IO_SPACE) != 0 { region_type = RegionType::Io; - } else if pci_bar & BAR_MEM_64BIT as u32 != 0 { + } else if pci_bar & u32::from(BAR_MEM_64BIT) != 0 { region_type = RegionType::Mem64Bit; } let vfio_region = infos.remove(0); @@ -306,15 +319,15 @@ impl VfioPciDevice { Ok(vfio_bars) } - fn fixup_msix_region(&self, vfio_bars: &mut Vec) -> Result<()> { + fn fixup_msix_region(&self, vfio_bars: &mut [VfioBar]) -> Result<()> { let msix_info = self .msix_info .as_ref() - .chain_err(|| "Failed to get MSIX info")?; + .with_context(|| "Failed to get MSIX info")?; let vfio_bar = vfio_bars .get_mut(msix_info.table.table_bar as usize) - .chain_err(|| "Failed to get vfio bar info")?; + .with_context(|| "Failed to get vfio bar info")?; let region = &mut vfio_bar.vfio_region; // If MSI-X area already setups or does not support mapping, we shall just return. if region.mmaps.len() != 1 @@ -357,14 +370,14 @@ impl VfioPciDevice { let msix_info = self .msix_info .as_ref() - .chain_err(|| "Failed to get MSIX info")?; + .with_context(|| "Failed to get MSIX info")?; let table_bar = msix_info.table.table_bar; let table_offset = msix_info.table.table_offset; let table_size = msix_info.table.table_size; // Create a separate region for MSI-X table, VFIO won't allow to map the MSI-X table area. let table_ops = self .get_table_region_ops() - .chain_err(|| "Failed to get table region ops")?; + .with_context(|| "Failed to get table region ops")?; let bar_ops = self.get_bar_region_ops(); for i in 0..PCI_ROM_SLOT { @@ -372,7 +385,7 @@ impl VfioPciDevice { let mut bars = self.vfio_bars.lock().unwrap(); let bar = bars .get_mut(i as usize) - .chain_err(|| "Failed to get bar info")?; + .with_context(|| "Failed to get bar info")?; // Skip unimplemented bar and the upper half of 64 bit bar. 
if bar.size == 0 { continue; @@ -382,22 +395,25 @@ impl VfioPciDevice { let mut vfio_bars = self.vfio_bars.lock().unwrap(); let vfio_bar = vfio_bars .get_mut(i as usize) - .chain_err(|| "Failed to get vfio bar info")?; + .with_context(|| "Failed to get vfio bar info")?; let size = vfio_bar.size; - let region = Region::init_container_region(size); + let region = Region::init_container_region(size, "VfioPci"); let bar_region = if i == table_bar { region .add_subregion( - Region::init_io_region(table_size as u64, table_ops.clone()), + Region::init_io_region(table_size, table_ops.clone(), "VfioBar"), table_offset, ) - .chain_err(|| ErrorKind::AddRegBar(i as usize))?; + .with_context(|| VfioError::AddRegBar(i as usize))?; if table_offset > 0 { region - .add_subregion(Region::init_io_region(table_offset, bar_ops.clone()), 0) - .chain_err(|| ErrorKind::AddRegBar(i as usize))?; + .add_subregion( + Region::init_io_region(table_offset, bar_ops.clone(), "VfioRegion"), + 0, + ) + .with_context(|| VfioError::AddRegBar(i as usize))?; } if table_offset + table_size < size { @@ -406,29 +422,38 @@ impl VfioPciDevice { Region::init_io_region( size - table_offset - table_size, bar_ops.clone(), + "vfio_io_region2", ), table_offset + table_size, ) - .chain_err(|| ErrorKind::AddRegBar(i as usize))?; + .with_context(|| VfioError::AddRegBar(i as usize))?; } region } else { region - .add_subregion(Region::init_io_region(size, bar_ops.clone()), 0) - .chain_err(|| ErrorKind::AddRegBar(i as usize))?; + .add_subregion( + Region::init_io_region(size, bar_ops.clone(), "vfio_io_region"), + 0, + ) + .with_context(|| VfioError::AddRegBar(i as usize))?; region }; - self.pci_config - .register_bar(i as usize, bar_region, vfio_bar.region_type, false, size); + self.base.config.register_bar( + i as usize, + bar_region, + vfio_bar.region_type, + false, + size, + )?; } Ok(()) } fn unregister_bars(&mut self) -> Result<()> { - let bus = self.parent_bus.upgrade().unwrap(); - self.pci_config.unregister_bars(&bus)?; + let bus = self.parent_bus().unwrap().upgrade().unwrap(); + self.base.config.unregister_bars(&bus)?; Ok(()) } @@ -437,23 +462,32 @@ impl VfioPciDevice { let msix_info = self .msix_info .as_ref() - .chain_err(|| "Failed to get MSIX info")?; + .with_context(|| "Failed to get MSIX info")?; let table_size = msix_info.table.table_size as u32; - let cap_offset = self.pci_config.find_pci_cap(MSIX_CAP_ID); + let cap_offset = self.base.config.find_pci_cap(MSIX_CAP_ID); let offset: usize = cap_offset + MSIX_CAP_CONTROL as usize; le_write_u16( - &mut self.pci_config.write_mask, + &mut self.base.config.write_mask, offset, MSIX_CAP_FUNC_MASK | MSIX_CAP_ENABLE, )?; + + let msi_irq_manager = if let Some(bus) = self.parent_bus().unwrap().upgrade() { + PCI_BUS!(bus, locked_bus, pci_bus); + pci_bus.get_msi_irq_manager() + } else { + None + }; + let msix = Arc::new(Mutex::new(Msix::new( table_size, table_size / 128, cap_offset as u16, - self.dev_id.load(Ordering::Acquire), + self.dev_id.clone(), + msi_irq_manager, ))); - self.pci_config.msix = Some(msix.clone()); + self.base.config.msix = Some(msix.clone()); let cloned_msix = msix.clone(); let read = move |data: &mut [u8], _: GuestAddress, offset: u64| -> bool { @@ -473,86 +507,80 @@ impl VfioPciDevice { let cloned_dev = self.vfio_device.clone(); let cloned_gsi_routes = self.gsi_msi_routes.clone(); - let parent_bus = self.parent_bus.clone(); + let parent_bus = self.parent_bus().unwrap().clone(); let dev_id = self.dev_id.clone(); - let devfn = self.devfn; + let devfn = self.base.devfn; + let 
cloned_msix = msix.clone(); let write = move |data: &[u8], _: GuestAddress, offset: u64| -> bool { let mut locked_msix = msix.lock().unwrap(); locked_msix.table[offset as usize..(offset as usize + data.len())] .copy_from_slice(data); - let vector = offset / MSIX_TABLE_ENTRY_SIZE as u64; + let vector = offset / u64::from(MSIX_TABLE_ENTRY_SIZE); if locked_msix.is_vector_masked(vector as u16) { return true; } let entry = locked_msix.get_message(vector as u16); - update_dev_id(&parent_bus, devfn, &dev_id); + let bus = parent_bus.upgrade().unwrap(); + PCI_BUS!(bus, locked_bus, pci_bus); + pci_bus.update_dev_id(devfn, &dev_id); let msix_vector = MsiVector { msg_addr_lo: entry.address_lo, msg_addr_hi: entry.address_hi, msg_data: entry.data, masked: false, #[cfg(target_arch = "aarch64")] - dev_id: dev_id.load(Ordering::Acquire) as u32, + dev_id: u32::from(dev_id.load(Ordering::Acquire)), }; let mut locked_gsi_routes = cloned_gsi_routes.lock().unwrap(); - let mut gsi_route = locked_gsi_routes.get_mut(vector as usize).unwrap(); + let gsi_route = locked_gsi_routes.get_mut(vector as usize).unwrap(); if gsi_route.irq_fd.is_none() { - let irq_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - gsi_route.irq_fd = Some(irq_fd); + let irq_fd = create_new_eventfd().unwrap(); + gsi_route.irq_fd = Some(Arc::new(irq_fd)); } + let irq_fd = gsi_route.irq_fd.clone(); + let msi_irq_manager = &cloned_msix.lock().unwrap().msi_irq_manager; + let irq_manager = msi_irq_manager.as_ref().unwrap(); if gsi_route.gsi == -1 { - gsi_route.gsi = match KVM_FDS - .load() - .irq_route_table - .lock() - .unwrap() - .allocate_gsi() - { + gsi_route.gsi = match irq_manager.allocate_irq(msix_vector) { Ok(g) => g as i32, Err(e) => { - error!("Failed to allocate gsi, error is {}", e); + error!("Failed to init msix vector {:?}, error is {:?}", vector, e); return true; } }; - KVM_FDS - .load() - .irq_route_table - .lock() - .unwrap() - .add_msi_route(gsi_route.gsi as u32, msix_vector) - .unwrap_or_else(|e| error!("Failed to add MSI-X route, error is {}", e)); - KVM_FDS - .load() - .commit_irq_routing() - .unwrap_or_else(|e| error!("{}", e)); - KVM_FDS - .load() - .register_irqfd(gsi_route.irq_fd.as_ref().unwrap(), gsi_route.gsi as u32) - .unwrap_or_else(|e| error!("{}", e)); + irq_manager + .register_irqfd(irq_fd.unwrap(), gsi_route.gsi as u32) + .unwrap_or_else(|e| error!("{:?}", e)); } else { - KVM_FDS - .load() - .irq_route_table - .lock() - .unwrap() - .update_msi_route(gsi_route.gsi as u32, msix_vector) - .unwrap_or_else(|e| error!("Failed to update MSI-X route, error is {}", e)); - KVM_FDS - .load() - .commit_irq_routing() - .unwrap_or_else(|e| error!("{}", e)); + irq_manager + .update_route_table(gsi_route.gsi as u32, msix_vector) + .unwrap_or_else(|e| error!("{:?}", e)); } let mut locked_dev = cloned_dev.lock().unwrap(); - locked_dev - .disable_irqs() - .unwrap_or_else(|e| error!("Failed to disable irq, error is {}", e)); - locked_dev - .enable_irqs(get_irq_rawfds(&locked_gsi_routes)) - .unwrap_or_else(|e| error!("Failed to enable irq, error is {}", e)); + if (vector + 1) > (locked_dev.nr_vectors as u64) { + locked_dev + .disable_irqs() + .unwrap_or_else(|e| error!("Failed to disable irq, error is {:?}", e)); + + locked_dev + .enable_irqs( + get_irq_rawfds(&locked_gsi_routes, 0, (vector + 1) as u32), + 0, + ) + .unwrap_or_else(|e| error!("Failed to enable irq, error is {:?}", e)); + locked_dev.nr_vectors = (vector + 1) as usize; + } else { + locked_dev + .enable_irqs( + get_irq_rawfds(&locked_gsi_routes, vector as u32, 1), + vector 
as u32, + ) + .unwrap_or_else(|e| error!("Failed to enable irq, error is {:?}", e)); + } true }; @@ -584,7 +612,7 @@ impl VfioPciDevice { .read_region(data, r.region_offset, offset) { error!( - "Failed to read bar region, address is {}, offset is {}, error is {}", + "Failed to read bar region, address is {}, offset is {}, error is {:?}", addr.0, offset, e, ); } @@ -614,7 +642,7 @@ impl VfioPciDevice { .write_region(data, r.region_offset, offset) { error!( - "Failed to write bar region, address is {}, offset is {}, error is {}", + "Failed to write bar region, address is {}, offset is {}, error is {:?}", addr.0, offset, e, ); } @@ -634,7 +662,7 @@ impl VfioPciDevice { /// the guest OS. fn setup_bars_mmap(&mut self) -> Result<()> { for i in vfio::VFIO_PCI_BAR0_REGION_INDEX..vfio::VFIO_PCI_ROM_REGION_INDEX { - let gpa = self.pci_config.get_bar_address(i as usize); + let gpa = self.base.config.get_bar_address(i as usize); if gpa == BAR_SPACE_UNMAPPED || gpa == 0 { continue; } @@ -642,13 +670,16 @@ impl VfioPciDevice { let mut bars = self.vfio_bars.lock().unwrap(); let bar = bars .get_mut(i as usize) - .chain_err(|| "Failed to get bar info")?; + .with_context(|| "Failed to get bar info")?; let region = &mut bar.vfio_region; // If bar region already setups or does not support mapping, just process the nest. - if region.size == 0 || region.mmaps.is_empty() || region.guest_phys_addr == gpa { + if region.size == 0 || region.guest_phys_addr == gpa { + continue; + } + + region.guest_phys_addr = gpa; + if region.mmaps.is_empty() { continue; - } else { - region.guest_phys_addr = gpa; } let mut read_only = true; @@ -673,17 +704,18 @@ impl VfioPciDevice { read_only, )?; - let ram_device = Region::init_ram_device_region(Arc::new(host_mmap)); + let ram_device = Region::init_ram_device_region(Arc::new(host_mmap), "VfioRam"); let bar = self - .pci_config + .base + .config .bars .get_mut(i as usize) - .chain_err(|| "Failed to get pci bar info")?; + .with_context(|| "Failed to get pci bar info")?; bar.region .as_ref() .unwrap() .add_subregion(ram_device, mmap.offset) - .chain_err(|| ErrorKind::AddRegBar(i as usize))?; + .with_context(|| VfioError::AddRegBar(i as usize))?; } } Ok(()) @@ -691,19 +723,21 @@ impl VfioPciDevice { fn vfio_enable_msix(&mut self) -> Result<()> { let mut gsi_routes = self.gsi_msi_routes.lock().unwrap(); - if gsi_routes.len() == 0 { - let irq_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + if gsi_routes.is_empty() { + let irq_fd = create_new_eventfd().unwrap(); let gsi_route = GsiMsiRoute { - irq_fd: Some(irq_fd), + irq_fd: Some(Arc::new(irq_fd)), gsi: -1, + nr: 0, }; gsi_routes.push(gsi_route); - let entries = self.msix_info.as_ref().unwrap().enteries; - for _ in 1..entries { + let entries = self.msix_info.as_ref().unwrap().entries; + for i in 1..entries { let gsi_route = GsiMsiRoute { irq_fd: None, gsi: -1, + nr: u32::from(i), }; gsi_routes.push(gsi_route); } @@ -713,8 +747,8 @@ impl VfioPciDevice { self.vfio_device .lock() .unwrap() - .enable_irqs(get_irq_rawfds(&gsi_routes)) - .chain_err(|| "Failed enable irqfds in kvm")?; + .enable_irqs(get_irq_rawfds(&gsi_routes, 0, 1), 0) + .with_context(|| "Failed enable irqfds in kvm")?; Ok(()) } @@ -724,26 +758,27 @@ impl VfioPciDevice { .lock() .unwrap() .disable_irqs() - .chain_err(|| "Failed disable irqfds in kvm")?; + .with_context(|| "Failed disable irqfds in kvm")?; Ok(()) } fn vfio_unregister_all_irqfd(&mut self) -> Result<()> { let routes = self.gsi_msi_routes.lock().unwrap(); + let msix = self.base.config.msix.as_ref().unwrap(); + 
let irq_ctrl = &msix.lock().unwrap().msi_irq_manager; for route in routes.iter() { - if let Some(fd) = &route.irq_fd.as_ref() { - KVM_FDS.load().unregister_irqfd(fd, route.gsi as u32)?; + if let Some(fd) = route.irq_fd.as_ref() { + irq_ctrl + .as_ref() + .unwrap() + .unregister_irqfd(fd.clone(), route.gsi as u32)?; // No need to release gsi. if route.gsi == -1 { continue; } - KVM_FDS - .load() - .irq_route_table - .lock() - .unwrap() - .release_gsi(route.gsi as u32)?; + + irq_ctrl.as_ref().unwrap().release_irq(route.gsi as u32)?; } } Ok(()) @@ -778,94 +813,82 @@ impl VfioPciDevice { } } -impl PciDevOps for VfioPciDevice { - fn init_write_mask(&mut self) -> PciResult<()> { - self.pci_config.init_common_write_mask() - } +impl Device for VfioPciDevice { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); - fn init_write_clear_mask(&mut self) -> PciResult<()> { - self.pci_config.init_common_write_clear_mask() + fn reset(&mut self, _reset_child_device: bool) -> Result<()> { + Result::with_context(self.vfio_device.lock().unwrap().reset(), || { + "Fail to reset vfio dev" + }) } - fn realize(mut self) -> PciResult<()> { - use pci::errors::ResultExt as PciResultExt; - - self.init_write_mask()?; - self.init_write_clear_mask()?; - PciResultExt::chain_err(self.vfio_device.lock().unwrap().reset(), || { + fn realize(mut self) -> Result>> { + let parent_bus = self.parent_bus().unwrap(); + self.init_write_mask(false)?; + self.init_write_clear_mask(false)?; + Result::with_context(self.vfio_device.lock().unwrap().reset(), || { "Failed to reset vfio device" })?; - PciResultExt::chain_err(self.get_pci_config(), || { + Result::with_context(self.get_pci_config(), || { "Failed to get vfio device pci config space" })?; - PciResultExt::chain_err(self.pci_config_reset(), || { + Result::with_context(self.pci_config_reset(), || { "Failed to reset vfio device pci config space" })?; - PciResultExt::chain_err( + Result::with_context( init_multifunction( self.multi_func, - &mut self.pci_config.config, - self.devfn, - self.parent_bus.clone(), + &mut self.base.config.config, + self.base.devfn, + parent_bus.clone(), ), || "Failed to init vfio device multifunction.", )?; #[cfg(target_arch = "aarch64")] { - let bus_num = self - .parent_bus - .upgrade() - .unwrap() - .lock() - .unwrap() - .number(SECONDARY_BUS_NUM as usize); - self.dev_id = Arc::new(AtomicU16::new(self.set_dev_id(bus_num, self.devfn))); + let bus = parent_bus.upgrade().unwrap(); + PCI_BUS!(bus, locked_bus, pci_bus); + let bus_num = pci_bus.number(SECONDARY_BUS_NUM as usize); + drop(locked_bus); + self.dev_id = Arc::new(AtomicU16::new(self.set_dev_id(bus_num, self.base.devfn))); } - self.msix_info = Some(PciResultExt::chain_err(self.get_msix_info(), || { + self.msix_info = Some(Result::with_context(self.get_msix_info(), || { "Failed to get MSI-X info" })?); - self.vfio_bars = Arc::new(Mutex::new(PciResultExt::chain_err( + self.vfio_bars = Arc::new(Mutex::new(Result::with_context( self.bar_region_info(), || "Failed to get bar region info", )?)); - PciResultExt::chain_err(self.register_bars(), || "Failed to register bars")?; + Result::with_context(self.register_bars(), || "Failed to register bars")?; - let devfn = self.devfn; + let devfn = u64::from(self.base.devfn); let dev = Arc::new(Mutex::new(self)); - let pci_bus = dev.lock().unwrap().parent_bus.upgrade().unwrap(); - let mut locked_pci_bus = pci_bus.lock().unwrap(); - let pci_device = locked_pci_bus.devices.get(&devfn); - if pci_device.is_none() { - locked_pci_bus.devices.insert(devfn, 
dev); - } else { - bail!( - "Devfn {:?} has been used by {:?}", - &devfn, - pci_device.unwrap().lock().unwrap().name() - ); - } + let parent_bus = dev.lock().unwrap().parent_bus().unwrap().upgrade().unwrap(); + let mut locked_bus = parent_bus.lock().unwrap(); + locked_bus.attach_child(devfn, dev.clone())?; - Ok(()) + Ok(dev) } - fn unrealize(&mut self) -> PciResult<()> { + fn unrealize(&mut self) -> Result<()> { if let Err(e) = VfioPciDevice::unrealize(self) { - error!("{}", e.display_chain()); + error!("{:?}", e); bail!("Failed to unrealize vfio-pci."); } Ok(()) } +} - fn devfn(&self) -> Option { - Some(self.devfn) - } +impl PciDevOps for VfioPciDevice { + gen_base_func!(pci_base, pci_base_mut, PciDevBase, base); /// Read pci data from pci config if it emulate, otherwise read from vfio device. - fn read_config(&self, offset: usize, data: &mut [u8]) { + fn read_config(&mut self, offset: usize, data: &mut [u8]) { let size = data.len(); + // SAFETY: offset is no more than 0xfff. let end = offset + size; if end > (self.config_size as usize) || size > 4 { error!( @@ -876,16 +899,13 @@ impl PciDevOps for VfioPciDevice { } // BAR, header_type and extended caps are always controlled by StratoVirt. - if ranges_overlap(offset, end, BAR_0 as usize, (BAR_5 as usize) + REG_SIZE) - || ranges_overlap( - offset, - end, - HEADER_TYPE as usize, - (HEADER_TYPE as usize) + 2, - ) - || ranges_overlap(offset, end, PCI_CONFIG_SPACE_SIZE, PCIE_CONFIG_SPACE_SIZE) + let bars_size = (BAR_5 - BAR_0) as usize + REG_SIZE; + let ext_cfg_size = PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE; + if ranges_overlap(offset, size, BAR_0 as usize, bars_size).unwrap() + || ranges_overlap(offset, size, HEADER_TYPE as usize, 2).unwrap() + || ranges_overlap(offset, size, PCI_CONFIG_SPACE_SIZE, ext_cfg_size).unwrap() { - self.pci_config.read(offset, data); + self.base.config.read(offset, data); return; } @@ -895,7 +915,7 @@ impl PciDevOps for VfioPciDevice { .unwrap() .read_region(data, self.config_offset, offset as u64) { - error!("Failed to read device pci config, error is {}", e); + error!("Failed to read device pci config, error is {:?}", e); return; } for (i, data) in data.iter_mut().enumerate().take(size) { @@ -909,6 +929,7 @@ impl PciDevOps for VfioPciDevice { /// Write data to pci config and vfio device at the same time fn write_config(&mut self, offset: usize, data: &[u8]) { let size = data.len(); + // SAFETY: offset is no more than 0xfff. 
let end = offset + size; if end > (self.config_size as usize) || size > 4 { error!( @@ -925,93 +946,102 @@ impl PciDevOps for VfioPciDevice { .unwrap() .write_region(data, self.config_offset, offset as u64) { - error!("Failed to write device pci config, error is {}", e); + error!("Failed to write device pci config, error is {:?}", e); return; } let cap_offset = self - .pci_config + .base + .config .msix .as_ref() .map_or(0, |m| m.lock().unwrap().msix_cap_offset as usize); - if ranges_overlap(offset, end, COMMAND as usize, COMMAND as usize + REG_SIZE) { - self.pci_config - .write(offset, data, self.dev_id.load(Ordering::Acquire)); - - if le_read_u32(&self.pci_config.config, offset).unwrap() & COMMAND_MEMORY_SPACE as u32 + let was_enable = self.base.config.msix.as_ref().map_or(false, |m| { + m.lock().unwrap().is_enabled(&self.base.config.config) + }); + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + PCI_BUS!(parent_bus, locked_bus, pci_bus); + self.base.config.write( + offset, + data, + self.dev_id.load(Ordering::Acquire), + #[cfg(target_arch = "x86_64")] + Some(&pci_bus.io_region), + Some(&pci_bus.mem_region), + ); + + if ranges_overlap(offset, size, COMMAND as usize, REG_SIZE).unwrap() { + if le_read_u32(&self.base.config.config, offset).unwrap() + & u32::from(COMMAND_MEMORY_SPACE) != 0 { - let parent_bus = self.parent_bus.upgrade().unwrap(); - let locked_parent_bus = parent_bus.lock().unwrap(); - if let Err(e) = self.pci_config.update_bar_mapping( - #[cfg(target_arch = "x86_64")] - &locked_parent_bus.io_region, - &locked_parent_bus.mem_region, - ) { - error!("Failed to update bar, error is {}", e.display_chain()); - return; - } - drop(locked_parent_bus); - if let Err(e) = self.setup_bars_mmap() { - error!("Failed to map bar regions, error is {}", e.display_chain()); + error!("Failed to map bar regions, error is {:?}", e); } } - } else if ranges_overlap(offset, end, BAR_0 as usize, (BAR_5 as usize) + REG_SIZE) { - self.pci_config - .write(offset, data, self.dev_id.load(Ordering::Acquire)); - - if size == 4 && LittleEndian::read_u32(data) != 0xffff_ffff { - let parent_bus = self.parent_bus.upgrade().unwrap(); - let locked_parent_bus = parent_bus.lock().unwrap(); - if let Err(e) = self.pci_config.update_bar_mapping( - #[cfg(target_arch = "x86_64")] - &locked_parent_bus.io_region, - &locked_parent_bus.mem_region, - ) { - error!("Failed to update bar, error is {}", e.display_chain()); - } - } - } else if ranges_overlap(offset, end, cap_offset, cap_offset + MSIX_CAP_SIZE as usize) { - let was_enable = is_msix_enabled(cap_offset, &self.pci_config.config); - self.pci_config - .write(offset, data, self.dev_id.load(Ordering::Acquire)); - let is_enable = is_msix_enabled(cap_offset, &self.pci_config.config); + } else if ranges_overlap(offset, size, cap_offset, MSIX_CAP_SIZE as usize).unwrap() { + let is_enable = self.base.config.msix.as_ref().map_or(false, |m| { + m.lock().unwrap().is_enabled(&self.base.config.config) + }); if !was_enable && is_enable { if let Err(e) = self.vfio_enable_msix() { - error!("{}\nFailed to enable MSI-X.", e.display_chain()); + error!("{:?}\nFailed to enable MSI-X.", e); } } else if was_enable && !is_enable { if let Err(e) = self.vfio_disable_msix() { - error!("{}\nFailed to disable MSI-X.", e.display_chain()); + error!("{:?}\nFailed to disable MSI-X.", e); } } - } else { - self.pci_config - .write(offset, data, self.dev_id.load(Ordering::Acquire)); } } - - fn name(&self) -> String { - self.name.clone() - } - - fn reset(&mut self, _reset_child_device: 
bool) -> PciResult<()> { - use pci::errors::ResultExt as PciResultExt; - - PciResultExt::chain_err(self.vfio_device.lock().unwrap().reset(), || { - "Fail to reset vfio dev" - }) - } } -fn get_irq_rawfds(gsi_msi_routes: &[GsiMsiRoute]) -> Vec { +fn get_irq_rawfds(gsi_msi_routes: &[GsiMsiRoute], start: u32, count: u32) -> Vec { let mut rawfds: Vec = Vec::new(); for r in gsi_msi_routes.iter() { - if let Some(fd) = r.irq_fd.as_ref() { - rawfds.push(fd.as_raw_fd()); + if r.nr >= start && r.nr < start + count { + if let Some(fd) = r.irq_fd.as_ref() { + rawfds.push(fd.as_raw_fd()); + } else { + rawfds.push(-1); + } } } rawfds } + +#[cfg(test)] +mod tests { + use super::*; + use machine_manager::config::str_slip_to_clap; + + #[test] + fn test_vfio_config_cmdline_parser() { + // Test1: right. + let vfio_cmd1 = "vfio-pci,host=0000:1a:00.3,id=net,bus=pcie.0,addr=0x5,multifunction=on"; + let result = VfioConfig::try_parse_from(str_slip_to_clap(vfio_cmd1, true, false)); + assert!(result.is_ok()); + let vfio_config = result.unwrap(); + assert_eq!(vfio_config.host, Some("0000:1a:00.3".to_string())); + assert_eq!(vfio_config.id, "net"); + assert_eq!(vfio_config.bus, "pcie.0"); + assert_eq!(vfio_config.addr, (5, 0)); + assert_eq!(vfio_config.multifunction, Some(true)); + + // Test2: Missing bus/addr. + let vfio_cmd2 = "vfio-pci,host=0000:1a:00.3,id=net"; + let result = VfioConfig::try_parse_from(str_slip_to_clap(vfio_cmd2, true, false)); + assert!(result.is_err()); + + // Test3: `host` conflicts with `sysfsdev`. + let vfio_cmd3 = "vfio-pci,host=0000:1a:00.3,sysfsdev=/sys/bus/pci/devices/0000:00:02.0,id=net,bus=pcie.0,addr=0x5"; + let result = VfioConfig::try_parse_from(str_slip_to_clap(vfio_cmd3, true, false)); + assert!(result.is_err()); + + // Test4: Missing host/sysfsdev. 
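        // (Here `addr=0x1.0x2` is the usual slot.function form and would parse to
        // (1, 2), as Test1's `addr=0x5` -> (5, 0) shows; the command line is still
        // rejected because neither `host` nor `sysfsdev` names a device to pass
        // through.)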
+ let vfio_cmd4 = "vfio-pci,id=net,bus=pcie.0,addr=0x1.0x2"; + let result = VfioConfig::try_parse_from(str_slip_to_clap(vfio_cmd4, true, false)); + assert!(result.is_err()); + } +} diff --git a/virtio/Cargo.toml b/virtio/Cargo.toml index 41a5a5cb8beea5b0e4dc6f88ecabe80efeb8c900..b8692b39491280351478e519422301dedbe962d7 100644 --- a/virtio/Cargo.toml +++ b/virtio/Cargo.toml @@ -1,26 +1,40 @@ [package] name = "virtio" -version = "2.1.0" +version = "2.4.0" authors = ["Huawei StratoVirt Team"] -edition = "2018" +edition = "2021" license = "Mulan PSL v2" description = "Virtio devices emulation" [dependencies] -byteorder = "1.3.4" -error-chain = "0.12.4" -kvm-ioctls = "0.6.0" -libc = ">=0.2.71" -log = "0.4.8" -serde_json = "1.0.55" -vmm-sys-util = ">=0.7.0" +byteorder = "1.4.3" +thiserror = "1.0" +anyhow = "1.0" +libc = "0.2" +log = "0.4" +serde_json = "1.0" +vmm-sys-util = "0.12.1" +once_cell = "1.18.0" address_space = { path = "../address_space" } -hypervisor = { path = "../hypervisor" } machine_manager = { path = "../machine_manager" } migration = { path = "../migration" } -migration_derive = { path = "../migration_derive" } -sysbus = { path = "../sysbus" } +migration_derive = { path = "../migration/migration_derive" } util = { path = "../util" } -pci = { path = "../pci" } acpi = { path = "../acpi" } devices = {path = "../devices"} +block_backend = {path = "../block_backend"} +chardev_backend = {path = "../chardev_backend" } +ui = { path = "../ui", features = ["console"], optional = true } +trace = {path = "../trace"} +clap = { version = "=4.1.4", default-features = false, features = ["std", "derive"] } + +[features] +default = [] +virtio_gpu = ["ui", "machine_manager/virtio_gpu", "util/pixman"] +virtio_rng = [] +virtio_scsi = [] +ohui_srv = [] +vhost_vsock =[] +vhostuser_block = [] +vhostuser_net = [] +vhost_net = [] diff --git a/virtio/src/block.rs b/virtio/src/block.rs deleted file mode 100644 index 696dac59dcf3cd4a094fe10013e888819e166eef..0000000000000000000000000000000000000000 --- a/virtio/src/block.rs +++ /dev/null @@ -1,1442 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. -// -// StratoVirt is licensed under Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan -// PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. 
- -use std::cmp; -use std::convert::TryFrom; -use std::fs::{File, OpenOptions}; -use std::io::{Seek, SeekFrom, Write}; -use std::mem::size_of; -use std::os::unix::fs::OpenOptionsExt; -use std::os::unix::io::{AsRawFd, RawFd}; -use std::sync::mpsc::{channel, Receiver, Sender}; -use std::sync::{Arc, Mutex}; - -use address_space::{AddressSpace, GuestAddress}; -use error_chain::ChainedError; -use machine_manager::{ - config::{BlkDevConfig, ConfigCheck}, - event_loop::EventLoop, -}; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; -use util::aio::{Aio, AioCb, AioCompleteFunc, IoCmd, Iovec}; -use util::byte_code::ByteCode; -use util::leak_bucket::LeakBucket; -use util::loop_context::{ - read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, -}; -use util::num_ops::{read_u32, write_u32}; -use vmm_sys_util::{epoll::EventSet, eventfd::EventFd}; - -use super::errors::{ErrorKind, Result, ResultExt}; -use super::{ - Element, Queue, VirtioDevice, VirtioInterrupt, VirtioInterruptType, VIRTIO_BLK_F_FLUSH, - VIRTIO_BLK_F_RO, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_ID_BYTES, - VIRTIO_BLK_S_OK, VIRTIO_BLK_T_FLUSH, VIRTIO_BLK_T_GET_ID, VIRTIO_BLK_T_IN, VIRTIO_BLK_T_OUT, - VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_RING_INDIRECT_DESC, VIRTIO_F_VERSION_1, VIRTIO_TYPE_BLOCK, -}; - -/// Number of virtqueues. -const QUEUE_NUM_BLK: usize = 1; -/// Size of each virtqueue. -const QUEUE_SIZE_BLK: u16 = 256; -/// Used to compute the number of sectors. -const SECTOR_SHIFT: u8 = 9; -/// Size of a sector of the block device. -const SECTOR_SIZE: u64 = (0x01_u64) << SECTOR_SHIFT; -/// Size of the dummy block device. -const DUMMY_IMG_SIZE: u64 = 0; - -type SenderConfig = (Option>, u64, Option, bool); - -fn get_serial_num_config(serial_num: &str) -> Vec { - let mut id_bytes = vec![0; VIRTIO_BLK_ID_BYTES as usize]; - let bytes_to_copy = cmp::min(serial_num.len(), VIRTIO_BLK_ID_BYTES as usize); - - let serial_bytes = serial_num.as_bytes(); - id_bytes[..bytes_to_copy].clone_from_slice(&serial_bytes[..bytes_to_copy]); - id_bytes -} - -fn write_buf_mem(buf: &[u8], hva: u64) -> Result<()> { - let mut slice = unsafe { std::slice::from_raw_parts_mut(hva as *mut u8, buf.len()) }; - (&mut slice) - .write(buf) - .chain_err(|| format!("Failed to write buf(hva:{})", hva))?; - - Ok(()) -} - -#[repr(C)] -#[derive(Default, Clone, Copy)] -struct RequestOutHeader { - request_type: u32, - io_prio: u32, - sector: u64, -} - -impl RequestOutHeader { - fn is_valid(&self) -> bool { - match self.request_type { - VIRTIO_BLK_T_IN | VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_FLUSH | VIRTIO_BLK_T_GET_ID => true, - _ => { - error!( - "request type {} is not supported for block", - self.request_type - ); - false - } - } - } -} - -impl ByteCode for RequestOutHeader {} - -#[derive(Clone)] -pub struct AioCompleteCb { - queue: Arc>, - mem_space: Arc, - desc_index: u16, - rw_len: u32, - req_status_addr: GuestAddress, - interrupt_cb: Option>, - driver_features: u64, -} - -impl AioCompleteCb { - fn new( - queue: Arc>, - mem_space: Arc, - desc_index: u16, - rw_len: u32, - req_status_addr: GuestAddress, - interrupt_cb: Option>, - driver_features: u64, - ) -> Self { - AioCompleteCb { - queue, - mem_space, - desc_index, - rw_len, - req_status_addr, - interrupt_cb, - driver_features, - } - } -} - -#[derive(Clone)] -struct Request { - desc_index: u16, - out_header: RequestOutHeader, - iovec: Vec, - data_len: u64, - in_header: GuestAddress, -} - -impl Request { - fn new(mem_space: &Arc, elem: &Element) 
-> Result { - if elem.out_iovec.is_empty() || elem.in_iovec.is_empty() || elem.desc_num < 2 { - bail!( - "Missed header for block request: out {} in {} desc num {}", - elem.out_iovec.len(), - elem.in_iovec.len(), - elem.desc_num - ); - } - - let out_iov_elem = elem.out_iovec.get(0).unwrap(); - if out_iov_elem.len < size_of::() as u32 { - bail!( - "Invalid out header for block request: length {}", - out_iov_elem.len - ); - } - - let out_header = mem_space - .read_object::(out_iov_elem.addr) - .chain_err(|| { - ErrorKind::ReadObjectErr("the block's request header", out_iov_elem.addr.0) - })?; - - if !out_header.is_valid() { - bail!("Unsupported block request type"); - } - - let pos = elem.in_iovec.len() - 1; - let in_iov_elem = elem.in_iovec.get(pos).unwrap(); - if in_iov_elem.len < 1 { - bail!( - "Invalid out header for block request: length {}", - in_iov_elem.len - ); - } - - let mut request = Request { - desc_index: elem.index, - out_header, - iovec: Vec::with_capacity(elem.desc_num as usize), - data_len: 0, - in_header: in_iov_elem.addr, - }; - - match out_header.request_type { - VIRTIO_BLK_T_IN | VIRTIO_BLK_T_GET_ID => { - for (index, elem_iov) in elem.in_iovec.iter().enumerate() { - if index == elem.in_iovec.len() - 1 { - break; - } - if let Some(hva) = mem_space.get_host_address(elem_iov.addr) { - let iov = Iovec { - iov_base: hva, - iov_len: u64::from(elem_iov.len), - }; - request.iovec.push(iov); - request.data_len += u64::from(elem_iov.len); - } - } - } - VIRTIO_BLK_T_OUT => { - for (index, elem_iov) in elem.out_iovec.iter().enumerate() { - if index == 0 { - continue; - } - if let Some(hva) = mem_space.get_host_address(elem_iov.addr) { - let iov = Iovec { - iov_base: hva, - iov_len: u64::from(elem_iov.len), - }; - request.iovec.push(iov); - request.data_len += u64::from(elem_iov.len); - } - } - } - _ => (), - } - - Ok(request) - } - - #[allow(clippy::too_many_arguments)] - #[allow(clippy::borrowed_box)] - fn execute( - &self, - aio: &mut Box>, - disk: &File, - disk_sectors: u64, - serial_num: &Option, - direct: bool, - last_aio: bool, - iocompletecb: AioCompleteCb, - ) -> Result { - let mut top: u64 = self.data_len / SECTOR_SIZE; - if self.data_len % SECTOR_SIZE != 0 { - top += 1; - } - top.checked_add(self.out_header.sector) - .filter(|off| off <= &disk_sectors) - .chain_err(|| { - format!( - "offset {} invalid, disk sector {}", - self.out_header.sector, disk_sectors - ) - })?; - - let mut aiocb = AioCb { - last_aio, - file_fd: disk.as_raw_fd(), - opcode: IoCmd::Noop, - iovec: Vec::new(), - offset: (self.out_header.sector << SECTOR_SHIFT) as usize, - process: true, - iocb: None, - iocompletecb, - }; - - for iov in self.iovec.iter() { - let iovec = Iovec { - iov_base: iov.iov_base, - iov_len: iov.iov_len, - }; - aiocb.iovec.push(iovec); - } - - match self.out_header.request_type { - VIRTIO_BLK_T_IN => { - aiocb.opcode = IoCmd::Preadv; - if direct { - (*aio).as_mut().rw_aio(aiocb, SECTOR_SIZE).chain_err(|| { - "Failed to process block request for reading asynchronously" - })?; - } else { - (*aio).as_mut().rw_sync(aiocb).chain_err(|| { - "Failed to process block request for reading synchronously" - })?; - } - } - VIRTIO_BLK_T_OUT => { - aiocb.opcode = IoCmd::Pwritev; - if direct { - (*aio).as_mut().rw_aio(aiocb, SECTOR_SIZE).chain_err(|| { - "Failed to process block request for writing asynchronously" - })?; - } else { - (*aio).as_mut().rw_sync(aiocb).chain_err(|| { - "Failed to process block request for writing synchronously" - })?; - } - } - VIRTIO_BLK_T_FLUSH => { - aiocb.opcode = 
IoCmd::Fdsync; - (*aio) - .as_mut() - .rw_sync(aiocb) - .chain_err(|| "Failed to process block request for flushing")?; - } - VIRTIO_BLK_T_GET_ID => { - if let Some(serial) = serial_num { - let serial_vec = get_serial_num_config(serial); - - for iov in self.iovec.iter() { - if (iov.iov_len as usize) < serial_vec.len() { - bail!( - "The buffer length {} is less than the length {} of serial num", - iov.iov_len, - serial_vec.len() - ); - } - write_buf_mem(&serial_vec, iov.iov_base) - .chain_err(|| "Failed to write buf for virtio block id")?; - } - } - - return Ok(1); - } - _ => bail!( - "The type {} of block request is not supported", - self.out_header.request_type - ), - }; - Ok(0) - } - - fn get_req_sector_num(&self) -> u64 { - self.data_len / SECTOR_SIZE - } -} - -/// Control block of Block IO. -struct BlockIoHandler { - /// The virtqueue. - queue: Arc>, - /// Eventfd of the virtqueue for IO event. - queue_evt: EventFd, - /// The address space to which the block device belongs. - mem_space: Arc, - /// The image file opened by the block device. - disk_image: Option>, - /// The number of sectors of the disk image. - disk_sectors: u64, - /// Serial number of the block device. - serial_num: Option, - /// if use direct access io. - direct: bool, - /// Aio context. - aio: Option>>, - /// Bit mask of features negotiated by the backend and the frontend. - driver_features: u64, - /// The receiving half of Rust's channel to receive the image file. - receiver: Receiver, - /// Eventfd for config space update. - update_evt: RawFd, - /// Eventfd for device deactivate. - deactivate_evt: RawFd, - /// Callback to trigger an interrupt. - interrupt_cb: Arc, - /// thread name of io handler - iothread: Option, - /// Using the leak bucket to implement IO limits - leak_bucket: Option, -} - -impl BlockIoHandler { - fn merge_req_queue(&self, mut req_queue: Vec) -> Vec { - if req_queue.len() == 1 { - return req_queue; - } - - req_queue.sort_by(|a, b| a.out_header.sector.cmp(&b.out_header.sector)); - let mut merge_req_queue = Vec::::new(); - let mut continue_merge: bool = false; - - for req in &req_queue { - if continue_merge { - if let Some(last_req) = merge_req_queue.last_mut() { - if last_req.out_header.sector + last_req.get_req_sector_num() - != req.out_header.sector - { - continue_merge = false; - merge_req_queue.push(req.clone()); - } else { - for iov in req.iovec.iter() { - let iovec = Iovec { - iov_base: iov.iov_base, - iov_len: iov.iov_len, - }; - last_req.data_len += iovec.iov_len; - last_req.iovec.push(iovec); - } - } - } - } else { - merge_req_queue.push(req.clone()); - } - } - - merge_req_queue - } - - fn process_queue(&mut self) -> Result { - let mut req_queue = Vec::new(); - let mut req_index = 0; - let mut last_aio_req_index = 0; - let mut need_interrupt = false; - let mut done = false; - - let mut queue = self.queue.lock().unwrap(); - - while let Ok(elem) = queue.vring.pop_avail(&self.mem_space, self.driver_features) { - // limit io operations if iops is configured - if let Some(lb) = self.leak_bucket.as_mut() { - if let Some(ctx) = EventLoop::get_ctx(self.iothread.as_ref()) { - if lb.throttled(ctx, 1_u64) { - queue.vring.push_back(); - break; - } - } else { - bail!( - "IOThread {:?} of Block is not found in cmdline.", - self.iothread, - ); - }; - } - - match Request::new(&self.mem_space, &elem) { - Ok(req) => { - match req.out_header.request_type { - VIRTIO_BLK_T_IN | VIRTIO_BLK_T_OUT => { - last_aio_req_index = req_index; - } - _ => {} - } - req_queue.push(req); - req_index += 1; - done = true; - 
} - Err(ref e) => { - // If it fails, also need to free descriptor table entry. - queue - .vring - .add_used(&self.mem_space, elem.index, 0) - .chain_err(|| "Failed to add used ring")?; - need_interrupt = true; - - error!( - "failed to create block request, {}", - error_chain::ChainedError::display_chain(e) - ); - } - }; - } - - // unlock queue, because it will be hold below. - drop(queue); - - let merge_req_queue = self.merge_req_queue(req_queue); - - if let Some(disk_img) = self.disk_image.as_mut() { - req_index = 0; - for req in merge_req_queue.iter() { - if let Some(ref mut aio) = self.aio { - let rw_len = match req.out_header.request_type { - VIRTIO_BLK_T_IN => u32::try_from(req.data_len) - .chain_err(|| "Convert block request len to u32 with overflow.")?, - _ => 0u32, - }; - - let aiocompletecb = AioCompleteCb::new( - self.queue.clone(), - self.mem_space.clone(), - req.desc_index, - rw_len, - req.in_header, - Some(self.interrupt_cb.clone()), - self.driver_features, - ); - - match req.execute( - aio, - disk_img, - self.disk_sectors, - &self.serial_num, - self.direct, - last_aio_req_index == req_index, - aiocompletecb, - ) { - Ok(v) => { - if v == 1 { - // get device id - self.mem_space - .write_object(&VIRTIO_BLK_S_OK, req.in_header) - .chain_err(|| "Failed to write result for the request for block with device id")?; - self.queue.lock().unwrap().vring.add_used( - &self.mem_space, - req.desc_index, - 1, - ).chain_err(|| "Failed to add the request for block with device id to used ring")?; - - if self - .queue - .lock() - .unwrap() - .vring - .should_notify(&self.mem_space, self.driver_features) - { - need_interrupt = true; - } - } - } - Err(ref e) => { - error!( - "Failed to execute block request, {}", - error_chain::ChainedError::display_chain(e) - ); - } - } - req_index += 1; - } - } - } else if !merge_req_queue.is_empty() { - for req in merge_req_queue.iter() { - self.queue - .lock() - .unwrap() - .vring - .add_used(&self.mem_space, req.desc_index, 1) - .chain_err(|| { - "Failed to add used ring, when block request queue isn't empty" - })?; - } - need_interrupt = true - } - - if need_interrupt { - (self.interrupt_cb)( - &VirtioInterruptType::Vring, - Some(&self.queue.lock().unwrap()), - ) - .chain_err(|| ErrorKind::InterruptTrigger("block", VirtioInterruptType::Vring))?; - } - - Ok(done) - } - - fn build_aio(&self) -> Result>> { - let complete_func = Arc::new(Box::new(move |aiocb: &AioCb, ret: i64| { - let status = if ret < 0 { - ret - } else { - i64::from(VIRTIO_BLK_S_OK) - }; - - let complete_cb = &aiocb.iocompletecb; - if let Err(ref e) = complete_cb - .mem_space - .write_object(&status, complete_cb.req_status_addr) - { - error!( - "Failed to write the status (aio completion) {}", - error_chain::ChainedError::display_chain(e) - ); - return; - } - - let mut queue_lock = complete_cb.queue.lock().unwrap(); - if let Err(ref e) = queue_lock.vring.add_used( - &complete_cb.mem_space, - complete_cb.desc_index, - complete_cb.rw_len, - ) { - error!( - "Failed to add used ring(aio completion), index {}, len {} {}", - complete_cb.desc_index, - complete_cb.rw_len, - error_chain::ChainedError::display_chain(e), - ); - return; - } - - if queue_lock - .vring - .should_notify(&complete_cb.mem_space, complete_cb.driver_features) - { - if let Err(e) = (*complete_cb.interrupt_cb.as_ref().unwrap())( - &VirtioInterruptType::Vring, - Some(&queue_lock), - ) { - error!( - "Failed to trigger interrupt(aio completion) for block device, error is {}", - e.display_chain() - ); - } - } - }) as AioCompleteFunc); 
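        // To summarize the callback above: on each aio completion it writes the
        // virtio status byte back into guest memory, pushes the descriptor onto the
        // used ring with the number of bytes transferred, and raises a vring
        // interrupt only when should_notify() reports the driver asked for one.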
- - Ok(Box::new(Aio::new(complete_func)?)) - } - - fn update_evt_handler(&mut self) { - match self.receiver.recv() { - Ok((image, disk_sectors, serial_num, direct)) => { - self.disk_sectors = disk_sectors; - self.disk_image = image; - self.serial_num = serial_num; - self.direct = direct; - } - Err(_) => { - self.disk_sectors = 0; - self.disk_image = None; - self.serial_num = None; - self.direct = true; - } - }; - - if let Err(ref e) = self.process_queue() { - error!( - "Failed to handle block IO for updating handler {}", - error_chain::ChainedError::display_chain(e) - ); - } - } - - fn deactivate_evt_handler(&mut self) -> Vec { - let mut notifiers = vec![ - EventNotifier::new( - NotifierOperation::Delete, - self.update_evt, - None, - EventSet::IN, - Vec::new(), - ), - EventNotifier::new( - NotifierOperation::Delete, - self.deactivate_evt, - None, - EventSet::IN, - Vec::new(), - ), - EventNotifier::new( - NotifierOperation::Delete, - self.queue_evt.as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - ), - ]; - if let Some(lb) = self.leak_bucket.as_ref() { - notifiers.push(EventNotifier::new( - NotifierOperation::Delete, - lb.as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - )); - } - if let Some(aio) = &self.aio { - notifiers.push(EventNotifier::new( - NotifierOperation::Delete, - aio.fd.as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - )); - } - - notifiers - } -} - -fn build_event_notifier(fd: RawFd, handler: Box) -> EventNotifier { - EventNotifier::new( - NotifierOperation::AddShared, - fd, - None, - EventSet::IN, - vec![Arc::new(Mutex::new(handler))], - ) -} - -impl EventNotifierHelper for BlockIoHandler { - fn internal_notifiers(handler: Arc>) -> Vec { - let handler_raw = handler.lock().unwrap(); - let mut notifiers = Vec::new(); - - // Register event notifier for update_evt. - let h_clone = handler.clone(); - let h: Box = Box::new(move |_, fd: RawFd| { - read_fd(fd); - h_clone.lock().unwrap().update_evt_handler(); - None - }); - notifiers.push(build_event_notifier(handler_raw.update_evt, h)); - - // Register event notifier for deactivate_evt. - let h_clone = handler.clone(); - let h: Box = Box::new(move |_, fd: RawFd| { - read_fd(fd); - Some(h_clone.lock().unwrap().deactivate_evt_handler()) - }); - notifiers.push(build_event_notifier(handler_raw.deactivate_evt, h)); - - // Register event notifier for queue_evt. 
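        // Two callbacks are attached to this eventfd below: the normal fd-triggered
        // handler and a second `handler_iopoll` closure; marking the notifier with
        // `io_poll = true` presumably lets the event loop also drive the queue by
        // polling rather than waiting on the eventfd alone.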
- let h_clone = handler.clone(); - let h: Box = Box::new(move |_, fd: RawFd| { - read_fd(fd); - - if let Err(ref e) = h_clone.lock().unwrap().process_queue() { - error!( - "Failed to handle block IO {}", - error_chain::ChainedError::display_chain(e) - ); - } - None - }); - - let h_clone = handler.clone(); - let handler_iopoll: Box = Box::new(move |_, _fd: RawFd| { - let done = h_clone - .lock() - .unwrap() - .process_queue() - .chain_err(|| "Failed to handle block IO") - .ok()?; - if done { - Some(Vec::new()) - } else { - None - } - }); - - let mut e = EventNotifier::new( - NotifierOperation::AddShared, - handler_raw.queue_evt.as_raw_fd(), - None, - EventSet::IN, - vec![ - Arc::new(Mutex::new(h)), - Arc::new(Mutex::new(handler_iopoll)), - ], - ); - e.io_poll = true; - - notifiers.push(e); - - // Register timer event notifier for IO limits - if let Some(lb) = handler_raw.leak_bucket.as_ref() { - let h_clone = handler.clone(); - let h: Box = Box::new(move |_, fd: RawFd| { - read_fd(fd); - - if let Some(lb) = h_clone.lock().unwrap().leak_bucket.as_mut() { - lb.clear_timer(); - } - - if let Err(ref e) = h_clone.lock().unwrap().process_queue() { - error!( - "Failed to handle block IO {}", - error_chain::ChainedError::display_chain(e) - ); - } - None - }); - notifiers.push(build_event_notifier(lb.as_raw_fd(), h)); - } - - // Register event notifier for aio. - if let Some(ref aio) = handler_raw.aio { - let h_clone = handler.clone(); - let h: Box = Box::new(move |_, fd: RawFd| { - read_fd(fd); - - if let Some(aio) = &mut h_clone.lock().unwrap().aio { - if let Err(ref e) = aio.handle() { - error!( - "Failed to handle aio, {}", - error_chain::ChainedError::display_chain(e) - ); - } - } - None - }); - - let h_clone = handler.clone(); - let handler_iopoll: Box = Box::new(move |_, _fd: RawFd| { - let mut done = false; - if let Some(aio) = &mut h_clone.lock().unwrap().aio { - done = aio.handle().chain_err(|| "Failed to handle aio").ok()?; - } - if done { - Some(Vec::new()) - } else { - None - } - }); - - let mut e = EventNotifier::new( - NotifierOperation::AddShared, - aio.fd.as_raw_fd(), - None, - EventSet::IN, - vec![ - Arc::new(Mutex::new(h)), - Arc::new(Mutex::new(handler_iopoll)), - ], - ); - e.io_poll = true; - - notifiers.push(e); - } - - notifiers - } -} - -/// State of block device. -#[repr(C)] -#[derive(Clone, Copy, Desc, ByteCode)] -#[desc_version(compat_version = "0.1.0")] -pub struct BlockState { - /// Bitmask of features supported by the backend. - device_features: u64, - /// Bit mask of features negotiated by the backend and the frontend. - driver_features: u64, - /// Config space of the block device. - config_space: [u8; 16], -} - -/// Block device structure. -pub struct Block { - /// Configuration of the block device. - blk_cfg: BlkDevConfig, - /// Image file opened. - disk_image: Option>, - /// Number of sectors of the image file. - disk_sectors: u64, - /// Status of block device. - state: BlockState, - /// Callback to trigger interrupt. - interrupt_cb: Option>, - /// The sending half of Rust's channel to send the image file. - sender: Option>, - /// Eventfd for config space update. - update_evt: EventFd, - /// Eventfd for device deactivate. 
- deactivate_evt: EventFd, -} - -impl Default for Block { - fn default() -> Self { - Block { - blk_cfg: Default::default(), - disk_image: None, - disk_sectors: 0, - state: BlockState::default(), - interrupt_cb: None, - sender: None, - update_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - deactivate_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - } - } -} - -impl Block { - pub fn new(blk_cfg: BlkDevConfig) -> Block { - Self { - blk_cfg, - disk_image: None, - disk_sectors: 0, - state: BlockState::default(), - interrupt_cb: None, - sender: None, - update_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - deactivate_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - } - } - - fn build_device_config_space(&mut self) { - // capacity: 64bits - let num_sectors = DUMMY_IMG_SIZE >> SECTOR_SHIFT; - for i in 0..8 { - self.state.config_space[i] = (num_sectors >> (8 * i)) as u8; - } - - // seg_max=128-2: 32bits - for i in 0..4 { - self.state.config_space[12 + i] = (126 >> (8 * i)) as u8; - } - } -} - -impl VirtioDevice for Block { - /// Realize virtio block device. - fn realize(&mut self) -> Result<()> { - // if iothread not found, return err - if self.blk_cfg.iothread.is_some() - && EventLoop::get_ctx(self.blk_cfg.iothread.as_ref()).is_none() - { - bail!( - "IOThread {:?} of Block is not configured in params.", - self.blk_cfg.iothread, - ); - } - - self.state.device_features = (1_u64 << VIRTIO_F_VERSION_1) | (1_u64 << VIRTIO_BLK_F_FLUSH); - if self.blk_cfg.read_only { - self.state.device_features |= 1_u64 << VIRTIO_BLK_F_RO; - }; - self.state.device_features |= 1_u64 << VIRTIO_F_RING_INDIRECT_DESC; - self.state.device_features |= 1_u64 << VIRTIO_BLK_F_SIZE_MAX; - self.state.device_features |= 1_u64 << VIRTIO_BLK_F_SEG_MAX; - self.state.device_features |= 1_u64 << VIRTIO_F_RING_EVENT_IDX; - - self.build_device_config_space(); - - let mut disk_size = DUMMY_IMG_SIZE; - - if !self.blk_cfg.path_on_host.is_empty() { - self.disk_image = None; - - let mut file = if self.blk_cfg.direct { - OpenOptions::new() - .read(true) - .write(!self.blk_cfg.read_only) - .custom_flags(libc::O_DIRECT) - .open(&self.blk_cfg.path_on_host) - .chain_err(|| { - format!( - "failed to open the file by O_DIRECT for block {}", - self.blk_cfg.path_on_host - ) - })? - } else { - OpenOptions::new() - .read(true) - .write(!self.blk_cfg.read_only) - .open(&self.blk_cfg.path_on_host) - .chain_err(|| { - format!( - "failed to open the file for block {}", - self.blk_cfg.path_on_host - ) - })? - }; - - disk_size = file - .seek(SeekFrom::End(0)) - .chain_err(|| "Failed to seek the end for block")? as u64; - - self.disk_image = Some(Arc::new(file)); - } else { - self.disk_image = None; - } - - self.disk_sectors = disk_size >> SECTOR_SHIFT; - for i in 0..8 { - self.state.config_space[i] = (self.disk_sectors >> (8 * i)) as u8; - } - - Ok(()) - } - - fn unrealize(&mut self) -> Result<()> { - MigrationManager::unregister_device_instance_mutex_by_id( - BlockState::descriptor(), - &self.blk_cfg.id, - ); - Ok(()) - } - - /// Get the virtio device type, refer to Virtio Spec. - fn device_type(&self) -> u32 { - VIRTIO_TYPE_BLOCK - } - - /// Get the count of virtio device queues. - fn queue_num(&self) -> usize { - QUEUE_NUM_BLK - } - - /// Get the queue size of virtio device. - fn queue_size(&self) -> u16 { - QUEUE_SIZE_BLK - } - - /// Get device features from host. - fn get_device_features(&self, features_select: u32) -> u32 { - read_u32(self.state.device_features, features_select) - } - - /// Set driver features by guest. 
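    /// Any acked bit that was never offered in `device_features` is masked off, so
    /// `driver_features` stays a subset of what the device advertised; e.g. offering
    /// only VIRTIO_F_VERSION_1 and receiving an ack for VIRTIO_F_RING_INDIRECT_DESC
    /// leaves `driver_features` unchanged.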
- fn set_driver_features(&mut self, page: u32, value: u32) { - let mut v = write_u32(value, page); - let unrequested_features = v & !self.state.device_features; - if unrequested_features != 0 { - v &= !unrequested_features; - } - self.state.driver_features |= v; - } - - /// Read data of config from guest. - fn read_config(&self, offset: u64, mut data: &mut [u8]) -> Result<()> { - let config_len = self.state.config_space.len() as u64; - if offset >= config_len { - return Err(ErrorKind::DevConfigOverflow(offset, config_len).into()); - } - if let Some(end) = offset.checked_add(data.len() as u64) { - data.write_all( - &self.state.config_space[offset as usize..cmp::min(end, config_len) as usize], - )?; - } - - Ok(()) - } - - /// Write data to config from guest. - fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { - let data_len = data.len(); - let config_len = self.state.config_space.len(); - if offset as usize + data_len > config_len { - return Err(ErrorKind::DevConfigOverflow(offset, config_len as u64).into()); - } - - self.state.config_space[(offset as usize)..(offset as usize + data_len)] - .copy_from_slice(data); - - Ok(()) - } - - /// Activate the virtio device, this function is called by vcpu thread when frontend - /// virtio driver is ready and write `DRIVER_OK` to backend. - fn activate( - &mut self, - mem_space: Arc, - interrupt_cb: Arc, - queues: &[Arc>], - mut queue_evts: Vec, - ) -> Result<()> { - self.interrupt_cb = Some(interrupt_cb.clone()); - let (sender, receiver) = channel(); - self.sender = Some(sender); - - let mut handler = BlockIoHandler { - queue: queues[0].clone(), - queue_evt: queue_evts.remove(0), - mem_space, - disk_image: self.disk_image.clone(), - disk_sectors: self.disk_sectors, - direct: self.blk_cfg.direct, - serial_num: self.blk_cfg.serial_num.clone(), - aio: None, - driver_features: self.state.driver_features, - receiver, - update_evt: self.update_evt.as_raw_fd(), - deactivate_evt: self.deactivate_evt.as_raw_fd(), - interrupt_cb, - iothread: self.blk_cfg.iothread.clone(), - leak_bucket: self.blk_cfg.iops.map(LeakBucket::new), - }; - - handler.aio = Some(handler.build_aio()?); - - EventLoop::update_event( - EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))), - self.blk_cfg.iothread.as_ref(), - )?; - - Ok(()) - } - - fn deactivate(&mut self) -> Result<()> { - self.deactivate_evt - .write(1) - .chain_err(|| ErrorKind::EventFdWrite) - } - - fn update_config(&mut self, dev_config: Option>) -> Result<()> { - if let Some(conf) = dev_config { - self.blk_cfg = conf - .as_any() - .downcast_ref::() - .unwrap() - .clone(); - } else { - self.blk_cfg = Default::default(); - } - - self.realize()?; - - if let Some(sender) = &self.sender { - sender - .send(( - self.disk_image.take(), - self.disk_sectors, - self.blk_cfg.serial_num.clone(), - self.blk_cfg.direct, - )) - .chain_err(|| ErrorKind::ChannelSend("image fd".to_string()))?; - - self.update_evt - .write(1) - .chain_err(|| ErrorKind::EventFdWrite)?; - } - - if let Some(interrupt_cb) = &self.interrupt_cb { - interrupt_cb(&VirtioInterruptType::Config, None) - .chain_err(|| ErrorKind::InterruptTrigger("block", VirtioInterruptType::Config))?; - } - - Ok(()) - } -} - -// Send and Sync is not auto-implemented for `Sender` type. -// Implementing them is safe because `Sender` field of Block won't change in migration -// workflow. 
-unsafe impl Sync for Block {} - -impl StateTransfer for Block { - fn get_state_vec(&self) -> migration::errors::Result> { - Ok(self.state.as_bytes().to_vec()) - } - - fn set_state_mut(&mut self, state: &[u8]) -> migration::errors::Result<()> { - self.state = *BlockState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("BLOCK"))?; - - Ok(()) - } - - fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&BlockState::descriptor().name) { - alias - } else { - !0 - } - } -} - -impl MigrationHook for Block {} - -#[cfg(test)] -mod tests { - use super::super::*; - use super::*; - use address_space::{AddressSpace, GuestAddress, HostMemMapping, Region}; - use machine_manager::config::IothreadConfig; - use std::sync::atomic::{AtomicU32, Ordering}; - use std::{thread, time::Duration}; - use vmm_sys_util::tempfile::TempFile; - - const CONFIG_SPACE_SIZE: usize = 16; - const VIRTQ_DESC_F_NEXT: u16 = 0x01; - const VIRTQ_DESC_F_WRITE: u16 = 0x02; - const SYSTEM_SPACE_SIZE: u64 = (1024 * 1024) as u64; - - // build dummy address space of vm - fn address_space_init() -> Arc { - let root = Region::init_container_region(1 << 36); - let sys_space = AddressSpace::new(root).unwrap(); - let host_mmap = Arc::new( - HostMemMapping::new( - GuestAddress(0), - None, - SYSTEM_SPACE_SIZE, - None, - false, - false, - false, - ) - .unwrap(), - ); - sys_space - .root() - .add_subregion( - Region::init_ram_region(host_mmap.clone()), - host_mmap.start_address().raw_value(), - ) - .unwrap(); - sys_space - } - - // Use different input parameters to verify block `new()` and `realize()` functionality. - #[test] - fn test_block_init() { - // New block device - let mut block = Block::default(); - assert_eq!(block.disk_sectors, 0); - assert_eq!(block.state.device_features, 0); - assert_eq!(block.state.driver_features, 0); - assert_eq!(block.state.config_space.len(), 16); - assert!(block.disk_image.is_none()); - assert!(block.interrupt_cb.is_none()); - assert!(block.sender.is_none()); - - // Realize block device: create TempFile as backing file. - block.blk_cfg.read_only = true; - block.blk_cfg.direct = false; - let f = TempFile::new().unwrap(); - block.blk_cfg.path_on_host = f.as_path().to_str().unwrap().to_string(); - assert!(block.realize().is_ok()); - - assert_eq!(block.device_type(), VIRTIO_TYPE_BLOCK); - assert_eq!(block.queue_num(), QUEUE_NUM_BLK); - assert_eq!(block.queue_size(), QUEUE_SIZE_BLK); - } - - // Test `write_config` and `read_config`. The main contests include: compare expect data and - // read date are same; Input invalid offset or date length, it will failed. - #[test] - fn test_read_write_config() { - let mut block = Block::default(); - block.realize().unwrap(); - - let expect_config_space: [u8; 8] = [0x00, 020, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00]; - let mut read_config_space = [0u8; 8]; - block.write_config(0, &expect_config_space).unwrap(); - block.read_config(0, &mut read_config_space).unwrap(); - assert_eq!(read_config_space, expect_config_space); - - // Invalid write - assert!(block - .write_config(CONFIG_SPACE_SIZE as u64 + 1, &expect_config_space) - .is_err()); - let errlen_config_space = [0u8; 17]; - assert!(block.write_config(0, &errlen_config_space).is_err()); - // Invalid read - read_config_space = expect_config_space; - assert!(block - .read_config(CONFIG_SPACE_SIZE as u64 + 1, &mut read_config_space) - .is_err()); - } - - // Test `get_device_features` and `set_driver_features`. 
The main contests include: If the - // device feature is 0, all driver features are not supported; If both the device feature bit - // and the front-end driver feature bit are supported at the same time, this driver feature - // bit is supported. - #[test] - fn test_block_features() { - let mut block = Block::default(); - - // If the device feature is 0, all driver features are not supported. - block.state.device_features = 0; - let driver_feature: u32 = 0xFF; - let page = 0_u32; - block.set_driver_features(page, driver_feature); - assert_eq!(block.state.driver_features, 0_u64); - assert_eq!(block.get_device_features(0_u32), 0_u32); - - let driver_feature: u32 = 0xFF; - let page = 1_u32; - block.set_driver_features(page, driver_feature); - assert_eq!(block.state.driver_features, 0_u64); - assert_eq!(block.get_device_features(1_u32), 0_u32); - - // If both the device feature bit and the front-end driver feature bit are - // supported at the same time, this driver feature bit is supported. - block.state.device_features = - 1_u64 << VIRTIO_F_VERSION_1 | 1_u64 << VIRTIO_F_RING_INDIRECT_DESC; - let driver_feature: u32 = (1_u64 << VIRTIO_F_RING_INDIRECT_DESC) as u32; - let page = 0_u32; - block.set_driver_features(page, driver_feature); - assert_eq!( - block.state.driver_features, - (1_u64 << VIRTIO_F_RING_INDIRECT_DESC) - ); - assert_eq!( - block.get_device_features(page), - (1_u32 << VIRTIO_F_RING_INDIRECT_DESC) - ); - block.state.driver_features = 0; - - block.state.device_features = 1_u64 << VIRTIO_F_VERSION_1; - let driver_feature: u32 = (1_u64 << VIRTIO_F_RING_INDIRECT_DESC) as u32; - let page = 0_u32; - block.set_driver_features(page, driver_feature); - assert_eq!(block.state.driver_features, 0); - assert_eq!(block.get_device_features(page), 0_u32); - block.state.driver_features = 0; - } - - // Test `get_serial_num_config`. The function will output the shorter length between 20 - // with serial_num length. - #[test] - fn test_serial_num_config() { - let serial_num = "fldXlNNdCeqMvoIfEFogBxlL"; - let serial_num_arr = serial_num.as_bytes(); - let id_bytes = get_serial_num_config(&serial_num); - assert_eq!(id_bytes[..], serial_num_arr[..20]); - assert_eq!(id_bytes.len(), 20); - - let serial_num = "7681194149"; - let serial_num_arr = serial_num.as_bytes(); - let id_bytes = get_serial_num_config(&serial_num); - assert_eq!(id_bytes[..10], serial_num_arr[..]); - assert_eq!(id_bytes.len(), 20); - - let serial_num = ""; - let id_bytes_temp = get_serial_num_config(&serial_num); - assert_eq!(id_bytes_temp[..], [0; 20]); - assert_eq!(id_bytes_temp.len(), 20); - } - - // Test iothread and qos capability. The function will spawn a thread called 'iothread', then - // io request will be handled by this thread. 
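    // The 100-IOPS cap configured below is enforced by the LeakBucket created in
    // activate(): when the bucket is exhausted, process_queue() puts the element
    // back on the avail ring and retries once the bucket's timer fires. A rough
    // sketch of that gate, assuming a hypothetical token counter refilled by the
    // timer (not the real LeakBucket API), is:
    //
    //     fn should_throttle(tokens: &mut u64, cost: u64) -> bool {
    //         // true => park the request until the refill timer adds tokens back
    //         if *tokens < cost {
    //             return true;
    //         }
    //         *tokens -= cost;
    //         false
    //     }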
- #[test] - fn test_iothread() { - let thread_name = "io1".to_string(); - - // spawn io thread - let io_conf = IothreadConfig { - id: thread_name.clone(), - }; - EventLoop::object_init(&Some(vec![io_conf])).unwrap(); - - let mut block = Block::default(); - let file = TempFile::new().unwrap(); - block.blk_cfg.path_on_host = file.as_path().to_str().unwrap().to_string(); - - // config iothread and iops - block.blk_cfg.iothread = Some(thread_name); - block.blk_cfg.iops = Some(100); - - let mem_space = address_space_init(); - let interrupt_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - let interrupt_status = Arc::new(AtomicU32::new(0)); - let interrupt_cb = Arc::new(Box::new( - move |int_type: &VirtioInterruptType, _queue: Option<&Queue>| { - let status = match int_type { - VirtioInterruptType::Config => VIRTIO_MMIO_INT_CONFIG, - VirtioInterruptType::Vring => VIRTIO_MMIO_INT_VRING, - }; - interrupt_status.fetch_or(status as u32, Ordering::SeqCst); - interrupt_evt - .write(1) - .chain_err(|| ErrorKind::EventFdWrite)?; - - Ok(()) - }, - ) as VirtioInterrupt); - - let mut queue_config = QueueConfig::new(QUEUE_SIZE_BLK); - queue_config.desc_table = GuestAddress(0); - queue_config.addr_cache.desc_table_host = - mem_space.get_host_address(queue_config.desc_table).unwrap(); - queue_config.avail_ring = GuestAddress(16 * QUEUE_SIZE_BLK as u64); - queue_config.addr_cache.avail_ring_host = - mem_space.get_host_address(queue_config.avail_ring).unwrap(); - queue_config.used_ring = GuestAddress(32 * QUEUE_SIZE_BLK as u64); - queue_config.addr_cache.used_ring_host = - mem_space.get_host_address(queue_config.used_ring).unwrap(); - queue_config.size = QUEUE_SIZE_BLK; - queue_config.ready = true; - - let queues: Vec>> = - vec![Arc::new(Mutex::new(Queue::new(queue_config, 1).unwrap()))]; - let event = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - - // activate block device - block - .activate( - mem_space.clone(), - interrupt_cb, - &queues, - vec![event.try_clone().unwrap()], - ) - .unwrap(); - - // make first descriptor entry - let desc = SplitVringDesc { - addr: GuestAddress(0x100), - len: 16, - flags: VIRTQ_DESC_F_NEXT, - next: 1, - }; - mem_space - .write_object::(&desc, GuestAddress(queue_config.desc_table.0)) - .unwrap(); - - // write RequestOutHeader to first desc - let req_head = RequestOutHeader { - request_type: 0, // read - io_prio: 0, - sector: 0, - }; - mem_space - .write_object::(&req_head, GuestAddress(0x100)) - .unwrap(); - - // making the second descriptor entry to receive data from device - let desc = SplitVringDesc { - addr: GuestAddress(0x200), - len: 16, - flags: VIRTQ_DESC_F_WRITE, - next: 2, - }; - mem_space - .write_object::( - &desc, - GuestAddress(queue_config.desc_table.0 + 16 as u64), - ) - .unwrap(); - - // write avail_ring idx - mem_space - .write_object::(&0, GuestAddress(queue_config.avail_ring.0 + 4 as u64)) - .unwrap(); - - // write avail_ring id - mem_space - .write_object::(&1, GuestAddress(queue_config.avail_ring.0 + 2 as u64)) - .unwrap(); - - // imitating guest OS to send notification. 
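        // Writing 1 to the queue eventfd stands in for a guest "kick": it wakes the
        // BlockIoHandler that activate() registered on the io1 thread, which then
        // pops the avail-ring entry prepared above.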
- event.write(1).unwrap(); - - // waiting for io handled - let mut wait = 10; // wait for 2 seconds - loop { - thread::sleep(Duration::from_millis(200)); - - wait -= 1; - if wait == 0 { - assert_eq!(0, 1); // timeout failed - } - - // get used_ring data - let idx = mem_space - .read_object::(GuestAddress(queue_config.used_ring.0 + 2 as u64)) - .unwrap(); - if idx == 1 { - break; - } - } - } -} diff --git a/virtio/src/console.rs b/virtio/src/console.rs deleted file mode 100644 index 1192d28f95af604817947805aed99eba55325854..0000000000000000000000000000000000000000 --- a/virtio/src/console.rs +++ /dev/null @@ -1,570 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. -// -// StratoVirt is licensed under Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan -// PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. - -use std::io::Write; -use std::os::unix::io::{AsRawFd, RawFd}; -use std::sync::{Arc, Mutex}; -use std::{cmp, usize}; - -use address_space::AddressSpace; -use devices::legacy::{Chardev, InputReceiver}; -use error_chain::ChainedError; -use machine_manager::{ - config::{ChardevType, VirtioConsole}, - event_loop::EventLoop, -}; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; -use util::byte_code::ByteCode; -use util::loop_context::{read_fd, EventNotifier, EventNotifierHelper, NotifierOperation}; -use util::num_ops::{read_u32, write_u32}; -use vmm_sys_util::epoll::EventSet; -use vmm_sys_util::eventfd::EventFd; - -use super::errors::{ErrorKind, Result, ResultExt}; -use super::{ - Queue, VirtioDevice, VirtioInterrupt, VirtioInterruptType, VIRTIO_CONSOLE_F_SIZE, - VIRTIO_F_VERSION_1, VIRTIO_TYPE_CONSOLE, -}; - -/// Number of virtqueues. -const QUEUE_NUM_CONSOLE: usize = 2; -/// Size of virtqueue. -const QUEUE_SIZE_CONSOLE: u16 = 256; - -const BUFF_SIZE: usize = 4096; - -#[derive(Copy, Clone, Debug, Default)] -#[repr(C)] -struct VirtioConsoleConfig { - max_nr_ports: u32, - emerg_wr: u32, -} - -impl ByteCode for VirtioConsoleConfig {} - -impl VirtioConsoleConfig { - /// Create configuration of virtio-console devices. 
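    /// Only a single port is advertised (`max_nr_ports` = 1) and the emergency
    /// write field is left at 0, matching the one rx/tx queue pair this device
    /// exposes (QUEUE_NUM_CONSOLE = 2).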
- pub fn new() -> Self { - VirtioConsoleConfig { - max_nr_ports: 1_u32, - emerg_wr: 0_u32, - } - } -} - -struct ConsoleHandler { - input_queue: Arc>, - output_queue: Arc>, - output_queue_evt: EventFd, - deactivate_evt: RawFd, - mem_space: Arc, - interrupt_cb: Arc, - driver_features: u64, - chardev: Arc>, -} - -impl InputReceiver for ConsoleHandler { - #[allow(clippy::useless_asref)] - fn input_handle(&mut self, buffer: &[u8]) { - let mut queue_lock = self.input_queue.lock().unwrap(); - - let count = buffer.len(); - if count == 0 { - return; - } - - while let Ok(elem) = queue_lock - .vring - .pop_avail(&self.mem_space, self.driver_features) - { - let mut write_count = 0_usize; - for elem_iov in elem.in_iovec.iter() { - let allow_write_count = cmp::min(write_count + elem_iov.len as usize, count); - let source_slice = &buffer[write_count..allow_write_count]; - - let write_result = self.mem_space.write( - &mut source_slice.as_ref(), - elem_iov.addr, - source_slice.len() as u64, - ); - match write_result { - Ok(_) => { - write_count = allow_write_count; - } - Err(ref e) => { - error!( - "Failed to write slice for input console: addr {:X} len {} {}", - elem_iov.addr.0, - source_slice.len(), - e.display_chain() - ); - break; - } - } - } - - if let Err(ref e) = - queue_lock - .vring - .add_used(&self.mem_space, elem.index, write_count as u32) - { - error!( - "Failed to add used ring for input console, index: {} len: {} {}", - elem.index, - write_count, - e.display_chain() - ); - break; - } - - if write_count >= count { - break; - } - } - - if let Err(ref e) = (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue_lock)) { - error!( - "Failed to trigger interrupt for console, int-type {:?} {} ", - VirtioInterruptType::Vring, - e.display_chain() - ) - } - } - - fn get_remain_space_size(&mut self) -> usize { - BUFF_SIZE - } -} - -impl ConsoleHandler { - fn output_handle(&mut self) { - let mut queue_lock = self.output_queue.lock().unwrap(); - let mut buffer = [0_u8; 4096]; - - while let Ok(elem) = queue_lock - .vring - .pop_avail(&self.mem_space, self.driver_features) - { - let mut read_count = 0_usize; - for elem_iov in elem.out_iovec.iter() { - let allow_read_count = cmp::min(read_count + elem_iov.len as usize, buffer.len()); - let mut slice = &mut buffer[read_count..allow_read_count]; - - let read_result = self.mem_space.read( - &mut slice, - elem_iov.addr, - (allow_read_count - read_count) as u64, - ); - match read_result { - Ok(_) => { - read_count = allow_read_count; - } - Err(ref e) => { - error!( - "Failed to read buffer for output console: addr: {:X}, len: {} {}", - elem_iov.addr.0, - allow_read_count - read_count, - e.display_chain() - ); - break; - } - }; - } - if let Some(output) = &mut self.chardev.lock().unwrap().output { - let mut locked_output = output.lock().unwrap(); - if let Err(e) = locked_output.write_all(&buffer[..read_count as usize]) { - error!("Failed to write to console output: {}", e); - } - if let Err(e) = locked_output.flush() { - error!("Failed to flush console output: {}", e); - } - } else { - debug!("Failed to get output fd"); - } - - if let Err(ref e) = queue_lock.vring.add_used(&self.mem_space, elem.index, 0) { - error!( - "Failed to add used ring for output console, index: {} len: {} {}", - elem.index, - 0, - e.display_chain() - ); - break; - } - } - } - - fn deactivate_evt_handler(&self) -> Vec { - let locked_chardev = self.chardev.lock().unwrap(); - let mut notifiers = vec![ - EventNotifier::new( - NotifierOperation::Delete, - self.deactivate_evt, - None, - 
EventSet::IN, - Vec::new(), - ), - EventNotifier::new( - NotifierOperation::Delete, - self.output_queue_evt.as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - ), - ]; - match &locked_chardev.backend { - ChardevType::Stdio | ChardevType::Pty => { - if let Some(input) = locked_chardev.input.clone() { - notifiers.push(EventNotifier::new( - NotifierOperation::Delete, - input.lock().unwrap().as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - )); - } - } - ChardevType::Socket(_) => { - if let Some(stream_fd) = locked_chardev.stream_fd { - notifiers.push(EventNotifier::new( - NotifierOperation::Delete, - stream_fd, - None, - EventSet::IN | EventSet::HANG_UP, - Vec::new(), - )); - } - let listener_fd = locked_chardev.listener.as_ref().unwrap().as_raw_fd(); - notifiers.push(EventNotifier::new( - NotifierOperation::Delete, - listener_fd, - None, - EventSet::IN, - Vec::new(), - )); - } - _ => (), - } - notifiers - } -} - -impl EventNotifierHelper for ConsoleHandler { - fn internal_notifiers(console_handler: Arc>) -> Vec { - let mut notifiers = Vec::new(); - let cloned_cls = console_handler.clone(); - let handler = Box::new(move |_, fd: RawFd| { - read_fd(fd); - cloned_cls.lock().unwrap().output_handle(); - None as Option> - }); - notifiers.push(EventNotifier::new( - NotifierOperation::AddShared, - console_handler.lock().unwrap().output_queue_evt.as_raw_fd(), - None, - EventSet::IN, - vec![Arc::new(Mutex::new(handler))], - )); - - let cloned_cls = console_handler.clone(); - let handler = Box::new(move |_, fd: RawFd| { - read_fd(fd); - Some(cloned_cls.lock().unwrap().deactivate_evt_handler()) - }); - notifiers.push(EventNotifier::new( - NotifierOperation::AddShared, - console_handler.lock().unwrap().deactivate_evt, - None, - EventSet::IN, - vec![Arc::new(Mutex::new(handler))], - )); - - notifiers - } -} - -/// Status of console device. -#[repr(C)] -#[derive(Copy, Clone, Desc, ByteCode)] -#[desc_version(compat_version = "0.1.0")] -pub struct VirtioConsoleState { - /// Bit mask of features supported by the backend. - device_features: u64, - /// Bit mask of features negotiated by the backend and the frontend. - driver_features: u64, - /// Virtio Console config space. - config_space: VirtioConsoleConfig, -} - -/// Virtio console device structure. -pub struct Console { - /// Status of console device. - state: VirtioConsoleState, - /// EventFd for device deactivate. - deactivate_evt: EventFd, - /// Character device for redirection. - chardev: Arc>, -} - -impl Console { - /// Create a virtio-console device. - /// - /// # Arguments - /// - /// * `console_cfg` - Device configuration set by user. - pub fn new(console_cfg: VirtioConsole) -> Self { - Console { - state: VirtioConsoleState { - device_features: 0_u64, - driver_features: 0_u64, - config_space: VirtioConsoleConfig::new(), - }, - deactivate_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - chardev: Arc::new(Mutex::new(Chardev::new(console_cfg.chardev))), - } - } -} - -impl VirtioDevice for Console { - /// Realize virtio console device. - fn realize(&mut self) -> Result<()> { - self.state.device_features = 1_u64 << VIRTIO_F_VERSION_1 | 1_u64 << VIRTIO_CONSOLE_F_SIZE; - self.chardev - .lock() - .unwrap() - .realize() - .chain_err(|| "Failed to realize chardev")?; - Ok(()) - } - - /// Get the virtio device type, refer to Virtio Spec. - fn device_type(&self) -> u32 { - VIRTIO_TYPE_CONSOLE - } - - /// Get the count of virtio device queues. - fn queue_num(&self) -> usize { - QUEUE_NUM_CONSOLE - } - - /// Get the queue size of virtio device. 
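    /// Both console virtqueues share the same depth, QUEUE_SIZE_CONSOLE (256 entries).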
- fn queue_size(&self) -> u16 { - QUEUE_SIZE_CONSOLE - } - - /// Get device features from host. - fn get_device_features(&self, features_select: u32) -> u32 { - read_u32(self.state.device_features, features_select) - } - - /// Set driver features by guest. - fn set_driver_features(&mut self, page: u32, value: u32) { - let mut v = write_u32(value, page); - let unrequested_features = v & !self.state.device_features; - if unrequested_features != 0 { - warn!("Received acknowledge request with unknown feature for console."); - v &= !unrequested_features; - } - self.state.driver_features |= v; - } - - /// Read data of config from guest. - fn read_config(&self, offset: u64, mut data: &mut [u8]) -> Result<()> { - let config_slice = self.state.config_space.as_bytes(); - let config_len = config_slice.len() as u64; - if offset >= config_len { - return Err(ErrorKind::DevConfigOverflow(offset, config_len).into()); - } - - if let Some(end) = offset.checked_add(data.len() as u64) { - data.write_all(&config_slice[offset as usize..cmp::min(end, config_len) as usize])?; - } - - Ok(()) - } - - /// Write data to config from guest. - fn write_config(&mut self, _offset: u64, _data: &[u8]) -> Result<()> { - bail!("Device config space for console is not supported") - } - - /// Activate the virtio device, this function is called by vcpu thread when frontend - /// virtio driver is ready and write `DRIVER_OK` to backend. - fn activate( - &mut self, - mem_space: Arc, - interrupt_cb: Arc, - queues: &[Arc>], - mut queue_evts: Vec, - ) -> Result<()> { - queue_evts.remove(0); // input_queue_evt never used - - let handler = ConsoleHandler { - input_queue: queues[0].clone(), - output_queue: queues[1].clone(), - output_queue_evt: queue_evts.remove(0), - mem_space, - interrupt_cb, - driver_features: self.state.driver_features, - deactivate_evt: self.deactivate_evt.as_raw_fd(), - chardev: self.chardev.clone(), - }; - - let dev = Arc::new(Mutex::new(handler)); - EventLoop::update_event(EventNotifierHelper::internal_notifiers(dev.clone()), None)?; - let locked_dev = dev.lock().unwrap(); - locked_dev.chardev.lock().unwrap().set_input_callback(&dev); - EventLoop::update_event( - EventNotifierHelper::internal_notifiers(locked_dev.chardev.clone()), - None, - )?; - Ok(()) - } - - fn deactivate(&mut self) -> Result<()> { - self.deactivate_evt - .write(1) - .chain_err(|| ErrorKind::EventFdWrite) - } -} - -impl StateTransfer for Console { - fn get_state_vec(&self) -> migration::errors::Result> { - Ok(self.state.as_bytes().to_vec()) - } - - fn set_state_mut(&mut self, state: &[u8]) -> migration::errors::Result<()> { - self.state = *VirtioConsoleState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("CONSOLE"))?; - - Ok(()) - } - - fn get_device_alias(&self) -> u64 { - if let Some(alias) = - MigrationManager::get_desc_alias(&VirtioConsoleState::descriptor().name) - { - alias - } else { - !0 - } - } -} - -impl MigrationHook for Console {} - -#[cfg(test)] -mod tests { - pub use super::super::*; - pub use super::*; - use std::mem::size_of; - - use machine_manager::config::{ChardevConfig, ChardevType}; - - #[test] - fn test_set_driver_features() { - let chardev_cfg = ChardevConfig { - id: "chardev".to_string(), - backend: ChardevType::Stdio, - }; - let mut console = Console::new(VirtioConsole { - id: "console".to_string(), - chardev: chardev_cfg.clone(), - }); - let mut chardev = Chardev::new(chardev_cfg); - chardev.output = Some(Arc::new(Mutex::new(std::io::stdout()))); - console.chardev = 
Arc::new(Mutex::new(chardev)); - - //If the device feature is 0, all driver features are not supported. - console.state.device_features = 0; - let driver_feature: u32 = 0xFF; - let page = 0_u32; - console.set_driver_features(page, driver_feature); - assert_eq!(console.state.driver_features, 0_u64); - - let driver_feature: u32 = 0xFF; - let page = 1_u32; - console.set_driver_features(page, driver_feature); - assert_eq!(console.state.driver_features, 0_u64); - - //If both the device feature bit and the front-end driver feature bit are - //supported at the same time, this driver feature bit is supported. - console.state.device_features = - 1_u64 << VIRTIO_F_VERSION_1 | 1_u64 << VIRTIO_CONSOLE_F_SIZE; - let driver_feature: u32 = (1_u64 << VIRTIO_CONSOLE_F_SIZE) as u32; - let page = 0_u32; - console.set_driver_features(page, driver_feature); - assert_eq!( - console.state.driver_features, - (1_u64 << VIRTIO_CONSOLE_F_SIZE) - ); - console.state.driver_features = 0; - - console.state.device_features = 1_u64 << VIRTIO_F_VERSION_1; - let driver_feature: u32 = (1_u64 << VIRTIO_CONSOLE_F_SIZE) as u32; - let page = 0_u32; - console.set_driver_features(page, driver_feature); - assert_eq!(console.state.driver_features, 0); - console.state.driver_features = 0; - - console.state.device_features = - 1_u64 << VIRTIO_F_VERSION_1 | 1_u64 << VIRTIO_CONSOLE_F_SIZE; - let driver_feature: u32 = (1_u64 << VIRTIO_CONSOLE_F_SIZE) as u32; - let page = 0_u32; - console.set_driver_features(page, driver_feature); - assert_eq!( - console.state.driver_features, - (1_u64 << VIRTIO_CONSOLE_F_SIZE) - ); - - let driver_feature: u32 = ((1_u64 << VIRTIO_F_VERSION_1) >> 32) as u32; - let page = 1_u32; - console.set_driver_features(page, driver_feature); - assert_eq!( - console.state.driver_features, - (1_u64 << VIRTIO_F_VERSION_1 | 1_u64 << VIRTIO_CONSOLE_F_SIZE) - ); - } - - #[test] - fn test_read_config() { - let chardev_cfg = ChardevConfig { - id: "chardev".to_string(), - backend: ChardevType::Stdio, - }; - let mut console = Console::new(VirtioConsole { - id: "console".to_string(), - chardev: chardev_cfg.clone(), - }); - let mut chardev = Chardev::new(chardev_cfg); - chardev.output = Some(Arc::new(Mutex::new(std::io::stdout()))); - console.chardev = Arc::new(Mutex::new(chardev)); - - //The offset of configuration that needs to be read exceeds the maximum - let offset = size_of::() as u64; - let mut read_data: Vec = vec![0; 8]; - assert_eq!(console.read_config(offset, &mut read_data).is_ok(), false); - - //Check the configuration that needs to be read - let offset = 0_u64; - let mut read_data: Vec = vec![0; 8]; - let expect_data: Vec = vec![1, 0, 0, 0, 0, 0, 0, 0]; - assert_eq!(console.read_config(offset, &mut read_data).is_ok(), true); - assert_eq!(read_data, expect_data); - - let offset = 0_u64; - let mut read_data: Vec = vec![0; 1]; - let expect_data: Vec = vec![1]; - assert_eq!(console.read_config(offset, &mut read_data).is_ok(), true); - assert_eq!(read_data, expect_data); - } -} diff --git a/virtio/src/balloon.rs b/virtio/src/device/balloon.rs similarity index 41% rename from virtio/src/balloon.rs rename to virtio/src/device/balloon.rs index a2aa067c24c660b1aa4569a2fd214324f23bfbc4..e6ab32cdeef9e4db9e6172fa9c2284bde4f76eb0 100644 --- a/virtio/src/balloon.rs +++ b/virtio/src/device/balloon.rs @@ -9,72 +9,106 @@ // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
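Both the console device above and the balloon device in the hunks below negotiate features the same way: the device advertises a 64-bit feature word, the guest acknowledges it one 32-bit page at a time, and any acknowledged bit the device never offered is masked off before being merged into driver_features. The sketch below shows that masking step in isolation; write_u32 here is a simplified stand-in for the helper of the same name in util::num_ops, re-implemented only for illustration.

    /// Place a 32-bit feature page into the right half of a 64-bit feature word.
    /// Simplified stand-in for util::num_ops::write_u32.
    fn write_u32(value: u32, page: u32) -> u64 {
        match page {
            0 => u64::from(value),
            1 => u64::from(value) << 32,
            _ => 0,
        }
    }

    /// Merge a guest-acknowledged feature page, dropping bits the device never offered.
    fn ack_features(device_features: u64, driver_features: &mut u64, page: u32, value: u32) {
        let mut v = write_u32(value, page);
        let unrequested = v & !device_features;
        if unrequested != 0 {
            // The guest acknowledged a feature the device did not advertise; ignore it.
            v &= !unrequested;
        }
        *driver_features |= v;
    }

For example, with device_features = 0, acknowledging 0xFF on page 0 leaves driver_features at 0, which is what the first case of test_set_driver_features above checks.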
-use std::io::Write; + use std::mem::size_of; use std::os::unix::io::{AsRawFd, RawFd}; -use std::sync::atomic::{AtomicU32, Ordering}; +use std::rc::Rc; +use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; use std::sync::{Arc, Mutex}; use std::{ cmp::{self, Reverse}, time::Duration, }; +use anyhow::{anyhow, Context, Result}; +use clap::{ArgAction, Parser}; +use log::{error, warn}; +use vmm_sys_util::{epoll::EventSet, eventfd::EventFd, timerfd::TimerFd}; + +use crate::{ + error::*, read_config_default, report_virtio_error, virtio_has_feature, Element, Queue, + VirtioBase, VirtioDevice, VirtioInterrupt, VirtioInterruptType, VIRTIO_F_VERSION_1, + VIRTIO_TYPE_BALLOON, +}; use address_space::{ - AddressSpace, FlatRange, GuestAddress, Listener, ListenerReqType, RegionIoEventFd, RegionType, + AddressAttr, AddressSpace, FlatRange, GuestAddress, Listener, ListenerReqType, RegionIoEventFd, + RegionType, }; -use error_chain::ChainedError; use machine_manager::{ - config::BalloonConfig, event_loop::EventLoop, qmp::qmp_schema::BalloonInfo, qmp::QmpChannel, -}; -use util::{ - bitmap::Bitmap, - byte_code::ByteCode, - loop_context::{ - read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, - }, - num_ops::{read_u32, round_down, write_u32}, - seccomp::BpfRule, - unix::host_page_size, + config::{get_pci_df, parse_bool, DEFAULT_VIRTQUEUE_SIZE}, + config::{valid_id, ConfigCheck, ConfigError}, + event, + event_loop::{register_event_helper, unregister_event_helper}, + qmp::qmp_channel::QmpChannel, + qmp::qmp_schema::BalloonInfo, }; -use vmm_sys_util::{epoll::EventSet, eventfd::EventFd, timerfd::TimerFd}; - -use super::{ - errors::*, Element, Queue, VirtioDevice, VirtioInterrupt, VirtioInterruptType, - VIRTIO_F_VERSION_1, VIRTIO_TYPE_BALLOON, +use util::bitmap::Bitmap; +use util::byte_code::ByteCode; +use util::loop_context::{ + read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, }; +use util::num_ops::round_down; +use util::seccomp::BpfRule; +use util::unix::host_page_size; +use util::{gen_base_func, offset_of}; const VIRTIO_BALLOON_F_DEFLATE_ON_OOM: u32 = 2; +const VIRTIO_BALLOON_F_REPORTING: u32 = 5; +/// The feature for Auto-balloon +const VIRTIO_BALLOON_F_MESSAGE_VQ: u32 = 16; const VIRTIO_BALLOON_PFN_SHIFT: u32 = 12; -const QUEUE_SIZE_BALLOON: u16 = 256; const QUEUE_NUM_BALLOON: usize = 2; const BALLOON_PAGE_SIZE: u64 = 1 << VIRTIO_BALLOON_PFN_SHIFT; const BALLOON_INFLATE_EVENT: bool = true; const BALLOON_DEFLATE_EVENT: bool = false; +const IN_IOVEC: bool = true; +const OUT_IOVEC: bool = false; const BITS_OF_TYPE_U64: u64 = 64; +const MEM_BUFFER_PERCENT_MIN: u32 = 20; +const MEM_BUFFER_PERCENT_MAX: u32 = 80; +const MONITOR_INTERVAL_SECOND_MIN: u32 = 5; +const MONITOR_INTERVAL_SECOND_MAX: u32 = 300; + static mut BALLOON_DEV: Option>> = None; /// IO vector, used to find memory segments. #[derive(Clone, Copy, Default)] -struct Iovec { +struct GuestIovec { /// Base address of memory. iov_base: GuestAddress, /// Length of memory segments. iov_len: u64, } +#[derive(Clone, Copy, Default)] +#[repr(packed(1))] +struct BalloonStat { + _tag: u16, + _val: u64, +} + /// Balloon configuration, which would be used to transport data between `Guest` and `Host`. #[derive(Copy, Clone, Default)] struct VirtioBalloonConfig { - /// Number of pages host wants Guest to give up. - #[allow(dead_code)] - pub num_pages: u32, - /// Number of pages we've actually got in balloon. - #[allow(dead_code)] - pub actual: u32, + /// The target page numbers of balloon device. 
+ _num_pages: u32, + /// Number of pages we've actually got in balloon device. + _actual: u32, + _reserved: u32, + _reserved1: u32, + /// Buffer percent is a percentage of memory actually needed by + /// the applications and services running inside the virtual machine. + /// This parameter takes effect only when VIRTIO_BALLOON_F_MESSAGE_VQ is supported. + /// Recommended value range: [20, 80] and default is 50. + _membuf_percent: u32, + /// Monitor interval(second) host wants to adjust VM memory size. + /// Recommended value range: [5, 300] and default is 10. + _monitor_interval: u32, } -impl ByteCode for Iovec {} +impl ByteCode for BalloonStat {} +impl ByteCode for GuestIovec {} impl ByteCode for VirtioBalloonConfig {} /// Bitmap for balloon. It is used if the host page size is bigger than 4k. @@ -102,7 +136,7 @@ impl BalloonedPageBitmap { match self.bitmap.count_front_bits(bits as usize) { Ok(nr) => nr == bits as usize, Err(ref e) => { - error!("Failed to count bits: {}", e); + error!("Failed to count bits: {:?}", e); false } } @@ -110,7 +144,7 @@ impl BalloonedPageBitmap { } /// Read data segment starting at `iov.iov_base` + `offset` to buffer . -/// Return bufer . +/// Return buffer . /// /// # Arguments /// @@ -119,7 +153,7 @@ impl BalloonedPageBitmap { /// * `offset` - Offset. fn iov_to_buf( address_space: &Arc, - &iov: &Iovec, + &iov: &GuestIovec, offset: u64, ) -> Option { let obj_len = std::mem::size_of::() as u64; @@ -127,29 +161,29 @@ fn iov_to_buf( return None; } - match address_space.read_object::(GuestAddress(iov.iov_base.raw_value() + offset)) { + // GPAChecked: the iov has been checked in pop_avail(). + match address_space.read_object::( + GuestAddress(iov.iov_base.raw_value() + offset), + AddressAttr::Ram, + ) { Ok(dat) => Some(dat), Err(ref e) => { - error!( - "Read virtioqueue failed: {}", - error_chain::ChainedError::display_chain(e) - ); + error!("Read virtioqueue failed: {:?}", e); None } } } -fn memory_advise(addr: *mut libc::c_void, len: libc::size_t, advice: libc::c_int) { - // Safe, because the memory to be freed is allocated by guest. - if unsafe { libc::madvise(addr, len, advice) } != 0 { - let evt_type = if advice == libc::MADV_WILLNEED { - "WILLNEED".to_string() - } else { - "DONTNEED".to_string() +unsafe fn memory_advise(addr: *mut libc::c_void, len: libc::size_t, advice: libc::c_int) { + if libc::madvise(addr, len, advice) != 0 { + let evt_type = match advice { + libc::MADV_DONTNEED => "DONTNEED".to_string(), + libc::MADV_REMOVE => "REMOVE".to_string(), + _ => "WILLNEED".to_string(), }; let e = std::io::Error::last_os_error(); error!( - "Mark memory address: {} to {} failed: {}", + "Mark memory address: {} to {} failed: {:?}", addr as u64, evt_type, e ); } @@ -160,35 +194,79 @@ struct Request { /// Count of elements. elem_cnt: u32, /// The data which is both readable and writable. - iovec: Vec, + iovec: Vec, } impl Request { /// Parse the request from virtio queue. - /// Return the request from qirtio queue. + /// Return the request from virtio queue. /// /// # Arguments /// /// * `elem` - Available ring. - fn parse(elem: &Element) -> Result { + /// * `elem_type` - The type of available ring. 
+ fn parse(elem: &Element, elem_type: bool) -> Result { let mut request = Request { desc_index: elem.index, elem_cnt: 0u32, iovec: Vec::new(), }; - if elem.out_iovec.is_empty() { - return Err(ErrorKind::ElementEmpty.into()); + let iovec = if elem_type { + &elem.in_iovec } else { - let elem_iov = elem.out_iovec.get(0).unwrap(); - request.iovec.push(Iovec { + &elem.out_iovec + }; + if iovec.is_empty() { + return Err(anyhow!(VirtioError::ElementEmpty)); + } + for elem_iov in iovec { + request.iovec.push(GuestIovec { iov_base: elem_iov.addr, - iov_len: elem_iov.len as u64, + iov_len: u64::from(elem_iov.len), }); request.elem_cnt += elem_iov.len; } Ok(request) } + fn balloon_deflate_page(&self, hvaset: &mut Vec<(u64, bool)>) { + let mut free_len: u64 = 0; + let mut start_addr: u64 = 0; + let mut last_addr: u64 = 0; + + while let Some((hva, _)) = hvaset.pop() { + if last_addr == 0 { + free_len += 1; + start_addr = hva; + } else if hva == last_addr + BALLOON_PAGE_SIZE { + free_len += 1; + } else { + // SAFETY: The memory to be freed is allocated by guest. + unsafe { + memory_advise( + start_addr as *const libc::c_void as *mut _, + (free_len * BALLOON_PAGE_SIZE) as usize, + libc::MADV_WILLNEED, + ) + }; + free_len = 1; + start_addr = hva; + } + + last_addr = hva; + } + + if free_len != 0 { + // SAFETY: The memory to be freed is allocated by guest. + unsafe { + memory_advise( + start_addr as *const libc::c_void as *mut _, + (free_len * BALLOON_PAGE_SIZE) as usize, + libc::MADV_WILLNEED, + ) + }; + } + } /// Mark balloon page with `MADV_DONTNEED` or `MADV_WILLNEED`. /// /// # Arguments @@ -201,101 +279,156 @@ impl Request { address_space: &Arc, mem: &Arc>, ) { - let advice = if req_type { - libc::MADV_DONTNEED - } else { - libc::MADV_WILLNEED - }; let mut last_addr: u64 = 0; - let mut count_iov: u64 = 4; + let mut last_share = false; let mut free_len: u64 = 0; let mut start_addr: u64 = 0; + let mut hvaset = Vec::new(); for iov in self.iovec.iter() { - let mut offset = 0; - let mut hvaset = Vec::new(); + let mut offset = 0_u64; + while let Some(pfn) = iov_to_buf::(address_space, iov, offset) { offset += std::mem::size_of::() as u64; - let gpa: GuestAddress = GuestAddress((pfn as u64) << VIRTIO_BALLOON_PFN_SHIFT); - let hva = match mem.lock().unwrap().get_host_address(gpa) { - Some(addr) => addr, + let gpa: GuestAddress = GuestAddress(u64::from(pfn) << VIRTIO_BALLOON_PFN_SHIFT); + let (hva, shared) = match mem.lock().unwrap().get_host_address(gpa) { + Some((addr, mem_share)) => (addr, mem_share), None => { - error!("Can not get host address, gpa: {}", gpa.raw_value()); + // Windows OS will populate the address with PA of 0 continue; } }; - hvaset.push(hva); + hvaset.push((hva, shared)); } - hvaset.sort_by_key(|&b| Reverse(b)); - let host_page_size = host_page_size(); - if host_page_size == BALLOON_PAGE_SIZE { - while let Some(hva) = hvaset.pop() { - if last_addr == 0 { - free_len += 1; - start_addr = hva; - } else if hva == last_addr + BALLOON_PAGE_SIZE { - free_len += 1; + } + hvaset.sort_by_key(|&b| Reverse(b.0)); + + if req_type == BALLOON_DEFLATE_EVENT { + self.balloon_deflate_page(&mut hvaset); + return; + } + + let host_page_size = host_page_size(); + let mut advice = 0_i32; + // If host_page_size equals BALLOON_PAGE_SIZE and have the same share properties, + // we can directly call the madvise function without any problem. And if the advice is + // MADV_WILLNEED, we just hint the whole host page it lives on, since we can't do + // anything smaller. 
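// Walk-through of the host_page_size == BALLOON_PAGE_SIZE branch below: hvaset was
// sorted descending by host address, so pop() yields host addresses in ascending
// order. A run of ballooned pages is extended while each new hva is exactly
// BALLOON_PAGE_SIZE past the previous one and has the same share property; when the
// run breaks, a single madvise() call covers the whole run. Shared (file-backed)
// memory is released with MADV_REMOVE, which punches a hole in the backing object,
// while private anonymous memory uses MADV_DONTNEED.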
+ if host_page_size == BALLOON_PAGE_SIZE { + while let Some((hva, share)) = hvaset.pop() { + if last_addr == 0 { + free_len += 1; + start_addr = hva; + last_share = share; + if share { + advice = libc::MADV_REMOVE; } else { - memory_advise( - start_addr as *const libc::c_void as *mut _, - (free_len * BALLOON_PAGE_SIZE) as usize, - advice, - ); - free_len = 1; - start_addr = hva; + advice = libc::MADV_DONTNEED; } - - if count_iov == iov.iov_len { + } else if hva == last_addr + BALLOON_PAGE_SIZE && last_share == share { + free_len += 1; + } else { + // SAFETY: The memory to be freed is allocated by guest. + unsafe { memory_advise( start_addr as *const libc::c_void as *mut _, (free_len * BALLOON_PAGE_SIZE) as usize, advice, - ); + ) + }; + free_len = 1; + start_addr = hva; + last_share = share; + if share { + advice = libc::MADV_REMOVE; + } else { + advice = libc::MADV_DONTNEED; } - count_iov += std::mem::size_of::() as u64; - last_addr = hva; } - } else { - let mut host_page_bitmap = - BalloonedPageBitmap::new(host_page_size / BALLOON_PAGE_SIZE); - while let Some(hva) = hvaset.pop() { - if host_page_bitmap.base_address == 0 { - if let Some(base_addr) = round_down(hva, host_page_size) { - host_page_bitmap.base_address = base_addr; - } else { - error!( - "Failed to round_down, hva: {}, align: {}", - hva, host_page_size - ); - } - } else if host_page_bitmap.base_address + host_page_size < hva { - host_page_bitmap = - BalloonedPageBitmap::new(host_page_size / BALLOON_PAGE_SIZE); - continue; - } - if let Err(ref e) = - host_page_bitmap.set_bit((hva % host_page_size) / BALLOON_PAGE_SIZE) - { + last_addr = hva; + } + if free_len != 0 { + // SAFETY: The memory to be freed is allocated by guest. + unsafe { + memory_advise( + start_addr as *const libc::c_void as *mut _, + (free_len * BALLOON_PAGE_SIZE) as usize, + advice, + ) + }; + } + } else { + let mut host_page_bitmap = BalloonedPageBitmap::new(host_page_size / BALLOON_PAGE_SIZE); + while let Some((hva, share)) = hvaset.pop() { + if host_page_bitmap.base_address == 0 { + if let Some(base_addr) = round_down(hva, host_page_size) { + host_page_bitmap.base_address = base_addr; + } else { error!( - "Failed to set bit with index: {} :{}", - (hva % host_page_size) / BALLOON_PAGE_SIZE, - e + "Failed to round_down, hva: {}, align: {}", + hva, host_page_size ); } - if host_page_bitmap.is_full(host_page_size / BALLOON_PAGE_SIZE) { + } else if host_page_bitmap.base_address + host_page_size < hva { + host_page_bitmap = BalloonedPageBitmap::new(host_page_size / BALLOON_PAGE_SIZE); + continue; + } + + if let Err(ref e) = + host_page_bitmap.set_bit((hva % host_page_size) / BALLOON_PAGE_SIZE) + { + error!( + "Failed to set bit with index: {} :{:?}", + (hva % host_page_size) / BALLOON_PAGE_SIZE, + e + ); + } + if host_page_bitmap.is_full(host_page_size / BALLOON_PAGE_SIZE) { + if share { + advice = libc::MADV_REMOVE; + } else { + advice = libc::MADV_DONTNEED; + } + // SAFETY: The memory to be freed is allocated by guest. 
+ unsafe { memory_advise( host_page_bitmap.base_address as *const libc::c_void as *mut _, host_page_size as usize, advice, - ); - host_page_bitmap = - BalloonedPageBitmap::new(host_page_size / BALLOON_PAGE_SIZE); - } + ) + }; + host_page_bitmap = BalloonedPageBitmap::new(host_page_size / BALLOON_PAGE_SIZE); } } } } + + fn release_pages(&self, mem: &Arc>) { + for iov in self.iovec.iter() { + let gpa: GuestAddress = iov.iov_base; + let (hva, shared) = match mem.lock().unwrap().get_host_address(gpa) { + Some((hva, shared)) => (hva, shared), + None => { + error!("Can not get host address, gpa: {}", gpa.raw_value()); + continue; + } + }; + let advice = if shared { + libc::MADV_REMOVE + } else { + libc::MADV_DONTNEED + }; + // SAFETY: The memory to be freed is allocated by guest. + unsafe { + memory_advise( + hva as *const libc::c_void as *mut _, + iov.iov_len as usize, + advice, + ) + }; + } + } } #[derive(Debug, Copy, Clone, Default)] @@ -310,31 +443,36 @@ struct BlnMemoryRegion { flags_padding: u64, /// Region Page size reg_page_size: Option, + /// Region shared or not + mem_share: bool, } struct BlnMemInfo { regions: Mutex>, + enabled: bool, } impl BlnMemInfo { fn new() -> BlnMemInfo { BlnMemInfo { regions: Mutex::new(Vec::new()), + enabled: false, } } - fn get_host_address(&self, addr: GuestAddress) -> Option { + fn get_host_address(&self, addr: GuestAddress) -> Option<(u64, bool)> { let all_regions = self.regions.lock().unwrap(); - for i in 0..all_regions.len() { - if addr.raw_value() < all_regions[i].guest_phys_addr + all_regions[i].memory_size - && addr.raw_value() >= all_regions[i].guest_phys_addr + for region in all_regions.iter() { + if addr.raw_value() < region.guest_phys_addr + region.memory_size + && addr.raw_value() >= region.guest_phys_addr { - return Some( - all_regions[i].userspace_addr + addr.raw_value() - - all_regions[i].guest_phys_addr, - ); + return Some(( + region.userspace_addr + addr.raw_value() - region.guest_phys_addr, + region.mem_share, + )); } } + None } @@ -353,7 +491,8 @@ impl BlnMemInfo { fn add_mem_range(&self, fr: &FlatRange) { let guest_phys_addr = fr.addr_range.base.raw_value(); let memory_size = fr.addr_range.size; - if let Some(host_addr) = fr.owner.get_host_address() { + // SAFETY: memory_size is range's size, so we make sure [hva, hva+size] is in ram range. + if let Some(host_addr) = unsafe { fr.owner.get_host_address(AddressAttr::Ram) } { let userspace_addr = host_addr + fr.offset_in_region; let reg_page_size = fr.owner.get_region_page_size(); self.regions.lock().unwrap().push(BlnMemoryRegion { @@ -362,6 +501,7 @@ impl BlnMemInfo { userspace_addr, flags_padding: 0_u64, reg_page_size, + mem_share: fr.owner.get_host_share().unwrap_or(false), }); } else { error!("Failed to get host address!"); @@ -370,7 +510,8 @@ impl BlnMemInfo { fn delete_mem_range(&self, fr: &FlatRange) { let mut mem_regions = self.regions.lock().unwrap(); - if let Some(host_addr) = fr.owner.get_host_address() { + // SAFETY: memory_size is range's size, so we make sure [hva, hva+size] is in ram range. 
+ if let Some(host_addr) = unsafe { fr.owner.get_host_address(AddressAttr::Ram) } { let reg_page_size = fr.owner.get_region_page_size(); let target = BlnMemoryRegion { guest_phys_addr: fr.addr_range.base.raw_value(), @@ -378,6 +519,7 @@ impl BlnMemInfo { userspace_addr: host_addr + fr.offset_in_region, flags_padding: 0_u64, reg_page_size, + mem_share: false, }; for (index, mr) in mem_regions.iter().enumerate() { if mr.guest_phys_addr == target.guest_phys_addr @@ -411,12 +553,24 @@ impl Listener for BlnMemInfo { 0 } + fn enabled(&self) -> bool { + self.enabled + } + + fn enable(&mut self) { + self.enabled = true; + } + + fn disable(&mut self) { + self.enabled = false; + } + fn handle_request( &self, range: Option<&FlatRange>, _evtfd: Option<&RegionIoEventFd>, req_type: ListenerReqType, - ) -> std::result::Result<(), address_space::errors::Error> { + ) -> Result<(), anyhow::Error> { match req_type { ListenerReqType::AddRegion => { let fr = range.unwrap(); @@ -445,13 +599,21 @@ struct BalloonIoHandler { /// Inflate queue. inf_queue: Arc>, /// Inflate EventFd. - inf_evt: EventFd, + inf_evt: Arc, /// Deflate queue. def_queue: Arc>, /// Deflate EventFd. - def_evt: EventFd, - /// EventFd for device deactivate - deactivate_evt: RawFd, + def_evt: Arc, + /// Reporting queue. + report_queue: Option>>, + /// Reporting EventFd. + report_evt: Option>, + /// Auto balloon msg queue. + msg_queue: Option>>, + /// Auto balloon msg EventFd. + msg_evt: Option>, + /// Device is broken or not. + device_broken: Arc, /// The interrupt call back function. interrupt_cb: Arc, /// Balloon Memory information. @@ -469,40 +631,124 @@ impl BalloonIoHandler { /// /// * `req_type` - Type of request. /// - /// if `req_type` is `BALLOON_INFLATE_EVENT`, then inflate the balloon, otherwise, deflate the balloon. + /// if `req_type` is `BALLOON_INFLATE_EVENT`, then inflate the balloon, otherwise, deflate the + /// balloon. fn process_balloon_queue(&mut self, req_type: bool) -> Result<()> { let queue = if req_type { + trace::virtio_receive_request("Balloon".to_string(), "to inflate".to_string()); &self.inf_queue } else { + trace::virtio_receive_request("Balloon".to_string(), "to deflate".to_string()); &self.def_queue }; let mut locked_queue = queue.lock().unwrap(); - while let Ok(elem) = locked_queue - .vring - .pop_avail(&self.mem_space, self.driver_features) - { - match Request::parse(&elem) { - Ok(req) => { - if !self.mem_info.lock().unwrap().has_huge_page() { - req.mark_balloon_page(req_type, &self.mem_space, &self.mem_info); + loop { + let elem = locked_queue + .vring + .pop_avail(&self.mem_space, self.driver_features) + .with_context(|| "Failed to pop avail ring for process balloon queue")?; + + if elem.desc_num == 0 { + break; + } + let req = Request::parse(&elem, OUT_IOVEC) + .with_context(|| "Fail to parse available descriptor chain")?; + if !self.mem_info.lock().unwrap().has_huge_page() { + req.mark_balloon_page(req_type, &self.mem_space, &self.mem_info); + } + locked_queue + .vring + .add_used(req.desc_index, req.elem_cnt) + .with_context(|| "Failed to add balloon response into used queue")?; + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&locked_queue), false) + .with_context(|| { + VirtioError::InterruptTrigger("balloon", VirtioInterruptType::Vring) + })? 
+ } + + Ok(()) + } + + fn reporting_evt_handler(&mut self) -> Result<()> { + trace::reporting_evt_handler(); + let queue = self + .report_queue + .as_ref() + .with_context(|| VirtioError::VirtQueueIsNone)?; + let mut locked_queue = queue.lock().unwrap(); + + loop { + let elem = locked_queue + .vring + .pop_avail(&self.mem_space, self.driver_features) + .with_context(|| "Failed to pop avail ring for reporting free pages")?; + + if elem.desc_num == 0 { + break; + } + let req = Request::parse(&elem, IN_IOVEC) + .with_context(|| "Fail to parse available descriptor chain")?; + if !self.mem_info.lock().unwrap().has_huge_page() { + req.release_pages(&self.mem_info); + } + locked_queue + .vring + .add_used(req.desc_index, req.elem_cnt) + .with_context(|| "Failed to add balloon response into used queue")?; + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&locked_queue), false) + .with_context(|| { + VirtioError::InterruptTrigger("balloon", VirtioInterruptType::Vring) + })?; + } + + Ok(()) + } + + fn auto_msg_evt_handler(&mut self) -> Result<()> { + trace::auto_msg_evt_handler(); + let queue = self + .msg_queue + .as_ref() + .with_context(|| VirtioError::VirtQueueIsNone)?; + let mut locked_queue = queue.lock().unwrap(); + + loop { + let elem = locked_queue + .vring + .pop_avail(&self.mem_space, self.driver_features) + .with_context(|| "Failed to pop avail ring")?; + + if elem.desc_num == 0 { + break; + } + let req = Request::parse(&elem, OUT_IOVEC) + .with_context(|| "Fail to parse available descriptor chain")?; + // SAFETY: There is no confliction when writing global variable BALLOON_DEV, in other + // words, this function will not be called simultaneously. + if let Some(dev) = unsafe { BALLOON_DEV.as_ref() } { + let mut balloon_dev = dev.lock().unwrap(); + for iov in req.iovec.iter() { + if let Some(stat) = iov_to_buf::(&self.mem_space, iov, 0) { + let ram_size = balloon_dev.mem_info.lock().unwrap().get_ram_size() + >> VIRTIO_BALLOON_PFN_SHIFT; + balloon_dev.set_num_pages(cmp::min(stat._val, ram_size) as u32); } - locked_queue - .vring - .add_used(&self.mem_space, req.desc_index, req.elem_cnt as u32) - .chain_err(|| "Failed to add balloon response into used queue")?; - } - Err(e) => { - error!( - "Fail to parse available descriptor chain, error is {}", - e.display_chain() - ); - break; } + balloon_dev + .signal_config_change() + .with_context(|| "Failed to notify guest")?; } + + locked_queue + .vring + .add_used(req.desc_index, req.elem_cnt) + .with_context(|| "Failed to add balloon response into used queue")?; + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&locked_queue), false) + .with_context(|| { + VirtioError::InterruptTrigger("balloon", VirtioInterruptType::Vring) + })?; } - (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&locked_queue)) - .chain_err(|| ErrorKind::InterruptTrigger("balloon", VirtioInterruptType::Vring))?; Ok(()) } @@ -518,42 +764,7 @@ impl BalloonIoHandler { /// Get the memory size of balloon. 
fn get_balloon_memory_size(&self) -> u64 { - (self.balloon_actual.load(Ordering::Acquire) as u64) << VIRTIO_BALLOON_PFN_SHIFT - } - - fn deactivate_evt_handler(&self) -> Vec { - let notifiers = vec![ - EventNotifier::new( - NotifierOperation::Delete, - self.deactivate_evt, - None, - EventSet::IN, - Vec::new(), - ), - EventNotifier::new( - NotifierOperation::Delete, - self.inf_evt.as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - ), - EventNotifier::new( - NotifierOperation::Delete, - self.def_evt.as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - ), - EventNotifier::new( - NotifierOperation::Delete, - self.event_timer.clone().lock().unwrap().as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - ), - ]; - - notifiers + u64::from(self.balloon_actual.load(Ordering::Acquire)) << VIRTIO_BALLOON_PFN_SHIFT } } @@ -563,13 +774,13 @@ impl BalloonIoHandler { /// /// * `fd` - Raw file descriptor. /// * `handler` - Handle function. -fn build_event_notifier(fd: RawFd, handler: Box) -> EventNotifier { +fn build_event_notifier(fd: RawFd, handler: Rc) -> EventNotifier { EventNotifier::new( NotifierOperation::AddShared, fd, None, EventSet::IN, - vec![Arc::new(Mutex::new(handler))], + vec![handler], ) } @@ -581,14 +792,19 @@ impl EventNotifierHelper for BalloonIoHandler { // register event notifier for inflate event. let cloned_balloon_io = balloon_io.clone(); - let handler: Box = Box::new(move |_, fd: RawFd| { + let handler: Rc = Rc::new(move |_, fd: RawFd| { read_fd(fd); - if let Err(e) = cloned_balloon_io - .lock() - .unwrap() - .process_balloon_queue(BALLOON_INFLATE_EVENT) - { - error!("Failed to inflate balloon: {}", e.display_chain()); + let mut locked_balloon_io = cloned_balloon_io.lock().unwrap(); + if locked_balloon_io.device_broken.load(Ordering::SeqCst) { + return None; + } + if let Err(e) = locked_balloon_io.process_balloon_queue(BALLOON_INFLATE_EVENT) { + error!("Failed to inflate balloon: {:?}", e); + report_virtio_error( + locked_balloon_io.interrupt_cb.clone(), + locked_balloon_io.driver_features, + &locked_balloon_io.device_broken, + ); }; None }); @@ -599,14 +815,19 @@ impl EventNotifierHelper for BalloonIoHandler { // register event notifier for deflate event. let cloned_balloon_io = balloon_io.clone(); - let handler: Box = Box::new(move |_, fd: RawFd| { + let handler: Rc = Rc::new(move |_, fd: RawFd| { read_fd(fd); - if let Err(e) = cloned_balloon_io - .lock() - .unwrap() - .process_balloon_queue(BALLOON_DEFLATE_EVENT) - { - error!("Failed to deflate balloon: {}", e.display_chain()); + let mut locked_balloon_io = cloned_balloon_io.lock().unwrap(); + if locked_balloon_io.device_broken.load(Ordering::SeqCst) { + return None; + } + if let Err(e) = locked_balloon_io.process_balloon_queue(BALLOON_DEFLATE_EVENT) { + error!("Failed to deflate balloon: {:?}", e); + report_virtio_error( + locked_balloon_io.interrupt_cb.clone(), + locked_balloon_io.driver_features, + &locked_balloon_io.device_broken, + ); }; None }); @@ -615,25 +836,58 @@ impl EventNotifierHelper for BalloonIoHandler { handler, )); - // register event notifier for reset event. - let cloned_balloon_io = balloon_io.clone(); - let handler: Box = Box::new(move |_, fd: RawFd| { - read_fd(fd); - Some(cloned_balloon_io.lock().unwrap().deactivate_evt_handler()) - }); - notifiers.push(build_event_notifier( - locked_balloon_io.deactivate_evt, - handler, - )); + // register event notifier for free page reporting event. 
+ if let Some(report_evt) = locked_balloon_io.report_evt.as_ref() { + let cloned_balloon_io = balloon_io.clone(); + let handler: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let mut locked_balloon_io = cloned_balloon_io.lock().unwrap(); + if locked_balloon_io.device_broken.load(Ordering::SeqCst) { + return None; + } + if let Err(e) = locked_balloon_io.reporting_evt_handler() { + error!("Failed to report free pages: {:?}", e); + report_virtio_error( + locked_balloon_io.interrupt_cb.clone(), + locked_balloon_io.driver_features, + &locked_balloon_io.device_broken, + ); + } + None + }); + notifiers.push(build_event_notifier(report_evt.as_raw_fd(), handler)); + } + + if let Some(msg_evt) = locked_balloon_io.msg_evt.as_ref() { + let cloned_balloon_io = balloon_io.clone(); + let handler: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let mut locked_balloon_io = cloned_balloon_io.lock().unwrap(); + if locked_balloon_io.device_broken.load(Ordering::SeqCst) { + return None; + } + if let Err(e) = locked_balloon_io.auto_msg_evt_handler() { + error!("Failed to msg: {:?}", e); + report_virtio_error( + locked_balloon_io.interrupt_cb.clone(), + locked_balloon_io.driver_features, + &locked_balloon_io.device_broken, + ); + } + None + }); + notifiers.push(build_event_notifier(msg_evt.as_raw_fd(), handler)); + } // register event notifier for timer event. let cloned_balloon_io = balloon_io.clone(); - let handler: Box = Box::new(move |_, fd: RawFd| { + let handler: Rc = Rc::new(move |_, fd: RawFd| { read_fd(fd); - cloned_balloon_io - .lock() - .unwrap() - .send_balloon_changed_event(); + let locked_balloon_io = cloned_balloon_io.lock().unwrap(); + if locked_balloon_io.device_broken.load(Ordering::SeqCst) { + return None; + } + locked_balloon_io.send_balloon_changed_event(); None }); notifiers.push(build_event_notifier( @@ -650,15 +904,71 @@ impl EventNotifierHelper for BalloonIoHandler { } } +#[derive(Parser, Debug, Clone, Default)] +#[command(no_binary_name(true))] +pub struct BalloonConfig { + #[arg(long)] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub bus: Option, + #[arg(long, value_parser = get_pci_df)] + pub addr: Option<(u8, u8)>, + #[arg(long, value_parser = parse_bool, action = ArgAction::Append)] + pub multifunction: Option, + #[arg(long, default_value = "false", action = ArgAction::Append)] + deflate_on_oom: bool, + #[arg(long, default_value = "false", action = ArgAction::Append)] + free_page_reporting: bool, + #[arg(long, default_value = "false", action = ArgAction::Append)] + auto_balloon: bool, + #[arg(long, default_value = "50")] + membuf_percent: u32, + #[arg(long, default_value = "10")] + monitor_interval: u32, +} + +impl ConfigCheck for BalloonConfig { + fn check(&self) -> Result<()> { + if !self.auto_balloon { + return Ok(()); + } + if self.membuf_percent > MEM_BUFFER_PERCENT_MAX + || self.membuf_percent < MEM_BUFFER_PERCENT_MIN + { + return Err(anyhow!(ConfigError::IllegalValue( + "balloon membuf-percent".to_string(), + u64::from(MEM_BUFFER_PERCENT_MIN), + false, + u64::from(MEM_BUFFER_PERCENT_MAX), + false, + ))); + } + if self.monitor_interval > MONITOR_INTERVAL_SECOND_MAX + || self.monitor_interval < MONITOR_INTERVAL_SECOND_MIN + { + return Err(anyhow!(ConfigError::IllegalValue( + "balloon monitor-interval".to_string(), + u64::from(MONITOR_INTERVAL_SECOND_MIN), + false, + u64::from(MONITOR_INTERVAL_SECOND_MAX), + false, + ))); + } + Ok(()) + } +} + /// A balloon device with some necessary information. 
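// For illustration: with the bounds enforced by ConfigCheck for BalloonConfig above,
// a configuration such as
//     BalloonConfig { auto_balloon: true, membuf_percent: 90, ..Default::default() }
// is rejected by check(), since 90 lies outside
// [MEM_BUFFER_PERCENT_MIN, MEM_BUFFER_PERCENT_MAX] = [20, 80], while the defaults
// (auto_balloon = false, membuf_percent = 50, monitor_interval = 10) return Ok(())
// because the range checks only apply when auto_balloon is enabled.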
pub struct Balloon { - /// Balloon device features. - device_features: u64, - /// Driver features. - driver_features: u64, - /// Actual memory pages. + /// Virtio device base property. + base: VirtioBase, + /// Configuration of the balloon device. + bln_cfg: BalloonConfig, + /// Actual memory pages of balloon device. actual: Arc, - /// Target memory pages. + /// Target memory pages of balloon device. num_pages: u32, /// Interrupt callback function. interrupt_cb: Option>, @@ -668,8 +978,6 @@ pub struct Balloon { mem_space: Arc, /// Event timer for BALLOON_CHANGED event. event_timer: Arc>, - /// EventFd for device deactivate. - deactivate_evt: EventFd, } impl Balloon { @@ -678,29 +986,31 @@ impl Balloon { /// # Arguments /// /// * `bln_cfg` - Balloon configuration. - pub fn new(bln_cfg: &BalloonConfig, mem_space: Arc) -> Balloon { - let mut device_features = 1u64 << VIRTIO_F_VERSION_1; - if bln_cfg.deflate_on_oom { - device_features |= 1u64 << VIRTIO_BALLOON_F_DEFLATE_ON_OOM; + pub fn new(bln_cfg: BalloonConfig, mem_space: Arc) -> Balloon { + let mut queue_num = QUEUE_NUM_BALLOON; + if bln_cfg.free_page_reporting { + queue_num += 1; + } + if bln_cfg.auto_balloon { + queue_num += 1; } Balloon { - device_features, - driver_features: 0u64, + base: VirtioBase::new(VIRTIO_TYPE_BALLOON, queue_num, DEFAULT_VIRTQUEUE_SIZE), + bln_cfg, actual: Arc::new(AtomicU32::new(0)), num_pages: 0u32, interrupt_cb: None, mem_info: Arc::new(Mutex::new(BlnMemInfo::new())), mem_space, event_timer: Arc::new(Mutex::new(TimerFd::new().unwrap())), - deactivate_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), } } /// Init balloon object for global use. pub fn object_init(dev: Arc>) { - // Safe, because there is no confliction when writing global variable BALLOON_DEV, in other words, - // this function will not be called simultaneously. + // SAFETY: there is no confliction when writing global variable BALLOON_DEV, in other + // words, this function will not be called simultaneously. unsafe { if BALLOON_DEV.is_none() { BALLOON_DEV = Some(dev) @@ -711,10 +1021,13 @@ impl Balloon { /// Notify configuration changes to VM. fn signal_config_change(&self) -> Result<()> { if let Some(interrupt_cb) = &self.interrupt_cb { - interrupt_cb(&VirtioInterruptType::Config, None) - .chain_err(|| ErrorKind::InterruptTrigger("balloon", VirtioInterruptType::Vring)) + interrupt_cb(&VirtioInterruptType::Config, None, false).with_context(|| { + VirtioError::InterruptTrigger("balloon", VirtioInterruptType::Config) + }) } else { - Err(ErrorKind::DeviceNotActivated("balloon".to_string()).into()) + Err(anyhow!(VirtioError::DeviceNotActivated( + "balloon".to_string() + ))) } } @@ -723,18 +1036,18 @@ impl Balloon { /// /// # Argument /// - /// * `size` - Target momery size. - pub fn set_guest_memory_size(&mut self, size: u64) -> Result<()> { + /// * `size` - Target memory size. 
+ fn set_guest_memory_size(&mut self, size: u64) -> Result<()> { let host_page_size = host_page_size(); if host_page_size > BALLOON_PAGE_SIZE && !self.mem_info.lock().unwrap().has_huge_page() { warn!("Balloon used with backing page size > 4kiB, this may not be reliable"); } - let target = (size >> VIRTIO_BALLOON_PFN_SHIFT) as u32; - let current_ram_size = - (self.mem_info.lock().unwrap().get_ram_size() >> VIRTIO_BALLOON_PFN_SHIFT) as u32; - let vm_target = cmp::min(target, current_ram_size); - self.num_pages = current_ram_size - vm_target; - self.signal_config_change().chain_err(|| { + let target = size >> VIRTIO_BALLOON_PFN_SHIFT; + let address_space_ram_size = + self.mem_info.lock().unwrap().get_ram_size() >> VIRTIO_BALLOON_PFN_SHIFT; + let vm_target = cmp::min(target, address_space_ram_size); + self.num_pages = (address_space_ram_size - vm_target) as u32; + self.signal_config_change().with_context(|| { "Failed to notify about configuration change after setting balloon memory" })?; let msg = BalloonInfo { @@ -746,99 +1059,74 @@ impl Balloon { /// Get the size of memory that reclaimed by balloon. fn get_balloon_memory_size(&self) -> u64 { - (self.actual.load(Ordering::Acquire) as u64) << VIRTIO_BALLOON_PFN_SHIFT + u64::from(self.actual.load(Ordering::Acquire)) << VIRTIO_BALLOON_PFN_SHIFT } /// Get the actual memory size of guest. - pub fn get_guest_memory_size(&self) -> u64 { + fn get_guest_memory_size(&self) -> u64 { self.mem_info.lock().unwrap().get_ram_size() - self.get_balloon_memory_size() } + + fn set_num_pages(&mut self, target: u32) { + self.num_pages = target; + } } impl VirtioDevice for Balloon { - /// Realize a balloon device. + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + fn realize(&mut self) -> Result<()> { - self.mem_info = Arc::new(Mutex::new(BlnMemInfo::new())); + self.bln_cfg.check()?; self.mem_space .register_listener(self.mem_info.clone()) - .chain_err(|| "Failed to register memory listener defined by balloon device.")?; + .with_context(|| "Failed to register memory listener defined by balloon device.")?; + self.init_config_features()?; Ok(()) } - /// Get the type of balloon. - fn device_type(&self) -> u32 { - VIRTIO_TYPE_BALLOON as u32 - } - - /// Get the number of balloon-device queues. - fn queue_num(&self) -> usize { - QUEUE_NUM_BALLOON - } - - /// Get the zise of balloon queue. - fn queue_size(&self) -> u16 { - QUEUE_SIZE_BALLOON - } - - /// Get the feature of `balloon` device. - fn get_device_features(&self, features_select: u32) -> u32 { - read_u32(self.device_features, features_select) - } - - /// Set feature for device. - /// - /// # Arguments - /// - /// * `page` - Selector of feature. - /// * `value` - Value to be set. - fn set_driver_features(&mut self, page: u32, value: u32) { - let mut v = write_u32(value, page); - let unrequested_features = v & !self.device_features; - if unrequested_features != 0 { - warn!("Received acknowledge request for unknown feature: {:x}", v); - v &= !unrequested_features; + fn init_config_features(&mut self) -> Result<()> { + self.base.device_features = 1u64 << VIRTIO_F_VERSION_1; + if self.bln_cfg.deflate_on_oom { + self.base.device_features |= 1u64 << VIRTIO_BALLOON_F_DEFLATE_ON_OOM; } - self.driver_features |= v; + if self.bln_cfg.free_page_reporting { + self.base.device_features |= 1u64 << VIRTIO_BALLOON_F_REPORTING; + } + if self.bln_cfg.auto_balloon { + self.base.device_features |= 1u64 << VIRTIO_BALLOON_F_MESSAGE_VQ; + } + Ok(()) } - /// Read configuration. 
- /// - /// # Arguments - /// - /// * `offset` - Offset from base address. - /// * `data` - Read data to `data`. - fn read_config(&self, offset: u64, mut data: &mut [u8]) -> Result<()> { + fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()> { let new_config = VirtioBalloonConfig { - num_pages: self.num_pages, - actual: self.actual.load(Ordering::Acquire), + _num_pages: self.num_pages, + _actual: self.actual.load(Ordering::Acquire), + _reserved: 0_u32, + _reserved1: 0_u32, + _membuf_percent: self.bln_cfg.membuf_percent, + _monitor_interval: self.bln_cfg.monitor_interval, }; - if offset != 0 { - return Err(ErrorKind::IncorrectOffset(0, offset).into()); - } - data.write_all( - &new_config.as_bytes()[offset as usize - ..cmp::min( - offset as usize + data.len(), - size_of::(), - )], - ) - .chain_err(|| "Failed to write data to 'data' while reading balloon config")?; - Ok(()) + + let config_len = + if virtio_has_feature(self.base.device_features, VIRTIO_BALLOON_F_MESSAGE_VQ) { + size_of::() + } else { + offset_of!(VirtioBalloonConfig, _reserved) + }; + + let config = &new_config.as_bytes()[..config_len]; + read_config_default(config, offset, data) } - /// Write configuration. - /// - /// # Argument - /// - /// * `_offset` - Offset from base address. fn write_config(&mut self, _offset: u64, data: &[u8]) -> Result<()> { // Guest update actual balloon size - // Safe, because the results will be checked. let old_actual = self.actual.load(Ordering::Acquire); + // SAFETY: The results will be checked. let new_actual = match unsafe { data.align_to::() } { (_, [new_config], _) => *new_config, _ => { - return Err(ErrorKind::FailedToWriteConfig.into()); + return Err(anyhow!(VirtioError::FailedToWriteConfig)); } }; if old_actual != new_actual { @@ -847,7 +1135,7 @@ impl VirtioDevice for Balloon { if !ret { timer .reset(Duration::new(1, 0), None) - .chain_err(|| "Failed to reset timer for qmp event during ballooning")?; + .with_context(|| "Failed to reset timer for qmp event during ballooning")?; } } } @@ -856,83 +1144,92 @@ impl VirtioDevice for Balloon { Ok(()) } - /// Active balloon device. - /// - /// # Arguments - /// - /// * `mem_space` - Address space. - /// * `interrupt_evt` - Interrupt EventFd. - /// * `interrupt_stats` - Statistics interrupt. - /// * `queues` - Different virtio queues. - /// * `queue_evts` Different EventFd. fn activate( &mut self, mem_space: Arc, interrupt_cb: Arc, - queues: &[Arc>], - mut queue_evts: Vec, + queue_evts: Vec>, ) -> Result<()> { - if queues.len() != QUEUE_NUM_BALLOON { - return Err(ErrorKind::IncorrectQueueNum(QUEUE_NUM_BALLOON, queues.len()).into()); + let queues = &self.base.queues; + if queues.len() != self.queue_num() { + return Err(anyhow!(VirtioError::IncorrectQueueNum( + self.queue_num(), + queues.len() + ))); } let inf_queue = queues[0].clone(); - let inf_queue_evt = queue_evts.remove(0); + let inf_evt = queue_evts[0].clone(); let def_queue = queues[1].clone(); - let def_queue_evt = queue_evts.remove(0); + let def_evt = queue_evts[1].clone(); + + // Get report queue and eventfd. + let mut queue_index = 2; + let mut report_queue = None; + let mut report_evt = None; + if virtio_has_feature(self.base.device_features, VIRTIO_BALLOON_F_REPORTING) { + report_queue = Some(queues[queue_index].clone()); + report_evt = Some(queue_evts[queue_index].clone()); + queue_index += 1; + } + + // Get msg queue and eventfd. 
+ let mut msg_queue = None; + let mut msg_evt = None; + if virtio_has_feature(self.base.device_features, VIRTIO_BALLOON_F_MESSAGE_VQ) { + msg_queue = Some(queues[queue_index].clone()); + msg_evt = Some(queue_evts[queue_index].clone()); + } self.interrupt_cb = Some(interrupt_cb.clone()); let handler = BalloonIoHandler { - driver_features: self.driver_features, + driver_features: self.base.driver_features, mem_space, inf_queue, - inf_evt: inf_queue_evt, + inf_evt, def_queue, - def_evt: def_queue_evt, - deactivate_evt: self.deactivate_evt.as_raw_fd(), + def_evt, + report_queue, + report_evt, + msg_queue, + msg_evt, + device_broken: self.base.broken.clone(), interrupt_cb, mem_info: self.mem_info.clone(), event_timer: self.event_timer.clone(), balloon_actual: self.actual.clone(), }; - EventLoop::update_event( - EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))), - None, - ) - .chain_err(|| "Failed to register balloon event notifier to MainLoop")?; + let notifiers = EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))); + register_event_helper(notifiers, None, &mut self.base.deactivate_evts) + .with_context(|| "Failed to register balloon event notifier to MainLoop")?; + self.base.broken.store(false, Ordering::SeqCst); Ok(()) } fn deactivate(&mut self) -> Result<()> { - self.deactivate_evt - .write(1) - .chain_err(|| ErrorKind::EventFdWrite) + unregister_event_helper(None, &mut self.base.deactivate_evts) } - fn update_config( - &mut self, - _dev_config: Option>, - ) -> Result<()> { - bail!("Unsupported to update configuration") + fn reset(&mut self) -> Result<()> { + if virtio_has_feature(self.base.device_features, VIRTIO_BALLOON_F_MESSAGE_VQ) { + self.num_pages = 0; + } + Ok(()) } } pub fn qmp_balloon(target: u64) -> bool { - // Safe, because there is no confliction when writing global variable BALLOON_DEV, in other words, - // this function will not be called simultaneously. - if let Some(dev) = unsafe { &BALLOON_DEV } { + // SAFETY: there is no confliction when writing global variable BALLOON_DEV, in other + // words, this function will not be called simultaneously. + if let Some(dev) = unsafe { BALLOON_DEV.as_ref() } { match dev.lock().unwrap().set_guest_memory_size(target) { Ok(()) => { return true; } Err(ref e) => { - error!( - "Failed to set balloon memory size: {}, :{}", - target, - error_chain::ChainedError::display_chain(e) - ); + error!("Failed to set balloon memory size: {}, :{:?}", target, e); return false; } } @@ -942,9 +1239,9 @@ pub fn qmp_balloon(target: u64) -> bool { } pub fn qmp_query_balloon() -> Option { - // Safe, because there is no confliction when writing global variable BALLOON_DEV, in other words, - // this function will not be called simultaneously. - if let Some(dev) = unsafe { &BALLOON_DEV } { + // SAFETY: There is no confliction when writing global variable BALLOON_DEV, in other + // words, this function will not be called simultaneously. 
+ if let Some(dev) = unsafe { BALLOON_DEV.as_ref() } { let unlocked_dev = dev.lock().unwrap(); return Some(unlocked_dev.get_guest_memory_size()); } @@ -962,39 +1259,14 @@ pub fn balloon_allow_list(syscall_allow_list: &mut Vec) { #[cfg(test)] mod tests { - pub use super::super::*; - pub use super::*; - - use address_space::{AddressRange, HostMemMapping, Region}; + use super::*; + use crate::tests::{address_space_init, MEMORY_SIZE}; + use crate::*; + use address_space::{AddressAttr, AddressRange, HostMemMapping, Region}; + use machine_manager::event_loop::EventLoop; - const MEMORY_SIZE: u64 = 1024 * 1024; const QUEUE_SIZE: u16 = 256; - fn address_space_init() -> Arc { - let root = Region::init_container_region(1 << 36); - let sys_space = AddressSpace::new(root).unwrap(); - let host_mmap = Arc::new( - HostMemMapping::new( - GuestAddress(0), - None, - MEMORY_SIZE, - None, - false, - false, - false, - ) - .unwrap(), - ); - sys_space - .root() - .add_subregion( - Region::init_ram_region(host_mmap.clone()), - host_mmap.start_address().raw_value(), - ) - .unwrap(); - sys_space - } - fn create_flat_range(addr: u64, size: u64, offset_in_region: u64) -> FlatRange { let mem_mapping = Arc::new( HostMemMapping::new(GuestAddress(addr), None, size, None, false, false, false).unwrap(), @@ -1004,7 +1276,7 @@ mod tests { mem_mapping.start_address().unchecked_add(offset_in_region), mem_mapping.size() - offset_in_region, ), - owner: Region::init_ram_region(mem_mapping.clone()), + owner: Region::init_ram_region(mem_mapping.clone(), "mem"), offset_in_region, rom_dev_romd: None, } @@ -1015,40 +1287,45 @@ mod tests { let bln_cfg = BalloonConfig { id: "bln".to_string(), deflate_on_oom: true, + ..Default::default() }; let mem_space = address_space_init(); - let mut bln = Balloon::new(&bln_cfg, mem_space); - assert_eq!(bln.driver_features, 0); + let mut bln = Balloon::new(bln_cfg, mem_space); + + // Test realize function. + bln.realize().unwrap(); + assert_eq!(bln.device_type(), 5); + assert_eq!(bln.queue_num(), 2); + assert_eq!(bln.queue_size_max(), QUEUE_SIZE); + + assert_eq!(bln.base.driver_features, 0); assert_eq!(bln.actual.load(Ordering::Acquire), 0); assert_eq!(bln.num_pages, 0); assert!(bln.interrupt_cb.is_none()); let feature = (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_BALLOON_F_DEFLATE_ON_OOM); - assert_eq!(bln.device_features, feature); + assert_eq!(bln.base.device_features, feature); - let fts = bln.get_device_features(0); + let fts = bln.device_features(0); assert_eq!(fts, feature as u32); - let fts = bln.get_device_features(1); + let fts = bln.device_features(1); assert_eq!(fts, (feature >> 32) as u32); - bln.driver_features = 0; - bln.device_features = 1; + bln.base.driver_features = 0; + bln.base.device_features = 1 | 1 << 32; bln.set_driver_features(0, 1); - assert_eq!(bln.driver_features, 1); - bln.driver_features = 1 << 32; + assert_eq!(bln.base.driver_features, 1); + assert_eq!(bln.base.driver_features, u64::from(bln.driver_features(0))); + bln.base.driver_features = 1 << 32; bln.set_driver_features(1, 1); - assert_eq!(bln.driver_features, 1 << 32); + assert_eq!(bln.base.driver_features, 1 << 32); + assert_eq!( + bln.base.driver_features, + u64::from(bln.driver_features(1)) << 32 + ); - // Test realize function. - bln.realize().unwrap(); - assert_eq!(bln.device_type(), 5); - assert_eq!(bln.queue_num(), 2); - assert_eq!(bln.queue_size(), QUEUE_SIZE); // Test methods of balloon. 
let ram_size = bln.mem_info.lock().unwrap().get_ram_size(); assert_eq!(ram_size, MEMORY_SIZE); - - assert!(bln.deactivate().is_ok()); - assert!(bln.update_config(None).is_err()); } #[test] @@ -1056,17 +1333,55 @@ mod tests { let bln_cfg = BalloonConfig { id: "bln".to_string(), deflate_on_oom: true, + ..Default::default() }; let mem_space = address_space_init(); - let balloon = Balloon::new(&bln_cfg, mem_space); - let write_data = [0, 0, 0, 0, 1, 0, 0, 0]; - let mut random_data: Vec = vec![0; 8]; + let balloon = Balloon::new(bln_cfg, mem_space); + let ret_data = [0, 0, 0, 0, 1, 0, 0, 0]; + let mut read_data: Vec = vec![0; 8]; let addr = 0x00; assert_eq!(balloon.get_balloon_memory_size(), 0); balloon.actual.store(1, Ordering::Release); - balloon.read_config(addr, &mut random_data).unwrap(); - assert_eq!(random_data, write_data); + balloon.read_config(addr, &mut read_data).unwrap(); + assert_eq!(read_data, ret_data); + } + + #[test] + fn test_read_config_partial() { + let bln_cfg = BalloonConfig { + id: "bln".to_string(), + deflate_on_oom: true, + ..Default::default() + }; + + let mem_space = address_space_init(); + let balloon = Balloon::new(bln_cfg, mem_space); + let ret_data = [1, 0, 0, 0, 0, 0, 0, 0]; + let mut read_data: Vec = vec![0; 8]; + let addr = 0x4; + assert_eq!(balloon.get_balloon_memory_size(), 0); + balloon.actual.store(1, Ordering::Release); + assert!(balloon.read_config(addr, &mut read_data).is_err()); + assert_ne!(read_data, ret_data); + } + + #[test] + fn test_read_config_overflow() { + let bln_cfg = BalloonConfig { + id: "bln".to_string(), + deflate_on_oom: true, + ..Default::default() + }; + + let mem_space = address_space_init(); + let balloon = Balloon::new(bln_cfg, mem_space); + let mut read_data: Vec = vec![0; 8]; + let addr: u64 = 0xffff_ffff_ffff_ffff; + assert_eq!(balloon.get_balloon_memory_size(), 0); + balloon.actual.store(1, Ordering::Release); + let ret = balloon.read_config(addr, &mut read_data); + assert!(ret.is_err()); } #[test] @@ -1074,10 +1389,11 @@ mod tests { let bln_cfg = BalloonConfig { id: "bln".to_string(), deflate_on_oom: true, + ..Default::default() }; let mem_space = address_space_init(); - let mut balloon = Balloon::new(&bln_cfg, mem_space); + let mut balloon = Balloon::new(bln_cfg, mem_space); let write_data = [1, 0, 0, 0]; let addr = 0x00; assert_eq!(balloon.get_balloon_memory_size(), 0); @@ -1091,8 +1407,9 @@ mod tests { let bln_cfg = BalloonConfig { id: "bln".to_string(), deflate_on_oom: true, + ..Default::default() }; - let mut bln = Balloon::new(&bln_cfg, mem_space.clone()); + let mut bln = Balloon::new(bln_cfg, mem_space.clone()); bln.realize().unwrap(); let ram_fr1 = create_flat_range(0, MEMORY_SIZE, 0); let blninfo = BlnMemInfo::new(); @@ -1104,13 +1421,15 @@ mod tests { let interrupt_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); let interrupt_status = Arc::new(AtomicU32::new(0)); let cb = Arc::new(Box::new( - move |int_type: &VirtioInterruptType, _queue: Option<&Queue>| { + move |int_type: &VirtioInterruptType, _queue: Option<&Queue>, _needs_reset: bool| { let status = match int_type { VirtioInterruptType::Config => VIRTIO_MMIO_INT_CONFIG, VirtioInterruptType::Vring => VIRTIO_MMIO_INT_VRING, }; interrupt_status.fetch_or(status, Ordering::SeqCst); - interrupt_evt.write(1).chain_err(|| ErrorKind::EventFdWrite) + interrupt_evt + .write(1) + .with_context(|| VirtioError::EventFdWrite) }, ) as VirtioInterrupt); @@ -1119,51 +1438,66 @@ mod tests { let mut queue_config_inf = QueueConfig::new(QUEUE_SIZE); queue_config_inf.desc_table = 
GuestAddress(0x100); - queue_config_inf.addr_cache.desc_table_host = mem_space - .get_host_address(queue_config_inf.desc_table) - .unwrap(); + queue_config_inf.addr_cache.desc_table_host = unsafe { + mem_space + .get_host_address(queue_config_inf.desc_table, AddressAttr::Ram) + .unwrap() + }; queue_config_inf.avail_ring = GuestAddress(0x300); - queue_config_inf.addr_cache.avail_ring_host = mem_space - .get_host_address(queue_config_inf.avail_ring) - .unwrap(); + queue_config_inf.addr_cache.avail_ring_host = unsafe { + mem_space + .get_host_address(queue_config_inf.avail_ring, AddressAttr::Ram) + .unwrap() + }; queue_config_inf.used_ring = GuestAddress(0x600); - queue_config_inf.addr_cache.used_ring_host = mem_space - .get_host_address(queue_config_inf.used_ring) - .unwrap(); + queue_config_inf.addr_cache.used_ring_host = unsafe { + mem_space + .get_host_address(queue_config_inf.used_ring, AddressAttr::Ram) + .unwrap() + }; queue_config_inf.ready = true; queue_config_inf.size = QUEUE_SIZE; let mut queue_config_def = QueueConfig::new(QUEUE_SIZE); queue_config_def.desc_table = GuestAddress(0x1100); - queue_config_def.addr_cache.desc_table_host = mem_space - .get_host_address(queue_config_def.desc_table) - .unwrap(); + queue_config_def.addr_cache.desc_table_host = unsafe { + mem_space + .get_host_address(queue_config_def.desc_table, AddressAttr::Ram) + .unwrap() + }; queue_config_def.avail_ring = GuestAddress(0x1300); - queue_config_def.addr_cache.avail_ring_host = mem_space - .get_host_address(queue_config_def.avail_ring) - .unwrap(); + queue_config_def.addr_cache.avail_ring_host = unsafe { + mem_space + .get_host_address(queue_config_def.avail_ring, AddressAttr::Ram) + .unwrap() + }; queue_config_def.used_ring = GuestAddress(0x1600); - queue_config_def.addr_cache.used_ring_host = mem_space - .get_host_address(queue_config_def.used_ring) - .unwrap(); + queue_config_def.addr_cache.used_ring_host = unsafe { + mem_space + .get_host_address(queue_config_def.used_ring, AddressAttr::Ram) + .unwrap() + }; queue_config_def.ready = true; queue_config_def.size = QUEUE_SIZE; let queue1 = Arc::new(Mutex::new(Queue::new(queue_config_inf, 1).unwrap())); let queue2 = Arc::new(Mutex::new(Queue::new(queue_config_def, 1).unwrap())); - let event_inf = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - let event_def = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - let event_deactivate = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + let event_inf = Arc::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()); + let event_def = Arc::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()); let mut handler = BalloonIoHandler { - driver_features: bln.driver_features, + driver_features: bln.base.driver_features, mem_space: mem_space.clone(), inf_queue: queue1, - inf_evt: event_inf.try_clone().unwrap(), + inf_evt: event_inf, def_queue: queue2, def_evt: event_def, - deactivate_evt: event_deactivate.as_raw_fd(), + report_queue: None, + report_evt: None, + msg_queue: None, + msg_evt: None, + device_broken: bln.base.broken.clone(), interrupt_cb: cb.clone(), mem_info: bln.mem_info.clone(), event_timer: bln.event_timer.clone(), @@ -1180,27 +1514,39 @@ mod tests { let desc = SplitVringDesc { addr: GuestAddress(0x2000), len: 4, - flags: 1, + flags: 0, next: 1, }; // Set desc table. 
mem_space - .write_object::(&desc, GuestAddress(queue_config_inf.desc_table.0)) + .write_object::( + &desc, + GuestAddress(queue_config_inf.desc_table.0), + AddressAttr::Ram, + ) .unwrap(); - let ele = Iovec { + let ele = GuestIovec { iov_base: GuestAddress(0xff), - iov_len: std::mem::size_of::() as u64, + iov_len: std::mem::size_of::() as u64, }; mem_space - .write_object::(&ele, GuestAddress(0x2000)) + .write_object::(&ele, GuestAddress(0x2000), AddressAttr::Ram) .unwrap(); mem_space - .write_object::(&0, GuestAddress(queue_config_inf.avail_ring.0 + 4 as u64)) + .write_object::( + &0, + GuestAddress(queue_config_inf.avail_ring.0 + 4_u64), + AddressAttr::Ram, + ) .unwrap(); mem_space - .write_object::(&1, GuestAddress(queue_config_inf.avail_ring.0 + 2 as u64)) + .write_object::( + &1, + GuestAddress(queue_config_inf.avail_ring.0 + 2_u64), + AddressAttr::Ram, + ) .unwrap(); assert!(handler.process_balloon_queue(BALLOON_INFLATE_EVENT).is_ok()); @@ -1211,22 +1557,34 @@ mod tests { let desc = SplitVringDesc { addr: GuestAddress(0x2000), len: 4, - flags: 1, + flags: 0, next: 1, }; mem_space - .write_object::(&desc, GuestAddress(queue_config_def.desc_table.0)) + .write_object::( + &desc, + GuestAddress(queue_config_def.desc_table.0), + AddressAttr::Ram, + ) .unwrap(); mem_space - .write_object::(&ele, GuestAddress(0x3000)) + .write_object::(&ele, GuestAddress(0x3000), AddressAttr::Ram) .unwrap(); mem_space - .write_object::(&0, GuestAddress(queue_config_def.avail_ring.0 + 4 as u64)) + .write_object::( + &0, + GuestAddress(queue_config_def.avail_ring.0 + 4_u64), + AddressAttr::Ram, + ) .unwrap(); mem_space - .write_object::(&1, GuestAddress(queue_config_def.avail_ring.0 + 2 as u64)) + .write_object::( + &1, + GuestAddress(queue_config_def.avail_ring.0 + 2_u64), + AddressAttr::Ram, + ) .unwrap(); assert!(handler.process_balloon_queue(BALLOON_DEFLATE_EVENT).is_ok()); @@ -1234,41 +1592,49 @@ mod tests { #[test] fn test_balloon_activate() { + EventLoop::object_init(&None).unwrap(); + let mem_space = address_space_init(); let interrupt_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); let interrupt_status = Arc::new(AtomicU32::new(0)); let interrupt_cb = Arc::new(Box::new( - move |int_type: &VirtioInterruptType, _queue: Option<&Queue>| { + move |int_type: &VirtioInterruptType, _queue: Option<&Queue>, _needs_reset: bool| { let status = match int_type { VirtioInterruptType::Config => VIRTIO_MMIO_INT_CONFIG, VirtioInterruptType::Vring => VIRTIO_MMIO_INT_VRING, }; interrupt_status.fetch_or(status, Ordering::SeqCst); - interrupt_evt.write(1).chain_err(|| ErrorKind::EventFdWrite) + interrupt_evt + .write(1) + .with_context(|| VirtioError::EventFdWrite) }, ) as VirtioInterrupt); - let mut queue_config_inf = QueueConfig::new(QUEUE_SIZE); - queue_config_inf.desc_table = GuestAddress(0); - queue_config_inf.avail_ring = GuestAddress(4096); - queue_config_inf.used_ring = GuestAddress(8192); - queue_config_inf.ready = true; - queue_config_inf.size = QUEUE_SIZE; - let mut queues: Vec>> = Vec::new(); - let queue1 = Arc::new(Mutex::new(Queue::new(queue_config_inf, 1).unwrap())); - queues.push(queue1); - let event_inf = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - let queue_evts: Vec = vec![event_inf.try_clone().unwrap()]; + let mut queue_evts: Vec> = Vec::new(); + for i in 0..QUEUE_NUM_BALLOON as u64 { + let mut queue_config_inf = QueueConfig::new(QUEUE_SIZE); + queue_config_inf.desc_table = GuestAddress(12288 * i); + queue_config_inf.avail_ring = GuestAddress(12288 * i + 4096); + queue_config_inf.used_ring = 
GuestAddress(12288 * i + 8192); + queue_config_inf.ready = true; + queue_config_inf.size = QUEUE_SIZE; + let queue = Arc::new(Mutex::new(Queue::new(queue_config_inf, 1).unwrap())); + queues.push(queue); + let event_inf = Arc::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()); + queue_evts.push(event_inf); + } let bln_cfg = BalloonConfig { id: "bln".to_string(), deflate_on_oom: true, + ..Default::default() }; - let mut bln = Balloon::new(&bln_cfg, mem_space.clone()); - assert!(bln - .activate(mem_space, interrupt_cb, &queues, queue_evts) - .is_err()); + let mut bln = Balloon::new(bln_cfg, mem_space.clone()); + bln.base.queues = queues; + assert!(bln.activate(mem_space, interrupt_cb, queue_evts).is_ok()); + + EventLoop::loop_clean(); } #[test] @@ -1284,7 +1650,10 @@ mod tests { blninfo.regions.lock().unwrap().push(blndef); assert_eq!(blninfo.get_host_address(GuestAddress(0x200)), None); - assert_eq!(blninfo.get_host_address(GuestAddress(0x420)), Some(0x20)); + assert_eq!( + blninfo.get_host_address(GuestAddress(0x420)), + Some((0x20, false)) + ); let ram_size = 0x800; let ram_fr1 = create_flat_range(0, ram_size, 0); @@ -1321,4 +1690,39 @@ mod tests { // Out of range. assert!(!btp.is_full(65)); } + + #[test] + fn test_balloon_init_free_page_reporting() { + let bln_cfg = BalloonConfig { + id: "bln".to_string(), + deflate_on_oom: true, + free_page_reporting: true, + ..Default::default() + }; + let mem_space = address_space_init(); + let mut bln = Balloon::new(bln_cfg, mem_space); + + // Test realize function. + bln.realize().unwrap(); + assert_eq!(bln.device_type(), 5); + assert_eq!(bln.queue_num(), 3); + assert_eq!(bln.queue_size_max(), QUEUE_SIZE); + + assert_eq!(bln.base.driver_features, 0); + assert_eq!(bln.actual.load(Ordering::Acquire), 0); + assert_eq!(bln.num_pages, 0); + assert!(bln.interrupt_cb.is_none()); + let feature = (1u64 << VIRTIO_F_VERSION_1) + | (1u64 << VIRTIO_BALLOON_F_DEFLATE_ON_OOM | 1u64 << VIRTIO_BALLOON_F_REPORTING); + assert_eq!(bln.base.device_features, feature); + + let fts = bln.device_features(0); + assert_eq!(fts, feature as u32); + let fts = bln.device_features(1); + assert_eq!(fts, (feature >> 32) as u32); + + // Test methods of balloon. + let ram_size = bln.mem_info.lock().unwrap().get_ram_size(); + assert_eq!(ram_size, MEMORY_SIZE); + } } diff --git a/virtio/src/device/block.rs b/virtio/src/device/block.rs new file mode 100644 index 0000000000000000000000000000000000000000..45b91944263f42cbbbc6e06c4f8e25bd477a4828 --- /dev/null +++ b/virtio/src/device/block.rs @@ -0,0 +1,1790 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +use std::cmp; +use std::collections::HashMap; +use std::mem::size_of; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::rc::Rc; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::mpsc::{channel, Receiver, Sender}; +use std::sync::{Arc, Mutex}; + +use anyhow::{anyhow, bail, Context, Result}; +use byteorder::{ByteOrder, LittleEndian}; +use clap::Parser; +use log::{error, warn}; +use vmm_sys_util::{epoll::EventSet, eventfd::EventFd}; + +use crate::{ + check_config_space_rw, gpa_hva_iovec_map, iov_discard_back, iov_discard_front, iov_read_object, + read_config_default, report_virtio_error, virtio_has_feature, Element, Queue, VirtioBase, + VirtioDevice, VirtioError, VirtioInterrupt, VirtioInterruptType, VIRTIO_BLK_F_DISCARD, + VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_SEG_MAX, + VIRTIO_BLK_F_WRITE_ZEROES, VIRTIO_BLK_ID_BYTES, VIRTIO_BLK_S_IOERR, VIRTIO_BLK_S_OK, + VIRTIO_BLK_S_UNSUPP, VIRTIO_BLK_T_DISCARD, VIRTIO_BLK_T_FLUSH, VIRTIO_BLK_T_GET_ID, + VIRTIO_BLK_T_IN, VIRTIO_BLK_T_OUT, VIRTIO_BLK_T_WRITE_ZEROES, + VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP, VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_RING_INDIRECT_DESC, + VIRTIO_F_VERSION_1, VIRTIO_TYPE_BLOCK, +}; +use address_space::{AddressAttr, AddressSpace, GuestAddress, RegionCache}; +use block_backend::{ + create_block_backend, remove_block_backend, BlockDriverOps, BlockIoErrorCallback, + BlockProperty, BlockStatus, +}; +use machine_manager::config::{ + get_pci_df, parse_bool, valid_block_device_virtqueue_size, valid_id, ConfigCheck, ConfigError, + DriveConfig, DriveFile, VmConfig, DEFAULT_VIRTQUEUE_SIZE, MAX_VIRTIO_QUEUE, +}; +use machine_manager::event_loop::{register_event_helper, unregister_event_helper, EventLoop}; +use migration::{ + migration::Migratable, DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, + StateTransfer, +}; +use migration_derive::{ByteCode, Desc}; +use util::aio::{ + iov_from_buf_direct, iov_to_buf_direct, raw_datasync, Aio, AioCb, AioEngine, AioReqResult, + Iovec, OpCode, WriteZeroesState, +}; +use util::byte_code::ByteCode; +use util::leak_bucket::LeakBucket; +use util::loop_context::{ + create_new_eventfd, read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, + NotifierOperation, +}; +use util::{gen_base_func, offset_of}; + +/// Number of virtqueues. +const QUEUE_NUM_BLK: usize = 1; +/// Used to compute the number of sectors. +const SECTOR_SHIFT: u8 = 9; +/// Size of a sector of the block device. +const SECTOR_SIZE: u64 = (0x01_u64) << SECTOR_SHIFT; +/// Size of the dummy block device. +const DUMMY_IMG_SIZE: u64 = 0; +/// Max number reqs of a merged request. +const MAX_NUM_MERGE_REQS: u16 = 32; +/// Max number iovs of a merged request. +const MAX_NUM_MERGE_IOVS: usize = 1024; +/// Max number bytes of a merged request. +const MAX_NUM_MERGE_BYTES: u64 = i32::MAX as u64; +/// Max iteration for every round of process queue. +const MAX_ITERATION_PROCESS_QUEUE: u16 = 10; +/// Max number sectors of per request. +const MAX_REQUEST_SECTORS: u32 = u32::MAX >> SECTOR_SHIFT; +/// Max length of serial number. 
+const MAX_SERIAL_NUM_LEN: usize = 20; + +type SenderConfig = ( + Option>>>, + u32, + u32, + u64, + Option, + bool, +); + +fn valid_serial(s: &str) -> Result { + if s.len() > MAX_SERIAL_NUM_LEN { + return Err(anyhow!(ConfigError::StringLengthTooLong( + "device serial number".to_string(), + MAX_SERIAL_NUM_LEN, + ))); + } + Ok(s.to_string()) +} + +#[derive(Parser, Debug, Clone)] +#[command(no_binary_name(true))] +pub struct VirtioBlkDevConfig { + #[arg(long, value_parser = ["virtio-blk-pci", "virtio-blk-device"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub bus: Option, + #[arg(long, value_parser = get_pci_df)] + pub addr: Option<(u8, u8)>, + #[arg(long, value_parser = parse_bool)] + pub multifunction: Option, + #[arg(long)] + pub drive: String, + #[arg(long)] + pub bootindex: Option, + #[arg(long, alias = "num-queues", value_parser = clap::value_parser!(u16).range(1..=MAX_VIRTIO_QUEUE as i64))] + pub num_queues: Option, + #[arg(long)] + pub iothread: Option, + #[arg(long, alias = "queue-size", default_value = "256", value_parser = valid_block_device_virtqueue_size)] + pub queue_size: u16, + #[arg(long, value_parser = valid_serial)] + pub serial: Option, +} + +impl Default for VirtioBlkDevConfig { + fn default() -> Self { + Self { + classtype: "".to_string(), + id: "".to_string(), + bus: None, + addr: None, + multifunction: None, + drive: "".to_string(), + num_queues: Some(1), + bootindex: None, + iothread: None, + queue_size: DEFAULT_VIRTQUEUE_SIZE, + serial: None, + } + } +} + +impl ConfigCheck for VirtioBlkDevConfig { + fn check(&self) -> Result<()> { + if self.serial.is_some() { + valid_serial(&self.serial.clone().unwrap())?; + } + Ok(()) + } +} + +fn get_serial_num_config(serial_num: &str) -> Vec { + let mut id_bytes = vec![0; VIRTIO_BLK_ID_BYTES as usize]; + let bytes_to_copy = cmp::min(serial_num.len(), VIRTIO_BLK_ID_BYTES as usize); + + let serial_bytes = serial_num.as_bytes(); + id_bytes[..bytes_to_copy].clone_from_slice(&serial_bytes[..bytes_to_copy]); + id_bytes +} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +struct RequestOutHeader { + request_type: u32, + io_prio: u32, + sector: u64, +} + +impl ByteCode for RequestOutHeader {} + +/// The request of discard and write-zeroes use the same struct. +#[repr(C)] +#[derive(Default, Clone, Copy)] +struct DiscardWriteZeroesSeg { + /// The start sector for discard or write-zeroes. + sector: u64, + /// The number of sectors for discard or write-zeroes. + num_sectors: u32, + /// The flags used for this range. + flags: u32, +} + +impl ByteCode for DiscardWriteZeroesSeg {} + +#[derive(Clone)] +pub struct AioCompleteCb { + queue: Arc>, + mem_space: Arc, + /// The head of merged Request list. 
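// A minimal standalone sketch (not part of this patch) of how get_serial_num_config above builds
// the VIRTIO_BLK_T_GET_ID payload: the serial string is copied into a fixed 20-byte buffer
// (VIRTIO_BLK_ID_BYTES), truncated when longer and zero-padded when shorter. The 20-byte size is
// taken from the tests in this file; the names below are placeholders.
fn serial_num_config_sketch(serial: &str) -> Vec<u8> {
    const ID_BYTES: usize = 20; // VIRTIO_BLK_ID_BYTES
    let mut id = vec![0u8; ID_BYTES];
    let n = serial.len().min(ID_BYTES);
    id[..n].copy_from_slice(&serial.as_bytes()[..n]);
    id
}

fn main() {
    assert_eq!(serial_num_config_sketch("fldXlNNdCeqMvoIfEFogBxlL").len(), 20); // truncated to 20
    assert_eq!(&serial_num_config_sketch("abc")[..3], b"abc"); // remainder stays zero-padded
}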
+ req: Arc, + interrupt_cb: Arc, + driver_features: u64, +} + +impl AioCompleteCb { + fn new( + queue: Arc>, + mem_space: Arc, + req: Arc, + interrupt_cb: Arc, + driver_features: u64, + ) -> Self { + AioCompleteCb { + queue, + mem_space, + req, + interrupt_cb, + driver_features, + } + } + + fn complete_request(&self, status: u8) -> Result<()> { + trace::virtio_blk_complete_request(status); + let mut req = Some(self.req.as_ref()); + while let Some(req_raw) = req { + self.complete_one_request(req_raw, status)?; + req = req_raw.next.as_ref().as_ref(); + } + Ok(()) + } + + fn complete_one_request(&self, req: &Request, status: u8) -> Result<()> { + if let Err(ref e) = self + .mem_space + .write_object(&status, req.in_header, AddressAttr::Ram) + { + bail!("Failed to write the status (blk io completion) {:?}", e); + } + + let mut queue_lock = self.queue.lock().unwrap(); + queue_lock + .vring + .add_used(req.desc_index, req.in_len) + .with_context(|| { + format!( + "Failed to add used ring(blk io completion), index {}, len {}", + req.desc_index, req.in_len + ) + })?; + trace::virtio_blk_complete_one_request(req.desc_index, req.in_len); + + if queue_lock.vring.should_notify(self.driver_features) { + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue_lock), false) + .with_context(|| { + VirtioError::InterruptTrigger("blk io completion", VirtioInterruptType::Vring) + })?; + trace::virtqueue_send_interrupt("Block", &*queue_lock as *const _ as u64); + } + Ok(()) + } +} + +#[derive(Clone)] +struct Request { + desc_index: u16, + out_header: RequestOutHeader, + iovec: Vec, + data_len: u64, + in_len: u32, + in_header: GuestAddress, + /// Point to the next merged Request. + next: Box>, +} + +impl Request { + fn new( + handler: &BlockIoHandler, + cache: &Option, + elem: &mut Element, + status: &mut u8, + devid: &str, + ) -> Result { + if elem.out_iovec.is_empty() || elem.in_iovec.is_empty() { + bail!( + "Missed header for block {} request: out {} in {} desc num {}", + devid, + elem.out_iovec.len(), + elem.in_iovec.len(), + elem.desc_num + ); + } + + let mut out_header = + iov_read_object::(&handler.mem_space, &elem.out_iovec, cache)?; + out_header.request_type = LittleEndian::read_u32(out_header.request_type.as_bytes()); + out_header.sector = LittleEndian::read_u64(out_header.sector.as_bytes()); + + let in_iov_elem = elem.in_iovec.last().unwrap(); + if in_iov_elem.len < 1 { + bail!( + "Invalid in header for block request: length {}", + in_iov_elem.len + ); + } + // Note: addr plus len has been checked not overflow in virtqueue. + let in_header = GuestAddress(in_iov_elem.addr.0 + u64::from(in_iov_elem.len) - 1); + + let mut request = Request { + desc_index: elem.index, + out_header, + iovec: Vec::with_capacity(elem.desc_num as usize), + data_len: 0, + in_len: 0, + in_header, + next: Box::new(None), + }; + + // Count in_len before discard iovec. + // We always write the last status byte, so count all in_iovs. + // Note: in_iov and out_iov total len is no more than 1<<32, and + // out_iov is more than 1, so in_len will not overflow. + for in_iov in elem.in_iovec.iter() { + request.in_len += in_iov.len; + } + + match out_header.request_type { + VIRTIO_BLK_T_IN + | VIRTIO_BLK_T_GET_ID + | VIRTIO_BLK_T_OUT + | VIRTIO_BLK_T_DISCARD + | VIRTIO_BLK_T_WRITE_ZEROES => { + let data_iovec = match out_header.request_type { + VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_DISCARD | VIRTIO_BLK_T_WRITE_ZEROES => { + iov_discard_front(&mut elem.out_iovec, size_of::() as u64) + } + // Otherwise discard the last "status" byte. 
+ _ => iov_discard_back(&mut elem.in_iovec, 1), + } + .with_context(|| format!("Empty data for block {} request", devid))?; + + let (data_len, iovec) = gpa_hva_iovec_map(data_iovec, &handler.mem_space, cache)?; + request.data_len = data_len; + request.iovec = iovec; + } + VIRTIO_BLK_T_FLUSH => (), + others => { + error!("Request type {} is not supported for block", others); + *status = VIRTIO_BLK_S_UNSUPP; + } + } + + if !request.io_range_valid(handler.disk_sectors, devid) { + *status = VIRTIO_BLK_S_IOERR; + } + + Ok(request) + } + + fn execute( + &self, + iohandler: &mut BlockIoHandler, + block_backend: Arc>>, + aiocompletecb: AioCompleteCb, + ) -> Result<()> { + let mut req = Some(self); + let mut iovecs = Vec::new(); + while let Some(req_raw) = req { + for iov in req_raw.iovec.iter() { + let iovec = Iovec { + iov_base: iov.iov_base, + iov_len: iov.iov_len, + }; + iovecs.push(iovec); + } + req = req_raw.next.as_ref().as_ref(); + } + let offset = (aiocompletecb.req.out_header.sector << SECTOR_SHIFT) as usize; + let request_type = self.out_header.request_type; + if MigrationManager::is_active() + && (request_type == VIRTIO_BLK_T_IN || request_type == VIRTIO_BLK_T_GET_ID) + { + // FIXME: mark dirty page needs to be managed by `AddressSpace` crate. + for iov in iovecs.iter() { + // Mark vmm dirty page manually if live migration is active. + MigrationManager::mark_dirty_log(iov.iov_base, iov.iov_len); + } + } + trace::virtio_blk_execute(request_type, iovecs.len(), offset); + + let serial_num = &iohandler.serial_num; + let mut locked_backend = block_backend.lock().unwrap(); + match request_type { + VIRTIO_BLK_T_IN => { + locked_backend + .read_vectored(iovecs, offset, aiocompletecb) + .with_context(|| { + format!( + "Failed to process block {} request for reading", + iohandler.devid + ) + })?; + } + VIRTIO_BLK_T_OUT => { + locked_backend + .write_vectored(iovecs, offset, aiocompletecb) + .with_context(|| { + format!( + "Failed to process block {} request for writing", + iohandler.devid + ) + })?; + } + VIRTIO_BLK_T_FLUSH => { + locked_backend.datasync(aiocompletecb).with_context(|| { + format!( + "Failed to process block {} request for flushing", + iohandler.devid + ) + })?; + } + VIRTIO_BLK_T_GET_ID => { + let serial = serial_num.clone().unwrap_or_else(|| String::from("")); + let serial_vec = get_serial_num_config(&serial); + // SAFETY: iovec is generated by address_space. + let status = unsafe { iov_from_buf_direct(&self.iovec, &serial_vec) }.map_or_else( + |e| { + error!( + "Failed to process block {} request for getting id, {:?}", + iohandler.devid, e + ); + VIRTIO_BLK_S_IOERR + }, + |_| VIRTIO_BLK_S_OK, + ); + aiocompletecb.complete_request(status)?; + } + VIRTIO_BLK_T_DISCARD => { + if !iohandler.discard { + error!("Device does not support discard"); + return aiocompletecb.complete_request(VIRTIO_BLK_S_UNSUPP); + } + drop(locked_backend); + self.handle_discard_write_zeroes_req(iohandler, aiocompletecb, OpCode::Discard)?; + } + VIRTIO_BLK_T_WRITE_ZEROES => { + if iohandler.write_zeroes == WriteZeroesState::Off { + error!("Device does not support write-zeroes"); + return aiocompletecb.complete_request(VIRTIO_BLK_S_UNSUPP); + } + drop(locked_backend); + self.handle_discard_write_zeroes_req( + iohandler, + aiocompletecb, + OpCode::WriteZeroes, + )?; + } + // The illegal request type has been handled in method new(). 
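// A minimal sketch (assumed helper names) of the sector-to-byte arithmetic used when dispatching
// the requests above: byte offsets and lengths come from shifting sector values by SECTOR_SHIFT
// (9), so one sector is 512 bytes.
fn byte_range_sketch(sector: u64, num_sectors: u64) -> (u64, u64) {
    const SECTOR_SHIFT: u8 = 9;
    (sector << SECTOR_SHIFT, num_sectors << SECTOR_SHIFT)
}

fn main() {
    // Sector 2 starts at byte offset 1024; eight sectors cover 4096 bytes.
    assert_eq!(byte_range_sketch(2, 8), (1024, 4096));
}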
+ _ => {} + }; + Ok(()) + } + + fn handle_discard_write_zeroes_req( + &self, + iohandler: &mut BlockIoHandler, + iocompletecb: AioCompleteCb, + opcode: OpCode, + ) -> Result<()> { + let size = size_of::() as u64; + // Just support one segment per request. + if self.data_len > size { + error!("More than one discard or write-zeroes segment is not supported"); + return iocompletecb.complete_request(VIRTIO_BLK_S_UNSUPP); + } + + // Get and check the discard segment. + let mut segment = DiscardWriteZeroesSeg::default(); + // SAFETY: iovec is generated by address_space. + unsafe { iov_to_buf_direct(&self.iovec, 0, segment.as_mut_bytes()) }.and_then(|v| { + if v as u64 == size { + Ok(()) + } else { + Err(anyhow!("Invalid discard segment size {}", v)) + } + })?; + let sector = LittleEndian::read_u64(segment.sector.as_bytes()); + let num_sectors = LittleEndian::read_u32(segment.num_sectors.as_bytes()); + if sector + .checked_add(u64::from(num_sectors)) + .filter(|&off| off <= iohandler.disk_sectors) + .is_none() + || num_sectors > MAX_REQUEST_SECTORS + { + error!( + "Invalid discard or write zeroes request, sector offset {}, num_sectors {}", + sector, num_sectors + ); + return iocompletecb.complete_request(VIRTIO_BLK_S_IOERR); + } + let flags = LittleEndian::read_u32(segment.flags.as_bytes()); + if flags & !VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP != 0 { + error!("Invalid unmap flags 0x{:x}", flags); + return iocompletecb.complete_request(VIRTIO_BLK_S_UNSUPP); + } + + // The block_backend is not None here. + let block_backend = iohandler.block_backend.as_ref().unwrap(); + let mut locked_backend = block_backend.lock().unwrap(); + let offset = (sector as usize) << SECTOR_SHIFT; + let nbytes = u64::from(num_sectors) << SECTOR_SHIFT; + trace::virtio_blk_handle_discard_write_zeroes_req(&opcode, flags, offset, nbytes); + if opcode == OpCode::Discard { + if flags == VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP { + error!("Discard request must not set unmap flags"); + return iocompletecb.complete_request(VIRTIO_BLK_S_UNSUPP); + } + locked_backend + .discard(offset, nbytes, iocompletecb) + .with_context(|| "Failed to process block request for discard")?; + } else if opcode == OpCode::WriteZeroes { + let unmap = flags == VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP && iohandler.discard; + locked_backend + .write_zeroes(offset, nbytes, iocompletecb, unmap) + .with_context(|| "Failed to process block request for write-zeroes")?; + } + Ok(()) + } + + fn io_range_valid(&self, disk_sectors: u64, devid: &str) -> bool { + match self.out_header.request_type { + VIRTIO_BLK_T_IN | VIRTIO_BLK_T_OUT => { + if self.data_len % SECTOR_SIZE != 0 { + error!("Failed to process block request with size not aligned to 512B"); + return false; + } + if self + .get_req_sector_num() + .checked_add(self.out_header.sector) + .filter(|&off| off <= disk_sectors) + .is_none() + { + error!( + "devid {} offset {} invalid, disk sector {}", + devid, self.out_header.sector, disk_sectors + ); + return false; + } + true + } + _ => true, + } + } + + fn get_req_sector_num(&self) -> u64 { + self.data_len / SECTOR_SIZE + } +} + +/// Control block of Block IO. +struct BlockIoHandler { + /// Device id of this block device. + devid: String, + /// The virtqueue. + queue: Arc>, + /// Eventfd of the virtqueue for IO event. + queue_evt: Arc, + /// The address space to which the block device belongs. + mem_space: Arc, + /// The block backend opened by the block device. + block_backend: Option>>>, + /// The align requirement of request(offset/len). 
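// A minimal sketch of the bounds check io_range_valid() above applies to reads and writes: the
// payload must be a multiple of the 512-byte sector size, and the request's last sector must not
// run past the disk. The same overflow-safe pattern (checked_add + filter) also guards the
// discard/write-zeroes segment above. Function and constant names here are placeholders.
fn io_range_valid_sketch(sector: u64, data_len: u64, disk_sectors: u64) -> bool {
    const SECTOR_SIZE: u64 = 512;
    if data_len % SECTOR_SIZE != 0 {
        return false;
    }
    // checked_add avoids wrapping before the comparison against the disk size.
    (data_len / SECTOR_SIZE)
        .checked_add(sector)
        .filter(|&end| end <= disk_sectors)
        .is_some()
}

fn main() {
    assert!(io_range_valid_sketch(0, 4096, 8)); // eight 512B sectors fit an 8-sector disk exactly
    assert!(!io_range_valid_sketch(1, 4096, 8)); // would end one sector past the disk
    assert!(!io_range_valid_sketch(0, 100, 8)); // not sector-aligned
}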
+ req_align: u32, + /// The align requirement of buffer(iova_base). + buf_align: u32, + /// The number of sectors of the disk image. + disk_sectors: u64, + /// Serial number of the block device. + serial_num: Option, + /// If use direct access io. + direct: bool, + /// Bit mask of features negotiated by the backend and the frontend. + driver_features: u64, + /// The receiving half of Rust's channel to receive the image file. + receiver: Receiver, + /// Eventfd for config space update. + update_evt: Arc, + /// Device is broken or not. + device_broken: Arc, + /// Callback to trigger an interrupt. + interrupt_cb: Arc, + /// thread name of io handler + iothread: Option, + /// Using the leak bucket to implement IO limits + leak_bucket: Option, + /// Supporting discard or not. + discard: bool, + /// The write-zeroes state. + write_zeroes: WriteZeroesState, +} + +impl BlockIoHandler { + fn merge_req_queue(&self, mut req_queue: Vec) -> Vec { + req_queue.sort_by(|a, b| a.out_header.sector.cmp(&b.out_header.sector)); + + let mut merge_req_queue = Vec::::new(); + let mut last_req: Option<&mut Request> = None; + let mut merged_reqs: u16 = 0; + let mut merged_iovs: usize = 0; + let mut merged_bytes: u64 = 0; + + for req in req_queue { + let req_iovs = req.iovec.len(); + let req_bytes = req.data_len; + let io = req.out_header.request_type == VIRTIO_BLK_T_IN + || req.out_header.request_type == VIRTIO_BLK_T_OUT; + let can_merge = match last_req { + Some(ref req_ref) => { + io && req_ref.out_header.request_type == req.out_header.request_type + // Note: sector plus sector_num has been checked not overflow. + && (req_ref.out_header.sector + req_ref.get_req_sector_num() == req.out_header.sector) + && merged_reqs < MAX_NUM_MERGE_REQS + && merged_iovs + req_iovs <= MAX_NUM_MERGE_IOVS + && merged_bytes + req_bytes <= MAX_NUM_MERGE_BYTES + } + None => false, + }; + + if can_merge { + let last_req_raw = last_req.unwrap(); + last_req_raw.next = Box::new(Some(req)); + last_req = last_req_raw.next.as_mut().as_mut(); + merged_reqs += 1; + merged_iovs += req_iovs; + merged_bytes += req_bytes; + } else { + merge_req_queue.push(req); + last_req = merge_req_queue.last_mut(); + merged_reqs = 1; + merged_iovs = req_iovs; + merged_bytes = req_bytes; + } + trace::virtio_blk_merge_req_queue(can_merge, merged_reqs, merged_iovs, merged_bytes); + } + + merge_req_queue + } + + fn process_queue_internal(&mut self) -> Result { + let queue_size = self.queue.lock().unwrap().vring.actual_size() as usize; + let mut req_queue = Vec::with_capacity(queue_size); + let mut done = false; + + loop { + let mut queue = self.queue.lock().unwrap(); + let mut elem = queue + .vring + .pop_avail(&self.mem_space, self.driver_features)?; + if elem.desc_num == 0 { + break; + } + + // limit io operations if iops is configured + if let Some(lb) = self.leak_bucket.as_mut() { + if let Some(ctx) = EventLoop::get_ctx(self.iothread.as_ref()) { + if lb.throttled(ctx, 1_u32) { + queue.vring.push_back(); + break; + } + }; + } + + // Init and put valid request into request queue. + let mut status = VIRTIO_BLK_S_OK; + let cache = queue.vring.get_cache(); + let req = Request::new(self, cache, &mut elem, &mut status, &self.devid)?; + if status != VIRTIO_BLK_S_OK { + let aiocompletecb = AioCompleteCb::new( + self.queue.clone(), + self.mem_space.clone(), + Arc::new(req), + self.interrupt_cb.clone(), + self.driver_features, + ); + // unlock queue, because it will be hold below. 
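// A minimal sketch of the merge rule merge_req_queue() above applies after sorting requests by
// sector: only plain reads or writes of the same type merge, the next request must start exactly
// where the previous one ends, and the running totals must stay under the MAX_NUM_MERGE_* caps.
// All names below are placeholders; the limit values mirror the constants defined in this file.
fn can_merge_sketch(
    same_rw_type: bool,   // both VIRTIO_BLK_T_IN or both VIRTIO_BLK_T_OUT
    last_end_sector: u64, // previous request's sector plus its sector count
    next_sector: u64,
    merged_reqs: u16,
    merged_iovs: usize,
    next_iovs: usize,
    merged_bytes: u64,
    next_bytes: u64,
) -> bool {
    const MAX_NUM_MERGE_REQS: u16 = 32;
    const MAX_NUM_MERGE_IOVS: usize = 1024;
    const MAX_NUM_MERGE_BYTES: u64 = i32::MAX as u64;

    same_rw_type
        && last_end_sector == next_sector
        && merged_reqs < MAX_NUM_MERGE_REQS
        && merged_iovs + next_iovs <= MAX_NUM_MERGE_IOVS
        && merged_bytes + next_bytes <= MAX_NUM_MERGE_BYTES
}

fn main() {
    // Two 4 KiB writes at sectors 0..8 and 8..16 are adjacent and small, so they merge.
    assert!(can_merge_sketch(true, 8, 8, 1, 8, 8, 4096, 4096));
}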
+ drop(queue); + aiocompletecb.complete_request(status)?; + continue; + } + // Avoid bogus guest stuck IO thread. + if req_queue.len() >= queue_size { + bail!("The front driver may be damaged, avail requests more than queue size"); + } + req_queue.push(req); + done = true; + } + + if req_queue.is_empty() { + return Ok(done); + } + + let merge_req_queue = self.merge_req_queue(req_queue); + for req in merge_req_queue.into_iter() { + let req_rc = Arc::new(req); + let aiocompletecb = AioCompleteCb::new( + self.queue.clone(), + self.mem_space.clone(), + req_rc.clone(), + self.interrupt_cb.clone(), + self.driver_features, + ); + if let Some(block_backend) = self.block_backend.as_ref() { + req_rc.execute(self, block_backend.clone(), aiocompletecb)?; + } else { + warn!( + "Failed to execute block {} request, block_backend not specified", + &self.devid + ); + aiocompletecb.complete_request(VIRTIO_BLK_S_IOERR)?; + } + } + if let Some(block_backend) = self.block_backend.as_ref() { + block_backend.lock().unwrap().flush_request()?; + } + Ok(done) + } + + fn process_queue_suppress_notify(&mut self) -> Result { + // Note: locked_status has two function: + // 1) set the status of the block device. + // 2) as a mutex lock which is mutual exclusive with snapshot operations. + // Do not unlock or drop the locked_status in this function. + let status; + let mut locked_status; + let len = self.queue.lock().unwrap().vring.avail_ring_len()?; + if len > 0 { + if let Some(block_backend) = self.block_backend.as_ref() { + status = block_backend.lock().unwrap().get_status(); + locked_status = status.lock().unwrap(); + *locked_status = BlockStatus::NormalIO; + } + } + trace::virtio_blk_process_queue_suppress_notify(len); + + let mut done = false; + let mut iteration: u16 = 0; + + while self.queue.lock().unwrap().vring.avail_ring_len()? != 0 { + // Do not stuck IO thread. + iteration += 1; + if iteration > MAX_ITERATION_PROCESS_QUEUE { + // Make sure we can come back. + self.queue_evt.write(1)?; + break; + } + + self.queue + .lock() + .unwrap() + .vring + .suppress_queue_notify(self.driver_features, true)?; + + done = self.process_queue_internal()?; + + self.queue + .lock() + .unwrap() + .vring + .suppress_queue_notify(self.driver_features, false)?; + + // See whether we have been throttled. + if let Some(lb) = self.leak_bucket.as_mut() { + if let Some(ctx) = EventLoop::get_ctx(self.iothread.as_ref()) { + if lb.throttled(ctx, 0_u32) { + break; + } + } + } + } + Ok(done) + } + + fn process_queue(&mut self) -> Result { + trace::virtio_receive_request("Block".to_string(), "to IO".to_string()); + let result = self.process_queue_suppress_notify(); + if result.is_err() { + report_virtio_error( + self.interrupt_cb.clone(), + self.driver_features, + &self.device_broken, + ); + } + result + } + + fn complete_func(aiocb: &AioCb, mut ret: i64) -> Result<()> { + match aiocb.req_is_completed(ret) { + AioReqResult::Inflight => return Ok(()), + AioReqResult::Error(v) => ret = v, + AioReqResult::Done => (), + } + let mut status = if ret < 0 { + VIRTIO_BLK_S_IOERR + } else { + VIRTIO_BLK_S_OK + }; + + let complete_cb = &aiocb.iocompletecb; + // When driver does not accept FLUSH feature, the device must be of + // writethrough cache type, so flush data before updating used ring. 
+ if !virtio_has_feature(complete_cb.driver_features, VIRTIO_BLK_F_FLUSH) + && aiocb.opcode == OpCode::Pwritev + && ret >= 0 + && raw_datasync(aiocb.file_fd) < 0 + { + error!("Failed to flush data before send response to guest."); + status = VIRTIO_BLK_S_IOERR; + } + + complete_cb.complete_request(status) + } + + fn update_evt_handler(&mut self) { + match self.receiver.recv() { + Ok((image, req_align, buf_align, disk_sectors, serial_num, direct)) => { + self.disk_sectors = disk_sectors; + self.block_backend = image; + self.req_align = req_align; + self.buf_align = buf_align; + self.serial_num = serial_num; + self.direct = direct; + } + Err(e) => { + error!("Failed to receive config in updating handler {:?}", e); + self.disk_sectors = 0; + self.block_backend = None; + self.req_align = 1; + self.buf_align = 1; + self.serial_num = None; + self.direct = true; + } + }; + + if let Err(e) = (self.interrupt_cb)(&VirtioInterruptType::Config, None, false) { + error!( + "{:?}. {:?}", + VirtioError::InterruptTrigger("block", VirtioInterruptType::Config), + e + ); + report_virtio_error( + self.interrupt_cb.clone(), + self.driver_features, + &self.device_broken, + ); + return; + } + + if let Err(ref e) = self.process_queue() { + error!("Failed to handle block IO for updating handler {:?}", e); + } + } +} + +fn build_event_notifier( + fd: RawFd, + handlers: Vec>, + handler_poll: Option>, +) -> EventNotifier { + let mut notifier = EventNotifier::new( + NotifierOperation::AddShared, + fd, + None, + EventSet::IN, + handlers, + ); + notifier.handler_poll = handler_poll; + notifier +} + +impl EventNotifierHelper for BlockIoHandler { + fn internal_notifiers(handler: Arc>) -> Vec { + let handler_raw = handler.lock().unwrap(); + let mut notifiers = Vec::new(); + + // Register event notifier for update_evt. + let h_clone = handler.clone(); + let h: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let mut h_lock = h_clone.lock().unwrap(); + if h_lock.device_broken.load(Ordering::SeqCst) { + return None; + } + h_lock.update_evt_handler(); + None + }); + notifiers.push(build_event_notifier( + handler_raw.update_evt.as_raw_fd(), + vec![h], + None, + )); + + // Register event notifier for queue_evt. 
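// A minimal sketch of the two small rules complete_func() relies on above: a feature bit is
// tested by masking the negotiated 64-bit word, and a negative backend return value completes as
// VIRTIO_BLK_S_IOERR while anything else completes as VIRTIO_BLK_S_OK (status values per the
// virtio-blk spec: 0 = OK, 1 = IOERR). Both functions below are illustrations, not the real
// virtio_has_feature/complete_func signatures.
fn has_feature_sketch(driver_features: u64, feature_bit: u32) -> bool {
    driver_features & (1u64 << feature_bit) != 0
}

fn completion_status_sketch(ret: i64) -> u8 {
    if ret < 0 {
        1 // VIRTIO_BLK_S_IOERR
    } else {
        0 // VIRTIO_BLK_S_OK
    }
}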
+ let h_clone = handler.clone(); + let h: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let mut h_lock = h_clone.lock().unwrap(); + if h_lock.device_broken.load(Ordering::SeqCst) { + return None; + } + if let Err(ref e) = h_lock.process_queue() { + error!("Failed to handle block IO {:?}", e); + } + None + }); + let h_clone = handler.clone(); + let handler_iopoll: Box = Box::new(move |_, _fd: RawFd| { + let mut h_lock = h_clone.lock().unwrap(); + if h_lock.device_broken.load(Ordering::SeqCst) { + return None; + } + match h_lock.process_queue() { + Ok(done) => { + if done { + Some(Vec::new()) + } else { + None + } + } + Err(e) => { + error!("Failed to handle block IO {:?}", e); + None + } + } + }); + notifiers.push(build_event_notifier( + handler_raw.queue_evt.as_raw_fd(), + vec![h], + Some(handler_iopoll), + )); + + // Register timer event notifier for IO limits + if let Some(lb) = handler_raw.leak_bucket.as_ref() { + let h_clone = handler.clone(); + let h: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let mut h_lock = h_clone.lock().unwrap(); + if h_lock.device_broken.load(Ordering::SeqCst) { + return None; + } + if let Some(lb) = h_lock.leak_bucket.as_mut() { + lb.clear_timer(); + } + if let Err(ref e) = h_lock.process_queue() { + error!("Failed to handle block IO {:?}", e); + } + None + }); + notifiers.push(build_event_notifier(lb.as_raw_fd(), vec![h], None)); + } + + notifiers + } +} + +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioBlkGeometry { + cylinders: u16, + heads: u8, + sectors: u8, +} + +impl ByteCode for VirtioBlkGeometry {} + +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +pub struct VirtioBlkConfig { + /// The capacity in 512 byte sectors. + capacity: u64, + /// The maximum segment size. + size_max: u32, + /// The maximum number of segments. + pub seg_max: u32, + /// Geometry of the block device. + geometry: VirtioBlkGeometry, + /// Block size of device. + blk_size: u32, + /// Exponent for physical block per logical block. + physical_block_exp: u8, + /// Alignment offset in logical blocks. + alignment_offset: u8, + /// Minimum I/O size without performance penalty in logical blocks. + min_io_size: u16, + /// Optimal sustained I/O size in logical blocks. + opt_io_size: u32, + /// Writeback mode. + wce: u8, + /// Reserved data. + unused: u8, + /// Number of virtio queues, only available when `VIRTIO_BLK_F_MQ` is set. + pub num_queues: u16, + /// The maximum discard sectors for one segment. + pub max_discard_sectors: u32, + /// The maximum number of discard segments in a discard command. + pub max_discard_seg: u32, + /// Discard commands must be aligned to this number of sectors. + pub discard_sector_alignment: u32, + /// The maximum number of write zeros sectors. + pub max_write_zeroes_sectors: u32, + /// The maximum number of segments in a write zeroes command. + pub max_write_zeroes_seg: u32, + /// Deallocation of one or more of the sectors. + pub write_zeroes_may_unmap: u8, + /// Reserved data. + unused1: [u8; 3], +} + +impl ByteCode for VirtioBlkConfig {} + +/// State of block device. +#[repr(C)] +#[derive(Clone, Copy, Desc, ByteCode)] +#[desc_version(compat_version = "0.1.0")] +pub struct BlockState { + /// Bitmask of features supported by the backend. + pub device_features: u64, + /// Bit mask of features negotiated by the backend and the frontend. + pub driver_features: u64, + /// Config space of the block device. + pub config_space: VirtioBlkConfig, + /// Device broken status. 
+ broken: bool, +} + +/// Block device structure. +#[derive(Default)] +pub struct Block { + /// Virtio device base property. + base: VirtioBase, + /// Configuration of the block device. + blk_cfg: VirtioBlkDevConfig, + /// Configuration of the block device's drive. + drive_cfg: DriveConfig, + /// Config space of the block device. + config_space: VirtioBlkConfig, + /// BLock backend opened by the block device. + block_backend: Option>>>, + /// The align requirement of request(offset/len). + pub req_align: u32, + /// The align requirement of buffer(iova_base). + pub buf_align: u32, + /// Number of sectors of the image file. + disk_sectors: u64, + /// Callback to trigger interrupt. + interrupt_cb: Option>, + /// The sending half of Rust's channel to send the image file. + senders: Vec>, + /// Eventfd for config space update. + update_evts: Vec>, + /// Drive backend files. + drive_files: Arc>>, +} + +impl Block { + pub fn new( + blk_cfg: VirtioBlkDevConfig, + drive_cfg: DriveConfig, + drive_files: Arc>>, + ) -> Block { + let queue_num = blk_cfg.num_queues.unwrap_or(1) as usize; + let queue_size = blk_cfg.queue_size; + Self { + base: VirtioBase::new(VIRTIO_TYPE_BLOCK, queue_num, queue_size), + blk_cfg, + drive_cfg, + req_align: 1, + buf_align: 1, + drive_files, + ..Default::default() + } + } + + fn build_device_config_space(&mut self) { + // capacity: 64bits + self.config_space.capacity = self.disk_sectors; + // seg_max = queue_size - 2: 32bits + self.config_space.seg_max = u32::from(self.queue_size_max()) - 2; + + if self.blk_cfg.num_queues.unwrap_or(1) > 1 { + self.config_space.num_queues = self.blk_cfg.num_queues.unwrap_or(1); + } + + if self.drive_cfg.discard { + // Just support one segment per request. + self.config_space.max_discard_seg = 1; + // The default discard alignment is 1 sector. + self.config_space.discard_sector_alignment = 1; + self.config_space.max_discard_sectors = MAX_REQUEST_SECTORS; + } + + if self.drive_cfg.write_zeroes != WriteZeroesState::Off { + // Just support one segment per request. 
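// A minimal sketch of the seg_max rule in build_device_config_space() above: the advertised
// maximum number of data segments per request is the virtqueue size minus the two descriptors
// every request spends on its out-header and status byte (an assumption about the rationale; the
// "queue_size - 2" arithmetic itself is taken from the code above).
fn seg_max_sketch(queue_size: u16) -> u32 {
    u32::from(queue_size) - 2
}

fn main() {
    assert_eq!(seg_max_sketch(256), 254); // the default 256-entry queue leaves 254 data segments
}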
+ self.config_space.max_write_zeroes_seg = 1; + self.config_space.max_write_zeroes_sectors = MAX_REQUEST_SECTORS; + self.config_space.write_zeroes_may_unmap = 1; + } + } + + fn get_blk_config_size(&self) -> usize { + if virtio_has_feature(self.base.device_features, VIRTIO_BLK_F_WRITE_ZEROES) { + offset_of!(VirtioBlkConfig, unused1) + } else if virtio_has_feature(self.base.device_features, VIRTIO_BLK_F_DISCARD) { + offset_of!(VirtioBlkConfig, max_write_zeroes_sectors) + } else { + offset_of!(VirtioBlkConfig, max_discard_sectors) + } + } + + fn gen_error_cb(&self, interrupt_cb: Arc) -> BlockIoErrorCallback { + let cloned_features = self.base.driver_features; + let clone_broken = self.base.broken.clone(); + Arc::new(move || { + report_virtio_error(interrupt_cb.clone(), cloned_features, &clone_broken); + }) + } +} + +impl VirtioDevice for Block { + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + + fn realize(&mut self) -> Result<()> { + // if iothread not found, return err + if self.blk_cfg.iothread.is_some() + && EventLoop::get_ctx(self.blk_cfg.iothread.as_ref()).is_none() + { + bail!( + "IOThread {:?} of Block is not configured in params.", + self.blk_cfg.iothread, + ); + } + + if !self.drive_cfg.path_on_host.is_empty() { + let drive_files = self.drive_files.lock().unwrap(); + let file = VmConfig::fetch_drive_file(&drive_files, &self.drive_cfg.path_on_host)?; + let alignments = + VmConfig::fetch_drive_align(&drive_files, &self.drive_cfg.path_on_host)?; + self.req_align = alignments.0; + self.buf_align = alignments.1; + let drive_id = VmConfig::get_drive_id(&drive_files, &self.drive_cfg.path_on_host)?; + + let mut thread_pool = None; + if self.drive_cfg.aio != AioEngine::Off { + thread_pool = Some(EventLoop::get_ctx(None).unwrap().thread_pool.clone()); + } + let aio = Aio::new( + Arc::new(BlockIoHandler::complete_func), + self.drive_cfg.aio, + thread_pool, + )?; + + let conf = BlockProperty { + id: drive_id, + format: self.drive_cfg.format, + iothread: self.blk_cfg.iothread.clone(), + direct: self.drive_cfg.direct, + req_align: self.req_align, + buf_align: self.buf_align, + discard: self.drive_cfg.discard, + write_zeroes: self.drive_cfg.write_zeroes, + l2_cache_size: self.drive_cfg.l2_cache_size, + refcount_cache_size: self.drive_cfg.refcount_cache_size, + }; + let backend = create_block_backend(file, aio, conf)?; + let disk_size = backend.lock().unwrap().disk_size()?; + self.block_backend = Some(backend); + self.disk_sectors = disk_size >> SECTOR_SHIFT; + } else { + self.req_align = 1; + self.buf_align = 1; + self.block_backend = None; + self.disk_sectors = DUMMY_IMG_SIZE >> SECTOR_SHIFT; + } + + self.init_config_features()?; + + Ok(()) + } + + fn init_config_features(&mut self) -> Result<()> { + self.base.device_features = 1_u64 << VIRTIO_F_VERSION_1 + | 1_u64 << VIRTIO_F_RING_INDIRECT_DESC + | 1_u64 << VIRTIO_F_RING_EVENT_IDX + | 1_u64 << VIRTIO_BLK_F_FLUSH + | 1_u64 << VIRTIO_BLK_F_SEG_MAX; + if self.drive_cfg.readonly { + self.base.device_features |= 1_u64 << VIRTIO_BLK_F_RO; + }; + if self.blk_cfg.num_queues.unwrap_or(1) > 1 { + self.base.device_features |= 1_u64 << VIRTIO_BLK_F_MQ; + } + if self.drive_cfg.discard { + self.base.device_features |= 1_u64 << VIRTIO_BLK_F_DISCARD; + } + if self.drive_cfg.write_zeroes != WriteZeroesState::Off { + self.base.device_features |= 1_u64 << VIRTIO_BLK_F_WRITE_ZEROES; + } + self.build_device_config_space(); + + Ok(()) + } + + fn unrealize(&mut self) -> Result<()> { + 
MigrationManager::unregister_device_instance(BlockState::descriptor(), &self.blk_cfg.id); + let drive_files = self.drive_files.lock().unwrap(); + let drive_id = VmConfig::get_drive_id(&drive_files, &self.drive_cfg.path_on_host)?; + remove_block_backend(&drive_id); + Ok(()) + } + + fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()> { + let config_len = self.get_blk_config_size(); + let config = &self.config_space.as_bytes()[..config_len]; + read_config_default(config, offset, data)?; + trace::virtio_blk_read_config(offset, data); + Ok(()) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { + let config_len = self.get_blk_config_size(); + let config = &self.config_space.as_bytes()[..config_len]; + check_config_space_rw(config, offset, data)?; + trace::virtio_blk_write_config(offset, data); + // The only writable field is "writeback", but it's not supported for now, + // so do nothing here. + Ok(()) + } + + fn activate( + &mut self, + mem_space: Arc, + interrupt_cb: Arc, + queue_evts: Vec>, + ) -> Result<()> { + self.interrupt_cb = Some(interrupt_cb.clone()); + let queues = self.base.queues.clone(); + for (index, queue) in queues.iter().enumerate() { + if !queue.lock().unwrap().is_enabled() { + continue; + } + let (sender, receiver) = channel(); + let update_evt = Arc::new(create_new_eventfd()?); + let driver_features = self.base.driver_features; + let handler = BlockIoHandler { + devid: self.blk_cfg.id.clone(), + queue: queue.clone(), + queue_evt: queue_evts[index].clone(), + mem_space: mem_space.clone(), + block_backend: self.block_backend.clone(), + req_align: self.req_align, + buf_align: self.buf_align, + disk_sectors: self.disk_sectors, + direct: self.drive_cfg.direct, + serial_num: self.blk_cfg.serial.clone(), + driver_features, + receiver, + update_evt: update_evt.clone(), + device_broken: self.base.broken.clone(), + interrupt_cb: interrupt_cb.clone(), + iothread: self.blk_cfg.iothread.clone(), + leak_bucket: match self.drive_cfg.iops { + Some(iops) => Some(LeakBucket::new(iops)?), + None => None, + }, + discard: self.drive_cfg.discard, + write_zeroes: self.drive_cfg.write_zeroes, + }; + + let notifiers = EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))); + register_event_helper( + notifiers, + self.blk_cfg.iothread.as_ref(), + &mut self.base.deactivate_evts, + )?; + self.update_evts.push(update_evt); + self.senders.push(sender); + } + + if let Some(block_backend) = self.block_backend.as_ref() { + let err_cb = self.gen_error_cb(interrupt_cb.clone()); + block_backend + .lock() + .unwrap() + .register_io_event(self.base.broken.clone(), err_cb)?; + } else { + warn!( + "No disk image when block device {} activate", + self.blk_cfg.id + ); + } + self.base.broken.store(false, Ordering::SeqCst); + + Ok(()) + } + + fn deactivate(&mut self) -> Result<()> { + // Stop receiving virtqueue requests and drain incomplete IO. + unregister_event_helper( + self.blk_cfg.iothread.as_ref(), + &mut self.base.deactivate_evts, + )?; + if let Some(block_backend) = self.block_backend.as_ref() { + let mut block_backend = block_backend.lock().unwrap(); + // Must drain requests before unregister. + block_backend.drain_request(); + block_backend.unregister_io_event()?; + } + self.update_evts.clear(); + self.senders.clear(); + Ok(()) + } + + // configs[0]: DriveConfig. configs[1]: VirtioBlkDevConfig. 
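// A minimal sketch of the bounds handling behind read_config()/write_config() above (exercised by
// test_read_write_config below): accesses are served from the feature-dependent config slice and
// rejected when offset plus length runs past it. The helper name and error type here are
// placeholders, not the real read_config_default/check_config_space_rw signatures.
fn config_access_sketch(config: &[u8], offset: u64, data: &mut [u8]) -> Result<(), String> {
    let end = offset
        .checked_add(data.len() as u64)
        .ok_or_else(|| "config offset overflow".to_string())?;
    if end > config.len() as u64 {
        return Err(format!("config access out of range: {}..{}", offset, end));
    }
    data.copy_from_slice(&config[offset as usize..end as usize]);
    Ok(())
}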
+ fn update_config(&mut self, configs: Vec>) -> Result<()> { + let mut is_plug = false; + if configs.len() == 2 { + self.drive_cfg = configs[0] + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + self.blk_cfg = configs[1] + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + // microvm type block device don't support multiple queue. + self.blk_cfg.num_queues = Some(QUEUE_NUM_BLK as u16); + is_plug = true; + } else if configs.is_empty() { + self.blk_cfg = Default::default(); + self.drive_cfg = Default::default(); + } else { + bail!("Invalid update configs."); + } + + if !is_plug { + // If it is an unplug operation, the block backend is set to none. Unregister aio before + // it. + if let Some(block_backend) = self.block_backend.as_ref() { + block_backend.lock().unwrap().unregister_io_event()?; + } else { + bail!( + "No block backend when block device {} unplug", + self.blk_cfg.id + ); + } + } + + self.realize()?; + + if is_plug { + // Block backend is set after device realized. + if let Some(cb) = self.interrupt_cb.as_ref() { + // NOTE: interrupter_cb may be is none for replaceable device. + let err_cb = self.gen_error_cb(cb.clone()); + self.block_backend + .as_ref() + .with_context(|| { + format!( + "No block backend when block device {} plug", + self.blk_cfg.id + ) + })? + .lock() + .unwrap() + .register_io_event(self.base.broken.clone(), err_cb)?; + } else { + warn!( + "No interrupter cb, may be device {} is not activated", + self.blk_cfg.id + ); + } + } + + for sender in &self.senders { + sender + .send(( + self.block_backend.clone(), + self.req_align, + self.buf_align, + self.disk_sectors, + self.blk_cfg.serial.clone(), + self.drive_cfg.direct, + )) + .with_context(|| VirtioError::ChannelSend("image fd".to_string()))?; + } + for update_evt in &self.update_evts { + update_evt + .write(1) + .with_context(|| VirtioError::EventFdWrite)?; + } + + Ok(()) + } +} + +// SAFETY: Send and Sync is not auto-implemented for `Sender` type. +// Implementing them is safe because `Sender` field of Block won't +// change in migration workflow. 
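// A minimal sketch of the hot-update handshake used by update_config() above: the main thread
// sends the refreshed backend parameters over a channel and then signals an eventfd so the IO
// thread re-reads them in update_evt_handler(). Plain std types stand in for the SenderConfig
// tuple and Arc<EventFd> here.
use std::sync::mpsc::channel;

fn main() {
    let (sender, receiver) = channel::<u64>(); // stand-in for SenderConfig
    sender.send(2048).unwrap(); // e.g. the new disk size in sectors
    // In the real flow, update_evt.write(1) wakes the IO thread here, which then does:
    assert_eq!(receiver.recv().unwrap(), 2048);
}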
+unsafe impl Sync for Block {} + +impl StateTransfer for Block { + fn get_state_vec(&self) -> Result> { + let state = BlockState { + device_features: self.base.device_features, + driver_features: self.base.driver_features, + config_space: self.config_space, + broken: self.base.broken.load(Ordering::SeqCst), + }; + Ok(state.as_bytes().to_vec()) + } + + fn set_state_mut(&mut self, state: &[u8]) -> Result<()> { + let state = BlockState::from_bytes(state) + .with_context(|| migration::error::MigrationError::FromBytesError("BLOCK"))?; + self.base.device_features = state.device_features; + self.base.driver_features = state.driver_features; + self.base.broken.store(state.broken, Ordering::SeqCst); + self.config_space = state.config_space; + Ok(()) + } + + fn get_device_alias(&self) -> u64 { + MigrationManager::get_desc_alias(&BlockState::descriptor().name).unwrap_or(!0) + } +} + +impl MigrationHook for Block {} + +#[cfg(test)] +mod tests { + use std::sync::atomic::{AtomicU32, Ordering}; + use std::{thread, time::Duration}; + + use vmm_sys_util::tempfile::TempFile; + + use super::*; + use crate::tests::address_space_init; + use crate::*; + use address_space::{AddressAttr, GuestAddress}; + use machine_manager::config::{ + str_slip_to_clap, IothreadConfig, VmConfig, DEFAULT_VIRTQUEUE_SIZE, + }; + + const QUEUE_NUM_BLK: usize = 1; + const CONFIG_SPACE_SIZE: usize = 60; + const VIRTQ_DESC_F_NEXT: u16 = 0x01; + const VIRTQ_DESC_F_WRITE: u16 = 0x02; + + fn init_default_block() -> Block { + Block::new( + VirtioBlkDevConfig::default(), + DriveConfig::default(), + Arc::new(Mutex::new(HashMap::new())), + ) + } + + #[test] + fn test_virtio_block_config_cmdline_parser() { + // Test1: Right. + let blk_cmd1 = "virtio-blk-pci,id=rootfs,bus=pcie.0,addr=0x1.0x2,drive=rootfs,serial=111111,num-queues=4"; + let blk_config = + VirtioBlkDevConfig::try_parse_from(str_slip_to_clap(blk_cmd1, true, false)).unwrap(); + assert_eq!(blk_config.id, "rootfs"); + assert_eq!(blk_config.bus.unwrap(), "pcie.0"); + assert_eq!(blk_config.addr.unwrap(), (1, 2)); + assert_eq!(blk_config.serial.unwrap(), "111111"); + assert_eq!(blk_config.num_queues.unwrap(), 4); + + // Test2: Default values. + assert_eq!(blk_config.queue_size, DEFAULT_VIRTQUEUE_SIZE); + + // Test3: Illegal values. + let blk_cmd3 = "virtio-blk-pci,id=rootfs,bus=pcie.0,addr=0x1.0x2,drive=rootfs,serial=111111,num-queues=33"; + let result = VirtioBlkDevConfig::try_parse_from(str_slip_to_clap(blk_cmd3, true, false)); + assert!(result.is_err()); + let blk_cmd3 = "virtio-blk-pci,id=rootfs,drive=rootfs,serial=111111111111111111111111111111111111111111111111111111111111111111111"; + let result = VirtioBlkDevConfig::try_parse_from(str_slip_to_clap(blk_cmd3, true, false)); + assert!(result.is_err()); + } + + // Use different input parameters to verify block `new()` and `realize()` functionality. + #[test] + fn test_block_init() { + assert!(EventLoop::object_init(&None).is_ok()); + // New block device + let mut block = init_default_block(); + assert_eq!(block.disk_sectors, 0); + assert_eq!(block.base.device_features, 0); + assert_eq!(block.base.driver_features, 0); + assert_eq!(block.config_space.as_bytes().len(), CONFIG_SPACE_SIZE); + assert!(block.block_backend.is_none()); + assert!(block.interrupt_cb.is_none()); + assert!(block.senders.is_empty()); + + // Realize block device: create TempFile as backing file. 
+ block.drive_cfg.readonly = true; + block.drive_cfg.direct = false; + let f = TempFile::new().unwrap(); + block.drive_cfg.path_on_host = f.as_path().to_str().unwrap().to_string(); + VmConfig::add_drive_file( + &mut block.drive_files.lock().unwrap(), + "", + &block.drive_cfg.path_on_host, + block.drive_cfg.readonly, + block.drive_cfg.direct, + ) + .unwrap(); + assert!(block.realize().is_ok()); + + assert_eq!(block.device_type(), VIRTIO_TYPE_BLOCK); + assert_eq!(block.queue_num(), QUEUE_NUM_BLK); + assert_eq!(block.queue_size_max(), DEFAULT_VIRTQUEUE_SIZE); + EventLoop::loop_clean(); + } + + // Test `write_config` and `read_config`. The main contests include: compare expect data and + // read data are not same; Input invalid offset or data length, it will failed. + #[test] + fn test_read_write_config() { + let mut block = init_default_block(); + block.realize().unwrap(); + + let expect_config_space: [u8; 8] = [0x00, 020, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00]; + let mut read_config_space = [0u8; 8]; + block.write_config(0, &expect_config_space).unwrap(); + block.read_config(0, &mut read_config_space).unwrap(); + assert_ne!(read_config_space, expect_config_space); + + // Invalid write + assert!(block + .write_config(CONFIG_SPACE_SIZE as u64 + 1, &expect_config_space) + .is_err()); + let errlen_config_space = [0u8; CONFIG_SPACE_SIZE + 1]; + assert!(block.write_config(0, &errlen_config_space).is_err()); + // Invalid read + read_config_space = expect_config_space; + assert!(block + .read_config(CONFIG_SPACE_SIZE as u64 + 1, &mut read_config_space) + .is_err()); + } + + // Test `get_device_features` and `set_driver_features`. The main contests include: If the + // device feature is 0, all driver features are not supported; If both the device feature bit + // and the front-end driver feature bit are supported at the same time, this driver feature + // bit is supported. + #[test] + fn test_block_features() { + let mut block = init_default_block(); + + // If the device feature is 0, all driver features are not supported. + block.base.device_features = 0; + let driver_feature: u32 = 0xFF; + let page = 0_u32; + block.set_driver_features(page, driver_feature); + assert_eq!(block.base.driver_features, 0_u64); + assert_eq!(u64::from(block.driver_features(page)), 0_u64); + assert_eq!(block.device_features(0_u32), 0_u32); + + let driver_feature: u32 = 0xFF; + let page = 1_u32; + block.set_driver_features(page, driver_feature); + assert_eq!(block.base.driver_features, 0_u64); + assert_eq!(u64::from(block.driver_features(page)), 0_u64); + assert_eq!(block.device_features(1_u32), 0_u32); + + // If both the device feature bit and the front-end driver feature bit are + // supported at the same time, this driver feature bit is supported. 
+ block.base.device_features = + 1_u64 << VIRTIO_F_VERSION_1 | 1_u64 << VIRTIO_F_RING_INDIRECT_DESC; + let driver_feature: u32 = (1_u64 << VIRTIO_F_RING_INDIRECT_DESC) as u32; + let page = 0_u32; + block.set_driver_features(page, driver_feature); + assert_eq!( + block.base.driver_features, + (1_u64 << VIRTIO_F_RING_INDIRECT_DESC) + ); + assert_eq!( + u64::from(block.driver_features(page)), + (1_u64 << VIRTIO_F_RING_INDIRECT_DESC) + ); + assert_eq!( + block.device_features(page), + (1_u32 << VIRTIO_F_RING_INDIRECT_DESC) + ); + block.base.driver_features = 0; + + block.base.device_features = 1_u64 << VIRTIO_F_VERSION_1; + let driver_feature: u32 = (1_u64 << VIRTIO_F_RING_INDIRECT_DESC) as u32; + let page = 0_u32; + block.set_driver_features(page, driver_feature); + assert_eq!(block.base.driver_features, 0); + assert_eq!(block.driver_features(page), 0); + assert_eq!(block.device_features(page), 0_u32); + block.base.driver_features = 0; + } + + // Test `get_serial_num_config`. The function will output the shorter length between 20 + // with serial_num length. + #[test] + fn test_serial_num_config() { + let serial_num = "fldXlNNdCeqMvoIfEFogBxlL"; + let serial_num_arr = serial_num.as_bytes(); + let id_bytes = get_serial_num_config(serial_num); + assert_eq!(id_bytes[..], serial_num_arr[..20]); + assert_eq!(id_bytes.len(), 20); + + let serial_num = "7681194149"; + let serial_num_arr = serial_num.as_bytes(); + let id_bytes = get_serial_num_config(serial_num); + assert_eq!(id_bytes[..10], serial_num_arr[..]); + assert_eq!(id_bytes.len(), 20); + + let serial_num = ""; + let id_bytes_temp = get_serial_num_config(serial_num); + assert_eq!(id_bytes_temp[..], [0; 20]); + assert_eq!(id_bytes_temp.len(), 20); + } + + // Test iothread and qos capability. The function will spawn a thread called 'iothread', then + // io request will be handled by this thread. 
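// A minimal sketch of the negotiation rule the assertions above check: the driver writes its
// requested features one 32-bit page at a time, and only bits also offered by the device are
// kept. The function below illustrates that rule; it is not the real set_driver_features.
fn accept_driver_features_sketch(device_features: u64, page: u32, value: u32) -> u64 {
    let requested = u64::from(value) << (32 * page);
    requested & device_features
}

fn main() {
    const VIRTIO_F_RING_INDIRECT_DESC: u64 = 28; // bit position, per the virtio spec
    let offered = (1u64 << 32) /* VIRTIO_F_VERSION_1 */ | (1u64 << VIRTIO_F_RING_INDIRECT_DESC);
    // An offered bit requested by the driver is accepted.
    assert_eq!(
        accept_driver_features_sketch(offered, 0, 1u32 << VIRTIO_F_RING_INDIRECT_DESC),
        1u64 << VIRTIO_F_RING_INDIRECT_DESC
    );
    // A bit the device does not offer is dropped.
    assert_eq!(
        accept_driver_features_sketch(1u64 << 32, 0, 1u32 << VIRTIO_F_RING_INDIRECT_DESC),
        0
    );
}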
+ #[test] + fn test_iothread() { + let thread_name = "io1".to_string(); + + // spawn io thread + let io_conf = IothreadConfig { + classtype: "iothread".to_string(), + id: thread_name.clone(), + }; + EventLoop::object_init(&Some(vec![io_conf])).unwrap(); + + let mut block = init_default_block(); + let file = TempFile::new().unwrap(); + block.drive_cfg.path_on_host = file.as_path().to_str().unwrap().to_string(); + block.drive_cfg.direct = false; + + // config iothread and iops + block.blk_cfg.iothread = Some(thread_name); + block.drive_cfg.iops = Some(100); + + VmConfig::add_drive_file( + &mut block.drive_files.lock().unwrap(), + "", + &block.drive_cfg.path_on_host, + block.drive_cfg.readonly, + block.drive_cfg.direct, + ) + .unwrap(); + + let mem_space = address_space_init(); + let interrupt_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + let interrupt_status = Arc::new(AtomicU32::new(0)); + let interrupt_cb = Arc::new(Box::new( + move |int_type: &VirtioInterruptType, _queue: Option<&Queue>, _needs_reset: bool| { + let status = match int_type { + VirtioInterruptType::Config => VIRTIO_MMIO_INT_CONFIG, + VirtioInterruptType::Vring => VIRTIO_MMIO_INT_VRING, + }; + interrupt_status.fetch_or(status, Ordering::SeqCst); + interrupt_evt + .write(1) + .with_context(|| VirtioError::EventFdWrite)?; + + Ok(()) + }, + ) as VirtioInterrupt); + + let mut queue_config = QueueConfig::new(DEFAULT_VIRTQUEUE_SIZE); + queue_config.desc_table = GuestAddress(0); + queue_config.addr_cache.desc_table_host = unsafe { + mem_space + .get_host_address(queue_config.desc_table, AddressAttr::Ram) + .unwrap() + }; + queue_config.avail_ring = GuestAddress(16 * u64::from(DEFAULT_VIRTQUEUE_SIZE)); + queue_config.addr_cache.avail_ring_host = unsafe { + mem_space + .get_host_address(queue_config.avail_ring, AddressAttr::Ram) + .unwrap() + }; + queue_config.used_ring = GuestAddress(32 * u64::from(DEFAULT_VIRTQUEUE_SIZE)); + queue_config.addr_cache.used_ring_host = unsafe { + mem_space + .get_host_address(queue_config.used_ring, AddressAttr::Ram) + .unwrap() + }; + queue_config.size = DEFAULT_VIRTQUEUE_SIZE; + queue_config.ready = true; + + block.base.queues = vec![Arc::new(Mutex::new(Queue::new(queue_config, 1).unwrap()))]; + let event = Arc::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()); + + // activate block device + block + .activate(mem_space.clone(), interrupt_cb, vec![event.clone()]) + .unwrap(); + + // make first descriptor entry + let desc = SplitVringDesc { + addr: GuestAddress(0x100), + len: 16, + flags: VIRTQ_DESC_F_NEXT, + next: 1, + }; + mem_space + .write_object::( + &desc, + GuestAddress(queue_config.desc_table.0), + AddressAttr::Ram, + ) + .unwrap(); + + // write RequestOutHeader to first desc + let req_head = RequestOutHeader { + request_type: 0, // read + io_prio: 0, + sector: 0, + }; + mem_space + .write_object::(&req_head, GuestAddress(0x100), AddressAttr::Ram) + .unwrap(); + + // making the second descriptor entry to receive data from device + let desc = SplitVringDesc { + addr: GuestAddress(0x200), + len: 16, + flags: VIRTQ_DESC_F_WRITE, + next: 2, + }; + mem_space + .write_object::( + &desc, + GuestAddress(queue_config.desc_table.0 + 16_u64), + AddressAttr::Ram, + ) + .unwrap(); + + // write avail_ring idx + mem_space + .write_object::( + &0, + GuestAddress(queue_config.avail_ring.0 + 4_u64), + AddressAttr::Ram, + ) + .unwrap(); + + // write avail_ring id + mem_space + .write_object::( + &1, + GuestAddress(queue_config.avail_ring.0 + 2_u64), + AddressAttr::Ram, + ) + .unwrap(); + + // imitating guest 
OS to send notification. + event.write(1).unwrap(); + + // waiting for io handled + let mut wait = 10; // wait for 2 seconds + loop { + thread::sleep(Duration::from_millis(200)); + + wait -= 1; + if wait == 0 { + assert_eq!(0, 1); // timeout failed + } + + // get used_ring data + let idx = mem_space + .read_object::( + GuestAddress(queue_config.used_ring.0 + 2_u64), + AddressAttr::Ram, + ) + .unwrap(); + if idx == 1 { + break; + } + } + EventLoop::loop_clean(); + } +} diff --git a/virtio/src/device/gpu.rs b/virtio/src/device/gpu.rs new file mode 100644 index 0000000000000000000000000000000000000000..0725c09d4a2f28bb9943bfa1d9a0bcdc52208aed --- /dev/null +++ b/virtio/src/device/gpu.rs @@ -0,0 +1,1925 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::mem::size_of; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::rc::Rc; +use std::slice::from_raw_parts_mut; +use std::sync::{Arc, Mutex, Weak}; +use std::{ptr, vec}; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::{ArgAction, Parser}; +use log::{error, info, warn}; +use vmm_sys_util::{epoll::EventSet, eventfd::EventFd}; + +use crate::{ + check_config_space_rw, gpa_hva_iovec_map, iov_discard_front, iov_read_object, + read_config_default, ElemIovec, Element, Queue, VirtioBase, VirtioDevice, VirtioDeviceQuirk, + VirtioError, VirtioInterrupt, VirtioInterruptType, VIRTIO_F_RING_EVENT_IDX, + VIRTIO_F_RING_INDIRECT_DESC, VIRTIO_F_VERSION_1, VIRTIO_GPU_CMD_GET_DISPLAY_INFO, + VIRTIO_GPU_CMD_GET_EDID, VIRTIO_GPU_CMD_MOVE_CURSOR, VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING, + VIRTIO_GPU_CMD_RESOURCE_CREATE_2D, VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING, + VIRTIO_GPU_CMD_RESOURCE_FLUSH, VIRTIO_GPU_CMD_RESOURCE_UNREF, VIRTIO_GPU_CMD_SET_SCANOUT, + VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D, VIRTIO_GPU_CMD_UPDATE_CURSOR, VIRTIO_GPU_FLAG_FENCE, + VIRTIO_GPU_F_EDID, VIRTIO_GPU_F_MONOCHROME, VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID, + VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY, VIRTIO_GPU_RESP_ERR_UNSPEC, VIRTIO_GPU_RESP_OK_DISPLAY_INFO, + VIRTIO_GPU_RESP_OK_EDID, VIRTIO_GPU_RESP_OK_NODATA, VIRTIO_TYPE_GPU, +}; +use address_space::{AddressSpace, FileBackend, GuestAddress}; +use machine_manager::config::{get_pci_df, valid_id, DEFAULT_VIRTQUEUE_SIZE}; +use machine_manager::event_loop::{register_event_helper, unregister_event_helper}; +use migration_derive::ByteCode; +use ui::console::{ + console_close, console_init, display_cursor_define, display_graphic_update, + display_replace_surface, display_set_major_screen, get_run_stage, set_run_stage, ConsoleType, + DisplayConsole, DisplayMouse, DisplaySurface, HardWareOperations, VmRunningStage, +}; +use ui::pixman::{ + create_pixman_image, get_image_data, get_image_format, get_image_height, get_image_stride, + get_image_width, ref_pixman_image, unref_pixman_image, +}; +use util::aio::{iov_from_buf_direct, iov_to_buf_direct, Iovec}; +use util::byte_code::ByteCode; +use util::edid::EdidInfo; +use util::gen_base_func; +use 
util::loop_context::{ + read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, +}; +use util::pixman::{ + pixman_format_bpp, pixman_format_code_t, pixman_image_set_destroy_function, pixman_image_t, + pixman_region16_t, pixman_region_extents, pixman_region_fini, pixman_region_init, + pixman_region_init_rect, pixman_region_intersect, pixman_region_translate, + virtio_gpu_unref_resource_callback, +}; + +/// Number of virtqueues +const QUEUE_NUM_GPU: usize = 2; +/// Display changed event +const VIRTIO_GPU_EVENT_DISPLAY: u32 = 1 << 0; + +/// The flag indicates that the frame buffer only used in windows. +const VIRTIO_GPU_RES_WIN_FRAMEBUF: u32 = 0x80000000; +/// The flag indicates that the frame buffer only used in special bios phase for windows. +const VIRTIO_GPU_RES_EFI_FRAMEBUF: u32 = 0x40000000; +const VIRTIO_GPU_RES_FRAMEBUF: u32 = VIRTIO_GPU_RES_WIN_FRAMEBUF | VIRTIO_GPU_RES_EFI_FRAMEBUF; + +/// The maximum number of outputs. +const VIRTIO_GPU_MAX_OUTPUTS: usize = 16; +/// The default maximum memory 256M. +const VIRTIO_GPU_DEFAULT_MAX_HOSTMEM: u64 = 0x10000000; + +#[derive(Parser, Clone, Debug, Default)] +#[command(no_binary_name(true))] +pub struct GpuDevConfig { + #[arg(long, value_parser = ["virtio-gpu-pci"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub bus: String, + #[arg(long, value_parser = get_pci_df)] + pub addr: (u8, u8), + #[arg(long, alias = "max_outputs", default_value="1", value_parser = clap::value_parser!(u32).range(1..=VIRTIO_GPU_MAX_OUTPUTS as i64))] + pub max_outputs: u32, + #[arg(long, default_value="true", action = ArgAction::Append)] + pub edid: bool, + #[arg(long, default_value = "1024")] + pub xres: u32, + #[arg(long, default_value = "768")] + pub yres: u32, + // The default max_hostmem is 256M. 
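+    // 268435456 bytes = 0x10000000 = 256 MiB, i.e. VIRTIO_GPU_DEFAULT_MAX_HOSTMEM.
+    // Rough sizing example: a single 1920x1080 32-bpp resource accounts for
+    // 1920 * 4 * 1080 = 8294400 bytes (about 8 MiB) of this budget, see
+    // cal_image_hostmem() below.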
+ #[arg(long, alias = "max_hostmem", default_value="268435456", value_parser = clap::value_parser!(u64).range(1..))] + pub max_hostmem: u64, + #[arg(long, alias = "enable_bar0", default_value="false", action = ArgAction::Append)] + pub enable_bar0: bool, +} + +impl GpuDevConfig { + pub fn check(&self) { + if self.max_hostmem < VIRTIO_GPU_DEFAULT_MAX_HOSTMEM { + warn!( + "max_hostmem should >= {}, allocating less than it may cause \ + the GPU to fail to start or refresh.", + VIRTIO_GPU_DEFAULT_MAX_HOSTMEM + ); + } + } +} + +#[derive(Debug)] +struct GpuResource { + resource_id: u32, + width: u32, + height: u32, + format: u32, + iov: Vec, + scanouts_bitmask: u32, + host_mem: u64, + pixman_image: *mut pixman_image_t, + monochrome_cursor: Vec, +} + +impl Default for GpuResource { + fn default() -> Self { + GpuResource { + resource_id: 0, + width: 0, + height: 0, + format: 0, + iov: Vec::new(), + scanouts_bitmask: 0, + host_mem: 0, + pixman_image: ptr::null_mut(), + monochrome_cursor: Vec::new(), + } + } +} + +#[allow(unused)] +#[derive(Default, Clone, Copy)] +struct VirtioGpuOutputState { + con_id: usize, + width: u32, + height: u32, + x_coor: i32, + y_coor: i32, +} + +trait CtrlHdr { + fn mut_ctrl_hdr(&mut self) -> &mut VirtioGpuCtrlHdr; +} + +#[repr(C)] +#[derive(Default, Clone, Copy, Debug)] +struct VirtioGpuCtrlHdr { + hdr_type: u32, + flags: u32, + fence_id: u64, + ctx_id: u32, + padding: u32, +} + +impl ByteCode for VirtioGpuCtrlHdr {} + +impl CtrlHdr for VirtioGpuCtrlHdr { + fn mut_ctrl_hdr(&mut self) -> &mut VirtioGpuCtrlHdr { + self + } +} + +#[repr(C)] +#[derive(Default, Clone, Copy, Debug)] +struct VirtioGpuRect { + x_coord: u32, + y_coord: u32, + width: u32, + height: u32, +} + +impl ByteCode for VirtioGpuRect {} + +#[repr(C)] +#[derive(Default, Clone, Copy, Debug)] +struct VirtioGpuDisplayOne { + rect: VirtioGpuRect, + enabled: u32, + flags: u32, +} + +impl ByteCode for VirtioGpuDisplayOne {} + +#[repr(C)] +#[derive(Default, Clone, Copy, Debug)] +struct VirtioGpuDisplayInfo { + header: VirtioGpuCtrlHdr, + pmodes: [VirtioGpuDisplayOne; VIRTIO_GPU_MAX_OUTPUTS], +} + +impl ByteCode for VirtioGpuDisplayInfo {} + +impl CtrlHdr for VirtioGpuDisplayInfo { + fn mut_ctrl_hdr(&mut self) -> &mut VirtioGpuCtrlHdr { + &mut self.header + } +} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +struct VirtioGpuGetEdid { + scanouts: u32, + padding: u32, +} +impl ByteCode for VirtioGpuGetEdid {} + +#[repr(C)] +// data which transfer to frontend need padding +#[derive(Clone, Copy)] +struct VirtioGpuRespEdid { + header: VirtioGpuCtrlHdr, + size: u32, + padding: u32, + edid: [u8; 1024], +} + +impl ByteCode for VirtioGpuRespEdid {} + +impl CtrlHdr for VirtioGpuRespEdid { + fn mut_ctrl_hdr(&mut self) -> &mut VirtioGpuCtrlHdr { + &mut self.header + } +} + +impl Default for VirtioGpuRespEdid { + fn default() -> Self { + VirtioGpuRespEdid { + header: VirtioGpuCtrlHdr::default(), + size: 0, + padding: 0, + edid: [0; 1024], + } + } +} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +struct VirtioGpuResourceCreate2d { + resource_id: u32, + format: u32, + width: u32, + height: u32, +} + +impl ByteCode for VirtioGpuResourceCreate2d {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +struct VirtioGpuResourceUnref { + resource_id: u32, + padding: u32, +} + +impl ByteCode for VirtioGpuResourceUnref {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +struct VirtioGpuSetScanout { + rect: VirtioGpuRect, + scanout_id: u32, + resource_id: u32, +} + +impl ByteCode for VirtioGpuSetScanout {} + +#[repr(C)] +#[derive(Default, 
Clone, Copy)] +struct VirtioGpuResourceFlush { + rect: VirtioGpuRect, + resource_id: u32, + padding: u32, +} + +impl ByteCode for VirtioGpuResourceFlush {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +struct VirtioGpuTransferToHost2d { + rect: VirtioGpuRect, + offset: u64, + resource_id: u32, + padding: u32, +} + +impl ByteCode for VirtioGpuTransferToHost2d {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +struct VirtioGpuResourceAttachBacking { + resource_id: u32, + nr_entries: u32, +} + +impl ByteCode for VirtioGpuResourceAttachBacking {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +struct VirtioGpuMemEntry { + addr: u64, + length: u32, + padding: u32, +} + +impl ByteCode for VirtioGpuMemEntry {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +struct VirtioGpuResourceDetachBacking { + resource_id: u32, + padding: u32, +} + +impl ByteCode for VirtioGpuResourceDetachBacking {} + +struct GpuOpts { + /// Status of the emulated physical outputs. + output_states: Arc>, + /// Config space of the GPU device. + config_space: Arc>, + /// Callback to trigger interrupt. + interrupt_cb: Option>, + /// Whether to use it in the bios phase. + enable_bar0: bool, +} + +impl HardWareOperations for GpuOpts { + fn hw_update(&self, con: Arc>) { + // Only in the Bios phase and configured with enable_bar0 feature and need to + // use special modifications with edk2. + if !self.enable_bar0 || get_run_stage() != VmRunningStage::Bios { + return; + } + + let locked_con = con.lock().unwrap(); + if locked_con.surface.is_none() { + return; + } + let width = locked_con.width; + let height = locked_con.height; + trace::virtio_gpu_console_hw_update(locked_con.con_id, width, height); + drop(locked_con); + display_graphic_update(&Some(Arc::downgrade(&con)), 0, 0, width, height) + .unwrap_or_else(|e| error!("Error occurs during graphic updating: {:?}", e)); + } + + fn hw_ui_info(&self, con: Arc>, width: u32, height: u32) { + let con_id = con.lock().unwrap().con_id; + + // Update output size. + for output_state in self.output_states.lock().unwrap().iter_mut() { + if output_state.con_id == con_id { + output_state.width = width; + output_state.height = height; + break; + } + } + + // Update events_read in config sapce. + let mut config_space = self.config_space.lock().unwrap(); + config_space.events_read |= VIRTIO_GPU_EVENT_DISPLAY; + + if self.interrupt_cb.is_none() { + return; + } + info!( + "virtio-gpu receive resize request, con {} will be resize to {} {}.", + con_id, width, height + ); + let interrupt_cb = self.interrupt_cb.as_ref().unwrap(); + if let Err(e) = (interrupt_cb)(&VirtioInterruptType::Config, None, false) { + error!( + "{:?}. {:?}", + VirtioError::InterruptTrigger("gpu", VirtioInterruptType::Config), + e + ); + } + } +} + +#[derive(Default, Clone)] +struct VirtioGpuRequest { + header: VirtioGpuCtrlHdr, + index: u16, + out_iovec: Vec, + out_len: u32, + in_iovec: Vec, + _in_len: u32, +} + +impl VirtioGpuRequest { + fn new(mem_space: &Arc, elem: &mut Element) -> Result { + // Report errors for out_iovec invalid here, deal with in_iovec + // error in cmd process. + if elem.out_iovec.is_empty() { + bail!( + "Missed header for gpu request: out {} in {} desc num {}.", + elem.out_iovec.len(), + elem.in_iovec.len(), + elem.desc_num + ); + } + + let header = iov_read_object::(mem_space, &elem.out_iovec, &None)?; + + // Size of out_iovec is no less than size of VirtioGpuCtrlHdr, so + // it is possible to get none back. 
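+        // size_of::<VirtioGpuCtrlHdr>() is 24 bytes (4 + 4 + 8 + 4 + 4), so the
+        // discard below strips exactly the header and leaves only the
+        // command-specific payload; unwrap_or_default() yields an empty iovec
+        // when the request carries no payload at all.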
+ let data_iovec = + iov_discard_front(&mut elem.out_iovec, size_of::() as u64) + .unwrap_or_default(); + + let (out_len, out_iovec) = gpa_hva_iovec_map(data_iovec, mem_space, &None)?; + let (in_len, in_iovec) = gpa_hva_iovec_map(&elem.in_iovec, mem_space, &None)?; + + // Note: in_iov and out_iov total len is no more than 1<<32, and + // out_iov is more than 1, so in_len and out_len will not overflow. + Ok(VirtioGpuRequest { + header, + index: elem.index, + out_iovec, + out_len: out_len as u32, + in_iovec, + _in_len: in_len as u32, + }) + } +} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +struct VirtioGpuCursorPos { + scanout_id: u32, + x_coord: u32, + y_coord: u32, + padding: u32, +} + +impl ByteCode for VirtioGpuCursorPos {} + +#[repr(C)] +#[derive(Default, Clone, Copy)] +struct VirtioGpuUpdateCursor { + pos: VirtioGpuCursorPos, + resource_id: u32, + hot_x: u32, + hot_y: u32, + padding: u32, +} + +impl ByteCode for VirtioGpuUpdateCursor {} + +#[derive(Default)] +struct GpuScanout { + con: Option>>, + surface: Option, + mouse: Option, + width: u32, + height: u32, + x: u32, + y: u32, + resource_id: u32, + cursor_visible: bool, +} + +impl GpuScanout { + fn clear(&mut self) { + self.resource_id = 0; + self.surface = None; + self.width = 0; + self.height = 0; + self.cursor_visible = false; + } +} + +/// Control block of GPU IO. +struct GpuIoHandler { + /// The virtqueue for for sending control commands. + ctrl_queue: Arc>, + /// The virtqueue for sending cursor updates. + cursor_queue: Arc>, + /// The address space to which the GPU device belongs. + mem_space: Arc, + /// Eventfd for control virtqueue. + ctrl_queue_evt: Arc, + /// Eventfd for cursor virtqueue. + cursor_queue_evt: Arc, + /// Callback to trigger an interrupt. + interrupt_cb: Arc, + /// Bit mask of features negotiated by the backend and the frontend. + driver_features: u64, + /// Vector for resources. + resources_list: Vec, + /// The bit mask of whether scanout is enabled or not. + enable_output_bitmask: u32, + /// The number of scanouts + num_scanouts: u32, + /// States of all output_states. + output_states: Arc>, + /// Scanouts of gpu, mouse doesn't realize copy trait, so it is a vector. + scanouts: Vec, + /// Max host mem for resource. + max_hostmem: u64, + /// Current usage of host mem. + used_hostmem: u64, +} + +fn create_surface( + scanout: &mut GpuScanout, + info_set_scanout: VirtioGpuSetScanout, + res: &GpuResource, + pixman_format: pixman_format_code_t, + pixman_stride: libc::c_int, + res_data_offset: *mut u32, +) -> DisplaySurface { + let mut surface = DisplaySurface::default(); + let rect = create_pixman_image( + pixman_format, + info_set_scanout.rect.width as i32, + info_set_scanout.rect.height as i32, + res_data_offset, + pixman_stride, + ); + ref_pixman_image(res.pixman_image); + // SAFETY: The param of create operation for image has been checked. + unsafe { + pixman_image_set_destroy_function( + rect, + Some(virtio_gpu_unref_resource_callback), + res.pixman_image.cast(), + ); + } + surface.format = pixman_format; + surface.image = ref_pixman_image(rect); + + if !surface.image.is_null() { + // Update surface in scanout. 
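+        // surface.image holds its own reference taken by ref_pixman_image()
+        // above, so the creation reference on `rect` can be dropped right away;
+        // the backing resource image stays alive through the destroy callback
+        // registered on `rect`.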
+ scanout.surface = Some(surface); + unref_pixman_image(rect); + display_replace_surface(&scanout.con, scanout.surface) + .unwrap_or_else(|e| error!("Error occurs during surface switching: {:?}", e)); + } + + surface +} + +// simple formats for fbcon/X use +const VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM: u32 = 1; +const VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM: u32 = 2; +const VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM: u32 = 3; +const VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM: u32 = 4; +const VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM: u32 = 67; +const VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM: u32 = 68; +const VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM: u32 = 121; +const VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM: u32 = 134; +const VIRTIO_GPU_FORMAT_MONOCHROME: u32 = 500; +pub const VIRTIO_GPU_FORMAT_INVALID_UNORM: u32 = 135; +const VIRTIO_GPU_CURSOR_SIZE: usize = 64; + +pub fn get_pixman_format(format: u32) -> Result { + match format { + VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM => Ok(pixman_format_code_t::PIXMAN_a8r8g8b8), + VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM => Ok(pixman_format_code_t::PIXMAN_x8r8g8b8), + VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM => Ok(pixman_format_code_t::PIXMAN_b8g8r8a8), + VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM => Ok(pixman_format_code_t::PIXMAN_b8g8r8x8), + VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM => Ok(pixman_format_code_t::PIXMAN_a8b8g8r8), + VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM => Ok(pixman_format_code_t::PIXMAN_r8g8b8x8), + VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM => Ok(pixman_format_code_t::PIXMAN_r8g8b8a8), + VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM => Ok(pixman_format_code_t::PIXMAN_x8b8g8r8), + _ => { + bail!("Unsupported pixman format") + } + } +} + +// update curosr from monochrome source +// https://learn.microsoft.com/en-us/windows-hardware/drivers/display/drawing-monochrome-pointers +pub fn set_monochrome_cursor(cursor: &mut [u8], source: &[u8], width: usize, height: usize) { + let pixels_num = width * height; + let mask_value_size = pixels_num / 8; + let and_mask_value = &source[0..mask_value_size]; + let xor_mask_value = &source[mask_value_size..mask_value_size * 2]; + // Bytes per line + let bpl = VIRTIO_GPU_CURSOR_SIZE / 8; + // Bytes per pixel for cursor img, which expected export in RGBA format + let bpp = 4; + + for row in 0..VIRTIO_GPU_CURSOR_SIZE { + for col in 0..bpl { + for i in 0..8 { + let cursor_index = (row * VIRTIO_GPU_CURSOR_SIZE + col * 8 + i) * bpp; + + if row >= height || col * bpl >= width { + cursor[cursor_index] = 0x00; + cursor[cursor_index + 1] = 0x00; + cursor[cursor_index + 2] = 0x00; + cursor[cursor_index + 3] = 0x00; + continue; + } + + let mask_index: u8 = 0x80 >> i; + let and_v = (and_mask_value[row * (width / 8) + col] & mask_index) != 0; + let xor_v = (xor_mask_value[row * (width / 8) + col] & mask_index) != 0; + + if !and_v && !xor_v { + cursor[cursor_index] = 0x00; + cursor[cursor_index + 1] = 0x00; + cursor[cursor_index + 2] = 0x00; + cursor[cursor_index + 3] = 0xff; + } else if !and_v && xor_v { + cursor[cursor_index] = 0xff; + cursor[cursor_index + 1] = 0xff; + cursor[cursor_index + 2] = 0xff; + cursor[cursor_index + 3] = 0xff; + } else if and_v && !xor_v { + cursor[cursor_index] = 0x00; + cursor[cursor_index + 1] = 0x00; + cursor[cursor_index + 2] = 0x00; + cursor[cursor_index + 3] = 0x00; + } else { + // for inverted, in graphic is hard to get background color, just make it black. 
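+                    // Summary of the AND/XOR mask mapping handled above:
+                    //   AND=0, XOR=0 -> opaque black
+                    //   AND=0, XOR=1 -> opaque white
+                    //   AND=1, XOR=0 -> transparent
+                    //   AND=1, XOR=1 -> "inverted"; RGBA has no equivalent, so
+                    //   it is approximated with opaque black below.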
+ cursor[cursor_index] = 0x00; + cursor[cursor_index + 1] = 0x00; + cursor[cursor_index + 2] = 0x00; + cursor[cursor_index + 3] = 0xff; + } + } + } + } +} + +pub fn cal_image_hostmem(format: u32, width: u32, height: u32) -> (Option, u32) { + // Expected monochrome cursor is 8 pixel aligned. + if format == VIRTIO_GPU_FORMAT_MONOCHROME { + if width as usize > VIRTIO_GPU_CURSOR_SIZE + || height as usize > VIRTIO_GPU_CURSOR_SIZE + || width % 8 != 0 + || height % 8 != 0 + { + error!( + "GuestError: monochrome cursor use invalid size: {} {}.", + width, height + ); + (None, VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER) + } else { + let mem = (width * height / 8 * 2) as usize; + (Some(mem), 0) + } + } else { + let pixman_format = match get_pixman_format(format) { + Ok(f) => f, + Err(e) => { + error!("GuestError: {:?}", e); + return (None, VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER); + } + }; + let bpp = pixman_format_bpp(pixman_format as u32); + let stride = ((u64::from(width) * u64::from(bpp) + 0x1f) >> 5) * (size_of::() as u64); + match stride.checked_mul(u64::from(height)) { + None => { + error!( + "stride * height is overflow: width {} height {} stride {} bpp {}", + width, height, stride, bpp, + ); + (None, VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER) + } + Some(v) => (Some(v as usize), 0), + } + } +} + +fn is_rect_in_resource(rect: &VirtioGpuRect, res: &GpuResource) -> bool { + let x_in = rect + .x_coord + .checked_add(rect.width) + .filter(|&sum| sum <= res.width) + .is_some(); + let y_in = rect + .y_coord + .checked_add(rect.height) + .filter(|&sum| sum <= res.height) + .is_some(); + x_in && y_in +} + +impl GpuIoHandler { + fn change_run_stage(&self) -> Result<()> { + if get_run_stage() == VmRunningStage::Bios && !self.scanouts.is_empty() { + match &self.scanouts[0].con.as_ref().and_then(|c| c.upgrade()) { + Some(con) => { + let dev_name = con.lock().unwrap().dev_name.clone(); + display_set_major_screen(&dev_name)?; + set_run_stage(VmRunningStage::Os); + } + None => {} + }; + } + Ok(()) + } + + fn get_request(&mut self, header: &VirtioGpuRequest, req: &mut T) -> Result<()> { + // SAFETY: out_iovec is generated by address_space. + unsafe { iov_to_buf_direct(&header.out_iovec, 0, req.as_mut_bytes()) }.and_then(|size| { + if size == size_of::() { + Ok(()) + } else { + Err(anyhow!("Invalid header for gpu request: len {}.", size)) + } + }) + } + + fn complete_one_request(&mut self, index: u16, len: u32) -> Result<()> { + let mut queue_lock = self.ctrl_queue.lock().unwrap(); + + queue_lock.vring.add_used(index, len).with_context(|| { + format!( + "Failed to add used ring(gpu ctrl), index {}, len {}", + index, len, + ) + })?; + + if queue_lock.vring.should_notify(self.driver_features) { + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue_lock), false) + .with_context(|| "Failed to trigger interrupt(gpu ctrl)")?; + trace::virtqueue_send_interrupt("Gpu", &*queue_lock as *const _ as u64); + } + + Ok(()) + } + + fn send_response( + &mut self, + req: &VirtioGpuRequest, + resp: &mut T, + ) -> Result<()> { + if (req.header.flags & VIRTIO_GPU_FLAG_FENCE) != 0 { + let header = resp.mut_ctrl_hdr(); + header.flags |= VIRTIO_GPU_FLAG_FENCE; + header.fence_id = req.header.fence_id; + header.ctx_id = req.header.ctx_id; + } + + // SAFETY: in_iovec is generated by address_space. + let len = unsafe { iov_from_buf_direct(&req.in_iovec, resp.as_bytes())? }; + if len != size_of::() { + error!( + "GuestError: An incomplete response will be used instead of the expected: expected \ + length is {}, actual length is {}. 
\ + Also, be aware that the virtual machine may suspended if response is too short to \ + carry the necessary information.", + size_of::(), len, + ); + } + self.complete_one_request(req.index, len as u32) + } + + fn response_nodata(&mut self, resp_head_type: u32, req: &VirtioGpuRequest) -> Result<()> { + let mut resp = VirtioGpuCtrlHdr { + hdr_type: resp_head_type, + ..Default::default() + }; + self.send_response(req, &mut resp) + } + + // Mask resource's scanout bit before disable a scanout. + fn disable_scanout(&mut self, scanout_id: usize) { + let resource_id = self.scanouts[scanout_id].resource_id; + if resource_id == 0 { + return; + } + + if let Some(res_idx) = self.get_resource_idx(resource_id) { + let res = &mut self.resources_list[res_idx]; + res.scanouts_bitmask &= !(1 << scanout_id); + } + + // TODO: present 'Guest disabled display.' in surface. + let scanout = &mut self.scanouts[scanout_id]; + display_replace_surface(&scanout.con, None) + .unwrap_or_else(|e| error!("Error occurs during surface switching: {:?}", e)); + scanout.clear(); + } + + fn get_resource_idx(&self, resource_id: u32) -> Option { + self.resources_list + .iter() + .position(|x| x.resource_id == resource_id) + } + + fn get_backed_resource_idx(&self, res_id: u32, caller: &str) -> (Option, u32) { + match self.get_resource_idx(res_id) { + None => { + error!( + "GuestError: The resource_id {} in {} request does not existed", + res_id, caller, + ); + (None, VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID) + } + Some(res_idx) => { + let res = &self.resources_list[res_idx]; + if res.iov.is_empty() + || (res.pixman_image.is_null() && res.monochrome_cursor.is_empty()) + { + error!( + "GuestError: The resource_id {} in {} request has no backing storage.", + res_id, caller, + ); + (None, VIRTIO_GPU_RESP_ERR_UNSPEC) + } else { + (Some(res_idx), 0) + } + } + } + } + + fn update_cursor_image(&mut self, info_cursor: &VirtioGpuUpdateCursor) { + let (res_idx, error) = + self.get_backed_resource_idx(info_cursor.resource_id, "cmd_update_cursor"); + if res_idx.is_none() { + error!("Failed to update cursor image, errcode: {}", error); + return; + } + + let res = &self.resources_list[res_idx.unwrap()]; + let scanout = &mut self.scanouts[info_cursor.pos.scanout_id as usize]; + let mse = scanout.mouse.as_mut().unwrap(); + let mse_data_size = mse.data.len(); + + if res.format == VIRTIO_GPU_FORMAT_MONOCHROME { + set_monochrome_cursor( + &mut mse.data, + &res.monochrome_cursor, + res.width as usize, + res.height as usize, + ); + } else { + let res_width = get_image_width(res.pixman_image); + let res_height = get_image_height(res.pixman_image); + if res_width as u32 != mse.width || res_height as u32 != mse.height { + return; + } + let res_data_ptr = get_image_data(res.pixman_image) as *mut u8; + // SAFETY: the length of the source and dest pointers can be ensured to be same, + // and equal to mse_data_size. + unsafe { + ptr::copy(res_data_ptr, mse.data.as_mut_ptr(), mse_data_size); + } + } + + // Windows front-end driver does not deliver data in format sequence. + // So we fix it in back-end. + // TODO: Fix front-end driver is a better solution. 
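+        // Swapping bytes 0 and 2 of every 4-byte pixel turns the guest's
+        // B8G8R8A8/B8G8R8X8 data into the RGBA byte order the cursor image
+        // data is expected to carry.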
+ if res.format == VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM + || res.format == VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM + { + let mut i = 0; + while i < mse_data_size { + mse.data.swap(i, i + 2); + i += 4; + } + } + scanout.cursor_visible = true; + } + + fn update_cursor(&mut self, info_cursor: &VirtioGpuUpdateCursor, hdr_type: u32) -> Result<()> { + trace::trace_scope_start!(update_cursor); + let scanout = &mut self.scanouts[info_cursor.pos.scanout_id as usize]; + match &mut scanout.mouse { + None => { + let mouse = DisplayMouse::new( + VIRTIO_GPU_CURSOR_SIZE as u32, + VIRTIO_GPU_CURSOR_SIZE as u32, + info_cursor.hot_x, + info_cursor.hot_y, + ); + scanout.mouse = Some(mouse); + } + Some(mouse) => { + if hdr_type == VIRTIO_GPU_CMD_UPDATE_CURSOR { + mouse.hot_x = info_cursor.hot_x; + mouse.hot_y = info_cursor.hot_y; + } + } + } + + if info_cursor.resource_id > 0 { + self.update_cursor_image(info_cursor); + } + let scanout = &mut self.scanouts[info_cursor.pos.scanout_id as usize]; + display_cursor_define(&scanout.con, scanout.mouse.as_ref().unwrap())?; + Ok(()) + } + + fn cmd_update_cursor(&mut self, req: &VirtioGpuRequest) -> Result<()> { + let mut info_cursor = VirtioGpuUpdateCursor::default(); + self.get_request(req, &mut info_cursor)?; + + if info_cursor.pos.scanout_id >= self.num_scanouts { + error!( + "GuestError: The scanout id {} is out of range.", + info_cursor.pos.scanout_id + ); + return Ok(()); + } + trace::virtio_gpu_update_cursor( + info_cursor.pos.scanout_id, + info_cursor.pos.x_coord, + info_cursor.pos.y_coord, + info_cursor.resource_id, + if req.header.hdr_type == VIRTIO_GPU_CMD_MOVE_CURSOR { + "move" + } else { + "update" + }, + ); + + let scanout = &mut self.scanouts[info_cursor.pos.scanout_id as usize]; + if req.header.hdr_type == VIRTIO_GPU_CMD_MOVE_CURSOR { + if info_cursor.resource_id == 0 && scanout.cursor_visible && scanout.mouse.is_some() { + let data = &mut scanout.mouse.as_mut().unwrap().data; + // In order to improve performance, displaying cursor by virtio-gpu. + // But we have to displaying it in guest img if virtio-gpu can't do display job. + // In this case, to avoid overlapping displaying two cursor imgs, change + // cursor (render by virtio-gpu) color to transparent. + // + // Only A or X byte in RGBA\X needs to be set. + // We sure that the data is assembled in format like RGBA and the minimum unit + // is byte, so there is no size end problem. + // + // TODO: How much impact does it have on performance? 
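+                // Every byte with index i % 4 == 3 is the A/X channel of a
+                // 4-byte pixel; clearing it makes the virtio-gpu cursor fully
+                // transparent while the guest draws its own cursor instead.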
+ for (i, item) in data.iter_mut().enumerate() { + if i % 4 == 3 { + *item = 0_u8; + } + } + display_cursor_define(&scanout.con, scanout.mouse.as_ref().unwrap())?; + scanout.cursor_visible = false; + } else if info_cursor.resource_id > 0 && !scanout.cursor_visible { + self.update_cursor(&info_cursor, VIRTIO_GPU_CMD_MOVE_CURSOR)?; + } + } else if req.header.hdr_type == VIRTIO_GPU_CMD_UPDATE_CURSOR { + self.update_cursor(&info_cursor, VIRTIO_GPU_CMD_UPDATE_CURSOR)?; + } else { + bail!("Wrong header type for cursor queue"); + } + + Ok(()) + } + + fn cmd_get_display_info(&mut self, req: &VirtioGpuRequest) -> Result<()> { + let mut display_info = VirtioGpuDisplayInfo::default(); + display_info.header.hdr_type = VIRTIO_GPU_RESP_OK_DISPLAY_INFO; + + let output_states_lock = self.output_states.lock().unwrap(); + for i in 0..self.num_scanouts { + if (self.enable_output_bitmask & (1 << i)) != 0 { + let i = i as usize; + display_info.pmodes[i].enabled = 1; + display_info.pmodes[i].rect.width = output_states_lock[i].width; + display_info.pmodes[i].rect.height = output_states_lock[i].height; + display_info.pmodes[i].flags = 0; + } + } + drop(output_states_lock); + info!("virtio-gpu get the display info {:?}", display_info); + self.send_response(req, &mut display_info) + } + + fn cmd_get_edid(&mut self, req: &VirtioGpuRequest) -> Result<()> { + let mut edid_req = VirtioGpuGetEdid::default(); + self.change_run_stage()?; + self.get_request(req, &mut edid_req)?; + + if edid_req.scanouts >= self.num_scanouts { + error!( + "GuestError: The scanouts {} of request exceeds the max_outputs {}.", + edid_req.scanouts, self.num_scanouts + ); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, req); + } + trace::virtio_gpu_get_edid(edid_req.scanouts); + + let mut edid_resp = VirtioGpuRespEdid::default(); + edid_resp.header.hdr_type = VIRTIO_GPU_RESP_OK_EDID; + + let output_states_lock = self.output_states.lock().unwrap(); + let mut edid_info = EdidInfo::new( + "HWV", + "STRA Monitor", + 100, + output_states_lock[edid_req.scanouts as usize].width, + output_states_lock[edid_req.scanouts as usize].height, + ); + drop(output_states_lock); + edid_info.edid_array_fulfill(&mut edid_resp.edid); + edid_resp.size = edid_resp.edid.len() as u32; + + self.send_response(req, &mut edid_resp) + } + + fn cmd_resource_create_2d(&mut self, req: &VirtioGpuRequest) -> Result<()> { + let mut info_create_2d = VirtioGpuResourceCreate2d::default(); + self.get_request(req, &mut info_create_2d)?; + trace::virtio_gpu_resource_create_2d( + info_create_2d.resource_id, + info_create_2d.format, + info_create_2d.width, + info_create_2d.height, + ); + + if info_create_2d.resource_id == 0 { + error!("GuestError: resource id 0 is not allowed."); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, req); + } + + if self.get_resource_idx(info_create_2d.resource_id).is_some() { + error!( + "GuestError: resource {} already exists.", + info_create_2d.resource_id + ); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, req); + } + + let mut res = GpuResource { + width: info_create_2d.width, + height: info_create_2d.height, + format: info_create_2d.format, + resource_id: info_create_2d.resource_id, + ..Default::default() + }; + + let (mem, error) = cal_image_hostmem(res.format, res.width, res.height); + if mem.is_none() { + return self.response_nodata(error, req); + } + res.host_mem = mem.unwrap() as u64; + + if res + .host_mem + .checked_add(self.used_hostmem) + .filter(|&sum| sum <= self.max_hostmem) + 
.is_some() + { + if res.format == VIRTIO_GPU_FORMAT_MONOCHROME { + res.monochrome_cursor = vec![0_u8; (res.width * res.height / 8 * 2) as usize]; + } else { + res.pixman_image = create_pixman_image( + get_pixman_format(res.format).unwrap(), + info_create_2d.width as i32, + info_create_2d.height as i32, + ptr::null_mut(), + 0, + ); + } + } + + if res.monochrome_cursor.is_empty() && res.pixman_image.is_null() { + error!( + "GuestError: Fail to create resource(id {}, width {}, height {}) on host.", + res.resource_id, res.width, res.height + ); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY, req); + } + + self.used_hostmem += res.host_mem; + self.resources_list.push(res); + self.response_nodata(VIRTIO_GPU_RESP_OK_NODATA, req) + } + + fn resource_destroy(&mut self, res_index: usize) { + let scanouts_bitmask = self.resources_list[res_index].scanouts_bitmask; + if scanouts_bitmask != 0 { + for i in 0..self.num_scanouts { + if (scanouts_bitmask & (1 << i)) != 0 { + self.disable_scanout(i as usize); + } + } + } + + let res = &mut self.resources_list[res_index]; + unref_pixman_image(res.pixman_image); + self.used_hostmem -= res.host_mem; + self.resources_list.remove(res_index); + } + + fn cmd_resource_unref(&mut self, req: &VirtioGpuRequest) -> Result<()> { + let mut info_resource_unref = VirtioGpuResourceUnref::default(); + self.get_request(req, &mut info_resource_unref)?; + trace::virtio_gpu_resource_unref(info_resource_unref.resource_id); + + if let Some(res_index) = self.get_resource_idx(info_resource_unref.resource_id) { + self.resource_destroy(res_index); + self.response_nodata(VIRTIO_GPU_RESP_OK_NODATA, req) + } else { + error!( + "GuestError: illegal resource specified {}.", + info_resource_unref.resource_id, + ); + self.response_nodata(VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, req) + } + } + + fn cmd_set_scanout(&mut self, req: &VirtioGpuRequest) -> Result<()> { + let mut info_set_scanout = VirtioGpuSetScanout::default(); + self.get_request(req, &mut info_set_scanout)?; + trace::virtio_gpu_set_scanout( + info_set_scanout.scanout_id, + info_set_scanout.resource_id, + info_set_scanout.rect.width, + info_set_scanout.rect.height, + info_set_scanout.rect.x_coord, + info_set_scanout.rect.y_coord, + ); + + if info_set_scanout.scanout_id >= self.num_scanouts { + error!( + "GuestError: The scanout id {} is out of range.", + info_set_scanout.scanout_id + ); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID, req); + } + + if info_set_scanout.resource_id == 0 { + // Set resource_id to 0 means disable the scanout. + self.disable_scanout(info_set_scanout.scanout_id as usize); + return self.response_nodata(VIRTIO_GPU_RESP_OK_NODATA, req); + } + + // Check if resource is valid. 
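+        // The resource must exist and have backing storage attached, and the
+        // requested scanout rectangle must be at least 16x16 pixels and fit
+        // entirely inside the resource.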
+ let (res_idx, error) = + self.get_backed_resource_idx(info_set_scanout.resource_id, "cmd_set_scanout"); + if res_idx.is_none() { + return self.response_nodata(error, req); + } + + let res = &mut self.resources_list[res_idx.unwrap()]; + if info_set_scanout.rect.width < 16 + || info_set_scanout.rect.height < 16 + || !is_rect_in_resource(&info_set_scanout.rect, res) + { + error!( + "GuestError: The resource (id: {} width: {} height: {}) is outfit for scanout (id: {} width: {} height: {} x_coord: {} y_coord: {}).", + res.resource_id, + res.width, + res.height, + info_set_scanout.scanout_id, + info_set_scanout.rect.width, + info_set_scanout.rect.height, + info_set_scanout.rect.x_coord, + info_set_scanout.rect.y_coord, + ); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, req); + } + + let pixman_format = get_image_format(res.pixman_image); + let bpp = (u32::from(pixman_format_bpp(pixman_format as u32)) + 8 - 1) / 8; + let pixman_stride = get_image_stride(res.pixman_image); + let offset = info_set_scanout.rect.x_coord * bpp + + info_set_scanout.rect.y_coord * pixman_stride as u32; + let res_data = if info_set_scanout.resource_id & VIRTIO_GPU_RES_FRAMEBUF != 0 { + res.iov[0].iov_base as *mut u32 + } else { + get_image_data(res.pixman_image) + }; + // SAFETY: The offset is within the legal address. + let res_data_offset = unsafe { res_data.offset(offset as isize) }; + + // Create surface for the scanout. + let scanout = &mut self.scanouts[info_set_scanout.scanout_id as usize]; + if scanout.surface.is_none() + || get_image_data(scanout.surface.unwrap().image) != res_data_offset + || scanout.width != info_set_scanout.rect.width + || scanout.height != info_set_scanout.rect.height + { + let surface = create_surface( + scanout, + info_set_scanout, + res, + pixman_format, + pixman_stride, + res_data_offset, + ); + if surface.image.is_null() { + error!("HostError: surface image create failed, check pixman library."); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_UNSPEC, req); + } + } + + // Unlink old resource. + let old_res_id = scanout.resource_id; + if let Some(old_res_idx) = self.get_resource_idx(old_res_id) { + let old_res = &mut self.resources_list[old_res_idx]; + old_res.scanouts_bitmask &= !(1 << info_set_scanout.scanout_id); + } + // Link new resource. 
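+        // Bit i of scanouts_bitmask means the resource is currently shown on
+        // scanout i; cmd_resource_flush() and resource_destroy() walk this
+        // mask to find the scanouts that need flushing or disabling.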
+ let res = &mut self.resources_list[res_idx.unwrap()]; + res.scanouts_bitmask |= 1 << info_set_scanout.scanout_id; + let scanout = &mut self.scanouts[info_set_scanout.scanout_id as usize]; + scanout.resource_id = info_set_scanout.resource_id; + scanout.x = info_set_scanout.rect.x_coord; + scanout.y = info_set_scanout.rect.y_coord; + scanout.width = info_set_scanout.rect.width; + scanout.height = info_set_scanout.rect.height; + + if (self.driver_features & (1 << VIRTIO_GPU_F_EDID)) == 0 + && (info_set_scanout.resource_id & VIRTIO_GPU_RES_WIN_FRAMEBUF) != 0 + { + self.change_run_stage()?; + } + + self.response_nodata(VIRTIO_GPU_RESP_OK_NODATA, req) + } + + fn cmd_resource_flush(&mut self, req: &VirtioGpuRequest) -> Result<()> { + let mut info_res_flush = VirtioGpuResourceFlush::default(); + self.get_request(req, &mut info_res_flush)?; + trace::virtio_gpu_resource_flush( + info_res_flush.resource_id, + info_res_flush.rect.width, + info_res_flush.rect.height, + info_res_flush.rect.x_coord, + info_res_flush.rect.y_coord, + ); + + let res_index = self.get_resource_idx(info_res_flush.resource_id); + if res_index.is_none() { + error!( + "GuestError: The resource_id {} in resource flush request is not existed.", + info_res_flush.resource_id + ); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, req); + } + + let res = &self.resources_list[res_index.unwrap()]; + if !is_rect_in_resource(&info_res_flush.rect, res) { + error!( + "GuestError: The resource (id: {} width: {} height: {}) is outfit for flush rectangle (width: {} height: {} x_coord: {} y_coord: {}).", + res.resource_id, res.width, res.height, + info_res_flush.rect.width, info_res_flush.rect.height, + info_res_flush.rect.x_coord, info_res_flush.rect.y_coord, + ); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, req); + } + + let mut flush_reg = pixman_region16_t::default(); + let flush_reg_ptr = &mut flush_reg as *mut pixman_region16_t; + // SAFETY: rect information has been checked. + unsafe { + pixman_region_init_rect( + flush_reg_ptr, + info_res_flush.rect.x_coord as i32, + info_res_flush.rect.y_coord as i32, + info_res_flush.rect.width, + info_res_flush.rect.height, + ); + } + + for i in 0..self.num_scanouts { + // Flushes any scanouts the resource is being used on. + if res.scanouts_bitmask & (1 << i) == 0 { + continue; + } + let scanout = &self.scanouts[i as usize]; + + let mut rect_reg = pixman_region16_t::default(); + let mut final_reg = pixman_region16_t::default(); + let rect_reg_ptr = &mut rect_reg as *mut pixman_region16_t; + let final_reg_ptr = &mut final_reg as *mut pixman_region16_t; + // SAFETY: The pointer is not empty. + unsafe { + pixman_region_init(final_reg_ptr); + pixman_region_init_rect( + rect_reg_ptr, + scanout.x as i32, + scanout.y as i32, + scanout.width, + scanout.height, + ); + + pixman_region_intersect(final_reg_ptr, flush_reg_ptr, rect_reg_ptr); + pixman_region_translate(final_reg_ptr, -(scanout.x as i32), -(scanout.y as i32)); + let extents = pixman_region_extents(final_reg_ptr); + display_graphic_update( + &scanout.con, + i32::from((*extents).x1), + i32::from((*extents).y1), + i32::from((*extents).x2 - (*extents).x1), + i32::from((*extents).y2 - (*extents).y1), + )?; + pixman_region_fini(rect_reg_ptr); + pixman_region_fini(final_reg_ptr); + } + } + + // SAFETY: Tt can ensured that the pointer is not empty. 
+ unsafe { + pixman_region_fini(flush_reg_ptr); + } + + self.response_nodata(VIRTIO_GPU_RESP_OK_NODATA, req) + } + + fn cmd_transfer_to_host_2d_params_check( + &mut self, + info_transfer: &VirtioGpuTransferToHost2d, + ) -> (Option, u32) { + let (res_idx, error) = + self.get_backed_resource_idx(info_transfer.resource_id, "cmd_transfer_to_host_2d"); + if res_idx.is_none() { + return (None, error); + } + + let res = &self.resources_list[res_idx.unwrap()]; + if res.resource_id & VIRTIO_GPU_RES_FRAMEBUF != 0 { + return (None, VIRTIO_GPU_RESP_OK_NODATA); + } + if !is_rect_in_resource(&info_transfer.rect, res) { + error!( + "GuestError: The resource (id: {} width: {} height: {}) is outfit for transfer rectangle (offset: {} width: {} height: {} x_coord: {} y_coord: {}).", + res.resource_id, + res.width, + res.height, + info_transfer.offset, + info_transfer.rect.width, + info_transfer.rect.height, + info_transfer.rect.x_coord, + info_transfer.rect.y_coord, + ); + (None, VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER) + } else { + (res_idx, 0) + } + } + + fn cmd_transfer_to_host_2d_update_resource( + &mut self, + trans_info: &VirtioGpuTransferToHost2d, + res_idx: usize, + ) -> Result<()> { + let res = &mut self.resources_list[res_idx]; + let pixman_format = get_image_format(res.pixman_image); + let width = get_image_width(res.pixman_image) as u32; + let bpp = (u32::from(pixman_format_bpp(pixman_format as u32)) + 8 - 1) / 8; + let stride = get_image_stride(res.pixman_image) as u32; + let data = get_image_data(res.pixman_image).cast() as *mut u8; + + if res.format == VIRTIO_GPU_FORMAT_MONOCHROME { + // SAFETY: iov is generated by address_space. + let v = unsafe { iov_to_buf_direct(&res.iov, 0, &mut res.monochrome_cursor)? }; + if v != res.monochrome_cursor.len() { + error!("No enough data is copied for transfer_to_host_2d with monochrome"); + } + return Ok(()); + } + + // When the dedicated area is continuous. + if trans_info.rect.x_coord == 0 && trans_info.rect.width == width { + let offset_dst = (trans_info.rect.y_coord * stride) as usize; + let trans_size = (trans_info.rect.height * stride) as usize; + // SAFETY: offset_dst and trans_size do not exceeds data size. + let dst = unsafe { from_raw_parts_mut(data.add(offset_dst), trans_size) }; + // SAFETY: iov is generated by address_space. + unsafe { iov_to_buf_direct(&res.iov, trans_info.offset, dst) }.map(|v| { + if v < trans_size { + warn!("No enough data is copied for transfer_to_host_2d"); + } + v + })?; + return Ok(()); + } + + // Otherwise transfer data line by line. + let mut offset_src = trans_info.offset as usize; + let mut offset_dst = + (trans_info.rect.y_coord * stride + trans_info.rect.x_coord * bpp) as usize; + let line_size = (trans_info.rect.width * bpp) as usize; + for _ in 0..trans_info.rect.height { + // SAFETY: offset_dst and line_size do not exceeds data size. + let dst = unsafe { from_raw_parts_mut(data.add(offset_dst), line_size) }; + // SAFETY: iov is generated by address_space. 
+ unsafe { iov_to_buf_direct(&res.iov, offset_src as u64, dst) }.map(|v| { + if v < line_size { + warn!("No enough data is copied for transfer_to_host_2d"); + } + v + })?; + offset_src += stride as usize; + offset_dst += stride as usize; + } + Ok(()) + } + + fn cmd_transfer_to_host_2d(&mut self, req: &VirtioGpuRequest) -> Result<()> { + let mut info_transfer = VirtioGpuTransferToHost2d::default(); + self.get_request(req, &mut info_transfer)?; + trace::virtio_gpu_xfer_toh_2d(info_transfer.resource_id); + + let (res_idx, error) = self.cmd_transfer_to_host_2d_params_check(&info_transfer); + if res_idx.is_none() { + return self.response_nodata(error, req); + } + + self.cmd_transfer_to_host_2d_update_resource(&info_transfer, res_idx.unwrap())?; + self.response_nodata(VIRTIO_GPU_RESP_OK_NODATA, req) + } + + fn cmd_resource_attach_backing(&mut self, req: &VirtioGpuRequest) -> Result<()> { + let mut info_attach_backing = VirtioGpuResourceAttachBacking::default(); + self.get_request(req, &mut info_attach_backing)?; + trace::virtio_gpu_resource_attach_backing(info_attach_backing.resource_id); + + let res_idx = self.get_resource_idx(info_attach_backing.resource_id); + if res_idx.is_none() { + error!( + "The resource_id {} in attach backing request request is not existed.", + info_attach_backing.resource_id + ); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, req); + } + + let res = &mut self.resources_list[res_idx.unwrap()]; + if !res.iov.is_empty() { + error!( + "GuestError: The resource_id {} in resource attach backing request already has iov.", + info_attach_backing.resource_id + ); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_UNSPEC, req); + } + + if info_attach_backing.nr_entries > 16384 { + error!( + "GuestError: The nr_entries in resource attach backing request is too large ( {} > 16384).", + info_attach_backing.nr_entries + ); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_UNSPEC, req); + } + + let entries = info_attach_backing.nr_entries; + let ents_size = size_of::() as u64 * u64::from(entries); + let head_size = size_of::() as u64; + if u64::from(req.out_len) < (ents_size + head_size) { + error!( + "GuestError: The nr_entries {} in resource attach backing request is larger than total len {}.", + info_attach_backing.nr_entries, req.out_len, + ); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_UNSPEC, req); + } + + // Start reading and parsing. + let mut ents = Vec::::new(); + // SAFETY: Upper limit of ents is 16384. + ents.resize(entries as usize, VirtioGpuMemEntry::default()); + let ents_buf = + // SAFETY: ents is guaranteed not be null and the range of ents_size has been limited. + unsafe { from_raw_parts_mut(ents.as_mut_ptr() as *mut u8, ents_size as usize) }; + // SAFETY: out_iovec is generated by address_space. + let v = unsafe { iov_to_buf_direct(&req.out_iovec, head_size, ents_buf)? 
}; + if v as u64 != ents_size { + error!( + "Virtio-GPU: Load no enough ents buf when attach backing, {} vs {}", + v, ents_size + ); + return self.response_nodata(VIRTIO_GPU_RESP_ERR_UNSPEC, req); + } + + let mut elemiovec = Vec::with_capacity(ents.len()); + for ent in ents.iter() { + elemiovec.push(ElemIovec { + addr: GuestAddress(ent.addr), + len: ent.length, + }); + } + match gpa_hva_iovec_map(&elemiovec, &self.mem_space, &None) { + Ok((_, iov)) => { + res.iov = iov; + self.response_nodata(VIRTIO_GPU_RESP_OK_NODATA, req) + } + Err(e) => { + error!("Virtio-GPU: Map entry base failed, {:?}", e); + self.response_nodata(VIRTIO_GPU_RESP_ERR_UNSPEC, req) + } + } + } + + fn cmd_resource_detach_backing(&mut self, req: &VirtioGpuRequest) -> Result<()> { + let mut info_detach_backing = VirtioGpuResourceDetachBacking::default(); + self.get_request(req, &mut info_detach_backing)?; + trace::virtio_gpu_resource_detach_backing(info_detach_backing.resource_id); + + let (res_idx, error) = self.get_backed_resource_idx( + info_detach_backing.resource_id, + "cmd_resource_detach_backing", + ); + if res_idx.is_none() { + return self.response_nodata(error, req); + } + + self.resources_list[res_idx.unwrap()].iov.clear(); + self.response_nodata(VIRTIO_GPU_RESP_OK_NODATA, req) + } + + fn process_control_queue(&mut self, mut req_queue: Vec) -> Result<()> { + for req in req_queue.iter_mut() { + if let Err(e) = match req.header.hdr_type { + VIRTIO_GPU_CMD_GET_DISPLAY_INFO => self.cmd_get_display_info(req), + VIRTIO_GPU_CMD_RESOURCE_CREATE_2D => self.cmd_resource_create_2d(req), + VIRTIO_GPU_CMD_RESOURCE_UNREF => self.cmd_resource_unref(req), + VIRTIO_GPU_CMD_SET_SCANOUT => self.cmd_set_scanout(req), + VIRTIO_GPU_CMD_RESOURCE_FLUSH => self.cmd_resource_flush(req), + VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D => self.cmd_transfer_to_host_2d(req), + VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING => self.cmd_resource_attach_backing(req), + VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING => self.cmd_resource_detach_backing(req), + VIRTIO_GPU_CMD_GET_EDID => self.cmd_get_edid(req), + _ => { + error!( + "Failed to process unsupported command: {}", + req.header.hdr_type + ); + self.response_nodata(VIRTIO_GPU_RESP_ERR_UNSPEC, req) + } + } { + error!("Fail to handle GPU request, {:?}.", e); + } + } + + Ok(()) + } + + fn ctrl_queue_evt_handler(&mut self) -> Result<()> { + let mut queue = self.ctrl_queue.lock().unwrap(); + let mut req_queue = Vec::new(); + + loop { + let mut elem = queue + .vring + .pop_avail(&self.mem_space, self.driver_features)?; + if elem.desc_num == 0 { + break; + } + + match VirtioGpuRequest::new(&self.mem_space, &mut elem) { + Ok(req) => { + req_queue.push(req); + } + Err(e) => { + error!( + "GuestError: Request will be ignored, because request header is incomplete and {:?}. \ + Also, be aware that the virtual machine may suspended as response is not sent.", + e + ); + } + } + } + drop(queue); + + self.process_control_queue(req_queue)?; + Ok(()) + } + + fn cursor_queue_evt_handler(&mut self) -> Result<()> { + let cursor_queue = self.cursor_queue.clone(); + let mut queue = cursor_queue.lock().unwrap(); + + loop { + let mut elem = queue + .vring + .pop_avail(&self.mem_space, self.driver_features)?; + if elem.desc_num == 0 { + break; + } + + match VirtioGpuRequest::new(&self.mem_space, &mut elem) { + Ok(req) => match self.cmd_update_cursor(&req) { + Ok(_) => {} + Err(e) => { + error!("Failed to handle gpu cursor cmd for {:?}.", e); + } + }, + // Ignore the request has no effect, because we handle it later. 
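+                // That is, a malformed cursor request is only logged and
+                // dropped; its descriptor is still returned to the guest via
+                // add_used() below with a written length of 0.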
+ Err(err) => { + error!("Failed to create GPU request, {:?}, just ignore it", err); + } + }; + + queue.vring.add_used(elem.index, 0).with_context(|| { + format!("Failed to add used ring(cursor), index {}", elem.index) + })?; + + if queue.vring.should_notify(self.driver_features) { + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue), false) + .with_context(|| { + VirtioError::InterruptTrigger("gpu cursor", VirtioInterruptType::Vring) + })?; + trace::virtqueue_send_interrupt("Cursor", &*queue as *const _ as u64); + } + } + + Ok(()) + } +} + +impl Drop for GpuIoHandler { + fn drop(&mut self) { + while !self.resources_list.is_empty() { + self.resource_destroy(0); + } + } +} + +impl EventNotifierHelper for GpuIoHandler { + fn internal_notifiers(handler: Arc>) -> Vec { + let handler_raw = handler.lock().unwrap(); + let mut notifiers = Vec::new(); + + // Register event notifier for ctrl_queue_evt. + let handler_clone = handler.clone(); + let h: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + if let Err(e) = handler_clone.lock().unwrap().ctrl_queue_evt_handler() { + error!("Failed to process ctrlq for virtio gpu, err: {:?}", e); + } + None + }); + notifiers.push(EventNotifier::new( + NotifierOperation::AddShared, + handler_raw.ctrl_queue_evt.as_raw_fd(), + None, + EventSet::IN, + vec![h], + )); + + // Register event notifier for cursor_queue_evt. + let handler_clone = handler.clone(); + let h: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + if let Err(e) = handler_clone.lock().unwrap().cursor_queue_evt_handler() { + error!("Failed to process cursorq for virtio gpu, err: {:?}", e); + } + None + }); + notifiers.push(EventNotifier::new( + NotifierOperation::AddShared, + handler_raw.cursor_queue_evt.as_raw_fd(), + None, + EventSet::IN, + vec![h], + )); + + notifiers + } +} + +#[derive(Clone, Copy, Debug, ByteCode)] +struct VirtioGpuConfig { + events_read: u32, + events_clear: u32, + num_scanouts: u32, + _reserved: u32, +} + +/// GPU device structure. +#[derive(Default)] +pub struct Gpu { + /// Virtio device base property. + base: VirtioBase, + /// Configuration of the GPU device. + cfg: GpuDevConfig, + /// Config space of the GPU device. + config_space: Arc>, + /// Status of the emulated physical outputs. + output_states: Arc>, + /// Each console corresponds to a display. + consoles: Vec>>>, + /// bar0 file backend which is set by ohui server + bar0_fb: Option, +} + +/// SAFETY: The raw pointer in rust doesn't impl Send, all write operations +/// to this memory will be locked. So implement Send safe. 
+unsafe impl Send for Gpu {} + +impl Gpu { + pub fn new(cfg: GpuDevConfig) -> Gpu { + Self { + base: VirtioBase::new(VIRTIO_TYPE_GPU, QUEUE_NUM_GPU, DEFAULT_VIRTQUEUE_SIZE), + cfg, + ..Default::default() + } + } + + pub fn set_bar0_fb(&mut self, fb: Option) { + if !self.cfg.enable_bar0 { + self.bar0_fb = None; + return; + } + self.bar0_fb = fb; + } + + pub fn get_bar0_fb(&self) -> Option { + self.bar0_fb.as_ref().cloned() + } + + fn build_device_config_space(&mut self) { + let mut config_space = self.config_space.lock().unwrap(); + config_space.num_scanouts = self.cfg.max_outputs; + } +} + +impl VirtioDevice for Gpu { + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + + fn device_quirk(&self) -> Option { + if self.cfg.enable_bar0 { + return Some(VirtioDeviceQuirk::VirtioGpuEnableBar0); + } + None + } + + fn realize(&mut self) -> Result<()> { + if self.cfg.max_outputs > VIRTIO_GPU_MAX_OUTPUTS as u32 { + bail!( + "Invalid max_outputs {} which is bigger than {}", + self.cfg.max_outputs, + VIRTIO_GPU_MAX_OUTPUTS + ); + } + + let mut output_states = self.output_states.lock().unwrap(); + output_states[0].width = self.cfg.xres; + output_states[0].height = self.cfg.yres; + + let gpu_opts = Arc::new(GpuOpts { + output_states: self.output_states.clone(), + config_space: self.config_space.clone(), + interrupt_cb: None, + enable_bar0: self.cfg.enable_bar0, + }); + for i in 0..self.cfg.max_outputs { + let dev_name = format!("virtio-gpu{}", i); + let con = console_init(dev_name, ConsoleType::Graphic, gpu_opts.clone()); + let con_ref = con.as_ref().unwrap().upgrade().unwrap(); + output_states[i as usize].con_id = con_ref.lock().unwrap().con_id; + self.consoles.push(con); + } + + drop(output_states); + + self.init_config_features()?; + + Ok(()) + } + + fn init_config_features(&mut self) -> Result<()> { + self.base.device_features = 1u64 << VIRTIO_F_VERSION_1 + | 1u64 << VIRTIO_F_RING_INDIRECT_DESC + | 1u64 << VIRTIO_F_RING_EVENT_IDX; + if self.cfg.edid { + self.base.device_features |= 1 << VIRTIO_GPU_F_EDID; + } + + self.base.device_features |= 1 << VIRTIO_GPU_F_MONOCHROME; + + trace::virtio_gpu_init_config_features(self.base.device_features); + self.build_device_config_space(); + Ok(()) + } + + fn unrealize(&mut self) -> Result<()> { + for con in &self.consoles { + console_close(con)?; + } + + // TODO: support migration + Ok(()) + } + + fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()> { + let config_space = self.config_space.lock().unwrap(); + read_config_default(config_space.as_bytes(), offset, data) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { + let mut config_space = self.config_space.lock().unwrap(); + check_config_space_rw(config_space.as_bytes(), offset, data)?; + + let mut config_cpy = *config_space; + let config_cpy_slice = config_cpy.as_mut_bytes(); + + config_cpy_slice[(offset as usize)..(offset as usize + data.len())].copy_from_slice(data); + if config_cpy.events_clear != 0 { + config_space.events_read &= !config_cpy.events_clear; + } + + Ok(()) + } + + fn activate( + &mut self, + mem_space: Arc, + interrupt_cb: Arc, + queue_evts: Vec>, + ) -> Result<()> { + let queues = &self.base.queues; + if queues.len() != QUEUE_NUM_GPU { + return Err(anyhow!(VirtioError::IncorrectQueueNum( + QUEUE_NUM_GPU, + queues.len() + ))); + } + + let mut scanouts = vec![]; + let gpu_opts = Arc::new(GpuOpts { + output_states: self.output_states.clone(), + config_space: self.config_space.clone(), + interrupt_cb: Some(interrupt_cb.clone()), + 
enable_bar0: self.cfg.enable_bar0, + }); + for con in &self.consoles { + let con_ref = con.as_ref().unwrap().upgrade().unwrap(); + con_ref.lock().unwrap().dev_opts = gpu_opts.clone(); + + let scanout = GpuScanout { + con: con.clone(), + ..Default::default() + }; + scanouts.push(scanout); + } + + let handler = GpuIoHandler { + ctrl_queue: queues[0].clone(), + cursor_queue: queues[1].clone(), + mem_space, + ctrl_queue_evt: queue_evts[0].clone(), + cursor_queue_evt: queue_evts[1].clone(), + interrupt_cb, + driver_features: self.base.driver_features, + resources_list: Vec::new(), + enable_output_bitmask: 1, + num_scanouts: self.cfg.max_outputs, + output_states: self.output_states.clone(), + scanouts, + max_hostmem: self.cfg.max_hostmem, + used_hostmem: 0, + }; + + let notifiers = EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))); + register_event_helper(notifiers, None, &mut self.base.deactivate_evts)?; + info!("virtio-gpu has been activated"); + + Ok(()) + } + + fn deactivate(&mut self) -> Result<()> { + if get_run_stage() == VmRunningStage::Os { + display_set_major_screen("ramfb")?; + set_run_stage(VmRunningStage::Bios); + } + + let result = unregister_event_helper(None, &mut self.base.deactivate_evts); + info!("virtio-gpu deactivate {:?}", result); + result + } +} + +#[cfg(test)] +mod tests { + use super::*; + use machine_manager::config::str_slip_to_clap; + + #[test] + fn test_parse_virtio_gpu_pci_cmdline() { + // Test1: Right. + let gpu_cmd = "virtio-gpu-pci,id=gpu_1,bus=pcie.0,addr=0x4.0x0,max_outputs=5,edid=false,\ + xres=2048,yres=800,enable_bar0=true,max_hostmem=268435457"; + let gpu_cfg = GpuDevConfig::try_parse_from(str_slip_to_clap(gpu_cmd, true, false)).unwrap(); + assert_eq!(gpu_cfg.id, "gpu_1"); + assert_eq!(gpu_cfg.bus, "pcie.0"); + assert_eq!(gpu_cfg.addr, (4, 0)); + assert_eq!(gpu_cfg.max_outputs, 5); + assert_eq!(gpu_cfg.xres, 2048); + assert_eq!(gpu_cfg.yres, 800); + assert!(!gpu_cfg.edid); + assert_eq!(gpu_cfg.max_hostmem, 268435457); + assert!(gpu_cfg.enable_bar0); + + // Test2: Default. + let gpu_cmd2 = "virtio-gpu-pci,id=gpu_1,bus=pcie.0,addr=0x4.0x0"; + let gpu_cfg = + GpuDevConfig::try_parse_from(str_slip_to_clap(gpu_cmd2, true, false)).unwrap(); + assert_eq!(gpu_cfg.max_outputs, 1); + assert_eq!(gpu_cfg.xres, 1024); + assert_eq!(gpu_cfg.yres, 768); + assert!(gpu_cfg.edid); + assert_eq!(gpu_cfg.max_hostmem, VIRTIO_GPU_DEFAULT_MAX_HOSTMEM); + assert!(!gpu_cfg.enable_bar0); + + // Test3/4: max_outputs is illegal. + let gpu_cmd3 = "virtio-gpu-pci,id=gpu_1,bus=pcie.0,addr=0x4.0x0,max_outputs=17"; + let result = GpuDevConfig::try_parse_from(str_slip_to_clap(gpu_cmd3, true, false)); + assert!(result.is_err()); + let gpu_cmd4 = "virtio-gpu-pci,id=gpu_1,bus=pcie.0,addr=0x4.0x0,max_outputs=0"; + let result = GpuDevConfig::try_parse_from(str_slip_to_clap(gpu_cmd4, true, false)); + assert!(result.is_err()); + + // Test5: max_hostmem is illegal. + let gpu_cmd5 = "virtio-gpu-pci,id=gpu_1,bus=pcie.0,addr=0x4.0x0,max_hostmem=0"; + let result = GpuDevConfig::try_parse_from(str_slip_to_clap(gpu_cmd5, true, false)); + assert!(result.is_err()); + } +} diff --git a/virtio/src/device/mod.rs b/virtio/src/device/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..1f9ddba814b75c05da96806bd8aa3197010ca450 --- /dev/null +++ b/virtio/src/device/mod.rs @@ -0,0 +1,22 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod balloon; +pub mod block; +#[cfg(feature = "virtio_gpu")] +pub mod gpu; +pub mod net; +#[cfg(feature = "virtio_rng")] +pub mod rng; +#[cfg(feature = "virtio_scsi")] +pub mod scsi_cntlr; +pub mod serial; diff --git a/virtio/src/device/net.rs b/virtio/src/device/net.rs new file mode 100644 index 0000000000000000000000000000000000000000..4d74508977aaeb3a09767d5956d76322eaa7a97c --- /dev/null +++ b/virtio/src/device/net.rs @@ -0,0 +1,1987 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::collections::HashMap; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::path::Path; +use std::rc::Rc; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::mpsc::{channel, Receiver, Sender}; +use std::sync::{Arc, Mutex, RwLock}; +use std::{cmp, fs, mem}; + +use anyhow::{bail, Context, Result}; +use byteorder::{ByteOrder, LittleEndian}; +use log::{error, warn}; +use once_cell::sync::Lazy; +use util::aio::Iovec; +use vmm_sys_util::{epoll::EventSet, eventfd::EventFd}; + +use crate::{ + check_config_space_rw, gpa_hva_iovec_map, iov_discard_front, iov_to_buf, mem_to_buf, + read_config_default, report_virtio_error, virtio_has_feature, ElemIovec, Element, Queue, + VirtioBase, VirtioDevice, VirtioError, VirtioInterrupt, VirtioInterruptType, VirtioNetHdr, + VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_RING_INDIRECT_DESC, VIRTIO_F_VERSION_1, VIRTIO_NET_CTRL_MAC, + VIRTIO_NET_CTRL_MAC_ADDR_SET, VIRTIO_NET_CTRL_MAC_TABLE_SET, VIRTIO_NET_CTRL_MQ, + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN, + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_ALLMULTI, + VIRTIO_NET_CTRL_RX_ALLUNI, VIRTIO_NET_CTRL_RX_NOBCAST, VIRTIO_NET_CTRL_RX_NOMULTI, + VIRTIO_NET_CTRL_RX_NOUNI, VIRTIO_NET_CTRL_RX_PROMISC, VIRTIO_NET_CTRL_VLAN, + VIRTIO_NET_CTRL_VLAN_ADD, VIRTIO_NET_CTRL_VLAN_DEL, VIRTIO_NET_ERR, VIRTIO_NET_F_CSUM, + VIRTIO_NET_F_CTRL_MAC_ADDR, VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_RX_EXTRA, + VIRTIO_NET_F_CTRL_VLAN, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_ECN, + VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, + VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, + VIRTIO_NET_F_MQ, VIRTIO_NET_OK, VIRTIO_TYPE_NET, +}; +use address_space::{AddressAttr, AddressSpace}; +use machine_manager::config::{ConfigCheck, NetDevcfg, NetworkInterfaceConfig}; +use machine_manager::event_loop::{register_event_helper, unregister_event_helper, EventLoop}; +use machine_manager::state_query::{ + register_state_query_callback, unregister_state_query_callback, +}; +use 
migration::{
+    migration::Migratable, DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager,
+    StateTransfer,
+};
+use migration_derive::{ByteCode, Desc};
+use util::byte_code::ByteCode;
+use util::gen_base_func;
+use util::loop_context::{
+    create_new_eventfd, read_fd, EventNotifier, EventNotifierHelper, NotifierCallback,
+    NotifierOperation,
+};
+use util::num_ops::str_to_num;
+use util::tap::{
+    Tap, IFF_MULTI_QUEUE, TUN_F_CSUM, TUN_F_TSO4, TUN_F_TSO6, TUN_F_TSO_ECN, TUN_F_UFO,
+};
+
+/// Number of virtqueues (rx/tx/ctrl).
+const QUEUE_NUM_NET: usize = 3;
+/// The Mac Address length.
+pub const MAC_ADDR_LEN: usize = 6;
+/// The length of ethernet header.
+const ETHERNET_HDR_LENGTH: usize = 14;
+/// The max "multicast + unicast" mac address table length.
+const CTRL_MAC_TABLE_LEN: usize = 64;
+/// From 802.1Q definition, the max vlan ID.
+const CTRL_MAX_VLAN: u16 = 1 << 12;
+/// The max number of mac addresses.
+const MAX_MAC_ADDR_NUM: usize = 0xff;
+/// The header length of virtio net packet.
+const NET_HDR_LENGTH: usize = mem::size_of::<VirtioNetHdr>();
+/// The length of vlan tag.
+const VLAN_TAG_LENGTH: usize = 4;
+/// The length of the vlan TPID field in an 802.1Q tag.
+const VLAN_TPID_LENGTH: usize = 2;
+
+type SenderConfig = Option<Tap>;
+
+/// The first default mac address.
+const FIRST_DEFAULT_MAC: [u8; MAC_ADDR_LEN] = [0x52, 0x54, 0x00, 0x12, 0x34, 0x56];
+/// Used to mark if the last byte of the mac address is used.
+static USED_MAC_TABLE: Lazy<Arc<Mutex<[i8; MAX_MAC_ADDR_NUM]>>> =
+    Lazy::new(|| Arc::new(Mutex::new([0_i8; MAX_MAC_ADDR_NUM])));
+
+/// Configuration of virtio-net devices.
+#[repr(C, packed)]
+#[derive(Copy, Clone, Debug, Default)]
+pub struct VirtioNetConfig {
+    /// Mac Address.
+    pub mac: [u8; MAC_ADDR_LEN],
+    /// Device status.
+    pub status: u16,
+    /// Maximum number of each of transmit and receive queues.
+    pub max_virtqueue_pairs: u16,
+    /// Maximum Transmission Unit.
+    pub mtu: u16,
+    /// Speed, in units of 1Mb.
+    pub speed: u32,
+    /// 0x00 - half duplex
+    /// 0x01 - full duplex
+    pub duplex: u8,
+}
+
+impl ByteCode for VirtioNetConfig {}
+
+/// The control mode used for packet receive filtering.
+struct CtrlRxMode {
+    /// If the device should receive all incoming packets.
+    promisc: bool,
+    /// If the device should allow all incoming multicast packets.
+    all_multi: bool,
+    /// If the device should allow all incoming unicast packets.
+    all_uni: bool,
+    /// Used to suppress multicast receive.
+    no_multi: bool,
+    /// Used to suppress unicast receive.
+    no_uni: bool,
+    /// Used to suppress broadcast receive.
+    no_bcast: bool,
+}
+
+impl Default for CtrlRxMode {
+    fn default() -> Self {
+        Self {
+            // For compatibility with older guest drivers, it
+            // needs to default to promiscuous.
+            promisc: true,
+            all_multi: false,
+            all_uni: false,
+            no_multi: false,
+            no_uni: false,
+            no_bcast: false,
+        }
+    }
+}
+
+#[derive(Default, Clone)]
+struct MacAddress {
+    address: [u8; MAC_ADDR_LEN],
+}
+
+/// The Mac information used to filter incoming packet.
+#[derive(Default)]
+struct CtrlMacInfo {
+    /// Unicast mac address table.
+    uni_mac_table: Vec<MacAddress>,
+    /// Unicast mac address overflow.
+    uni_mac_of: bool,
+    /// Multicast mac address table.
+    multi_mac_table: Vec<MacAddress>,
+    /// Multicast mac address overflow.
+    multi_mac_of: bool,
+}
+
+pub struct CtrlInfo {
+    /// The control rx mode for packet receive filtering.
+    rx_mode: CtrlRxMode,
+    /// The mac address information for packet receive filtering.
+    mac_info: CtrlMacInfo,
+    /// The map of all the vlan ids (key is vid >> 5, each value packs 32 vids as a bitmask).
+    vlan_map: HashMap<u16, u32>,
+    /// The net device configuration.
+ config: Arc>, +} + +impl CtrlInfo { + pub fn new(config: Arc>) -> Self { + CtrlInfo { + rx_mode: CtrlRxMode::default(), + mac_info: CtrlMacInfo::default(), + vlan_map: HashMap::new(), + config, + } + } + + fn handle_rx_mode( + &mut self, + mem_space: &AddressSpace, + cmd: u8, + data_iovec: &mut [ElemIovec], + ) -> Result { + // Get the command specific data, one byte containing 0(off) or 1(on). + let mut status: u8 = 0; + get_buf_and_discard(mem_space, data_iovec, status.as_mut_bytes()) + .with_context(|| "Failed to get control data")?; + // 0: off, 1: on. + if ![0, 1].contains(&status) { + return Ok(VIRTIO_NET_ERR); + } + let mut on_off = false; + if status == 1 { + on_off = true; + } + let mut ack = VIRTIO_NET_OK; + match cmd { + VIRTIO_NET_CTRL_RX_PROMISC => self.rx_mode.promisc = on_off, + VIRTIO_NET_CTRL_RX_ALLMULTI => self.rx_mode.all_multi = on_off, + VIRTIO_NET_CTRL_RX_ALLUNI => self.rx_mode.all_uni = on_off, + VIRTIO_NET_CTRL_RX_NOMULTI => self.rx_mode.no_multi = on_off, + VIRTIO_NET_CTRL_RX_NOUNI => self.rx_mode.no_uni = on_off, + VIRTIO_NET_CTRL_RX_NOBCAST => self.rx_mode.no_bcast = on_off, + _ => { + error!("Invalid command {} for control rx mode", cmd); + ack = VIRTIO_NET_ERR; + } + } + Ok(ack) + } + + fn set_mac_table( + &mut self, + mem_space: &AddressSpace, + data_iovec: &mut Vec, + ) -> Result { + let ack = VIRTIO_NET_OK; + let mut mac_table_len: usize = 0; + // Default for unicast. + let mut overflow = &mut self.mac_info.uni_mac_of; + let mut mac_table = &mut self.mac_info.uni_mac_table; + + // 0 for unicast, 1 for multicast. + for i in 0..2 { + if i == 1 { + overflow = &mut self.mac_info.multi_mac_of; + mac_table_len = self.mac_info.uni_mac_table.len(); + mac_table = &mut self.mac_info.multi_mac_table; + } + + let mut entries: u32 = 0; + *data_iovec = get_buf_and_discard(mem_space, data_iovec, entries.as_mut_bytes()) + .with_context(|| "Failed to get MAC entries".to_string())?; + if entries == 0 { + mac_table.clear(); + continue; + } + + let size = u64::from(entries) * MAC_ADDR_LEN as u64; + let res_len = Element::iovec_size(data_iovec); + if size > res_len { + bail!("Invalid request for setting mac table."); + } + if entries as usize > CTRL_MAC_TABLE_LEN - mac_table_len { + if size < res_len { + *data_iovec = iov_discard_front(data_iovec, size) + .with_context(|| "Failed to discard iovec from front side".to_string())? 
+ .to_vec(); + } + *overflow = true; + mac_table.clear(); + continue; + } + + let mut macs = vec![0_u8; size as usize]; + *data_iovec = get_buf_and_discard(mem_space, data_iovec, &mut macs) + .with_context(|| "Failed to get MAC entries".to_string())?; + + mac_table.clear(); + for i in 0..entries { + let offset = i as usize * MAC_ADDR_LEN; + let mut mac: MacAddress = Default::default(); + mac.address + .copy_from_slice(&macs[offset..offset + MAC_ADDR_LEN]); + mac_table.push(mac); + } + } + Ok(ack) + } + + fn handle_mac( + &mut self, + mem_space: &AddressSpace, + cmd: u8, + data_iovec: &mut Vec, + ) -> u8 { + let mut ack = VIRTIO_NET_OK; + match cmd { + VIRTIO_NET_CTRL_MAC_ADDR_SET => { + let mut mac = [0; MAC_ADDR_LEN]; + *data_iovec = + get_buf_and_discard(mem_space, data_iovec, &mut mac).unwrap_or_else(|e| { + error!("Failed to get MAC address, error is {:?}", e); + ack = VIRTIO_NET_ERR; + Vec::new() + }); + if ack == VIRTIO_NET_ERR { + return VIRTIO_NET_ERR; + } + self.config.lock().unwrap().mac.copy_from_slice(&mac); + } + VIRTIO_NET_CTRL_MAC_TABLE_SET => { + ack = self + .set_mac_table(mem_space, data_iovec) + .unwrap_or_else(|e| { + error!("Failed to get Unicast Mac address, error is {:?}", e); + VIRTIO_NET_ERR + }); + } + _ => { + error!("Invalid cmd {} when handling control mac", cmd); + return VIRTIO_NET_ERR; + } + } + + ack + } + + fn handle_vlan_table( + &mut self, + mem_space: &AddressSpace, + cmd: u8, + data_iovec: &mut Vec, + ) -> u8 { + let mut ack = VIRTIO_NET_OK; + let mut vid: u16 = 0; + + *data_iovec = get_buf_and_discard(mem_space, data_iovec, vid.as_mut_bytes()) + .unwrap_or_else(|e| { + error!("Failed to get vlan id, error is {:?}", e); + ack = VIRTIO_NET_ERR; + Vec::new() + }); + if ack == VIRTIO_NET_ERR { + return ack; + } + vid = LittleEndian::read_u16(vid.as_bytes()); + if vid >= CTRL_MAX_VLAN { + return VIRTIO_NET_ERR; + } + + match cmd { + VIRTIO_NET_CTRL_VLAN_ADD => { + if let Some(value) = self.vlan_map.get_mut(&(vid >> 5)) { + *value |= 1 << (vid & 0x1f); + } else { + self.vlan_map.insert(vid >> 5, 1 << (vid & 0x1f)); + } + } + VIRTIO_NET_CTRL_VLAN_DEL => { + if let Some(value) = self.vlan_map.get_mut(&(vid >> 5)) { + *value &= !(1 << (vid & 0x1f)); + } + } + _ => { + error!("Invalid cmd {} when handling control vlan", cmd); + ack = VIRTIO_NET_ERR; + } + } + ack + } + + fn handle_mq( + &mut self, + mem_space: &AddressSpace, + taps: Option<&mut Vec>, + cmd: u8, + data_iovec: &mut Vec, + ) -> u8 { + let mut ack = VIRTIO_NET_OK; + if u16::from(cmd) == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET { + let mut queue_pairs: u16 = 0; + *data_iovec = get_buf_and_discard(mem_space, data_iovec, queue_pairs.as_mut_bytes()) + .unwrap_or_else(|e| { + error!("Failed to get queue pairs {:?}", e); + ack = VIRTIO_NET_ERR; + Vec::new() + }); + if ack == VIRTIO_NET_ERR { + return ack; + } + + queue_pairs = LittleEndian::read_u16(queue_pairs.as_bytes()); + let max_pairs = self.config.lock().unwrap().max_virtqueue_pairs; + if !(VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN..=max_pairs).contains(&queue_pairs) { + error!("Invalid queue pairs {}", queue_pairs); + return VIRTIO_NET_ERR; + } + if let Some(taps) = taps { + for (index, tap) in taps.iter_mut().enumerate() { + if tap.set_queue(index < queue_pairs as usize) != 0 { + error!("Failed to set queue, index is {}", index); + return VIRTIO_NET_ERR; + } + } + } + } else { + error!( + "Control queue header command can't match {}", + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET + ); + ack = VIRTIO_NET_ERR; + } + + ack + } + + fn filter_packets(&mut self, buf: &[u8]) -> bool 
{
+        // Broadcast address: 0xff:0xff:0xff:0xff:0xff:0xff.
+        let bcast = [0xff; MAC_ADDR_LEN];
+        // TPID of the vlan tag, defined in IEEE 802.1Q, is 0x8100.
+        let vlan = [0x81, 0x00];
+
+        if self.rx_mode.promisc {
+            return false;
+        }
+
+        if buf[ETHERNET_HDR_LENGTH - VLAN_TPID_LENGTH..ETHERNET_HDR_LENGTH] == vlan {
+            let vid = u16::from_be_bytes([buf[ETHERNET_HDR_LENGTH], buf[ETHERNET_HDR_LENGTH + 1]]);
+            // Each vlan_map entry packs 32 vlan ids: the key is vid >> 5, the bit index is vid & 0x1f.
+            let value = if let Some(value) = self.vlan_map.get(&(vid >> 5)) {
+                *value
+            } else {
+                0
+            };
+
+            if value & (1 << (vid & 0x1f)) == 0 {
+                return true;
+            }
+        }
+
+        // Bit 0 of byte[0] means unicast (0) or multicast (1).
+        if buf[0] & 0x01 > 0 {
+            if buf[..MAC_ADDR_LEN] == bcast {
+                return self.rx_mode.no_bcast;
+            }
+            if self.rx_mode.no_multi {
+                return true;
+            }
+            if self.rx_mode.all_multi || self.mac_info.multi_mac_of {
+                return false;
+            }
+            for mac in self.mac_info.multi_mac_table.iter() {
+                if buf[..MAC_ADDR_LEN] == mac.address {
+                    return false;
+                }
+            }
+        } else {
+            if self.rx_mode.no_uni {
+                return true;
+            }
+            if self.rx_mode.all_uni
+                || self.mac_info.uni_mac_of
+                || buf[..MAC_ADDR_LEN] == self.config.lock().unwrap().mac
+            {
+                return false;
+            }
+            for mac in self.mac_info.uni_mac_table.iter() {
+                if buf[..MAC_ADDR_LEN] == mac.address {
+                    return false;
+                }
+            }
+        }
+
+        true
+    }
+}
+
+fn get_buf_and_discard(
+    mem_space: &AddressSpace,
+    iovec: &mut [ElemIovec],
+    buf: &mut [u8],
+) -> Result<Vec<ElemIovec>> {
+    iov_to_buf(mem_space, &None, iovec, buf).and_then(|size| {
+        if size < buf.len() {
+            error!("Invalid length {}, expected length {}", size, buf.len());
+            bail!("Invalid length {}, expected length {}", size, buf.len());
+        }
+        Ok(())
+    })?;
+
+    if let Some(data_iovec) = iov_discard_front(iovec, buf.len() as u64) {
+        Ok(data_iovec.to_vec())
+    } else {
+        Ok(Vec::new())
+    }
+}
+
+/// The control queue is used to verify the multi queue feature.
+pub struct CtrlVirtio {
+    /// The control queue.
+    queue: Arc<Mutex<Queue>>,
+    /// The eventfd used to notify the control queue event.
+    queue_evt: Arc<EventFd>,
+    /// The information about control command.
+    ctrl_info: Arc<Mutex<CtrlInfo>>,
+}
+
+impl CtrlVirtio {
+    pub fn new(
+        queue: Arc<Mutex<Queue>>,
+        queue_evt: Arc<EventFd>,
+        ctrl_info: Arc<Mutex<CtrlInfo>>,
+    ) -> Self {
+        Self {
+            queue,
+            queue_evt,
+            ctrl_info,
+        }
+    }
+}
+
+/// Handle the frontend and the backend control channel virtio queue events and data.
+pub struct NetCtrlHandler {
+    /// The control virtio queue.
+    pub ctrl: CtrlVirtio,
+    /// Memory space.
+    pub mem_space: Arc<AddressSpace>,
+    /// The interrupt call back function.
+    pub interrupt_cb: Arc<VirtioInterrupt>,
+    /// Bit mask of features negotiated by the backend and the frontend.
+    pub driver_features: u64,
+    /// Device is broken or not.
+    pub device_broken: Arc<AtomicBool>,
+    /// Tap devices opened.
+    pub taps: Option<Vec<Tap>>,
+}
+
+#[repr(C, packed)]
+#[derive(Copy, Clone, Debug, Default)]
+struct CtrlHdr {
+    class: u8,
+    cmd: u8,
+}
+
+impl ByteCode for CtrlHdr {}
+
+impl NetCtrlHandler {
+    fn handle_ctrl(&mut self) -> Result<()> {
+        let mut locked_queue = self.ctrl.queue.lock().unwrap();
+        loop {
+            let mut ack = VIRTIO_NET_OK;
+            let mut elem = locked_queue
+                .vring
+                .pop_avail(&self.mem_space, self.driver_features)
+                .with_context(|| "Failed to pop avail ring for net control queue")?;
+            if elem.desc_num == 0 {
+                break;
+            }
+
+            // Validate the control request.
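+            // The driver must supply at least a CtrlHdr in the device-readable (out) iovec
+            // and one ack byte in the device-writable (in) iovec; shorter requests are rejected.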
+ let in_size = Element::iovec_size(&elem.in_iovec); + let out_size = Element::iovec_size(&elem.out_iovec); + if in_size < mem::size_of_val(&ack) as u64 + || out_size < mem::size_of::() as u64 + { + bail!( + "Invalid length, in_iovec size is {}, out_iovec size is {}", + in_size, + out_size + ); + } + + // Get the control request header. + let mut ctrl_hdr = CtrlHdr::default(); + let mut data_iovec = get_buf_and_discard( + &self.mem_space, + &mut elem.out_iovec, + ctrl_hdr.as_mut_bytes(), + ) + .with_context(|| "Failed to get control header")?; + + trace::virtio_net_handle_ctrl(ctrl_hdr.class, ctrl_hdr.cmd); + match ctrl_hdr.class { + VIRTIO_NET_CTRL_RX => { + ack = self + .ctrl + .ctrl_info + .lock() + .unwrap() + .handle_rx_mode(&self.mem_space, ctrl_hdr.cmd, &mut data_iovec) + .unwrap_or_else(|e| { + error!("Failed to handle rx mode, error is {:?}", e); + VIRTIO_NET_ERR + }); + } + VIRTIO_NET_CTRL_MAC => { + ack = self.ctrl.ctrl_info.lock().unwrap().handle_mac( + &self.mem_space, + ctrl_hdr.cmd, + &mut data_iovec, + ); + } + VIRTIO_NET_CTRL_VLAN => { + ack = self.ctrl.ctrl_info.lock().unwrap().handle_vlan_table( + &self.mem_space, + ctrl_hdr.cmd, + &mut data_iovec, + ); + } + VIRTIO_NET_CTRL_MQ => { + ack = self.ctrl.ctrl_info.lock().unwrap().handle_mq( + &self.mem_space, + self.taps.as_mut(), + ctrl_hdr.cmd, + &mut data_iovec, + ); + } + _ => { + error!( + "Control queue header class {} not supported", + ctrl_hdr.class + ); + ack = VIRTIO_NET_ERR; + } + } + + // Write result to the device writable iovec. + let status = elem + .in_iovec + .first() + .with_context(|| "Failed to get device writable iovec")?; + self.mem_space + .write_object::(&ack, status.addr, AddressAttr::Ram)?; + + locked_queue + .vring + .add_used(elem.index, mem::size_of_val(&ack) as u32) + .with_context(|| format!("Failed to add used ring {}", elem.index))?; + + if locked_queue.vring.should_notify(self.driver_features) { + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&locked_queue), false) + .with_context(|| { + VirtioError::InterruptTrigger("ctrl", VirtioInterruptType::Vring) + })?; + trace::virtqueue_send_interrupt("Net", &*locked_queue as *const _ as u64); + } + } + + Ok(()) + } +} + +impl EventNotifierHelper for NetCtrlHandler { + fn internal_notifiers(net_io: Arc>) -> Vec { + let mut notifiers = Vec::new(); + + let locked_net_io = net_io.lock().unwrap(); + let cloned_net_io = net_io.clone(); + let handler: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let mut locked_net_io = cloned_net_io.lock().unwrap(); + if locked_net_io.device_broken.load(Ordering::SeqCst) { + return None; + } + locked_net_io.handle_ctrl().unwrap_or_else(|e| { + error!("Failed to handle ctrl queue, error is {:?}.", e); + report_virtio_error( + locked_net_io.interrupt_cb.clone(), + locked_net_io.driver_features, + &locked_net_io.device_broken, + ); + }); + None + }); + notifiers.push(build_event_notifier( + locked_net_io.ctrl.queue_evt.as_raw_fd(), + Some(handler), + NotifierOperation::AddShared, + EventSet::IN, + )); + + notifiers + } +} + +struct RTxVirtio { + queue: Arc>, + queue_evt: Arc, +} + +impl RTxVirtio { + fn new(queue: Arc>, queue_evt: Arc) -> Self { + TxVirtio { queue, queue_evt } + } +} + +type RxVirtio = RTxVirtio; +type TxVirtio = RTxVirtio; + +struct NetIoQueue { + rx: RxVirtio, + tx: TxVirtio, + ctrl_info: Arc>, + mem_space: Arc, + interrupt_cb: Arc, + listen_state: Arc>, + driver_features: u64, + queue_size: u16, +} + +impl NetIoQueue { + fn handle_rx(&self, tap: &Arc>>) -> Result<()> { + 
trace::virtio_receive_request("Net".to_string(), "to rx".to_string()); + if tap.read().unwrap().is_none() { + return Ok(()); + } + + let mut queue = self.rx.queue.lock().unwrap(); + let mut rx_packets: u16 = 0; + loop { + let elem = queue + .vring + .pop_avail(&self.mem_space, self.driver_features) + .with_context(|| "Failed to pop avail ring for net rx")?; + if elem.desc_num == 0 { + queue + .vring + .suppress_queue_notify(self.driver_features, false) + .with_context(|| "Failed to enable rx queue notify")?; + self.listen_state.lock().unwrap().set_queue_avail(false); + break; + } else if elem.in_iovec.is_empty() { + bail!("The length of in iovec is 0"); + } + let (_, iovecs) = + gpa_hva_iovec_map(&elem.in_iovec, &self.mem_space, queue.vring.get_cache())?; + + if MigrationManager::is_active() { + // FIXME: mark dirty page needs to be managed by `AddressSpace` crate. + for iov in iovecs.iter() { + // Mark vmm dirty page manually if live migration is active. + MigrationManager::mark_dirty_log(iov.iov_base, iov.iov_len); + } + } + + // Read the data from the tap device. + let locked_tap = tap.read().unwrap(); + let size = if locked_tap.is_some() { + locked_tap.as_ref().unwrap().receive_packets(&iovecs) + } else { + -1 + }; + drop(locked_tap); + if size < (NET_HDR_LENGTH + ETHERNET_HDR_LENGTH + VLAN_TAG_LENGTH) as isize { + queue.vring.push_back(); + break; + } + + let mut buf = vec![0_u8; NET_HDR_LENGTH + ETHERNET_HDR_LENGTH + VLAN_TAG_LENGTH]; + get_net_header(&iovecs, &mut buf).and_then(|size| { + if size != buf.len() { + bail!( + "Invalid header length {}, expected length {}", + size, + buf.len() + ); + } + Ok(()) + })?; + if self + .ctrl_info + .lock() + .unwrap() + .filter_packets(&buf[NET_HDR_LENGTH..]) + { + queue.vring.push_back(); + continue; + } + + queue + .vring + .add_used(elem.index, u32::try_from(size)?) 
+ .with_context(|| { + format!( + "Failed to add used ring for net rx, index: {}, len: {}", + elem.index, size + ) + })?; + + if queue.vring.should_notify(self.driver_features) { + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue), false) + .with_context(|| { + VirtioError::InterruptTrigger("net", VirtioInterruptType::Vring) + })?; + trace::virtqueue_send_interrupt("Net", &*queue as *const _ as u64); + } + + rx_packets += 1; + if rx_packets >= self.queue_size { + self.rx + .queue_evt + .write(1) + .with_context(|| "Failed to trigger rx queue event".to_string())?; + break; + } + } + + Ok(()) + } + + fn handle_tx(&self, tap: &Arc>>) -> Result<()> { + trace::virtio_receive_request("Net".to_string(), "to tx".to_string()); + let mut queue = self.tx.queue.lock().unwrap(); + + let mut tx_packets: u16 = 0; + loop { + let elem = queue + .vring + .pop_avail(&self.mem_space, self.driver_features) + .with_context(|| "Failed to pop avail ring for net tx")?; + if elem.desc_num == 0 { + break; + } else if elem.out_iovec.is_empty() { + bail!("The length of out iovec is 0"); + } + + let (_, iovecs) = + gpa_hva_iovec_map(&elem.out_iovec, &self.mem_space, queue.vring.get_cache())?; + let locked_tap = tap.read().unwrap(); + if locked_tap.is_none() || locked_tap.as_ref().unwrap().send_packets(&iovecs) == -1 { + queue.vring.push_back(); + queue + .vring + .suppress_queue_notify(self.driver_features, true) + .with_context(|| "Failed to suppress tx queue notify")?; + self.listen_state.lock().unwrap().set_tap_full(true); + break; + } + drop(locked_tap); + + queue + .vring + .add_used(elem.index, 0) + .with_context(|| format!("Net tx: Failed to add used ring {}", elem.index))?; + + if queue.vring.should_notify(self.driver_features) { + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue), false) + .with_context(|| { + VirtioError::InterruptTrigger("net", VirtioInterruptType::Vring) + })?; + trace::virtqueue_send_interrupt("Net", &*queue as *const _ as u64); + } + tx_packets += 1; + if tx_packets >= self.queue_size { + self.tx + .queue_evt + .write(1) + .with_context(|| "Failed to trigger tx queue event".to_string())?; + break; + } + } + + Ok(()) + } +} + +struct ListenState { + queue_avail: bool, + tap_full: bool, + is_listening: bool, + has_changed: bool, +} + +impl ListenState { + fn new() -> Self { + Self { + queue_avail: true, + tap_full: false, + is_listening: true, + has_changed: false, + } + } + + fn set_tap_full(&mut self, value: bool) { + if self.tap_full == value { + return; + } + self.tap_full = value; + self.has_changed = true; + } + + fn set_queue_avail(&mut self, value: bool) { + if self.queue_avail == value { + return; + } + self.queue_avail = value; + self.has_changed = true; + } + + fn tap_fd_handler(&mut self, tap: &Tap) -> Vec { + let mut notifiers = Vec::new(); + + if !self.is_listening && (self.queue_avail || self.tap_full) { + notifiers.push(EventNotifier::new( + NotifierOperation::Resume, + tap.as_raw_fd(), + None, + EventSet::empty(), + Vec::new(), + )); + self.is_listening = true; + } + + if !self.is_listening { + return notifiers; + } + + // NOTE: We want to poll for OUT event when the tap is full, and for IN event when the + // virtio queue is available. 
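+        // When neither condition is pending, the tap fd is parked (tap_operation below becomes
+        // Park) and resumed on the next state change that sets either flag again.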
+ let tap_events = match (self.queue_avail, self.tap_full) { + (true, true) => EventSet::OUT | EventSet::IN | EventSet::EDGE_TRIGGERED, + (false, true) => EventSet::OUT | EventSet::EDGE_TRIGGERED, + (true, false) => EventSet::IN | EventSet::EDGE_TRIGGERED, + (false, false) => EventSet::empty(), + }; + + let tap_operation = if tap_events.is_empty() { + self.is_listening = false; + NotifierOperation::Park + } else { + NotifierOperation::Modify + }; + + notifiers.push(EventNotifier::new( + tap_operation, + tap.as_raw_fd(), + None, + tap_events, + Vec::new(), + )); + notifiers + } +} + +fn get_net_header(iovec: &[Iovec], buf: &mut [u8]) -> Result { + let mut start: usize = 0; + let mut end: usize = 0; + + for elem in iovec { + end = start + .checked_add(elem.iov_len as usize) + .with_context(|| "Overflow when getting the net header")?; + end = cmp::min(end, buf.len()); + // SAFETY: iovec is generated by address_space and len is not less than buf's. + unsafe { + mem_to_buf(&mut buf[start..end], elem.iov_base)?; + } + if end >= buf.len() { + break; + } + start = end; + } + Ok(end) +} + +fn build_event_notifier( + fd: RawFd, + handler: Option>, + op: NotifierOperation, + event: EventSet, +) -> EventNotifier { + let mut handlers = Vec::new(); + if let Some(h) = handler { + handlers.push(h); + } + EventNotifier::new(op, fd, None, event, handlers) +} + +struct NetIoHandler { + /// The context name of iothread for tap and rx virtio queue. + /// Since we placed the handlers of RxVirtio, TxVirtio and tap_fd in different threads, + /// thread name is needed to change the monitoring status of tap_fd. + rx_iothread: Option, + /// Virtio queue used for net io. + net_queue: Arc, + /// The context of tap device. + tap: Arc>>, + /// Device is broken or not. + device_broken: Arc, + /// The receiver half of Rust's channel to recv tap information. + receiver: Receiver, + /// Eventfd for config space update. + update_evt: Arc, +} + +impl NetIoHandler { + fn update_evt_handler(&mut self) -> Result<()> { + let mut locked_tap = self.tap.write().unwrap(); + let old_tap_fd = if locked_tap.is_some() { + locked_tap.as_ref().unwrap().as_raw_fd() + } else { + -1 + }; + + *locked_tap = match self.receiver.recv() { + Ok(tap) => tap, + Err(e) => { + error!("Failed to receive the tap {:?}", e); + None + } + }; + drop(locked_tap); + + if old_tap_fd != -1 { + unregister_event_helper(self.rx_iothread.as_ref(), &mut vec![old_tap_fd])?; + } + if self.tap.read().unwrap().is_some() { + EventLoop::update_event(self.tap_notifier(), self.rx_iothread.as_ref())?; + } + Ok(()) + } + + /// Register event notifier for update_evt. + fn update_evt_notifier(&self, net_io: Arc>) -> Vec { + let device_broken = self.device_broken.clone(); + let handler: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + + if device_broken.load(Ordering::SeqCst) { + return None; + } + + if let Err(e) = net_io.lock().unwrap().update_evt_handler() { + error!("Update net events failed: {:?}", e); + } + + None + }); + let notifiers = vec![build_event_notifier( + self.update_evt.as_raw_fd(), + Some(handler), + NotifierOperation::AddShared, + EventSet::IN, + )]; + notifiers + } + + /// Register event notifier for rx. 
+ fn rx_virtio_notifier(&self) -> Vec { + let net_queue = self.net_queue.clone(); + let device_broken = self.device_broken.clone(); + let tap = self.tap.clone(); + let rx_iothread = self.rx_iothread.as_ref().cloned(); + let handler: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + + if device_broken.load(Ordering::SeqCst) { + return None; + } + + net_queue.listen_state.lock().unwrap().set_queue_avail(true); + let mut locked_queue = net_queue.rx.queue.lock().unwrap(); + + if let Err(ref err) = locked_queue + .vring + .suppress_queue_notify(net_queue.driver_features, true) + { + error!("Failed to suppress rx queue notify: {:?}", err); + report_virtio_error( + net_queue.interrupt_cb.clone(), + net_queue.driver_features, + &device_broken, + ); + return None; + }; + + drop(locked_queue); + + if let Err(ref err) = net_queue.handle_rx(&tap) { + error!("Failed to handle receive queue event: {:?}", err); + report_virtio_error( + net_queue.interrupt_cb.clone(), + net_queue.driver_features, + &device_broken, + ); + return None; + } + + let mut locked_listen = net_queue.listen_state.lock().unwrap(); + let locked_tap = tap.read().unwrap(); + if locked_tap.is_none() || !locked_listen.has_changed { + return None; + } + + let notifiers = locked_listen.tap_fd_handler(locked_tap.as_ref().unwrap()); + locked_listen.has_changed = false; + drop(locked_tap); + drop(locked_listen); + + if let Err(e) = EventLoop::update_event(notifiers, rx_iothread.as_ref()) { + error!("Update tap notifiers failed in handle rx: {:?}", e); + } + None + }); + let rx_fd = self.net_queue.rx.queue_evt.as_raw_fd(); + let notifiers = vec![build_event_notifier( + rx_fd, + Some(handler), + NotifierOperation::AddShared, + EventSet::IN, + )]; + notifiers + } + + /// Register event notifier for tx. + fn tx_virtio_notifier(&self) -> Vec { + let net_queue = self.net_queue.clone(); + let device_broken = self.device_broken.clone(); + let tap = self.tap.clone(); + let rx_iothread = self.rx_iothread.as_ref().cloned(); + let handler: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + + if device_broken.load(Ordering::SeqCst) { + return None; + } + + if let Err(ref e) = net_queue.handle_tx(&tap) { + error!("Failed to handle tx(tx event) for net, {:?}", e); + report_virtio_error( + net_queue.interrupt_cb.clone(), + net_queue.driver_features, + &device_broken, + ); + } + + let mut locked_listen = net_queue.listen_state.lock().unwrap(); + let locked_tap = tap.read().unwrap(); + if locked_tap.is_none() || !locked_listen.has_changed { + return None; + } + + let notifiers = locked_listen.tap_fd_handler(locked_tap.as_ref().unwrap()); + locked_listen.has_changed = false; + drop(locked_tap); + drop(locked_listen); + + if let Err(e) = EventLoop::update_event(notifiers, rx_iothread.as_ref()) { + error!("Update tap notifiers failed in handle tx: {:?}", e); + } + + None + }); + let tx_fd = self.net_queue.tx.queue_evt.as_raw_fd(); + let notifiers = vec![build_event_notifier( + tx_fd, + Some(handler), + NotifierOperation::AddShared, + EventSet::IN, + )]; + notifiers + } + + /// Register event notifier for tap. 
+    fn tap_notifier(&self) -> Vec<EventNotifier> {
+        let tap = self.tap.clone();
+        let net_queue = self.net_queue.clone();
+        let device_broken = self.device_broken.clone();
+        let locked_tap = self.tap.read().unwrap();
+        if locked_tap.is_none() {
+            return vec![];
+        }
+        let handler: Rc<NotifierCallback> = Rc::new(move |events: EventSet, _| {
+            if device_broken.load(Ordering::SeqCst) {
+                return None;
+            }
+
+            if events.contains(EventSet::OUT) {
+                net_queue.listen_state.lock().unwrap().set_tap_full(false);
+                net_queue
+                    .tx
+                    .queue_evt
+                    .write(1)
+                    .unwrap_or_else(|e| error!("Failed to notify tx thread: {:?}", e));
+            }
+
+            if events.contains(EventSet::IN) {
+                if let Err(ref err) = net_queue.handle_rx(&tap) {
+                    error!("Failed to handle receive queue event: {:?}", err);
+                    report_virtio_error(
+                        net_queue.interrupt_cb.clone(),
+                        net_queue.driver_features,
+                        &device_broken,
+                    );
+                    return None;
+                }
+            }
+
+            let mut locked_listen = net_queue.listen_state.lock().unwrap();
+            let locked_tap = tap.read().unwrap();
+            if !locked_listen.has_changed || locked_tap.is_none() {
+                return None;
+            }
+            let tap_notifiers = locked_listen.tap_fd_handler(locked_tap.as_ref().unwrap());
+            locked_listen.has_changed = false;
+            drop(locked_tap);
+            drop(locked_listen);
+
+            Some(tap_notifiers)
+        });
+        let tap_fd = locked_tap.as_ref().unwrap().as_raw_fd();
+        let notifiers = vec![build_event_notifier(
+            tap_fd,
+            Some(handler),
+            NotifierOperation::AddShared,
+            EventSet::IN | EventSet::EDGE_TRIGGERED,
+        )];
+
+        notifiers
+    }
+}
+
+/// Status of net device.
+#[repr(C)]
+#[derive(Copy, Clone, Desc, ByteCode)]
+#[desc_version(compat_version = "0.1.0")]
+pub struct VirtioNetState {
+    /// Bit mask of features supported by the backend.
+    pub device_features: u64,
+    /// Bit mask of features negotiated by the backend and the frontend.
+    pub driver_features: u64,
+    /// Virtio net configurations.
+    pub config_space: VirtioNetConfig,
+    /// Device broken status.
+    broken: bool,
+}
+
+/// Network device structure.
+#[derive(Default)]
+pub struct Net {
+    /// Virtio device base property.
+    base: VirtioBase,
+    /// Configuration of the network device.
+    net_cfg: NetworkInterfaceConfig,
+    /// Configuration of the backend netdev.
+    netdev_cfg: NetDevcfg,
+    /// Virtio net configurations.
+    config_space: Arc<Mutex<VirtioNetConfig>>,
+    /// Tap devices opened.
+    taps: Option<Vec<Tap>>,
+    /// The send half of Rust's channel to send tap information.
+    senders: Option<Vec<Sender<SenderConfig>>>,
+    /// Eventfds for config space update.
+    update_evts: Vec<Arc<EventFd>>,
+    /// The information about control command.
+    ctrl_info: Option<Arc<Mutex<CtrlInfo>>>,
+    /// The deactivate events for receiving.
+    rx_deactivate_evts: Vec<RawFd>,
+    /// The deactivate events for transmitting.
+    tx_deactivate_evts: Vec<RawFd>,
+}
+
+impl Net {
+    pub fn new(net_cfg: NetworkInterfaceConfig, netdev_cfg: NetDevcfg) -> Self {
+        let queue_num = if net_cfg.mq {
+            (netdev_cfg.queues + 1) as usize
+        } else {
+            QUEUE_NUM_NET
+        };
+        let queue_size = net_cfg.queue_size;
+
+        Self {
+            base: VirtioBase::new(VIRTIO_TYPE_NET, queue_num, queue_size),
+            net_cfg,
+            netdev_cfg,
+            ..Default::default()
+        }
+    }
+}
+
+/// Set the Mac address configured by the user into the virtio configuration, and return the
+/// feature mask with VIRTIO_NET_F_MAC set.
+///
+/// # Arguments
+///
+/// * `device_config` - Virtio net configurations.
+/// * `mac` - Mac address configured by user.
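+/// For example, "52:54:00:12:34:56" fills `device_config.mac` with
+/// [0x52, 0x54, 0x00, 0x12, 0x34, 0x56] and returns 1 << VIRTIO_NET_F_MAC, while a
+/// malformed string makes the function return 0.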
+pub fn build_device_config_space(device_config: &mut VirtioNetConfig, mac: &str) -> u64 { + let mut bytes = [0_u8; 6]; + for (i, s) in mac.split(':').collect::>().iter().enumerate() { + bytes[i] = if let Ok(v) = u8::from_str_radix(s, 16) { + v + } else { + return 0_u64; + }; + } + device_config.mac.copy_from_slice(&bytes); + 1 << VIRTIO_NET_F_MAC +} + +/// Mark the mac table used or free. +fn mark_mac_table(mac: &[u8], used: bool) { + if mac[..MAC_ADDR_LEN - 1] != FIRST_DEFAULT_MAC[..MAC_ADDR_LEN - 1] { + return; + } + let mut val = -1_i8; + if used { + val = 1; + } + let mut locked_mac_table = USED_MAC_TABLE.lock().unwrap(); + for i in FIRST_DEFAULT_MAC[MAC_ADDR_LEN - 1]..MAX_MAC_ADDR_NUM as u8 { + if mac[MAC_ADDR_LEN - 1] == i { + locked_mac_table[i as usize] += val; + } + } +} + +/// Get a default free mac address. +fn get_default_mac_addr() -> Result<[u8; MAC_ADDR_LEN]> { + let mut mac = [0_u8; MAC_ADDR_LEN]; + mac.copy_from_slice(&FIRST_DEFAULT_MAC); + let mut locked_mac_table = USED_MAC_TABLE.lock().unwrap(); + for i in FIRST_DEFAULT_MAC[MAC_ADDR_LEN - 1]..MAX_MAC_ADDR_NUM as u8 { + if locked_mac_table[i as usize] == 0 { + mac[MAC_ADDR_LEN - 1] = i; + locked_mac_table[i as usize] = 1; + return Ok(mac); + } + } + bail!("Failed to get a free mac address"); +} + +/// Check that tap flag supports multi queue feature. +/// +/// # Arguments +/// +/// * `dev_name` - The name of tap device on host. +/// * `queue_pairs` - The number of virtio queue pairs. +fn check_mq(dev_name: &str, queue_pair: u16) -> Result<()> { + let path = format!("/sys/class/net/{}/tun_flags", dev_name); + let tap_path = Path::new(&path); + if !tap_path.exists() { + warn!("Tap interface does not exist"); + return Ok(()); + } + + let is_mq = queue_pair > 1; + let ifr_flag = fs::read_to_string(tap_path) + .with_context(|| "Failed to read content from tun_flags file")?; + let flags = str_to_num::(&ifr_flag)?; + if (flags & IFF_MULTI_QUEUE != 0) && !is_mq { + bail!(format!( + "Tap device supports mq, but command set queue pairs {}.", + queue_pair + )); + } else if (flags & IFF_MULTI_QUEUE == 0) && is_mq { + bail!(format!( + "Tap device doesn't support mq, but command set queue pairs {}.", + queue_pair + )); + } + + Ok(()) +} + +/// Open tap device if no fd provided, configure and return it. +/// +/// # Arguments +/// +/// * `net_fd` - Fd of tap device opened. +/// * `host_dev_name` - Path of tap device on host. +/// * `queue_pairs` - The number of virtio queue pairs. +pub fn create_tap( + net_fds: Option<&Vec>, + host_dev_name: Option<&str>, + queue_pairs: u16, +) -> Result>> { + if net_fds.is_none() && host_dev_name.is_none() { + return Ok(None); + } + if net_fds.is_some() && host_dev_name.is_some() { + error!("Create tap: fd and file_path exist meanwhile (use fd by default)"); + } + + let mut taps = Vec::with_capacity(queue_pairs as usize); + for index in 0..queue_pairs { + let tap = if let Some(fds) = net_fds { + let fd = fds + .get(index as usize) + .with_context(|| format!("Failed to get fd from index {}", index))?; + Tap::new(None, Some(*fd), queue_pairs) + .with_context(|| format!("Failed to create tap, index is {}", index))? + } else { + // `unwrap()` won't fail because the arguments have been checked + let dev_name = host_dev_name.unwrap(); + check_mq(dev_name, queue_pairs)?; + Tap::new(Some(dev_name), None, queue_pairs).with_context(|| { + format!( + "Failed to create tap with name {}, index is {}", + dev_name, index + ) + })? 
+ }; + + tap.set_hdr_size(NET_HDR_LENGTH as u32) + .with_context(|| "Failed to set tap hdr size")?; + + taps.push(tap); + } + + Ok(Some(taps)) +} + +/// Get the tap offload flags from driver features. +/// +/// # Arguments +/// +/// * `features` - The driver features. +fn get_tap_offload_flags(features: u64) -> u32 { + let mut flags: u32 = 0; + if virtio_has_feature(features, VIRTIO_NET_F_GUEST_CSUM) { + flags |= TUN_F_CSUM; + } + if virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4) { + flags |= TUN_F_TSO4; + } + if virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6) { + flags |= TUN_F_TSO6; + } + if virtio_has_feature(features, VIRTIO_NET_F_GUEST_ECN) { + flags |= TUN_F_TSO_ECN; + } + if virtio_has_feature(features, VIRTIO_NET_F_GUEST_UFO) { + flags |= TUN_F_UFO; + } + flags +} + +impl VirtioDevice for Net { + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + + fn realize(&mut self) -> Result<()> { + // if iothread not found, return err + if self.net_cfg.iothread.is_some() + && EventLoop::get_ctx(self.net_cfg.iothread.as_ref()).is_none() + { + bail!( + "IOThread {:?} of Net is not configured in params.", + self.net_cfg.iothread, + ); + } + + let queue_pairs = self.netdev_cfg.queues / 2; + if !self.netdev_cfg.ifname.is_empty() { + self.taps = create_tap(None, Some(&self.netdev_cfg.ifname), queue_pairs) + .with_context(|| "Failed to open tap with file path")?; + } else if let Some(fds) = self.netdev_cfg.tap_fds.as_mut() { + let mut created_fds = 0; + if let Some(taps) = &self.taps { + for (index, tap) in taps.iter().enumerate() { + if fds.get(index).map_or(-1, |fd| *fd as RawFd) == tap.as_raw_fd() { + created_fds += 1; + } + } + } + + if created_fds != fds.len() { + self.taps = create_tap(Some(fds), None, queue_pairs) + .with_context(|| "Failed to open tap")?; + } + } else { + self.taps = None; + } + + if let Some(ref taps) = self.taps { + for (idx, tap) in taps.iter().enumerate() { + let upload_stats = tap.upload_stats.clone(); + let download_stats = tap.download_stats.clone(); + register_state_query_callback( + format!("tap-{}", idx), + Arc::new(move || { + let upload = upload_stats.load(Ordering::SeqCst); + let download = download_stats.load(Ordering::SeqCst); + format!("upload: {} download: {}", upload, download) + }), + ) + } + } + + self.init_config_features()?; + + Ok(()) + } + + fn init_config_features(&mut self) -> Result<()> { + self.base.device_features = 1 << VIRTIO_F_VERSION_1 + | 1 << VIRTIO_NET_F_CSUM + | 1 << VIRTIO_NET_F_GUEST_CSUM + | 1 << VIRTIO_NET_F_GUEST_TSO4 + | 1 << VIRTIO_NET_F_GUEST_TSO6 + | 1 << VIRTIO_NET_F_GUEST_UFO + | 1 << VIRTIO_NET_F_HOST_TSO4 + | 1 << VIRTIO_NET_F_HOST_TSO6 + | 1 << VIRTIO_NET_F_HOST_UFO + | 1 << VIRTIO_NET_F_CTRL_RX + | 1 << VIRTIO_NET_F_CTRL_VLAN + | 1 << VIRTIO_NET_F_CTRL_RX_EXTRA + | 1 << VIRTIO_NET_F_CTRL_MAC_ADDR + | 1 << VIRTIO_NET_F_CTRL_VQ + | 1 << VIRTIO_F_RING_INDIRECT_DESC + | 1 << VIRTIO_F_RING_EVENT_IDX; + + let mut locked_config = self.config_space.lock().unwrap(); + + let queue_pairs = self.netdev_cfg.queues / 2; + if self.net_cfg.mq + && (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN..=VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) + .contains(&queue_pairs) + { + self.base.device_features |= 1 << VIRTIO_NET_F_MQ; + locked_config.max_virtqueue_pairs = queue_pairs; + } + + // Using the first tap to test if all the taps have ufo. 
+ if let Some(tap) = self.taps.as_ref().map(|t| &t[0]) { + if !tap.has_ufo() { + self.base.device_features &= + !(1 << VIRTIO_NET_F_GUEST_UFO | 1 << VIRTIO_NET_F_HOST_UFO); + } + } + + if let Some(mac) = &self.net_cfg.mac { + self.base.device_features |= build_device_config_space(&mut locked_config, mac); + mark_mac_table(&locked_config.mac, true); + } else if locked_config.mac == [0; MAC_ADDR_LEN] { + let mac = + get_default_mac_addr().with_context(|| "Failed to get a default mac address")?; + locked_config.mac.copy_from_slice(&mac); + self.base.device_features |= 1 << VIRTIO_NET_F_MAC; + } else { + // For microvm which will call realize() twice for one virtio-net-device. + self.base.device_features |= 1 << VIRTIO_NET_F_MAC; + } + + Ok(()) + } + + fn unrealize(&mut self) -> Result<()> { + if let Some(ref taps) = self.taps { + for (idx, _) in taps.iter().enumerate() { + unregister_state_query_callback(&format!("tap-{}", idx)); + } + } + mark_mac_table(&self.config_space.lock().unwrap().mac, false); + MigrationManager::unregister_device_instance( + VirtioNetState::descriptor(), + &self.net_cfg.id, + ); + Ok(()) + } + + fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()> { + let config_space = self.config_space.lock().unwrap(); + read_config_default(config_space.as_bytes(), offset, data) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { + let mut config_space = self.config_space.lock().unwrap(); + let config_slice = &mut config_space.as_mut_bytes()[..MAC_ADDR_LEN]; + check_config_space_rw(config_slice, offset, data)?; + + let data_len = data.len(); + let driver_features = self.base.driver_features; + if !virtio_has_feature(driver_features, VIRTIO_NET_F_CTRL_MAC_ADDR) + && !virtio_has_feature(driver_features, VIRTIO_F_VERSION_1) + && *data != config_slice[offset as usize..(offset as usize + data_len)] + { + config_slice[(offset as usize)..(offset as usize + data_len)].copy_from_slice(data); + } + + Ok(()) + } + + fn activate( + &mut self, + mem_space: Arc, + interrupt_cb: Arc, + queue_evts: Vec>, + ) -> Result<()> { + let queues = self.base.queues.clone(); + let queue_num = queues.len(); + let ctrl_info = Arc::new(Mutex::new(CtrlInfo::new(self.config_space.clone()))); + self.ctrl_info = Some(ctrl_info.clone()); + let driver_features = self.base.driver_features; + if (driver_features & 1 << VIRTIO_NET_F_CTRL_VQ != 0) && (queue_num % 2 != 0) { + let ctrl_queue = queues[queue_num - 1].clone(); + let ctrl_queue_evt = queue_evts[queue_num - 1].clone(); + + let ctrl_handler = NetCtrlHandler { + ctrl: CtrlVirtio::new(ctrl_queue, ctrl_queue_evt, ctrl_info.clone()), + mem_space: mem_space.clone(), + interrupt_cb: interrupt_cb.clone(), + driver_features, + device_broken: self.base.broken.clone(), + taps: self.taps.clone(), + }; + + let notifiers = + EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(ctrl_handler))); + register_event_helper( + notifiers, + self.net_cfg.iothread.as_ref(), + &mut self.base.deactivate_evts, + )?; + } + + // The features about offload is included in bits 0 to 31. 
+ let features = self.driver_features(0_u32); + let flags = get_tap_offload_flags(u64::from(features)); + + let mut senders = Vec::new(); + let queue_pairs = queue_num / 2; + for index in 0..queue_pairs { + let rx_queue = queues[index * 2].clone(); + let rx_queue_evt = queue_evts[index * 2].clone(); + let tx_queue = queues[index * 2 + 1].clone(); + let tx_queue_evt = queue_evts[index * 2 + 1].clone(); + + let (sender, receiver) = channel(); + senders.push(sender); + + if let Some(tap) = self.taps.as_ref().map(|t| t[index].clone()) { + tap.set_offload(flags) + .with_context(|| "Failed to set tap offload")?; + } + + let update_evt = Arc::new(create_new_eventfd()?); + let net_queue = Arc::new(NetIoQueue { + rx: RxVirtio::new(rx_queue, rx_queue_evt), + tx: TxVirtio::new(tx_queue, tx_queue_evt), + ctrl_info: ctrl_info.clone(), + mem_space: mem_space.clone(), + interrupt_cb: interrupt_cb.clone(), + driver_features, + listen_state: Arc::new(Mutex::new(ListenState::new())), + queue_size: self.queue_size_max(), + }); + let tap = Arc::new(RwLock::new(self.taps.as_ref().map(|t| t[index].clone()))); + let net_io = Arc::new(Mutex::new(NetIoHandler { + rx_iothread: self.net_cfg.rx_iothread.as_ref().cloned(), + net_queue, + tap, + device_broken: self.base.broken.clone(), + receiver, + update_evt: update_evt.clone(), + })); + let cloned_net_io = net_io.clone(); + let locked_net_io = net_io.lock().unwrap(); + let update_evt_notifiers = locked_net_io.update_evt_notifier(cloned_net_io); + let rx_notifiers = locked_net_io.rx_virtio_notifier(); + let tx_notifiers = locked_net_io.tx_virtio_notifier(); + let tap_notifiers = locked_net_io.tap_notifier(); + drop(locked_net_io); + register_event_helper( + update_evt_notifiers, + self.net_cfg.iothread.as_ref(), + &mut self.base.deactivate_evts, + )?; + register_event_helper( + rx_notifiers, + self.net_cfg.rx_iothread.as_ref(), + &mut self.rx_deactivate_evts, + )?; + register_event_helper( + tap_notifiers, + self.net_cfg.rx_iothread.as_ref(), + &mut self.rx_deactivate_evts, + )?; + register_event_helper( + tx_notifiers, + self.net_cfg.tx_iothread.as_ref(), + &mut self.tx_deactivate_evts, + )?; + self.update_evts.push(update_evt); + } + self.senders = Some(senders); + self.base.broken.store(false, Ordering::SeqCst); + + Ok(()) + } + + // configs[0]: NetDevcfg. configs[1]: NetworkInterfaceConfig. + fn update_config(&mut self, dev_config: Vec>) -> Result<()> { + if dev_config.len() == 2 { + self.netdev_cfg = dev_config[0] + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + self.net_cfg = dev_config[1] + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + + // Set tap offload. + // The features about offload is included in bits 0 to 31. 
+ let features = self.driver_features(0_u32); + let flags = get_tap_offload_flags(u64::from(features)); + if let Some(taps) = &self.taps { + for (_, tap) in taps.iter().enumerate() { + tap.set_offload(flags) + .with_context(|| "Failed to set tap offload")?; + } + } + } else { + self.net_cfg = Default::default(); + } + + self.realize()?; + + if let Some(senders) = &self.senders { + for (index, sender) in senders.iter().enumerate() { + match self.taps.take() { + Some(taps) => { + let tap = taps + .get(index) + .cloned() + .with_context(|| format!("Failed to get index {} tap", index))?; + sender + .send(Some(tap)) + .with_context(|| VirtioError::ChannelSend("tap fd".to_string()))?; + } + None => sender + .send(None) + .with_context(|| "Failed to send status of None to channel".to_string())?, + } + } + + for update_evt in &self.update_evts { + update_evt + .write(1) + .with_context(|| VirtioError::EventFdWrite)?; + } + } + + Ok(()) + } + + fn deactivate(&mut self) -> Result<()> { + unregister_event_helper( + self.net_cfg.iothread.as_ref(), + &mut self.base.deactivate_evts, + )?; + unregister_event_helper( + self.net_cfg.rx_iothread.as_ref(), + &mut self.rx_deactivate_evts, + )?; + unregister_event_helper( + self.net_cfg.tx_iothread.as_ref(), + &mut self.tx_deactivate_evts, + )?; + self.update_evts.clear(); + self.ctrl_info = None; + Ok(()) + } + + fn reset(&mut self) -> Result<()> { + if let Some(ref mut taps) = self.taps { + for tap in taps.iter_mut() { + tap.download_stats.store(0, Ordering::SeqCst); + tap.upload_stats.store(0, Ordering::SeqCst); + } + } + Ok(()) + } +} + +// SAFETY: Send and Sync is not auto-implemented for `Sender` type. +// Implementing them is safe because `Sender` field of Net won't +// change in migration workflow. +unsafe impl Sync for Net {} + +impl StateTransfer for Net { + fn get_state_vec(&self) -> Result> { + let state = VirtioNetState { + device_features: self.base.device_features, + driver_features: self.base.driver_features, + config_space: *self.config_space.lock().unwrap(), + broken: self.base.broken.load(Ordering::SeqCst), + }; + Ok(state.as_bytes().to_vec()) + } + + fn set_state_mut(&mut self, state: &[u8]) -> Result<()> { + let s_len = std::mem::size_of::(); + if state.len() != s_len { + bail!("Invalid state length {}, expected {}", state.len(), s_len); + } + let state = VirtioNetState::from_bytes(state) + .with_context(|| migration::error::MigrationError::FromBytesError("NET"))?; + self.base.device_features = state.device_features; + self.base.driver_features = state.driver_features; + self.base.broken.store(state.broken, Ordering::SeqCst); + *self.config_space.lock().unwrap() = state.config_space; + Ok(()) + } + + fn get_device_alias(&self) -> u64 { + MigrationManager::get_desc_alias(&VirtioNetState::descriptor().name).unwrap_or(!0) + } +} + +impl MigrationHook for Net {} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_net_init() { + // test net new method + let mut net = Net::new(NetworkInterfaceConfig::default(), NetDevcfg::default()); + assert_eq!(net.base.device_features, 0); + assert_eq!(net.base.driver_features, 0); + + assert!(net.taps.is_none()); + assert!(net.senders.is_none()); + assert!(net.net_cfg.mac.is_none()); + assert!(net.netdev_cfg.tap_fds.is_none()); + assert!(net.netdev_cfg.vhost_type().is_none()); + assert!(net.netdev_cfg.vhost_fds.is_none()); + + // test net realize method + net.realize().unwrap(); + assert_eq!(net.device_type(), 1); + assert_eq!(net.queue_num(), 3); + assert_eq!(net.queue_size_max(), 256); + + 
// test read_config and write_config method
+        let write_data: Vec<u8> = vec![7; 4];
+        let mut random_data: Vec<u8> = vec![0; 4];
+        let mut origin_data: Vec<u8> = vec![0; 4];
+        net.read_config(0x00, &mut origin_data).unwrap();
+
+        net.write_config(0x00, &write_data).unwrap();
+        net.read_config(0x00, &mut random_data).unwrap();
+        assert_eq!(random_data, write_data);
+
+        net.write_config(0x00, &origin_data).unwrap();
+
+        // test boundary conditions of the offset and data parameters
+        let config_space = net.config_space.lock().unwrap();
+        let device_config = config_space.as_bytes();
+        let len = device_config.len() as u64;
+        drop(config_space);
+
+        let mut data: Vec<u8> = vec![0; 10];
+        let offset: u64 = len + 1;
+        assert!(net.read_config(offset, &mut data).is_err());
+
+        let offset: u64 = len;
+        assert!(net.read_config(offset, &mut data).is_err());
+
+        let offset: u64 = 0;
+        assert!(net.read_config(offset, &mut data).is_ok());
+
+        let offset: u64 = len;
+        let mut data: Vec<u8> = vec![0; 1];
+        assert!(net.write_config(offset, &mut data).is_err());
+
+        let offset: u64 = len - 1;
+        let mut data: Vec<u8> = vec![0; 1];
+        assert!(net.write_config(offset, &mut data).is_err());
+
+        let offset: u64 = 0;
+        let mut data: Vec<u8> = vec![0; len as usize];
+        assert!(net.write_config(offset, &mut data).is_err());
+    }
+
+    #[test]
+    fn test_net_create_tap() {
+        // Test None net_fds and host_dev_name.
+        assert!(create_tap(None, None, 16).unwrap().is_none());
+
+        // Test create tap with both net_fds and host_dev_name.
+        let net_fds = vec![32, 33];
+        let tap_name = "tap0";
+        if let Err(err) = create_tap(Some(&net_fds), Some(tap_name), 1) {
+            let err_msg = "Failed to create tap, index is 0".to_string();
+            assert_eq!(err.to_string(), err_msg);
+        } else {
+            assert!(false);
+        }
+
+        // Test create tap with empty net_fds.
+        if let Err(err) = create_tap(Some(&vec![]), None, 1) {
+            let err_msg = "Failed to get fd from index 0".to_string();
+            assert_eq!(err.to_string(), err_msg);
+        } else {
+            assert!(false);
+        }
+
+        // Test create tap with a tap_name that does not exist.
+        if let Err(err) = create_tap(None, Some("the_tap_is_not_exist"), 1) {
+            let err_msg =
+                "Failed to create tap with name the_tap_is_not_exist, index is 0".to_string();
+            assert_eq!(err.to_string(), err_msg);
+        } else {
+            assert!(false);
+        }
+    }
+
+    #[test]
+    fn test_net_filter_vlan() {
+        let mut ctrl_info = CtrlInfo::new(Arc::new(Mutex::new(VirtioNetConfig::default())));
+        ctrl_info.rx_mode.promisc = false;
+        let mut buf = [
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x81, 0x00,
+            0x00, 0x00,
+        ];
+        // It has no valid vlan id, so the packet is filtered.
+        assert!(ctrl_info.filter_packets(&buf));
+
+        // It has a valid vlan id, so the packet is not filtered.
+        let vid: u16 = 1023;
+        buf[ETHERNET_HDR_LENGTH] = u16::to_be_bytes(vid)[0];
+        buf[ETHERNET_HDR_LENGTH + 1] = u16::to_be_bytes(vid)[1];
+        ctrl_info.vlan_map.insert(vid >> 5, 1 << (vid & 0x1f));
+        assert!(!ctrl_info.filter_packets(&buf));
+    }
+
+    #[test]
+    fn test_net_config_space() {
+        let mut net_config = VirtioNetConfig::default();
+        // Parsing a normal mac address.
+        let mac = "52:54:00:12:34:56";
+        let ret = build_device_config_space(&mut net_config, mac);
+        assert_eq!(ret, 1 << VIRTIO_NET_F_MAC);
+
+        // Parsing an abnormal mac address.
+        let mac = "52:54:00:12:34:";
+        let ret = build_device_config_space(&mut net_config, mac);
+        assert_eq!(ret, 0);
+    }
+
+    #[test]
+    fn test_mac_table() {
+        let mut mac = FIRST_DEFAULT_MAC;
+        // Add mac to mac table.
+ mark_mac_table(&mac, true); + assert_eq!( + USED_MAC_TABLE.lock().unwrap()[mac[MAC_ADDR_LEN - 1] as usize], + 1 + ); + // Delete mac from mac table. + mark_mac_table(&mac, false); + assert_eq!( + USED_MAC_TABLE.lock().unwrap()[mac[MAC_ADDR_LEN - 1] as usize], + 0 + ); + + // Mac not in the default mac range. + mac[0] += 1; + mark_mac_table(&mac, true); + assert_eq!( + USED_MAC_TABLE.lock().unwrap()[mac[MAC_ADDR_LEN - 1] as usize], + 0 + ); + + // Test no free mac in mac table. + for i in FIRST_DEFAULT_MAC[MAC_ADDR_LEN - 1]..MAX_MAC_ADDR_NUM as u8 { + USED_MAC_TABLE.lock().unwrap()[i as usize] = 1; + } + assert!(get_default_mac_addr().is_err()); + // Recover it. + for i in FIRST_DEFAULT_MAC[MAC_ADDR_LEN - 1]..MAX_MAC_ADDR_NUM as u8 { + USED_MAC_TABLE.lock().unwrap()[i as usize] = 0; + } + } + + #[test] + fn test_iothread() { + EventLoop::object_init(&None).unwrap(); + + let mut net = Net::new(NetworkInterfaceConfig::default(), NetDevcfg::default()); + net.net_cfg.iothread = Some("iothread".to_string()); + if let Err(err) = net.realize() { + let err_msg = format!( + "IOThread {:?} of Net is not configured in params.", + net.net_cfg.iothread + ); + assert_eq!(err.to_string(), err_msg); + } else { + assert!(false); + } + + EventLoop::loop_clean(); + } +} diff --git a/virtio/src/rng.rs b/virtio/src/device/rng.rs similarity index 48% rename from virtio/src/rng.rs rename to virtio/src/device/rng.rs index ff87fc2d59bade2a5f5b2be218e2e162180cfa09..68f17d84ac43a79b596115884e60f5cad9904801 100644 --- a/virtio/src/rng.rs +++ b/virtio/src/device/rng.rs @@ -10,34 +10,99 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. +use std::cmp::min; use std::fs::File; use std::os::unix::fs::FileTypeExt; use std::os::unix::io::{AsRawFd, RawFd}; use std::path::Path; +use std::rc::Rc; use std::sync::{Arc, Mutex}; -use address_space::AddressSpace; -use machine_manager::{config::RngConfig, event_loop::EventLoop}; +use anyhow::{anyhow, bail, Context, Result}; +use clap::Parser; +use log::error; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; + +use crate::error::VirtioError; +use crate::{ + ElemIovec, Queue, VirtioBase, VirtioDevice, VirtioInterrupt, VirtioInterruptType, + VIRTIO_F_VERSION_1, VIRTIO_TYPE_RNG, +}; +use address_space::{AddressAttr, AddressSpace}; +use machine_manager::{ + config::{get_pci_df, valid_id, ConfigError, RngObjConfig, DEFAULT_VIRTQUEUE_SIZE}, + event_loop::EventLoop, + event_loop::{register_event_helper, unregister_event_helper}, +}; use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; +use migration_derive::{ByteCode, Desc}; use util::aio::raw_read; use util::byte_code::ByteCode; +use util::gen_base_func; use util::leak_bucket::LeakBucket; use util::loop_context::{ read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, }; -use util::num_ops::{read_u32, write_u32}; -use vmm_sys_util::epoll::EventSet; -use vmm_sys_util::eventfd::EventFd; +const QUEUE_NUM_RNG: usize = 1; +const RNG_SIZE_MAX: u32 = 1 << 20; + +const MIN_BYTES_PER_SEC: u64 = 64; +const MAX_BYTES_PER_SEC: u64 = 1_000_000_000; + +/// Config structure for virtio-rng. 
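+//
+// Illustrative usage (ids and paths here are only examples): the `rng` field refers to a
+// separately defined rng object, e.g.
+//   -object rng-random,id=objrng0,filename=/dev/urandom
+//   -device virtio-rng-pci,rng=objrng0,max-bytes=1234,period=1000,bus=pcie.0,addr=0x1,id=rng0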
+#[derive(Parser, Debug, Clone, Default)] +#[command(no_binary_name(true))] +pub struct RngConfig { + #[arg(long, value_parser = ["virtio-rng-device", "virtio-rng-pci"])] + pub classtype: String, + #[arg(long, default_value = "", value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub rng: String, + #[arg(long, alias = "max-bytes")] + pub max_bytes: Option, + #[arg(long)] + pub period: Option, + #[arg(long)] + pub bus: Option, + #[arg(long, value_parser = get_pci_df)] + pub addr: Option<(u8, u8)>, + #[arg(long)] + pub multifunction: Option, +} -use super::errors::{ErrorKind, Result, ResultExt}; -use super::{ - ElemIovec, Queue, VirtioDevice, VirtioInterrupt, VirtioInterruptType, VIRTIO_F_VERSION_1, - VIRTIO_TYPE_RNG, -}; +impl RngConfig { + pub fn bytes_per_sec(&self) -> Result> { + if self.max_bytes.is_some() != self.period.is_some() { + bail!("\"max_bytes\" and \"period\" should be configured or not configured Simultaneously."); + } -const QUEUE_NUM_RNG: usize = 1; -const QUEUE_SIZE_RNG: u16 = 256; + if let Some(max) = self.max_bytes { + let peri = self.period.unwrap(); + let mul = max + .checked_mul(1000) + .with_context(|| format!("Illegal max-bytes arguments: {:?}", max))?; + let bytes_per_sec = mul + .checked_div(peri) + .with_context(|| format!("Illegal period arguments: {:?}", peri))?; + + if !(MIN_BYTES_PER_SEC..=MAX_BYTES_PER_SEC).contains(&bytes_per_sec) { + return Err(anyhow!(ConfigError::IllegalValue( + "The bytes per second of rng device".to_string(), + MIN_BYTES_PER_SEC, + true, + MAX_BYTES_PER_SEC, + true, + ))); + } + + return Ok(Some(bytes_per_sec)); + } + Ok(None) + } +} fn get_req_data_size(in_iov: &[ElemIovec]) -> Result { let mut size = 0_u32; @@ -48,13 +113,14 @@ fn get_req_data_size(in_iov: &[ElemIovec]) -> Result { }; } + size = min(size, RNG_SIZE_MAX); + Ok(size) } struct RngHandler { queue: Arc>, - queue_evt: EventFd, - deactivate_evt: RawFd, + queue_evt: Arc, interrupt_cb: Arc, driver_features: u64, mem_space: Arc, @@ -63,19 +129,30 @@ struct RngHandler { } impl RngHandler { - fn write_req_data(&self, in_iov: &[ElemIovec], buffer: &mut [u8]) -> Result<()> { + fn write_req_data(&self, in_iov: &[ElemIovec], buffer: &mut [u8], size: u32) -> Result<()> { let mut offset = 0_usize; for iov in in_iov { + if offset as u32 >= size { + break; + } self.mem_space - .write(&mut buffer[offset..].as_ref(), iov.addr, iov.len as u64) - .chain_err(|| "Failed to write request data for virtio rng")?; + .write( + &mut buffer[offset..].as_ref(), + iov.addr, + u64::from(min(size - offset as u32, iov.len)), + AddressAttr::Ram, + ) + .with_context(|| "Failed to write request data for virtio rng")?; + offset += iov.len as usize; } + trace::virtio_rng_write_req_data(size); Ok(()) } fn process_queue(&mut self) -> Result<()> { + trace::virtio_receive_request("Rng".to_string(), "to IO".to_string()); let mut queue_lock = self.queue.lock().unwrap(); let mut need_interrupt = false; @@ -83,35 +160,40 @@ impl RngHandler { .vring .pop_avail(&self.mem_space, self.driver_features) { - let size = - get_req_data_size(&elem.in_iovec).chain_err(|| "Failed to get request size")?; + if elem.desc_num == 0 { + break; + } + let mut size = + get_req_data_size(&elem.in_iovec).with_context(|| "Failed to get request size")?; if let Some(leak_bucket) = self.leak_bucket.as_mut() { - if let Some(ctx) = EventLoop::get_ctx(None) { - if leak_bucket.throttled(ctx, size as u64) { - queue_lock.vring.push_back(); - break; - } - } else { - bail!("Failed to get ctx in event loop context for virtio rng"); + if 
leak_bucket.throttled(EventLoop::get_ctx(None).unwrap(), size) { + queue_lock.vring.push_back(); + break; } } let mut buffer = vec![0_u8; size as usize]; - raw_read( - self.random_file.as_raw_fd(), - buffer.as_mut_ptr() as u64, - size as usize, - 0, - ) - .chain_err(|| format!("Failed to read random file, size: {}", size))?; + // SAFETY: buffer is valid and large enough. + let ret = unsafe { + raw_read( + self.random_file.as_raw_fd(), + buffer.as_mut_ptr() as u64, + size as usize, + 0, + ) + }; + if ret < 0 { + bail!("Failed to read random file, size: {}", size); + } + size = ret as u32; - self.write_req_data(&elem.in_iovec, &mut buffer)?; + self.write_req_data(&elem.in_iovec, &mut buffer, size)?; queue_lock .vring - .add_used(&self.mem_space, elem.index, size) - .chain_err(|| { + .add_used(elem.index, size) + .with_context(|| { format!( "Failed to add used ring, index: {}, size: {}", elem.index, size @@ -122,42 +204,15 @@ impl RngHandler { } if need_interrupt { - (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue_lock)) - .chain_err(|| ErrorKind::InterruptTrigger("rng", VirtioInterruptType::Vring))?; + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue_lock), false) + .with_context(|| { + VirtioError::InterruptTrigger("rng", VirtioInterruptType::Vring) + })?; + trace::virtqueue_send_interrupt("Rng", &*queue_lock as *const _ as u64) } Ok(()) } - - fn deactivate_evt_handler(&self) -> Vec { - let mut notifiers = vec![ - EventNotifier::new( - NotifierOperation::Delete, - self.deactivate_evt, - None, - EventSet::IN, - Vec::new(), - ), - EventNotifier::new( - NotifierOperation::Delete, - self.queue_evt.as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - ), - ]; - if let Some(lb) = self.leak_bucket.as_ref() { - notifiers.push(EventNotifier::new( - NotifierOperation::Delete, - lb.as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - )); - } - - notifiers - } } impl EventNotifierHelper for RngHandler { @@ -166,16 +221,11 @@ impl EventNotifierHelper for RngHandler { // Register event notifier for queue_evt let rng_handler_clone = rng_handler.clone(); - let handler: Box = Box::new(move |_, fd: RawFd| { + let handler: Rc = Rc::new(move |_, fd: RawFd| { read_fd(fd); - if let Err(ref e) = rng_handler_clone.lock().unwrap().process_queue() { - error!( - "Failed to process queue for virtio rng, err: {}", - error_chain::ChainedError::display_chain(e), - ); + error!("Failed to process queue for virtio rng, err: {:?}", e,); } - None }); notifiers.push(EventNotifier::new( @@ -183,49 +233,28 @@ impl EventNotifierHelper for RngHandler { rng_handler.lock().unwrap().queue_evt.as_raw_fd(), None, EventSet::IN, - vec![Arc::new(Mutex::new(handler))], - )); - - // Register event notifier for deactivate_evt - let rng_handler_clone = rng_handler.clone(); - let handler: Box = Box::new(move |_, fd: RawFd| { - read_fd(fd); - Some(rng_handler_clone.lock().unwrap().deactivate_evt_handler()) - }); - notifiers.push(EventNotifier::new( - NotifierOperation::AddShared, - rng_handler.lock().unwrap().deactivate_evt, - None, - EventSet::IN, - vec![Arc::new(Mutex::new(handler))], + vec![handler], )); // Register timer event notifier for the limit of request bytes per second if let Some(lb) = rng_handler.lock().unwrap().leak_bucket.as_ref() { let rng_handler_clone = rng_handler.clone(); - let handler: Box = Box::new(move |_, fd: RawFd| { + let handler: Rc = Rc::new(move |_, fd: RawFd| { read_fd(fd); - if let Some(leak_bucket) = rng_handler_clone.lock().unwrap().leak_bucket.as_mut() { leak_bucket.clear_timer(); } - 
if let Err(ref e) = rng_handler_clone.lock().unwrap().process_queue() { - error!( - "Failed to process queue for virtio rng, err: {}", - error_chain::ChainedError::display_chain(e), - ); + error!("Failed to process queue for virtio rng, err: {:?}", e,); } - None }); - notifiers.push(EventNotifier::new( NotifierOperation::AddShared, lb.as_raw_fd(), None, EventSet::IN, - vec![Arc::new(Mutex::new(handler))], + vec![handler], )); } @@ -245,43 +274,41 @@ pub struct RngState { } /// Random number generator device structure +#[derive(Default)] pub struct Rng { - /// Configuration of virtio rng device + /// Virtio device base property. + base: VirtioBase, + /// Configuration of virtio rng device. rng_cfg: RngConfig, + /// Configuration of rng-random. + rngobj_cfg: RngObjConfig, /// The file descriptor of random number generator random_file: Option, - /// The state of Rng device. - state: RngState, - /// Eventfd for device deactivate - deactivate_evt: EventFd, } impl Rng { - pub fn new(rng_cfg: RngConfig) -> Self { + pub fn new(rng_cfg: RngConfig, rngobj_cfg: RngObjConfig) -> Self { Rng { + base: VirtioBase::new(VIRTIO_TYPE_RNG, QUEUE_NUM_RNG, DEFAULT_VIRTQUEUE_SIZE), rng_cfg, - random_file: None, - state: RngState { - device_features: 0, - driver_features: 0, - }, - deactivate_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), + rngobj_cfg, + ..Default::default() } } fn check_random_file(&self) -> Result<()> { - let path = Path::new(&self.rng_cfg.random_file); + let path = Path::new(&self.rngobj_cfg.filename); if !path.exists() { bail!( "The path of random file {} is not existed", - self.rng_cfg.random_file + self.rngobj_cfg.filename ); } if !path.metadata().unwrap().file_type().is_char_device() { bail!( "The type of random file {} is not a character special file", - self.rng_cfg.random_file + self.rngobj_cfg.filename ); } @@ -290,50 +317,23 @@ impl Rng { } impl VirtioDevice for Rng { - /// Realize virtio rng device. + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + fn realize(&mut self) -> Result<()> { self.check_random_file() - .chain_err(|| "Failed to check random file")?; - let file = File::open(&self.rng_cfg.random_file) - .chain_err(|| "Failed to open file of random number generator")?; - + .with_context(|| "Failed to check random file")?; + let file = File::open(&self.rngobj_cfg.filename) + .with_context(|| "Failed to open file of random number generator")?; self.random_file = Some(file); - self.state.device_features = 1 << VIRTIO_F_VERSION_1 as u64; + self.init_config_features()?; Ok(()) } - /// Get the virtio device type, refer to Virtio Spec. - fn device_type(&self) -> u32 { - VIRTIO_TYPE_RNG - } - - /// Get the count of virtio device queues. - fn queue_num(&self) -> usize { - QUEUE_NUM_RNG - } - - /// Get the queue size of virtio device. - fn queue_size(&self) -> u16 { - QUEUE_SIZE_RNG - } - - /// Get device features from host. - fn get_device_features(&self, features_select: u32) -> u32 { - read_u32(self.state.device_features, features_select) - } - - /// Set driver features by guest. 
- fn set_driver_features(&mut self, page: u32, value: u32) { - let mut v = write_u32(value, page); - let unrequested_features = v & !self.state.device_features; - if unrequested_features != 0 { - warn!("Received acknowledge request with unknown feature: {:x}", v); - v &= !unrequested_features; - } - self.state.driver_features |= v; + fn init_config_features(&mut self) -> Result<()> { + self.base.device_features = 1 << u64::from(VIRTIO_F_VERSION_1); + Ok(()) } - /// Read data of config from guest. fn read_config(&self, offset: u64, _data: &mut [u8]) -> Result<()> { bail!( "Reading device config space for rng is not supported, offset: {}", @@ -341,7 +341,6 @@ impl VirtioDevice for Rng { ); } - /// Write data to config from guest. fn write_config(&mut self, offset: u64, _data: &[u8]) -> Result<()> { bail!( "Writing device config space for rng is not supported, offset: {}", @@ -349,64 +348,61 @@ impl VirtioDevice for Rng { ); } - /// Activate the virtio device, this function is called by vcpu thread when frontend - /// virtio driver is ready and write `DRIVER_OK` to backend. fn activate( &mut self, mem_space: Arc, interrupt_cb: Arc, - queues: &[Arc>], - mut queue_evts: Vec, + queue_evts: Vec>, ) -> Result<()> { + let queues = &self.base.queues; let handler = RngHandler { queue: queues[0].clone(), - queue_evt: queue_evts.remove(0), - deactivate_evt: self.deactivate_evt.as_raw_fd(), + queue_evt: queue_evts[0].clone(), interrupt_cb, - driver_features: self.state.driver_features, + driver_features: self.base.driver_features, mem_space, random_file: self .random_file .as_ref() .unwrap() .try_clone() - .chain_err(|| "Failed to clone random file for virtio rng")?, - leak_bucket: self.rng_cfg.bytes_per_sec.map(LeakBucket::new), + .with_context(|| "Failed to clone random file for virtio rng")?, + leak_bucket: match self.rng_cfg.bytes_per_sec()? 
{ + Some(bps) => Some(LeakBucket::new(bps)?), + None => None, + }, }; - EventLoop::update_event( - EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))), - None, - )?; + let notifiers = EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))); + register_event_helper(notifiers, None, &mut self.base.deactivate_evts)?; Ok(()) } fn deactivate(&mut self) -> Result<()> { - self.deactivate_evt - .write(1) - .chain_err(|| ErrorKind::EventFdWrite) + unregister_event_helper(None, &mut self.base.deactivate_evts) } } impl StateTransfer for Rng { - fn get_state_vec(&self) -> migration::errors::Result> { - Ok(self.state.as_bytes().to_vec()) + fn get_state_vec(&self) -> Result> { + let state = RngState { + device_features: self.base.device_features, + driver_features: self.base.driver_features, + }; + Ok(state.as_bytes().to_vec()) } - fn set_state_mut(&mut self, state: &[u8]) -> migration::errors::Result<()> { - self.state = *RngState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("RNG"))?; - + fn set_state_mut(&mut self, state: &[u8]) -> Result<()> { + let state = RngState::from_bytes(state) + .with_context(|| migration::error::MigrationError::FromBytesError("RNG"))?; + self.base.device_features = state.device_features; + self.base.driver_features = state.driver_features; Ok(()) } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&RngState::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&RngState::descriptor().name).unwrap_or(!0) } } @@ -414,122 +410,133 @@ impl MigrationHook for Rng {} #[cfg(test)] mod tests { - use super::super::*; - use super::*; - use std::io::Write; use std::mem::size_of; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, Mutex}; - use address_space::{AddressSpace, GuestAddress, HostMemMapping, Region}; - use machine_manager::config::RngConfig; use vmm_sys_util::tempfile::TempFile; + use super::*; + use crate::tests::address_space_init; + use crate::*; + use address_space::AddressAttr; + use address_space::GuestAddress; + use machine_manager::config::{str_slip_to_clap, VmConfig, DEFAULT_VIRTQUEUE_SIZE}; + const VIRTQ_DESC_F_NEXT: u16 = 0x01; const VIRTQ_DESC_F_WRITE: u16 = 0x02; - const SYSTEM_SPACE_SIZE: u64 = (1024 * 1024) as u64; - - // build dummy address space of vm - fn address_space_init() -> Arc { - let root = Region::init_container_region(1 << 36); - let sys_space = AddressSpace::new(root).unwrap(); - let host_mmap = Arc::new( - HostMemMapping::new( - GuestAddress(0), - None, - SYSTEM_SPACE_SIZE, - None, - false, - false, - false, - ) - .unwrap(), - ); - sys_space - .root() - .add_subregion( - Region::init_ram_region(host_mmap.clone()), - host_mmap.start_address().raw_value(), - ) - .unwrap(); - sys_space + + #[test] + fn test_rng_config_cmdline_parse() { + // Test1: Right rng-random. + let mut vm_config = VmConfig::default(); + assert!(vm_config + .add_object("rng-random,id=objrng0,filename=/path/to/random_file") + .is_ok()); + let rngobj_cfg = vm_config.object.rng_object.remove("objrng0").unwrap(); + assert_eq!(rngobj_cfg.filename, "/path/to/random_file"); + + // Test2: virtio-rng-device + let rng_cmd = "virtio-rng-device,rng=objrng0"; + let rng_config = RngConfig::try_parse_from(str_slip_to_clap(rng_cmd, true, false)).unwrap(); + assert_eq!(rng_config.bytes_per_sec().unwrap(), None); + assert_eq!(rng_config.multifunction, None); + + // Test3: virtio-rng-pci. 
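+        // bytes_per_sec() here is max-bytes * 1000 / period = 1234 * 1000 / 1000 = 1234, as asserted below.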
+ let rng_cmd = "virtio-rng-pci,bus=pcie.0,addr=0x1,rng=objrng0,max-bytes=1234,period=1000"; + let rng_config = RngConfig::try_parse_from(str_slip_to_clap(rng_cmd, true, false)).unwrap(); + assert_eq!(rng_config.bytes_per_sec().unwrap(), Some(1234)); + assert_eq!(rng_config.bus.unwrap(), "pcie.0"); + assert_eq!(rng_config.addr.unwrap(), (1, 0)); + + // Test4: Illegal max-bytes/period. + let rng_cmd = "virtio-rng-device,rng=objrng0,max-bytes=63,period=1000"; + let rng_config = RngConfig::try_parse_from(str_slip_to_clap(rng_cmd, true, false)).unwrap(); + assert!(rng_config.bytes_per_sec().is_err()); + + let rng_cmd = "virtio-rng-device,rng=objrng0,max-bytes=1000000001,period=1000"; + let rng_config = RngConfig::try_parse_from(str_slip_to_clap(rng_cmd, true, false)).unwrap(); + assert!(rng_config.bytes_per_sec().is_err()); } #[test] fn test_rng_init() { - let file = TempFile::new().unwrap(); - let random_file = file.as_path().to_str().unwrap().to_string(); + let rngobj_config = RngObjConfig { + classtype: "rng-random".to_string(), + id: "rng0".to_string(), + filename: "".to_string(), + }; let rng_config = RngConfig { - id: "".to_string(), - random_file: random_file.clone(), - bytes_per_sec: Some(64), + classtype: "virtio-rng-pci".to_string(), + rng: "rng0".to_string(), + max_bytes: Some(64), + period: Some(1000), + bus: Some("pcie.0".to_string()), + addr: Some((3, 0)), + ..Default::default() }; - let rng = Rng::new(rng_config); + let rng = Rng::new(rng_config, rngobj_config); assert!(rng.random_file.is_none()); - assert_eq!(rng.state.driver_features, 0_u64); - assert_eq!(rng.state.device_features, 0_u64); - assert_eq!(rng.rng_cfg.random_file, random_file); - assert_eq!(rng.rng_cfg.bytes_per_sec, Some(64)); + assert_eq!(rng.base.driver_features, 0_u64); + assert_eq!(rng.base.device_features, 0_u64); + assert_eq!(rng.rng_cfg.bytes_per_sec().unwrap().unwrap(), 64); assert_eq!(rng.queue_num(), QUEUE_NUM_RNG); - assert_eq!(rng.queue_size(), QUEUE_SIZE_RNG); + assert_eq!(rng.queue_size_max(), DEFAULT_VIRTQUEUE_SIZE); assert_eq!(rng.device_type(), VIRTIO_TYPE_RNG); } #[test] fn test_rng_features() { - let random_file = TempFile::new() - .unwrap() - .as_path() - .to_str() - .unwrap() - .to_string(); - let rng_config = RngConfig { - id: "".to_string(), - random_file, - bytes_per_sec: Some(64), - }; - let mut rng = Rng::new(rng_config); + let rng_config = RngConfig::default(); + let rngobj_cfg = RngObjConfig::default(); + let mut rng = Rng::new(rng_config, rngobj_cfg); // If the device feature is 0, all driver features are not supported. - rng.state.device_features = 0; + rng.base.device_features = 0; let driver_feature: u32 = 0xFF; let page = 0_u32; rng.set_driver_features(page, driver_feature); - assert_eq!(rng.state.driver_features, 0_u64); - assert_eq!(rng.get_device_features(0_u32), 0_u32); + assert_eq!(rng.base.driver_features, 0_u64); + assert_eq!(u64::from(rng.driver_features(page)), 0_u64); + assert_eq!(rng.device_features(0_u32), 0_u32); let driver_feature: u32 = 0xFF; let page = 1_u32; rng.set_driver_features(page, driver_feature); - assert_eq!(rng.state.driver_features, 0_u64); - assert_eq!(rng.get_device_features(1_u32), 0_u32); + assert_eq!(rng.base.driver_features, 0_u64); + assert_eq!(u64::from(rng.driver_features(page)), 0_u64); + assert_eq!(rng.device_features(1_u32), 0_u32); // If both the device feature bit and the front-end driver feature bit are - // supported at the same time, this driver feature bit is supported. 
- rng.state.device_features = - 1_u64 << VIRTIO_F_VERSION_1 | 1_u64 << VIRTIO_F_RING_INDIRECT_DESC as u64; + // supported at the same time, this driver feature bit is supported. + rng.base.device_features = + 1_u64 << VIRTIO_F_VERSION_1 | 1_u64 << u64::from(VIRTIO_F_RING_INDIRECT_DESC); let driver_feature: u32 = 1_u32 << VIRTIO_F_RING_INDIRECT_DESC; let page = 0_u32; rng.set_driver_features(page, driver_feature); assert_eq!( - rng.state.driver_features, - (1_u64 << VIRTIO_F_RING_INDIRECT_DESC as u64) + rng.base.driver_features, + (1_u64 << u64::from(VIRTIO_F_RING_INDIRECT_DESC)) ); assert_eq!( - rng.get_device_features(page), + u64::from(rng.driver_features(page)), + (1_u64 << u64::from(VIRTIO_F_RING_INDIRECT_DESC)) + ); + assert_eq!( + rng.device_features(page), (1_u32 << VIRTIO_F_RING_INDIRECT_DESC) ); - rng.state.driver_features = 0; + rng.base.driver_features = 0; - rng.state.device_features = 1_u64 << VIRTIO_F_VERSION_1; + rng.base.device_features = 1_u64 << VIRTIO_F_VERSION_1; let driver_feature: u32 = 1_u32 << VIRTIO_F_RING_INDIRECT_DESC; let page = 0_u32; rng.set_driver_features(page, driver_feature); - assert_eq!(rng.state.driver_features, 0); - assert_eq!(rng.get_device_features(page), 0_u32); - rng.state.driver_features = 0; + assert_eq!(rng.base.driver_features, 0); + assert_eq!(rng.device_features(page), 0_u32); + rng.base.driver_features = 0; } #[test] @@ -541,7 +548,7 @@ mod tests { len: u32::max_value(), }, ElemIovec { - addr: GuestAddress(u32::max_value() as u64), + addr: GuestAddress(u64::from(u32::max_value())), len: 1_u32, }, ]; @@ -555,7 +562,7 @@ mod tests { len, }, ElemIovec { - addr: GuestAddress(u32::max_value() as u64), + addr: GuestAddress(u64::from(u32::max_value())), len, }, ]; @@ -569,39 +576,48 @@ mod tests { #[test] fn test_rng_process_queue_01() { let mem_space = address_space_init(); - let interrupt_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - let cloned_interrupt_evt = interrupt_evt.try_clone().unwrap(); + let interrupt_evt = Arc::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()); + let cloned_interrupt_evt = interrupt_evt.clone(); let interrupt_status = Arc::new(AtomicU32::new(0)); let interrupt_cb = Arc::new(Box::new( - move |int_type: &VirtioInterruptType, _queue: Option<&Queue>| { + move |int_type: &VirtioInterruptType, _queue: Option<&Queue>, _needs_reset: bool| { let status = match int_type { VirtioInterruptType::Config => VIRTIO_MMIO_INT_CONFIG, VirtioInterruptType::Vring => VIRTIO_MMIO_INT_VRING, }; interrupt_status.fetch_or(status, Ordering::SeqCst); - interrupt_evt.write(1).chain_err(|| ErrorKind::EventFdWrite) + interrupt_evt + .write(1) + .with_context(|| VirtioError::EventFdWrite) }, ) as VirtioInterrupt); - let mut queue_config = QueueConfig::new(QUEUE_SIZE_RNG); + let mut queue_config = QueueConfig::new(DEFAULT_VIRTQUEUE_SIZE); queue_config.desc_table = GuestAddress(0); - queue_config.addr_cache.desc_table_host = - mem_space.get_host_address(queue_config.desc_table).unwrap(); - queue_config.avail_ring = GuestAddress(16 * QUEUE_SIZE_RNG as u64); - queue_config.addr_cache.avail_ring_host = - mem_space.get_host_address(queue_config.avail_ring).unwrap(); - queue_config.used_ring = GuestAddress(32 * QUEUE_SIZE_RNG as u64); - queue_config.addr_cache.used_ring_host = - mem_space.get_host_address(queue_config.used_ring).unwrap(); - queue_config.size = QUEUE_SIZE_RNG; + queue_config.addr_cache.desc_table_host = unsafe { + mem_space + .get_host_address(queue_config.desc_table, AddressAttr::Ram) + .unwrap() + }; + queue_config.avail_ring = 
GuestAddress(16 * u64::from(DEFAULT_VIRTQUEUE_SIZE)); + queue_config.addr_cache.avail_ring_host = unsafe { + mem_space + .get_host_address(queue_config.avail_ring, AddressAttr::Ram) + .unwrap() + }; + queue_config.used_ring = GuestAddress(32 * u64::from(DEFAULT_VIRTQUEUE_SIZE)); + queue_config.addr_cache.used_ring_host = unsafe { + mem_space + .get_host_address(queue_config.used_ring, AddressAttr::Ram) + .unwrap() + }; + queue_config.size = DEFAULT_VIRTQUEUE_SIZE; queue_config.ready = true; let file = TempFile::new().unwrap(); - let reset_event = EventFd::new(libc::EFD_NONBLOCK).unwrap(); let mut rng_handler = RngHandler { queue: Arc::new(Mutex::new(Queue::new(queue_config, 1).unwrap())), - queue_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - deactivate_evt: reset_event.as_raw_fd(), + queue_evt: Arc::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()), interrupt_cb, driver_features: 0_u64, mem_space: mem_space.clone(), @@ -618,15 +634,23 @@ mod tests { }; // write table descriptor for queue mem_space - .write_object(&desc, queue_config.desc_table) + .write_object(&desc, queue_config.desc_table, AddressAttr::Ram) .unwrap(); // write avail_ring idx mem_space - .write_object::(&0, GuestAddress(queue_config.avail_ring.0 + 4 as u64)) + .write_object::( + &0, + GuestAddress(queue_config.avail_ring.0 + 4_u64), + AddressAttr::Ram, + ) .unwrap(); // write avail_ring idx mem_space - .write_object::(&1, GuestAddress(queue_config.avail_ring.0 + 2 as u64)) + .write_object::( + &1, + GuestAddress(queue_config.avail_ring.0 + 2_u64), + AddressAttr::Ram, + ) .unwrap(); let buffer = vec![1_u8; data_len as usize]; @@ -637,13 +661,17 @@ mod tests { .read( &mut read_buffer.as_mut_slice(), GuestAddress(0x40000), - data_len as u64 + u64::from(data_len), + AddressAttr::Ram ) .is_ok()); assert_eq!(read_buffer, buffer); let idx = mem_space - .read_object::(GuestAddress(queue_config.used_ring.0 + 2 as u64)) + .read_object::( + GuestAddress(queue_config.used_ring.0 + 2_u64), + AddressAttr::Ram, + ) .unwrap(); assert_eq!(idx, 1); assert_eq!(cloned_interrupt_evt.read().unwrap(), 1); @@ -652,39 +680,48 @@ mod tests { #[test] fn test_rng_process_queue_02() { let mem_space = address_space_init(); - let interrupt_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - let cloned_interrupt_evt = interrupt_evt.try_clone().unwrap(); + let interrupt_evt = Arc::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()); + let cloned_interrupt_evt = interrupt_evt.clone(); let interrupt_status = Arc::new(AtomicU32::new(0)); let interrupt_cb = Arc::new(Box::new( - move |int_type: &VirtioInterruptType, _queue: Option<&Queue>| { + move |int_type: &VirtioInterruptType, _queue: Option<&Queue>, _needs_reset: bool| { let status = match int_type { VirtioInterruptType::Config => VIRTIO_MMIO_INT_CONFIG, VirtioInterruptType::Vring => VIRTIO_MMIO_INT_VRING, }; interrupt_status.fetch_or(status, Ordering::SeqCst); - interrupt_evt.write(1).chain_err(|| ErrorKind::EventFdWrite) + interrupt_evt + .write(1) + .with_context(|| VirtioError::EventFdWrite) }, ) as VirtioInterrupt); - let mut queue_config = QueueConfig::new(QUEUE_SIZE_RNG); + let mut queue_config = QueueConfig::new(DEFAULT_VIRTQUEUE_SIZE); queue_config.desc_table = GuestAddress(0); - queue_config.addr_cache.desc_table_host = - mem_space.get_host_address(queue_config.desc_table).unwrap(); - queue_config.avail_ring = GuestAddress(16 * QUEUE_SIZE_RNG as u64); - queue_config.addr_cache.avail_ring_host = - mem_space.get_host_address(queue_config.avail_ring).unwrap(); - queue_config.used_ring = GuestAddress(32 
* QUEUE_SIZE_RNG as u64); - queue_config.addr_cache.used_ring_host = - mem_space.get_host_address(queue_config.used_ring).unwrap(); - queue_config.size = QUEUE_SIZE_RNG; + queue_config.addr_cache.desc_table_host = unsafe { + mem_space + .get_host_address(queue_config.desc_table, AddressAttr::Ram) + .unwrap() + }; + queue_config.avail_ring = GuestAddress(16 * u64::from(DEFAULT_VIRTQUEUE_SIZE)); + queue_config.addr_cache.avail_ring_host = unsafe { + mem_space + .get_host_address(queue_config.avail_ring, AddressAttr::Ram) + .unwrap() + }; + queue_config.used_ring = GuestAddress(32 * u64::from(DEFAULT_VIRTQUEUE_SIZE)); + queue_config.addr_cache.used_ring_host = unsafe { + mem_space + .get_host_address(queue_config.used_ring, AddressAttr::Ram) + .unwrap() + }; + queue_config.size = DEFAULT_VIRTQUEUE_SIZE; queue_config.ready = true; let file = TempFile::new().unwrap(); - let reset_event = EventFd::new(libc::EFD_NONBLOCK).unwrap(); let mut rng_handler = RngHandler { queue: Arc::new(Mutex::new(Queue::new(queue_config, 1).unwrap())), - queue_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - deactivate_evt: reset_event.as_raw_fd(), + queue_evt: Arc::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()), interrupt_cb, driver_features: 0_u64, mem_space: mem_space.clone(), @@ -701,7 +738,7 @@ mod tests { }; // write table descriptor for queue mem_space - .write_object(&desc, queue_config.desc_table) + .write_object(&desc, queue_config.desc_table, AddressAttr::Ram) .unwrap(); let desc = SplitVringDesc { @@ -715,16 +752,25 @@ mod tests { .write_object( &desc, GuestAddress(queue_config.desc_table.0 + size_of::() as u64), + AddressAttr::Ram, ) .unwrap(); // write avail_ring idx mem_space - .write_object::(&0, GuestAddress(queue_config.avail_ring.0 + 4 as u64)) + .write_object::( + &0, + GuestAddress(queue_config.avail_ring.0 + 4_u64), + AddressAttr::Ram, + ) .unwrap(); // write avail_ring idx mem_space - .write_object::(&1, GuestAddress(queue_config.avail_ring.0 + 2 as u64)) + .write_object::( + &1, + GuestAddress(queue_config.avail_ring.0 + 2_u64), + AddressAttr::Ram, + ) .unwrap(); let mut buffer1 = vec![1_u8; data_len as usize]; @@ -740,7 +786,8 @@ mod tests { .read( &mut read_buffer.as_mut_slice(), GuestAddress(0x40000), - data_len as u64 + u64::from(data_len), + AddressAttr::Ram ) .is_ok()); assert_eq!(read_buffer, buffer1_check); @@ -748,13 +795,17 @@ mod tests { .read( &mut read_buffer.as_mut_slice(), GuestAddress(0x50000), - data_len as u64 + u64::from(data_len), + AddressAttr::Ram ) .is_ok()); assert_eq!(read_buffer, buffer2_check); let idx = mem_space - .read_object::(GuestAddress(queue_config.used_ring.0 + 2 as u64)) + .read_object::( + GuestAddress(queue_config.used_ring.0 + 2_u64), + AddressAttr::Ram, + ) .unwrap(); assert_eq!(idx, 1); assert_eq!(cloned_interrupt_evt.read().unwrap(), 1); diff --git a/virtio/src/device/scsi_cntlr.rs b/virtio/src/device/scsi_cntlr.rs new file mode 100644 index 0000000000000000000000000000000000000000..1c03b471d175782306e3f3ebf49694e77e525ed0 --- /dev/null +++ b/virtio/src/device/scsi_cntlr.rs @@ -0,0 +1,1018 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::mem::size_of; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::rc::Rc; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Mutex}; + +use anyhow::{bail, Context, Result}; +use clap::Parser; +use log::{error, info, warn}; +use vmm_sys_util::{epoll::EventSet, eventfd::EventFd}; + +use crate::{ + check_config_space_rw, gpa_hva_iovec_map, iov_discard_front, iov_read_object, + read_config_default, report_virtio_error, Element, Queue, VirtioBase, VirtioDevice, + VirtioError, VirtioInterrupt, VirtioInterruptType, VIRTIO_F_RING_EVENT_IDX, + VIRTIO_F_RING_INDIRECT_DESC, VIRTIO_F_VERSION_1, VIRTIO_TYPE_SCSI, +}; +use address_space::{AddressAttr, AddressSpace, GuestAddress}; +use block_backend::BlockIoErrorCallback; +use devices::ScsiBus::{ + ScsiBus, ScsiRequest, ScsiRequestOps, ScsiSense, ScsiXferMode, CHECK_CONDITION, + EMULATE_SCSI_OPS, SCSI_CMD_BUF_SIZE, SCSI_SENSE_INVALID_OPCODE, +}; +use devices::ScsiDisk::ScsiDevice; +use devices::{convert_device_ref, Bus, SCSI_DEVICE}; +use machine_manager::config::{ + get_pci_df, parse_bool, valid_block_device_virtqueue_size, valid_id, MAX_VIRTIO_QUEUE, +}; +use machine_manager::event_loop::{register_event_helper, unregister_event_helper, EventLoop}; +use util::aio::Iovec; +use util::byte_code::ByteCode; +use util::gen_base_func; +use util::loop_context::{ + read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, +}; + +/// According to Virtio Spec. +/// Max_channel should be 0. +/// Max_target should be less than or equal to 255. +const VIRTIO_SCSI_MAX_TARGET: u16 = 255; +/// Max_lun should be less than or equal to 16383 (2^14 - 1). +const VIRTIO_SCSI_MAX_LUN: u32 = 16383; + +/// Virtio Scsi Controller has 1 ctrl queue, 1 event queue and at least 1 cmd queue. +const SCSI_CTRL_QUEUE_NUM: usize = 1; +const SCSI_EVENT_QUEUE_NUM: usize = 1; +const SCSI_MIN_QUEUE_NUM: usize = 3; + +/// Default values of the cdb and sense data size configuration fields. Cannot change cdb size +/// and sense data size Now. +/// To do: support Override CDB/sense data size.(Guest controlled) +const VIRTIO_SCSI_CDB_DEFAULT_SIZE: usize = 32; +const VIRTIO_SCSI_SENSE_DEFAULT_SIZE: usize = 96; + +/// Basic length of fixed format sense data. +const SCSI_SENSE_LEN: u32 = 18; + +/// Control type codes. +/// Task Management Function. +const VIRTIO_SCSI_T_TMF: u32 = 0; +/// Asynchronous notification query. +const VIRTIO_SCSI_T_AN_QUERY: u32 = 1; +/// Asynchronous notification subscription. +const VIRTIO_SCSI_T_AN_SUBSCRIBE: u32 = 2; + +/// Valid TMF Subtypes. +pub const VIRTIO_SCSI_T_TMF_ABORT_TASK: u32 = 0; +pub const VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: u32 = 1; +pub const VIRTIO_SCSI_T_TMF_CLEAR_ACA: u32 = 2; +pub const VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: u32 = 3; +pub const VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: u32 = 4; +pub const VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: u32 = 5; +pub const VIRTIO_SCSI_T_TMF_QUERY_TASK: u32 = 6; +pub const VIRTIO_SCSI_T_TMF_QUERY_TASK_SET: u32 = 7; + +/// Command-specific response values. +/// The request was completed and the status byte if filled with a SCSI status code. 
+const VIRTIO_SCSI_S_OK: u8 = 0; +/// If the content of the CDB(such as the allocation length, parameter length or transfer size) +/// requires more data than is available in the datain and dataout buffers. +const VIRTIO_SCSI_S_OVERRUN: u8 = 1; +/// The request was never processed because the target indicated by lun does not exist. +const VIRTIO_SCSI_S_BAD_TARGET: u8 = 3; +/// Other host or driver error. In particular, if neither dataout nor datain is empty, and the +/// VIRTIO_SCSI_F_INOUT feature has not been negotiated, the request will be immediately returned +/// with a response equal to VIRTIO_SCSI_S_FAILURE. +const VIRTIO_SCSI_S_FAILURE: u8 = 9; + +#[derive(Parser, Debug, Clone, Default)] +#[command(no_binary_name(true))] +pub struct ScsiCntlrConfig { + #[arg(long, value_parser = ["virtio-scsi-pci"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub bus: String, + #[arg(long, value_parser = get_pci_df)] + pub addr: (u8, u8), + #[arg(long, value_parser = parse_bool)] + pub multifunction: Option, + #[arg(long, alias = "num-queues", value_parser = clap::value_parser!(u32).range(1..=MAX_VIRTIO_QUEUE as i64))] + pub num_queues: Option, + #[arg(long)] + pub iothread: Option, + #[arg(long, alias = "queue-size", default_value = "256", value_parser = valid_block_device_virtqueue_size)] + pub queue_size: u16, +} + +#[repr(C, packed)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioScsiConfig { + num_queues: u32, + seg_max: u32, + max_sectors: u32, + cmd_per_lun: u32, + event_info_size: u32, + sense_size: u32, + cdb_size: u32, + max_channel: u16, + max_target: u16, + max_lun: u32, +} + +impl ByteCode for VirtioScsiConfig {} + +/// Virtio Scsi Controller device structure. +#[derive(Default)] +pub struct ScsiCntlr { + /// Virtio device base property. + base: VirtioBase, + /// Configuration of the virtio scsi controller. + pub config: ScsiCntlrConfig, + /// Config space of the virtio scsi controller. + config_space: VirtioScsiConfig, + /// Scsi bus. + pub bus: Option>>, +} + +impl ScsiCntlr { + pub fn new(config: ScsiCntlrConfig) -> ScsiCntlr { + // Note: config.queues <= MAX_VIRTIO_QUEUE(32). + let queue_num = + config.num_queues.unwrap() as usize + SCSI_CTRL_QUEUE_NUM + SCSI_EVENT_QUEUE_NUM; + let queue_size = config.queue_size; + + Self { + base: VirtioBase::new(VIRTIO_TYPE_SCSI, queue_num, queue_size), + config, + ..Default::default() + } + } + + fn gen_error_cb(&self, interrupt_cb: Arc) -> BlockIoErrorCallback { + let cloned_features = self.base.driver_features; + let clone_broken = self.base.broken.clone(); + Arc::new(move || { + report_virtio_error(interrupt_cb.clone(), cloned_features, &clone_broken); + }) + } +} + +impl VirtioDevice for ScsiCntlr { + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + + fn realize(&mut self) -> Result<()> { + // If iothread not found, return err. + if self.config.iothread.is_some() + && EventLoop::get_ctx(self.config.iothread.as_ref()).is_none() + { + bail!( + "IOThread {:?} of virtio scsi is not configured in params.", + self.config.iothread, + ); + } + self.init_config_features()?; + Ok(()) + } + + fn init_config_features(&mut self) -> Result<()> { + self.config_space.max_sectors = 0xFFFF_u32; + // cmd_per_lun: maximum number of linked commands can be sent to one LUN. 32bit. + self.config_space.cmd_per_lun = 128; + // seg_max: queue size - 2, 32 bit. 
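+        // e.g. with the default queue-size of 256 this yields seg_max = 254.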
+ self.config_space.seg_max = u32::from(self.queue_size_max()) - 2; + self.config_space.max_target = VIRTIO_SCSI_MAX_TARGET; + self.config_space.max_lun = VIRTIO_SCSI_MAX_LUN; + // num_queues: request queues number. + self.config_space.num_queues = self.config.num_queues.unwrap(); + + self.base.device_features |= (1_u64 << VIRTIO_F_VERSION_1) + | (1_u64 << VIRTIO_F_RING_EVENT_IDX) + | (1_u64 << VIRTIO_F_RING_INDIRECT_DESC); + + Ok(()) + } + + fn unrealize(&mut self) -> Result<()> { + Ok(()) + } + + fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()> { + read_config_default(self.config_space.as_bytes(), offset, data) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { + check_config_space_rw(self.config_space.as_bytes(), offset, data)?; + // Guest can only set sense_size and cdb_size, which are fixed default values + // (VIRTIO_SCSI_CDB_DEFAULT_SIZE; VIRTIO_SCSI_SENSE_DEFAULT_SIZE) and cannot be + // changed in stratovirt now. So, do nothing when guest writes config. + Ok(()) + } + + fn activate( + &mut self, + mem_space: Arc, + interrupt_cb: Arc, + queue_evts: Vec>, + ) -> Result<()> { + let queues = self.base.queues.clone(); + if queues.len() < SCSI_MIN_QUEUE_NUM { + bail!("virtio scsi controller queues num can not be less than 3!"); + } + + // Register event notifier for ctrl queue. + let ctrl_queue = queues[0].clone(); + let ctrl_queue_evt = queue_evts[0].clone(); + let ctrl_handler = ScsiCtrlQueueHandler { + queue: ctrl_queue, + queue_evt: ctrl_queue_evt, + mem_space: mem_space.clone(), + interrupt_cb: interrupt_cb.clone(), + driver_features: self.base.driver_features, + device_broken: self.base.broken.clone(), + }; + let notifiers = EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(ctrl_handler))); + register_event_helper( + notifiers, + self.config.iothread.as_ref(), + &mut self.base.deactivate_evts, + )?; + + // Register event notifier for event queue. + let event_queue = queues[1].clone(); + let event_queue_evt = queue_evts[1].clone(); + let event_handler = ScsiEventQueueHandler { + _queue: event_queue, + queue_evt: event_queue_evt, + _mem_space: mem_space.clone(), + _interrupt_cb: interrupt_cb.clone(), + _driver_features: self.base.driver_features, + device_broken: self.base.broken.clone(), + }; + let notifiers = + EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(event_handler))); + register_event_helper( + notifiers, + self.config.iothread.as_ref(), + &mut self.base.deactivate_evts, + )?; + + // Register event notifier for command queues. + for (index, cmd_queue) in queues[2..].iter().enumerate() { + let bus = self.bus.as_ref().unwrap(); + let cmd_handler = ScsiCmdQueueHandler { + scsibus: bus.clone(), + queue: cmd_queue.clone(), + queue_evt: queue_evts[index + 2].clone(), + mem_space: mem_space.clone(), + interrupt_cb: interrupt_cb.clone(), + driver_features: self.base.driver_features, + device_broken: self.base.broken.clone(), + }; + + let notifiers = + EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(cmd_handler))); + + register_event_helper( + notifiers, + self.config.iothread.as_ref(), + &mut self.base.deactivate_evts, + )?; + } + self.base.broken.store(false, Ordering::SeqCst); + + // Register event notifier for device aio. 
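+        // Every scsi disk on this bus registers an error callback that reports a virtio error
+        // and marks the controller as broken when its block backend hits an aio failure.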
+        let bus = self.bus.as_ref().unwrap();
+        let locked_bus = bus.lock().unwrap();
+        for device in locked_bus.child_devices().values() {
+            SCSI_DEVICE!(device, locked_dev, scsi_dev);
+            let err_cb = self.gen_error_cb(interrupt_cb.clone());
+            // SAFETY: the disk_image is assigned after the device is realized.
+            let disk_image = scsi_dev.block_backend.as_ref().unwrap();
+            let mut locked_backend = disk_image.lock().unwrap();
+            locked_backend.register_io_event(self.base.broken.clone(), err_cb)?;
+        }
+        Ok(())
+    }
+
+    fn deactivate(&mut self) -> Result<()> {
+        unregister_event_helper(
+            self.config.iothread.as_ref(),
+            &mut self.base.deactivate_evts,
+        )?;
+        let bus = self.bus.as_ref().unwrap();
+        let locked_bus = bus.lock().unwrap();
+        for device in locked_bus.child_devices().values() {
+            SCSI_DEVICE!(device, locked_dev, scsi_dev);
+            // SAFETY: the disk_image is assigned after the device is realized.
+            let disk_image = scsi_dev.block_backend.as_ref().unwrap();
+            let mut locked_backend = disk_image.lock().unwrap();
+            locked_backend.unregister_io_event()?;
+        }
+        Ok(())
+    }
+}
+
+fn build_event_notifier(fd: RawFd, handler: Rc<NotifierCallback>) -> EventNotifier {
+    EventNotifier::new(
+        NotifierOperation::AddShared,
+        fd,
+        None,
+        EventSet::IN,
+        vec![handler],
+    )
+}
+
+/// Task Management Request.
+#[allow(unused)]
+#[derive(Copy, Clone, Debug, Default)]
+struct VirtioScsiCtrlTmfReq {
+    ctrltype: u32,
+    subtype: u32,
+    lun: [u8; 8],
+    tag: u64,
+}
+
+impl ByteCode for VirtioScsiCtrlTmfReq {}
+
+#[allow(unused)]
+#[derive(Copy, Clone, Debug, Default)]
+struct VirtioScsiCtrlTmfResp {
+    response: u8,
+}
+
+impl ByteCode for VirtioScsiCtrlTmfResp {}
+
+/// Asynchronous notification query/subscription.
+#[allow(unused)]
+#[derive(Copy, Clone, Debug, Default)]
+struct VirtioScsiCtrlAnReq {
+    ctrltype: u32,
+    lun: [u8; 8],
+    event_requested: u32,
+}
+
+impl ByteCode for VirtioScsiCtrlAnReq {}
+
+#[allow(unused)]
+#[derive(Copy, Clone, Debug, Default)]
+struct VirtioScsiCtrlAnResp {
+    event_actual: u32,
+    response: u8,
+}
+
+impl ByteCode for VirtioScsiCtrlAnResp {}
+
+#[repr(C, packed)]
+#[derive(Default, Clone, Copy)]
+struct VirtioScsiCmdReq {
+    /// Logical Unit Number.
+    lun: [u8; 8],
+    /// Command identifier.
+    tag: u64,
+    /// Task attribute.
+    task_attr: u8,
+    /// SAM command priority field.
+    prio: u8,
+    crn: u8,
+    cdb: [u8; VIRTIO_SCSI_CDB_DEFAULT_SIZE],
+}
+
+impl ByteCode for VirtioScsiCmdReq {}
+
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct VirtioScsiCmdResp {
+    /// Sense data length.
+    sense_len: u32,
+    /// Residual bytes in data buffer.
+    resid: u32,
+    /// Status qualifier.
+    status_qualifier: u16,
+    /// Command completion status.
+    status: u8,
+    /// Response value.
+    response: u8,
+    /// Sense buffer data.
+    sense: [u8; VIRTIO_SCSI_SENSE_DEFAULT_SIZE],
+}
+
+impl Default for VirtioScsiCmdResp {
+    fn default() -> Self {
+        VirtioScsiCmdResp {
+            sense_len: 0,
+            resid: 0,
+            status_qualifier: 0,
+            status: 0,
+            response: 0,
+            sense: [0; VIRTIO_SCSI_SENSE_DEFAULT_SIZE],
+        }
+    }
+}
+
+impl VirtioScsiCmdResp {
+    fn set_scsi_sense(&mut self, sense: ScsiSense) {
+        // Response code: current errors (0x70).
+        self.sense[0] = 0x70;
+        self.sense[2] = sense.key;
+        // Additional sense length: sense len - 8.
+        self.sense[7] = SCSI_SENSE_LEN as u8 - 8;
+        self.sense[12] = sense.asc;
+        self.sense[13] = sense.ascq;
+        self.sense_len = SCSI_SENSE_LEN;
+    }
+}
+
+impl ByteCode for VirtioScsiCmdResp {}
+
+/// T: request; U: response.
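+/// T is the fixed-size request header read from the element's out_iovec, and U is the
+/// response header written back to the head of the in_iovec when the request completes.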
+#[derive(Clone)] +struct VirtioScsiRequest { + mem_space: Arc, + queue: Arc>, + desc_index: u16, + /// Read or Write data, HVA, except resp. + iovec: Vec, + data_len: u32, + mode: ScsiXferMode, + interrupt_cb: Arc, + driver_features: u64, + /// resp GPA. + resp_addr: GuestAddress, + req: T, + resp: U, +} + +// Requests in Command Queue. +type CmdQueueRequest = VirtioScsiRequest; +// TMF Requests in Ctrl Queue. +type CtrlQueueTmfRequest = VirtioScsiRequest; +// An Requests in Command Queue. +type CtrlQueueAnRequest = VirtioScsiRequest; + +/// T: request; U:response. +impl VirtioScsiRequest { + fn new( + mem_space: &Arc, + queue: Arc>, + interrupt_cb: Arc, + driver_features: u64, + elem: &Element, + ) -> Result { + if elem.out_iovec.is_empty() || elem.in_iovec.is_empty() { + bail!( + "Missed header for scsi request: out {} in {} desc num {}", + elem.out_iovec.len(), + elem.in_iovec.len(), + elem.desc_num + ); + } + let locked_queue = queue.lock().unwrap(); + let cache = locked_queue.vring.get_cache(); + + // Get request from virtqueue Element. + let req = iov_read_object::(mem_space, &elem.out_iovec, cache)?; + // Get response from virtqueue Element. + let resp = iov_read_object::(mem_space, &elem.in_iovec, cache)?; + + let mut request = VirtioScsiRequest { + mem_space: mem_space.clone(), + queue: queue.clone(), + desc_index: elem.index, + iovec: Vec::with_capacity(elem.desc_num as usize), + data_len: 0, + mode: ScsiXferMode::ScsiXferNone, + interrupt_cb, + driver_features, + // Safety: in_iovec will not be empty since it has been checked after "iov_read_object". + resp_addr: elem.in_iovec[0].addr, + req, + resp, + }; + + // Get possible dataout buffer from virtqueue Element. + let mut iovec = elem.out_iovec.clone(); + let elemiov = iov_discard_front(&mut iovec, size_of::() as u64).unwrap_or_default(); + let (out_len, out_iovec) = gpa_hva_iovec_map(elemiov, mem_space, cache)?; + + // Get possible dataout buffer from virtqueue Element. + let mut iovec = elem.in_iovec.clone(); + let elemiov = iov_discard_front(&mut iovec, size_of::() as u64).unwrap_or_default(); + let (in_len, in_iovec) = gpa_hva_iovec_map(elemiov, mem_space, cache)?; + + if out_len > 0 && in_len > 0 { + warn!("Wrong scsi request! Don't support both datain and dataout buffer"); + request.data_len = u32::MAX; + return Ok(request); + } + + if out_len > 0 { + request.mode = ScsiXferMode::ScsiXferToDev; + request.data_len = out_len as u32; + request.iovec = out_iovec; + } else if in_len > 0 { + request.mode = ScsiXferMode::ScsiXferFromDev; + request.data_len = in_len as u32; + request.iovec = in_iovec; + } + + Ok(request) + } + + fn complete(&self) -> Result<()> { + self.mem_space + .write_object(&self.resp, self.resp_addr, AddressAttr::Ram) + .with_context(|| "Failed to write the scsi response")?; + + let mut queue_lock = self.queue.lock().unwrap(); + // Note: U(response) is the header part of in_iov and self.data_len is the rest part of the + // in_iov or the out_iov. in_iov and out_iov total len is no more than + // DESC_CHAIN_MAX_TOTAL_LEN(1 << 32). So, it will not overflow here. 
+ queue_lock + .vring + .add_used(self.desc_index, self.data_len + (size_of::() as u32)) + .with_context(|| { + format!( + "Failed to add used ring(scsi completion), index {}, len {}", + self.desc_index, self.data_len + ) + })?; + + if queue_lock.vring.should_notify(self.driver_features) { + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue_lock), false) + .with_context(|| { + VirtioError::InterruptTrigger( + "scsi controller aio completion", + VirtioInterruptType::Vring, + ) + })?; + trace::virtqueue_send_interrupt("ScsiCntlr", &*queue_lock as *const _ as u64); + } + + Ok(()) + } +} + +struct ScsiCtrlQueueHandler { + /// The ctrl virtqueue. + queue: Arc>, + /// EventFd for the ctrl virtqueue. + queue_evt: Arc, + /// The address space to which the scsi HBA belongs. + mem_space: Arc, + /// The interrupt callback function. + interrupt_cb: Arc, + /// Bit mask of features negotiated by the backend and the frontend. + driver_features: u64, + /// Device is broken or not. + device_broken: Arc, +} + +impl ScsiCtrlQueueHandler { + fn handle_ctrl(&mut self) -> Result<()> { + trace::virtio_receive_request("ScsiCntlr".to_string(), "to ctrl".to_string()); + let result = self.handle_ctrl_queue_requests(); + if result.is_err() { + report_virtio_error( + self.interrupt_cb.clone(), + self.driver_features, + &self.device_broken, + ); + } + + result + } + + fn handle_ctrl_queue_requests(&mut self) -> Result<()> { + loop { + let mut queue = self.queue.lock().unwrap(); + let elem = queue + .vring + .pop_avail(&self.mem_space, self.driver_features)?; + drop(queue); + if elem.desc_num == 0 { + break; + } + + let ctrl_desc = elem + .out_iovec + .first() + .with_context(|| "Error request in ctrl queue. Empty dataout buf!")?; + let ctrl_type = self + .mem_space + .read_object::(ctrl_desc.addr, AddressAttr::Ram) + .with_context(|| "Failed to get control queue descriptor")?; + + match ctrl_type { + VIRTIO_SCSI_T_TMF => { + let mut tmf = CtrlQueueTmfRequest::new( + &self.mem_space, + self.queue.clone(), + self.interrupt_cb.clone(), + self.driver_features, + &elem, + )?; + info!("incomplete tmf req, subtype {}!", tmf.req.subtype); + // Scsi Task Management Function is not supported. + // So, do nothing when stratovirt receives TMF request except responding guest + // scsi drivers. + tmf.resp.response = VIRTIO_SCSI_S_OK; + tmf.complete()?; + } + VIRTIO_SCSI_T_AN_QUERY | VIRTIO_SCSI_T_AN_SUBSCRIBE => { + let mut an = CtrlQueueAnRequest::new( + &self.mem_space, + self.queue.clone(), + self.interrupt_cb.clone(), + self.driver_features, + &elem, + )?; + an.resp.event_actual = 0; + an.resp.response = VIRTIO_SCSI_S_OK; + an.complete()?; + } + _ => { + bail!("Invalid ctrl type {}", ctrl_type); + } + } + } + + Ok(()) + } +} + +impl EventNotifierHelper for ScsiCtrlQueueHandler { + fn internal_notifiers(handler: Arc>) -> Vec { + let mut notifiers = Vec::new(); + + let h_locked = handler.lock().unwrap(); + let h_clone = handler.clone(); + let h: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let mut h_lock = h_clone.lock().unwrap(); + if h_lock.device_broken.load(Ordering::SeqCst) { + return None; + } + h_lock + .handle_ctrl() + .unwrap_or_else(|e| error!("Failed to handle ctrl queue, error is {:?}", e)); + None + }); + notifiers.push(build_event_notifier(h_locked.queue_evt.as_raw_fd(), h)); + + notifiers + } +} + +struct ScsiEventQueueHandler { + /// The Event virtqueue. + _queue: Arc>, + /// EventFd for the Event virtqueue. + queue_evt: Arc, + /// The address space to which the scsi HBA belongs. 
+ _mem_space: Arc, + /// The interrupt callback function. + _interrupt_cb: Arc, + /// Bit mask of features negotiated by the backend and the frontend. + _driver_features: u64, + /// Device is broken or not. + device_broken: Arc, +} + +impl EventNotifierHelper for ScsiEventQueueHandler { + fn internal_notifiers(handler: Arc>) -> Vec { + let mut notifiers = Vec::new(); + + let h_locked = handler.lock().unwrap(); + let h_clone = handler.clone(); + let h: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let mut h_lock = h_clone.lock().unwrap(); + if h_lock.device_broken.load(Ordering::SeqCst) { + return None; + } + h_lock + .handle_event() + .unwrap_or_else(|e| error!("Failed to handle event queue, err is {:?}", e)); + None + }); + notifiers.push(build_event_notifier(h_locked.queue_evt.as_raw_fd(), h)); + + notifiers + } +} + +impl ScsiEventQueueHandler { + fn handle_event(&mut self) -> Result<()> { + Ok(()) + } +} + +impl ScsiRequestOps for CmdQueueRequest { + fn scsi_request_complete_cb(&mut self, status: u8, scsisense: Option) -> Result<()> { + if let Some(sense) = scsisense { + self.resp.set_scsi_sense(sense); + } + self.resp.response = VIRTIO_SCSI_S_OK; + self.resp.status = status; + trace::virtio_scsi_handle_cmd_resp( + self.req.lun[1], + virtio_scsi_get_lun_id(self.req.lun), + self.req.tag, + self.resp.status, + self.resp.response, + ); + self.complete()?; + + Ok(()) + } +} + +// lun: [u8, 8] +// | Byte 0 | Byte 1 | Byte 2 | Byte 3 | Byte 4 | Byte 5 | Byte 6 | Byte 7 | +// | 1 | target | lun | 0 | +fn virtio_scsi_get_lun_id(lun: [u8; 8]) -> u16 { + ((u16::from(lun[2]) << 8) | u16::from(lun[3])) & 0x3FFF +} + +struct ScsiCmdQueueHandler { + /// The scsi controller. + scsibus: Arc>, + /// The Cmd virtqueue. + queue: Arc>, + /// EventFd for the Cmd virtqueue. + queue_evt: Arc, + /// The address space to which the scsi HBA belongs. + mem_space: Arc, + /// The interrupt callback function. + interrupt_cb: Arc, + /// Bit mask of features negotiated by the backend and the frontend. + driver_features: u64, + /// Device is broken or not. + device_broken: Arc, +} + +impl EventNotifierHelper for ScsiCmdQueueHandler { + fn internal_notifiers(handler: Arc>) -> Vec { + let mut notifiers = Vec::new(); + + // Register event notifier for queue evt. 
+ let h_locked = handler.lock().unwrap(); + let h_clone = handler.clone(); + let h: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let mut h_lock = h_clone.lock().unwrap(); + if h_lock.device_broken.load(Ordering::SeqCst) { + return None; + } + h_lock + .handle_cmd() + .unwrap_or_else(|e| error!("Failed to handle cmd queue, err is {:?}", e)); + + None + }); + notifiers.push(build_event_notifier(h_locked.queue_evt.as_raw_fd(), h)); + + notifiers + } +} + +impl ScsiCmdQueueHandler { + fn handle_cmd(&mut self) -> Result<()> { + trace::virtio_receive_request("ScsiCntlr".to_string(), "to cmd".to_string()); + let result = self.handle_cmd_queue_requests(); + if result.is_err() { + report_virtio_error( + self.interrupt_cb.clone(), + self.driver_features, + &self.device_broken, + ); + } + + result + } + + fn handle_cmd_queue_requests(&mut self) -> Result<()> { + let mut sreq_queue = Vec::new(); + + loop { + let mut queue = self.queue.lock().unwrap(); + let elem = queue + .vring + .pop_avail(&self.mem_space, self.driver_features)?; + if elem.desc_num == 0 { + break; + } + drop(queue); + + let mut cmdq_request = CmdQueueRequest::new( + &self.mem_space, + self.queue.clone(), + self.interrupt_cb.clone(), + self.driver_features, + &elem, + )?; + trace::virtio_scsi_handle_cmd_req( + cmdq_request.req.lun[1], + virtio_scsi_get_lun_id(cmdq_request.req.lun), + cmdq_request.req.tag, + cmdq_request.req.cdb[0], + ); + let mut need_handle = false; + self.check_cmd_queue_request(&mut cmdq_request, &mut need_handle)?; + if !need_handle { + continue; + } + + self.enqueue_scsi_request(&mut cmdq_request, &mut sreq_queue)?; + } + + if sreq_queue.is_empty() { + return Ok(()); + } + + for sreq in sreq_queue.into_iter() { + self.handle_scsi_request(sreq)?; + } + + Ok(()) + } + + fn check_cmd_queue_request( + &mut self, + qrequest: &mut CmdQueueRequest, + need_handle: &mut bool, + ) -> Result<()> { + if qrequest.data_len == u32::MAX && qrequest.mode == ScsiXferMode::ScsiXferNone { + // If neither dataout nor datain is empty, return VIRTIO_SCSI_S_FAILURE immediately. + qrequest.resp.response = VIRTIO_SCSI_S_FAILURE; + qrequest.complete()?; + trace::virtio_scsi_handle_cmd_resp( + qrequest.req.lun[1], + virtio_scsi_get_lun_id(qrequest.req.lun), + qrequest.req.tag, + qrequest.resp.status, + qrequest.resp.response, + ); + return Ok(()); + } + + let target_id = qrequest.req.lun[1]; + let lun_id = virtio_scsi_get_lun_id(qrequest.req.lun); + let bus = self.scsibus.lock().unwrap(); + let device = bus.get_device(target_id, lun_id); + if device.is_none() { + // No such target. Response VIRTIO_SCSI_S_BAD_TARGET to guest scsi drivers. + // It's not an error! + qrequest.resp.response = VIRTIO_SCSI_S_BAD_TARGET; + qrequest.complete()?; + trace::virtio_scsi_handle_cmd_resp( + qrequest.req.lun[1], + virtio_scsi_get_lun_id(qrequest.req.lun), + qrequest.req.tag, + qrequest.resp.status, + qrequest.resp.response, + ); + return Ok(()); + } + + *need_handle = true; + Ok(()) + } + + fn enqueue_scsi_request( + &mut self, + qrequest: &mut CmdQueueRequest, + sreq_queue: &mut Vec, + ) -> Result<()> { + let cdb: [u8; SCSI_CMD_BUF_SIZE] = + qrequest.req.cdb[0..SCSI_CMD_BUF_SIZE].try_into().unwrap(); + + let lun_id = virtio_scsi_get_lun_id(qrequest.req.lun); + let bus = self.scsibus.lock().unwrap(); + // Device will not be None because check_virtio_scsi_request has checked it. 
+ let device = bus.get_device(qrequest.req.lun[1], lun_id).unwrap(); + + let scsi_req = ScsiRequest::new( + cdb, + lun_id, + qrequest.iovec.clone(), + qrequest.data_len, + device, + Box::new(qrequest.clone()), + ); + if scsi_req.is_err() { + // Wrong scsi cdb. Response CHECK_CONDITION / SCSI_SENSE_INVALID_OPCODE to guest scsi + // drivers. + qrequest.resp.set_scsi_sense(SCSI_SENSE_INVALID_OPCODE); + qrequest.resp.status = CHECK_CONDITION; + qrequest.complete()?; + error!("Failed to create scsi request, error virtio scsi request!"); + return Ok(()); + } + + let sreq = scsi_req.unwrap(); + if sreq.cmd.xfer > u64::from(sreq.datalen) && sreq.cmd.mode != ScsiXferMode::ScsiXferNone { + // Wrong virtio scsi request which doesn't provide enough datain/dataout buffer. + qrequest.resp.response = VIRTIO_SCSI_S_OVERRUN; + qrequest.complete()?; + trace::virtio_scsi_handle_cmd_resp( + qrequest.req.lun[1], + virtio_scsi_get_lun_id(qrequest.req.lun), + qrequest.req.tag, + qrequest.resp.status, + qrequest.resp.response, + ); + return Ok(()); + } + + sreq_queue.push(sreq); + Ok(()) + } + + fn handle_scsi_request(&mut self, sreq: ScsiRequest) -> Result<()> { + if sreq.opstype == EMULATE_SCSI_OPS { + sreq.emulate_execute()?; + } else { + sreq.execute()?; + } + + Ok(()) + } +} + +pub fn scsi_cntlr_create_scsi_bus( + bus_name: &str, + scsi_cntlr: &Arc>, +) -> Result<()> { + let mut locked_scsi_cntlr = scsi_cntlr.lock().unwrap(); + let bus = ScsiBus::new(bus_name.to_string()); + locked_scsi_cntlr.bus = Some(Arc::new(Mutex::new(bus))); + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use machine_manager::config::str_slip_to_clap; + + #[test] + fn test_scsi_cntlr_config_cmdline_parser() { + // Test1: Right. + let cmdline1 = "virtio-scsi-pci,id=scsi0,bus=pcie.0,addr=0x3,multifunction=on,iothread=iothread1,num-queues=3,queue-size=128"; + let device_cfg = + ScsiCntlrConfig::try_parse_from(str_slip_to_clap(cmdline1, true, false)).unwrap(); + assert_eq!(device_cfg.id, "scsi0"); + assert_eq!(device_cfg.bus, "pcie.0"); + assert_eq!(device_cfg.addr, (3, 0)); + assert_eq!(device_cfg.multifunction, Some(true)); + assert_eq!(device_cfg.iothread.unwrap(), "iothread1"); + assert_eq!(device_cfg.num_queues.unwrap(), 3); + assert_eq!(device_cfg.queue_size, 128); + + // Test2: Default value. + let cmdline2 = "virtio-scsi-pci,id=scsi0,bus=pcie.0,addr=0x3.0x1"; + let device_cfg = + ScsiCntlrConfig::try_parse_from(str_slip_to_clap(cmdline2, true, false)).unwrap(); + assert_eq!(device_cfg.addr, (3, 1)); + assert_eq!(device_cfg.multifunction, None); + assert_eq!(device_cfg.num_queues, None); + assert_eq!(device_cfg.queue_size, 256); + + // Test3: Illegal value. + let cmdline3 = "virtio-scsi-pci,id=scsi0,bus=pcie.0,addr=0x3.0x1,num-queues=33"; + let result = ScsiCntlrConfig::try_parse_from(str_slip_to_clap(cmdline3, true, false)); + assert!(result.is_err()); + let cmdline3 = "virtio-scsi-pci,id=scsi0,bus=pcie.0,addr=0x3.0x1,queue-size=1025"; + let result = ScsiCntlrConfig::try_parse_from(str_slip_to_clap(cmdline3, true, false)); + assert!(result.is_err()); + let cmdline3 = "virtio-scsi-pci,id=scsi0,bus=pcie.0,addr=0x3.0x1,queue-size=65"; + let result = ScsiCntlrConfig::try_parse_from(str_slip_to_clap(cmdline3, true, false)); + assert!(result.is_err()); + + // Test4: Missing necessary parameters. 
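+        // "id", "bus" and "addr" have no default values, so leaving any of them out must fail.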
+ let cmdline4 = "virtio-scsi-pci,id=scsi0"; + let result = ScsiCntlrConfig::try_parse_from(str_slip_to_clap(cmdline4, true, false)); + assert!(result.is_err()); + let cmdline4 = "virtio-scsi-pci,bus=pcie.0,addr=0x3.0x1"; + let result = ScsiCntlrConfig::try_parse_from(str_slip_to_clap(cmdline4, true, false)); + assert!(result.is_err()); + let cmdline4 = "virtio-scsi-pci,id=scsi0,addr=0x3.0x1"; + let result = ScsiCntlrConfig::try_parse_from(str_slip_to_clap(cmdline4, true, false)); + assert!(result.is_err()); + } +} diff --git a/virtio/src/device/serial.rs b/virtio/src/device/serial.rs new file mode 100644 index 0000000000000000000000000000000000000000..3141672bb910b298082c69b87ac324bcdc0ea7f4 --- /dev/null +++ b/virtio/src/device/serial.rs @@ -0,0 +1,1083 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::mem::size_of; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::rc::Rc; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Mutex, Weak}; +use std::{cmp, usize}; + +use anyhow::{anyhow, bail, Context, Result}; +use byteorder::{ByteOrder, LittleEndian}; +use log::{error, info, warn}; +use machine_manager::config::ChardevConfig; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; + +use crate::{ + gpa_hva_iovec_map, iov_read_object, iov_to_buf, read_config_default, report_virtio_error, + Element, Queue, VirtioBase, VirtioDevice, VirtioError, VirtioInterrupt, VirtioInterruptType, + VIRTIO_CONSOLE_F_MULTIPORT, VIRTIO_CONSOLE_F_SIZE, VIRTIO_F_VERSION_1, VIRTIO_TYPE_CONSOLE, +}; +use address_space::{AddressAttr, AddressSpace}; +use chardev_backend::chardev::{Chardev, ChardevNotifyDevice, ChardevStatus, InputReceiver}; +use machine_manager::{ + config::{ChardevType, VirtioSerialInfo, VirtioSerialPortCfg, DEFAULT_VIRTQUEUE_SIZE}, + event_loop::EventLoop, + event_loop::{register_event_helper, unregister_event_helper}, +}; +use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; +use migration_derive::{ByteCode, Desc}; +use util::aio::iov_from_buf_direct; +use util::byte_code::ByteCode; +use util::gen_base_func; +use util::loop_context::{ + read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, +}; + +// Buffer size for chardev backend. +const BUF_SIZE: usize = 4096; + +// The values for event. +// Sent by the driver at initialization to indicate that it is ready to receive control message. +const VIRTIO_CONSOLE_DEVICE_READY: u16 = 0; +// Sent by the device, to create a new port. +const VIRTIO_CONSOLE_PORT_ADD: u16 = 1; +// Sent by the device, to remove an existing port. +#[allow(unused)] +const VIRTIO_CONSOLE_PORT_REMOVE: u16 = 2; +// Sent by the driver in response to the device's VIRTIO_CONSOLE_PORT_ADD message. +// To indicate that the port is ready to be used. +const VIRTIO_CONSOLE_PORT_READY: u16 = 3; +// Sent by the device to nominate a port as a console port. +// There may be more than one console port. 
+const VIRTIO_CONSOLE_CONSOLE_PORT: u16 = 4; +// Sent by the device to indicate a console size change. +#[allow(unused)] +const VIRTIO_CONSOLE_RESIZE: u16 = 5; +// This message is sent by both the device and the driver. This allows for ports to be used +// directly by guest and host processes to communicate in an application-defined manner. +const VIRTIO_CONSOLE_PORT_OPEN: u16 = 6; +// Sent by the device to give a tag to the port. +const VIRTIO_CONSOLE_PORT_NAME: u16 = 7; + +/// If the driver negotiated the VIRTIO_CONSOLE_F_MULTIPORT, the two control queues are used. +/// The layout of the control message is VirtioConsoleControl. +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioConsoleControl { + // Port number. + id: u32, + // The kind of control event. + event: u16, + // Extra information for event. + value: u16, +} + +impl ByteCode for VirtioConsoleControl {} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct VirtioConsoleConfig { + // The size of the console is supplied if VIRTIO_CONSOLE_F_SIZE feature is set. + cols: u16, + rows: u16, + // The maximum number of ports supported by the device can be fetched + // if VIRTIO_CONSOLE_F_MULTIPORT feature is set. + max_nr_ports: u32, + // The driver can use emergency write to output a single character without + // initializing virtio queues if VIRTIO_CONSOLE_F_EMERG_WRITE is set. + emerg_wr: u32, +} + +impl ByteCode for VirtioConsoleConfig {} + +impl VirtioConsoleConfig { + /// Create configuration of virtio-serial devices. + fn new(max_nr_ports: u32) -> Self { + VirtioConsoleConfig { + cols: 0_u16, + rows: 0_u16, + max_nr_ports, + emerg_wr: 0_u32, + } + } +} + +/// Status of serial device. +#[repr(C)] +#[derive(Copy, Clone, Desc, ByteCode)] +#[desc_version(compat_version = "0.1.0")] +pub struct VirtioSerialState { + /// Bit mask of features supported by the backend. + device_features: u64, + /// Bit mask of features negotiated by the backend and the frontend. + driver_features: u64, + /// Virtio serial config space. + config_space: VirtioConsoleConfig, +} + +/// Virtio serial device structure. +#[derive(Default)] +pub struct Serial { + /// Virtio device base property. + base: VirtioBase, + /// Virtio serial config space. + config_space: VirtioConsoleConfig, + /// Max serial ports number. + pub max_nr_ports: u32, + /// Serial port vector for serialport. + pub ports: Arc>>>>, +} + +impl Serial { + /// Create a virtio-serial device. + /// + /// # Arguments + /// + /// * `serial_cfg` - Device configuration set by user. + pub fn new(serial_cfg: VirtioSerialInfo) -> Self { + // Each port has 2 queues(receiveq/transmitq). + // And there exist 2 control queues(control receiveq/control transmitq). + let queue_num = serial_cfg.max_ports as usize * 2 + 2; + let queue_size = DEFAULT_VIRTQUEUE_SIZE; + + Serial { + base: VirtioBase::new(VIRTIO_TYPE_CONSOLE, queue_num, queue_size), + config_space: VirtioConsoleConfig::new(serial_cfg.max_ports), + max_nr_ports: serial_cfg.max_ports, + ..Default::default() + } + } + + fn control_queues_activate( + &mut self, + mem_space: Arc, + interrupt_cb: Arc, + queues: &[Arc>], + queue_evts: Vec>, + device_broken: Arc, + ) -> Result<()> { + // queue[2]: control receiveq(host to guest). + // queue[3]: control transmitq(guest to host). 
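+ // Both control queues carry VirtioConsoleControl messages only; per-port payload data
+ // goes through the receiveq/transmitq pairs that are set up in activate().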
+ let handler = SerialControlHandler { + input_queue: queues[2].clone(), + output_queue: queues[3].clone(), + output_queue_evt: queue_evts[3].clone(), + mem_space, + interrupt_cb, + driver_features: self.base.driver_features, + device_broken, + ports: self.ports.clone(), + }; + + let handler_h = Arc::new(Mutex::new(handler)); + for port in self.ports.lock().unwrap().iter_mut() { + port.lock().unwrap().ctrl_handler = Some(Arc::downgrade(&handler_h.clone())); + } + let notifiers = EventNotifierHelper::internal_notifiers(handler_h); + register_event_helper(notifiers, None, &mut self.base.deactivate_evts)?; + + Ok(()) + } +} + +pub fn get_max_nr(ports: &Arc>>>>) -> u32 { + let mut max: u32 = 0; + for port in ports.lock().unwrap().iter() { + let nr = port.lock().unwrap().nr; + if nr > max { + max = nr; + } + } + max +} + +pub fn find_port_by_nr( + ports: &Arc>>>>, + nr: u32, +) -> Option>> { + for port in ports.lock().unwrap().iter() { + if port.lock().unwrap().nr == nr { + return Some(port.clone()); + } + } + None +} + +impl VirtioDevice for Serial { + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + + fn realize(&mut self) -> Result<()> { + self.init_config_features()?; + Ok(()) + } + + fn init_config_features(&mut self) -> Result<()> { + self.base.device_features = 1_u64 << VIRTIO_F_VERSION_1 + | 1_u64 << VIRTIO_CONSOLE_F_SIZE + | 1_u64 << VIRTIO_CONSOLE_F_MULTIPORT; + Ok(()) + } + + fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()> { + read_config_default(self.config_space.as_bytes(), offset, data) + } + + fn write_config(&mut self, _offset: u64, _data: &[u8]) -> Result<()> { + bail!("Writing device config space for virtio serial is not supported.") + } + + fn activate( + &mut self, + mem_space: Arc, + interrupt_cb: Arc, + queue_evts: Vec>, + ) -> Result<()> { + let queues = self.base.queues.clone(); + if queues.len() != self.queue_num() { + return Err(anyhow!(VirtioError::IncorrectQueueNum( + self.queue_num(), + queues.len() + ))); + } + + for queue_id in 0..queues.len() / 2 { + // queues[i * 2] (note: i != 1): receiveq(host to guest). + // queues[i * 2 + 1] (note: i != 1): transmitq(guest to host). 
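+ // Mapping of queue pairs to port numbers: pair 0 -> port nr 0, pair 1 -> the two
+ // control queues (skipped here), pair i (i >= 2) -> port nr i - 1.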
+ let nr = match queue_id { + 0 => 0, + 1 => continue, + _ => queue_id - 1, + } as u32; + let port = find_port_by_nr(&self.ports, nr); + let handler = SerialPortHandler { + input_queue: queues[queue_id * 2].clone(), + input_queue_evt: queue_evts[queue_id * 2].clone(), + output_queue: queues[queue_id * 2 + 1].clone(), + output_queue_evt: queue_evts[queue_id * 2 + 1].clone(), + mem_space: mem_space.clone(), + interrupt_cb: interrupt_cb.clone(), + driver_features: self.base.driver_features, + device_broken: self.base.broken.clone(), + port: port.clone(), + nr, + }; + let handler_h = Arc::new(Mutex::new(handler)); + let notifiers = EventNotifierHelper::internal_notifiers(handler_h.clone()); + register_event_helper(notifiers, None, &mut self.base.deactivate_evts)?; + + if let Some(port_h) = port { + port_h.lock().unwrap().activate(&handler_h); + } + } + + self.control_queues_activate( + mem_space, + interrupt_cb, + &queues, + queue_evts, + self.base.broken.clone(), + )?; + + Ok(()) + } + + fn deactivate(&mut self) -> Result<()> { + for port in self.ports.lock().unwrap().iter_mut() { + port.lock().unwrap().deactivate(); + } + unregister_event_helper(None, &mut self.base.deactivate_evts)?; + + Ok(()) + } +} + +impl StateTransfer for Serial { + fn get_state_vec(&self) -> Result> { + let state = VirtioSerialState { + device_features: self.base.device_features, + driver_features: self.base.driver_features, + config_space: self.config_space, + }; + Ok(state.as_bytes().to_vec()) + } + + fn set_state_mut(&mut self, state: &[u8]) -> Result<()> { + let state = VirtioSerialState::from_bytes(state) + .with_context(|| migration::error::MigrationError::FromBytesError("SERIAL"))?; + self.base.device_features = state.device_features; + self.base.driver_features = state.driver_features; + self.config_space = state.config_space; + Ok(()) + } + + fn get_device_alias(&self) -> u64 { + MigrationManager::get_desc_alias(&VirtioSerialState::descriptor().name).unwrap_or(!0) + } +} + +impl MigrationHook for Serial {} + +/// Virtio serial port structure. +#[derive(Clone)] +pub struct SerialPort { + name: Option, + /// Whether rx paused + paused: bool, + /// Chardev vector for serialport. + pub chardev: Arc>, + /// Number id. + nr: u32, + /// Whether the port is a console port. + pub is_console: bool, + /// Whether the guest open the serial port. + guest_connected: bool, + /// Whether the host open the serial socket. + host_connected: bool, + /// The handler used to send control event to guest. + ctrl_handler: Option>>, +} + +impl SerialPort { + pub fn new(port_cfg: VirtioSerialPortCfg, chardev_cfg: ChardevConfig) -> Self { + // Console is default host connected. And pty chardev has opened by default in realize() + // function. + let is_console = matches!(port_cfg.classtype.as_str(), "virtconsole"); + let mut host_connected = is_console; + if let ChardevType::Pty { .. 
} = chardev_cfg.classtype { + host_connected = true; + } + + SerialPort { + name: Some(port_cfg.id), + paused: false, + chardev: Arc::new(Mutex::new(Chardev::new(chardev_cfg))), + nr: port_cfg.nr.unwrap(), + is_console, + guest_connected: false, + host_connected, + ctrl_handler: None, + } + } + + pub fn realize(&mut self) -> Result<()> { + self.chardev + .lock() + .unwrap() + .realize() + .with_context(|| "Failed to realize chardev")?; + EventLoop::update_event( + EventNotifierHelper::internal_notifiers(self.chardev.clone()), + None, + )?; + Ok(()) + } + + fn unpause_chardev_rx(&mut self) { + trace::virtio_serial_unpause_chardev_rx(); + if self.paused { + self.paused = false; + self.chardev.lock().unwrap().unpause_rx(); + } + } + + fn activate(&mut self, handler: &Arc>) { + self.chardev.lock().unwrap().set_receiver(handler); + } + + fn deactivate(&mut self) { + self.guest_connected = false; + } +} + +/// Handler for queues which are used for port. +struct SerialPortHandler { + input_queue: Arc>, + input_queue_evt: Arc, + output_queue: Arc>, + output_queue_evt: Arc, + mem_space: Arc, + interrupt_cb: Arc, + driver_features: u64, + /// Virtio serial device is broken or not. + device_broken: Arc, + port: Option>>, + nr: u32, +} + +/// Handler for queues which are used for control. +struct SerialControlHandler { + input_queue: Arc>, + output_queue: Arc>, + output_queue_evt: Arc, + mem_space: Arc, + interrupt_cb: Arc, + driver_features: u64, + /// Virtio serial device is broken or not. + device_broken: Arc, + ports: Arc>>>>, +} + +impl SerialPortHandler { + fn output_handle(&mut self) { + trace::virtio_receive_request("Serial".to_string(), "to IO".to_string()); + self.output_handle_internal().unwrap_or_else(|e| { + error!("Port {} handle output error: {:?}", self.nr, e); + report_virtio_error( + self.interrupt_cb.clone(), + self.driver_features, + &self.device_broken, + ); + }); + } + + fn input_avail_handle(&mut self) { + // new buffer appeared in input queue. Unpause RX + trace::virtio_serial_new_inputqueue_buf(); + + self.enable_inputqueue_notify(false); + let mut port_locked = self.port.as_ref().unwrap().lock().unwrap(); + port_locked.unpause_chardev_rx(); + } + + fn output_handle_internal(&mut self) -> Result<()> { + let mut queue_lock = self.output_queue.lock().unwrap(); + + loop { + if let Some(port) = self.port.as_ref() { + let locked_port = port.lock().unwrap(); + let locked_cdev = locked_port.chardev.lock().unwrap(); + if locked_cdev.outbuf_is_full() { + break; + } + } + + let elem = queue_lock + .vring + .pop_avail(&self.mem_space, self.driver_features)?; + if elem.desc_num == 0 { + break; + } + + // Discard requests when there is no port using this queue. Popping elements without + // processing means discarding the request. + if let Some(port) = self.port.as_ref() { + let iovec = elem.out_iovec; + let iovec_size = Element::iovec_size(&iovec); + let mut buf = vec![0u8; iovec_size as usize]; + let cache = queue_lock.vring.get_cache(); + let size = iov_to_buf(&self.mem_space, cache, &iovec, &mut buf[..])? 
as u64; + + let locked_port = port.lock().unwrap(); + if locked_port.host_connected { + if let Err(e) = locked_port + .chardev + .lock() + .unwrap() + .fill_outbuf(buf, Some(self.output_queue_evt.clone())) + { + error!("Failed to append elem buffer to chardev with error {:?}", e); + } + } + trace::virtio_serial_output_data(iovec_size, size); + } + + queue_lock.vring.add_used(elem.index, 0).with_context(|| { + format!( + "Failed to add used ring for virtio serial port output, index: {} len: {}", + elem.index, 0, + ) + })?; + } + + if queue_lock.vring.should_notify(self.driver_features) { + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue_lock), false) + .with_context(|| { + VirtioError::InterruptTrigger( + "serial port output queue", + VirtioInterruptType::Vring, + ) + })?; + trace::virtqueue_send_interrupt("Serial", &*queue_lock as *const _ as u64); + } + + Ok(()) + } + + fn get_input_avail_bytes(&mut self, max_size: usize) -> usize { + let port = self.port.as_ref(); + if port.is_none() || !port.unwrap().lock().unwrap().guest_connected { + trace::virtio_serial_disconnected_port(); + return 0; + } + + if self.device_broken.load(Ordering::SeqCst) { + warn!("virtio-serial device is broken"); + return 0; + } + + let mut locked_queue = self.input_queue.lock().unwrap(); + match locked_queue + .vring + .get_avail_bytes(&self.mem_space, max_size, true) + { + Ok(n) => n, + Err(_) => { + warn!( + "error occurred while port {} getting available bytes of vring", + self.nr + ); + 0 + } + } + } + + fn enable_inputqueue_notify(&mut self, enable: bool) { + if self.device_broken.load(Ordering::SeqCst) { + return; + } + + let mut queue_lock = self.input_queue.lock().unwrap(); + let _ = queue_lock + .vring + .suppress_queue_notify(self.driver_features, !enable); + } + + fn input_handle_internal(&mut self, buffer: &[u8]) -> Result<()> { + let mut queue_lock = self.input_queue.lock().unwrap(); + + let mut left = buffer.len(); + let port = self.port.as_ref(); + if left == 0 || port.is_none() { + return Ok(()); + } + let port_locked = port.unwrap().lock().unwrap(); + if !port_locked.guest_connected { + return Ok(()); + } + + let mut written_count = 0_usize; + loop { + let elem = queue_lock + .vring + .pop_avail(&self.mem_space, self.driver_features)?; + if elem.desc_num == 0 { + break; + } + + let mut once_count = 0_usize; + for elem_iov in elem.in_iovec.iter() { + let len = cmp::min(elem_iov.len as usize, left); + let write_end = written_count + len; + let mut source_slice = &buffer[written_count..write_end]; + + // GPAChecked: the elem_iov has been checked in pop_avail(). 
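+ // At most `len` bytes are copied into this descriptor; any remaining input continues
+ // in the next in_iovec entry or in the next available element.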
+ self.mem_space + .write( + &mut source_slice, + elem_iov.addr, + len as u64, + AddressAttr::Ram, + ) + .with_context(|| { + format!( + "Failed to write slice for virtio serial port input: addr {:X} len {}", + elem_iov.addr.0, len + ) + })?; + + written_count = write_end; + once_count += len; + left -= len; + if left == 0 { + break; + } + } + + queue_lock + .vring + .add_used(elem.index, once_count as u32) + .with_context(|| { + format!( + "Failed to add used ring for virtio serial port input: index {} len {}", + elem.index, once_count + ) + })?; + + if queue_lock.vring.should_notify(self.driver_features) { + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue_lock), false) + .with_context(|| { + VirtioError::InterruptTrigger( + "serial port input queue", + VirtioInterruptType::Vring, + ) + })?; + trace::virtqueue_send_interrupt("Serial", &*queue_lock as *const _ as u64); + } + + if left == 0 { + break; + } + } + + Ok(()) + } +} + +impl EventNotifierHelper for SerialPortHandler { + fn internal_notifiers(serial_handler: Arc>) -> Vec { + let mut notifiers = Vec::new(); + + let cloned_cls = serial_handler.clone(); + let handler: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let mut h_lock = cloned_cls.lock().unwrap(); + if h_lock.device_broken.load(Ordering::SeqCst) { + return None; + } + h_lock.output_handle(); + None + }); + + let cloned_inp_cls = serial_handler.clone(); + let input_avail_handler: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let mut h_lock = cloned_inp_cls.lock().unwrap(); + if h_lock.device_broken.load(Ordering::SeqCst) { + return None; + } + h_lock.input_avail_handle(); + None + }); + + notifiers.push(EventNotifier::new( + NotifierOperation::AddShared, + serial_handler.lock().unwrap().output_queue_evt.as_raw_fd(), + None, + EventSet::IN, + vec![handler], + )); + + notifiers.push(EventNotifier::new( + NotifierOperation::AddShared, + serial_handler.lock().unwrap().input_queue_evt.as_raw_fd(), + None, + EventSet::IN, + vec![input_avail_handler], + )); + + notifiers + } +} + +impl InputReceiver for SerialPortHandler { + fn receive(&mut self, buffer: &[u8]) { + self.input_handle_internal(buffer).unwrap_or_else(|e| { + error!("Port {} handle input error: {:?}", self.nr, e); + report_virtio_error( + self.interrupt_cb.clone(), + self.driver_features, + &self.device_broken, + ); + }); + } + + fn remain_size(&mut self) -> usize { + self.get_input_avail_bytes(BUF_SIZE) + } + + fn set_paused(&mut self) { + trace::virtio_serial_pause_rx(); + if self.port.is_none() { + return; + } + + if self.port.as_ref().unwrap().lock().unwrap().guest_connected { + self.enable_inputqueue_notify(true); + } + + let mut locked_port = self.port.as_ref().unwrap().lock().unwrap(); + locked_port.paused = true; + } +} + +impl SerialControlHandler { + fn output_control(&mut self) { + self.output_control_internal().unwrap_or_else(|e| { + error!("handle output control error: {:?}", e); + report_virtio_error( + self.interrupt_cb.clone(), + self.driver_features, + &self.device_broken, + ); + }); + } + + fn output_control_internal(&mut self) -> Result<()> { + let output_queue = self.output_queue.clone(); + let mut queue_lock = output_queue.lock().unwrap(); + + loop { + let elem = queue_lock + .vring + .pop_avail(&self.mem_space, self.driver_features)?; + if elem.desc_num == 0 { + break; + } + + let mut req = iov_read_object::( + &self.mem_space, + &elem.out_iovec, + queue_lock.vring.get_cache(), + )?; + req.id = LittleEndian::read_u32(req.id.as_bytes()); + req.event = 
LittleEndian::read_u16(req.event.as_bytes()); + req.value = LittleEndian::read_u16(req.value.as_bytes()); + + info!( + "Serial port {} handle control message: event({}), value({})", + req.id, req.event, req.value + ); + self.handle_control_message(&mut req); + + queue_lock.vring.add_used(elem.index, 0).with_context(|| { + format!( + "Failed to add used ring for control port, index: {} len: {}.", + elem.index, 0 + ) + })?; + } + + if queue_lock.vring.should_notify(self.driver_features) { + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue_lock), false) + .with_context(|| { + VirtioError::InterruptTrigger( + "serial input control queue", + VirtioInterruptType::Vring, + ) + })?; + trace::virtqueue_send_interrupt("Serial", &*queue_lock as *const _ as u64); + } + + Ok(()) + } + + fn handle_control_message(&mut self, ctrl: &mut VirtioConsoleControl) { + if ctrl.event == VIRTIO_CONSOLE_DEVICE_READY { + if ctrl.value == 0 { + error!("Guest is not ready to receive control message."); + return; + } + + let cloned_ports = self.ports.clone(); + let mut locked_ports = cloned_ports.lock().unwrap(); + for port in locked_ports.iter_mut() { + self.send_control_event(port.lock().unwrap().nr, VIRTIO_CONSOLE_PORT_ADD, 1); + } + return; + } + + let port = if let Some(port) = find_port_by_nr(&self.ports, ctrl.id) { + port + } else { + error!("Invalid port id {}", ctrl.id); + return; + }; + + match ctrl.event { + VIRTIO_CONSOLE_PORT_READY => { + if ctrl.value == 0 { + error!("Driver failed to add port {}", ctrl.id); + return; + } + + let locked_port = port.lock().unwrap(); + if locked_port.is_console { + self.send_control_event(locked_port.nr, VIRTIO_CONSOLE_CONSOLE_PORT, 1); + } + + if let Some(name) = &locked_port.name { + let mut extra_data: Vec = Vec::new(); + extra_data.extend(name.as_bytes()); + extra_data.push(0); + self.send_input_control_msg( + locked_port.nr, + VIRTIO_CONSOLE_PORT_NAME, + 1, + &extra_data, + ) + .unwrap_or_else(|e| { + error!("Send input control message error: {:?}", e); + report_virtio_error( + self.interrupt_cb.clone(), + self.driver_features, + &self.device_broken, + ); + }); + } + + if locked_port.host_connected { + self.send_control_event(locked_port.nr, VIRTIO_CONSOLE_PORT_OPEN, 1); + } + } + VIRTIO_CONSOLE_PORT_OPEN => { + let mut locked_port = port.lock().unwrap(); + locked_port.guest_connected = ctrl.value != 0; + if ctrl.value != 0 { + locked_port.unpause_chardev_rx(); + } + } + _ => (), + } + } + + fn send_control_event(&mut self, id: u32, event: u16, value: u16) { + info!( + "Serial port {} send control message: event({}), value({})", + id, event, value + ); + self.send_input_control_msg(id, event, value, &[]) + .unwrap_or_else(|e| { + error!("send input control message error: {:?}", e); + report_virtio_error( + self.interrupt_cb.clone(), + self.driver_features, + &self.device_broken, + ); + }); + } + + fn send_input_control_msg( + &mut self, + id: u32, + event: u16, + value: u16, + extra: &[u8], + ) -> Result<()> { + let mut queue_lock = self.input_queue.lock().unwrap(); + let elem = queue_lock + .vring + .pop_avail(&self.mem_space, self.driver_features)?; + if elem.desc_num == 0 { + warn!("empty input queue buffer!"); + return Ok(()); + } + + let cache = queue_lock.vring.get_cache(); + let (in_size, ctrl_vec) = gpa_hva_iovec_map(&elem.in_iovec, &self.mem_space, cache)?; + let len = size_of::() + extra.len(); + if in_size < len as u64 { + bail!( + "Invalid length for input control msg: get {}, expected {}", + in_size, + len, + ); + } + + let ctrl_msg = 
VirtioConsoleControl { id, event, value }; + let mut msg_data: Vec = Vec::new(); + msg_data.extend(ctrl_msg.as_bytes()); + if !extra.is_empty() { + msg_data.extend(extra); + } + + // SAFETY: ctrl_vec is generated by address_space. + unsafe { iov_from_buf_direct(&ctrl_vec, &msg_data) }.and_then(|size| { + if size != len { + bail!( + "Expected send msg length is {}, actual send length {}.", + len, + size + ); + } + Ok(()) + })?; + + queue_lock + .vring + .add_used(elem.index, len as u32) + .with_context(|| { + format!( + "Failed to add used ring(serial input control queue), index {}, len {}", + elem.index, len, + ) + })?; + + if queue_lock.vring.should_notify(self.driver_features) { + (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue_lock), false) + .with_context(|| { + VirtioError::InterruptTrigger( + "serial input control queue", + VirtioInterruptType::Vring, + ) + })?; + trace::virtqueue_send_interrupt("Serial", &*queue_lock as *const _ as u64); + } + + Ok(()) + } +} + +impl EventNotifierHelper for SerialControlHandler { + fn internal_notifiers(serial_handler: Arc>) -> Vec { + let mut notifiers = Vec::new(); + + let cloned_cls = serial_handler.clone(); + let handler: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let mut h_lock = cloned_cls.lock().unwrap(); + if h_lock.device_broken.load(Ordering::SeqCst) { + return None; + } + h_lock.output_control(); + None + }); + notifiers.push(EventNotifier::new( + NotifierOperation::AddShared, + serial_handler.lock().unwrap().output_queue_evt.as_raw_fd(), + None, + EventSet::IN, + vec![handler], + )); + + notifiers + } +} + +impl ChardevNotifyDevice for SerialPort { + fn chardev_notify(&mut self, status: ChardevStatus) { + match (&status, self.host_connected) { + (ChardevStatus::Close, _) => self.host_connected = false, + (ChardevStatus::Open, false) => self.host_connected = true, + (ChardevStatus::Open, true) => return, + } + + if self.ctrl_handler.is_none() { + warn!("No control handler for port {}.", self.nr); + return; + } + + let handler = self.ctrl_handler.as_ref().unwrap().upgrade(); + if handler.is_none() { + warn!("Control handler for port {} is invalid", self.nr); + return; + } + + // Note: when virtio serial devices are deactivated, all handlers will be unregistered. + // For this action is in the same thread with `chardev_notify`, these two operations will + // not be executed concurrently. So, `handler` must be effective here. + handler.unwrap().lock().unwrap().send_control_event( + self.nr, + VIRTIO_CONSOLE_PORT_OPEN, + status as u16, + ); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_set_driver_features() { + let mut serial = Serial::new(VirtioSerialInfo { + classtype: "virtio-serial-pci".to_string(), + id: "serial".to_string(), + multifunction: Some(false), + max_ports: 31, + bus: Some("pcie.0".to_string()), + addr: Some((0, 0)), + }); + + // If the device feature is 0, all driver features are not supported. 
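+ // set_driver_features() masks out every bit that is not offered by the device, so the
+ // negotiated feature set stays 0 here.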
+ serial.base.device_features = 0; + let driver_feature: u32 = 0xFF; + let page = 0_u32; + serial.set_driver_features(page, driver_feature); + assert_eq!(serial.base.driver_features, 0_u64); + assert_eq!(u64::from(serial.driver_features(page)), 0_u64); + + let driver_feature: u32 = 0xFF; + let page = 1_u32; + serial.set_driver_features(page, driver_feature); + assert_eq!(serial.base.driver_features, 0_u64); + assert_eq!(u64::from(serial.driver_features(page)), 0_u64); + + // If both the device feature bit and the front-end driver feature bit are + // supported at the same time, this driver feature bit is supported. + serial.base.device_features = 1_u64 << VIRTIO_F_VERSION_1 | 1_u64 << VIRTIO_CONSOLE_F_SIZE; + let driver_feature: u32 = (1_u64 << VIRTIO_CONSOLE_F_SIZE) as u32; + let page = 0_u32; + serial.set_driver_features(page, driver_feature); + assert_eq!( + serial.base.driver_features, + (1_u64 << VIRTIO_CONSOLE_F_SIZE) + ); + assert_eq!( + u64::from(serial.driver_features(page)), + (1_u64 << VIRTIO_CONSOLE_F_SIZE) + ); + serial.base.driver_features = 0; + + serial.base.device_features = 1_u64 << VIRTIO_F_VERSION_1; + let driver_feature: u32 = (1_u64 << VIRTIO_CONSOLE_F_SIZE) as u32; + let page = 0_u32; + serial.set_driver_features(page, driver_feature); + assert_eq!(serial.base.driver_features, 0); + serial.base.driver_features = 0; + + serial.base.device_features = 1_u64 << VIRTIO_F_VERSION_1 + | 1_u64 << VIRTIO_CONSOLE_F_SIZE + | 1_u64 << VIRTIO_CONSOLE_F_MULTIPORT; + let driver_feature: u32 = (1_u64 << VIRTIO_CONSOLE_F_MULTIPORT) as u32; + let page = 0_u32; + serial.set_driver_features(page, driver_feature); + assert_eq!( + serial.base.driver_features, + (1_u64 << VIRTIO_CONSOLE_F_MULTIPORT) + ); + let driver_feature: u32 = ((1_u64 << VIRTIO_F_VERSION_1) >> 32) as u32; + let page = 1_u32; + serial.set_driver_features(page, driver_feature); + assert_eq!( + serial.base.driver_features, + (1_u64 << VIRTIO_F_VERSION_1 | 1_u64 << VIRTIO_CONSOLE_F_MULTIPORT) + ); + } + + #[test] + fn test_read_config() { + let max_ports: u8 = 31; + let serial = Serial::new(VirtioSerialInfo { + classtype: "virtio-serial-pci".to_string(), + id: "serial".to_string(), + multifunction: Some(false), + max_ports: u32::from(max_ports), + bus: Some("pcie.0".to_string()), + addr: Some((0, 0)), + }); + + // The offset of configuration that needs to be read exceeds the maximum. + let offset = size_of::() as u64; + let mut read_data: Vec = vec![0; 8]; + assert!(serial.read_config(offset, &mut read_data).is_err()); + + // Check the configuration that needs to be read. + let offset = 0_u64; + let mut read_data: Vec = vec![0; 12]; + let expect_data: Vec = vec![0, 0, 0, 0, max_ports, 0, 0, 0, 0, 0, 0, 0]; + assert!(serial.read_config(offset, &mut read_data).is_ok()); + assert_eq!(read_data, expect_data); + } +} diff --git a/virtio/src/error.rs b/virtio/src/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..b5e69caa4782899d33bdbeeb79da4dae92d4d72b --- /dev/null +++ b/virtio/src/error.rs @@ -0,0 +1,77 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum VirtioError { + #[error("Io")] + Io { + #[from] + source: std::io::Error, + }, + #[error("Util")] + Util { + #[from] + source: util::error::UtilError, + }, + #[error("AddressSpace")] + AddressSpace { + #[from] + source: address_space::error::AddressSpaceError, + }, + #[error("SysBus")] + SysBus { + #[from] + source: devices::sysbus::error::SysBusError, + }, + #[error("Failed to create eventfd.")] + EventFdCreate, + #[error("Failed to write eventfd.")] + EventFdWrite, + #[error("Failed to create {0} thread")] + ThreadCreate(String), + #[error("Failed to send {0} on the channel")] + ChannelSend(String), + #[error("Queue index {0} invalid, queue size is {1}")] + QueueIndex(u16, u16), + #[error("Vring descriptor is invalid")] + QueueDescInvalid, + #[error("Address overflows for {0}, address: 0x{1:x}, offset: {2}")] + AddressOverflow(&'static str, u64, u64), + #[error("Failed to r/w dev config space: overflows, offset {0}, len {1}, space size {2}")] + DevConfigOverflow(u64, u64, u64), + #[error("Failed to trigger interrupt for {0}, int-type {1:#?}")] + InterruptTrigger(&'static str, super::VirtioInterruptType), + #[error("Vhost ioctl failed: {0}")] + VhostIoctl(String), + #[error("Failed to get iovec from element!")] + ElementEmpty, + #[error("Virt queue is none!")] + VirtQueueIsNone, + #[error("Device {0} virt queue {1} is not enabled!")] + VirtQueueNotEnabled(String, usize), + #[error("Cannot perform activate. Expected {0} queue(s), got {1}")] + IncorrectQueueNum(usize, usize), + #[error("Incorrect offset, expected {0}, got {1}")] + IncorrectOffset(u64, u64), + #[error("Device {0} not activated")] + DeviceNotActivated(String), + #[error("Failed to write config")] + FailedToWriteConfig, + #[error("Failed to read object for {0}, address: 0x{1:x}")] + ReadObjectErr(&'static str, u64), + #[error("Invalid device status: 0x{0:x}.")] + DevStatErr(u32), + #[error("Unsupported mmio register at offset 0x{0:x}.")] + MmioRegErr(u64), +} diff --git a/virtio/src/lib.rs b/virtio/src/lib.rs index 909099a0c908c081070b4a5a35a96d0570b0efdb..0c8e7673bbcd786e4f17689bbc0e6f843973af9c 100644 --- a/virtio/src/lib.rs +++ b/virtio/src/lib.rs @@ -25,114 +25,51 @@ //! - `x86_64` //! - `aarch64` -#[macro_use] -extern crate error_chain; -#[macro_use] -extern crate log; -#[macro_use] -extern crate machine_manager; -#[macro_use] -extern crate vmm_sys_util; -#[macro_use] -extern crate migration_derive; - -pub mod errors { - error_chain! 
{ - foreign_links { - Io(std::io::Error); - } - links { - Util(util::errors::Error, util::errors::ErrorKind); - AddressSpace(address_space::errors::Error, address_space::errors::ErrorKind); - SysBus(sysbus::errors::Error, sysbus::errors::ErrorKind); - } - errors { - EventFdCreate { - display("Failed to create eventfd.") - } - EventFdWrite { - display("Failed to write eventfd.") - } - ThreadCreate(name: String) { - display("Failed to create {} thread", name) - } - ChannelSend(value: String) { - display("Failed to send {} on the channel", value) - } - QueueIndex(index: u16, size: u16) { - display("Queue index {} invalid, queue size is {}", index, size) - } - QueueDescInvalid { - display("Vring descriptor is invalid") - } - AddressOverflow(value: &'static str, address: u64, offset: u64) { - display("Address overflows for {}, address: 0x{:x}, offset: {}", value, address, offset) - } - DevConfigOverflow(offset: u64, size: u64) { - display("Failed to r/w dev config space: overflows, offset {}, space size {}", offset, size) - } - InterruptTrigger(dev_ty: &'static str, int_type: super::VirtioInterruptType) { - display("Failed to trigger interrupt for {}, int-type {:#?}", dev_ty, int_type) - } - VhostIoctl(ioctl: String) { - display("Vhost ioctl failed: {}", ioctl) - } - ElementEmpty { - display("Failed to get iovec from element!") - } - IncorrectQueueNum(expect: usize, actual: usize) { - display("Cannot perform activate. Expected {} queue(s), got {}", expect, actual) - } - IncorrectOffset(expect: u64, actual: u64) { - display("Incorrect offset, expected {}, got {}", expect, actual) - } - DeviceNotActivated(devname: String) { - display("Device {} not activated", devname) - } - FailedToWriteConfig { - display("Failed to write config") - } - ReadObjectErr(object: &'static str, address: u64) { - display("Failed to read object for {}, address: 0x{:x}", object, address) - } - DevStatErr(status: u32) { - display("Invalid device status: 0x{:x}.", status) - } - MmioRegErr(offset: u64) { - display("Unsupported mmio register at offset 0x{:x}.", offset) - } - } - } -} +pub mod device; +pub mod error; +pub mod vhost; -mod balloon; -mod block; -mod console; -mod net; mod queue; -mod rng; -mod vhost; -mod virtio_mmio; -#[allow(dead_code)] -mod virtio_pci; - -pub use balloon::*; -pub use block::{Block, BlockState}; -pub use console::{Console, VirtioConsoleState}; -pub use errors::*; -pub use net::*; +mod transport; + +pub use device::balloon::*; +pub use device::block::{Block, BlockState, VirtioBlkConfig, VirtioBlkDevConfig}; +#[cfg(feature = "virtio_gpu")] +pub use device::gpu::*; +pub use device::net::*; +#[cfg(feature = "virtio_rng")] +pub use device::rng::{Rng, RngConfig, RngState}; +#[cfg(feature = "virtio_scsi")] +pub use device::scsi_cntlr as ScsiCntlr; +pub use device::serial::{find_port_by_nr, get_max_nr, Serial, SerialPort, VirtioSerialState}; +pub use error::VirtioError; pub use queue::*; -pub use rng::{Rng, RngState}; +pub use transport::virtio_mmio::{VirtioMmioDevice, VirtioMmioState}; +pub use transport::virtio_pci::VirtioPciDevice; pub use vhost::kernel as VhostKern; -pub use virtio_mmio::{VirtioMmioDevice, VirtioMmioState}; -pub use virtio_pci::VirtioPciDevice; +pub use vhost::user as VhostUser; +use std::cmp; +use std::io::Write; +use std::mem::size_of; +use std::os::unix::prelude::RawFd; +use std::sync::atomic::{AtomicBool, AtomicU16, AtomicU32, AtomicU8, Ordering}; use std::sync::{Arc, Mutex}; -use address_space::AddressSpace; -use machine_manager::config::ConfigCheck; +use anyhow::{anyhow, 
bail, Context, Result}; +use log::{error, warn}; use vmm_sys_util::eventfd::EventFd; +use address_space::{AddressSpace, RegionCache}; +use devices::pci::register_pcidevops_type; +use devices::sysbus::register_sysbusdevops_type; +use machine_manager::config::ConfigCheck; +use migration_derive::ByteCode; +use util::aio::{mem_to_buf, Iovec}; +use util::byte_code::ByteCode; +use util::num_ops::{read_u32, write_u32}; +use util::AsAny; + /// Check if the bit of features is configured. pub fn virtio_has_feature(feature: u64, fbit: u32) -> bool { feature & (1 << fbit) != 0 @@ -144,14 +81,17 @@ pub const VIRTIO_TYPE_BLOCK: u32 = 2; pub const VIRTIO_TYPE_CONSOLE: u32 = 3; pub const VIRTIO_TYPE_RNG: u32 = 4; pub const VIRTIO_TYPE_BALLOON: u32 = 5; +pub const VIRTIO_TYPE_SCSI: u32 = 8; +pub const VIRTIO_TYPE_GPU: u32 = 16; pub const VIRTIO_TYPE_VSOCK: u32 = 19; -pub const _VIRTIO_TYPE_FS: u32 = 26; +pub const VIRTIO_TYPE_FS: u32 = 26; // The Status of Virtio Device. const CONFIG_STATUS_ACKNOWLEDGE: u32 = 0x01; const CONFIG_STATUS_DRIVER: u32 = 0x02; const CONFIG_STATUS_DRIVER_OK: u32 = 0x04; const CONFIG_STATUS_FEATURES_OK: u32 = 0x08; +const CONFIG_STATUS_NEEDS_RESET: u32 = 0x40; const CONFIG_STATUS_FAILED: u32 = 0x80; /// Feature Bits, refer to Virtio Spec. @@ -176,36 +116,121 @@ pub const VIRTIO_NET_F_GUEST_CSUM: u32 = 1; pub const VIRTIO_NET_F_MAC: u32 = 5; /// Driver can receive TSOv4. pub const VIRTIO_NET_F_GUEST_TSO4: u32 = 7; +/// Driver can receive TSOv6. +pub const VIRTIO_NET_F_GUEST_TSO6: u32 = 8; +/// Driver can receive TSO with ECN. +pub const VIRTIO_NET_F_GUEST_ECN: u32 = 9; /// Driver can receive UFO. pub const VIRTIO_NET_F_GUEST_UFO: u32 = 10; /// Device can receive TSOv4. pub const VIRTIO_NET_F_HOST_TSO4: u32 = 11; +/// Device can receive TSOv6. +pub const VIRTIO_NET_F_HOST_TSO6: u32 = 12; /// Device can receive UFO. pub const VIRTIO_NET_F_HOST_UFO: u32 = 14; +/// Device can merge receive buffers. +pub const VIRTIO_NET_F_MRG_RXBUF: u32 = 15; /// Control channel is available. pub const VIRTIO_NET_F_CTRL_VQ: u32 = 17; +/// Control channel RX mode support. +pub const VIRTIO_NET_F_CTRL_RX: u32 = 18; +/// Control channel VLAN filtering. +pub const VIRTIO_NET_F_CTRL_VLAN: u32 = 19; +/// Extra RX mode control support. +pub const VIRTIO_NET_F_CTRL_RX_EXTRA: u32 = 20; /// Device supports multi queue with automatic receive steering. pub const VIRTIO_NET_F_MQ: u32 = 22; +/// Set Mac Address through control channel. +pub const VIRTIO_NET_F_CTRL_MAC_ADDR: u32 = 23; /// Configuration cols and rows are valid. pub const VIRTIO_CONSOLE_F_SIZE: u64 = 0; +/// Device has support for multiple ports. +/// max_nr_ports is valid and control virtqueues will be used. +pub const VIRTIO_CONSOLE_F_MULTIPORT: u64 = 1; +/// Device has support for emergency write. +/// Configuration field emerg_wr is valid. +pub const VIRTIO_CONSOLE_F_EMERG_WRITE: u64 = 2; /// Maximum size of any single segment is in size_max. pub const VIRTIO_BLK_F_SIZE_MAX: u32 = 1; /// Maximum number of segments in a request is in seg_max. pub const VIRTIO_BLK_F_SEG_MAX: u32 = 2; +/// Legacy geometry available. +pub const VIRTIO_BLK_F_GEOMETRY: u32 = 4; /// Device is read-only. pub const VIRTIO_BLK_F_RO: u32 = 5; +/// Block size of disk is available. +pub const VIRTIO_BLK_F_BLK_SIZE: u32 = 6; /// Cache flush command support. pub const VIRTIO_BLK_F_FLUSH: u32 = 9; -/// The device sets MQ ok status values to driver. +/// Topology information is available. +pub const VIRTIO_BLK_F_TOPOLOGY: u32 = 10; +/// DISCARD is supported. 
+pub const VIRTIO_BLK_F_DISCARD: u32 = 13; +/// WRITE ZEROES is supported. +pub const VIRTIO_BLK_F_WRITE_ZEROES: u32 = 14; +/// Unmap flags for write zeroes command. +pub const VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP: u32 = 1; +/// GPU EDID feature is supported. +pub const VIRTIO_GPU_F_EDID: u32 = 1; +/// TODO: need to change to 5 or bigger +pub const VIRTIO_GPU_F_MONOCHROME: u32 = 4; + +/// The device sets control ok status to driver. pub const VIRTIO_NET_OK: u8 = 0; +/// The device sets control err status to driver. +pub const VIRTIO_NET_ERR: u8 = 1; + +/// Driver can send control commands. +pub const VIRTIO_NET_CTRL_RX: u8 = 0; +/// Control commands for promiscuous mode. +pub const VIRTIO_NET_CTRL_RX_PROMISC: u8 = 0; +/// Control commands for all-multicast receive. +pub const VIRTIO_NET_CTRL_RX_ALLMULTI: u8 = 1; +/// Control commands for all-unicast receive. +pub const VIRTIO_NET_CTRL_RX_ALLUNI: u8 = 2; +/// Control commands for suppressing multicast receive. +pub const VIRTIO_NET_CTRL_RX_NOMULTI: u8 = 3; +/// Control commands for suppressing unicast receive. +pub const VIRTIO_NET_CTRL_RX_NOUNI: u8 = 4; +/// Control commands for suppressing broadcast receive. +pub const VIRTIO_NET_CTRL_RX_NOBCAST: u8 = 5; + +/// The driver can send control commands for MAC address filtering. +pub const VIRTIO_NET_CTRL_MAC: u8 = 1; +/// The driver sets the unicast/multicast address table. +pub const VIRTIO_NET_CTRL_MAC_TABLE_SET: u8 = 0; +/// The driver sets the default MAC address which rx filtering accepts. +pub const VIRTIO_NET_CTRL_MAC_ADDR_SET: u8 = 1; + +/// The driver can send control commands for vlan filtering. +pub const VIRTIO_NET_CTRL_VLAN: u8 = 2; +/// The driver adds a vlan id to the vlan filtering table. +pub const VIRTIO_NET_CTRL_VLAN_ADD: u8 = 0; +/// The driver adds a vlan id from the vlan filtering table. +pub const VIRTIO_NET_CTRL_VLAN_DEL: u8 = 1; + /// Driver configure the class before enabling virtqueue. -pub const VIRTIO_NET_CTRL_MQ: u16 = 4; +pub const VIRTIO_NET_CTRL_MQ: u8 = 4; /// Driver configure the command before enabling virtqueue. pub const VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: u16 = 0; /// The minimum pairs of multiple queue. pub const VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN: u16 = 1; /// The maximum pairs of multiple queue. pub const VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX: u16 = 0x8000; +/// Support more than one virtqueue. +pub const VIRTIO_BLK_F_MQ: u32 = 12; + +/// A single request can include both device-readable and device-writable data buffers. +pub const VIRTIO_SCSI_F_INOUT: u32 = 0; +/// The host SHOULD enable reporting of hot-plug and hot-unplug events for LUNs and targets on the +/// SCSI bus. The guest SHOULD handle hot-plug and hot-unplug events. +pub const VIRTIO_SCSI_F_HOTPLUG: u32 = 1; +/// The host will report changes to LUN parameters via a VIRTIO_SCSI_T_PARAM_CHANGE event. +/// The guest SHOULD handle them. +pub const VIRTIO_SCSI_F_CHANGE: u32 = 2; +/// The extended fields for T10 protection information (DIF/DIX) are included in the SCSI request +/// header. +pub const VIRTIO_SCSI_F_T10_PI: u32 = 3; /// The IO type of virtio block, refer to Virtio Spec. /// Read. @@ -216,10 +241,61 @@ pub const VIRTIO_BLK_T_OUT: u32 = 1; pub const VIRTIO_BLK_T_FLUSH: u32 = 4; /// Device id pub const VIRTIO_BLK_T_GET_ID: u32 = 8; +/// Discard command. +pub const VIRTIO_BLK_T_DISCARD: u32 = 11; +/// Write zeroes command. 
+pub const VIRTIO_BLK_T_WRITE_ZEROES: u32 = 13; /// Device id length pub const VIRTIO_BLK_ID_BYTES: u32 = 20; /// Success -pub const VIRTIO_BLK_S_OK: u32 = 0; +pub const VIRTIO_BLK_S_OK: u8 = 0; +/// IO Error. +pub const VIRTIO_BLK_S_IOERR: u8 = 1; +/// Unsupported. +pub const VIRTIO_BLK_S_UNSUPP: u8 = 2; + +/// The Type of virtio gpu, refer to Virtio Spec. +/// 2D commands: +/// Retrieve the current output configuration. +pub const VIRTIO_GPU_CMD_GET_DISPLAY_INFO: u32 = 0x0100; +/// Create a 2D resource on the host. +pub const VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: u32 = 0x0101; +/// Destroy a resource on the host. +pub const VIRTIO_GPU_CMD_RESOURCE_UNREF: u32 = 0x0102; +/// Set the scanout parameters for a single output. +pub const VIRTIO_GPU_CMD_SET_SCANOUT: u32 = 0x0103; +/// Flush a scanout resource. +pub const VIRTIO_GPU_CMD_RESOURCE_FLUSH: u32 = 0x0104; +/// Transfer from guest memory to host resource. +pub const VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: u32 = 0x0105; +/// Assign backing pages to a resource. +pub const VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: u32 = 0x0106; +/// Detach backing pages from a resource. +pub const VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: u32 = 0x0107; +/// Retrieve the EDID data for a given scanout. +pub const VIRTIO_GPU_CMD_GET_EDID: u32 = 0x010a; +/// update cursor +pub const VIRTIO_GPU_CMD_UPDATE_CURSOR: u32 = 0x0300; +/// move cursor +pub const VIRTIO_GPU_CMD_MOVE_CURSOR: u32 = 0x0301; +/// Success for cmd without data back. +pub const VIRTIO_GPU_RESP_OK_NODATA: u32 = 0x1100; +/// Success for VIRTIO_GPU_CMD_GET_DISPLAY_INFO. +pub const VIRTIO_GPU_RESP_OK_DISPLAY_INFO: u32 = 0x1101; +/// Success for VIRTIO_GPU_CMD_GET_EDID. +pub const VIRTIO_GPU_RESP_OK_EDID: u32 = 0x1104; +/// unspecificated +pub const VIRTIO_GPU_RESP_ERR_UNSPEC: u32 = 0x1200; +/// out of host memory +pub const VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY: u32 = 0x1201; +/// invalid id of scanout +pub const VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID: u32 = 0x1202; +/// invalid id of 2D resource +pub const VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID: u32 = 0x1203; +/// invalid parameter +pub const VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER: u32 = 0x1205; +/// Flags in virtio gpu cmd which means need a fence. +pub const VIRTIO_GPU_FLAG_FENCE: u32 = 1 << 0; /// Interrupt status: Used Buffer Notification pub const VIRTIO_MMIO_INT_VRING: u32 = 0x01; @@ -232,7 +308,7 @@ pub const NOTIFY_REG_OFFSET: u32 = 0x50; /// Packet header, refer to Virtio Spec. #[repr(C)] -#[derive(Debug, Default, Copy, Clone, PartialEq)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] pub struct VirtioNetHdr { pub flags: u8, pub gso_type: u8, @@ -250,32 +326,372 @@ pub enum VirtioInterruptType { } pub type VirtioInterrupt = - Box) -> Result<()> + Send + Sync>; + Box, bool) -> Result<()> + Send + Sync>; + +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum VirtioDeviceQuirk { + VirtioGpuEnableBar0, + VirtioDeviceQuirkMax, +} + +#[derive(Default)] +pub struct VirtioBase { + /// Device type + device_type: u32, + /// Bit mask of features supported by the backend. + device_features: u64, + /// Bit mask of features negotiated by the backend and the frontend. + driver_features: u64, + /// Device (host) feature-setting selector. + hfeatures_sel: u32, + /// Driver (guest) feature-setting selector. + gfeatures_sel: u32, + /// Interrupt status. + interrupt_status: Arc, + /// Device status. + device_status: Arc, + /// If this device is activated or not. + device_activated: Arc, + /// Configuration atomicity value. 
+ config_generation: Arc, + /// The MSI-X vector for config change notification. + config_vector: Arc, + /// The type of queue, split-vring or packed-vring. + queue_type: u16, + /// The number of device queues. + queue_num: usize, + /// The max size of each queue. + queue_size_max: u16, + /// Queue selector. + queue_select: u16, + /// The configuration of queues. + queues_config: Vec, + /// Virtio queues. + queues: Vec>>, + /// Eventfd for device deactivate. + deactivate_evts: Vec, + /// Device is broken or not. + broken: Arc, +} + +#[derive(Copy, Clone, ByteCode)] +struct VirtioBaseState { + device_activated: bool, + hfeatures_sel: u32, + gfeatures_sel: u32, + interrupt_status: u32, + device_status: u32, + config_generation: u8, + queue_select: u16, + config_vector: u16, + queues_config: [QueueConfig; 32], + /// The number of activated queues. + queue_num: usize, + queue_type: u16, +} + +impl VirtioBase { + fn new(device_type: u32, queue_num: usize, queue_size_max: u16) -> Self { + Self { + device_type, + config_vector: Arc::new(AtomicU16::new(INVALID_VECTOR_NUM)), + queue_num, + queue_size_max, + queue_type: QUEUE_TYPE_SPLIT_VRING, + queues_config: vec![QueueConfig::new(queue_size_max); queue_num], + ..Default::default() + } + } + + fn reset(&mut self) { + // device_type, device_features, queue_num and queue_size_max + // is not mutable, thus no need to reset. + self.driver_features = 0; + self.hfeatures_sel = 0; + self.gfeatures_sel = 0; + self.interrupt_status.store(0, Ordering::SeqCst); + self.device_status.store(0, Ordering::SeqCst); + self.device_activated.store(false, Ordering::SeqCst); + self.config_generation.store(0, Ordering::SeqCst); + self.config_vector + .store(INVALID_VECTOR_NUM, Ordering::SeqCst); + self.queue_type = QUEUE_TYPE_SPLIT_VRING; + self.queue_select = 0; + self.queues_config.iter_mut().for_each(|q| q.reset()); + self.queues.clear(); + self.broken.store(false, Ordering::SeqCst); + } + + fn get_state(&self) -> VirtioBaseState { + let mut state = VirtioBaseState { + device_activated: self.device_activated.load(Ordering::Acquire), + hfeatures_sel: self.hfeatures_sel, + gfeatures_sel: self.gfeatures_sel, + interrupt_status: self.interrupt_status.load(Ordering::Acquire), + device_status: self.device_status.load(Ordering::Acquire), + config_generation: self.config_generation.load(Ordering::Acquire), + queue_select: self.queue_select, + config_vector: self.config_vector.load(Ordering::Acquire), + queues_config: [QueueConfig::default(); 32], + queue_num: 0, + queue_type: self.queue_type, + }; + + for (index, queue) in self.queues_config.iter().enumerate() { + state.queues_config[index] = *queue; + } + for (index, queue) in self.queues.iter().enumerate() { + state.queues_config[index] = queue.lock().unwrap().vring.get_queue_config(); + state.queue_num += 1; + } + + state + } + + fn set_state( + &mut self, + state: &VirtioBaseState, + mem_space: Arc, + interrupt_cb: Arc, + ) { + self.device_activated + .store(state.device_activated, Ordering::SeqCst); + self.hfeatures_sel = state.hfeatures_sel; + self.gfeatures_sel = state.gfeatures_sel; + self.interrupt_status + .store(state.interrupt_status, Ordering::SeqCst); + self.device_status + .store(state.device_status, Ordering::SeqCst); + self.config_generation + .store(state.config_generation, Ordering::SeqCst); + self.queue_select = state.queue_select; + self.config_vector + .store(state.config_vector, Ordering::SeqCst); + self.queues_config = state.queues_config[..self.queue_num].to_vec(); + self.queue_type = 
state.queue_type; + + if state.queue_num == 0 { + return; + } + + let mut queues = Vec::with_capacity(self.queue_num); + for queue_config in self.queues_config.iter_mut().take(state.queue_num) { + if queue_config.ready { + queue_config.set_addr_cache( + mem_space.clone(), + interrupt_cb.clone(), + self.driver_features, + &self.broken, + ); + } + queues.push(Arc::new(Mutex::new( + Queue::new(*queue_config, self.queue_type).unwrap(), + ))); + } + self.queues = queues; + } +} /// The trait for virtio device operations. -pub trait VirtioDevice: Send { +pub trait VirtioDevice: Send + AsAny { + /// Get base property of virtio device. + fn virtio_base(&self) -> &VirtioBase; + + /// Get mutable base property virtio device. + fn virtio_base_mut(&mut self) -> &mut VirtioBase; + /// Realize low level device. fn realize(&mut self) -> Result<()>; - /// Unrealize low level device + /// Unrealize low level device. fn unrealize(&mut self) -> Result<()> { bail!("Unrealize of the virtio device is not implemented"); } /// Get the virtio device type, refer to Virtio Spec. - fn device_type(&self) -> u32; + fn device_type(&self) -> u32 { + self.virtio_base().device_type + } + + /// Get the virtio device customized modification. + fn device_quirk(&self) -> Option { + None + } /// Get the count of virtio device queues. - fn queue_num(&self) -> usize; + fn queue_num(&self) -> usize { + self.virtio_base().queue_num + } /// Get the queue size of virtio device. - fn queue_size(&self) -> u16; + fn queue_size_max(&self) -> u16 { + self.virtio_base().queue_size_max + } + + /// Init device configure space and features. + fn init_config_features(&mut self) -> Result<()>; /// Get device features from host. - fn get_device_features(&self, features_select: u32) -> u32; + fn device_features(&self, features_select: u32) -> u32 { + read_u32(self.virtio_base().device_features, features_select) + } /// Set driver features by guest. - fn set_driver_features(&mut self, page: u32, value: u32); + fn set_driver_features(&mut self, page: u32, value: u32) { + let mut v = value; + let unsupported_features = value & !self.device_features(page); + if unsupported_features != 0 { + warn!( + "Receive acknowledge request with unknown feature: {:x}", + write_u32(value, page) + ); + v &= !unsupported_features; + } + + let features = if page == 0 { + u64::from(self.driver_features(1)) << 32 | u64::from(v) + } else { + u64::from(v) << 32 | u64::from(self.driver_features(0)) + }; + self.virtio_base_mut().driver_features = features; + } + + /// Get driver features by guest. + fn driver_features(&self, features_select: u32) -> u32 { + read_u32(self.virtio_base().driver_features, features_select) + } + + /// Get host feature selector. + fn hfeatures_sel(&self) -> u32 { + self.virtio_base().hfeatures_sel + } + + /// Set host feature selector. + fn set_hfeatures_sel(&mut self, val: u32) { + self.virtio_base_mut().hfeatures_sel = val; + } + + /// Get guest feature selector. + fn gfeatures_sel(&self) -> u32 { + self.virtio_base().gfeatures_sel + } + + /// Set guest feature selector. + fn set_gfeatures_sel(&mut self, val: u32) { + self.virtio_base_mut().gfeatures_sel = val; + } + + /// Check whether virtio device status is as expected. + fn check_device_status(&self, set: u32, clr: u32) -> bool { + self.device_status() & (set | clr) == set + } + + /// Get the status of virtio device. + fn device_status(&self) -> u32 { + self.virtio_base().device_status.load(Ordering::Acquire) + } + + /// Set the status of virtio device. 
+ fn set_device_status(&mut self, val: u32) { + self.virtio_base_mut() + .device_status + .store(val, Ordering::SeqCst) + } + + /// Check device is activated or not. + fn device_activated(&self) -> bool { + self.virtio_base().device_activated.load(Ordering::Acquire) + } + + /// Set device activate status. + fn set_device_activated(&mut self, val: bool) { + self.virtio_base_mut() + .device_activated + .store(val, Ordering::SeqCst) + } + + /// Get config generation. + fn config_generation(&self) -> u8 { + self.virtio_base().config_generation.load(Ordering::Acquire) + } + + /// Set config generation. + fn set_config_generation(&mut self, val: u8) { + self.virtio_base_mut() + .config_generation + .store(val, Ordering::SeqCst); + } + + /// Get msix vector of config change interrupt. + fn config_vector(&self) -> u16 { + self.virtio_base().config_vector.load(Ordering::Acquire) + } + + /// Set msix vector of config change interrupt. + fn set_config_vector(&mut self, val: u16) { + self.virtio_base_mut() + .config_vector + .store(val, Ordering::SeqCst); + } + + /// Get virtqueue type. + fn queue_type(&self) -> u16 { + self.virtio_base().queue_type + } + + /// Set virtqueue type. + fn set_queue_type(&mut self, val: u16) { + self.virtio_base_mut().queue_type = val; + } + + /// Get virtqueue selector. + fn queue_select(&self) -> u16 { + self.virtio_base().queue_select + } + + /// Set virtqueue selector. + fn set_queue_select(&mut self, val: u16) { + self.virtio_base_mut().queue_select = val; + } + + /// Get virtqueue config. + fn queue_config(&self) -> Result<&QueueConfig> { + let queues_config = &self.virtio_base().queues_config; + let queue_select = self.virtio_base().queue_select; + queues_config + .get(queue_select as usize) + .with_context(|| "queue_select overflows") + } + + /// Get mutable virtqueue config. + fn queue_config_mut(&mut self, need_check: bool) -> Result<&mut QueueConfig> { + if need_check + && !self.check_device_status( + CONFIG_STATUS_FEATURES_OK, + CONFIG_STATUS_DRIVER_OK | CONFIG_STATUS_FAILED, + ) + { + return Err(anyhow!(VirtioError::DevStatErr(self.device_status()))); + } + + let queue_select = self.virtio_base().queue_select; + let queues_config = &mut self.virtio_base_mut().queues_config; + return queues_config + .get_mut(queue_select as usize) + .with_context(|| "queue_select overflows"); + } + + /// Get ISR register. + fn interrupt_status(&self) -> u32 { + self.virtio_base().interrupt_status.load(Ordering::Acquire) + } + + /// Set ISR register. + fn set_interrupt_status(&mut self, val: u32) { + self.virtio_base_mut() + .interrupt_status + .store(val, Ordering::SeqCst) + } /// Read data of config from guest. fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()>; @@ -289,16 +705,14 @@ pub trait VirtioDevice: Send { /// # Arguments /// /// * `mem_space` - System mem. - /// * `interrupt_evt` - The eventfd used to send interrupt to guest. - /// * `interrupt_status` - The interrupt status present to guest. + /// * `interrupt_cb` - The callback used to send interrupt to guest. /// * `queues` - The virtio queues. /// * `queue_evts` - The notifier events from guest. fn activate( &mut self, mem_space: Arc, interrupt_cb: Arc, - queues: &[Arc>], - queue_evts: Vec, + queue_evts: Vec>, ) -> Result<()>; /// Deactivate virtio device, this function remove event fd @@ -310,7 +724,8 @@ pub trait VirtioDevice: Send { ); } - /// Reset virtio device. + /// Reset virtio device, used to do some special reset action for + /// different device. 
fn reset(&mut self) -> Result<()> { Ok(()) } @@ -320,8 +735,220 @@ pub trait VirtioDevice: Send { /// /// # Arguments /// - /// * `_file_path` - The related backend file path. - fn update_config(&mut self, _dev_config: Option>) -> Result<()> { + /// * `_configs` - The related configs for device. + /// eg: DriveConfig and VirtioBlkDevConfig for virtio blk device. + fn update_config(&mut self, _configs: Vec>) -> Result<()> { bail!("Unsupported to update configuration") } + + /// Set guest notifiers for notifying the guest. + /// + /// # Arguments + /// + /// * `_queue_evts` - The notifier events from host. + fn set_guest_notifiers(&mut self, _queue_evts: &[Arc]) -> Result<()> { + Ok(()) + } + + /// Get whether the virtio device has a control queue, + /// devices with a control queue should override this function. + fn has_control_queue(&self) -> bool { + false + } +} + +/// Check boundary for config space rw. +fn check_config_space_rw(config: &[u8], offset: u64, data: &[u8]) -> Result<()> { + let config_len = config.len() as u64; + let data_len = data.len() as u64; + offset + .checked_add(data_len) + .filter(|&end| end <= config_len) + .with_context(|| VirtioError::DevConfigOverflow(offset, data_len, config_len))?; + Ok(()) +} + +/// Default implementation for config space read. +fn read_config_default(config: &[u8], offset: u64, mut data: &mut [u8]) -> Result<()> { + check_config_space_rw(config, offset, data)?; + let read_end = offset as usize + data.len(); + data.write_all(&config[offset as usize..read_end])?; + Ok(()) +} + +/// The function used to inject interrupt to guest when encounter an virtio error. +pub fn report_virtio_error( + interrupt_cb: Arc, + features: u64, + broken: &Arc, +) { + if virtio_has_feature(features, VIRTIO_F_VERSION_1) { + interrupt_cb(&VirtioInterruptType::Config, None, true).unwrap_or_else(|e| { + error!( + "Failed to trigger interrupt for virtio error, error is {:?}", + e + ) + }); + } + // The device should not work when meeting virtio error. + broken.store(true, Ordering::SeqCst); +} + +/// Read object typed `T` from iovec. +pub fn iov_read_object( + mem_space: &Arc, + iovec: &[ElemIovec], + cache: &Option, +) -> Result { + let mut obj = T::default(); + let count = iov_to_buf(mem_space, cache, iovec, obj.as_mut_bytes())?; + let size = size_of::(); + if count < size { + bail!("Read length error: expected {}, read {}.", size, count); + } + Ok(obj) +} + +/// Read iovec to buf and return the read number of bytes. +pub fn iov_to_buf( + mem_space: &AddressSpace, + cache: &Option, + iovec: &[ElemIovec], + buf: &mut [u8], +) -> Result { + let mut start: usize = 0; + let mut end: usize = 0; + + // Note: iovec is part of elem.in_iovec/out_iovec which has been checked + // in pop_avail(). The sum of iov_len is not greater than u32::MAX. + for iov in iovec { + let mut addr_map = Vec::new(); + mem_space.get_address_map(cache, iov.addr, u64::from(iov.len), &mut addr_map)?; + for addr in addr_map.into_iter() { + end = cmp::min(start + addr.iov_len as usize, buf.len()); + // SAFETY: addr_map is generated by address_space and len is not less than buf's. + unsafe { + mem_to_buf(&mut buf[start..end], addr.iov_base)?; + } + if end >= buf.len() { + return Ok(end); + } + start = end; + } + } + Ok(end) +} + +/// Discard "size" bytes of the front of iovec. 
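+///
+/// A minimal usage sketch (the iovec setup is assumed; only the behaviour shown is
+/// derived from this function):
+/// ```ignore
+/// // `iovs` holds two guest buffers of length 4 and 8; discard the first 6 bytes.
+/// let rest = iov_discard_front(&mut iovs, 6).unwrap();
+/// assert_eq!(rest.len(), 1);  // the 4-byte entry is consumed entirely
+/// assert_eq!(rest[0].len, 6); // the 8-byte entry is advanced by 2 bytes
+/// ```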
+pub fn iov_discard_front(iovec: &mut [ElemIovec], mut size: u64) -> Option<&mut [ElemIovec]> { + for (index, iov) in iovec.iter_mut().enumerate() { + if u64::from(iov.len) > size { + iov.addr.0 += size; + iov.len -= size as u32; + return Some(&mut iovec[index..]); + } + size -= u64::from(iov.len); + } + None +} + +/// Discard "size" bytes of the back of iovec. +pub fn iov_discard_back(iovec: &mut [ElemIovec], mut size: u64) -> Option<&mut [ElemIovec]> { + let len = iovec.len(); + for (index, iov) in iovec.iter_mut().rev().enumerate() { + if u64::from(iov.len) > size { + iov.len -= size as u32; + return Some(&mut iovec[..(len - index)]); + } + size -= u64::from(iov.len); + } + None +} + +/// Convert GPA buffer iovec to HVA buffer iovec. +/// If don't need the entire iovec, use iov_discard_front/iov_discard_back firstly. +fn gpa_hva_iovec_map( + gpa_elemiovec: &[ElemIovec], + mem_space: &AddressSpace, + cache: &Option, +) -> Result<(u64, Vec)> { + let mut iov_size: u64 = 0; + let mut hva_iovec = Vec::with_capacity(gpa_elemiovec.len()); + + // Note: gpa_elemiovec is part of elem.in_iovec/out_iovec which has been checked + // in pop_avail(). The sum of iov_len is not greater than u32::MAX. + for elem in gpa_elemiovec.iter() { + mem_space.get_address_map(cache, elem.addr, u64::from(elem.len), &mut hva_iovec)?; + iov_size += u64::from(elem.len); + } + + Ok((iov_size, hva_iovec)) +} + +pub fn virtio_register_sysbusdevops_type() -> Result<()> { + register_sysbusdevops_type::() +} + +pub fn virtio_register_pcidevops_type() -> Result<()> { + register_pcidevops_type::() +} + +#[cfg(test)] +mod tests { + use std::sync::{Arc, Mutex}; + + use address_space::{AddressSpace, GuestAddress, HostMemMapping, Region}; + use devices::sysbus::{SysBus, IRQ_BASE, IRQ_MAX}; + + pub const MEMORY_SIZE: u64 = 1024 * 1024; + + pub fn sysbus_init() -> Arc> { + let sys_mem = AddressSpace::new( + Region::init_container_region(u64::max_value(), "sys_mem"), + "sys_mem", + None, + ) + .unwrap(); + #[cfg(target_arch = "x86_64")] + let sys_io = AddressSpace::new( + Region::init_container_region(1 << 16, "sys_io"), + "sys_io", + None, + ) + .unwrap(); + let free_irqs: (i32, i32) = (IRQ_BASE, IRQ_MAX); + let mmio_region: (u64, u64) = (0x0A00_0000, 0x1000_0000); + Arc::new(Mutex::new(SysBus::new( + #[cfg(target_arch = "x86_64")] + &sys_io, + &sys_mem, + free_irqs, + mmio_region, + ))) + } + + pub fn address_space_init() -> Arc { + let root = Region::init_container_region(1 << 36, "root"); + let sys_space = AddressSpace::new(root, "sys_space", None).unwrap(); + let host_mmap = Arc::new( + HostMemMapping::new( + GuestAddress(0), + None, + MEMORY_SIZE, + None, + false, + false, + false, + ) + .unwrap(), + ); + sys_space + .root() + .add_subregion( + Region::init_ram_region(host_mmap.clone(), "region_1"), + host_mmap.start_address().raw_value(), + ) + .unwrap(); + sys_space + } } diff --git a/virtio/src/net.rs b/virtio/src/net.rs deleted file mode 100644 index 59fed99a76a4c216349ef1f2941f3408b32e8e8a..0000000000000000000000000000000000000000 --- a/virtio/src/net.rs +++ /dev/null @@ -1,1091 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. -// -// StratoVirt is licensed under Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan -// PSL v2. 
-// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -// See the Mulan PSL v2 for more details. - -use std::io::Write; -use std::os::unix::io::{AsRawFd, RawFd}; -use std::path::Path; -use std::sync::mpsc::{channel, Receiver, Sender}; -use std::sync::{Arc, Mutex}; -use std::{cmp, fs, mem}; - -use address_space::AddressSpace; -use machine_manager::{ - config::{ConfigCheck, NetworkInterfaceConfig}, - event_loop::EventLoop, -}; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; -use util::byte_code::ByteCode; -use util::loop_context::{ - read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, -}; -use util::num_ops::{read_u32, write_u32}; -use util::tap::{Tap, IFF_MULTI_QUEUE, TUN_F_VIRTIO}; -use vmm_sys_util::{epoll::EventSet, eventfd::EventFd}; - -use super::errors::{ErrorKind, Result, ResultExt}; -use super::{ - Queue, VirtioDevice, VirtioInterrupt, VirtioInterruptType, VirtioNetHdr, - VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1, VIRTIO_NET_CTRL_MQ, - VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN, - VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, - VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_UFO, - VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MQ, - VIRTIO_NET_OK, VIRTIO_TYPE_NET, -}; - -/// Number of virtqueues. -const QUEUE_NUM_NET: usize = 2; -/// Size of each virtqueue. -const QUEUE_SIZE_NET: u16 = 256; - -type SenderConfig = Option; - -/// Configuration of virtio-net devices. -#[repr(C, packed)] -#[derive(Copy, Clone, Debug, Default)] -pub struct VirtioNetConfig { - /// Mac Address. - pub mac: [u8; 6], - /// Device status. - pub status: u16, - /// Maximum number of each of transmit and receive queues. - pub max_virtqueue_pairs: u16, - /// Maximum Transmission Unit. - pub mtu: u16, - /// Speed, in units of 1Mb. - pub speed: u32, - /// 0x00 - half duplex - /// 0x01 - full duplex - pub duplex: u8, -} - -impl ByteCode for VirtioNetConfig {} - -/// The control queue is used to verify the multi queue feature. -pub struct CtrlVirtio { - queue: Arc>, - queue_evt: EventFd, -} - -impl CtrlVirtio { - pub fn new(queue: Arc>, queue_evt: EventFd) -> Self { - Self { queue, queue_evt } - } -} - -/// Handle the frontend and the backend control channel virtio queue events and data. -pub struct NetCtrlHandler { - /// The control virtio queue. - pub ctrl: CtrlVirtio, - /// Memory space. - pub mem_space: Arc, - /// The interrupt call back function. - pub interrupt_cb: Arc, - // Bit mask of features negotiated by the backend and the frontend. 
- pub driver_features: u64, -} - -#[repr(C, packed)] -#[derive(Copy, Clone, Debug, Default)] -struct CrtlHdr { - class: u8, - cmd: u8, -} - -impl ByteCode for CrtlHdr {} - -impl NetCtrlHandler { - fn handle_ctrl(&mut self) -> Result<()> { - let elem = self - .ctrl - .queue - .lock() - .unwrap() - .vring - .pop_avail(&self.mem_space, self.driver_features) - .chain_err(|| "Failed to pop avail ring for net control queue")?; - - let mut used_len = 0; - if let Some(ctrl_desc) = elem.out_iovec.get(0) { - used_len += ctrl_desc.len; - let ctrl_hdr = self - .mem_space - .read_object::(ctrl_desc.addr) - .chain_err(|| "Failed to get control queue descriptor")?; - match ctrl_hdr.class as u16 { - VIRTIO_NET_CTRL_MQ => { - if ctrl_hdr.cmd as u16 != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET { - bail!( - "Control queue header command can't match {}", - VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET - ); - } - if let Some(mq_desc) = elem.out_iovec.get(0) { - used_len += mq_desc.len; - let queue_pairs = self - .mem_space - .read_object::(mq_desc.addr) - .chain_err(|| "Failed to read multi queue descriptor")?; - if !(VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN..=VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) - .contains(&queue_pairs) - { - bail!("Invalid queue pairs {}", queue_pairs); - } - } - } - _ => { - bail!( - "Control queue header class can't match {}", - VIRTIO_NET_CTRL_MQ - ); - } - } - } - if let Some(status) = elem.in_iovec.get(0) { - used_len += status.len; - let data = VIRTIO_NET_OK; - self.mem_space.write_object::(&data, status.addr)?; - } - - self.ctrl - .queue - .lock() - .unwrap() - .vring - .add_used(&self.mem_space, elem.index, used_len) - .chain_err(|| format!("Failed to add used ring {}", elem.index))?; - - (self.interrupt_cb)( - &VirtioInterruptType::Vring, - Some(&self.ctrl.queue.lock().unwrap()), - ) - .chain_err(|| ErrorKind::InterruptTrigger("ctrl", VirtioInterruptType::Vring))?; - - Ok(()) - } -} - -impl EventNotifierHelper for NetCtrlHandler { - fn internal_notifiers(net_io: Arc>) -> Vec { - let locked_net_io = net_io.lock().unwrap(); - let cloned_net_io = net_io.clone(); - let handler: Box = Box::new(move |_, fd: RawFd| { - read_fd(fd); - cloned_net_io - .lock() - .unwrap() - .handle_ctrl() - .unwrap_or_else(|e| error!("Failed to handle ctrl queue, error is {}.", e)); - None - }); - let mut notifiers = Vec::new(); - let ctrl_fd = locked_net_io.ctrl.queue_evt.as_raw_fd(); - notifiers.push(build_event_notifier( - ctrl_fd, - Some(handler), - NotifierOperation::AddShared, - EventSet::IN, - )); - - notifiers - } -} - -struct TxVirtio { - queue: Arc>, - queue_evt: EventFd, -} - -impl TxVirtio { - fn new(queue: Arc>, queue_evt: EventFd) -> Self { - TxVirtio { queue, queue_evt } - } -} - -struct RxVirtio { - queue_full: bool, - need_irqs: bool, - queue: Arc>, - queue_evt: EventFd, -} - -impl RxVirtio { - fn new(queue: Arc>, queue_evt: EventFd) -> Self { - RxVirtio { - queue_full: false, - need_irqs: false, - queue, - queue_evt, - } - } -} - -struct NetIoHandler { - rx: RxVirtio, - tx: TxVirtio, - tap: Option, - tap_fd: RawFd, - mem_space: Arc, - interrupt_cb: Arc, - driver_features: u64, - receiver: Receiver, - update_evt: RawFd, - deactivate_evt: RawFd, - is_listening: bool, -} - -impl NetIoHandler { - fn handle_rx(&mut self) -> Result<()> { - let mut queue = self.rx.queue.lock().unwrap(); - while let Some(tap) = self.tap.as_mut() { - if queue.vring.avail_ring_len(&self.mem_space)? 
== 0 { - self.rx.queue_full = true; - break; - } - let elem = queue - .vring - .pop_avail(&self.mem_space, self.driver_features) - .chain_err(|| "Failed to pop avail ring for net rx")?; - let mut iovecs = Vec::new(); - for elem_iov in elem.in_iovec.iter() { - let host_addr = queue - .vring - .get_host_address_from_cache(elem_iov.addr, &self.mem_space); - if host_addr != 0 { - let iovec = libc::iovec { - iov_base: host_addr as *mut libc::c_void, - iov_len: elem_iov.len as libc::size_t, - }; - iovecs.push(iovec); - } else { - error!("Failed to get host address for {}", elem_iov.addr.0); - } - } - let write_count = unsafe { - libc::readv( - tap.as_raw_fd() as libc::c_int, - iovecs.as_ptr() as *const libc::iovec, - iovecs.len() as libc::c_int, - ) - }; - if write_count < 0 { - let e = std::io::Error::last_os_error(); - queue.vring.push_back(); - if e.kind() == std::io::ErrorKind::WouldBlock { - break; - } - - // If the backend tap device is removed, readv returns less than 0. - // At this time, the content in the tap needs to be cleaned up. - // Here, read is called to process, otherwise handle_rx may be triggered all the time. - let mut buf = [0; 1024]; - match tap.read(&mut buf) { - Ok(cnt) => error!("Failed to call readv but tap read is ok: cnt {}", cnt), - Err(e) => { - // When the backend tap device is abnormally removed, read return EBADFD. - error!("Failed to read tap: {}", e); - } - } - bail!("Failed to call readv for net handle_rx: {}", e); - } - - queue - .vring - .add_used(&self.mem_space, elem.index, write_count as u32) - .chain_err(|| { - format!( - "Failed to add used ring for net rx, index: {}, len: {}", - elem.index, write_count - ) - })?; - self.rx.need_irqs = true; - } - - if self.rx.need_irqs { - self.rx.need_irqs = false; - (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue)) - .chain_err(|| ErrorKind::InterruptTrigger("net", VirtioInterruptType::Vring))?; - } - - Ok(()) - } - - fn handle_tx(&mut self) -> Result<()> { - let mut queue = self.tx.queue.lock().unwrap(); - let mut need_irq = false; - - while let Ok(elem) = queue.vring.pop_avail(&self.mem_space, self.driver_features) { - let mut iovecs = Vec::new(); - for elem_iov in elem.out_iovec.iter() { - let host_addr = queue - .vring - .get_host_address_from_cache(elem_iov.addr, &self.mem_space); - if host_addr != 0 { - let iovec = libc::iovec { - iov_base: host_addr as *mut libc::c_void, - iov_len: elem_iov.len as libc::size_t, - }; - iovecs.push(iovec); - } else { - error!("Failed to get host address for {}", elem_iov.addr.0); - } - } - let mut read_len = 0; - if let Some(tap) = self.tap.as_mut() { - if !iovecs.is_empty() { - read_len = unsafe { - libc::writev( - tap.as_raw_fd() as libc::c_int, - iovecs.as_ptr() as *const libc::iovec, - iovecs.len() as libc::c_int, - ) - }; - } - }; - if read_len < 0 { - let e = std::io::Error::last_os_error(); - bail!("Failed to call writev for net handle_tx: {}", e); - } - - queue - .vring - .add_used(&self.mem_space, elem.index, 0) - .chain_err(|| format!("Net tx: Failed to add used ring {}", elem.index))?; - - need_irq = true; - } - - if need_irq { - (self.interrupt_cb)(&VirtioInterruptType::Vring, Some(&queue)) - .chain_err(|| ErrorKind::InterruptTrigger("net", VirtioInterruptType::Vring))?; - } - - Ok(()) - } - - fn update_evt_handler(net_io: &Arc>) -> Vec { - let mut locked_net_io = net_io.lock().unwrap(); - locked_net_io.tap = match locked_net_io.receiver.recv() { - Ok(tap) => tap, - Err(e) => { - error!("Failed to receive the tap {}", e); - None - } - }; - let 
old_tap_fd = locked_net_io.tap_fd; - locked_net_io.tap_fd = -1; - if let Some(tap) = locked_net_io.tap.as_ref() { - locked_net_io.tap_fd = tap.as_raw_fd(); - } - - let mut notifiers = vec![ - build_event_notifier( - locked_net_io.update_evt, - None, - NotifierOperation::Delete, - EventSet::IN, - ), - build_event_notifier( - locked_net_io.rx.queue_evt.as_raw_fd(), - None, - NotifierOperation::Delete, - EventSet::IN, - ), - build_event_notifier( - locked_net_io.tx.queue_evt.as_raw_fd(), - None, - NotifierOperation::Delete, - EventSet::IN, - ), - ]; - if old_tap_fd != -1 { - notifiers.push(build_event_notifier( - old_tap_fd, - None, - NotifierOperation::Delete, - EventSet::IN, - )); - } - drop(locked_net_io); - - notifiers.append(&mut EventNotifierHelper::internal_notifiers(net_io.clone())); - notifiers - } - - fn deactivate_evt_handler(&mut self) -> Vec { - let mut notifiers = vec![ - EventNotifier::new( - NotifierOperation::Delete, - self.update_evt, - None, - EventSet::IN, - Vec::new(), - ), - EventNotifier::new( - NotifierOperation::Delete, - self.deactivate_evt, - None, - EventSet::IN, - Vec::new(), - ), - EventNotifier::new( - NotifierOperation::Delete, - self.rx.queue_evt.as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - ), - EventNotifier::new( - NotifierOperation::Delete, - self.tx.queue_evt.as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - ), - ]; - if self.tap_fd != -1 { - notifiers.push(EventNotifier::new( - NotifierOperation::Delete, - self.tap_fd, - None, - EventSet::IN, - Vec::new(), - )); - self.tap_fd = -1; - } - - notifiers - } -} - -fn build_event_notifier( - fd: RawFd, - handler: Option>, - op: NotifierOperation, - event: EventSet, -) -> EventNotifier { - let mut handlers = Vec::new(); - if let Some(h) = handler { - handlers.push(Arc::new(Mutex::new(h))); - } - EventNotifier::new(op, fd, None, event, handlers) -} - -impl EventNotifierHelper for NetIoHandler { - fn internal_notifiers(net_io: Arc>) -> Vec { - // Register event notifier for update_evt. - let locked_net_io = net_io.lock().unwrap(); - let cloned_net_io = net_io.clone(); - let handler: Box = Box::new(move |_, fd: RawFd| { - read_fd(fd); - Some(NetIoHandler::update_evt_handler(&cloned_net_io)) - }); - let mut notifiers = Vec::new(); - let update_fd = locked_net_io.update_evt; - notifiers.push(build_event_notifier( - update_fd, - Some(handler), - NotifierOperation::AddShared, - EventSet::IN, - )); - - // Register event notifier for deactivate_evt. - let cloned_net_io = net_io.clone(); - let handler: Box = Box::new(move |_, fd: RawFd| { - read_fd(fd); - Some(cloned_net_io.lock().unwrap().deactivate_evt_handler()) - }); - notifiers.push(build_event_notifier( - locked_net_io.deactivate_evt, - Some(handler), - NotifierOperation::AddShared, - EventSet::IN, - )); - - // Register event notifier for rx. - let cloned_net_io = net_io.clone(); - let handler: Box = Box::new(move |_, fd: RawFd| { - let mut locked_net_io = cloned_net_io.lock().unwrap(); - read_fd(fd); - if let Some(tap) = locked_net_io.tap.as_ref() { - if !locked_net_io.is_listening { - let notifier = vec![EventNotifier::new( - NotifierOperation::Resume, - tap.as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - )]; - locked_net_io.is_listening = true; - return Some(notifier); - } - } - None - }); - let rx_fd = locked_net_io.rx.queue_evt.as_raw_fd(); - notifiers.push(build_event_notifier( - rx_fd, - Some(handler), - NotifierOperation::AddShared, - EventSet::IN, - )); - - // Register event notifier for tx. 
- let cloned_net_io = net_io.clone(); - let handler: Box = Box::new(move |_, fd: RawFd| { - read_fd(fd); - if let Err(ref e) = cloned_net_io.lock().unwrap().handle_tx() { - error!( - "Failed to handle tx(tx event) for net, {}", - error_chain::ChainedError::display_chain(e) - ); - } - None - }); - let tx_fd = locked_net_io.tx.queue_evt.as_raw_fd(); - notifiers.push(build_event_notifier( - tx_fd, - Some(handler), - NotifierOperation::AddShared, - EventSet::IN, - )); - - // Register event notifier for tap. - let cloned_net_io = net_io.clone(); - if let Some(tap) = locked_net_io.tap.as_ref() { - let handler: Box = Box::new(move |_, _| { - let mut locked_net_io = cloned_net_io.lock().unwrap(); - if let Err(ref e) = locked_net_io.handle_rx() { - error!( - "Failed to handle rx(tap event), {}", - error_chain::ChainedError::display_chain(e) - ); - } - - if let Some(tap) = locked_net_io.tap.as_ref() { - if locked_net_io.rx.queue_full { - let notifier = vec![EventNotifier::new( - NotifierOperation::Park, - tap.as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - )]; - locked_net_io.is_listening = false; - locked_net_io.rx.queue_full = false; - return Some(notifier); - } - } - None - }); - let tap_fd = tap.as_raw_fd(); - notifiers.push(build_event_notifier( - tap_fd, - Some(handler), - NotifierOperation::AddShared, - EventSet::IN | EventSet::EDGE_TRIGGERED, - )); - } - - notifiers - } -} - -/// Status of net device. -#[repr(C)] -#[derive(Copy, Clone, Desc, ByteCode)] -#[desc_version(compat_version = "0.1.0")] -pub struct VirtioNetState { - /// Bit mask of features supported by the backend. - device_features: u64, - /// Bit mask of features negotiated by the backend and the frontend. - driver_features: u64, - /// Virtio net configurations. - config_space: VirtioNetConfig, -} - -/// Network device structure. -pub struct Net { - /// Configuration of the network device. - net_cfg: NetworkInterfaceConfig, - /// Tap device opened. - taps: Option>, - /// The status of net device. - state: VirtioNetState, - /// The send half of Rust's channel to send tap information. - senders: Option>>, - /// Eventfd for config space update. - update_evt: EventFd, - /// Eventfd for device deactivate. - deactivate_evt: EventFd, -} - -impl Default for Net { - fn default() -> Self { - Self { - net_cfg: Default::default(), - taps: None, - state: VirtioNetState::default(), - senders: None, - update_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - deactivate_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - } - } -} - -impl Net { - pub fn new(net_cfg: NetworkInterfaceConfig) -> Self { - Self { - net_cfg, - taps: None, - state: VirtioNetState::default(), - senders: None, - update_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - deactivate_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - } - } -} - -/// Set Mac address configured into the virtio configuration, and return features mask with -/// VIRTIO_NET_F_MAC set. -/// -/// # Arguments -/// -/// * `device_config` - Virtio net configurations. -/// * `mac` - Mac address configured by user. -pub fn build_device_config_space(device_config: &mut VirtioNetConfig, mac: &str) -> u64 { - let mut config_features = 0_u64; - let mut bytes = [0_u8; 6]; - for (i, s) in mac.split(':').collect::>().iter().enumerate() { - bytes[i] = if let Ok(v) = u8::from_str_radix(s, 16) { - v - } else { - return config_features; - }; - } - device_config.mac.copy_from_slice(&bytes); - config_features |= 1 << VIRTIO_NET_F_MAC; - - config_features -} - -/// Check that tap flag supports multi queue feature. 
-/// -/// # Arguments -/// -/// * `dev_name` - The name of tap device on host. -/// * `queue_pairs` - The number of virtio queue pairs. -pub fn check_mq(dev_name: &str, queue_pair: u16) -> Result<()> { - let path = format!("/sys/class/net/{}/tun_flags", dev_name); - let tap_path = Path::new(&path); - if !tap_path.exists() { - bail!("Tap path doesn't exist"); - } - - let is_mq = queue_pair > 1; - let ifr_flag = - fs::read_to_string(tap_path).chain_err(|| "Failed to read content from tun_flags file")?; - let flags = u16::from_str_radix(ifr_flag.trim().trim_start_matches("0x"), 16) - .chain_err(|| "Failed to parse tap ifr flag")?; - if (flags & IFF_MULTI_QUEUE != 0) && !is_mq { - bail!(format!( - "Tap device supports mq, but command set queue pairs {}.", - queue_pair - )); - } else if (flags & IFF_MULTI_QUEUE == 0) && is_mq { - bail!(format!( - "Tap device doesn't support mq, but command set queue pairs {}.", - queue_pair - )); - } - - Ok(()) -} - -/// Open tap device if no fd provided, configure and return it. -/// -/// # Arguments -/// -/// * `net_fd` - Fd of tap device opened. -/// * `host_dev_name` - Path of tap device on host. -/// * `queue_pairs` - The number of virtio queue pairs. -pub fn create_tap( - net_fds: Option<&Vec>, - host_dev_name: Option<&str>, - queue_pairs: u16, -) -> Result>> { - if net_fds.is_none() && host_dev_name.is_none() { - return Ok(None); - } - if net_fds.is_some() && host_dev_name.is_some() { - error!("Create tap: fd and file_path exist meanwhile (use fd by default)"); - } - - let mut taps = Vec::with_capacity(queue_pairs as usize); - for index in 0..queue_pairs { - let tap = if let Some(fds) = net_fds { - let fd = fds - .get(index as usize) - .chain_err(|| format!("Failed to get fd from index {}", index))?; - Tap::new(None, Some(*fd), queue_pairs) - .chain_err(|| format!("Failed to create tap, index is {}", index))? - } else { - // `unwrap()` won't fail because the arguments have been checked - let dev_name = host_dev_name.unwrap(); - check_mq(dev_name, queue_pairs)?; - Tap::new(Some(dev_name), None, queue_pairs).chain_err(|| { - format!( - "Failed to create tap with name {}, index is {}", - dev_name, index - ) - })? - }; - - tap.set_offload(TUN_F_VIRTIO) - .chain_err(|| "Failed to set tap offload")?; - - let vnet_hdr_size = mem::size_of::() as u32; - tap.set_hdr_size(vnet_hdr_size) - .chain_err(|| "Failed to set tap hdr size")?; - - taps.push(tap); - } - - Ok(Some(taps)) -} - -impl VirtioDevice for Net { - /// Realize virtio network device. 
- fn realize(&mut self) -> Result<()> { - // if iothread not found, return err - if self.net_cfg.iothread.is_some() - && EventLoop::get_ctx(self.net_cfg.iothread.as_ref()).is_none() - { - bail!( - "IOThread {:?} of Net is not configured in params.", - self.net_cfg.iothread, - ); - } - - self.state.device_features = 1 << VIRTIO_F_VERSION_1 - | 1 << VIRTIO_NET_F_CSUM - | 1 << VIRTIO_NET_F_GUEST_CSUM - | 1 << VIRTIO_NET_F_GUEST_TSO4 - | 1 << VIRTIO_NET_F_GUEST_UFO - | 1 << VIRTIO_NET_F_HOST_TSO4 - | 1 << VIRTIO_NET_F_HOST_UFO - | 1 << VIRTIO_F_RING_EVENT_IDX; - - let queue_pairs = self.net_cfg.queues / 2; - if self.net_cfg.mq - && queue_pairs >= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN - && queue_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX - { - self.state.device_features |= 1 << VIRTIO_NET_F_MQ; - self.state.device_features |= 1 << VIRTIO_NET_F_CTRL_VQ; - self.state.config_space.max_virtqueue_pairs = queue_pairs; - } - - if !self.net_cfg.host_dev_name.is_empty() { - self.taps = None; - self.taps = create_tap(None, Some(&self.net_cfg.host_dev_name), queue_pairs) - .chain_err(|| "Failed to open tap with file path")?; - } else if let Some(fds) = self.net_cfg.tap_fds.as_mut() { - let mut created_fds = 0; - if let Some(taps) = &self.taps { - for (index, tap) in taps.iter().enumerate() { - if fds.get(index).map_or(-1, |fd| *fd as RawFd) == tap.as_raw_fd() { - created_fds += 1; - } - } - } - - if created_fds != fds.len() { - self.taps = - create_tap(Some(fds), None, queue_pairs).chain_err(|| "Failed to open tap")?; - } - } else { - self.taps = None; - } - - if let Some(mac) = &self.net_cfg.mac { - self.state.device_features |= - build_device_config_space(&mut self.state.config_space, mac); - } - - Ok(()) - } - - fn unrealize(&mut self) -> Result<()> { - MigrationManager::unregister_device_instance_mutex_by_id( - VirtioNetState::descriptor(), - &self.net_cfg.id, - ); - Ok(()) - } - - /// Get the virtio device type, refer to Virtio Spec. - fn device_type(&self) -> u32 { - VIRTIO_TYPE_NET - } - - /// Get the count of virtio device queues. - fn queue_num(&self) -> usize { - if self.net_cfg.mq { - (self.net_cfg.queues + 1) as usize - } else { - QUEUE_NUM_NET - } - } - - /// Get the queue size of virtio device. - fn queue_size(&self) -> u16 { - QUEUE_SIZE_NET - } - - /// Get device features from host. - fn get_device_features(&self, features_select: u32) -> u32 { - read_u32(self.state.device_features, features_select) - } - - /// Set driver features by guest. - fn set_driver_features(&mut self, page: u32, value: u32) { - let mut v = write_u32(value, page); - let unrequested_features = v & !self.state.device_features; - if unrequested_features != 0 { - warn!("Received acknowledge request with unknown feature: {:x}", v); - v &= !unrequested_features; - } - self.state.driver_features |= v; - } - - /// Read data of config from guest. - fn read_config(&self, offset: u64, mut data: &mut [u8]) -> Result<()> { - let config_slice = self.state.config_space.as_bytes(); - let config_len = config_slice.len() as u64; - if offset >= config_len { - return Err(ErrorKind::DevConfigOverflow(offset, config_len).into()); - } - if let Some(end) = offset.checked_add(data.len() as u64) { - data.write_all(&config_slice[offset as usize..cmp::min(end, config_len) as usize])?; - } - Ok(()) - } - - /// Write data to config from guest. 
- fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { - let data_len = data.len(); - let config_slice = self.state.config_space.as_mut_bytes(); - let config_len = config_slice.len(); - if offset as usize + data_len > config_len { - return Err(ErrorKind::DevConfigOverflow(offset, config_len as u64).into()); - } - - config_slice[(offset as usize)..(offset as usize + data_len)].copy_from_slice(data); - - Ok(()) - } - - /// Activate the virtio device, this function is called by vcpu thread when frontend - /// virtio driver is ready and write `DRIVER_OK` to backend. - fn activate( - &mut self, - mem_space: Arc, - interrupt_cb: Arc, - queues: &[Arc>], - mut queue_evts: Vec, - ) -> Result<()> { - let queue_num = queues.len(); - if (self.state.driver_features & 1 << VIRTIO_NET_F_CTRL_VQ != 0) && (queue_num % 2 != 0) { - let ctrl_queue = queues[queue_num - 1].clone(); - let ctrl_queue_evt = queue_evts.remove(queue_num - 1); - - let ctrl_handler = NetCtrlHandler { - ctrl: CtrlVirtio::new(ctrl_queue, ctrl_queue_evt), - mem_space: mem_space.clone(), - interrupt_cb: interrupt_cb.clone(), - driver_features: self.state.driver_features, - }; - - EventLoop::update_event( - EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(ctrl_handler))), - self.net_cfg.iothread.as_ref(), - )?; - } - - let mut senders = Vec::new(); - let queue_pairs = queue_num / 2; - for index in 0..queue_pairs { - let rx_queue = queues[index * 2].clone(); - let rx_queue_evt = queue_evts.remove(0); - let tx_queue = queues[index * 2 + 1].clone(); - let tx_queue_evt = queue_evts.remove(0); - - let (sender, receiver) = channel(); - senders.push(sender); - - let mut handler = NetIoHandler { - rx: RxVirtio::new(rx_queue, rx_queue_evt), - tx: TxVirtio::new(tx_queue, tx_queue_evt), - tap: self.taps.as_ref().map(|t| t[index].clone()), - tap_fd: -1, - mem_space: mem_space.clone(), - interrupt_cb: interrupt_cb.clone(), - driver_features: self.state.driver_features, - receiver, - update_evt: self.update_evt.as_raw_fd(), - deactivate_evt: self.deactivate_evt.as_raw_fd(), - is_listening: true, - }; - if let Some(tap) = &handler.tap { - handler.tap_fd = tap.as_raw_fd(); - } - - EventLoop::update_event( - EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))), - self.net_cfg.iothread.as_ref(), - )?; - } - self.senders = Some(senders); - - Ok(()) - } - - fn update_config(&mut self, dev_config: Option>) -> Result<()> { - if let Some(conf) = dev_config { - self.net_cfg = conf - .as_any() - .downcast_ref::() - .unwrap() - .clone(); - } else { - self.net_cfg = Default::default(); - } - - self.realize()?; - - if let Some(senders) = &self.senders { - if let Some(mut taps) = self.taps.take() { - for (index, sender) in senders.iter().enumerate() { - let tap = taps.remove(index); - sender - .send(Some(tap)) - .chain_err(|| ErrorKind::ChannelSend("tap fd".to_string()))?; - } - } - self.update_evt - .write(1) - .chain_err(|| ErrorKind::EventFdWrite)?; - } - - Ok(()) - } - - fn deactivate(&mut self) -> Result<()> { - self.deactivate_evt - .write(1) - .chain_err(|| ErrorKind::EventFdWrite) - } -} - -// Send and Sync is not auto-implemented for `Sender` type. -// Implementing them is safe because `Sender` field of Net won't change in migration -// workflow. 
-unsafe impl Sync for Net {} - -impl StateTransfer for Net { - fn get_state_vec(&self) -> migration::errors::Result> { - Ok(self.state.as_bytes().to_vec()) - } - - fn set_state_mut(&mut self, state: &[u8]) -> migration::errors::Result<()> { - self.state = *VirtioNetState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("NET"))?; - - Ok(()) - } - - fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&VirtioNetState::descriptor().name) { - alias - } else { - !0 - } - } -} - -impl MigrationHook for Net {} - -#[cfg(test)] -mod tests { - pub use super::super::*; - pub use super::*; - - #[test] - fn test_net_init() { - // test net new method - let mut net = Net::default(); - assert_eq!(net.state.device_features, 0); - assert_eq!(net.state.driver_features, 0); - - assert_eq!(net.taps.is_none(), true); - assert_eq!(net.senders.is_none(), true); - assert_eq!(net.net_cfg.mac.is_none(), true); - assert_eq!(net.net_cfg.tap_fds.is_none(), true); - assert_eq!(net.net_cfg.vhost_type.is_none(), true); - assert_eq!(net.net_cfg.vhost_fds.is_none(), true); - - // test net realize method - net.realize().unwrap(); - assert_eq!(net.device_type(), 1); - assert_eq!(net.queue_num(), 2); - assert_eq!(net.queue_size(), 256); - - // test read_config and write_config method - let write_data: Vec = vec![7; 4]; - let mut random_data: Vec = vec![0; 4]; - let mut origin_data: Vec = vec![0; 4]; - net.read_config(0x00, &mut origin_data).unwrap(); - - net.write_config(0x00, &write_data).unwrap(); - net.read_config(0x00, &mut random_data).unwrap(); - assert_eq!(random_data, write_data); - - net.write_config(0x00, &origin_data).unwrap(); - - // test boundary condition of offset and data parameters - let device_config = net.state.config_space.as_bytes(); - let len = device_config.len() as u64; - - let mut data: Vec = vec![0; 10]; - let offset: u64 = len + 1; - assert_eq!(net.read_config(offset, &mut data).is_ok(), false); - - let offset: u64 = len; - assert_eq!(net.read_config(offset, &mut data).is_ok(), false); - - let offset: u64 = 0; - assert_eq!(net.read_config(offset, &mut data).is_ok(), true); - - let offset: u64 = len; - let mut data: Vec = vec![0; 1]; - assert_eq!(net.write_config(offset, &mut data).is_ok(), false); - - let offset: u64 = len - 1; - let mut data: Vec = vec![0; 1]; - assert_eq!(net.write_config(offset, &mut data).is_ok(), true); - - let offset: u64 = 0; - let mut data: Vec = vec![0; len as usize]; - assert_eq!(net.write_config(offset, &mut data).is_ok(), true); - } -} diff --git a/virtio/src/queue/mod.rs b/virtio/src/queue/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..44f164731ced5c54480a5031a5bb0fe543099ed1 --- /dev/null +++ b/virtio/src/queue/mod.rs @@ -0,0 +1,229 @@ +// Copyright (c) 2020 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
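+
+// This module gathers the virtqueue abstractions shared by the virtio devices:
+// the `VringOps` trait and `Queue` wrapper defined below, the split-vring
+// implementation in the `split` submodule, and the per-queue notify eventfds.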
+ +mod split; + +pub use split::*; + +use std::sync::Arc; + +use anyhow::{bail, Result}; +use vmm_sys_util::eventfd::EventFd; + +use address_space::{AddressSpace, GuestAddress, RegionCache}; +use machine_manager::config::DEFAULT_VIRTQUEUE_SIZE; +use util::loop_context::create_new_eventfd; + +/// Split Virtqueue. +pub const QUEUE_TYPE_SPLIT_VRING: u16 = 1; +/// Packed Virtqueue. +pub const QUEUE_TYPE_PACKED_VRING: u16 = 2; +/// Invalid queue vector num. +pub const INVALID_VECTOR_NUM: u16 = 0xFFFF; + +/// This marks a buffer as continuing via the next field. +const VIRTQ_DESC_F_NEXT: u16 = 0x1; +/// This marks a buffer as write-only (otherwise read-only). +const VIRTQ_DESC_F_WRITE: u16 = 0x2; +/// This means the buffer contains a list of buffer descriptors. +const VIRTQ_DESC_F_INDIRECT: u16 = 0x4; + +fn checked_offset_mem( + mmio_space: &Arc, + base: GuestAddress, + offset: u64, +) -> Result { + if !mmio_space.address_in_memory(base, offset) { + bail!( + "Invalid Address for queue: base 0x{:X}, size {}", + base.raw_value(), + offset + ); + } + Ok(base.unchecked_add(offset)) +} + +/// IO vector element which contains the information of a descriptor. +#[derive(Debug, Clone, Copy)] +pub struct ElemIovec { + /// Guest address of descriptor. + pub addr: GuestAddress, + /// Length of descriptor. + pub len: u32, +} + +/// IO request element. +pub struct Element { + /// Index of the descriptor in the table. + pub index: u16, + /// Number of descriptors. + pub desc_num: u16, + /// Vector to put host readable descriptors. + pub out_iovec: Vec, + /// Vector to put host writable descriptors. + pub in_iovec: Vec, +} + +impl Element { + /// Create an IO request element. + /// + /// # Arguments + /// + /// * `index` - The index of descriptor in the virqueue descriptor table. + fn new(index: u16) -> Self { + Element { + index, + desc_num: 0, + out_iovec: Vec::with_capacity(DEFAULT_VIRTQUEUE_SIZE.into()), + in_iovec: Vec::with_capacity(DEFAULT_VIRTQUEUE_SIZE.into()), + } + } + + pub fn iovec_size(iovec: &[ElemIovec]) -> u64 { + let mut size: u64 = 0; + for elem in iovec.iter() { + // Note: iovec is part of elem.in_iovec/out_iovec which has been checked + // in pop_avail(). The sum of iov_len is not greater than u32::MAX. + size += u64::from(elem.len); + } + size + } +} + +/// Vring operations. +pub trait VringOps { + /// Return true if the vring is enable by driver. + fn is_enabled(&self) -> bool; + + /// Return true if the configuration of vring is valid. + /// + /// # Arguments + /// + /// * `sys_mem` - Address space to which the vring belongs. + fn is_valid(&self, sys_mem: &Arc) -> bool; + + /// Assemble an IO request element with descriptors from the available vring. + /// + /// # Arguments + /// + /// * `sys_mem` - Address space to which the vring belongs. + /// * `features` - Bit mask of features negotiated by the backend and the frontend. + fn pop_avail(&mut self, sys_mem: &Arc, features: u64) -> Result; + + /// Rollback the entry which is pop from available queue by `pop_avail`. + fn push_back(&mut self); + + /// Fill the used vring after processing the IO request. + /// + /// # Arguments + /// + /// * `index` - Index of descriptor in the virqueue descriptor table. + /// * `len` - Total length of the descriptor chain which was used (written to). + fn add_used(&mut self, index: u16, len: u32) -> Result<()>; + + /// Return true if guest needed to be notified. + /// + /// # Arguments + /// + /// * `features` - Bit mask of features negotiated by the backend and the frontend. 
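+    ///
+    /// Typically this consults VRING_AVAIL_F_NO_INTERRUPT, or the used_event
+    /// index when VIRTIO_F_RING_EVENT_IDX has been negotiated.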
+ fn should_notify(&mut self, features: u64) -> bool; + + /// Give guest a hint to suppress virtqueue notification. + /// + /// # Arguments + /// + /// * `features` - Bit mask of features negotiated by the backend and the frontend. + /// * `suppress` - Suppress virtqueue notification or not. + fn suppress_queue_notify(&mut self, features: u64, suppress: bool) -> Result<()>; + + /// Get the actual size of the vring. + fn actual_size(&self) -> u16; + + /// Get the configuration of the vring. + fn get_queue_config(&self) -> QueueConfig; + + /// The number of descriptor chains in the available ring. + fn avail_ring_len(&mut self) -> Result; + + /// Get the avail index of the vring. + fn get_avail_idx(&self) -> Result; + + /// Get the used index of the vring. + fn get_used_idx(&self) -> Result; + + /// Get the region cache information of the SplitVring. + fn get_cache(&self) -> &Option; + + /// Get the available bytes of the vring to read from or write to the guest + fn get_avail_bytes( + &mut self, + sys_mem: &Arc, + max_size: usize, + is_in: bool, + ) -> Result; +} + +/// Virtio queue. +pub struct Queue { + /// Vring structure. + pub vring: Box, +} + +impl Queue { + /// Create a virtqueue. + /// + /// # Arguments + /// + /// * `queue_config` - Configuration of the vring. + /// * `queue_type` - Type of virtqueue. + pub fn new(queue_config: QueueConfig, queue_type: u16) -> Result { + let vring: Box = match queue_type { + QUEUE_TYPE_SPLIT_VRING => Box::new(SplitVring::new(queue_config)), + _ => { + bail!("Unsupported queue type {}", queue_type); + } + }; + + Ok(Queue { vring }) + } + + /// Return true if the virtqueue is enabled by driver. + pub fn is_enabled(&self) -> bool { + self.vring.is_enabled() + } + + /// Return true if the memory layout of the virqueue is valid. + /// + /// # Arguments + /// + /// * `sys_mem` - Address space to which the vring belongs. + pub fn is_valid(&self, sys_mem: &Arc) -> bool { + self.vring.is_valid(sys_mem) + } +} + +/// Virt Queue Notify EventFds +#[derive(Clone)] +pub struct NotifyEventFds { + pub events: Vec>, +} + +impl NotifyEventFds { + pub fn new(queue_num: usize) -> Self { + let mut events = Vec::new(); + for _i in 0..queue_num { + events.push(Arc::new(create_new_eventfd().unwrap())); + } + + NotifyEventFds { events } + } +} diff --git a/virtio/src/queue.rs b/virtio/src/queue/split.rs similarity index 48% rename from virtio/src/queue.rs rename to virtio/src/queue/split.rs index 13409a875297bd1bcb3e83223ff01080fef2c3f5..30a906ffc827a672762620acbeb8fa3068c52327 100644 --- a/virtio/src/queue.rs +++ b/virtio/src/queue/split.rs @@ -11,38 +11,92 @@ // See the Mulan PSL v2 for more details. 
use std::cmp::min; +use std::io::Write; use std::mem::size_of; use std::num::Wrapping; -use std::sync::atomic::{fence, Ordering}; +use std::ops::{Deref, DerefMut}; +use std::sync::atomic::{fence, AtomicBool, Ordering}; use std::sync::Arc; -use address_space::{AddressSpace, GuestAddress, RegionCache, RegionType}; +use anyhow::{anyhow, bail, Context, Result}; +use log::{error, warn}; + +use super::{ + checked_offset_mem, ElemIovec, Element, VringOps, INVALID_VECTOR_NUM, VIRTQ_DESC_F_INDIRECT, + VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE, +}; +use crate::{ + report_virtio_error, virtio_has_feature, VirtioError, VirtioInterrupt, VIRTIO_F_RING_EVENT_IDX, +}; +use address_space::{AddressAttr, AddressSpace, GuestAddress, RegionCache, RegionType}; +use migration::{migration::Migratable, MigrationManager}; use util::byte_code::ByteCode; -use super::errors::{ErrorKind, Result, ResultExt}; -use super::{virtio_has_feature, VIRTIO_F_RING_EVENT_IDX}; - /// When host consumes a buffer, don't interrupt the guest. const VRING_AVAIL_F_NO_INTERRUPT: u16 = 1; -/// Split Virtqueue. -pub const QUEUE_TYPE_SPLIT_VRING: u16 = 1; -/// Packed Virtqueue. -pub const QUEUE_TYPE_PACKED_VRING: u16 = 2; - -fn checked_offset_mem( - mmio_space: &Arc, - base: GuestAddress, - offset: u64, -) -> Result { - if !mmio_space.address_in_memory(base, offset) { - bail!( - "Invalid Address for queue: base 0x{:X}, size {}", - base.raw_value(), - offset - ); - } - base.checked_add(offset) - .ok_or_else(|| ErrorKind::AddressOverflow("queue", base.raw_value(), offset).into()) +/// When guest produces a buffer, don't notify the host. +const VRING_USED_F_NO_NOTIFY: u16 = 1; + +/// Max total len of a descriptor chain. +const DESC_CHAIN_MAX_TOTAL_LEN: u64 = 1u64 << 32; +/// The length of used element. +const USEDELEM_LEN: u64 = size_of::() as u64; +/// The length of avail element. +const AVAILELEM_LEN: u64 = size_of::() as u64; +/// The length of available ring except array of avail element(flags: u16 idx: u16 used_event: u16). +const VRING_AVAIL_LEN_EXCEPT_AVAILELEM: u64 = (size_of::() * 3) as u64; +/// The length of used ring except array of used element(flags: u16 idx: u16 avail_event: u16). +const VRING_USED_LEN_EXCEPT_USEDELEM: u64 = (size_of::() * 3) as u64; +/// The length of flags(u16) and idx(u16). +const VRING_FLAGS_AND_IDX_LEN: u64 = size_of::() as u64; +/// The position of idx in the available ring and the used ring. +const VRING_IDX_POSITION: u64 = size_of::() as u64; +/// The length of virtio descriptor. +const DESCRIPTOR_LEN: u64 = size_of::() as u64; + +/// Read some data from memory to form an object via host address. +/// +/// # Arguments +/// +/// * `hoat_addr` - The start host address where the data will be read from. +/// +/// # Safety +/// +/// Make true that host_addr and std::mem::size_of::() are in the range of ram. +/// +/// # Note +/// To use this method, it is necessary to implement `ByteCode` trait for your object. +unsafe fn read_object_direct(host_addr: u64) -> Result { + trace::virtio_read_object_direct(host_addr, std::mem::size_of::()); + let mut obj = T::default(); + let mut dst = obj.as_mut_bytes(); + let src = std::slice::from_raw_parts_mut(host_addr as *mut u8, std::mem::size_of::()); + dst.write_all(src) + .with_context(|| "Failed to read object via host address")?; + + Ok(obj) +} + +/// Write an object to memory via host address. +/// +/// # Arguments +/// +/// * `data` - The object that will be written to the memory. +/// * `host_addr` - The start host address where the object will be written to. 
+/// +/// # Safety +/// +/// Make true that host_addr and std::mem::size_of::() are in the range of ram. +/// +/// # Note +/// To use this method, it is necessary to implement `ByteCode` trait for your object. +unsafe fn write_object_direct(data: &T, host_addr: u64) -> Result<()> { + trace::virtio_write_object_direct(host_addr, std::mem::size_of::()); + // Mark vmm dirty page manually if live migration is active. + MigrationManager::mark_dirty_log(host_addr, data.as_bytes().len() as u64); + let mut dst = std::slice::from_raw_parts_mut(host_addr as *mut u8, std::mem::size_of::()); + dst.write_all(data.as_bytes()) + .with_context(|| "Failed to write object via host address") } #[derive(Default, Clone, Copy)] @@ -54,6 +108,7 @@ pub struct VirtioAddrCache { /// Host virtual address of the used ring. pub used_ring_host: u64, } + /// The configuration of virtqueue. #[derive(Default, Clone, Copy)] pub struct QueueConfig { @@ -74,11 +129,13 @@ pub struct QueueConfig { /// Interrupt vector index of the queue for msix pub vector: u16, /// The next index which can be popped in the available vring. - next_avail: u16, + next_avail: Wrapping, /// The next index which can be pushed in the used vring. - next_used: u16, + next_used: Wrapping, /// The index of last descriptor used which has triggered interrupt. - last_signal_used: u16, + last_signal_used: Wrapping, + /// The last_signal_used is valid or not. + signal_used_valid: bool, } impl QueueConfig { @@ -87,7 +144,6 @@ impl QueueConfig { /// # Arguments /// /// * `max_size` - The maximum size of the virtqueue. - /// pub fn new(max_size: u16) -> Self { let addr_cache = VirtioAddrCache::default(); QueueConfig { @@ -98,99 +154,89 @@ impl QueueConfig { max_size, size: max_size, ready: false, - vector: 0, - next_avail: 0, - next_used: 0, - last_signal_used: 0, + vector: INVALID_VECTOR_NUM, + next_avail: Wrapping(0), + next_used: Wrapping(0), + last_signal_used: Wrapping(0), + signal_used_valid: false, } } -} -/// IO vector element which contains the information of a descriptor. -#[derive(Debug, Clone, Copy)] -pub struct ElemIovec { - /// Guest address of descriptor. - pub addr: GuestAddress, - /// Length of descriptor. - pub len: u32, -} - -/// IO request element. -pub struct Element { - /// Index of the descriptor in the table. - pub index: u16, - /// Number of descriptors. - pub desc_num: u16, - /// Vector to put host readable descriptors. - pub out_iovec: Vec, - /// Vector to put host writable descriptors. - pub in_iovec: Vec, -} - -impl Element { - /// Create an IO request element. - /// - /// # Arguments - /// - /// * `index` - The index of descriptor in the virqueue descriptor table. - pub fn new(index: u16) -> Self { - Element { - index, - desc_num: 0, - out_iovec: Vec::new(), - in_iovec: Vec::new(), - } + fn get_desc_size(&self) -> u64 { + u64::from(min(self.size, self.max_size)) * DESCRIPTOR_LEN } -} -/// Vring operations. -pub trait VringOps { - /// Return true if the configuration of vring is valid. - /// - /// # Arguments - /// - /// * `sys_mem` - Address space to which the vring belongs. - fn is_valid(&self, sys_mem: &Arc) -> bool; - - /// Assemble an IO request element with descriptors from the available vring. - /// - /// # Arguments - /// - /// * `sys_mem` - Address space to which the vring belongs. - /// * `features` - Bit mask of features negotiated by the backend and the frontend. 
- fn pop_avail(&mut self, sys_mem: &Arc, features: u64) -> Result; + fn get_used_size(&self, features: u64) -> u64 { + let size = if virtio_has_feature(features, VIRTIO_F_RING_EVENT_IDX) { + 2_u64 + } else { + 0_u64 + }; - /// Rollback the entry which is pop from available queue by `pop_avail`. - fn push_back(&mut self); + size + VRING_FLAGS_AND_IDX_LEN + u64::from(min(self.size, self.max_size)) * USEDELEM_LEN + } - /// Fill the used vring after processing the IO request. - /// - /// # Arguments - /// - /// * `sys_mem` - Address space to which the vring belongs. - /// * `index` - Index of descriptor in the virqueue descriptor table. - /// * `len` - Total length of the descriptor chain which was used (written to). - fn add_used(&mut self, sys_mem: &Arc, index: u16, len: u32) -> Result<()>; + fn get_avail_size(&self, features: u64) -> u64 { + let size = if virtio_has_feature(features, VIRTIO_F_RING_EVENT_IDX) { + 2_u64 + } else { + 0_u64 + }; - /// Return true if guest needed to be notified. - /// - /// # Arguments - /// - /// * `sys_mem` - Address space to which the vring belongs. - /// * `features` - Bit mask of features negotiated by the backend and the frontend. - fn should_notify(&mut self, system_space: &Arc, features: u64) -> bool; + size + VRING_FLAGS_AND_IDX_LEN + + u64::from(min(self.size, self.max_size)) * (size_of::() as u64) + } - /// Get the actual size of the vring. - fn actual_size(&self) -> u16; + pub fn reset(&mut self) { + *self = Self::new(self.max_size); + } - /// Get the configuration of the vring. - fn get_queue_config(&self) -> QueueConfig; + pub fn set_addr_cache( + &mut self, + mem_space: Arc, + interrupt_cb: Arc, + features: u64, + broken: &Arc, + ) { + self.addr_cache.desc_table_host = if let Some((addr, size)) = + mem_space.addr_cache_init(self.desc_table, AddressAttr::Ram) + { + if size < self.get_desc_size() { + report_virtio_error(interrupt_cb.clone(), features, broken); + 0_u64 + } else { + addr + } + } else { + 0_u64 + }; - /// The number of descriptor chains in the available ring. - fn avail_ring_len(&mut self, sys_mem: &Arc) -> Result; + self.addr_cache.avail_ring_host = if let Some((addr, size)) = + mem_space.addr_cache_init(self.avail_ring, AddressAttr::Ram) + { + if size < self.get_avail_size(features) { + report_virtio_error(interrupt_cb.clone(), features, broken); + 0_u64 + } else { + addr + } + } else { + 0_u64 + }; - fn get_host_address_from_cache(&self, addr: GuestAddress, mem_space: &Arc) - -> u64; + self.addr_cache.used_ring_host = if let Some((addr, size)) = + mem_space.addr_cache_init(self.used_ring, AddressAttr::Ram) + { + if size < self.get_used_size(features) { + report_virtio_error(interrupt_cb.clone(), features, broken); + 0_u64 + } else { + addr + } + } else { + 0_u64 + }; + } } /// Virtio used element. @@ -215,24 +261,16 @@ struct SplitVringFlagsIdx { impl ByteCode for SplitVringFlagsIdx {} -/// The length of used element. -const USEDELEM_LEN: u64 = size_of::() as u64; -/// The length of avail element. -const AVAILELEM_LEN: u64 = size_of::() as u64; -/// The length of available ring except array of avail element(flags: u16 idx: u16 used_event: u16). -const VRING_AVAIL_LEN_EXCEPT_AVAILELEM: u64 = (size_of::() * 3) as u64; -/// The length of used ring except array of used element(flags: u16 idx: u16 avail_event: u16). -const VRING_USED_LEN_EXCEPT_USEDELEM: u64 = (size_of::() * 3) as u64; -/// The length of flags(u16) and idx(u16). 
-const VRING_FLAGS_AND_IDX_LEN: u64 = size_of::() as u64; -/// The position of idx in the available ring and the used ring. -const VRING_IDX_POSITION: u64 = size_of::() as u64; -/// This marks a buffer as continuing via the next field. -const VIRTQ_DESC_F_NEXT: u16 = 0x1; -/// This marks a buffer as write-only (otherwise read-only). -const VIRTQ_DESC_F_WRITE: u16 = 0x2; -/// This means the buffer contains a list of buffer descriptors. -const VIRTQ_DESC_F_INDIRECT: u16 = 0x4; +struct DescInfo { + /// The host virtual address of the descriptor table. + table_host: u64, + /// The size of the descriptor table. + size: u16, + /// The index of the current descriptor table. + index: u16, + /// The descriptor table. + desc: SplitVringDesc, +} /// Descriptor of split vring. #[repr(C)] @@ -248,9 +286,6 @@ pub struct SplitVringDesc { pub next: u16, } -/// The length of virtio descriptor. -const DESCRIPTOR_LEN: u64 = size_of::() as u64; - impl SplitVringDesc { /// Create a descriptor of split vring. /// @@ -260,7 +295,7 @@ impl SplitVringDesc { /// * `desc_table` - Guest address of virtqueue descriptor table. /// * `queue_size` - Size of virtqueue. /// * `index` - Index of descriptor in the virqueue descriptor table. - pub fn new( + fn new( sys_mem: &Arc, desc_table_host: u64, queue_size: u16, @@ -268,26 +303,28 @@ impl SplitVringDesc { cache: &mut Option, ) -> Result { if index >= queue_size { - return Err(ErrorKind::QueueIndex(index, queue_size).into()); + return Err(anyhow!(VirtioError::QueueIndex(index, queue_size))); } let desc_addr = desc_table_host .checked_add(u64::from(index) * DESCRIPTOR_LEN) - .chain_err(|| { - ErrorKind::AddressOverflow( + .with_context(|| { + VirtioError::AddressOverflow( "creating a descriptor", desc_table_host, u64::from(index) * DESCRIPTOR_LEN, ) })?; - let desc = sys_mem - .read_object_direct::(desc_addr) - .chain_err(|| ErrorKind::ReadObjectErr("a descriptor", desc_addr))?; + // SAFETY: dest_addr has been checked in SplitVringDesc::is_valid() and is guaranteed to be within the ram range. + let desc = unsafe { + read_object_direct::(desc_addr) + .with_context(|| VirtioError::ReadObjectErr("a descriptor", desc_addr)) + }?; if desc.is_valid(sys_mem, queue_size, cache) { Ok(desc) } else { - Err(ErrorKind::QueueDescInvalid.into()) + Err(anyhow!(VirtioError::QueueDescInvalid)) } } @@ -298,16 +335,27 @@ impl SplitVringDesc { queue_size: u16, cache: &mut Option, ) -> bool { + if self.len == 0 { + error!("Zero sized buffers are not allowed"); + return false; + } let mut miss_cached = true; if let Some(reg_cache) = cache { let base = self.addr.0; - let offset = self.len as u64; - if base > reg_cache.start && base + offset < reg_cache.end { - base.checked_add(offset).unwrap(); + let offset = u64::from(self.len); + let end = match base.checked_add(offset) { + Some(addr) => addr, + None => { + error!("The memory of descriptor is invalid, range overflows"); + return false; + } + }; + // GPAChecked: the vring desc [addr, addr+len] must locate in guest ram. + if base > reg_cache.start && end < reg_cache.end { miss_cached = false; } } else { - let gotten_cache = sys_mem.get_region_cache(self.addr); + let gotten_cache = sys_mem.get_region_cache(self.addr, AddressAttr::Ram); if let Some(obtained_cache) = gotten_cache { if obtained_cache.reg_type == RegionType::Ram { *cache = gotten_cache; @@ -316,11 +364,9 @@ impl SplitVringDesc { } if miss_cached { + // GPAChecked: the vring desc addr must locate in guest ram. 
if let Err(ref e) = checked_offset_mem(sys_mem, self.addr, u64::from(self.len)) { - error!( - "The memory of descriptor is invalid, {} ", - error_chain::ChainedError::display_chain(e), - ); + error!("The memory of descriptor is invalid, {:?} ", e); return false; } } @@ -350,7 +396,7 @@ impl SplitVringDesc { cache: &mut Option, ) -> Result { SplitVringDesc::new(sys_mem, desc_table_host, queue_size, index, cache) - .chain_err(|| format!("Failed to find next descriptor {}", index)) + .with_context(|| format!("Failed to find next descriptor {}", index)) } /// Check whether this descriptor is write-only or read-only. @@ -367,12 +413,18 @@ impl SplitVringDesc { /// Return true if the indirect descriptor is valid. /// The len can be divided evenly by the size of descriptor and can not be zero. fn is_valid_indirect_desc(&self) -> bool { - if u64::from(self.len) % DESCRIPTOR_LEN == 0 && self.len != 0 { - true - } else { + if self.len == 0 + || u64::from(self.len) % DESCRIPTOR_LEN != 0 + || u64::from(self.len) / DESCRIPTOR_LEN > u64::from(u16::MAX) + { error!("The indirect descriptor is invalid, len: {}", self.len); - false + return false; + } + if self.has_next() { + error!("INDIRECT and NEXT flag should not be used together"); + return false; } + true } /// Get the num of descriptor in the table of indirect descriptor. @@ -383,18 +435,43 @@ impl SplitVringDesc { /// Get element from descriptor chain. fn get_element( sys_mem: &Arc, - desc_table_host: u64, - queue_size: u16, - index: u16, - mut desc: SplitVringDesc, + desc_info: &DescInfo, cache: &mut Option, elem: &mut Element, ) -> Result<()> { - elem.index = index; + let mut desc_table_host = desc_info.table_host; + let mut desc_size = desc_info.size; + let mut desc = desc_info.desc; + elem.index = desc_info.index; + let mut queue_size = desc_size; + let mut indirect: bool = false; + let mut write_elem_count: u32 = 0; + let mut desc_total_len: u64 = 0; loop { - if elem.desc_num >= queue_size { - break; + if elem.desc_num >= desc_size { + bail!("The element desc number exceeds max allowed"); + } + + if desc.is_indirect_desc() { + if !desc.is_valid_indirect_desc() { + return Err(anyhow!(VirtioError::QueueDescInvalid)); + } + if !indirect { + indirect = true; + } else { + bail!("Found two indirect descriptor elem in one request"); + } + (desc_table_host, _) = sys_mem + .get_host_address_from_cache(desc.addr, cache) + .with_context(|| "Failed to get descriptor table entry host address")?; + queue_size = desc.get_desc_num(); + desc = Self::next_desc(sys_mem, desc_table_host, queue_size, 0, cache)?; + desc_size = elem + .desc_num + .checked_add(queue_size) + .with_context(|| "The chained desc number overflows")?; + continue; } let iovec = ElemIovec { @@ -404,10 +481,19 @@ impl SplitVringDesc { if desc.write_only() { elem.in_iovec.push(iovec); + write_elem_count += 1; } else { + if write_elem_count > 0 { + bail!("Invalid order of the descriptor elem"); + } elem.out_iovec.push(iovec); } elem.desc_num += 1; + // Note: iovec.addr + iovec.len is located in RAM, and iovec.len is not greater than the + // VM RAM size. The number of iovec is not greater than 'queue_size * 2 - 1' which with + // a indirect table. Currently, the max value of queue_size is 1024. So, desc_total_len + // must not overflow. + desc_total_len += u64::from(iovec.len); if desc.has_next() { desc = Self::next_desc(sys_mem, desc_table_host, queue_size, desc.next, cache)?; @@ -416,50 +502,11 @@ impl SplitVringDesc { } } - Ok(()) - } - - /// Get element from indirect descriptor chain. 
- fn get_indirect_desc( - &self, - sys_mem: &Arc, - index: u16, - cache: &mut Option, - elem: &mut Element, - ) -> Result<()> { - if !self.is_valid_indirect_desc() { - return Err(ErrorKind::QueueDescInvalid.into()); + if desc_total_len > DESC_CHAIN_MAX_TOTAL_LEN { + bail!("Find a descriptor chain longer than 4GB in total"); } - let desc_num = self.get_desc_num(); - let desc_hva = match sys_mem.get_host_address(self.addr) { - Some(addr) => addr, - None => bail!("Failed to get descriptor table entry host address"), - }; - let desc = Self::next_desc(sys_mem, desc_hva, desc_num, 0, cache)?; - Self::get_element(sys_mem, desc_hva, desc_num, index, desc, cache, elem) - .chain_err(|| - format!("Failed to get element from indirect descriptor chain {}, table entry addr: 0x{:X}, size: {}", - index, self.addr.0, desc_num) - ) - } - - /// Get element from normal descriptor chain. - fn get_nonindirect_desc( - &self, - sys_mem: &Arc, - desc_table_host: u64, - queue_size: u16, - index: u16, - cache: &mut Option, - elem: &mut Element, - ) -> Result<()> { - Self::get_element(sys_mem, desc_table_host, queue_size, index, *self, cache, elem).chain_err(|| { - format!( - "Failed to get element from normal descriptor chain {}, table addr: 0x{:X}, size: {}", - index, desc_table_host, queue_size - ) - }) + Ok(()) } } @@ -469,42 +516,22 @@ impl ByteCode for SplitVringDesc {} #[derive(Default, Clone, Copy)] pub struct SplitVring { /// Region cache information. - pub cache: Option, - /// Guest physical address of the descriptor table. - /// The table is composed of descriptors(SplitVringDesc). - pub desc_table: GuestAddress, - - /// Guest physical address of the available ring. - /// The ring is composed of flags(u16), idx(u16), ring[size](u16) and used_event(u16). - pub avail_ring: GuestAddress, - - /// Guest physical address of the used ring. - /// The ring is composed of flags(u16), idx(u16), used_ring[size](UsedElem) and avail_event(u16). - pub used_ring: GuestAddress, - - /// Host address cache. - pub addr_cache: VirtioAddrCache, - - /// Indicate whether the queue configuration is finished. - pub ready: bool, - - /// The maximal size in elements offered by the device. - pub max_size: u16, - - /// The queue size set by frontend. - pub size: u16, - - /// Interrupt vector index of the queue for msix - pub vector: u16, - - /// The next index which can be popped in the available vring. - next_avail: Wrapping, + cache: Option, + /// The configuration of virtqueue. + queue_config: QueueConfig, +} - /// The next index which can be pushed in the used vring. - next_used: Wrapping, +impl Deref for SplitVring { + type Target = QueueConfig; + fn deref(&self) -> &Self::Target { + &self.queue_config + } +} - /// The index of last descriptor used which has triggered interrupt. 
- last_signal_used: Wrapping, +impl DerefMut for SplitVring { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.queue_config + } } impl SplitVring { @@ -516,17 +543,7 @@ impl SplitVring { pub fn new(queue_config: QueueConfig) -> Self { SplitVring { cache: None, - desc_table: queue_config.desc_table, - avail_ring: queue_config.avail_ring, - used_ring: queue_config.used_ring, - addr_cache: queue_config.addr_cache, - ready: queue_config.ready, - max_size: queue_config.max_size, - size: queue_config.size, - vector: queue_config.vector, - next_avail: Wrapping(queue_config.next_avail), - next_used: Wrapping(queue_config.next_used), - last_signal_used: Wrapping(queue_config.last_signal_used), + queue_config, } } @@ -535,87 +552,120 @@ impl SplitVring { min(self.size, self.max_size) } - /// Get the index of the available ring from guest memory. - fn get_avail_idx(&self, sys_mem: &Arc) -> Result { - let avail_flags_idx = sys_mem - .read_object_direct::(self.addr_cache.avail_ring_host) - .chain_err(|| ErrorKind::ReadObjectErr("avail id", self.avail_ring.raw_value()))?; - Ok(avail_flags_idx.idx) + /// Get the flags and idx of the available ring from guest memory. + fn get_avail_flags_idx(&self) -> Result { + // SAFETY: avail_ring_host is checked when addr_cache inited. + unsafe { + read_object_direct::(self.addr_cache.avail_ring_host).with_context( + || VirtioError::ReadObjectErr("avail flags idx", self.avail_ring.raw_value()), + ) + } + } + + /// Get the idx of the available ring from guest memory. + fn get_avail_idx(&self) -> Result { + let flags_idx = self.get_avail_flags_idx()?; + Ok(flags_idx.idx) } /// Get the flags of the available ring from guest memory. - fn get_avail_flags(&self, sys_mem: &Arc) -> Result { - let avail_flags_idx: SplitVringFlagsIdx = sys_mem - .read_object_direct::(self.addr_cache.avail_ring_host) - .chain_err(|| ErrorKind::ReadObjectErr("avail flags", self.avail_ring.raw_value()))?; - Ok(avail_flags_idx.flags) + fn get_avail_flags(&self) -> Result { + let flags_idx = self.get_avail_flags_idx()?; + Ok(flags_idx.flags) } - /// Get the index of the used ring from guest memory. - fn get_used_idx(&self, sys_mem: &Arc) -> Result { + /// Get the flags and idx of the used ring from guest memory. + fn get_used_flags_idx(&self) -> Result { // Make sure the idx read from sys_mem is new. fence(Ordering::SeqCst); - let used_flag_idx: SplitVringFlagsIdx = sys_mem - .read_object_direct::(self.addr_cache.used_ring_host) - .chain_err(|| ErrorKind::ReadObjectErr("used id", self.used_ring.raw_value()))?; - Ok(used_flag_idx.idx) + // SAFETY: used_ring_host has been checked in set_addr_cache() and is guaranteed to be within the ram range. + unsafe { + read_object_direct::(self.addr_cache.used_ring_host).with_context( + || VirtioError::ReadObjectErr("used flags idx", self.used_ring.raw_value()), + ) + } + } + + /// Get the index of the used ring from guest memory. + fn get_used_idx(&self) -> Result { + let flag_idx = self.get_used_flags_idx()?; + Ok(flag_idx.idx) + } + + /// Set the used flags to suppress virtqueue notification or not + fn set_used_flags(&self, suppress: bool) -> Result<()> { + let mut flags_idx = self.get_used_flags_idx()?; + + if suppress { + flags_idx.flags |= VRING_USED_F_NO_NOTIFY; + } else { + flags_idx.flags &= !VRING_USED_F_NO_NOTIFY; + } + // SAFETY: used_ring_host has been checked when addr_cache inited. 
+ unsafe { + write_object_direct::(&flags_idx, self.addr_cache.used_ring_host) + .with_context(|| { + format!( + "Failed to set used flags, used_ring: 0x{:X}", + self.used_ring.raw_value() + ) + }) + }?; + // Make sure the data has been set. + fence(Ordering::SeqCst); + Ok(()) } /// Set the avail idx to the field of the event index for the available ring. - fn set_avail_event(&self, sys_mem: &Arc) -> Result<()> { + fn set_avail_event(&self, event_idx: u16) -> Result<()> { + trace::virtqueue_set_avail_event(self as *const _ as u64, event_idx); let avail_event_offset = VRING_FLAGS_AND_IDX_LEN + USEDELEM_LEN * u64::from(self.actual_size()); - let event_idx = self.next_avail.0; - - sys_mem - .write_object_direct( + // SAFETY: used_ring_host has been checked in set_addr_cache(). + unsafe { + write_object_direct( &event_idx, self.addr_cache.used_ring_host + avail_event_offset, ) - .chain_err(|| { + .with_context(|| { format!( "Failed to set avail event idx, used_ring: 0x{:X}, offset: {}", self.used_ring.raw_value(), avail_event_offset, ) - })?; + }) + }?; // Make sure the data has been set. fence(Ordering::SeqCst); Ok(()) } /// Get the event index of the used ring from guest memory. - fn get_used_event(&self, sys_mem: &Arc) -> Result { + fn get_used_event(&self) -> Result { let used_event_offset = VRING_FLAGS_AND_IDX_LEN + AVAILELEM_LEN * u64::from(self.actual_size()); // Make sure the event idx read from sys_mem is new. fence(Ordering::SeqCst); - let used_event_addr = self - .addr_cache - .avail_ring_host - .checked_add(used_event_offset) - .chain_err(|| { - ErrorKind::AddressOverflow( - "getting used event idx", - self.avail_ring.raw_value(), - used_event_offset, - ) - })?; - let used_event = sys_mem - .read_object_direct::(used_event_addr) - .chain_err(|| ErrorKind::ReadObjectErr("used event id", used_event_addr))?; + // The GPA of avail_ring_host with avail table length has been checked in + // is_invalid_memory which must not be overflowed. + let used_event_addr = self.addr_cache.avail_ring_host + used_event_offset; + // SAFETY: used_event_addr is protected by virtio calculations and is guaranteed to be within the ram range. + let used_event = unsafe { + read_object_direct::(used_event_addr) + .with_context(|| VirtioError::ReadObjectErr("used event id", used_event_addr)) + }?; Ok(used_event) } /// Return true if VRING_AVAIL_F_NO_INTERRUPT is set. - fn is_avail_ring_no_interrupt(&self, sys_mem: &Arc) -> bool { - match self.get_avail_flags(sys_mem) { + fn is_avail_ring_no_interrupt(&self) -> bool { + match self.get_avail_flags() { Ok(avail_flags) => (avail_flags & VRING_AVAIL_F_NO_INTERRUPT) != 0, Err(ref e) => { warn!( - "Failed to get the status for VRING_AVAIL_F_NO_INTERRUPT {}", - error_chain::ChainedError::display_chain(e) + "Failed to get the status for VRING_AVAIL_F_NO_INTERRUPT {:?}", + e ); false } @@ -623,49 +673,56 @@ impl SplitVring { } /// Return true if it's required to trigger interrupt for the used vring. 
- fn used_ring_need_event(&mut self, sys_mem: &Arc) -> bool { + fn used_ring_need_event(&mut self) -> bool { let old = self.last_signal_used; - let new = match self.get_used_idx(sys_mem) { + let new = match self.get_used_idx() { Ok(used_idx) => Wrapping(used_idx), Err(ref e) => { - error!( - "Failed to get the status for notifying used vring {}", - error_chain::ChainedError::display_chain(e) - ); + error!("Failed to get the status for notifying used vring: {:?}", e); return false; } }; - let used_event_idx = match self.get_used_event(sys_mem) { + let used_event_idx = match self.get_used_event() { Ok(idx) => Wrapping(idx), Err(ref e) => { - error!( - "Failed to get the status for notifying used vring {}", - error_chain::ChainedError::display_chain(e) - ); + error!("Failed to get the status for notifying used vring: {:?}", e); return false; } }; + let valid = self.signal_used_valid; + self.signal_used_valid = true; self.last_signal_used = new; - (new - used_event_idx - Wrapping(1)) < (new - old) + !valid || (new - used_event_idx - Wrapping(1)) < (new - old) + } + + fn is_overlap( + start1: GuestAddress, + end1: GuestAddress, + start2: GuestAddress, + end2: GuestAddress, + ) -> bool { + !(start1 >= end2 || start2 >= end1) } fn is_invalid_memory(&self, sys_mem: &Arc, actual_size: u64) -> bool { + // GPAChecked: the desc ring table must locate in guest ram. let desc_table_end = match checked_offset_mem(sys_mem, self.desc_table, DESCRIPTOR_LEN * actual_size) { Ok(addr) => addr, Err(ref e) => { error!( - "descriptor table is out of bounds: start:0x{:X} size:{} {}", + "descriptor table is out of bounds: start:0x{:X} size:{} {:?}", self.desc_table.raw_value(), DESCRIPTOR_LEN * actual_size, - error_chain::ChainedError::display_chain(e), + e ); return true; } }; + // GPAChecked: the avail ring table must locate in guest ram. let desc_avail_end = match checked_offset_mem( sys_mem, self.avail_ring, @@ -674,34 +731,49 @@ impl SplitVring { Ok(addr) => addr, Err(ref e) => { error!( - "avail ring is out of bounds: start:0x{:X} size:{} {}", + "avail ring is out of bounds: start:0x{:X} size:{} {:?}", self.avail_ring.raw_value(), VRING_AVAIL_LEN_EXCEPT_AVAILELEM + AVAILELEM_LEN * actual_size, - error_chain::ChainedError::display_chain(e), + e ); return true; } }; - if let Err(ref e) = checked_offset_mem( + // GPAChecked: the used ring table must locate in guest ram. 
+ let desc_used_end = match checked_offset_mem( sys_mem, self.used_ring, VRING_USED_LEN_EXCEPT_USEDELEM + USEDELEM_LEN * actual_size, ) { - error!( - "used ring is out of bounds: start:0x{:X} size:{} {}", - self.used_ring.raw_value(), - VRING_USED_LEN_EXCEPT_USEDELEM + USEDELEM_LEN * actual_size, - error_chain::ChainedError::display_chain(e), - ); - return true; - } + Ok(addr) => addr, + Err(ref e) => { + error!( + "used ring is out of bounds: start:0x{:X} size:{} {:?}", + self.used_ring.raw_value(), + VRING_USED_LEN_EXCEPT_USEDELEM + USEDELEM_LEN * actual_size, + e, + ); + return true; + } + }; - if self.desc_table >= self.avail_ring - || self.avail_ring >= self.used_ring - || desc_table_end > self.avail_ring - || desc_avail_end > self.used_ring - { + if SplitVring::is_overlap( + self.desc_table, + desc_table_end, + self.avail_ring, + desc_avail_end, + ) || SplitVring::is_overlap( + self.avail_ring, + desc_avail_end, + self.used_ring, + desc_used_end, + ) || SplitVring::is_overlap( + self.desc_table, + desc_table_end, + self.used_ring, + desc_used_end, + ) { error!("The memory of descriptor table: 0x{:X}, avail ring: 0x{:X} or used ring: 0x{:X} is overlapped. queue size:{}", self.desc_table.raw_value(), self.avail_ring.raw_value(), self.used_ring.raw_value(), actual_size); return true; @@ -730,28 +802,23 @@ impl SplitVring { } } - fn get_vring_element( + fn get_desc_info( &mut self, sys_mem: &Arc, + next_avail: Wrapping, features: u64, - elem: &mut Element, - ) -> Result<()> { - let index_offset = VRING_FLAGS_AND_IDX_LEN - + AVAILELEM_LEN * u64::from(self.next_avail.0 % self.actual_size()); - let desc_index_addr = self - .addr_cache - .avail_ring_host - .checked_add(index_offset) - .chain_err(|| { - ErrorKind::AddressOverflow( - "popping avail ring", - self.avail_ring.raw_value(), - index_offset, - ) - })?; - let desc_index = sys_mem - .read_object_direct::(desc_index_addr) - .chain_err(|| ErrorKind::ReadObjectErr("the index of descriptor", desc_index_addr))?; + ) -> Result { + let index_offset = + VRING_FLAGS_AND_IDX_LEN + AVAILELEM_LEN * u64::from(next_avail.0 % self.actual_size()); + // The GPA of avail_ring_host with avail table length has been checked in + // is_invalid_memory which must not be overflowed. + let desc_index_addr = self.addr_cache.avail_ring_host + index_offset; + // SAFETY: dest_index_addr is protected by virtio calculations and is guaranteed to be within the ram range. + let desc_index = unsafe { + read_object_direct::(desc_index_addr).with_context(|| { + VirtioError::ReadObjectErr("the index of descriptor", desc_index_addr) + }) + }?; let desc = SplitVringDesc::new( sys_mem, @@ -760,42 +827,48 @@ impl SplitVring { desc_index, &mut self.cache, )?; - if desc.is_indirect_desc() { - if desc.write_only() { - bail!("Unexpected descriptor for writing only for popping avail ring"); - } - - desc.get_indirect_desc(sys_mem, desc_index, &mut self.cache, elem) - .map(|elem| { - self.next_avail += Wrapping(1); - elem - }) - .chain_err(|| "Failed to get indirect desc for popping avail ring")? - } else { - desc.get_nonindirect_desc( - sys_mem, - self.addr_cache.desc_table_host, - self.actual_size(), - desc_index, - &mut self.cache, - elem, - ) - .map(|elem| { - self.next_avail += Wrapping(1); - elem - })? - }; + // Suppress queue notification related to current processing desc chain. 
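+ // (With VIRTIO_F_RING_EVENT_IDX the device publishes an avail_event index
+ // instead of toggling VRING_USED_F_NO_NOTIFY; the driver compares its avail
+ // idx against that value before kicking, so advancing it past the chain
+ // popped here spares a redundant notification for this request.)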
if virtio_has_feature(features, VIRTIO_F_RING_EVENT_IDX) { - self.set_avail_event(sys_mem) - .chain_err(|| "Failed to set avail event for popping avail ring")?; + self.set_avail_event((next_avail + Wrapping(1)).0) + .with_context(|| "Failed to set avail event for popping avail ring")?; } + Ok(DescInfo { + table_host: self.addr_cache.desc_table_host, + size: self.actual_size(), + index: desc_index, + desc, + }) + } + + fn get_vring_element( + &mut self, + sys_mem: &Arc, + features: u64, + elem: &mut Element, + ) -> Result<()> { + let desc_info = self.get_desc_info(sys_mem, self.next_avail, features)?; + + SplitVringDesc::get_element(sys_mem, &desc_info, &mut self.cache, elem).with_context( + || { + format!( + "Failed to get element from descriptor chain {}, table addr: 0x{:X}, size: {}", + desc_info.index, desc_info.table_host, desc_info.size, + ) + }, + )?; + self.next_avail += Wrapping(1); + Ok(()) } } impl VringOps for SplitVring { + fn is_enabled(&self) -> bool { + self.ready + } + fn is_valid(&self, sys_mem: &Arc) -> bool { let size = u64::from(self.actual_size()); if !self.ready { @@ -814,14 +887,22 @@ impl VringOps for SplitVring { } fn pop_avail(&mut self, sys_mem: &Arc, features: u64) -> Result { - let avail_len = self.avail_ring_len(sys_mem)?; - if avail_len == 0 { - bail!("failed to pop avail: empty!"); + let mut element = Element::new(0); + if !self.is_enabled() || self.avail_ring_len()? == 0 { + return Ok(element); } - let mut element = Element::new(0); + // Make sure descriptor read does not bypass avail index read. + fence(Ordering::Acquire); + self.get_vring_element(sys_mem, features, &mut element) - .chain_err(|| "Failed to get vring element")?; + .with_context(|| "Failed to get vring element")?; + + trace::virtqueue_pop_avail( + &*self as *const _ as u64, + element.in_iovec.len(), + element.out_iovec.len(), + ); Ok(element) } @@ -830,153 +911,145 @@ impl VringOps for SplitVring { self.next_avail -= Wrapping(1); } - fn add_used(&mut self, sys_mem: &Arc, index: u16, len: u32) -> Result<()> { + fn add_used(&mut self, index: u16, len: u32) -> Result<()> { if index >= self.size { - return Err(ErrorKind::QueueIndex(index, self.size).into()); + return Err(anyhow!(VirtioError::QueueIndex(index, self.size))); } let next_used = u64::from(self.next_used.0 % self.actual_size()); + trace::virtqueue_add_used(&*self as *const _ as u64, next_used, index, len); let used_elem_addr = self.addr_cache.used_ring_host + VRING_FLAGS_AND_IDX_LEN + next_used * USEDELEM_LEN; let used_elem = UsedElem { id: u32::from(index), len, }; - sys_mem - .write_object_direct::(&used_elem, used_elem_addr) - .chain_err(|| "Failed to write object for used element")?; - - self.next_used += Wrapping(1); - + // SAFETY: used_elem_addr is guaranteed to be within ram range. + unsafe { + write_object_direct::(&used_elem, used_elem_addr) + .with_context(|| "Failed to write object for used element") + }?; + // Make sure used element is filled before updating used idx. fence(Ordering::Release); - sys_mem - .write_object_direct( - &(self.next_used.0 as u16), + self.next_used += Wrapping(1); + // SAFETY: used_ring_host has been checked when addr_cache inited. + unsafe { + write_object_direct( + &(self.next_used.0), self.addr_cache.used_ring_host + VRING_IDX_POSITION, ) - .chain_err(|| "Failed to write next used idx")?; + .with_context(|| "Failed to write next used idx") + }?; + // Make sure used index is exposed before notifying guest. + fence(Ordering::SeqCst); + // Do we wrap around? 
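+ // If next_used has wrapped a full 16-bit cycle back onto last_signal_used,
+ // the Wrapping<u16> window used by used_ring_need_event() (e.g. old = 5,
+ // used_event = 6, new = 10 gives (10 - 6 - 1) < (10 - 5), so notify) can no
+ // longer tell old entries from new ones; invalidate the cached state so the
+ // next should_notify() signals the guest unconditionally.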
+ if self.next_used == self.last_signal_used { + self.signal_used_valid = false; + } Ok(()) } - fn should_notify(&mut self, sys_mem: &Arc, features: u64) -> bool { + fn should_notify(&mut self, features: u64) -> bool { if virtio_has_feature(features, VIRTIO_F_RING_EVENT_IDX) { - self.used_ring_need_event(sys_mem) + self.used_ring_need_event() } else { - !self.is_avail_ring_no_interrupt(sys_mem) + !self.is_avail_ring_no_interrupt() } } + fn suppress_queue_notify(&mut self, features: u64, suppress: bool) -> Result<()> { + if !self.is_enabled() { + bail!("queue is not ready"); + } + + if virtio_has_feature(features, VIRTIO_F_RING_EVENT_IDX) { + self.set_avail_event(self.get_avail_idx()?)?; + } else { + self.set_used_flags(suppress)?; + } + Ok(()) + } + fn actual_size(&self) -> u16 { self.actual_size() } fn get_queue_config(&self) -> QueueConfig { - QueueConfig { - desc_table: self.desc_table, - avail_ring: self.avail_ring, - used_ring: self.used_ring, - addr_cache: self.addr_cache, - ready: self.ready, - max_size: self.max_size, - size: self.size, - vector: self.vector, - next_avail: self.next_avail.0, - next_used: self.next_used.0, - last_signal_used: self.last_signal_used.0, - } + let mut config = self.queue_config; + config.signal_used_valid = false; + config } /// The number of descriptor chains in the available ring. - fn avail_ring_len(&mut self, sys_mem: &Arc) -> Result { - let avail_idx = self.get_avail_idx(sys_mem).map(Wrapping)?; + fn avail_ring_len(&mut self) -> Result { + let avail_idx = self.get_avail_idx().map(Wrapping)?; + + Ok((avail_idx - self.next_avail).0) + } + + fn get_avail_idx(&self) -> Result { + SplitVring::get_avail_idx(self) + } - Ok((avail_idx - self.next_avail).0) + fn get_used_idx(&self) -> Result { + SplitVring::get_used_idx(self) } - fn get_host_address_from_cache( - &self, - addr: GuestAddress, - mem_space: &Arc, - ) -> u64 { - let host_addr; - if let Some(cache) = self.cache { - if addr.0 >= cache.start && addr.0 < cache.end { - host_addr = cache.host_base + addr.0 - cache.start; - } else { - host_addr = mem_space.get_host_address(addr).unwrap_or(0); - } - } else { - host_addr = mem_space.get_host_address(addr).unwrap_or(0); - } - host_addr + fn get_cache(&self) -> &Option { + &self.cache } -} -/// Virtio queue. -pub struct Queue { - /// Vring structure. - pub vring: Box, -} + fn get_avail_bytes( + &mut self, + sys_mem: &Arc, + max_size: usize, + is_in: bool, + ) -> Result { + if !self.is_enabled() { + return Ok(0); + } + fence(Ordering::Acquire); + + let mut avail_bytes = 0_usize; + let mut avail_idx = self.next_avail; + let end_idx = self.get_avail_idx().map(Wrapping)?; + while (end_idx - avail_idx).0 > 0 { + let desc_info = self.get_desc_info(sys_mem, avail_idx, 0)?; + + let mut elem = Element::new(0); + SplitVringDesc::get_element(sys_mem, &desc_info, &mut self.cache, &mut elem).with_context( + || { + format!( + "Failed to get element from descriptor chain {}, table addr: 0x{:X}, size: {}", + desc_info.index, desc_info.table_host, desc_info.size, + ) + }, + )?; -impl Queue { - /// Create a virtqueue. - /// - /// # Arguments - /// - /// * `queue_config` - Configuration of the vring. - /// * `queue_type` - Type of virtqueue. 
- pub fn new(queue_config: QueueConfig, queue_type: u16) -> Result { - let vring: Box = match queue_type { - QUEUE_TYPE_SPLIT_VRING => Box::new(SplitVring::new(queue_config)), - _ => { - bail!("Unsupported queue type {}", queue_type); + for e in match is_in { + true => elem.in_iovec, + false => elem.out_iovec, + } { + avail_bytes += e.len as usize; } - }; - - Ok(Queue { vring }) - } - /// Return true if the memory layout of the virqueue is valid. - /// - /// # Arguments - /// - /// * `sys_mem` - Address space to which the vring belongs. - pub fn is_valid(&self, sys_mem: &Arc) -> bool { - self.vring.is_valid(sys_mem) + if avail_bytes >= max_size { + return Ok(max_size); + } + avail_idx += Wrapping(1); + } + Ok(avail_bytes) } } #[cfg(test)] mod tests { - pub use super::*; - use address_space::{AddressSpace, GuestAddress, HostMemMapping, Region}; - - fn address_space_init() -> Arc { - let root = Region::init_container_region(1 << 36); - let sys_space = AddressSpace::new(root).unwrap(); - let host_mmap = Arc::new( - HostMemMapping::new( - GuestAddress(0), - None, - SYSTEM_SPACE_SIZE, - None, - false, - false, - false, - ) - .unwrap(), - ); - sys_space - .root() - .add_subregion( - Region::init_ram_region(host_mmap.clone()), - host_mmap.start_address().raw_value(), - ) - .unwrap(); - sys_space - } + use super::*; + use crate::tests::address_space_init; + use crate::{Queue, QUEUE_TYPE_PACKED_VRING, QUEUE_TYPE_SPLIT_VRING}; + use address_space::{AddressAttr, AddressSpace, GuestAddress}; trait VringOpsTest { fn set_desc( @@ -1022,10 +1095,10 @@ mod tests { next: u16, ) -> Result<()> { if index >= self.actual_size() { - return Err(ErrorKind::QueueIndex(index, self.size).into()); + return Err(anyhow!(VirtioError::QueueIndex(index, self.size))); } - let desc_addr_offset = DESCRIPTOR_LEN * index as u64; + let desc_addr_offset = DESCRIPTOR_LEN * u64::from(index); let desc = SplitVringDesc { addr, len, @@ -1035,22 +1108,29 @@ mod tests { sys_mem.write_object::( &desc, GuestAddress(self.desc_table.0 + desc_addr_offset), + AddressAttr::Ram, )?; Ok(()) } fn set_avail_ring_idx(&self, sys_mem: &Arc, idx: u16) -> Result<()> { - let avail_idx_offset = 2 as u64; - sys_mem - .write_object::(&idx, GuestAddress(self.avail_ring.0 + avail_idx_offset))?; + let avail_idx_offset = 2_u64; + sys_mem.write_object::( + &idx, + GuestAddress(self.avail_ring.0 + avail_idx_offset), + AddressAttr::Ram, + )?; Ok(()) } fn set_avail_ring_flags(&self, sys_mem: &Arc, flags: u16) -> Result<()> { - let avail_idx_offset = 0 as u64; - sys_mem - .write_object::(&flags, GuestAddress(self.avail_ring.0 + avail_idx_offset))?; + let avail_idx_offset = 0_u64; + sys_mem.write_object::( + &flags, + GuestAddress(self.avail_ring.0 + avail_idx_offset), + AddressAttr::Ram, + )?; Ok(()) } @@ -1060,47 +1140,61 @@ mod tests { avail_pos: u16, desc_index: u16, ) -> Result<()> { - let avail_idx_offset = VRING_FLAGS_AND_IDX_LEN + AVAILELEM_LEN * (avail_pos as u64); + let avail_idx_offset = VRING_FLAGS_AND_IDX_LEN + AVAILELEM_LEN * u64::from(avail_pos); sys_mem.write_object::( &desc_index, GuestAddress(self.avail_ring.0 + avail_idx_offset), + AddressAttr::Ram, )?; Ok(()) } fn get_avail_event(&self, sys_mem: &Arc) -> Result { let avail_event_idx_offset = - VRING_FLAGS_AND_IDX_LEN + USEDELEM_LEN * (self.actual_size() as u64); - let event_idx = sys_mem - .read_object::(GuestAddress(self.used_ring.0 + avail_event_idx_offset))?; + VRING_FLAGS_AND_IDX_LEN + USEDELEM_LEN * u64::from(self.actual_size()); + let event_idx = sys_mem.read_object::( + 
GuestAddress(self.used_ring.0 + avail_event_idx_offset), + AddressAttr::Ram, + )?; Ok(event_idx) } fn get_used_elem(&self, sys_mem: &Arc, index: u16) -> Result { - let used_elem_offset = VRING_FLAGS_AND_IDX_LEN + USEDELEM_LEN * (index as u64); - let used_elem = sys_mem - .read_object::(GuestAddress(self.used_ring.0 + used_elem_offset))?; + let used_elem_offset = VRING_FLAGS_AND_IDX_LEN + USEDELEM_LEN * u64::from(index); + let used_elem = sys_mem.read_object::( + GuestAddress(self.used_ring.0 + used_elem_offset), + AddressAttr::Ram, + )?; Ok(used_elem) } fn get_used_ring_idx(&self, sys_mem: &Arc) -> Result { let used_idx_offset = VRING_IDX_POSITION; - let idx = - sys_mem.read_object::(GuestAddress(self.used_ring.0 + used_idx_offset))?; + let idx = sys_mem.read_object::( + GuestAddress(self.used_ring.0 + used_idx_offset), + AddressAttr::Ram, + )?; Ok(idx) } fn set_used_ring_idx(&self, sys_mem: &Arc, idx: u16) -> Result<()> { let used_idx_offset = VRING_IDX_POSITION; - sys_mem.write_object::(&idx, GuestAddress(self.used_ring.0 + used_idx_offset))?; + sys_mem.write_object::( + &idx, + GuestAddress(self.used_ring.0 + used_idx_offset), + AddressAttr::Ram, + )?; Ok(()) } fn set_used_event_idx(&self, sys_mem: &Arc, idx: u16) -> Result<()> { let event_idx_offset = - VRING_FLAGS_AND_IDX_LEN + AVAILELEM_LEN * (self.actual_size() as u64); - sys_mem - .write_object::(&idx, GuestAddress(self.avail_ring.0 + event_idx_offset))?; + VRING_FLAGS_AND_IDX_LEN + AVAILELEM_LEN * u64::from(self.actual_size()); + sys_mem.write_object::( + &idx, + GuestAddress(self.avail_ring.0 + event_idx_offset), + AddressAttr::Ram, + )?; Ok(()) } } @@ -1119,12 +1213,12 @@ mod tests { flags, next, }; - sys_mem.write_object::(&desc, desc_addr)?; + sys_mem.write_object::(&desc, desc_addr, AddressAttr::Ram)?; Ok(()) } const SYSTEM_SPACE_SIZE: u64 = (1024 * 1024) as u64; - const QUEUE_SIZE: u16 = 256 as u16; + const QUEUE_SIZE: u16 = 256_u16; fn align(size: u64, alignment: u64) -> u64 { let align_adjust = if size % alignment != 0 { @@ -1132,7 +1226,7 @@ mod tests { } else { 0 }; - (size + align_adjust) as u64 + size + align_adjust } #[test] @@ -1149,38 +1243,38 @@ mod tests { // it is valid queue_config.desc_table = GuestAddress(0); - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); queue_config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64), + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), 4096, )); queue_config.ready = true; queue_config.size = QUEUE_SIZE; let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + assert!(queue.is_valid(&sys_space)); // it is invalid when the status is not ready queue_config.ready = false; let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); queue_config.ready = true; // it is invalid when the size of virtual ring is more than the max size queue_config.size = QUEUE_SIZE + 1; let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); // it is invalid when the size of virtual ring is zero queue_config.size = 0; let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - 
assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); // it is invalid when the size of virtual ring isn't power of 2 queue_config.size = 15; let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); } #[test] @@ -1189,58 +1283,58 @@ mod tests { let mut queue_config = QueueConfig::new(QUEUE_SIZE); queue_config.desc_table = GuestAddress(0); - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); queue_config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64), + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), 4096, )); queue_config.ready = true; queue_config.size = QUEUE_SIZE; let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + assert!(queue.is_valid(&sys_space)); // it is invalid when the address of descriptor table is out of bound queue_config.desc_table = - GuestAddress(SYSTEM_SPACE_SIZE - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + 1 as u64); + GuestAddress(SYSTEM_SPACE_SIZE - u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + 1_u64); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); // recover the address for valid queue queue_config.desc_table = GuestAddress(0); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + assert!(queue.is_valid(&sys_space)); // it is invalid when the address of avail ring is out of bound queue_config.avail_ring = GuestAddress( SYSTEM_SPACE_SIZE - - (VRING_AVAIL_LEN_EXCEPT_AVAILELEM + AVAILELEM_LEN * (QUEUE_SIZE as u64)) - + 1 as u64, + - (VRING_AVAIL_LEN_EXCEPT_AVAILELEM + AVAILELEM_LEN * u64::from(QUEUE_SIZE)) + + 1_u64, ); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); // recover the address for valid queue - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + assert!(queue.is_valid(&sys_space)); // it is invalid when the address of used ring is out of bound queue_config.used_ring = GuestAddress( SYSTEM_SPACE_SIZE - - (VRING_USED_LEN_EXCEPT_USEDELEM + USEDELEM_LEN * (QUEUE_SIZE as u64)) - + 1 as u64, + - (VRING_USED_LEN_EXCEPT_USEDELEM + USEDELEM_LEN * u64::from(QUEUE_SIZE)) + + 1_u64, ); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); // recover the address for valid queue queue_config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64), + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), 4096, )); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + assert!(queue.is_valid(&sys_space)); } #[test] @@ -1249,68 +1343,69 @@ mod tests { 
let mut queue_config = QueueConfig::new(QUEUE_SIZE); queue_config.desc_table = GuestAddress(0); - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); queue_config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64), + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), 4096, )); queue_config.ready = true; queue_config.size = QUEUE_SIZE; let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + assert!(queue.is_valid(&sys_space)); // it is invalid when the address of descriptor table is equal to the address of avail ring queue_config.avail_ring = GuestAddress(0); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); // recover the address for valid queue - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + assert!(queue.is_valid(&sys_space)); - // it is invalid when the address of descriptor table is overlapped to the address of avail ring - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN - 1); + // it is invalid when the address of descriptor table is overlapped to the address of avail + // ring. + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN - 1); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); // recover the address for valid queue - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + assert!(queue.is_valid(&sys_space)); // it is invalid when the address of avail ring is equal to the address of used ring - queue_config.used_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); + queue_config.used_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); // recover the address for valid queue queue_config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64), + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), 4096, )); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + assert!(queue.is_valid(&sys_space)); // it is invalid when the address of avail ring is overlapped to the address of used ring queue_config.used_ring = GuestAddress( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64) + + AVAILELEM_LEN * u64::from(QUEUE_SIZE) - 1, ); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - 
assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); // recover the address for valid queue queue_config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64), + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), 4096, )); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + assert!(queue.is_valid(&sys_space)); } #[test] @@ -1319,54 +1414,54 @@ mod tests { let mut queue_config = QueueConfig::new(QUEUE_SIZE); queue_config.desc_table = GuestAddress(0); - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); queue_config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64), + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), 4096, )); queue_config.ready = true; queue_config.size = QUEUE_SIZE; let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + assert!(queue.is_valid(&sys_space)); // it is invalid when the address of descriptor table is not aligned to 16 - queue_config.desc_table = GuestAddress(15 as u64); + queue_config.desc_table = GuestAddress(15_u64); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); // recover the address for valid queue queue_config.desc_table = GuestAddress(0); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + assert!(queue.is_valid(&sys_space)); // it is invalid when the address of avail ring is not aligned to 2 - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN + 1); + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + 1); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); // recover the address for valid queue - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + assert!(queue.is_valid(&sys_space)); // it is invalid when the address of used ring is not aligned to 4 queue_config.used_ring = GuestAddress( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64) + + AVAILELEM_LEN * u64::from(QUEUE_SIZE) + 3, ); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), false); + assert!(!queue.is_valid(&sys_space)); // recover the address for valid queue queue_config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64), + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), 4096, )); let queue = Queue::new(queue_config, QUEUE_TYPE_SPLIT_VRING).unwrap(); - assert_eq!(queue.is_valid(&sys_space), true); + 
assert!(queue.is_valid(&sys_space)); } #[test] @@ -1375,23 +1470,32 @@ mod tests { let mut queue_config = QueueConfig::new(QUEUE_SIZE); queue_config.desc_table = GuestAddress(0); - queue_config.addr_cache.desc_table_host = - sys_space.get_host_address(queue_config.desc_table).unwrap(); - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); - queue_config.addr_cache.avail_ring_host = - sys_space.get_host_address(queue_config.avail_ring).unwrap(); + queue_config.addr_cache.desc_table_host = unsafe { + sys_space + .get_host_address(queue_config.desc_table, AddressAttr::Ram) + .unwrap() + }; + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); + queue_config.addr_cache.avail_ring_host = unsafe { + sys_space + .get_host_address(queue_config.avail_ring, AddressAttr::Ram) + .unwrap() + }; queue_config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64), + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), 4096, )); - queue_config.addr_cache.used_ring_host = - sys_space.get_host_address(queue_config.used_ring).unwrap(); + queue_config.addr_cache.used_ring_host = unsafe { + sys_space + .get_host_address(queue_config.used_ring, AddressAttr::Ram) + .unwrap() + }; queue_config.ready = true; queue_config.size = QUEUE_SIZE; let mut vring = SplitVring::new(queue_config); - assert_eq!(vring.is_valid(&sys_space), true); + assert!(vring.is_valid(&sys_space)); // it is ok when the descriptor chain is normal // set the information of index 0 for descriptor @@ -1428,7 +1532,7 @@ mod tests { // set 1 to the idx of avail ring vring.set_avail_ring_idx(&sys_space, 1).unwrap(); - let features = 1 << VIRTIO_F_RING_EVENT_IDX as u64; + let features = 1 << u64::from(VIRTIO_F_RING_EVENT_IDX); let elem = match vring.pop_avail(&sys_space, features) { Ok(ret) => ret, Err(_) => Element { @@ -1441,11 +1545,11 @@ mod tests { assert_eq!(elem.index, 0); assert_eq!(elem.desc_num, 3); assert_eq!(elem.out_iovec.len(), 1); - let elem_iov = elem.out_iovec.get(0).unwrap(); + let elem_iov = elem.out_iovec.first().unwrap(); assert_eq!(elem_iov.addr, GuestAddress(0x111)); assert_eq!(elem_iov.len, 16); assert_eq!(elem.in_iovec.len(), 2); - let elem_iov = elem.in_iovec.get(0).unwrap(); + let elem_iov = elem.in_iovec.first().unwrap(); assert_eq!(elem_iov.addr, GuestAddress(0x222)); assert_eq!(elem_iov.len, 32); let elem_iov = elem.in_iovec.get(1).unwrap(); @@ -1455,7 +1559,7 @@ mod tests { // the event idx of avail ring is equal to get_avail_event let event_idx = vring.get_avail_event(&sys_space).unwrap(); assert_eq!(event_idx, 1); - let avail_idx = vring.get_avail_idx(&sys_space).unwrap(); + let avail_idx = vring.get_avail_idx().unwrap(); assert_eq!(avail_idx, 1); } @@ -1465,23 +1569,32 @@ mod tests { let mut queue_config = QueueConfig::new(QUEUE_SIZE); queue_config.desc_table = GuestAddress(0); - queue_config.addr_cache.desc_table_host = - sys_space.get_host_address(queue_config.desc_table).unwrap(); - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); - queue_config.addr_cache.avail_ring_host = - sys_space.get_host_address(queue_config.avail_ring).unwrap(); + queue_config.addr_cache.desc_table_host = unsafe { + sys_space + .get_host_address(queue_config.desc_table, AddressAttr::Ram) + .unwrap() + }; + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); + queue_config.addr_cache.avail_ring_host = unsafe { + 
sys_space + .get_host_address(queue_config.avail_ring, AddressAttr::Ram) + .unwrap() + }; queue_config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64), + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), 4096, )); - queue_config.addr_cache.used_ring_host = - sys_space.get_host_address(queue_config.used_ring).unwrap(); + queue_config.addr_cache.used_ring_host = unsafe { + sys_space + .get_host_address(queue_config.used_ring, AddressAttr::Ram) + .unwrap() + }; queue_config.ready = true; queue_config.size = QUEUE_SIZE; let mut vring = SplitVring::new(queue_config); - assert_eq!(vring.is_valid(&sys_space), true); + assert!(vring.is_valid(&sys_space)); // it is ok when the descriptor chain is indirect // set the information for indirect descriptor @@ -1534,7 +1647,7 @@ mod tests { // set 1 to the idx of avail ring vring.set_avail_ring_idx(&sys_space, 1).unwrap(); - let features = 1 << VIRTIO_F_RING_EVENT_IDX as u64; + let features = 1 << u64::from(VIRTIO_F_RING_EVENT_IDX); let elem = match vring.pop_avail(&sys_space, features) { Ok(ret) => ret, Err(_) => Element { @@ -1547,14 +1660,14 @@ mod tests { assert_eq!(elem.index, 0); assert_eq!(elem.desc_num, 3); assert_eq!(elem.out_iovec.len(), 2); - let elem_iov = elem.out_iovec.get(0).unwrap(); + let elem_iov = elem.out_iovec.first().unwrap(); assert_eq!(elem_iov.addr, GuestAddress(0x444)); assert_eq!(elem_iov.len, 100); let elem_iov = elem.out_iovec.get(1).unwrap(); assert_eq!(elem_iov.addr, GuestAddress(0x555)); assert_eq!(elem_iov.len, 200); assert_eq!(elem.in_iovec.len(), 1); - let elem_iov = elem.in_iovec.get(0).unwrap(); + let elem_iov = elem.in_iovec.first().unwrap(); assert_eq!(elem_iov.addr, GuestAddress(0x666)); assert_eq!(elem_iov.len, 300); } @@ -1565,30 +1678,41 @@ mod tests { let mut queue_config = QueueConfig::new(QUEUE_SIZE); queue_config.desc_table = GuestAddress(0); - queue_config.addr_cache.desc_table_host = - sys_space.get_host_address(queue_config.desc_table).unwrap(); - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); - queue_config.addr_cache.avail_ring_host = - sys_space.get_host_address(queue_config.avail_ring).unwrap(); + queue_config.addr_cache.desc_table_host = unsafe { + sys_space + .get_host_address(queue_config.desc_table, AddressAttr::Ram) + .unwrap() + }; + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); + queue_config.addr_cache.avail_ring_host = unsafe { + sys_space + .get_host_address(queue_config.avail_ring, AddressAttr::Ram) + .unwrap() + }; queue_config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64), + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), 4096, )); - queue_config.addr_cache.used_ring_host = - sys_space.get_host_address(queue_config.used_ring).unwrap(); + queue_config.addr_cache.used_ring_host = unsafe { + sys_space + .get_host_address(queue_config.used_ring, AddressAttr::Ram) + .unwrap() + }; queue_config.ready = true; queue_config.size = QUEUE_SIZE; let mut vring = SplitVring::new(queue_config); - assert_eq!(vring.is_valid(&sys_space), true); + assert!(vring.is_valid(&sys_space)); // it is error when the idx of avail ring which is equal to next_avail // set 0 to the idx of avail ring which is equal to next_avail vring.set_avail_ring_idx(&sys_space, 0).unwrap(); - let features = 1 
<< VIRTIO_F_RING_EVENT_IDX as u64; - if let Ok(_) = vring.pop_avail(&sys_space, features) { - assert!(false); + let features = 1 << u64::from(VIRTIO_F_RING_EVENT_IDX); + if let Ok(elem) = vring.pop_avail(&sys_space, features) { + if elem.desc_num != 0 { + assert!(false); + } } // it is error when the indirect descriptor is written @@ -1624,12 +1748,12 @@ mod tests { 0, ) .unwrap(); - if let Ok(_) = vring.pop_avail(&sys_space, features) { + if vring.pop_avail(&sys_space, features).is_ok() { assert!(false); } - // error comes when the length of indirect descriptor is more than the length of descriptor chain - // set the information of index 0 for descriptor + // error comes when the length of indirect descriptor is more than the length of descriptor + // chain set the information of index 0 for descriptor. vring .set_desc( &sys_space, @@ -1678,6 +1802,251 @@ mod tests { } else { assert!(false); } + + // The INDIRECT and NEXT flag should not be used together. + vring + .set_desc( + &sys_space, + 0, + GuestAddress(SYSTEM_SPACE_SIZE / 2), + 48, + VIRTQ_DESC_F_INDIRECT | VIRTQ_DESC_F_NEXT, + 0, + ) + .unwrap(); + if let Err(err) = vring.pop_avail(&sys_space, features) { + assert_eq!(err.to_string(), "Failed to get vring element"); + } else { + assert!(false); + } + + // The device-writable desc elems must behind the device-readable desc elems. + vring + .set_desc( + &sys_space, + 0, + GuestAddress(SYSTEM_SPACE_SIZE / 2), + 48, + VIRTQ_DESC_F_INDIRECT, + 0, + ) + .unwrap(); + + // Set the information of index 0 for indirect descriptor. + set_indirect_desc( + &sys_space, + GuestAddress(SYSTEM_SPACE_SIZE / 2), + GuestAddress(0x444), + 100, + VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, + 1, + ) + .unwrap(); + if let Err(err) = vring.pop_avail(&sys_space, features) { + assert_eq!(err.to_string(), "Failed to get vring element"); + } else { + assert!(false); + } + + // The VIRTQ_DESC_F_NEXT must not set to the descriptor in indirect table. + vring + .set_desc( + &sys_space, + 0, + GuestAddress(SYSTEM_SPACE_SIZE / 2), + 16, + VIRTQ_DESC_F_INDIRECT, + 0, + ) + .unwrap(); + + set_indirect_desc( + &sys_space, + GuestAddress(SYSTEM_SPACE_SIZE / 2), + GuestAddress(0x444), + 100, + VIRTQ_DESC_F_INDIRECT | VIRTQ_DESC_F_WRITE, + 1, + ) + .unwrap(); + if let Err(err) = vring.pop_avail(&sys_space, features) { + assert_eq!(err.to_string(), "Failed to get vring element"); + } else { + assert!(false); + } + } + + #[test] + fn test_pop_avail_04() { + let sys_space = address_space_init(); + + let mut queue_config = QueueConfig::new(QUEUE_SIZE); + queue_config.desc_table = GuestAddress(0); + queue_config.addr_cache.desc_table_host = unsafe { + sys_space + .get_host_address(queue_config.desc_table, AddressAttr::Ram) + .unwrap() + }; + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); + queue_config.addr_cache.avail_ring_host = unsafe { + sys_space + .get_host_address(queue_config.avail_ring, AddressAttr::Ram) + .unwrap() + }; + queue_config.used_ring = GuestAddress(align( + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + + VRING_AVAIL_LEN_EXCEPT_AVAILELEM + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), + 4096, + )); + queue_config.addr_cache.used_ring_host = unsafe { + sys_space + .get_host_address(queue_config.used_ring, AddressAttr::Ram) + .unwrap() + }; + queue_config.ready = true; + queue_config.size = QUEUE_SIZE; + let mut vring = SplitVring::new(queue_config); + assert!(vring.is_valid(&sys_space)); + + // Set the information of index 0 for normal descriptor. 
+ vring + .set_desc(&sys_space, 0, GuestAddress(0x111), 16, VIRTQ_DESC_F_NEXT, 1) + .unwrap(); + + // Set the information of index 1 for normal descriptor. + vring + .set_desc(&sys_space, 1, GuestAddress(0x222), 32, VIRTQ_DESC_F_NEXT, 2) + .unwrap(); + + // Set the incorrect information of index 2 for normal descriptor. + // The VIRTQ_DESC_F_INDIRECT and VIRTQ_DESC_F_NEXT flag can not be + // used together. + vring + .set_desc( + &sys_space, + 2, + GuestAddress(SYSTEM_SPACE_SIZE / 2), + 32, + VIRTQ_DESC_F_INDIRECT | VIRTQ_DESC_F_WRITE | VIRTQ_DESC_F_NEXT, + 0, + ) + .unwrap(); + + // Set the information of index 0 for indirect descriptor. + set_indirect_desc( + &sys_space, + GuestAddress(SYSTEM_SPACE_SIZE / 2), + GuestAddress(0x444), + 100, + VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, + 1, + ) + .unwrap(); + + // Set the information of index 1 for indirect descriptor. + set_indirect_desc( + &sys_space, + GuestAddress(SYSTEM_SPACE_SIZE / 2 + DESCRIPTOR_LEN), + GuestAddress(0x555), + 200, + VIRTQ_DESC_F_WRITE, + 2, + ) + .unwrap(); + + // Set the index 0 of descriptor to the position 0 for the element of avail ring. + vring.set_avail_ring_elem(&sys_space, 0, 0).unwrap(); + // Set 1 to the idx of avail ring. + vring.set_avail_ring_idx(&sys_space, 1).unwrap(); + + let features = 1 << u64::from(VIRTIO_F_RING_EVENT_IDX); + if let Err(err) = vring.pop_avail(&sys_space, features) { + assert_eq!(err.to_string(), "Failed to get vring element"); + } else { + assert!(false); + } + + // Set the correct information of index 2 for normal descriptor. + vring + .set_desc( + &sys_space, + 2, + GuestAddress(SYSTEM_SPACE_SIZE / 2), + 32, + VIRTQ_DESC_F_INDIRECT, + 0, + ) + .unwrap(); + + // Set the incorrect information of index 1 for indirect descriptor. + // The VIRTQ_DESC_F_INDIRECT flag can not be used in indirect descriptor + // table. + set_indirect_desc( + &sys_space, + GuestAddress(SYSTEM_SPACE_SIZE / 2 + DESCRIPTOR_LEN), + GuestAddress(0x555), + 208, + VIRTQ_DESC_F_WRITE | VIRTQ_DESC_F_INDIRECT, + 2, + ) + .unwrap(); + + if let Err(err) = vring.pop_avail(&sys_space, features) { + assert_eq!(err.to_string(), "Failed to get vring element"); + } else { + assert!(false); + } + + // Set the correct information of index 1 for indirect descriptor. + set_indirect_desc( + &sys_space, + GuestAddress(SYSTEM_SPACE_SIZE / 2 + DESCRIPTOR_LEN), + GuestAddress(0x555), + 200, + VIRTQ_DESC_F_WRITE, + 2, + ) + .unwrap(); + + // Check the result of pop_avail(), which has normal and indirect + // descriptor elem. + let elem = match vring.pop_avail(&sys_space, features) { + Ok(ret) => ret, + Err(_) => Element { + index: 1, + desc_num: 0, + out_iovec: Vec::new(), + in_iovec: Vec::new(), + }, + }; + + assert_eq!(elem.index, 0); + assert_eq!(elem.desc_num, 4); + + // Two elem for reading. + assert_eq!(elem.out_iovec.len(), 2); + let elem_iov = elem.out_iovec.first().unwrap(); + assert_eq!(elem_iov.addr, GuestAddress(0x111)); + assert_eq!(elem_iov.len, 16); + let elem_iov = elem.out_iovec.get(1).unwrap(); + assert_eq!(elem_iov.addr, GuestAddress(0x222)); + assert_eq!(elem_iov.len, 32); + + // Two elem for writing. + assert_eq!(elem.in_iovec.len(), 2); + let elem_iov = elem.in_iovec.first().unwrap(); + assert_eq!(elem_iov.addr, GuestAddress(0x444)); + assert_eq!(elem_iov.len, 100); + let elem_iov = elem.in_iovec.get(1).unwrap(); + assert_eq!(elem_iov.addr, GuestAddress(0x555)); + assert_eq!(elem_iov.len, 200); + + // The event idx of avail ring is equal to get_avail_event. 
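+ // (pop_avail() wrote next_avail + 1 == 1 into avail_event via
+ // set_avail_event(), matching the avail idx of 1 set by the test above.)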
+ let event_idx = vring.get_avail_event(&sys_space).unwrap(); + assert_eq!(event_idx, 1); + let avail_idx = vring.get_avail_idx().unwrap(); + assert_eq!(avail_idx, 1); } #[test] @@ -1686,32 +2055,44 @@ mod tests { let mut queue_config = QueueConfig::new(QUEUE_SIZE); queue_config.desc_table = GuestAddress(0); - queue_config.addr_cache.desc_table_host = - sys_space.get_host_address(queue_config.desc_table).unwrap(); - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); - queue_config.addr_cache.avail_ring_host = - sys_space.get_host_address(queue_config.avail_ring).unwrap(); + queue_config.addr_cache.desc_table_host = unsafe { + sys_space + .get_host_address(queue_config.desc_table, AddressAttr::Ram) + .unwrap() + }; + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); + queue_config.addr_cache.avail_ring_host = unsafe { + sys_space + .get_host_address(queue_config.avail_ring, AddressAttr::Ram) + .unwrap() + }; queue_config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64), + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), 4096, )); - queue_config.addr_cache.used_ring_host = - sys_space.get_host_address(queue_config.used_ring).unwrap(); + queue_config.addr_cache.used_ring_host = unsafe { + sys_space + .get_host_address(queue_config.used_ring, AddressAttr::Ram) + .unwrap() + }; queue_config.ready = true; queue_config.size = QUEUE_SIZE; let mut vring = SplitVring::new(queue_config); - assert_eq!(vring.is_valid(&sys_space), true); + assert!(vring.is_valid(&sys_space)); // it is false when the index is more than the size of queue - let err = vring.add_used(&sys_space, QUEUE_SIZE, 100).unwrap_err(); - if let ErrorKind::QueueIndex(offset, size) = err.kind() { - assert_eq!(*offset, 256); - assert_eq!(*size, 256); + if let Err(err) = vring.add_used(QUEUE_SIZE, 100) { + if let Some(e) = err.downcast_ref::() { + if let VirtioError::QueueIndex(offset, size) = e { + assert_eq!(*offset, 256); + assert_eq!(*size, 256); + } + } } - assert!(vring.add_used(&sys_space, 10, 100).is_ok()); + assert!(vring.add_used(10, 100).is_ok()); let elem = vring.get_used_elem(&sys_space, 0).unwrap(); assert_eq!(elem.id, 10); assert_eq!(elem.len, 100); @@ -1724,53 +2105,66 @@ mod tests { let mut queue_config = QueueConfig::new(QUEUE_SIZE); queue_config.desc_table = GuestAddress(0); - queue_config.addr_cache.desc_table_host = - sys_space.get_host_address(queue_config.desc_table).unwrap(); - queue_config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * DESCRIPTOR_LEN); - queue_config.addr_cache.avail_ring_host = - sys_space.get_host_address(queue_config.avail_ring).unwrap(); + queue_config.addr_cache.desc_table_host = unsafe { + sys_space + .get_host_address(queue_config.desc_table, AddressAttr::Ram) + .unwrap() + }; + queue_config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN); + queue_config.addr_cache.avail_ring_host = unsafe { + sys_space + .get_host_address(queue_config.avail_ring, AddressAttr::Ram) + .unwrap() + }; queue_config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * DESCRIPTOR_LEN + u64::from(QUEUE_SIZE) * DESCRIPTOR_LEN + VRING_AVAIL_LEN_EXCEPT_AVAILELEM - + AVAILELEM_LEN * (QUEUE_SIZE as u64), + + AVAILELEM_LEN * u64::from(QUEUE_SIZE), 4096, )); - queue_config.addr_cache.used_ring_host = - sys_space.get_host_address(queue_config.used_ring).unwrap(); + queue_config.addr_cache.used_ring_host = unsafe { + 
sys_space + .get_host_address(queue_config.used_ring, AddressAttr::Ram) + .unwrap() + }; queue_config.ready = true; queue_config.size = QUEUE_SIZE; let mut vring = SplitVring::new(queue_config); - assert_eq!(vring.is_valid(&sys_space), true); + assert!(vring.is_valid(&sys_space)); - // it's true when the feature of event idx and no interrupt for the avail ring is closed - let features = 0 as u64; + // it's true when the feature of event idx and no interrupt for the avail ring is closed + let features = 0_u64; assert!(vring.set_avail_ring_flags(&sys_space, 0).is_ok()); - assert_eq!(vring.should_notify(&sys_space, features), true); + assert!(vring.should_notify(features)); - // it's false when the feature of event idx is closed and the feature of no interrupt for the avail ring is open - let features = 0 as u64; + // it's false when the feature of event idx is closed and the feature of no interrupt for + // the avail ring is open + let features = 0_u64; assert!(vring .set_avail_ring_flags(&sys_space, VRING_AVAIL_F_NO_INTERRUPT) .is_ok()); - assert_eq!(vring.should_notify(&sys_space, features), false); - - // it's true when the feature of event idx is open and (new - event_idx - Wrapping(1) < new -old) - let features = 1 << VIRTIO_F_RING_EVENT_IDX as u64; - vring.last_signal_used = Wrapping(5); //old - assert!(vring.set_used_ring_idx(&sys_space, 10).is_ok()); //new - assert!(vring.set_used_event_idx(&sys_space, 6).is_ok()); //event_idx - assert_eq!(vring.should_notify(&sys_space, features), true); - - // it's false when the feature of event idx is open and (new - event_idx - Wrapping(1) > new -old) - vring.last_signal_used = Wrapping(5); //old - assert!(vring.set_used_ring_idx(&sys_space, 10).is_ok()); //new - assert!(vring.set_used_event_idx(&sys_space, 1).is_ok()); //event_idx - assert_eq!(vring.should_notify(&sys_space, features), false); - - // it's false when the feature of event idx is open and (new - event_idx - Wrapping(1) = new -old) - vring.last_signal_used = Wrapping(5); //old - assert!(vring.set_used_ring_idx(&sys_space, 10).is_ok()); //new - assert!(vring.set_used_event_idx(&sys_space, 4).is_ok()); //event_idx - assert_eq!(vring.should_notify(&sys_space, features), false); + assert!(!vring.should_notify(features)); + + // it's true when the feature of event idx is open and + // (new - event_idx - Wrapping(1) < new -old) + let features = 1 << u64::from(VIRTIO_F_RING_EVENT_IDX); + vring.last_signal_used = Wrapping(5); // old + assert!(vring.set_used_ring_idx(&sys_space, 10).is_ok()); // new + assert!(vring.set_used_event_idx(&sys_space, 6).is_ok()); // event_idx + assert!(vring.should_notify(features)); + + // it's false when the feature of event idx is open and + // (new - event_idx - Wrapping(1) > new - old) + vring.last_signal_used = Wrapping(5); // old + assert!(vring.set_used_ring_idx(&sys_space, 10).is_ok()); // new + assert!(vring.set_used_event_idx(&sys_space, 1).is_ok()); // event_idx + assert!(!vring.should_notify(features)); + + // it's false when the feature of event idx is open and + // (new - event_idx - Wrapping(1) = new -old) + vring.last_signal_used = Wrapping(5); // old + assert!(vring.set_used_ring_idx(&sys_space, 10).is_ok()); // new + assert!(vring.set_used_event_idx(&sys_space, 4).is_ok()); // event_idx + assert!(!vring.should_notify(features)); } } diff --git a/virtio/src/transport/mod.rs b/virtio/src/transport/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..9a56e1418e3c5570b7ec61d53be0df1e58df0a84 --- /dev/null +++ 
b/virtio/src/transport/mod.rs @@ -0,0 +1,14 @@ +// Copyright (c) 2023 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +pub mod virtio_mmio; +pub mod virtio_pci; diff --git a/virtio/src/virtio_mmio.rs b/virtio/src/transport/virtio_mmio.rs similarity index 39% rename from virtio/src/virtio_mmio.rs rename to virtio/src/transport/virtio_mmio.rs index d85e889d62e2ab2419e89ee25141a302790b73bc..eb081e1338cbc5fa879577de3afeda1480f4ddf2 100644 --- a/virtio/src/virtio_mmio.rs +++ b/virtio/src/transport/virtio_mmio.rs @@ -10,25 +10,29 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::atomic::Ordering; use std::sync::{Arc, Mutex}; -use address_space::{AddressRange, AddressSpace, GuestAddress, RegionIoEventFd}; +use anyhow::{anyhow, bail, Context, Result}; use byteorder::{ByteOrder, LittleEndian}; -#[cfg(target_arch = "x86_64")] -use machine_manager::config::{BootSource, Param}; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; -use sysbus::{SysBus, SysBusDevOps, SysBusDevType, SysRes}; -use util::byte_code::ByteCode; +use log::{debug, error, info, warn}; use vmm_sys_util::eventfd::EventFd; -use super::{ - virtio_has_feature, Queue, QueueConfig, VirtioDevice, VirtioInterrupt, VirtioInterruptType, +use crate::error::VirtioError; +use crate::{ + virtio_has_feature, Queue, VirtioBaseState, VirtioDevice, VirtioInterrupt, VirtioInterruptType, CONFIG_STATUS_ACKNOWLEDGE, CONFIG_STATUS_DRIVER, CONFIG_STATUS_DRIVER_OK, CONFIG_STATUS_FAILED, - CONFIG_STATUS_FEATURES_OK, NOTIFY_REG_OFFSET, QUEUE_TYPE_PACKED_VRING, QUEUE_TYPE_SPLIT_VRING, - VIRTIO_F_RING_PACKED, VIRTIO_MMIO_INT_CONFIG, VIRTIO_MMIO_INT_VRING, + CONFIG_STATUS_FEATURES_OK, CONFIG_STATUS_NEEDS_RESET, NOTIFY_REG_OFFSET, + QUEUE_TYPE_PACKED_VRING, VIRTIO_F_RING_PACKED, VIRTIO_MMIO_INT_CONFIG, VIRTIO_MMIO_INT_VRING, }; -use crate::errors::{ErrorKind, Result, ResultExt}; +use address_space::{AddressRange, AddressSpace, GuestAddress, RegionIoEventFd}; +use devices::sysbus::{SysBus, SysBusDevBase, SysBusDevOps, SysBusDevType}; +use devices::{convert_bus_mut, Device, DeviceBase, MUT_SYS_BUS}; +use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; +use migration_derive::{ByteCode, Desc}; +use util::byte_code::ByteCode; +use util::gen_base_func; +use util::loop_context::create_new_eventfd; /// Registers of virtio-mmio device refer to Virtio Spec. /// Magic value - Read Only. @@ -73,6 +77,17 @@ const QUEUE_AVAIL_HIGH_REG: u64 = 0x94; const QUEUE_USED_LOW_REG: u64 = 0xa0; /// The high 32bit of queue's Used Ring address. const QUEUE_USED_HIGH_REG: u64 = 0xa4; +/// Shared memory region id. +#[allow(unused)] +const SHM_SEL: u64 = 0xac; +/// Shared memory region 64 bit long length. 64 bits in two halves. +const SHM_LEN_LOW: u64 = 0xb0; +const SHM_LEN_HIGH: u64 = 0xb4; +/// Shared memory region 64 bit long physical address. 64 bits in two halves. 
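The SHM_* constants above describe the new virtio-mmio shared-memory registers, where a 64-bit length or base address is exposed through two 32-bit halves. A minimal, self-contained sketch of that split, with illustrative names only (the real handling sits in read_common_config further down, where a region that does not exist reads back as a length of -1):

// Hypothetical offsets mirroring SHM_LEN_LOW / SHM_LEN_HIGH above.
const LEN_LOW: u64 = 0xb0;
const LEN_HIGH: u64 = 0xb4;

// `len` is None when the selected shared-memory region does not exist.
fn read_shm_len(len: Option<u64>, offset: u64) -> u32 {
    match (len, offset) {
        // Non-existent region: both halves read as 0xffff_ffff, i.e. a length of -1.
        (None, LEN_LOW) | (None, LEN_HIGH) => u32::MAX,
        (Some(l), LEN_LOW) => l as u32,
        (Some(l), LEN_HIGH) => (l >> 32) as u32,
        _ => 0,
    }
}

fn main() {
    assert_eq!(read_shm_len(None, LEN_LOW), u32::MAX);
    assert_eq!(read_shm_len(Some(0x1_0000_2000), LEN_LOW), 0x0000_2000);
    assert_eq!(read_shm_len(Some(0x1_0000_2000), LEN_HIGH), 0x1);
}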
+#[allow(unused)] +const SHM_BASE_LOW: u64 = 0xb8; +#[allow(unused)] +const SHM_BASE_HIGH: u64 = 0xbc; /// Configuration atomicity value. const CONFIG_GENERATION_REG: u64 = 0xfc; @@ -80,20 +95,17 @@ const VENDOR_ID: u32 = 0; const MMIO_MAGIC_VALUE: u32 = 0x7472_6976; const MMIO_VERSION: u32 = 2; -/// The maximum of virtio queue within a virtio device. -const MAXIMUM_NR_QUEUES: usize = 8; - /// HostNotifyInfo includes the info needed for notifying backend from guest. -pub struct HostNotifyInfo { +struct HostNotifyInfo { /// Eventfds which notify backend to use the avail ring. - events: Vec, + events: Vec>, } impl HostNotifyInfo { - pub fn new(queue_num: usize) -> Self { + fn new(queue_num: usize) -> Self { let mut events = Vec::new(); for _i in 0..queue_num { - events.push(EventFd::new(libc::EFD_NONBLOCK).unwrap()); + events.push(Arc::new(create_new_eventfd().unwrap())); } HostNotifyInfo { events } @@ -105,137 +117,169 @@ impl HostNotifyInfo { #[derive(Copy, Clone, Desc, ByteCode)] #[desc_version(compat_version = "0.1.0")] pub struct VirtioMmioState { - /// Identify if this device is activated by frontend driver. - activated: bool, - /// Config space of virtio mmio device. - config_space: VirtioMmioCommonConfig, + virtio_base: VirtioBaseState, } -/// The configuration of virtio-mmio device, the fields refer to Virtio Spec. -#[derive(Copy, Clone, Default)] -pub struct VirtioMmioCommonConfig { - /// Bitmask of the features supported by the device (host)(32 bits per set). - features_select: u32, - /// Device (host) feature-setting selector. - acked_features_select: u32, - /// Interrupt status value. - interrupt_status: u32, - /// Device status. - device_status: u32, - /// Configuration atomicity value. - config_generation: u32, - /// Queue selector. - queue_select: u32, - /// The configuration of queues. - queues_config: [QueueConfig; MAXIMUM_NR_QUEUES], - /// The number of queues. - queue_num: usize, - /// The type of queue, either be split ring or packed ring. - queue_type: u16, +/// virtio-mmio device structure. +pub struct VirtioMmioDevice { + base: SysBusDevBase, + // The entity of low level device. + pub device: Arc>, + // HostNotifyInfo used for guest notifier + host_notify_info: HostNotifyInfo, + // System address space. + mem_space: Arc, + /// The function for interrupt triggering. 
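HostNotifyInfo now keeps one reference-counted eventfd per queue, so the same notifier can be handed to the ioeventfd registration and later to activate() without cloning file descriptors. A rough, self-contained sketch of that shape, with a placeholder Notifier standing in for Arc<EventFd>:

use std::sync::Arc;

// Placeholder for an eventfd; only the ownership pattern matters here.
struct Notifier(u32);

struct HostNotify {
    events: Vec<Arc<Notifier>>,
}

impl HostNotify {
    fn new(queue_num: usize) -> Self {
        // One notifier per virtqueue, created up front.
        let events = (0..queue_num).map(|i| Arc::new(Notifier(i as u32))).collect();
        HostNotify { events }
    }
}

fn main() {
    let hn = HostNotify::new(2);
    // Cloning the Arc hands the same notifier to ioeventfd registration and to activation.
    let for_ioeventfd: Vec<Arc<Notifier>> = hn.events.iter().cloned().collect();
    assert_eq!(for_ioeventfd.len(), 2);
    assert_eq!(for_ioeventfd[1].0, 1);
}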
+ interrupt_cb: Option>, } -impl VirtioMmioCommonConfig { - pub fn new(device: &Arc>) -> Self { - let locked_device = device.lock().unwrap(); - let mut queues_config = [QueueConfig::default(); 8]; - let queue_size = locked_device.queue_size(); - let queue_num = locked_device.queue_num(); - for queue_config in queues_config.iter_mut().take(queue_num) { - *queue_config = QueueConfig::new(queue_size); +impl VirtioMmioDevice { + pub fn new( + mem_space: &Arc, + name: String, + device: Arc>, + sysbus: &Arc>, + region_base: u64, + region_size: u64, + ) -> Result { + if region_base >= sysbus.lock().unwrap().mmio_region.1 { + bail!("Mmio region space exhausted."); } + let queue_num = device.lock().unwrap().queue_num(); + let mut mmio_device = VirtioMmioDevice { + base: SysBusDevBase { + base: DeviceBase::new(name, false, None), + dev_type: SysBusDevType::VirtioMmio, + interrupt_evt: Some(Arc::new(create_new_eventfd()?)), + ..Default::default() + }, + device, + host_notify_info: HostNotifyInfo::new(queue_num), + mem_space: mem_space.clone(), + interrupt_cb: None, + }; + mmio_device.set_sys_resource(sysbus, region_base, region_size, "VirtioMmio")?; + mmio_device.set_parent_bus(sysbus.clone()); - VirtioMmioCommonConfig { - queues_config, - queue_num, - queue_type: QUEUE_TYPE_SPLIT_VRING, - ..Default::default() - } + Ok(mmio_device) } - /// Check whether virtio device status is as expected. - fn check_device_status(&self, set: u32, clr: u32) -> bool { - self.device_status & (set | clr) == set - } + /// Activate the virtio device, this function is called by vcpu thread when frontend + /// virtio driver is ready and write `DRIVER_OK` to backend. + fn activate(&mut self) -> Result<()> { + info!("func: activate, id: {:?}", &self.base.base.id); + let mut locked_dev = self.device.lock().unwrap(); + let queue_num = locked_dev.queue_num(); + let queue_type = locked_dev.queue_type(); + let features = locked_dev.virtio_base().driver_features; + let broken = locked_dev.virtio_base().broken.clone(); + let queues_config = &mut locked_dev.virtio_base_mut().queues_config; + + let mut queues = Vec::with_capacity(queue_num); + for q_config in queues_config.iter_mut() { + if !q_config.ready { + debug!("queue is not ready, please check your init process"); + } else { + q_config.set_addr_cache( + self.mem_space.clone(), + self.interrupt_cb.clone().unwrap(), + features, + &broken, + ); + } - /// Get the status of virtio device - fn get_device_status(&self) -> u32 { - self.device_status - } + let queue = Queue::new(*q_config, queue_type)?; + if q_config.ready && !queue.is_valid(&self.mem_space) { + bail!("Failed to activate device: Invalid queue"); + } + queues.push(Arc::new(Mutex::new(queue))); + } + locked_dev.virtio_base_mut().queues = queues; - /// Get mutable QueueConfig structure of virtio device. 
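activate() walks the queue configs owned by the device: queues the driver never marked ready are tolerated, ready queues get their address cache set and must pass validation, and only then are the queues, notify eventfds and interrupt callback handed to the backend. A simplified, self-contained sketch of that control flow under stand-in types (the validity check here is a placeholder, not the real ring check against guest memory):

#[derive(Clone, Copy)]
struct QueueCfg { ready: bool, size: u16 }

struct Queue { size: u16 }

impl Queue {
    fn new(cfg: QueueCfg) -> Self { Queue { size: cfg.size } }
    // Stand-in validity check; the real one verifies the ring addresses.
    fn is_valid(&self) -> bool { self.size != 0 && self.size.is_power_of_two() }
}

fn activate(configs: &[QueueCfg]) -> Result<Vec<Queue>, String> {
    let mut queues = Vec::with_capacity(configs.len());
    for cfg in configs {
        // Unready queues are kept but neither cached nor validated.
        let queue = Queue::new(*cfg);
        if cfg.ready && !queue.is_valid() {
            return Err("Failed to activate device: Invalid queue".into());
        }
        queues.push(queue);
    }
    Ok(queues)
}

fn main() {
    let cfgs = [QueueCfg { ready: true, size: 256 }, QueueCfg { ready: false, size: 0 }];
    assert_eq!(activate(&cfgs).unwrap().len(), 2);
    assert!(activate(&[QueueCfg { ready: true, size: 3 }]).is_err());
}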
- fn get_mut_queue_config(&mut self) -> Result<&mut QueueConfig> { - if self.check_device_status( - CONFIG_STATUS_FEATURES_OK, - CONFIG_STATUS_DRIVER_OK | CONFIG_STATUS_FAILED, - ) { - let queue_select = self.queue_select; - self.queues_config - .get_mut(queue_select as usize) - .ok_or_else(|| { - format!( - "Mmio-reg queue_select {} overflows for mutable queue config", - queue_select, - ) - .into() - }) + let mut queue_evts = Vec::>::new(); + for fd in self.host_notify_info.events.iter() { + queue_evts.push(fd.clone()); + } + + if let Some(cb) = self.interrupt_cb.clone() { + locked_dev.activate(self.mem_space.clone(), cb, queue_evts)?; } else { - Err(ErrorKind::DevStatErr(self.device_status).into()) + bail!("Failed to activate device: No interrupt callback"); } + + Ok(()) } - /// Get immutable QueueConfig structure of virtio device. - fn get_queue_config(&self) -> Result<&QueueConfig> { - let queue_select = self.queue_select; - self.queues_config - .get(queue_select as usize) - .ok_or_else(|| { - format!( - "Mmio-reg queue_select overflows {} for immutable queue config", - queue_select, - ) - .into() - }) + fn assign_interrupt_cb(&mut self) { + let irq_state = self.base.irq_state.clone(); + let locked_dev = self.device.lock().unwrap(); + let virtio_base = locked_dev.virtio_base(); + let device_status = virtio_base.device_status.clone(); + let config_generation = virtio_base.config_generation.clone(); + let interrupt_status = virtio_base.interrupt_status.clone(); + + let cb = Arc::new(Box::new( + move |int_type: &VirtioInterruptType, _queue: Option<&Queue>, needs_reset: bool| { + let status = match int_type { + VirtioInterruptType::Config => { + if needs_reset { + device_status.fetch_or(CONFIG_STATUS_NEEDS_RESET, Ordering::SeqCst); + } + if device_status.load(Ordering::Acquire) & CONFIG_STATUS_DRIVER_OK == 0 { + return Ok(()); + } + config_generation.fetch_add(1, Ordering::SeqCst); + // Use (CONFIG | VRING) instead of CONFIG, it can be used to solve the + // IO stuck problem by change the device configure. + VIRTIO_MMIO_INT_CONFIG | VIRTIO_MMIO_INT_VRING + } + VirtioInterruptType::Vring => VIRTIO_MMIO_INT_VRING, + }; + interrupt_status.fetch_or(status, Ordering::SeqCst); + irq_state.trigger_irq()?; + + Ok(()) + }, + ) as VirtioInterrupt); + + self.interrupt_cb = Some(cb); } /// Read data from the common config of virtio device. /// Return the config value in u32. /// # Arguments /// - /// * `device` - Virtio device entity. /// * `offset` - The offset of common config. 
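assign_interrupt_cb() builds the interrupt callback from state held in VirtioBase: config interrupts are suppressed until the driver writes DRIVER_OK, the config generation is bumped, the cause bits are ORed into the interrupt status, and the irq is raised. A compact sketch of that logic with plain atomics; the needs_reset branch is left out and the constant values are only illustrative:

use std::sync::atomic::{AtomicU32, Ordering};

const DRIVER_OK: u32 = 0x4;
const INT_VRING: u32 = 0x1;
const INT_CONFIG: u32 = 0x2;

enum IntType { Config, Vring }

fn deliver(int_type: IntType, status: &AtomicU32, isr: &AtomicU32, generation: &AtomicU32) -> bool {
    let bits = match int_type {
        IntType::Config => {
            // Config interrupts are dropped until the driver has written DRIVER_OK.
            if status.load(Ordering::Acquire) & DRIVER_OK == 0 {
                return false;
            }
            generation.fetch_add(1, Ordering::SeqCst);
            // VRING is raised together with CONFIG so a stalled queue also gets kicked.
            INT_CONFIG | INT_VRING
        }
        IntType::Vring => INT_VRING,
    };
    isr.fetch_or(bits, Ordering::SeqCst);
    // The real callback would now trigger the irq line.
    true
}

fn main() {
    let status = AtomicU32::new(DRIVER_OK);
    let isr = AtomicU32::new(0);
    let generation = AtomicU32::new(0);
    assert!(deliver(IntType::Config, &status, &isr, &generation));
    assert!(deliver(IntType::Vring, &status, &isr, &generation));
    assert_eq!(isr.load(Ordering::SeqCst), INT_CONFIG | INT_VRING);
    assert_eq!(generation.load(Ordering::SeqCst), 1);
}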
- fn read_common_config( - &mut self, - device: &Arc>, - interrupt_status: &Arc, - offset: u64, - ) -> Result { + fn read_common_config(&mut self, offset: u64) -> Result { + trace::virtio_tpt_read_common_config(&self.base.base.id, offset); + let locked_device = self.device.lock().unwrap(); let value = match offset { MAGIC_VALUE_REG => MMIO_MAGIC_VALUE, VERSION_REG => MMIO_VERSION, - DEVICE_ID_REG => device.lock().unwrap().device_type() as u32, + DEVICE_ID_REG => locked_device.device_type(), VENDOR_ID_REG => VENDOR_ID, DEVICE_FEATURES_REG => { - let mut features = device - .lock() - .unwrap() - .get_device_features(self.features_select); - if self.features_select == 1 { + let hfeatures_sel = locked_device.hfeatures_sel(); + let mut features = locked_device.device_features(hfeatures_sel); + if hfeatures_sel == 1 { features |= 0x1; // enable support of VirtIO Version 1 } features } - QUEUE_NUM_MAX_REG => self - .get_queue_config() + QUEUE_NUM_MAX_REG => locked_device + .queue_config() .map(|config| u32::from(config.max_size))?, - QUEUE_READY_REG => self.get_queue_config().map(|config| config.ready as u32)?, - INTERRUPT_STATUS_REG => { - self.interrupt_status = interrupt_status.load(Ordering::SeqCst); - self.interrupt_status - } - STATUS_REG => self.device_status, - CONFIG_GENERATION_REG => self.config_generation, + QUEUE_READY_REG => locked_device + .queue_config() + .map(|config| u32::from(config.ready))?, + INTERRUPT_STATUS_REG => locked_device.interrupt_status(), + STATUS_REG => locked_device.device_status(), + CONFIG_GENERATION_REG => u32::from(locked_device.config_generation()), + // SHM_SEL is unimplemented. According to the Virtio v1.2 spec: Reading from a non-existent + // region(i.e. where the ID written to SHMSel is unused) results in a length of -1. + SHM_LEN_LOW | SHM_LEN_HIGH => u32::MAX, _ => { - return Err(ErrorKind::MmioRegErr(offset).into()); + return Err(anyhow!(VirtioError::MmioRegErr(offset))); } }; @@ -246,260 +290,136 @@ impl VirtioMmioCommonConfig { /// /// # Arguments /// - /// * `device` - Virtio device entity. /// * `offset` - The offset of common config. /// * `value` - The value to write. /// /// # Errors /// /// Returns Error if the offset is out of bound. 
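The feature read in read_common_config() stays banked: the selector written by the guest picks the low (0) or high (1) 32-bit word of the 64-bit device features, and the high word additionally advertises VirtIO version 1 (feature bit 32). A small sketch of just that selection:

fn read_device_features(device_features: u64, features_sel: u32) -> u32 {
    match features_sel {
        0 => device_features as u32,
        // High word: also advertise VirtIO version 1, i.e. bit 32 of the full feature set.
        1 => (device_features >> 32) as u32 | 0x1,
        _ => 0, // no other feature banks exist
    }
}

fn main() {
    let features = 0x0000_00f8_0000_00fe_u64;
    assert_eq!(read_device_features(features, 0), 0x0000_00fe);
    // Same figures as test_virtio_mmio_device_read_01 further down.
    assert_eq!(read_device_features(features, 1), 0x0000_00f9);
}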
- fn write_common_config( - &mut self, - device: &Arc>, - interrupt_status: &Arc, - offset: u64, - value: u32, - ) -> Result<()> { + fn write_common_config(&mut self, offset: u64, value: u32) -> Result<()> { + trace::virtio_tpt_write_common_config(&self.base.base.id, offset, value); + let mut locked_device = self.device.lock().unwrap(); match offset { - DEVICE_FEATURES_SEL_REG => self.features_select = value, + DEVICE_FEATURES_SEL_REG => locked_device.set_hfeatures_sel(value), DRIVER_FEATURES_REG => { - if self.check_device_status( + if locked_device.check_device_status( CONFIG_STATUS_DRIVER, CONFIG_STATUS_FEATURES_OK | CONFIG_STATUS_FAILED, ) { - device - .lock() - .unwrap() - .set_driver_features(self.acked_features_select, value); - if self.acked_features_select == 1 + let gfeatures_sel = locked_device.gfeatures_sel(); + locked_device.set_driver_features(gfeatures_sel, value); + if gfeatures_sel == 1 && virtio_has_feature(u64::from(value) << 32, VIRTIO_F_RING_PACKED) { - self.queue_type = QUEUE_TYPE_PACKED_VRING; + locked_device.set_queue_type(QUEUE_TYPE_PACKED_VRING); } } else { - return Err(ErrorKind::DevStatErr(self.device_status).into()); + return Err(anyhow!(VirtioError::DevStatErr( + locked_device.device_status() + ))); } } - DRIVER_FEATURES_SEL_REG => self.acked_features_select = value, - QUEUE_SEL_REG => self.queue_select = value, - QUEUE_NUM_REG => self - .get_mut_queue_config() + DRIVER_FEATURES_SEL_REG => locked_device.set_gfeatures_sel(value), + QUEUE_SEL_REG => locked_device.set_queue_select(value as u16), + QUEUE_NUM_REG => locked_device + .queue_config_mut(true) .map(|config| config.size = value as u16)?, - QUEUE_READY_REG => self - .get_mut_queue_config() + QUEUE_READY_REG => locked_device + .queue_config_mut(true) .map(|config| config.ready = value == 1)?, INTERRUPT_ACK_REG => { - if self.check_device_status(CONFIG_STATUS_DRIVER_OK, 0) { - self.interrupt_status = interrupt_status.fetch_and(!value, Ordering::SeqCst); + if locked_device.check_device_status(CONFIG_STATUS_DRIVER_OK, 0) { + let isr = &locked_device.virtio_base_mut().interrupt_status; + isr.fetch_and(!value, Ordering::SeqCst); } } - STATUS_REG => self.device_status = value, - QUEUE_DESC_LOW_REG => self.get_mut_queue_config().map(|config| { + STATUS_REG => locked_device.set_device_status(value), + QUEUE_DESC_LOW_REG => locked_device.queue_config_mut(true).map(|config| { config.desc_table = GuestAddress(config.desc_table.0 | u64::from(value)); })?, - QUEUE_DESC_HIGH_REG => self.get_mut_queue_config().map(|config| { + QUEUE_DESC_HIGH_REG => locked_device.queue_config_mut(true).map(|config| { config.desc_table = GuestAddress(config.desc_table.0 | (u64::from(value) << 32)); })?, - QUEUE_AVAIL_LOW_REG => self.get_mut_queue_config().map(|config| { + QUEUE_AVAIL_LOW_REG => locked_device.queue_config_mut(true).map(|config| { config.avail_ring = GuestAddress(config.avail_ring.0 | u64::from(value)); })?, - QUEUE_AVAIL_HIGH_REG => self.get_mut_queue_config().map(|config| { + QUEUE_AVAIL_HIGH_REG => locked_device.queue_config_mut(true).map(|config| { config.avail_ring = GuestAddress(config.avail_ring.0 | (u64::from(value) << 32)); })?, - QUEUE_USED_LOW_REG => self.get_mut_queue_config().map(|config| { + QUEUE_USED_LOW_REG => locked_device.queue_config_mut(true).map(|config| { config.used_ring = GuestAddress(config.used_ring.0 | u64::from(value)); })?, - QUEUE_USED_HIGH_REG => self.get_mut_queue_config().map(|config| { + QUEUE_USED_HIGH_REG => locked_device.queue_config_mut(true).map(|config| { config.used_ring = 
GuestAddress(config.used_ring.0 | (u64::from(value) << 32)); })?, _ => { - return Err(ErrorKind::MmioRegErr(offset).into()); + return Err(anyhow!(VirtioError::MmioRegErr(offset))); } }; Ok(()) } } -/// virtio-mmio device structure. -pub struct VirtioMmioDevice { - // The entity of low level device. - pub device: Arc>, - // EventFd used to send interrupt to VM - interrupt_evt: EventFd, - // Interrupt status. - interrupt_status: Arc, - // HostNotifyInfo used for guest notifier - host_notify_info: HostNotifyInfo, - // The state of virtio mmio device. - state: VirtioMmioState, - // System address space. - mem_space: Arc, - // Virtio queues. - queues: Vec>>, - // System Resource of device. - res: SysRes, -} - -impl VirtioMmioDevice { - pub fn new(mem_space: &Arc, device: Arc>) -> Self { - let device_clone = device.clone(); - let queue_num = device_clone.lock().unwrap().queue_num(); +impl Device for VirtioMmioDevice { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); - VirtioMmioDevice { - device, - interrupt_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), - interrupt_status: Arc::new(AtomicU32::new(0)), - host_notify_info: HostNotifyInfo::new(queue_num), - state: VirtioMmioState { - activated: false, - config_space: VirtioMmioCommonConfig::new(&device_clone), - }, - mem_space: mem_space.clone(), - queues: Vec::new(), - res: SysRes::default(), - } - } - - pub fn realize( - mut self, - sysbus: &mut SysBus, - region_base: u64, - region_size: u64, - #[cfg(target_arch = "x86_64")] bs: &Arc>, - ) -> Result>> { + fn realize(mut self) -> Result>> { + self.assign_interrupt_cb(); self.device .lock() .unwrap() .realize() - .chain_err(|| "Failed to realize virtio.")?; + .with_context(|| "Failed to realize virtio.")?; - if region_base >= sysbus.mmio_region.1 { - bail!("Mmio region space exhausted."); - } - self.set_sys_resource(sysbus, region_base, region_size)?; + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + MUT_SYS_BUS!(parent_bus, locked_bus, sysbus); let dev = Arc::new(Mutex::new(self)); - sysbus.attach_device(&dev, region_base, region_size)?; - - #[cfg(target_arch = "x86_64")] - bs.lock().unwrap().kernel_cmdline.push(Param { - param_type: "virtio_mmio.device".to_string(), - value: format!( - "{}@0x{:08x}:{}", - region_size, - region_base, - dev.lock().unwrap().res.irq - ), - }); - Ok(dev) - } - - /// Activate the virtio device, this function is called by vcpu thread when frontend - /// virtio driver is ready and write `DRIVER_OK` to backend. 
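The QUEUE_*_LOW_REG / QUEUE_*_HIGH_REG writes above assemble a 64-bit guest address from two 32-bit halves, ORing the low half in as-is and the high half shifted by 32. A self-contained sketch of the bit manipulation, with illustrative names:

#[derive(Default)]
struct QueueAddr { desc_table: u64 }

fn write_desc_low(cfg: &mut QueueAddr, value: u32) {
    cfg.desc_table |= u64::from(value);
}

fn write_desc_high(cfg: &mut QueueAddr, value: u32) {
    cfg.desc_table |= u64::from(value) << 32;
}

fn main() {
    let mut cfg = QueueAddr::default();
    write_desc_low(&mut cfg, 0xffff_fefe);
    write_desc_high(&mut cfg, 0xfcfc_ffff);
    // The same values are checked by test_virtio_mmio_device_write_03 below.
    assert_eq!(cfg.desc_table, 0xfcfc_ffff_ffff_fefe);
}

Because the halves are ORed in rather than assigned, this relies on the stored address starting from zero before the driver programs it.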
- fn activate(&mut self) -> Result<()> { - let queues_config = - &mut self.state.config_space.queues_config[0..self.state.config_space.queue_num]; - let cloned_mem_space = self.mem_space.clone(); - for q_config in queues_config.iter_mut() { - q_config.addr_cache.desc_table_host = cloned_mem_space - .get_host_address(q_config.desc_table) - .unwrap_or(0); - q_config.addr_cache.avail_ring_host = cloned_mem_space - .get_host_address(q_config.avail_ring) - .unwrap_or(0); - q_config.addr_cache.used_ring_host = cloned_mem_space - .get_host_address(q_config.used_ring) - .unwrap_or(0); - let queue = Queue::new(*q_config, self.state.config_space.queue_type)?; - if !queue.is_valid(&self.mem_space) { - bail!("Invalid queue"); - } - self.queues.push(Arc::new(Mutex::new(queue))); - } - - let mut queue_evts = Vec::::new(); - for fd in self.host_notify_info.events.iter() { - let evt_fd_clone = match fd.try_clone() { - Ok(fd) => fd, - Err(e) => { - error!("Failed to clone IoEventFd, {}", e); - continue; - } - }; - queue_evts.push(evt_fd_clone); - } - - let interrupt_status = self.interrupt_status.clone(); - let interrupt_evt = self.interrupt_evt.try_clone().unwrap(); - let cb = Arc::new(Box::new( - move |int_type: &VirtioInterruptType, _queue: Option<&Queue>| { - let status = match int_type { - VirtioInterruptType::Config => VIRTIO_MMIO_INT_CONFIG, - VirtioInterruptType::Vring => VIRTIO_MMIO_INT_VRING, - }; - interrupt_status.fetch_or(status as u32, Ordering::SeqCst); - interrupt_evt - .write(1) - .chain_err(|| ErrorKind::EventFdWrite)?; + sysbus.attach_device(&dev)?; - Ok(()) - }, - ) as VirtioInterrupt); - - self.device.lock().unwrap().activate( - self.mem_space.clone(), - cb, - &self.queues, - queue_evts, - )?; - - Ok(()) + Ok(dev) } } impl SysBusDevOps for VirtioMmioDevice { + gen_base_func!(sysbusdev_base, sysbusdev_base_mut, SysBusDevBase, base); + /// Read data by virtio driver from VM. fn read(&mut self, data: &mut [u8], _base: GuestAddress, offset: u64) -> bool { + trace::virtio_tpt_read_config(&self.base.base.id, offset, data.len()); match offset { 0x00..=0xff if data.len() == 4 => { - let value = match self.state.config_space.read_common_config( - &self.device, - &self.interrupt_status, - offset, - ) { + let value = match self.read_common_config(offset) { Ok(v) => v, Err(ref e) => { error!( - "Failed to read mmio register {}, type: {}, {}", - offset, - self.device.lock().unwrap().device_type(), - error_chain::ChainedError::display_chain(e), + "Failed to read mmio register {:#x}, device: {}, {:?}", + offset, self.base.base.id, e, ); return false; } }; LittleEndian::write_u32(data, value); } - 0x100..=0xfff => { + 0x100..=0x1ff => { if let Err(ref e) = self .device .lock() .unwrap() - .read_config(offset as u64 - 0x100, data) + .read_config(offset - 0x100, data) { error!( - "Failed to read virtio-dev config space {} type: {} {}", - offset as u64 - 0x100, - self.device.lock().unwrap().device_type(), - error_chain::ChainedError::display_chain(e), + "Failed to read virtio-dev config space {:#x} device: {}, {:?}", + offset - 0x100, + self.base.base.id, + e, ); return false; } } _ => { warn!( - "Failed to read mmio register: overflows, offset is 0x{:x}, type: {}", - offset, - self.device.lock().unwrap().device_type(), + "Failed to read mmio register: overflows, offset is {:#x}, device: {}", + offset, self.base.base.id ); } }; @@ -508,75 +428,62 @@ impl SysBusDevOps for VirtioMmioDevice { /// Write data by virtio driver from VM. 
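The read() and write() handlers dispatch on the MMIO offset: registers below 0x100 are 4-byte common-config accesses, 0x100..=0x1ff is forwarded to the device's own config space rebased to zero (the window shrinks from 0xfff to 0x1ff in this hunk), and anything else only produces a warning. A simplified stand-in for that classification:

enum Access { Common(u64), DeviceConfig(u64), OutOfRange }

fn classify(offset: u64, len: usize) -> Access {
    match offset {
        // Common-config registers are always accessed as 32-bit words.
        0x00..=0xff if len == 4 => Access::Common(offset),
        // Device-specific config space, rebased so the device sees offset 0 at 0x100.
        0x100..=0x1ff => Access::DeviceConfig(offset - 0x100),
        _ => Access::OutOfRange,
    }
}

fn main() {
    assert!(matches!(classify(0x70, 4), Access::Common(0x70)));
    assert!(matches!(classify(0x108, 8), Access::DeviceConfig(0x08)));
    assert!(matches!(classify(0x200, 4), Access::OutOfRange));
}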
fn write(&mut self, data: &[u8], _base: GuestAddress, offset: u64) -> bool { + trace::virtio_tpt_write_config(&self.base.base.id, offset, data); match offset { 0x00..=0xff if data.len() == 4 => { let value = LittleEndian::read_u32(data); - if let Err(ref e) = self.state.config_space.write_common_config( - &self.device, - &self.interrupt_status, - offset, - value, - ) { + if let Err(ref e) = self.write_common_config(offset, value) { error!( - "Failed to write mmio register {}, type: {}, {}", - offset, - self.device.lock().unwrap().device_type(), - error_chain::ChainedError::display_chain(e), + "Failed to write mmio register {:#x}, device: {}, {:?}", + offset, self.base.base.id, e, ); return false; } - if self.state.config_space.check_device_status( + let locked_dev = self.device.lock().unwrap(); + if locked_dev.check_device_status( CONFIG_STATUS_ACKNOWLEDGE | CONFIG_STATUS_DRIVER | CONFIG_STATUS_DRIVER_OK | CONFIG_STATUS_FEATURES_OK, CONFIG_STATUS_FAILED, - ) && !self.state.activated + ) && !locked_dev.device_activated() { - let ret = self.activate().map(|_| self.state.activated = true); - if let Err(ref e) = ret { + drop(locked_dev); + if let Err(ref e) = self.activate() { error!( - "Failed to activate dev, type: {}, {}", - self.device.lock().unwrap().device_type(), - error_chain::ChainedError::display_chain(e), + "Failed to activate dev, device: {}, {:?}", + self.base.base.id, e, ); + return false; } + self.device.lock().unwrap().set_device_activated(true); } } - 0x100..=0xfff => { - if self - .state - .config_space - .check_device_status(CONFIG_STATUS_DRIVER, CONFIG_STATUS_FAILED) - { - if let Err(ref e) = self - .device - .lock() - .unwrap() - .write_config(offset as u64 - 0x100, data) - { + 0x100..=0x1ff => { + let mut locked_device = self.device.lock().unwrap(); + if locked_device.check_device_status(CONFIG_STATUS_DRIVER, CONFIG_STATUS_FAILED) { + if let Err(ref e) = locked_device.write_config(offset - 0x100, data) { error!( - "Failed to write virtio-dev config space {}, type: {}, {}", - offset as u64 - 0x100, - self.device.lock().unwrap().device_type(), - error_chain::ChainedError::display_chain(e), + "Failed to write virtio-dev config space {:#x}, device: {}, {:?}", + offset - 0x100, + self.base.base.id, + e, ); return false; } } else { - error!("Failed to write virtio-dev config space: driver is not ready 0x{:X}, type: {}", - self.state.config_space.get_device_status(), - self.device.lock().unwrap().device_type(), + error!("Failed to write virtio-dev config space: driver is not ready {:#x}, device: {}", + locked_device.device_status(), + self.base.base.id, ); return false; } } _ => { warn!( - "Failed to write mmio register: overflows, offset is 0x{:x} type: {}", - offset, - self.device.lock().unwrap().device_type(), + "Failed to write mmio register: overflows, offset is {:#x} device: {}", + offset, self.base.base.id, ); return false; } @@ -588,15 +495,8 @@ impl SysBusDevOps for VirtioMmioDevice { let mut ret = Vec::new(); for (index, eventfd) in self.host_notify_info.events.iter().enumerate() { let addr = u64::from(NOTIFY_REG_OFFSET); - let eventfd_clone = match eventfd.try_clone() { - Err(e) => { - error!("Failed to clone ioeventfd, error is {}", e); - continue; - } - Ok(fd) => fd, - }; ret.push(RegionIoEventFd { - fd: eventfd_clone, + fd: eventfd.clone(), addr_range: AddressRange::from((addr, std::mem::size_of::() as u64)), data_match: true, data: index as u64, @@ -604,18 +504,6 @@ impl SysBusDevOps for VirtioMmioDevice { } ret } - - fn interrupt_evt(&self) -> Option<&EventFd> { 
- Some(&self.interrupt_evt) - } - - fn get_sys_resource(&mut self) -> Option<&mut SysRes> { - Some(&mut self.res) - } - - fn get_type(&self) -> SysBusDevType { - SysBusDevType::VirtioMmio - } } impl acpi::AmlBuilder for VirtioMmioDevice { @@ -625,95 +513,54 @@ impl acpi::AmlBuilder for VirtioMmioDevice { } impl StateTransfer for VirtioMmioDevice { - fn get_state_vec(&self) -> migration::errors::Result> { - let mut state = self.state; - - for (index, queue) in self.queues.iter().enumerate() { - state.config_space.queues_config[index] = - queue.lock().unwrap().vring.get_queue_config(); - } - state.config_space.interrupt_status = self.interrupt_status.load(Ordering::Relaxed); - + fn get_state_vec(&self) -> Result> { + let state = VirtioMmioState { + virtio_base: self.device.lock().unwrap().virtio_base().get_state(), + }; Ok(state.as_bytes().to_vec()) } - fn set_state_mut(&mut self, state: &[u8]) -> migration::errors::Result<()> { - self.state = *VirtioMmioState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("MMIO_DEVICE"))?; - let cloned_mem_space = self.mem_space.clone(); - let mut queue_states = - self.state.config_space.queues_config[0..self.state.config_space.queue_num].to_vec(); - self.queues = queue_states - .iter_mut() - .map(|queue_state| { - queue_state.addr_cache.desc_table_host = cloned_mem_space - .get_host_address(queue_state.desc_table) - .unwrap_or(0); - queue_state.addr_cache.avail_ring_host = cloned_mem_space - .get_host_address(queue_state.avail_ring) - .unwrap_or(0); - queue_state.addr_cache.used_ring_host = cloned_mem_space - .get_host_address(queue_state.used_ring) - .unwrap_or(0); - Arc::new(Mutex::new( - Queue::new(*queue_state, self.state.config_space.queue_type).unwrap(), - )) - }) - .collect(); - self.interrupt_status = Arc::new(AtomicU32::new(self.state.config_space.interrupt_status)); + fn set_state_mut(&mut self, state: &[u8]) -> Result<()> { + let s_len = std::mem::size_of::(); + if state.len() != s_len { + bail!("Invalid state length {}, expected {}", state.len(), s_len); + } + + let mut mmio_state = VirtioMmioState::default(); + mmio_state.as_mut_bytes().copy_from_slice(state); + let mut locked_dev = self.device.lock().unwrap(); + locked_dev.virtio_base_mut().set_state( + &mmio_state.virtio_base, + self.mem_space.clone(), + self.interrupt_cb.clone().unwrap(), + ); Ok(()) } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&VirtioMmioState::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&VirtioMmioState::descriptor().name).unwrap_or(!0) } } impl MigrationHook for VirtioMmioDevice { - fn resume(&mut self) -> migration::errors::Result<()> { - if self.state.activated { - let mut queue_evts = Vec::::new(); - for fd in self.host_notify_info.events.iter() { - let evt_fd_clone = match fd.try_clone() { - Ok(fd) => fd, - Err(e) => { - error!("Failed to clone IoEventFd, {}", e); - continue; - } - }; - queue_evts.push(evt_fd_clone); - } + fn resume(&mut self) -> Result<()> { + let mut locked_dev = self.device.lock().unwrap(); + if !locked_dev.device_activated() { + return Ok(()); + } - let interrupt_status = self.interrupt_status.clone(); - let interrupt_evt = self.interrupt_evt.try_clone().unwrap(); - let cb = Arc::new(Box::new( - move |int_type: &VirtioInterruptType, _queue: Option<&Queue>| { - let status = match int_type { - VirtioInterruptType::Config => VIRTIO_MMIO_INT_CONFIG, - VirtioInterruptType::Vring => VIRTIO_MMIO_INT_VRING, - }; - 
interrupt_status.fetch_or(status as u32, Ordering::SeqCst); - interrupt_evt - .write(1) - .chain_err(|| ErrorKind::EventFdWrite)?; - - Ok(()) - }, - ) as VirtioInterrupt); - - if let Err(e) = self.device.lock().unwrap().activate( - self.mem_space.clone(), - cb, - &self.queues, - queue_evts, - ) { + let mut queue_evts = Vec::>::new(); + for fd in self.host_notify_info.events.iter() { + queue_evts.push(fd.clone()); + } + + if let Some(cb) = self.interrupt_cb.clone() { + if let Err(e) = locked_dev.activate(self.mem_space.clone(), cb, queue_evts) { bail!("Failed to resume virtio mmio device: {}", e); } + } else { + bail!("Failed to resume device: No interrupt callback"); } Ok(()) @@ -722,62 +569,34 @@ impl MigrationHook for VirtioMmioDevice { #[cfg(test)] mod tests { - use std::io::Write; - - use address_space::{AddressSpace, GuestAddress, HostMemMapping, Region}; - use util::num_ops::{read_u32, write_u32}; - use super::*; - use crate::VIRTIO_TYPE_BLOCK; - - fn address_space_init() -> Arc { - let root = Region::init_container_region(1 << 36); - let sys_space = AddressSpace::new(root).unwrap(); - let host_mmap = Arc::new( - HostMemMapping::new( - GuestAddress(0), - None, - SYSTEM_SPACE_SIZE, - None, - false, - false, - false, - ) - .unwrap(), - ); - sys_space - .root() - .add_subregion( - Region::init_ram_region(host_mmap.clone()), - host_mmap.start_address().raw_value(), - ) - .unwrap(); - sys_space - } + use crate::tests::{address_space_init, sysbus_init}; + use crate::{ + check_config_space_rw, read_config_default, VirtioBase, QUEUE_TYPE_SPLIT_VRING, + VIRTIO_TYPE_BLOCK, + }; + use address_space::{AddressSpace, GuestAddress}; - const SYSTEM_SPACE_SIZE: u64 = (1024 * 1024) as u64; const CONFIG_SPACE_SIZE: usize = 16; const QUEUE_NUM: usize = 2; const QUEUE_SIZE: u16 = 256; - pub struct VirtioDeviceTest { - pub device_features: u64, - pub driver_features: u64, - pub config_space: Vec, - pub b_active: bool, - pub b_realized: bool, + struct VirtioDeviceTest { + base: VirtioBase, + config_space: Vec, + b_active: bool, + b_realized: bool, } impl VirtioDeviceTest { - pub fn new() -> Self { + fn new() -> Self { let mut config_space = Vec::new(); for i in 0..CONFIG_SPACE_SIZE { config_space.push(i as u8); } VirtioDeviceTest { - device_features: 0, - driver_features: 0, + base: VirtioBase::new(VIRTIO_TYPE_BLOCK, QUEUE_NUM, QUEUE_SIZE), b_active: false, b_realized: false, config_space, @@ -786,69 +605,27 @@ mod tests { } impl VirtioDevice for VirtioDeviceTest { + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + fn realize(&mut self) -> Result<()> { self.b_realized = true; + self.init_config_features()?; Ok(()) } - fn device_type(&self) -> u32 { - VIRTIO_TYPE_BLOCK - } - - fn queue_num(&self) -> usize { - QUEUE_NUM - } - - fn queue_size(&self) -> u16 { - QUEUE_SIZE - } - - fn get_device_features(&self, features_select: u32) -> u32 { - read_u32(self.device_features, features_select) + fn init_config_features(&mut self) -> Result<()> { + Ok(()) } - fn set_driver_features(&mut self, page: u32, value: u32) { - let mut v = write_u32(value, page); - let unrequested_features = v & !self.device_features; - if unrequested_features != 0 { - v &= !unrequested_features; - } - self.driver_features |= v; - } - - fn read_config(&self, offset: u64, mut data: &mut [u8]) -> Result<()> { - let config_len = self.config_space.len() as u64; - if offset >= config_len { - bail!( - "The offset{} for reading is more than the length{} of configuration", - offset, - config_len - ); - } - if let Some(end) = 
offset.checked_add(data.len() as u64) { - data.write_all( - &self.config_space[offset as usize..std::cmp::min(end, config_len) as usize], - )?; - } - - Ok(()) + fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()> { + read_config_default(&self.config_space, offset, data) } fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { + check_config_space_rw(&self.config_space, offset, data)?; let data_len = data.len(); - let config_len = self.config_space.len(); - if offset as usize + data_len > config_len { - bail!( - "The offset{} {}for writing is more than the length{} of configuration", - offset, - data_len, - config_len - ); - } - self.config_space[(offset as usize)..(offset as usize + data_len)] - .copy_from_slice(&data[..]); - + .copy_from_slice(data); Ok(()) } @@ -856,498 +633,416 @@ mod tests { &mut self, _mem_space: Arc, _interrupt_cb: Arc, - _queues: &[Arc>], - mut _queue_evts: Vec, + mut _queue_evts: Vec>, ) -> Result<()> { self.b_active = true; Ok(()) } } - #[test] - fn test_virtio_mmio_device_new() { + fn virtio_mmio_test_init() -> (Arc>, VirtioMmioDevice) { let virtio_device = Arc::new(Mutex::new(VirtioDeviceTest::new())); - let virtio_device_clone = virtio_device.clone(); + let sys_space = address_space_init(); + let sysbus = sysbus_init(); + let virtio_mmio_device = VirtioMmioDevice::new( + &sys_space, + "test_virtio_mmio_device".to_string(), + virtio_device.clone(), + &sysbus, + 0x0A00_0000, + 0x0000_0200, + ) + .unwrap(); + + (virtio_device, virtio_mmio_device) + } - let virtio_mmio_device = VirtioMmioDevice::new(&sys_space, virtio_device); - assert_eq!(virtio_mmio_device.state.activated, false); + #[test] + fn test_virtio_mmio_device_new() { + let (virtio_device, virtio_mmio_device) = virtio_mmio_test_init(); + let locked_device = virtio_device.lock().unwrap(); + assert!(!locked_device.device_activated()); assert_eq!( virtio_mmio_device.host_notify_info.events.len(), - virtio_device_clone.lock().unwrap().queue_num() - ); - assert_eq!(virtio_mmio_device.state.config_space.features_select, 0); - assert_eq!( - virtio_mmio_device.state.config_space.acked_features_select, - 0 - ); - assert_eq!(virtio_mmio_device.state.config_space.device_status, 0); - assert_eq!(virtio_mmio_device.state.config_space.config_generation, 0); - assert_eq!(virtio_mmio_device.state.config_space.queue_select, 0); - assert_eq!( - virtio_mmio_device.state.config_space.queue_num, - virtio_device_clone.lock().unwrap().queue_num() - ); - assert_eq!( - virtio_mmio_device.state.config_space.queue_type, - QUEUE_TYPE_SPLIT_VRING + locked_device.queue_num() ); + assert_eq!(locked_device.hfeatures_sel(), 0); + assert_eq!(locked_device.gfeatures_sel(), 0); + assert_eq!(locked_device.device_status(), 0); + assert_eq!(locked_device.config_generation(), 0); + assert_eq!(locked_device.queue_select(), 0); + assert_eq!(locked_device.queue_type(), QUEUE_TYPE_SPLIT_VRING); } #[test] fn test_virtio_mmio_device_read_01() { - let virtio_device = Arc::new(Mutex::new(VirtioDeviceTest::new())); - let virtio_device_clone = virtio_device.clone(); - let sys_space = address_space_init(); - let mut virtio_mmio_device = VirtioMmioDevice::new(&sys_space, virtio_device); + let (virtio_device, mut virtio_mmio_device) = virtio_mmio_test_init(); let addr = GuestAddress(0); // read the register of magic value let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, MAGIC_VALUE_REG), - true - ); + assert!(virtio_mmio_device.read(&mut buf[..], addr, 
MAGIC_VALUE_REG)); assert_eq!(LittleEndian::read_u32(&buf[..]), MMIO_MAGIC_VALUE); // read the register of version let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, VERSION_REG), - true - ); + assert!(virtio_mmio_device.read(&mut buf[..], addr, VERSION_REG)); assert_eq!(LittleEndian::read_u32(&buf[..]), MMIO_VERSION); // read the register of device id let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, DEVICE_ID_REG), - true - ); + assert!(virtio_mmio_device.read(&mut buf[..], addr, DEVICE_ID_REG)); assert_eq!(LittleEndian::read_u32(&buf[..]), VIRTIO_TYPE_BLOCK); // read the register of vendor id let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, VENDOR_ID_REG), - true - ); + assert!(virtio_mmio_device.read(&mut buf[..], addr, VENDOR_ID_REG)); assert_eq!(LittleEndian::read_u32(&buf[..]), VENDOR_ID); // read the register of the features // get low 32bit of the features let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.features_select = 0; - virtio_device_clone.lock().unwrap().device_features = 0x0000_00f8_0000_00fe; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, DEVICE_FEATURES_REG), - true - ); + virtio_device.lock().unwrap().set_hfeatures_sel(0); + virtio_device.lock().unwrap().base.device_features = 0x0000_00f8_0000_00fe; + assert!(virtio_mmio_device.read(&mut buf[..], addr, DEVICE_FEATURES_REG)); assert_eq!(LittleEndian::read_u32(&buf[..]), 0x0000_00fe); // get high 32bit of the features for device which supports VirtIO Version 1 let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.features_select = 1; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, DEVICE_FEATURES_REG), - true - ); + virtio_device.lock().unwrap().set_hfeatures_sel(1); + assert!(virtio_mmio_device.read(&mut buf[..], addr, DEVICE_FEATURES_REG)); assert_eq!(LittleEndian::read_u32(&buf[..]), 0x0000_00f9); } #[test] fn test_virtio_mmio_device_read_02() { - let virtio_device = Arc::new(Mutex::new(VirtioDeviceTest::new())); - let sys_space = address_space_init(); - let mut virtio_mmio_device = VirtioMmioDevice::new(&sys_space, virtio_device); + let (virtio_device, mut virtio_mmio_device) = virtio_mmio_test_init(); let addr = GuestAddress(0); // read the register representing max size of the queue // for queue_select as 0 let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.queue_select = 0; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, QUEUE_NUM_MAX_REG), - true - ); - assert_eq!(LittleEndian::read_u32(&buf[..]), QUEUE_SIZE as u32); + virtio_device.lock().unwrap().set_queue_select(0); + assert!(virtio_mmio_device.read(&mut buf[..], addr, QUEUE_NUM_MAX_REG)); + assert_eq!(LittleEndian::read_u32(&buf[..]), u32::from(QUEUE_SIZE)); // for queue_select as 1 let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.queue_select = 1; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, QUEUE_NUM_MAX_REG), - true - ); - assert_eq!(LittleEndian::read_u32(&buf[..]), QUEUE_SIZE as u32); + virtio_device.lock().unwrap().set_queue_select(1); + assert!(virtio_mmio_device.read(&mut buf[..], addr, QUEUE_NUM_MAX_REG)); + assert_eq!(LittleEndian::read_u32(&buf[..]), u32::from(QUEUE_SIZE)); // read the register representing the status of queue // for queue_select as 0 let mut buf: Vec = 
vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.queue_select = 0; - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FEATURES_OK; + virtio_device.lock().unwrap().set_queue_select(0); + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FEATURES_OK); LittleEndian::write_u32(&mut buf[..], 1); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, QUEUE_READY_REG), - true - ); + assert!(virtio_mmio_device.write(&buf[..], addr, QUEUE_READY_REG)); let mut data: Vec = vec![0xff, 0xff, 0xff, 0xff]; - assert_eq!( - virtio_mmio_device.read(&mut data[..], addr, QUEUE_READY_REG), - true - ); + assert!(virtio_mmio_device.read(&mut data[..], addr, QUEUE_READY_REG)); assert_eq!(LittleEndian::read_u32(&data[..]), 1); // for queue_select as 1 let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.queue_select = 1; - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FEATURES_OK; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, QUEUE_READY_REG), - true - ); + virtio_device.lock().unwrap().set_queue_select(1); + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FEATURES_OK); + assert!(virtio_mmio_device.read(&mut buf[..], addr, QUEUE_READY_REG)); assert_eq!(LittleEndian::read_u32(&buf[..]), 0); // read the register representing the status of interrupt let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, INTERRUPT_STATUS_REG), - true - ); + assert!(virtio_mmio_device.read(&mut buf[..], addr, INTERRUPT_STATUS_REG)); assert_eq!(LittleEndian::read_u32(&buf[..]), 0); let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device - .interrupt_status - .store(0b10_1111, Ordering::Relaxed); - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, INTERRUPT_STATUS_REG), - true - ); + virtio_device + .lock() + .unwrap() + .set_interrupt_status(0b10_1111); + assert!(virtio_mmio_device.read(&mut buf[..], addr, INTERRUPT_STATUS_REG)); assert_eq!(LittleEndian::read_u32(&buf[..]), 0b10_1111); // read the register representing the status of device let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.device_status = 0; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, STATUS_REG), - true - ); + virtio_device.lock().unwrap().set_device_status(0); + assert!(virtio_mmio_device.read(&mut buf[..], addr, STATUS_REG)); assert_eq!(LittleEndian::read_u32(&buf[..]), 0); let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.device_status = 5; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, STATUS_REG), - true - ); + virtio_device.lock().unwrap().set_device_status(5); + assert!(virtio_mmio_device.read(&mut buf[..], addr, STATUS_REG)); assert_eq!(LittleEndian::read_u32(&buf[..]), 5); } #[test] fn test_virtio_mmio_device_read_03() { - let virtio_device = Arc::new(Mutex::new(VirtioDeviceTest::new())); - let virtio_device_clone = virtio_device.clone(); - let sys_space = address_space_init(); - let mut virtio_mmio_device = VirtioMmioDevice::new(&sys_space, virtio_device); + let (virtio_device, mut virtio_mmio_device) = virtio_mmio_test_init(); let addr = GuestAddress(0); // read the configuration atomic value let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, CONFIG_GENERATION_REG), - true - ); + assert!(virtio_mmio_device.read(&mut buf[..], addr, CONFIG_GENERATION_REG)); 
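The interrupt tests in this area depend on the acknowledge semantics introduced in write_common_config(): a write to INTERRUPT_ACK_REG clears exactly the bits the driver passes and leaves other pending causes set. A tiny sketch with the same figures as test_virtio_mmio_device_write_02:

use std::sync::atomic::{AtomicU32, Ordering};

fn ack_interrupts(isr: &AtomicU32, value: u32) -> u32 {
    // Clear only the acknowledged bits; anything else stays pending.
    isr.fetch_and(!value, Ordering::SeqCst);
    isr.load(Ordering::SeqCst)
}

fn main() {
    let isr = AtomicU32::new(0b10_1111);
    assert_eq!(ack_interrupts(&isr, 0b111), 0b10_1000);
}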
assert_eq!(LittleEndian::read_u32(&buf[..]), 0); let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.config_generation = 10; - assert_eq!( - virtio_mmio_device.read(&mut buf[..], addr, CONFIG_GENERATION_REG), - true - ); + virtio_device.lock().unwrap().set_config_generation(10); + assert!(virtio_mmio_device.read(&mut buf[..], addr, CONFIG_GENERATION_REG)); assert_eq!(LittleEndian::read_u32(&buf[..]), 10); // read the unknown register let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - assert_eq!(virtio_mmio_device.read(&mut buf[..], addr, 0xf1), false); - assert_eq!(virtio_mmio_device.read(&mut buf[..], addr, 0xfff + 1), true); + assert!(!virtio_mmio_device.read(&mut buf[..], addr, 0xf1)); + assert!(virtio_mmio_device.read(&mut buf[..], addr, 0x1ff + 1)); assert_eq!(buf, [0xff, 0xff, 0xff, 0xff]); // read the configuration space of virtio device // write something let result: Vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - virtio_device_clone + virtio_device .lock() .unwrap() .config_space .as_mut_slice() - .copy_from_slice(&result[..]); + .copy_from_slice(&result); let mut data: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - assert_eq!(virtio_mmio_device.read(&mut data[..], addr, 0x100), true); + assert!(virtio_mmio_device.read(&mut data[..], addr, 0x100)); assert_eq!(data, result); let mut data: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0]; let result: Vec = vec![9, 10, 11, 12, 13, 14, 15, 16]; - assert_eq!(virtio_mmio_device.read(&mut data[..], addr, 0x108), true); + assert!(virtio_mmio_device.read(&mut data[..], addr, 0x108)); assert_eq!(data, result); } #[test] fn test_virtio_mmio_device_write_01() { - let virtio_device = Arc::new(Mutex::new(VirtioDeviceTest::new())); - let virtio_device_clone = virtio_device.clone(); - let sys_space = address_space_init(); - let mut virtio_mmio_device = VirtioMmioDevice::new(&sys_space, virtio_device); + let (virtio_device, mut virtio_mmio_device) = virtio_mmio_test_init(); let addr = GuestAddress(0); // write the selector for device features let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; LittleEndian::write_u32(&mut buf[..], 2); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, DEVICE_FEATURES_SEL_REG), - true - ); - assert_eq!(virtio_mmio_device.state.config_space.features_select, 2); + assert!(virtio_mmio_device.write(&buf[..], addr, DEVICE_FEATURES_SEL_REG)); + assert_eq!(virtio_device.lock().unwrap().hfeatures_sel(), 2); // write the device features - // false when the device status is CONFIG_STATUS_FEATURES_OK or CONFIG_STATUS_FAILED isn't CONFIG_STATUS_DRIVER - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FEATURES_OK; - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, DRIVER_FEATURES_REG), - false - ); - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FAILED; - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, DRIVER_FEATURES_REG), - false - ); - virtio_mmio_device.state.config_space.device_status = - CONFIG_STATUS_FEATURES_OK | CONFIG_STATUS_FAILED | CONFIG_STATUS_DRIVER; - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, DRIVER_FEATURES_REG), - false + // false when the device status is CONFIG_STATUS_FEATURES_OK or CONFIG_STATUS_FAILED isn't + // CONFIG_STATUS_DRIVER + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FEATURES_OK); + assert!(!virtio_mmio_device.write(&buf[..], addr, DRIVER_FEATURES_REG)); + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FAILED); + 
assert!(!virtio_mmio_device.write(&buf[..], addr, DRIVER_FEATURES_REG)); + virtio_device.lock().unwrap().set_device_status( + CONFIG_STATUS_FEATURES_OK | CONFIG_STATUS_FAILED | CONFIG_STATUS_DRIVER, ); + assert!(!virtio_mmio_device.write(&buf[..], addr, DRIVER_FEATURES_REG)); // it is ok to write the low 32bit of device features - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_DRIVER; + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_DRIVER); let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.acked_features_select = 0; + virtio_device.lock().unwrap().set_gfeatures_sel(0); LittleEndian::write_u32(&mut buf[..], 0x0000_00fe); - virtio_device_clone.lock().unwrap().device_features = 0x0000_00fe; + virtio_device.lock().unwrap().base.device_features = 0x0000_00fe; + assert!(virtio_mmio_device.write(&buf[..], addr, DRIVER_FEATURES_REG)); assert_eq!( - virtio_mmio_device.write(&buf[..], addr, DRIVER_FEATURES_REG), - true - ); - assert_eq!( - virtio_device_clone.lock().unwrap().driver_features as u32, + virtio_device.lock().unwrap().base.driver_features as u32, 0x0000_00fe ); // it is ok to write the high 32bit of device features let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.acked_features_select = 1; + virtio_device.lock().unwrap().set_gfeatures_sel(1); LittleEndian::write_u32(&mut buf[..], 0x0000_00ff); - virtio_device_clone.lock().unwrap().device_features = 0x0000_00ff_0000_0000; - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, DRIVER_FEATURES_REG), - true - ); + virtio_device.lock().unwrap().base.device_features = 0x0000_00ff_0000_0000; + assert!(virtio_mmio_device.write(&buf[..], addr, DRIVER_FEATURES_REG)); assert_eq!( - virtio_mmio_device.state.config_space.queue_type, + virtio_device.lock().unwrap().queue_type(), QUEUE_TYPE_PACKED_VRING ); assert_eq!( - virtio_device_clone.lock().unwrap().driver_features >> 32 as u32, + virtio_device.lock().unwrap().base.driver_features >> 32_u32, 0x0000_00ff ); // write the selector of driver features let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; LittleEndian::write_u32(&mut buf[..], 0x00ff_0000); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, DRIVER_FEATURES_SEL_REG), - true - ); - assert_eq!( - virtio_mmio_device.state.config_space.acked_features_select, - 0x00ff_0000 - ); + assert!(virtio_mmio_device.write(&buf[..], addr, DRIVER_FEATURES_SEL_REG)); + assert_eq!(virtio_device.lock().unwrap().gfeatures_sel(), 0x00ff_0000); // write the selector of queue let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; LittleEndian::write_u32(&mut buf[..], 0x0000_ff00); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, QUEUE_SEL_REG), - true - ); - assert_eq!( - virtio_mmio_device.state.config_space.queue_select, - 0x0000_ff00 - ); + assert!(virtio_mmio_device.write(&buf[..], addr, QUEUE_SEL_REG)); + assert_eq!(virtio_device.lock().unwrap().queue_select(), 0x0000_ff00); // write the size of queue let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.queue_select = 0; - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FEATURES_OK; + virtio_device.lock().unwrap().set_queue_select(0); + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FEATURES_OK); LittleEndian::write_u32(&mut buf[..], 128); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, QUEUE_NUM_REG), - true - ); - if let Ok(config) = virtio_mmio_device.state.config_space.get_queue_config() 
{ + assert!(virtio_mmio_device.write(&buf[..], addr, QUEUE_NUM_REG)); + if let Ok(config) = virtio_device.lock().unwrap().queue_config() { assert_eq!(config.size, 128); } else { assert!(false); - } + }; } #[test] fn test_virtio_mmio_device_write_02() { - let virtio_device = Arc::new(Mutex::new(VirtioDeviceTest::new())); - let sys_space = address_space_init(); - let mut virtio_mmio_device = VirtioMmioDevice::new(&sys_space, virtio_device); + let (virtio_device, mut virtio_mmio_device) = virtio_mmio_test_init(); let addr = GuestAddress(0); // write the ready status of queue let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.queue_select = 0; - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FEATURES_OK; + virtio_device.lock().unwrap().set_queue_select(0); + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FEATURES_OK); LittleEndian::write_u32(&mut buf[..], 1); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, QUEUE_READY_REG), - true - ); + assert!(virtio_mmio_device.write(&buf[..], addr, QUEUE_READY_REG)); let mut data: Vec = vec![0xff, 0xff, 0xff, 0xff]; - assert_eq!( - virtio_mmio_device.read(&mut data[..], addr, QUEUE_READY_REG), - true - ); + assert!(virtio_mmio_device.read(&mut data[..], addr, QUEUE_READY_REG)); assert_eq!(LittleEndian::read_u32(&data[..]), 1); let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.queue_select = 0; - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FEATURES_OK; + virtio_device.lock().unwrap().set_queue_select(0); + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FEATURES_OK); LittleEndian::write_u32(&mut buf[..], 2); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, QUEUE_READY_REG), - true - ); + assert!(virtio_mmio_device.write(&buf[..], addr, QUEUE_READY_REG)); let mut data: Vec = vec![0xff, 0xff, 0xff, 0xff]; - assert_eq!( - virtio_mmio_device.read(&mut data[..], addr, QUEUE_READY_REG), - true - ); + assert!(virtio_mmio_device.read(&mut data[..], addr, QUEUE_READY_REG)); assert_eq!(LittleEndian::read_u32(&data[..]), 0); // write the interrupt status let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_DRIVER_OK; - virtio_mmio_device - .interrupt_status - .store(0b10_1111, Ordering::Relaxed); + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_DRIVER_OK); + virtio_device + .lock() + .unwrap() + .set_interrupt_status(0b10_1111); LittleEndian::write_u32(&mut buf[..], 0b111); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, INTERRUPT_ACK_REG), - true - ); + assert!(virtio_mmio_device.write(&buf[..], addr, INTERRUPT_ACK_REG)); let mut data: Vec = vec![0xff, 0xff, 0xff, 0xff]; - assert_eq!( - virtio_mmio_device.read(&mut data[..], addr, INTERRUPT_STATUS_REG), - true - ); + assert!(virtio_mmio_device.read(&mut data[..], addr, INTERRUPT_STATUS_REG)); assert_eq!(LittleEndian::read_u32(&data[..]), 0b10_1000); } #[test] fn test_virtio_mmio_device_write_03() { - let virtio_device = Arc::new(Mutex::new(VirtioDeviceTest::new())); - let sys_space = address_space_init(); - let mut virtio_mmio_device = VirtioMmioDevice::new(&sys_space, virtio_device); + let (virtio_device, mut virtio_mmio_device) = virtio_mmio_test_init(); let addr = GuestAddress(0); // write the low 32bit of queue's descriptor table address - virtio_mmio_device.state.config_space.queue_select = 0; - 
virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FEATURES_OK; + virtio_device.lock().unwrap().set_queue_select(0); + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FEATURES_OK); let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; LittleEndian::write_u32(&mut buf[..], 0xffff_fefe); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, QUEUE_DESC_LOW_REG), - true - ); - if let Ok(config) = virtio_mmio_device.state.config_space.get_queue_config() { + assert!(virtio_mmio_device.write(&buf[..], addr, QUEUE_DESC_LOW_REG)); + if let Ok(config) = virtio_mmio_device.device.lock().unwrap().queue_config() { assert_eq!(config.desc_table.0 as u32, 0xffff_fefe) } else { assert!(false); } // write the high 32bit of queue's descriptor table address - virtio_mmio_device.state.config_space.queue_select = 0; - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FEATURES_OK; + virtio_device.lock().unwrap().set_queue_select(0); + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FEATURES_OK); let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; LittleEndian::write_u32(&mut buf[..], 0xfcfc_ffff); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, QUEUE_DESC_HIGH_REG), - true - ); - if let Ok(config) = virtio_mmio_device.state.config_space.get_queue_config() { + assert!(virtio_mmio_device.write(&buf[..], addr, QUEUE_DESC_HIGH_REG)); + if let Ok(config) = virtio_device.lock().unwrap().queue_config() { assert_eq!((config.desc_table.0 >> 32) as u32, 0xfcfc_ffff) } else { assert!(false); } // write the low 32bit of queue's available ring address - virtio_mmio_device.state.config_space.queue_select = 0; - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FEATURES_OK; + virtio_device.lock().unwrap().set_queue_select(0); + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FEATURES_OK); let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; LittleEndian::write_u32(&mut buf[..], 0xfcfc_fafa); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, QUEUE_AVAIL_LOW_REG), - true - ); - if let Ok(config) = virtio_mmio_device.state.config_space.get_queue_config() { + assert!(virtio_mmio_device.write(&buf[..], addr, QUEUE_AVAIL_LOW_REG)); + if let Ok(config) = virtio_device.lock().unwrap().queue_config() { assert_eq!(config.avail_ring.0 as u32, 0xfcfc_fafa) } else { assert!(false); } // write the high 32bit of queue's available ring address - virtio_mmio_device.state.config_space.queue_select = 0; - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FEATURES_OK; + virtio_device.lock().unwrap().set_queue_select(0); + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FEATURES_OK); let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; LittleEndian::write_u32(&mut buf[..], 0xecec_fafa); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, QUEUE_AVAIL_HIGH_REG), - true - ); - if let Ok(config) = virtio_mmio_device.state.config_space.get_queue_config() { + assert!(virtio_mmio_device.write(&buf[..], addr, QUEUE_AVAIL_HIGH_REG)); + if let Ok(config) = virtio_device.lock().unwrap().queue_config() { assert_eq!((config.avail_ring.0 >> 32) as u32, 0xecec_fafa) } else { assert!(false); } // write the low 32bit of queue's used ring address - virtio_mmio_device.state.config_space.queue_select = 0; - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FEATURES_OK; + virtio_device.lock().unwrap().set_queue_select(0); + virtio_device + .lock() + .unwrap() + 
.set_device_status(CONFIG_STATUS_FEATURES_OK); let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; LittleEndian::write_u32(&mut buf[..], 0xacac_fafa); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, QUEUE_USED_LOW_REG), - true - ); - if let Ok(config) = virtio_mmio_device.state.config_space.get_queue_config() { + assert!(virtio_mmio_device.write(&buf[..], addr, QUEUE_USED_LOW_REG)); + if let Ok(config) = virtio_device.lock().unwrap().queue_config() { assert_eq!(config.used_ring.0 as u32, 0xacac_fafa) } else { assert!(false); } // write the high 32bit of queue's used ring address - virtio_mmio_device.state.config_space.queue_select = 0; - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FEATURES_OK; + virtio_device.lock().unwrap().set_queue_select(0); + virtio_device + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FEATURES_OK); let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; LittleEndian::write_u32(&mut buf[..], 0xcccc_fafa); - assert_eq!( - virtio_mmio_device.write(&buf[..], addr, QUEUE_USED_HIGH_REG), - true - ); - if let Ok(config) = virtio_mmio_device.state.config_space.get_queue_config() { + assert!(virtio_mmio_device.write(&buf[..], addr, QUEUE_USED_HIGH_REG)); + if let Ok(config) = virtio_device.lock().unwrap().queue_config() { assert_eq!((config.used_ring.0 >> 32) as u32, 0xcccc_fafa) } else { assert!(false); - } + }; } fn align(size: u64, alignment: u64) -> u64 { @@ -1356,51 +1051,48 @@ mod tests { } else { 0 }; - (size + align_adjust) as u64 + size + align_adjust } #[test] fn test_virtio_mmio_device_write_04() { - let virtio_device = Arc::new(Mutex::new(VirtioDeviceTest::new())); - let virtio_device_clone = virtio_device.clone(); - let sys_space = address_space_init(); - let mut virtio_mmio_device = VirtioMmioDevice::new(&sys_space, virtio_device); + let (virtio_device, mut virtio_mmio_device) = virtio_mmio_test_init(); let addr = GuestAddress(0); - virtio_mmio_device.state.config_space.queue_select = 0; - virtio_mmio_device.state.config_space.device_status = CONFIG_STATUS_FEATURES_OK; - if let Ok(config) = virtio_mmio_device.state.config_space.get_mut_queue_config() { + virtio_mmio_device.assign_interrupt_cb(); + let mut locked_device = virtio_device.lock().unwrap(); + locked_device.set_queue_select(0); + locked_device.set_device_status(CONFIG_STATUS_FEATURES_OK); + if let Ok(config) = locked_device.queue_config_mut(true) { config.desc_table = GuestAddress(0); - config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * 16); + config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * 16); config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * 16 + 8 + 2 * (QUEUE_SIZE as u64), + u64::from(QUEUE_SIZE) * 16 + 8 + 2 * u64::from(QUEUE_SIZE), 4096, )); config.size = QUEUE_SIZE; config.ready = true; } - virtio_mmio_device.state.config_space.queue_select = 1; - if let Ok(config) = virtio_mmio_device.state.config_space.get_mut_queue_config() { + locked_device.set_queue_select(1); + if let Ok(config) = locked_device.queue_config_mut(true) { config.desc_table = GuestAddress(0); - config.avail_ring = GuestAddress((QUEUE_SIZE as u64) * 16); + config.avail_ring = GuestAddress(u64::from(QUEUE_SIZE) * 16); config.used_ring = GuestAddress(align( - (QUEUE_SIZE as u64) * 16 + 8 + 2 * (QUEUE_SIZE as u64), + u64::from(QUEUE_SIZE) * 16 + 8 + 2 * u64::from(QUEUE_SIZE), 4096, )); config.size = QUEUE_SIZE / 2; config.ready = true; } + drop(locked_device); // write the device status let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; LittleEndian::write_u32(&mut buf[..], 
CONFIG_STATUS_ACKNOWLEDGE); - assert_eq!(virtio_mmio_device.write(&buf[..], addr, STATUS_REG), true); - assert_eq!(virtio_mmio_device.state.activated, false); + assert!(virtio_mmio_device.write(&buf[..], addr, STATUS_REG)); + assert!(!virtio_device.lock().unwrap().device_activated()); let mut data: Vec = vec![0xff, 0xff, 0xff, 0xff]; - assert_eq!( - virtio_mmio_device.read(&mut data[..], addr, STATUS_REG), - true - ); + assert!(virtio_mmio_device.read(&mut data[..], addr, STATUS_REG)); assert_eq!(LittleEndian::read_u32(&data[..]), CONFIG_STATUS_ACKNOWLEDGE); let mut buf: Vec = vec![0xff, 0xff, 0xff, 0xff]; @@ -1411,15 +1103,12 @@ mod tests { | CONFIG_STATUS_DRIVER_OK | CONFIG_STATUS_FEATURES_OK, ); - assert_eq!(virtio_device_clone.lock().unwrap().b_active, false); - assert_eq!(virtio_mmio_device.write(&buf[..], addr, STATUS_REG), true); - assert_eq!(virtio_mmio_device.state.activated, true); - assert_eq!(virtio_device_clone.lock().unwrap().b_active, true); + assert!(!virtio_device.lock().unwrap().b_active); + assert!(virtio_mmio_device.write(&buf[..], addr, STATUS_REG)); + assert!(virtio_device.lock().unwrap().device_activated()); + assert!(virtio_device.lock().unwrap().b_active); let mut data: Vec = vec![0xff, 0xff, 0xff, 0xff]; - assert_eq!( - virtio_mmio_device.read(&mut data[..], addr, STATUS_REG), - true - ); + assert!(virtio_mmio_device.read(&mut data[..], addr, STATUS_REG)); assert_eq!( LittleEndian::read_u32(&data[..]), CONFIG_STATUS_ACKNOWLEDGE diff --git a/virtio/src/virtio_pci.rs b/virtio/src/transport/virtio_pci.rs similarity index 36% rename from virtio/src/virtio_pci.rs rename to virtio/src/transport/virtio_pci.rs index 9eae77791a207ad09e3cb7fd71b7d268855ff3c4..0c4bfc88a6b44bf75f42f2a9089bd98887e9fb79 100644 --- a/virtio/src/virtio_pci.rs +++ b/virtio/src/transport/virtio_pci.rs @@ -10,44 +10,65 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
+use std::cmp::{max, min}; use std::mem::size_of; -use std::sync::atomic::{AtomicBool, AtomicU16, AtomicU32, Ordering}; +use std::sync::atomic::{AtomicU16, Ordering}; use std::sync::{Arc, Mutex, Weak}; -use address_space::{AddressRange, AddressSpace, GuestAddress, Region, RegionIoEventFd, RegionOps}; +use anyhow::{anyhow, bail, Context, Result}; use byteorder::{ByteOrder, LittleEndian}; -use error_chain::ChainedError; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; -use pci::config::{ - RegionType, BAR_0, COMMAND, DEVICE_ID, PCIE_CONFIG_SPACE_SIZE, REG_SIZE, REVISION_ID, - ROM_ADDRESS, STATUS, STATUS_INTERRUPT, SUBSYSTEM_ID, SUBSYSTEM_VENDOR_ID, SUB_CLASS_CODE, - VENDOR_ID, -}; -use pci::errors::{ErrorKind, Result as PciResult, ResultExt}; -use pci::msix::{update_dev_id, MsixState}; -use pci::{ - config::PciConfig, init_msix, init_multifunction, le_write_u16, ranges_overlap, PciBus, - PciDevOps, -}; -use util::{byte_code::ByteCode, num_ops::round_up, unix::host_page_size}; +use log::{debug, error, info, warn}; use vmm_sys_util::eventfd::EventFd; +#[cfg(feature = "virtio_gpu")] +use crate::Gpu; use crate::{ - virtio_has_feature, Queue, QueueConfig, VirtioDevice, VirtioInterrupt, VirtioInterruptType, + virtio_has_feature, NotifyEventFds, Queue, VirtioBaseState, VirtioDevice, VirtioDeviceQuirk, + VirtioInterrupt, VirtioInterruptType, }; use crate::{ CONFIG_STATUS_ACKNOWLEDGE, CONFIG_STATUS_DRIVER, CONFIG_STATUS_DRIVER_OK, CONFIG_STATUS_FAILED, - CONFIG_STATUS_FEATURES_OK, QUEUE_TYPE_PACKED_VRING, QUEUE_TYPE_SPLIT_VRING, - VIRTIO_F_RING_PACKED, VIRTIO_TYPE_BLOCK, VIRTIO_TYPE_NET, + CONFIG_STATUS_FEATURES_OK, CONFIG_STATUS_NEEDS_RESET, INVALID_VECTOR_NUM, + QUEUE_TYPE_PACKED_VRING, QUEUE_TYPE_SPLIT_VRING, VIRTIO_F_RING_PACKED, VIRTIO_F_VERSION_1, + VIRTIO_MMIO_INT_CONFIG, VIRTIO_MMIO_INT_VRING, VIRTIO_TYPE_BLOCK, VIRTIO_TYPE_CONSOLE, + VIRTIO_TYPE_FS, VIRTIO_TYPE_GPU, VIRTIO_TYPE_NET, VIRTIO_TYPE_SCSI, VIRTIO_TYPE_VSOCK, +}; +#[cfg(feature = "virtio_gpu")] +use address_space::HostMemMapping; +use address_space::{ + AddressAttr, AddressRange, AddressSpace, GuestAddress, Region, RegionIoEventFd, RegionOps, }; +use devices::pci::config::{ + RegionType, BAR_SPACE_UNMAPPED, DEVICE_ID, MINIMUM_BAR_SIZE_FOR_MMIO, PCIE_CONFIG_SPACE_SIZE, + PCI_SUBDEVICE_ID_QEMU, PCI_VENDOR_ID_REDHAT_QUMRANET, REG_SIZE, REVISION_ID, STATUS, + STATUS_INTERRUPT, SUBSYSTEM_ID, SUBSYSTEM_VENDOR_ID, SUB_CLASS_CODE, VENDOR_ID, +}; +use devices::pci::msix::MsixState; +use devices::pci::{ + config::PciConfig, init_intx, init_msix, init_multifunction, le_write_u16, le_write_u32, + PciBus, PciDevBase, PciDevOps, PciError, +}; +use devices::{convert_bus_ref, Bus, Device, DeviceBase, PCI_BUS}; +#[cfg(feature = "virtio_gpu")] +use machine_manager::config::VIRTIO_GPU_ENABLE_BAR0_SIZE; +use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; +use migration_derive::{ByteCode, Desc}; +use util::byte_code::ByteCode; +use util::num_ops::{ranges_overlap, read_data_u32, write_data_u32}; +use util::{gen_base_func, offset_of}; const VIRTIO_QUEUE_MAX: u32 = 1024; -const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4; +const VIRTIO_PCI_VENDOR_ID: u16 = PCI_VENDOR_ID_REDHAT_QUMRANET; const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; const VIRTIO_PCI_ABI_VERSION: u8 = 1; const VIRTIO_PCI_CLASS_ID_NET: u16 = 0x0280; const VIRTIO_PCI_CLASS_ID_BLOCK: u16 = 0x0100; +const VIRTIO_PCI_CLASS_ID_STORAGE_OTHER: u16 = 0x0180; +const VIRTIO_PCI_CLASS_ID_COMMUNICATION_OTHER: u16 = 0x0780; 
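As context for the constants above: a modern virtio-pci function advertises vendor 0x1af4 and a device ID derived from the virtio device type, which realize() below computes as VIRTIO_PCI_DEVICE_ID_BASE + device_type. A minimal sketch (hypothetical helper, assuming the standard type codes 1 = net, 2 = block):

    const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040;

    // Illustration only; the patch does this inline in realize().
    fn modern_device_id(device_type: u16) -> u16 {
        VIRTIO_PCI_DEVICE_ID_BASE + device_type
    }

    fn main() {
        assert_eq!(modern_device_id(1), 0x1041); // virtio-net
        assert_eq!(modern_device_id(2), 0x1042); // virtio-blk
    }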
+#[cfg(target_arch = "aarch64")] +const VIRTIO_PCI_CLASS_ID_DISPLAY_OTHER: u16 = 0x0380; +const VIRTIO_PCI_CLASS_ID_DISPLAY_VGA: u16 = 0x0300; const VIRTIO_PCI_CLASS_ID_OTHERS: u16 = 0x00ff; const VIRTIO_PCI_CAP_COMMON_OFFSET: u32 = 0x0; @@ -106,238 +127,66 @@ const COMMON_Q_USEDLO_REG: u64 = 0x30; /// The high 32bit of queue's Used Ring address - Read Write. const COMMON_Q_USEDHI_REG: u64 = 0x34; +/// The max features select num, only 0 or 1 is valid: +/// 0: select feature bits 0 to 31. +/// 1: select feature bits 32 to 63. +const MAX_FEATURES_SELECT_NUM: u32 = 2; + +#[cfg(feature = "virtio_gpu")] +fn init_gpu_bar0(dev: &Arc>, config: &mut PciConfig) -> Result<()> { + let locked_dev = dev.lock().unwrap(); + let gpu = locked_dev.as_any().downcast_ref::().unwrap(); + let fb = gpu.get_bar0_fb(); + + let host_mmap = Arc::new(HostMemMapping::new( + GuestAddress(0), + None, + VIRTIO_GPU_ENABLE_BAR0_SIZE, + fb, + false, + true, + false, + )?); + + let region = Region::init_ram_region(host_mmap, "vgpu.vram"); + config.register_bar( + 0, + region, + RegionType::Mem32Bit, + true, + VIRTIO_GPU_ENABLE_BAR0_SIZE, + )?; + + Ok(()) +} + /// Get class id according to device type. /// /// # Arguments /// -/// * `device_type` - Device type set by the host. -fn get_virtio_class_id(device_type: u32) -> u16 { +/// * `device_type` - Device type set by the host. +/// * `device_quirk` - Device quirk set by the host. +fn get_virtio_class_id(device_type: u32, _device_quirk: Option) -> u16 { match device_type { VIRTIO_TYPE_BLOCK => VIRTIO_PCI_CLASS_ID_BLOCK, + VIRTIO_TYPE_SCSI => VIRTIO_PCI_CLASS_ID_BLOCK, + VIRTIO_TYPE_FS => VIRTIO_PCI_CLASS_ID_STORAGE_OTHER, VIRTIO_TYPE_NET => VIRTIO_PCI_CLASS_ID_NET, - _ => VIRTIO_PCI_CLASS_ID_OTHERS, - } -} - -/// The configuration of virtio-pci device, the fields refer to Virtio Spec. -#[derive(Clone)] -struct VirtioPciCommonConfig { - /// Bitmask of the features supported by the device (host)(32 bits per set) - features_select: u32, - /// Device (host) feature-setting selector. - acked_features_select: u32, - /// Interrupt status. - interrupt_status: Arc, - /// Device status. - device_status: u32, - /// Configuration atomicity value. - config_generation: u32, - /// Queue selector. - queue_select: u16, - /// The configuration vector for MSI-X. - msix_config: Arc, - /// The configuration of queues. - queues_config: Vec, - /// The type of queue, split-vring or packed-vring. 
- queue_type: u16, -} - -impl VirtioPciCommonConfig { - fn new(queue_size: u16, queue_num: usize) -> Self { - let mut queues_config = Vec::new(); - for _ in 0..queue_num { - queues_config.push(QueueConfig::new(queue_size)) - } - - VirtioPciCommonConfig { - features_select: 0, - acked_features_select: 0, - interrupt_status: Arc::new(AtomicU32::new(0)), - device_status: 0, - config_generation: 0, - queue_select: 0, - msix_config: Arc::new(AtomicU16::new(0)), - queues_config, - queue_type: QUEUE_TYPE_SPLIT_VRING, + VIRTIO_TYPE_CONSOLE | VIRTIO_TYPE_VSOCK => VIRTIO_PCI_CLASS_ID_COMMUNICATION_OTHER, + #[cfg(target_arch = "x86_64")] + VIRTIO_TYPE_GPU => VIRTIO_PCI_CLASS_ID_DISPLAY_VGA, + #[cfg(target_arch = "aarch64")] + VIRTIO_TYPE_GPU => { + if _device_quirk == Some(VirtioDeviceQuirk::VirtioGpuEnableBar0) { + return VIRTIO_PCI_CLASS_ID_DISPLAY_VGA; + } + VIRTIO_PCI_CLASS_ID_DISPLAY_OTHER } - } - - fn check_device_status(&self, set: u32, clr: u32) -> bool { - self.device_status & (set | clr) == set - } - - fn get_mut_queue_config(&mut self) -> PciResult<&mut QueueConfig> { - if self.check_device_status( - CONFIG_STATUS_FEATURES_OK, - CONFIG_STATUS_DRIVER_OK | CONFIG_STATUS_FAILED, - ) { - self.queues_config - .get_mut(self.queue_select as usize) - .ok_or_else(|| "pci-reg queue_select overflows".into()) - } else { - Err(ErrorKind::DeviceStatus(self.device_status).into()) + _ => { + warn!("Unknown device type, please make sure it is supported."); + VIRTIO_PCI_CLASS_ID_OTHERS } } - - fn get_queue_config(&self) -> PciResult<&QueueConfig> { - self.queues_config - .get(self.queue_select as usize) - .ok_or_else(|| "pci-reg queue_select overflows".into()) - } - - /// Read data from the common config of virtio device. - /// Return the config value in u32. - /// - /// # Arguments - /// - /// * `device` - Virtio device entity. - /// * `offset` - The offset of common config. 
- fn read_common_config( - &self, - device: &Arc>, - offset: u64, - ) -> PciResult { - let value = match offset { - COMMON_DFSELECT_REG => self.features_select, - COMMON_DF_REG => device - .lock() - .unwrap() - .get_device_features(self.features_select), - COMMON_GFSELECT_REG => self.acked_features_select, - COMMON_MSIX_REG => self.msix_config.load(Ordering::SeqCst) as u32, - COMMON_NUMQ_REG => self.queues_config.len() as u32, - COMMON_STATUS_REG => self.device_status, - COMMON_CFGGENERATION_REG => self.config_generation, - COMMON_Q_SELECT_REG => self.queue_select as u32, - COMMON_Q_SIZE_REG => self - .get_queue_config() - .map(|config| u32::from(config.max_size))?, - COMMON_Q_MSIX_REG => self - .get_queue_config() - .map(|config| u32::from(config.vector))?, - COMMON_Q_ENABLE_REG => self - .get_queue_config() - .map(|config| u32::from(config.ready))?, - COMMON_Q_NOFF_REG => self.queue_select as u32, - COMMON_Q_DESCLO_REG => self - .get_queue_config() - .map(|config| config.desc_table.0 as u32)?, - COMMON_Q_DESCHI_REG => self - .get_queue_config() - .map(|config| (config.desc_table.0 >> 32) as u32)?, - COMMON_Q_AVAILLO_REG => self - .get_queue_config() - .map(|config| config.avail_ring.0 as u32)?, - COMMON_Q_AVAILHI_REG => self - .get_queue_config() - .map(|config| (config.avail_ring.0 >> 32) as u32)?, - COMMON_Q_USEDLO_REG => self - .get_queue_config() - .map(|config| config.used_ring.0 as u32)?, - COMMON_Q_USEDHI_REG => self - .get_queue_config() - .map(|config| (config.used_ring.0 >> 32) as u32)?, - _ => { - return Err(ErrorKind::PciRegister(offset).into()); - } - }; - - Ok(value) - } - - /// Write data to the common config of virtio device. - /// - /// # Arguments - /// - /// * `device` - Virtio device entity. - /// * `offset` - The offset of common config. - /// * `value` - The value to write. - /// - /// # Errors - /// - /// Returns Error if the offset is out of bound. 
- fn write_common_config( - &mut self, - device: &Arc>, - offset: u64, - value: u32, - ) -> PciResult<()> { - match offset { - COMMON_DFSELECT_REG => { - self.features_select = value; - } - COMMON_GFSELECT_REG => { - self.acked_features_select = value; - } - COMMON_GF_REG => { - device - .lock() - .unwrap() - .set_driver_features(self.acked_features_select, value); - - if self.acked_features_select == 1 - && virtio_has_feature(u64::from(value) << 32, VIRTIO_F_RING_PACKED) - { - error!("Set packed virtqueue, which is not supported"); - self.queue_type = QUEUE_TYPE_PACKED_VRING; - } - } - COMMON_MSIX_REG => { - self.msix_config.store(value as u16, Ordering::SeqCst); - self.interrupt_status.store(0_u32, Ordering::SeqCst); - } - COMMON_STATUS_REG => { - self.device_status = value; - if self.device_status == 0 { - self.queues_config.iter_mut().for_each(|q| { - q.ready = false; - q.vector = 0; - q.avail_ring = GuestAddress(0); - q.desc_table = GuestAddress(0); - q.used_ring = GuestAddress(0); - }); - self.msix_config.store(0_u16, Ordering::SeqCst) - } - } - COMMON_Q_SELECT_REG => { - if value < VIRTIO_QUEUE_MAX { - self.queue_select = value as u16; - } - } - COMMON_Q_SIZE_REG => self - .get_mut_queue_config() - .map(|config| config.size = value as u16)?, - COMMON_Q_ENABLE_REG => self - .get_mut_queue_config() - .map(|config| config.ready = value == 1)?, - COMMON_Q_MSIX_REG => self - .get_mut_queue_config() - .map(|config| config.vector = value as u16)?, - COMMON_Q_DESCLO_REG => self.get_mut_queue_config().map(|config| { - config.desc_table = GuestAddress(config.desc_table.0 | u64::from(value)); - })?, - COMMON_Q_DESCHI_REG => self.get_mut_queue_config().map(|config| { - config.desc_table = GuestAddress(config.desc_table.0 | (u64::from(value) << 32)); - })?, - COMMON_Q_AVAILLO_REG => self.get_mut_queue_config().map(|config| { - config.avail_ring = GuestAddress(config.avail_ring.0 | u64::from(value)); - })?, - COMMON_Q_AVAILHI_REG => self.get_mut_queue_config().map(|config| { - config.avail_ring = GuestAddress(config.avail_ring.0 | (u64::from(value) << 32)); - })?, - COMMON_Q_USEDLO_REG => self.get_mut_queue_config().map(|config| { - config.used_ring = GuestAddress(config.used_ring.0 | u64::from(value)); - })?, - COMMON_Q_USEDHI_REG => self.get_mut_queue_config().map(|config| { - config.used_ring = GuestAddress(config.used_ring.0 | (u64::from(value) << 32)); - })?, - _ => { - return Err(ErrorKind::PciRegister(offset).into()); - } - }; - - Ok(()) - } } #[allow(clippy::upper_case_acronyms)] @@ -347,6 +196,7 @@ enum VirtioPciCapType { Notify = 2, ISR = 3, Device = 4, + CfgAccess = 5, } /// Virtio PCI Capability @@ -382,6 +232,27 @@ impl VirtioPciCap { } } +/// The struct of virtio pci capability for accessing BAR regions. +#[repr(C, packed)] +#[derive(Debug, Copy, Clone, Default)] +struct VirtioPciCfgAccessCap { + /// The struct of virtio pci capability. + cap: VirtioPciCap, + /// Data for BAR regions access. 
+ pci_cfg_data: [u8; 4], +} + +impl ByteCode for VirtioPciCfgAccessCap {} + +impl VirtioPciCfgAccessCap { + fn new(cap_len: u8, cfg_type: u8) -> Self { + VirtioPciCfgAccessCap { + cap: VirtioPciCap::new(cap_len, cfg_type, 0, 0, 0), + pci_cfg_data: [0; 4], + } + } +} + /// The struct of virtio pci capability for notifying the host #[repr(C, packed)] #[derive(Debug, Copy, Clone, Default)] @@ -410,38 +281,11 @@ impl VirtioPciNotifyCap { } } -struct NotifyEventFds { - events: Vec, -} - -impl NotifyEventFds { - fn new(queue_num: usize) -> Self { - let mut events = Vec::new(); - for _i in 0..queue_num { - events.push(EventFd::new(libc::EFD_NONBLOCK).unwrap()); - } - - NotifyEventFds { events } - } -} - -impl Clone for NotifyEventFds { - fn clone(&self) -> NotifyEventFds { - let mut queue_evts = Vec::::new(); - for fd in self.events.iter() { - let cloned_evt_fd = fd.try_clone().unwrap(); - queue_evts.push(cloned_evt_fd); - } - NotifyEventFds { events: queue_evts } - } -} - /// The state of virtio-pci device. #[repr(C)] #[derive(Copy, Clone, Desc, ByteCode)] #[desc_version(compat_version = "0.1.0")] -pub struct VirtioPciState { - activated: bool, +struct VirtioPciState { dev_id: u16, /// Max length of config_space is 4096. config_space: [u8; 4096], @@ -450,48 +294,31 @@ pub struct VirtioPciState { last_cap_end: u16, last_ext_cap_offset: u16, last_ext_cap_end: u16, - features_select: u32, - acked_features_select: u32, - interrupt_status: u32, - device_status: u32, - config_generation: u32, - queue_select: u16, - msix_config: u16, - /// The configuration of queues. Max number of queues is 8. - queues_config: [QueueConfig; 8], - /// The number of queues. - queue_num: usize, + virtio_base: VirtioBaseState, } /// Virtio-PCI device structure #[derive(Clone)] pub struct VirtioPciDevice { - /// Name of this device - name: String, + base: PciDevBase, /// The entity of virtio device device: Arc>, /// Device id dev_id: Arc, - /// Devfn - devfn: u8, - /// If this device is activated or not. - device_activated: Arc, /// Memory AddressSpace sys_mem: Arc, - /// Pci config space. - config: PciConfig, - /// Virtio common config refer to Virtio Spec. - common_config: Arc>, - /// Primary Bus - parent_bus: Weak>, - /// Eventfds used for notifying the guest. - notify_eventfds: NotifyEventFds, + /// Offset of VirtioPciCfgAccessCap in Pci config space. + cfg_cap_offset: usize, + /// Eventfds used for guest notify the Device. + notify_eventfds: Arc, /// The function for interrupt triggering interrupt_cb: Option>, - /// Virtio queues. The vector and Queue will be shared acrossing thread, so all with Arc> wrapper. - queues: Arc>>>>, /// Multi-Function flag. multi_func: bool, + /// If the device need to register irqfd. 
+ need_irqfd: bool, + /// Device activation error + activate_err: bool, } impl VirtioPciDevice { @@ -500,55 +327,76 @@ impl VirtioPciDevice { devfn: u8, sys_mem: Arc, device: Arc>, - parent_bus: Weak>, + parent_bus: Weak>, multi_func: bool, + need_irqfd: bool, ) -> Self { let queue_num = device.lock().unwrap().queue_num(); - let queue_size = device.lock().unwrap().queue_size(); - VirtioPciDevice { - name, + base: PciDevBase { + base: DeviceBase::new(name, true, Some(parent_bus)), + config: PciConfig::new(devfn, PCIE_CONFIG_SPACE_SIZE, VIRTIO_PCI_BAR_MAX), + devfn, + }, device, dev_id: Arc::new(AtomicU16::new(0)), - devfn, - device_activated: Arc::new(AtomicBool::new(false)), sys_mem, - config: PciConfig::new(PCIE_CONFIG_SPACE_SIZE, VIRTIO_PCI_BAR_MAX), - common_config: Arc::new(Mutex::new(VirtioPciCommonConfig::new( - queue_size, queue_num, - ))), - parent_bus, - notify_eventfds: NotifyEventFds::new(queue_num), + cfg_cap_offset: 0, + notify_eventfds: Arc::new(NotifyEventFds::new(queue_num)), interrupt_cb: None, - queues: Arc::new(Mutex::new(Vec::with_capacity(queue_num))), multi_func, + need_irqfd, + activate_err: false, } } fn assign_interrupt_cb(&mut self) { - let cloned_common_cfg = self.common_config.clone(); - let cloned_msix = self.config.msix.clone(); + let locked_dev = self.device.lock().unwrap(); + let virtio_base = locked_dev.virtio_base(); + let device_status = virtio_base.device_status.clone(); + let interrupt_status = virtio_base.interrupt_status.clone(); + let msix_config = virtio_base.config_vector.clone(); + let config_generation = virtio_base.config_generation.clone(); + + let cloned_msix = self.base.config.msix.as_ref().unwrap().clone(); + let cloned_intx = self.base.config.intx.as_ref().unwrap().clone(); let dev_id = self.dev_id.clone(); + let cb = Arc::new(Box::new( - move |int_type: &VirtioInterruptType, queue: Option<&Queue>| { + move |int_type: &VirtioInterruptType, queue: Option<&Queue>, needs_reset: bool| { let vector = match int_type { - VirtioInterruptType::Config => cloned_common_cfg - .lock() - .unwrap() - .msix_config - .load(Ordering::SeqCst), + VirtioInterruptType::Config => { + if needs_reset { + device_status.fetch_or(CONFIG_STATUS_NEEDS_RESET, Ordering::SeqCst); + } + if device_status.load(Ordering::Acquire) & CONFIG_STATUS_DRIVER_OK == 0 { + return Ok(()); + } + + // Use (CONFIG | VRING) instead of CONFIG, it can be used to solve the + // IO stuck problem by change the device configure. 
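+ // The ISR register is read-and-clear: the VRING bit reports a used-buffer
+ // notification and the CONFIG bit reports a configuration change. Raising
+ // both bits lets a driver that only rescans its queues on the vring bit
+ // still pick up the event that accompanied the configuration change.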
+ interrupt_status.fetch_or( + VIRTIO_MMIO_INT_CONFIG | VIRTIO_MMIO_INT_VRING, + Ordering::SeqCst, + ); + config_generation.fetch_add(1, Ordering::SeqCst); + msix_config.load(Ordering::Acquire) + } VirtioInterruptType::Vring => { + interrupt_status.fetch_or(VIRTIO_MMIO_INT_VRING, Ordering::SeqCst); queue.map_or(0, |q| q.vring.get_queue_config().vector) } }; - if let Some(msix) = &cloned_msix { - msix.lock() - .unwrap() - .notify(vector, dev_id.load(Ordering::Acquire)); + let mut locked_msix = cloned_msix.lock().unwrap(); + if locked_msix.enabled { + if vector != INVALID_VECTOR_NUM { + locked_msix.notify(vector, dev_id.load(Ordering::Acquire)); + } } else { - bail!("Failed to send interrupt, msix does not exist"); + cloned_intx.lock().unwrap().notify(1); } + Ok(()) }, ) as VirtioInterrupt); @@ -558,17 +406,11 @@ impl VirtioPciDevice { fn ioeventfds(&self) -> Vec { let mut ret = Vec::new(); - for (index, eventfd) in self.notify_eventfds.events.iter().enumerate() { + let eventfds = (*self.notify_eventfds).clone(); + for (index, eventfd) in eventfds.events.into_iter().enumerate() { let addr = index as u64 * u64::from(VIRTIO_PCI_CAP_NOTIFY_OFF_MULTIPLIER); - let eventfd_clone = match eventfd.try_clone() { - Err(e) => { - error!("Failed to clone ioeventfd, error is {}", e); - continue; - } - Ok(fd) => fd, - }; ret.push(RegionIoEventFd { - fd: eventfd_clone, + fd: eventfd.clone(), addr_range: AddressRange::from((addr, 2u64)), data_match: false, data: index as u64, @@ -578,186 +420,376 @@ impl VirtioPciDevice { ret } - fn modern_mem_region_map(&mut self, data: T) -> PciResult<()> { - let cap_offset = self.config.add_pci_cap( + fn modern_mem_region_map(&mut self, data: T) -> Result { + let cap_offset = self.base.config.add_pci_cap( PCI_CAP_ID_VNDR, size_of::() + PCI_CAP_VNDR_AND_NEXT_SIZE as usize, )?; let write_start = cap_offset + PCI_CAP_VNDR_AND_NEXT_SIZE as usize; - self.config.config[write_start..(write_start + size_of::())] + self.base.config.config[write_start..(write_start + size_of::())] .copy_from_slice(data.as_bytes()); - Ok(()) + Ok(write_start) } - fn build_common_cfg_ops(&mut self) -> RegionOps { - let cloned_virtio_dev = self.device.clone(); - let cloned_common_cfg = self.common_config.clone(); - let common_read = move |data: &mut [u8], _addr: GuestAddress, offset: u64| -> bool { - let value = match cloned_common_cfg - .lock() - .unwrap() - .read_common_config(&cloned_virtio_dev, offset) - { - Ok(v) => v, - Err(e) => { + fn activate_device(&mut self) -> bool { + info!("func: activate_device, id: {:?}", &self.base.base.id); + let mut locked_dev = self.device.lock().unwrap(); + if locked_dev.device_activated() { + return true; + } + + let queue_type = locked_dev.queue_type(); + let features = locked_dev.virtio_base().driver_features; + let broken = locked_dev.virtio_base().broken.clone(); + + let mut queues = Vec::new(); + let queues_config = &mut locked_dev.virtio_base_mut().queues_config; + for q_config in queues_config.iter_mut() { + if !q_config.ready { + debug!("queue is not ready, please check your init process"); + } else { + q_config.set_addr_cache( + self.sys_mem.clone(), + self.interrupt_cb.clone().unwrap(), + features, + &broken, + ); + } + let queue = Queue::new(*q_config, queue_type).unwrap(); + if q_config.ready && !queue.is_valid(&self.sys_mem) { + error!( + "Failed to activate device {}: Invalid queue", + self.base.base.id + ); + return false; + } + let arc_queue = Arc::new(Mutex::new(queue)); + queues.push(arc_queue.clone()); + } + locked_dev.virtio_base_mut().queues 
= queues; + + let bus = self.parent_bus().unwrap().upgrade().unwrap(); + PCI_BUS!(bus, locked_bus, pci_bus); + pci_bus.update_dev_id(self.base.devfn, &self.dev_id); + if self.need_irqfd { + let mut queue_num = locked_dev.queue_num(); + // No need to create call event for control queue. + // It will be polled in StratoVirt when activating the device. + if locked_dev.has_control_queue() && queue_num % 2 != 0 { + queue_num -= 1; + } + let call_evts = NotifyEventFds::new(queue_num); + if let Err(e) = locked_dev.set_guest_notifiers(&call_evts.events) { + error!( + "Failed to set guest notifiers, device {} error is {:?}", + self.base.base.id, e + ); + return false; + } + drop(locked_dev); + if !self.queues_register_irqfd(&call_evts.events) { + error!( + "Failed to register queues irqfd for device {}", + self.base.base.id + ); + return false; + } + locked_dev = self.device.lock().unwrap(); + } + + let queue_evts = (*self.notify_eventfds).clone().events; + if let Err(e) = locked_dev.activate( + self.sys_mem.clone(), + self.interrupt_cb.clone().unwrap(), + queue_evts, + ) { + // log only first activation error + if !self.activate_err { + self.activate_err = true; + error!( + "Failed to activate device {}, error is {:?}", + self.base.base.id, e + ); + } + return false; + } + self.activate_err = false; + + locked_dev.set_device_activated(true); + true + } + + fn deactivate_device(&self) { + info!("func: deactivate_device, id: {:?}", &self.base.base.id); + if self.need_irqfd && self.base.config.msix.is_some() { + let msix = self.base.config.msix.as_ref().unwrap(); + if msix.lock().unwrap().unregister_irqfd().is_err() { + warn!("unregister_irqfd failed"); + } + } + // call deactivate unconditionally, since device can be + // in half initialized state + let mut locked_dev = self.device.lock().unwrap(); + if let Err(e) = locked_dev.deactivate() { + error!( + "Failed to deactivate virtio device {}, error is {:?}", + self.base.base.id, e + ); + } + locked_dev.virtio_base_mut().reset(); + + if let Some(intx) = &self.base.config.intx { + intx.lock().unwrap().reset(); + } + if let Some(msix) = &self.base.config.msix { + msix.lock().unwrap().clear_pending_vectors(); + }; + } + + /// Read data from the common config of virtio device. + /// Return the config value in u32. + /// + /// # Arguments + /// + /// * `offset` - The offset of common config. 
+ fn read_common_config(&self, offset: u64) -> Result { + trace::virtio_tpt_read_common_config(&self.base.base.id, offset); + let locked_device = self.device.lock().unwrap(); + let value = match offset { + COMMON_DFSELECT_REG => locked_device.hfeatures_sel(), + COMMON_DF_REG => { + let dfeatures_sel = locked_device.hfeatures_sel(); + if dfeatures_sel < MAX_FEATURES_SELECT_NUM { + locked_device.device_features(dfeatures_sel) + } else { + 0 + } + } + COMMON_GFSELECT_REG => locked_device.gfeatures_sel(), + COMMON_GF_REG => { + let gfeatures_sel = locked_device.gfeatures_sel(); + if gfeatures_sel < MAX_FEATURES_SELECT_NUM { + locked_device.driver_features(gfeatures_sel) + } else { + 0 + } + } + COMMON_MSIX_REG => u32::from(locked_device.config_vector()), + COMMON_NUMQ_REG => locked_device.virtio_base().queues_config.len() as u32, + COMMON_STATUS_REG => locked_device.device_status(), + COMMON_CFGGENERATION_REG => u32::from(locked_device.config_generation()), + COMMON_Q_SELECT_REG => u32::from(locked_device.queue_select()), + COMMON_Q_SIZE_REG => locked_device + .queue_config() + .map(|config| u32::from(config.size))?, + COMMON_Q_MSIX_REG => locked_device + .queue_config() + .map(|config| u32::from(config.vector))?, + COMMON_Q_ENABLE_REG => locked_device + .queue_config() + .map(|config| u32::from(config.ready))?, + COMMON_Q_NOFF_REG => u32::from(locked_device.queue_select()), + COMMON_Q_DESCLO_REG => locked_device + .queue_config() + .map(|config| config.desc_table.0 as u32)?, + COMMON_Q_DESCHI_REG => locked_device + .queue_config() + .map(|config| (config.desc_table.0 >> 32) as u32)?, + COMMON_Q_AVAILLO_REG => locked_device + .queue_config() + .map(|config| config.avail_ring.0 as u32)?, + COMMON_Q_AVAILHI_REG => locked_device + .queue_config() + .map(|config| (config.avail_ring.0 >> 32) as u32)?, + COMMON_Q_USEDLO_REG => locked_device + .queue_config() + .map(|config| config.used_ring.0 as u32)?, + COMMON_Q_USEDHI_REG => locked_device + .queue_config() + .map(|config| (config.used_ring.0 >> 32) as u32)?, + _ => 0, + }; + + Ok(value) + } + + /// Write data to the common config of virtio device. + /// + /// # Arguments + /// + /// * `offset` - The offset of common config. + /// * `value` - The value to write. + /// + /// # Errors + /// + /// Returns Error if the offset is out of bound. 
+ fn write_common_config(&mut self, offset: u64, value: u32) -> Result<()> { + trace::virtio_tpt_write_common_config(&self.base.base.id, offset, value); + let mut locked_device = self.device.lock().unwrap(); + match offset { + COMMON_DFSELECT_REG => { + locked_device.set_hfeatures_sel(value); + } + COMMON_GFSELECT_REG => { + locked_device.set_gfeatures_sel(value); + } + COMMON_GF_REG => { + if locked_device.device_status() & CONFIG_STATUS_FEATURES_OK != 0 { + error!("it's not allowed to set features after having been negoiated for device {}", self.base.base.id); + return Ok(()); + } + let gfeatures_sel = locked_device.gfeatures_sel(); + if gfeatures_sel >= MAX_FEATURES_SELECT_NUM { + return Err(anyhow!(PciError::FeaturesSelect(gfeatures_sel))); + } + locked_device.set_driver_features(gfeatures_sel, value); + + if gfeatures_sel == 1 { + let features = u64::from(locked_device.driver_features(1)) << 32; + if virtio_has_feature(features, VIRTIO_F_RING_PACKED) { + locked_device.set_queue_type(QUEUE_TYPE_PACKED_VRING); + } else { + locked_device.set_queue_type(QUEUE_TYPE_SPLIT_VRING); + } + } + } + COMMON_MSIX_REG => { + if self.base.config.revise_msix_vector(value) { + locked_device.set_config_vector(value as u16); + } else { + locked_device.set_config_vector(INVALID_VECTOR_NUM); + } + locked_device.set_interrupt_status(0); + } + COMMON_STATUS_REG => { + if value & CONFIG_STATUS_FEATURES_OK != 0 && value & CONFIG_STATUS_DRIVER_OK == 0 { + let features = u64::from(locked_device.driver_features(1)) << 32; + if !virtio_has_feature(features, VIRTIO_F_VERSION_1) { + error!( + "Device {} is modern only, but the driver not support VIRTIO_F_VERSION_1", self.base.base.id + ); + return Ok(()); + } + } + if value != 0 && (locked_device.device_status() & !value) != 0 { error!( - "Failed to read common config of virtio-pci device, error is {}", - e.display_chain(), + "Driver must not clear a device status bit, device {}", + self.base.base.id ); - return false; + return Ok(()); } - }; - match data.len() { - 1 => data[0] = value as u8, - 2 => { - LittleEndian::write_u16(data, value as u16); + let old_status = locked_device.device_status(); + locked_device.set_device_status(value); + if locked_device.check_device_status( + CONFIG_STATUS_ACKNOWLEDGE + | CONFIG_STATUS_DRIVER + | CONFIG_STATUS_DRIVER_OK + | CONFIG_STATUS_FEATURES_OK, + CONFIG_STATUS_FAILED, + ) { + drop(locked_device); + self.activate_device(); + } else if old_status != 0 && locked_device.device_status() == 0 { + drop(locked_device); + self.deactivate_device(); } - 4 => { - LittleEndian::write_u32(data, value); + } + COMMON_Q_SELECT_REG => { + if value < VIRTIO_QUEUE_MAX { + locked_device.set_queue_select(value as u16); } - _ => { + } + COMMON_Q_SIZE_REG => locked_device + .queue_config_mut(true) + .map(|config| config.size = value as u16)?, + COMMON_Q_ENABLE_REG => { + if value != 1 { error!( - "invalid data length for reading pci common config: offset 0x{:x}, data len {}", - offset, data.len() + "Driver set illegal value for queue_enable {}, device {}", + value, self.base.base.id ); - return false; + return Err(anyhow!(PciError::QueueEnable(value))); } - }; - - true + locked_device + .queue_config_mut(true) + .map(|config| config.ready = true)?; + } + COMMON_Q_MSIX_REG => { + let val = if self.base.config.revise_msix_vector(value) { + value as u16 + } else { + INVALID_VECTOR_NUM + }; + // It should not check device status when detaching device which + // will set vector to INVALID_VECTOR_NUM. 
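+ // A cleared device status means the device is being reset or detached; in
+ // that case the status check in queue_config_mut() would reject the update,
+ // so the check is only enforced while the device status is still non-zero.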
+ let need_check = locked_device.device_status() != 0; + locked_device + .queue_config_mut(need_check) + .map(|config| config.vector = val)?; + } + COMMON_Q_DESCLO_REG => locked_device.queue_config_mut(true).map(|config| { + config.desc_table = GuestAddress(config.desc_table.0 | u64::from(value)); + })?, + COMMON_Q_DESCHI_REG => locked_device.queue_config_mut(true).map(|config| { + config.desc_table = GuestAddress(config.desc_table.0 | (u64::from(value) << 32)); + })?, + COMMON_Q_AVAILLO_REG => locked_device.queue_config_mut(true).map(|config| { + config.avail_ring = GuestAddress(config.avail_ring.0 | u64::from(value)); + })?, + COMMON_Q_AVAILHI_REG => locked_device.queue_config_mut(true).map(|config| { + config.avail_ring = GuestAddress(config.avail_ring.0 | (u64::from(value) << 32)); + })?, + COMMON_Q_USEDLO_REG => locked_device.queue_config_mut(true).map(|config| { + config.used_ring = GuestAddress(config.used_ring.0 | u64::from(value)); + })?, + COMMON_Q_USEDHI_REG => locked_device.queue_config_mut(true).map(|config| { + config.used_ring = GuestAddress(config.used_ring.0 | (u64::from(value) << 32)); + })?, + _ => { + return Err(anyhow!(PciError::PciRegister(offset))); + } }; - let cloned_pci_device = self.clone(); - let cloned_mem_space = self.sys_mem.clone(); - let common_write = move |data: &[u8], _addr: GuestAddress, offset: u64| -> bool { - let value = match data.len() { - 1 => data[0] as u32, - 2 => LittleEndian::read_u16(data) as u32, - 4 => LittleEndian::read_u32(data), - _ => { + Ok(()) + } + + fn build_common_cfg_ops(virtio_pci: Arc>) -> RegionOps { + let cloned_virtio_pci = virtio_pci.clone(); + let common_read = move |data: &mut [u8], _addr: GuestAddress, offset: u64| -> bool { + let value = match cloned_virtio_pci.lock().unwrap().read_common_config(offset) { + Ok(v) => v, + Err(e) => { error!( - "Invalid data length for writing pci common config: offset 0x{:x}, data len {}", - offset, data.len() + "Failed to read common config of virtio-pci device, error is {:?}", + e, ); return false; } }; - let old_dev_status = cloned_pci_device - .common_config - .lock() - .unwrap() - .device_status; - if let Err(e) = cloned_pci_device - .common_config + write_data_u32(data, value) + }; + + let common_write = move |data: &[u8], _addr: GuestAddress, offset: u64| -> bool { + let mut value: u32 = 0; + if !read_data_u32(data, &mut value) { + return false; + } + + if let Err(e) = virtio_pci .lock() .unwrap() - .write_common_config(&cloned_pci_device.device.clone(), offset, value) + .write_common_config(offset, value) { error!( - "Failed to read common config of virtio-pci device, error is {}", - e.display_chain(), + "Failed to write common config of virtio-pci device, error is {:?}", + e, ); return false; } - - if !cloned_pci_device.device_activated.load(Ordering::Acquire) - && cloned_pci_device - .common_config - .lock() - .unwrap() - .check_device_status( - CONFIG_STATUS_ACKNOWLEDGE - | CONFIG_STATUS_DRIVER - | CONFIG_STATUS_DRIVER_OK - | CONFIG_STATUS_FEATURES_OK, - CONFIG_STATUS_FAILED, - ) - { - let queue_type = cloned_pci_device.common_config.lock().unwrap().queue_type; - let queues_config = &mut cloned_pci_device - .common_config - .lock() - .unwrap() - .queues_config; - let mut locked_queues = cloned_pci_device.queues.lock().unwrap(); - for q_config in queues_config.iter_mut() { - q_config.addr_cache.desc_table_host = cloned_mem_space - .get_host_address(q_config.desc_table) - .unwrap_or(0); - q_config.addr_cache.avail_ring_host = cloned_mem_space - 
.get_host_address(q_config.avail_ring) - .unwrap_or(0); - q_config.addr_cache.used_ring_host = cloned_mem_space - .get_host_address(q_config.used_ring) - .unwrap_or(0); - let queue = Queue::new(*q_config, queue_type).unwrap(); - if !queue.is_valid(&cloned_pci_device.sys_mem) { - error!("Failed to activate device: Invalid queue"); - return false; - } - let arc_queue = Arc::new(Mutex::new(queue)); - locked_queues.push(arc_queue.clone()); - } - - let queue_evts = cloned_pci_device.notify_eventfds.clone().events; - if let Some(cb) = cloned_pci_device.interrupt_cb.clone() { - if let Err(e) = cloned_pci_device.device.lock().unwrap().activate( - cloned_pci_device.sys_mem.clone(), - cb, - &locked_queues, - queue_evts, - ) { - error!("Failed to activate device, error is {}", e.display_chain()); - } - } else { - error!("Failed to activate device: No interrupt callback"); - return false; - } - cloned_pci_device - .device_activated - .store(true, Ordering::Release); - - update_dev_id( - &cloned_pci_device.parent_bus, - cloned_pci_device.devfn, - &cloned_pci_device.dev_id, - ); - } - - if old_dev_status != 0 - && cloned_pci_device - .common_config - .lock() - .unwrap() - .device_status - == 0 - { - let mut locked_queues = cloned_pci_device.queues.lock().unwrap(); - locked_queues.clear(); - if cloned_pci_device.device_activated.load(Ordering::Acquire) { - cloned_pci_device - .device_activated - .store(false, Ordering::Release); - let cloned_msix = cloned_pci_device.config.msix.as_ref().unwrap().clone(); - cloned_msix.lock().unwrap().reset(); - if let Err(e) = cloned_pci_device.device.lock().unwrap().deactivate() { - error!( - "Failed to deactivate virtio device, error is {}", - e.display_chain() - ); - } - } - update_dev_id( - &cloned_pci_device.parent_bus, - cloned_pci_device.devfn, - &cloned_pci_device.dev_id, - ); - } - true }; @@ -767,133 +799,279 @@ impl VirtioPciDevice { } } - fn modern_mem_region_init(&mut self, modern_mem_region: &Region) -> PciResult<()> { + fn modern_mem_region_init( + virtio_pci: Arc>, + modern_mem_region: &Region, + ) -> Result<()> { // 1. PCI common cap sub-region. - let common_region_ops = self.build_common_cfg_ops(); - let common_region = - Region::init_io_region(u64::from(VIRTIO_PCI_CAP_COMMON_LENGTH), common_region_ops); + let common_region_ops = Self::build_common_cfg_ops(virtio_pci.clone()); + let common_region = Region::init_io_region( + u64::from(VIRTIO_PCI_CAP_COMMON_LENGTH), + common_region_ops, + "VirtioPciCommon", + ); modern_mem_region .add_subregion(common_region, u64::from(VIRTIO_PCI_CAP_COMMON_OFFSET)) - .chain_err(|| "Failed to register pci-common-cap region.")?; + .with_context(|| "Failed to register pci-common-cap region.")?; // 2. PCI ISR cap sub-region. 
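+ // Reading the one-byte ISR area below returns the pending interrupt bits and
+ // clears them atomically; the read also lowers the INTx line. Writes to this
+ // area are accepted and ignored.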
- let cloned_common_cfg = self.common_config.clone(); + let cloned_device = virtio_pci.lock().unwrap().device.clone(); + let cloned_intx = virtio_pci.lock().unwrap().base.config.intx.clone().unwrap(); let isr_read = move |data: &mut [u8], _: GuestAddress, _: u64| -> bool { if let Some(val) = data.get_mut(0) { - *val = cloned_common_cfg - .lock() - .unwrap() + let device_lock = cloned_device.lock().unwrap(); + *val = device_lock + .virtio_base() .interrupt_status .swap(0, Ordering::SeqCst) as u8; + cloned_intx.lock().unwrap().notify(0); + } + true + }; + let isr_write = move |_: &[u8], _: GuestAddress, _: u64| -> bool { true }; + let isr_region_ops = RegionOps { + read: Arc::new(isr_read), + write: Arc::new(isr_write), + }; + let isr_region = Region::init_io_region( + u64::from(VIRTIO_PCI_CAP_ISR_LENGTH), + isr_region_ops, + "VirtioIsr", + ); + modern_mem_region + .add_subregion(isr_region, u64::from(VIRTIO_PCI_CAP_ISR_OFFSET)) + .with_context(|| "Failed to register pci-isr-cap region.")?; + + // 3. PCI dev cap sub-region. + let cloned_virtio_dev = virtio_pci.lock().unwrap().device.clone(); + let device_read = move |data: &mut [u8], _addr: GuestAddress, offset: u64| -> bool { + if let Err(e) = cloned_virtio_dev.lock().unwrap().read_config(offset, data) { + error!("Failed to read virtio-dev config space, error is {:?}", e); + return false; + } + true + }; + + let cloned_virtio_dev = virtio_pci.lock().unwrap().device.clone(); + let device_write = move |data: &[u8], _addr: GuestAddress, offset: u64| -> bool { + if let Err(e) = cloned_virtio_dev.lock().unwrap().write_config(offset, data) { + error!("Failed to write virtio-dev config space, error is {:?}", e); + return false; + } + true + }; + let device_region_ops = RegionOps { + read: Arc::new(device_read), + write: Arc::new(device_write), + }; + let device_region = Region::init_io_region( + u64::from(VIRTIO_PCI_CAP_DEVICE_LENGTH), + device_region_ops, + "VirtioDevice", + ); + modern_mem_region + .add_subregion(device_region, u64::from(VIRTIO_PCI_CAP_DEVICE_OFFSET)) + .with_context(|| "Failed to register pci-dev-cap region.")?; + + // 4. PCI notify cap sub-region. + let notify_read = move |_: &mut [u8], _: GuestAddress, _: u64| -> bool { true }; + let notify_write = move |_: &[u8], _: GuestAddress, _: u64| -> bool { true }; + let notify_region_ops = RegionOps { + read: Arc::new(notify_read), + write: Arc::new(notify_write), + }; + let notify_region = Region::init_io_region( + u64::from(VIRTIO_PCI_CAP_NOTIFY_LENGTH), + notify_region_ops, + "VirtioNotify", + ); + notify_region.set_ioeventfds(&virtio_pci.lock().unwrap().ioeventfds()); + + modern_mem_region + .add_subregion(notify_region, u64::from(VIRTIO_PCI_CAP_NOTIFY_OFFSET)) + .with_context(|| "Failed to register pci-notify-cap region.")?; + + Ok(()) + } + + // Access virtio configuration through VirtioPciCfgAccessCap. + fn do_cfg_access(&mut self, start: usize, end: usize, is_write: bool) { + let pci_cfg_data_offset = + self.cfg_cap_offset + offset_of!(VirtioPciCfgAccessCap, pci_cfg_data); + let cap_size = size_of::(); + // SAFETY: pci_cfg_data_offset is the offset of VirtioPciCfgAccessCap in Pci config space + // which is much less than u16::MAX. 
+ if !ranges_overlap(start, end - start, pci_cfg_data_offset, cap_size).unwrap() { + return; + } + + let config = &self.base.config.config[self.cfg_cap_offset..]; + let bar = config[offset_of!(VirtioPciCap, bar_id)]; + let off = LittleEndian::read_u32(&config[offset_of!(VirtioPciCap, offset)..]); + let len = LittleEndian::read_u32(&config[offset_of!(VirtioPciCap, length)..]); + if bar >= VIRTIO_PCI_BAR_MAX { + warn!("The bar_id {} of VirtioPciCfgAccessCap exceeds max", bar); + return; + } + let bar_base = self.base.config.get_bar_address(bar as usize); + if bar_base == BAR_SPACE_UNMAPPED { + debug!("The bar {} of VirtioPciCfgAccessCap is not mapped", bar); + return; + } + if ![1, 2, 4].contains(&len) { + debug!("The length {} of VirtioPciCfgAccessCap is illegal", len); + return; + } + if off & (len - 1) != 0 { + warn!("The offset {} of VirtioPciCfgAccessCap is not aligned", off); + return; + } + if u64::from(off) + .checked_add(u64::from(len)) + .filter(|&end| end <= self.base.config.bars[bar as usize].size) + .is_none() + { + warn!("The access range of VirtioPciCfgAccessCap exceeds bar size"); + return; + } + + let result = if is_write { + let mut data = self.base.config.config[pci_cfg_data_offset..].as_ref(); + self.sys_mem.write( + &mut data, + GuestAddress(bar_base + u64::from(off)), + u64::from(len), + AddressAttr::MMIO, + ) + } else { + let mut data = self.base.config.config[pci_cfg_data_offset..].as_mut(); + self.sys_mem.read( + &mut data, + GuestAddress(bar_base + u64::from(off)), + u64::from(len), + AddressAttr::MMIO, + ) + }; + if let Err(e) = result { + error!( + "Failed to access virtio configuration through VirtioPciCfgAccessCap. device is {}, error is {:?}", self.base.base.id, + e + ); + } + } + + pub fn virtio_pci_auto_queues_num(queues_fixed: u16, nr_cpus: u8, queues_max: usize) -> u16 { + // Give each vcpu a vq, allow the vCPU that submit request can handle + // its own request completion. i.e, If the vq is not enough, vcpu A will + // receive completion of request that submitted by vcpu B, then A needs + // to IPI B. + min(queues_max as u16 - queues_fixed, u16::from(nr_cpus)) + } + + fn queues_register_irqfd(&self, call_fds: &[Arc]) -> bool { + if self.base.config.msix.is_none() { + error!( + "Failed to get msix in virtio pci device configure, device is {}", + self.base.base.id + ); + return false; + } + + let locked_dev = self.device.lock().unwrap(); + let mut locked_msix = self.base.config.msix.as_ref().unwrap().lock().unwrap(); + + let queues = &locked_dev.virtio_base().queues; + for (queue_index, queue_mutex) in queues.iter().enumerate() { + if locked_dev.has_control_queue() + && queue_index + 1 == queues.len() + && queues.len() % 2 != 0 + { + break; } - true - }; - let isr_write = move |_: &[u8], _: GuestAddress, _: u64| -> bool { true }; - let isr_region_ops = RegionOps { - read: Arc::new(isr_read), - write: Arc::new(isr_write), - }; - let isr_region = - Region::init_io_region(u64::from(VIRTIO_PCI_CAP_ISR_LENGTH), isr_region_ops); - modern_mem_region - .add_subregion(isr_region, u64::from(VIRTIO_PCI_CAP_ISR_OFFSET)) - .chain_err(|| "Failed to register pci-isr-cap region.")?; - // 3. PCI dev cap sub-region. 
- let cloned_virtio_dev = self.device.clone(); - let device_read = move |data: &mut [u8], _addr: GuestAddress, offset: u64| -> bool { - if let Err(e) = cloned_virtio_dev.lock().unwrap().read_config(offset, data) { - error!( - "Failed to read virtio-dev config space, error is {}", - e.display_chain() - ); - return false; + let vector = queue_mutex.lock().unwrap().vring.get_queue_config().vector; + if vector == INVALID_VECTOR_NUM { + continue; } - true - }; - let cloned_virtio_dev = self.device.clone(); - let device_write = move |data: &[u8], _addr: GuestAddress, offset: u64| -> bool { - if let Err(e) = cloned_virtio_dev.lock().unwrap().write_config(offset, data) { - error!( - "Failed to write virtio-dev config space, error is {}", - e.display_chain() - ); + if locked_msix + .register_irqfd(vector, call_fds[queue_index].clone()) + .is_err() + { return false; } - true - }; - let device_region_ops = RegionOps { - read: Arc::new(device_read), - write: Arc::new(device_write), - }; - let device_region = - Region::init_io_region(u64::from(VIRTIO_PCI_CAP_DEVICE_LENGTH), device_region_ops); - modern_mem_region - .add_subregion(device_region, u64::from(VIRTIO_PCI_CAP_DEVICE_OFFSET)) - .chain_err(|| "Failed to register pci-dev-cap region.")?; + } - // 4. PCI notify cap sub-region. - let notify_read = move |_: &mut [u8], _: GuestAddress, _: u64| -> bool { true }; - let notify_write = move |_: &[u8], _: GuestAddress, _: u64| -> bool { true }; - let notify_region_ops = RegionOps { - read: Arc::new(notify_read), - write: Arc::new(notify_write), - }; - let notify_region = - Region::init_io_region(u64::from(VIRTIO_PCI_CAP_NOTIFY_LENGTH), notify_region_ops); - notify_region.set_ioeventfds(&self.ioeventfds()); - modern_mem_region - .add_subregion(notify_region, u64::from(VIRTIO_PCI_CAP_NOTIFY_OFFSET)) - .chain_err(|| "Failed to register pci-notify-cap region.")?; + true + } - Ok(()) + pub fn get_virtio_device(&self) -> &Arc> { + &self.device } } -impl PciDevOps for VirtioPciDevice { - fn init_write_mask(&mut self) -> PciResult<()> { - self.config.init_common_write_mask() - } +impl Device for VirtioPciDevice { + gen_base_func!(device_base, device_base_mut, DeviceBase, base.base); + + fn reset(&mut self, _reset_child_device: bool) -> Result<()> { + info!("func: reset, id: {:?}", &self.base.base.id); + self.deactivate_device(); + self.device + .lock() + .unwrap() + .reset() + .with_context(|| "Failed to reset virtio device")?; + self.base.config.reset()?; - fn init_write_clear_mask(&mut self) -> PciResult<()> { - self.config.init_common_write_clear_mask() + Ok(()) } - fn realize(mut self) -> PciResult<()> { - self.init_write_mask()?; - self.init_write_clear_mask()?; + fn realize(mut self) -> Result>> { + info!("func: realize, id: {:?}", &self.base.base.id); + let parent_bus = self.parent_bus().unwrap(); + self.init_write_mask(false)?; + self.init_write_clear_mask(false)?; + let device_quirk = self.device.lock().unwrap().device_quirk(); let device_type = self.device.lock().unwrap().device_type(); le_write_u16( - &mut self.config.config, + &mut self.base.config.config, VENDOR_ID as usize, VIRTIO_PCI_VENDOR_ID, )?; le_write_u16( - &mut self.config.config, + &mut self.base.config.config, DEVICE_ID as usize, VIRTIO_PCI_DEVICE_ID_BASE + device_type as u16, )?; - self.config.config[REVISION_ID] = VIRTIO_PCI_ABI_VERSION; - let class_id = get_virtio_class_id(device_type); - le_write_u16(&mut self.config.config, SUB_CLASS_CODE as usize, class_id)?; + self.base.config.config[REVISION_ID] = VIRTIO_PCI_ABI_VERSION; + let 
class_id = get_virtio_class_id(device_type, device_quirk); le_write_u16( - &mut self.config.config, - SUBSYSTEM_VENDOR_ID, - VIRTIO_PCI_VENDOR_ID, + &mut self.base.config.config, + SUB_CLASS_CODE as usize, + class_id, )?; le_write_u16( - &mut self.config.config, - SUBSYSTEM_ID, - 0x40 + device_type as u16, + &mut self.base.config.config, + SUBSYSTEM_VENDOR_ID, + VIRTIO_PCI_VENDOR_ID, )?; + // For compatibility with windows viogpu as front-end drivers. + let subsysid = if device_type == VIRTIO_TYPE_GPU { + PCI_SUBDEVICE_ID_QEMU + } else { + 0x40 + device_type as u16 + }; + le_write_u16(&mut self.base.config.config, SUBSYSTEM_ID, subsysid)?; + init_multifunction( self.multi_func, - &mut self.config.config, - self.devfn, - self.parent_bus.clone(), + &mut self.base.config.config, + self.base.devfn, + parent_bus.clone(), )?; + #[cfg(target_arch = "aarch64")] + self.base.config.set_interrupt_pin(); let common_cap = VirtioPciCap::new( size_of::() as u8 + PCI_CAP_VNDR_AND_NEXT_SIZE, @@ -932,95 +1110,110 @@ impl PciDevOps for VirtioPciDevice { ); self.modern_mem_region_map(notify_cap)?; - let nvectors = self.device.lock().unwrap().queue_num() + 1; + let cfg_cap = VirtioPciCfgAccessCap::new( + size_of::() as u8 + PCI_CAP_VNDR_AND_NEXT_SIZE, + VirtioPciCapType::CfgAccess as u8, + ); + self.cfg_cap_offset = self.modern_mem_region_map(cfg_cap)?; + + // Make related fields of PCI config writable for VirtioPciCfgAccessCap. + let write_mask = &mut self.base.config.write_mask[self.cfg_cap_offset..]; + write_mask[offset_of!(VirtioPciCap, bar_id)] = !0; + le_write_u32(write_mask, offset_of!(VirtioPciCap, offset), !0)?; + le_write_u32(write_mask, offset_of!(VirtioPciCap, length), !0)?; + le_write_u32( + write_mask, + offset_of!(VirtioPciCfgAccessCap, pci_cfg_data), + !0, + )?; + let nvectors = self.device.lock().unwrap().queue_num() + 1; init_msix( + &mut self.base, VIRTIO_PCI_MSIX_BAR_IDX as usize, nvectors as u32, - &mut self.config, self.dev_id.clone(), - &self.name, + None, + None, )?; - self.assign_interrupt_cb(); + init_intx( + self.name(), + &mut self.base.config, + parent_bus.clone(), + self.base.devfn, + )?; - let mut mem_region_size = ((VIRTIO_PCI_CAP_NOTIFY_OFFSET + VIRTIO_PCI_CAP_NOTIFY_LENGTH) - as u64) - .next_power_of_two(); - mem_region_size = round_up(mem_region_size, host_page_size()).unwrap(); - let modern_mem_region = Region::init_container_region(mem_region_size); - self.modern_mem_region_init(&modern_mem_region)?; + self.assign_interrupt_cb(); - self.config.register_bar( - VIRTIO_PCI_MEM_BAR_IDX as usize, - modern_mem_region, - RegionType::Mem32Bit, - false, - mem_region_size, - ); + #[cfg(feature = "virtio_gpu")] + if device_quirk == Some(VirtioDeviceQuirk::VirtioGpuEnableBar0) { + init_gpu_bar0(&self.device, &mut self.base.config)?; + } self.device .lock() .unwrap() .realize() - .chain_err(|| "Failed to realize virtio device")?; + .with_context(|| "Failed to realize virtio device")?; - let name = self.name.clone(); - let devfn = self.devfn; + let name = self.name(); + let devfn = u64::from(self.base.devfn); let dev = Arc::new(Mutex::new(self)); - let pci_bus = dev.lock().unwrap().parent_bus.upgrade().unwrap(); - let mut locked_pci_bus = pci_bus.lock().unwrap(); - let pci_device = locked_pci_bus.devices.get(&devfn); - if pci_device.is_none() { - locked_pci_bus.devices.insert(devfn, dev.clone()); - } else { - bail!( - "Devfn {:?} has been used by {:?}", - &devfn, - pci_device.unwrap().lock().unwrap().name() - ); - } - MigrationManager::register_device_instance_mutex_with_id( + let mut 
mem_region_size = + u64::from(VIRTIO_PCI_CAP_NOTIFY_OFFSET + VIRTIO_PCI_CAP_NOTIFY_LENGTH) + .next_power_of_two(); + mem_region_size = max(mem_region_size, MINIMUM_BAR_SIZE_FOR_MMIO as u64); + let modern_mem_region = + Region::init_container_region(mem_region_size, "VirtioPciModernMem"); + Self::modern_mem_region_init(dev.clone(), &modern_mem_region)?; + + dev.lock().unwrap().base.config.register_bar( + VIRTIO_PCI_MEM_BAR_IDX as usize, + modern_mem_region, + RegionType::Mem64Bit, + false, + mem_region_size, + )?; + + // Register device to pci bus. + let bus = parent_bus.upgrade().unwrap(); + bus.lock().unwrap().attach_child(devfn, dev.clone())?; + + MigrationManager::register_transport_instance( VirtioPciState::descriptor(), - dev, + dev.clone(), &name, ); - Ok(()) + Ok(dev) } - fn unrealize(&mut self) -> PciResult<()> { + fn unrealize(&mut self) -> Result<()> { + info!("func: unrealize, id: {:?}", &self.base.base.id); self.device .lock() .unwrap() .unrealize() - .chain_err(|| "Failed to unrealize the virtio device")?; + .with_context(|| "Failed to unrealize the virtio device")?; - let bus = self.parent_bus.upgrade().unwrap(); - self.config.unregister_bars(&bus)?; + let bus = self.parent_bus().unwrap().upgrade().unwrap(); + self.base.config.unregister_bars(&bus)?; + + MigrationManager::unregister_device_instance(MsixState::descriptor(), &self.name()); + MigrationManager::unregister_transport_instance(VirtioPciState::descriptor(), &self.name()); - MigrationManager::unregister_device_instance_mutex_by_id( - MsixState::descriptor(), - &self.name, - ); - MigrationManager::unregister_device_instance_mutex_by_id( - VirtioPciState::descriptor(), - &self.name, - ); Ok(()) } +} - fn read_config(&self, offset: usize, data: &mut [u8]) { - let data_size = data.len(); - if offset + data_size > PCIE_CONFIG_SPACE_SIZE || data_size > REG_SIZE { - error!( - "Failed to read pcie config space at offset 0x{:x} with data size {}", - offset, data_size - ); - return; - } +impl PciDevOps for VirtioPciDevice { + gen_base_func!(pci_base, pci_base_mut, PciDevBase, base); - self.config.read(offset, data); + fn read_config(&mut self, offset: usize, data: &mut [u8]) { + trace::virtio_tpt_read_config(&self.base.base.id, offset as u64, data.len()); + self.do_cfg_access(offset, offset + data.len(), false); + self.base.config.read(offset, data); } fn write_config(&mut self, offset: usize, data: &[u8]) { @@ -1028,188 +1221,145 @@ impl PciDevOps for VirtioPciDevice { let end = offset + data_size; if end > PCIE_CONFIG_SPACE_SIZE || data_size > REG_SIZE { error!( - "Failed to write pcie config space at offset 0x{:x} with data size {}", - offset, data_size + "Failed to write pcie config space at offset {:#x} with data size {}, device is {}", + offset, data_size, self.base.base.id ); return; } + trace::virtio_tpt_write_config(&self.base.base.id, offset as u64, data); - self.config - .write(offset, data, self.dev_id.clone().load(Ordering::Acquire)); - if ranges_overlap( + let bus = self.parent_bus().unwrap().upgrade().unwrap(); + PCI_BUS!(bus, locked_bus, pci_bus); + self.base.config.write( offset, - end, - BAR_0 as usize, - BAR_0 as usize + REG_SIZE as usize * VIRTIO_PCI_BAR_MAX as usize, - ) || ranges_overlap(offset, end, ROM_ADDRESS, ROM_ADDRESS + 4) - || ranges_overlap(offset, end, COMMAND as usize, COMMAND as usize + 1) - { - let parent_bus = self.parent_bus.upgrade().unwrap(); - let locked_parent_bus = parent_bus.lock().unwrap(); - if let Err(e) = self.config.update_bar_mapping( - #[cfg(target_arch = "x86_64")] - 
&locked_parent_bus.io_region, - &locked_parent_bus.mem_region, - ) { - error!("Failed to update bar, error is {}", e.display_chain()); - } - } - } - - fn name(&self) -> String { - self.name.clone() - } - - fn devfn(&self) -> Option { - Some(self.devfn) + data, + self.dev_id.clone().load(Ordering::Acquire), + #[cfg(target_arch = "x86_64")] + Some(&pci_bus.io_region), + Some(&pci_bus.mem_region), + ); + self.do_cfg_access(offset, end, true); } - fn reset(&mut self, _reset_child_device: bool) -> PciResult<()> { - self.device - .lock() - .unwrap() - .reset() - .chain_err(|| "Fail to reset virtio device") + fn get_dev_path(&self) -> Option { + let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + match self.device.lock().unwrap().device_type() { + VIRTIO_TYPE_BLOCK => { + // The virtio blk device is identified as a single-channel SCSI device, + // so add scsi controller identification without channel, scsi-id and lun. + let parent_dev_path = self.get_parent_dev_path(parent_bus); + let mut dev_path = + self.populate_dev_path(parent_dev_path, self.base.devfn, "/scsi@"); + dev_path.push_str("/disk@0,0"); + Some(dev_path) + } + VIRTIO_TYPE_SCSI => { + // The virtio scsi controller can not set boot order, which is set for scsi device. + // All the scsi devices in the same scsi controller have the same boot path prefix + // (eg: /pci@XXXXX/scsi@$slot_id[,function_id]). And every scsi device has it's + // own boot path("/channel@0/disk@$target_id,$lun_id"); + let parent_dev_path = self.get_parent_dev_path(parent_bus); + let dev_path = self.populate_dev_path(parent_dev_path, self.base.devfn, "/scsi@"); + Some(dev_path) + } + _ => None, + } } } impl StateTransfer for VirtioPciDevice { - fn get_state_vec(&self) -> migration::errors::Result> { - let mut state = VirtioPciState::default(); + fn get_state_vec(&self) -> Result> { + let mut state = VirtioPciState { + dev_id: self.dev_id.load(Ordering::Acquire), + ..Default::default() + }; // Save virtio pci config state. - for idx in 0..self.config.config.len() { + for idx in 0..self.base.config.config.len() { // Clean interrupt status bit. if (idx as u8) == STATUS { - state.config_space[idx] = self.config.config[idx] & (!STATUS_INTERRUPT); + state.config_space[idx] = self.base.config.config[idx] & (!STATUS_INTERRUPT); } else { - state.config_space[idx] = self.config.config[idx]; + state.config_space[idx] = self.base.config.config[idx]; } - state.write_mask[idx] = self.config.write_mask[idx]; - state.write_clear_mask[idx] = self.config.write_clear_mask[idx]; + state.write_mask[idx] = self.base.config.write_mask[idx]; + state.write_clear_mask[idx] = self.base.config.write_clear_mask[idx]; } - state.last_cap_end = self.config.last_cap_end; - state.last_ext_cap_offset = self.config.last_ext_cap_offset; - state.last_ext_cap_end = self.config.last_ext_cap_end; + state.last_cap_end = self.base.config.last_cap_end; + state.last_ext_cap_offset = self.base.config.last_ext_cap_offset; + state.last_ext_cap_end = self.base.config.last_ext_cap_end; // Save virtio pci common config state. 
- { - let common_config = self.common_config.lock().unwrap(); - state.interrupt_status = common_config.interrupt_status.load(Ordering::SeqCst); - state.msix_config = common_config.msix_config.load(Ordering::SeqCst); - state.features_select = common_config.features_select; - state.acked_features_select = common_config.acked_features_select; - state.device_status = common_config.device_status; - state.config_generation = common_config.config_generation; - state.queue_select = common_config.queue_select; - } - - // Save virtio pci state. - state.activated = self.device_activated.load(Ordering::Relaxed); - state.dev_id = self.dev_id.load(Ordering::Acquire); - { - let locked_queues = self.queues.lock().unwrap(); - for (index, queue) in locked_queues.iter().enumerate() { - state.queues_config[index] = queue.lock().unwrap().vring.get_queue_config(); - state.queue_num += 1; - } - } + state.virtio_base = self.device.lock().unwrap().virtio_base().get_state(); Ok(state.as_bytes().to_vec()) } - fn set_state_mut(&mut self, state: &[u8]) -> migration::errors::Result<()> { - let mut pci_state = *VirtioPciState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("PCI_DEVICE"))?; + fn set_state_mut(&mut self, state: &[u8]) -> Result<()> { + let pci_state = VirtioPciState::from_bytes(state) + .with_context(|| migration::error::MigrationError::FromBytesError("PCI_DEVICE"))?; // Set virtio pci config state. - let config_length = self.config.config.len(); - self.config.config = pci_state.config_space[..config_length].to_vec(); - self.config.write_mask = pci_state.write_mask[..config_length].to_vec(); - self.config.write_clear_mask = pci_state.write_clear_mask[..config_length].to_vec(); - self.config.last_cap_end = pci_state.last_cap_end; - self.config.last_ext_cap_end = pci_state.last_ext_cap_end; - self.config.last_ext_cap_offset = pci_state.last_ext_cap_offset; + self.dev_id.store(pci_state.dev_id, Ordering::Release); + let config_length = self.base.config.config.len(); + self.base.config.config = pci_state.config_space[..config_length].to_vec(); + self.base.config.write_mask = pci_state.write_mask[..config_length].to_vec(); + self.base.config.write_clear_mask = pci_state.write_clear_mask[..config_length].to_vec(); + self.base.config.last_cap_end = pci_state.last_cap_end; + self.base.config.last_ext_cap_end = pci_state.last_ext_cap_end; + self.base.config.last_ext_cap_offset = pci_state.last_ext_cap_offset; // Set virtio pci common config state. - { - let mut common_config = self.common_config.lock().unwrap(); - common_config - .interrupt_status - .store(pci_state.interrupt_status, Ordering::SeqCst); - common_config - .msix_config - .store(pci_state.msix_config, Ordering::SeqCst); - common_config.features_select = pci_state.features_select; - common_config.acked_features_select = pci_state.acked_features_select; - common_config.device_status = pci_state.device_status; - common_config.config_generation = pci_state.config_generation; - common_config.queue_select = pci_state.queue_select; - } - - // Set virtio pci state. 
- self.device_activated - .store(pci_state.activated, Ordering::Relaxed); - self.dev_id.store(pci_state.dev_id, Ordering::Release); - { - let queue_type = self.common_config.lock().unwrap().queue_type; - let mut locked_queues = self.queues.lock().unwrap(); - let cloned_mem_space = self.sys_mem.clone(); - for queue_state in pci_state.queues_config[0..pci_state.queue_num].iter_mut() { - queue_state.addr_cache.desc_table_host = cloned_mem_space - .get_host_address(queue_state.desc_table) - .unwrap_or(0); - queue_state.addr_cache.avail_ring_host = cloned_mem_space - .get_host_address(queue_state.avail_ring) - .unwrap_or(0); - queue_state.addr_cache.used_ring_host = cloned_mem_space - .get_host_address(queue_state.used_ring) - .unwrap_or(0); - locked_queues.push(Arc::new(Mutex::new( - Queue::new(*queue_state, queue_type).unwrap(), - ))) - } - } + let mut locked_device = self.device.lock().unwrap(); + locked_device.virtio_base_mut().set_state( + &pci_state.virtio_base, + self.sys_mem.clone(), + self.interrupt_cb.clone().unwrap(), + ); Ok(()) } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&VirtioPciState::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&VirtioPciState::descriptor().name).unwrap_or(!0) } } impl MigrationHook for VirtioPciDevice { - fn resume(&mut self) -> migration::errors::Result<()> { - if self.device_activated.load(Ordering::Relaxed) { - // Reregister ioevents for notifies. - let parent_bus = self.parent_bus.upgrade().unwrap(); - let locked_parent_bus = parent_bus.lock().unwrap(); - if let Err(e) = self.config.update_bar_mapping( - #[cfg(target_arch = "x86_64")] - &locked_parent_bus.io_region, - &locked_parent_bus.mem_region, - ) { - bail!("Failed to update bar, error is {}", e.display_chain()); - } + fn resume(&mut self) -> Result<()> { + if !self.device.lock().unwrap().device_activated() { + return Ok(()); + } - let queue_evts = self.notify_eventfds.clone().events; - if let Some(cb) = self.interrupt_cb.clone() { - if let Err(e) = self.device.lock().unwrap().activate( - self.sys_mem.clone(), - cb, - &self.queues.lock().unwrap(), - queue_evts, - ) { - error!("Failed to resume device, error is {}", e.display_chain()); - } - } else { - error!("Failed to resume device: No interrupt callback"); + // Reregister ioevents for notifies. 
+ let parent_bus = self.parent_bus().unwrap().upgrade().unwrap(); + PCI_BUS!(parent_bus, locked_bus, pci_bus); + if let Err(e) = self.base.config.update_bar_mapping( + #[cfg(target_arch = "x86_64")] + Some(&pci_bus.io_region), + Some(&pci_bus.mem_region), + ) { + bail!("Failed to update bar, error is {:?}", e); + } + + let queue_evts = (*self.notify_eventfds).clone().events; + if let Some(cb) = self.interrupt_cb.clone() { + if let Err(e) = + self.device + .lock() + .unwrap() + .activate(self.sys_mem.clone(), cb, queue_evts) + { + error!( + "Failed to resume device {}, error is {:?}", + self.base.base.id, e + ); } + } else { + error!( + "Failed to resume device {}: No interrupt callback", + self.base.base.id + ); } Ok(()) @@ -1220,72 +1370,59 @@ impl MigrationHook for VirtioPciDevice { mod tests { use std::sync::{Arc, Mutex}; - use address_space::{AddressSpace, GuestAddress, HostMemMapping}; - use pci::{ - config::{HEADER_TYPE, HEADER_TYPE_MULTIFUNC}, - le_read_u16, - }; - use util::num_ops::{read_u32, write_u32}; + use anyhow::Result; use vmm_sys_util::eventfd::EventFd; use super::*; - use crate::Result as VirtioResult; + use crate::tests::address_space_init; + use crate::VirtioBase; + use address_space::{AddressSpace, GuestAddress}; + use devices::pci::{ + config::{HEADER_TYPE, HEADER_TYPE_MULTIFUNC}, + le_read_u16, + }; const VIRTIO_DEVICE_TEST_TYPE: u32 = 1; const VIRTIO_DEVICE_QUEUE_NUM: usize = 2; const VIRTIO_DEVICE_QUEUE_SIZE: u16 = 256; - pub struct VirtioDeviceTest { - pub device_features: u64, - pub driver_features: u64, - pub is_activated: bool, + struct VirtioDeviceTest { + base: VirtioBase, + is_activated: bool, } impl VirtioDeviceTest { - pub fn new() -> Self { + fn new() -> Self { + let mut base = VirtioBase::new( + VIRTIO_DEVICE_TEST_TYPE, + VIRTIO_DEVICE_QUEUE_NUM, + VIRTIO_DEVICE_QUEUE_SIZE, + ); + base.device_features = 0xFFFF_FFF0; VirtioDeviceTest { - device_features: 0xFFFF_FFF0, - driver_features: 0, + base, is_activated: false, } } } impl VirtioDevice for VirtioDeviceTest { - fn realize(&mut self) -> VirtioResult<()> { - Ok(()) - } - - fn device_type(&self) -> u32 { - VIRTIO_DEVICE_TEST_TYPE - } - - fn queue_num(&self) -> usize { - VIRTIO_DEVICE_QUEUE_NUM - } - - fn queue_size(&self) -> u16 { - VIRTIO_DEVICE_QUEUE_SIZE - } + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); - fn get_device_features(&self, features_select: u32) -> u32 { - read_u32(self.device_features, features_select) + fn realize(&mut self) -> Result<()> { + self.init_config_features()?; + Ok(()) } - fn set_driver_features(&mut self, page: u32, value: u32) { - let mut v = write_u32(value, page); - let unrequested_features = v & !self.device_features; - if unrequested_features != 0 { - v &= !unrequested_features; - } - self.driver_features |= v; + fn init_config_features(&mut self) -> Result<()> { + Ok(()) } - fn read_config(&self, _offset: u64, mut _data: &mut [u8]) -> VirtioResult<()> { + fn read_config(&self, _offset: u64, mut _data: &mut [u8]) -> Result<()> { Ok(()) } - fn write_config(&mut self, _offset: u64, _data: &[u8]) -> VirtioResult<()> { + fn write_config(&mut self, _offset: u64, _data: &[u8]) -> Result<()> { Ok(()) } @@ -1293,143 +1430,184 @@ mod tests { &mut self, _mem_space: Arc, _interrupt_cb: Arc, - _queues: &[Arc>], - _queue_evts: Vec, - ) -> VirtioResult<()> { + _queue_evts: Vec>, + ) -> Result<()> { self.is_activated = true; Ok(()) } + + fn deactivate(&mut self) -> Result<()> { + Ok(()) + } } macro_rules! 
com_cfg_read_test { - ($cfg: ident, $dev: ident, $reg: ident, $expect: expr) => { - assert_eq!($cfg.read_common_config(&$dev, $reg).unwrap(), $expect) + ($pci_dev: ident, $reg: ident, $expect: expr) => { + assert_eq!($pci_dev.read_common_config($reg).unwrap(), $expect) }; } macro_rules! com_cfg_write_test { - ($cfg: ident, $dev: ident, $reg: ident, $val: expr) => { - assert!($cfg.write_common_config(&$dev, $reg, $val).is_ok()) + ($pci_dev: ident, $reg: ident, $val: expr) => { + assert!($pci_dev.write_common_config($reg, $val).is_ok()) }; } + fn virtio_pci_test_init( + multi_func: bool, + ) -> ( + Arc>, + Arc>, + VirtioPciDevice, + ) { + let virtio_dev = Arc::new(Mutex::new(VirtioDeviceTest::new())); + let sys_mem = address_space_init(); + let parent_bus = Arc::new(Mutex::new(PciBus::new( + String::from("test bus"), + #[cfg(target_arch = "x86_64")] + Region::init_container_region(1 << 16, "parent_bus"), + sys_mem.root().clone(), + ))); + let virtio_pci = VirtioPciDevice::new( + String::from("test device"), + 0, + sys_mem, + virtio_dev.clone(), + Arc::downgrade(&(parent_bus.clone() as Arc>)), + multi_func, + false, + ); + + // Note: if parent_bus is used in the code execution during the testing process, a variable needs to + // be used to maintain the count and avoid rust from automatically releasing this `Arc`. + (virtio_dev, parent_bus, virtio_pci) + } + #[test] fn test_common_config_dev_feature() { - let dev = Arc::new(Mutex::new(VirtioDeviceTest::new())); - let virtio_dev = dev.clone() as Arc>; - let queue_size = virtio_dev.lock().unwrap().queue_size(); - let queue_num = virtio_dev.lock().unwrap().queue_num(); - - let mut cmn_cfg = VirtioPciCommonConfig::new(queue_size, queue_num); + let (virtio_dev, _, mut virtio_pci) = virtio_pci_test_init(false); // Read virtio device features - cmn_cfg.features_select = 0_u32; - com_cfg_read_test!(cmn_cfg, virtio_dev, COMMON_DF_REG, 0xFFFF_FFF0_u32); - cmn_cfg.features_select = 1_u32; - com_cfg_read_test!(cmn_cfg, virtio_dev, COMMON_DF_REG, 0_u32); + virtio_dev.lock().unwrap().set_hfeatures_sel(0_u32); + com_cfg_read_test!(virtio_pci, COMMON_DF_REG, 0xFFFF_FFF0_u32); + virtio_dev.lock().unwrap().set_hfeatures_sel(1_u32); + com_cfg_read_test!(virtio_pci, COMMON_DF_REG, 0_u32); // Write virtio device features - cmn_cfg.acked_features_select = 1_u32; - com_cfg_write_test!(cmn_cfg, virtio_dev, COMMON_GF_REG, 0xFF); + virtio_dev.lock().unwrap().set_gfeatures_sel(1_u32); + com_cfg_write_test!(virtio_pci, COMMON_GF_REG, 0xFF); // The feature is not supported by this virtio device, and is masked - assert_eq!(dev.lock().unwrap().driver_features, 0_u64); - cmn_cfg.acked_features_select = 0_u32; - com_cfg_write_test!(cmn_cfg, virtio_dev, COMMON_GF_REG, 0xCF); + assert_eq!(virtio_dev.lock().unwrap().base.driver_features, 0_u64); + + virtio_dev.lock().unwrap().set_gfeatures_sel(0_u32); + com_cfg_write_test!(virtio_pci, COMMON_GF_REG, 0xCF); // The feature is partially supported by this virtio device, and is partially masked - assert_eq!(dev.lock().unwrap().driver_features, 0xC0_u64); + assert_eq!(virtio_dev.lock().unwrap().base.driver_features, 0xC0_u64); // Set the feature of the Queue type - cmn_cfg.acked_features_select = 1_u32; - dev.lock().unwrap().driver_features = 0_u64; - dev.lock().unwrap().device_features = 0xFFFF_FFFF_0000_0000_u64; + virtio_dev.lock().unwrap().set_gfeatures_sel(1_u32); + virtio_dev.lock().unwrap().base.driver_features = 0_u64; + virtio_dev.lock().unwrap().base.device_features = 0xFFFF_FFFF_0000_0000_u64; let driver_features = 1_u32 << 
(VIRTIO_F_RING_PACKED - 32); - com_cfg_write_test!(cmn_cfg, virtio_dev, COMMON_GF_REG, driver_features); - assert_eq!(cmn_cfg.queue_type, QUEUE_TYPE_PACKED_VRING); + com_cfg_write_test!(virtio_pci, COMMON_GF_REG, driver_features); + assert_eq!( + virtio_dev.lock().unwrap().queue_type(), + QUEUE_TYPE_PACKED_VRING + ); assert_eq!( - dev.lock().unwrap().driver_features, + virtio_dev.lock().unwrap().base.driver_features, 1_u64 << VIRTIO_F_RING_PACKED ); } #[test] fn test_common_config_queue() { - let virtio_dev: Arc> = - Arc::new(Mutex::new(VirtioDeviceTest::new())); - let queue_size = virtio_dev.lock().unwrap().queue_size(); - let queue_num = virtio_dev.lock().unwrap().queue_num(); - let mut cmn_cfg = VirtioPciCommonConfig::new(queue_size, queue_num); + let (virtio_dev, _, virtio_pci) = virtio_pci_test_init(false); // Read Queue's Descriptor Table address - cmn_cfg.queue_select = VIRTIO_DEVICE_QUEUE_NUM as u16 - 1; - cmn_cfg.queues_config[cmn_cfg.queue_select as usize].desc_table = - GuestAddress(0xAABBCCDD_FFEEDDAA); - com_cfg_read_test!(cmn_cfg, virtio_dev, COMMON_Q_DESCLO_REG, 0xFFEEDDAA_u32); - com_cfg_read_test!(cmn_cfg, virtio_dev, COMMON_Q_DESCHI_REG, 0xAABBCCDD_u32); + virtio_dev + .lock() + .unwrap() + .set_queue_select(VIRTIO_DEVICE_QUEUE_NUM as u16 - 1); + let queue_select = virtio_dev.lock().unwrap().queue_select(); + virtio_dev.lock().unwrap().virtio_base_mut().queues_config[queue_select as usize] + .desc_table = GuestAddress(0xAABB_CCDD_FFEE_DDAA); + com_cfg_read_test!(virtio_pci, COMMON_Q_DESCLO_REG, 0xFFEEDDAA_u32); + com_cfg_read_test!(virtio_pci, COMMON_Q_DESCHI_REG, 0xAABBCCDD_u32); // Read Queue's Available Ring address - cmn_cfg.queue_select = 0; - cmn_cfg.queues_config[0].avail_ring = GuestAddress(0x11223344_55667788); - com_cfg_read_test!(cmn_cfg, virtio_dev, COMMON_Q_AVAILLO_REG, 0x55667788_u32); - com_cfg_read_test!(cmn_cfg, virtio_dev, COMMON_Q_AVAILHI_REG, 0x11223344_u32); + virtio_dev.lock().unwrap().set_queue_select(0); + virtio_dev.lock().unwrap().virtio_base_mut().queues_config[0].avail_ring = + GuestAddress(0x1122_3344_5566_7788); + com_cfg_read_test!(virtio_pci, COMMON_Q_AVAILLO_REG, 0x55667788_u32); + com_cfg_read_test!(virtio_pci, COMMON_Q_AVAILHI_REG, 0x11223344_u32); // Read Queue's Used Ring address - cmn_cfg.queue_select = 0; - cmn_cfg.queues_config[0].used_ring = GuestAddress(0x55667788_99AABBCC); - com_cfg_read_test!(cmn_cfg, virtio_dev, COMMON_Q_USEDLO_REG, 0x99AABBCC_u32); - com_cfg_read_test!(cmn_cfg, virtio_dev, COMMON_Q_USEDHI_REG, 0x55667788_u32); + virtio_dev.lock().unwrap().set_queue_select(0); + virtio_dev.lock().unwrap().virtio_base_mut().queues_config[0].used_ring = + GuestAddress(0x5566_7788_99AA_BBCC); + com_cfg_read_test!(virtio_pci, COMMON_Q_USEDLO_REG, 0x99AABBCC_u32); + com_cfg_read_test!(virtio_pci, COMMON_Q_USEDHI_REG, 0x55667788_u32); } #[test] fn test_common_config_queue_error() { - let virtio_dev: Arc> = - Arc::new(Mutex::new(VirtioDeviceTest::new())); - let queue_size = virtio_dev.lock().unwrap().queue_size(); - let queue_num = virtio_dev.lock().unwrap().queue_num(); - let mut cmn_cfg = VirtioPciCommonConfig::new(queue_size, queue_num); + let (virtio_dev, _, mut virtio_pci) = virtio_pci_test_init(false); + + assert!(init_msix( + &mut virtio_pci.base, + VIRTIO_PCI_MSIX_BAR_IDX as usize, + (virtio_dev.lock().unwrap().queue_num() + 1) as u32, + virtio_pci.dev_id.clone(), + None, + None, + ) + .is_ok()); // Error occurs when queue selector exceeds queue num - cmn_cfg.queue_select = VIRTIO_DEVICE_QUEUE_NUM as u16; - assert!(cmn_cfg - 
.read_common_config(&virtio_dev, COMMON_Q_SIZE_REG) - .is_err()); - assert!(cmn_cfg - .write_common_config(&virtio_dev, COMMON_Q_SIZE_REG, 128) + virtio_dev + .lock() + .unwrap() + .set_queue_select(VIRTIO_DEVICE_QUEUE_NUM as u16); + assert!(virtio_pci.read_common_config(COMMON_Q_SIZE_REG).is_err()); + assert!(virtio_pci + .write_common_config(COMMON_Q_SIZE_REG, 128) .is_err()); // Test Queue ready register - cmn_cfg.device_status = CONFIG_STATUS_FEATURES_OK | CONFIG_STATUS_DRIVER; - cmn_cfg.queue_select = 0; - com_cfg_write_test!(cmn_cfg, virtio_dev, COMMON_Q_ENABLE_REG, 0x1_u32); - assert!(cmn_cfg.queues_config.get(0).unwrap().ready); + virtio_dev + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FEATURES_OK | CONFIG_STATUS_DRIVER); + virtio_dev.lock().unwrap().set_queue_select(0); + com_cfg_write_test!(virtio_pci, COMMON_Q_ENABLE_REG, 0x1_u32); + assert!( + virtio_dev + .lock() + .unwrap() + .virtio_base() + .queues_config + .first() + .unwrap() + .ready + ); // Failed to set Queue relevant register if device is no ready - cmn_cfg.device_status = CONFIG_STATUS_FEATURES_OK | CONFIG_STATUS_DRIVER_OK; - cmn_cfg.queue_select = 1; - assert!(cmn_cfg - .write_common_config(&virtio_dev, COMMON_Q_MSIX_REG, 0x4_u32) + virtio_dev + .lock() + .unwrap() + .set_device_status(CONFIG_STATUS_FEATURES_OK | CONFIG_STATUS_DRIVER_OK); + virtio_dev.lock().unwrap().set_queue_select(1); + assert!(virtio_pci + .write_common_config(COMMON_Q_MSIX_REG, 0x4_u32) .is_err()); } #[test] fn test_virtio_pci_config_access() { - let virtio_dev: Arc> = - Arc::new(Mutex::new(VirtioDeviceTest::new())); - let sys_mem = AddressSpace::new(Region::init_container_region(u64::max_value())).unwrap(); - let parent_bus = Arc::new(Mutex::new(PciBus::new( - String::from("test bus"), - #[cfg(target_arch = "x86_64")] - Region::init_container_region(1 << 16), - sys_mem.root().clone(), - ))); - let mut virtio_pci = VirtioPciDevice::new( - String::from("test device"), - 0, - sys_mem, - virtio_dev, - Arc::downgrade(&parent_bus), - false, - ); - virtio_pci.init_write_mask().unwrap(); - virtio_pci.init_write_clear_mask().unwrap(); + let (_, _parent_bus, mut virtio_pci) = virtio_pci_test_init(false); + + virtio_pci.init_write_mask(false).unwrap(); + virtio_pci.init_write_clear_mask(false).unwrap(); // Overflows, exceeds size of pcie config space let mut data = vec![0_u8; 4]; @@ -1446,90 +1624,60 @@ mod tests { #[test] fn test_virtio_pci_realize() { - let virtio_dev: Arc> = - Arc::new(Mutex::new(VirtioDeviceTest::new())); - let sys_mem = AddressSpace::new(Region::init_container_region(u64::max_value())).unwrap(); - let parent_bus = Arc::new(Mutex::new(PciBus::new( - String::from("test bus"), - #[cfg(target_arch = "x86_64")] - Region::init_container_region(1 << 16), - sys_mem.root().clone(), - ))); - let virtio_pci = VirtioPciDevice::new( - String::from("test device"), - 0, - sys_mem, - virtio_dev, - Arc::downgrade(&parent_bus), - false, - ); + let (_, _parent_bus, virtio_pci) = virtio_pci_test_init(false); assert!(virtio_pci.realize().is_ok()); } #[test] fn test_device_activate() { - let sys_mem = AddressSpace::new(Region::init_container_region(u64::max_value())).unwrap(); - let mem_size: u64 = 1024 * 1024; - let host_mmap = Arc::new( - HostMemMapping::new(GuestAddress(0), None, mem_size, None, false, false, false) - .unwrap(), - ); - sys_mem - .root() - .add_subregion( - Region::init_ram_region(host_mmap.clone()), - host_mmap.start_address().raw_value(), - ) - .unwrap(); + let (virtio_dev, _parent_bus, mut virtio_pci) = 
virtio_pci_test_init(false); - let virtio_dev: Arc> = - Arc::new(Mutex::new(VirtioDeviceTest::new())); - let parent_bus = Arc::new(Mutex::new(PciBus::new( - String::from("test bus"), - #[cfg(target_arch = "x86_64")] - Region::init_container_region(1 << 16), - sys_mem.root().clone(), - ))); - let mut virtio_pci = VirtioPciDevice::new( - String::from("test device"), - 0, - sys_mem, - virtio_dev, - Arc::downgrade(&parent_bus), - false, - ); + #[cfg(target_arch = "aarch64")] + virtio_pci.base.config.set_interrupt_pin(); - // Prepare msix and interrupt callback - virtio_pci.assign_interrupt_cb(); init_msix( + &mut virtio_pci.base, VIRTIO_PCI_MSIX_BAR_IDX as usize, virtio_pci.device.lock().unwrap().queue_num() as u32 + 1, - &mut virtio_pci.config, virtio_pci.dev_id.clone(), - &virtio_pci.name, + None, + None, + ) + .unwrap(); + + let parent_bus = virtio_pci.parent_bus().unwrap(); + init_intx( + virtio_pci.name(), + &mut virtio_pci.base.config, + parent_bus.clone(), + virtio_pci.base.devfn, ) .unwrap(); + // Prepare msix and interrupt callback + virtio_pci.assign_interrupt_cb(); + // Prepare valid queue config - for queue_cfg in virtio_pci - .common_config + for queue_cfg in virtio_dev .lock() .unwrap() + .virtio_base_mut() .queues_config .iter_mut() { queue_cfg.desc_table = GuestAddress(0); - queue_cfg.avail_ring = GuestAddress((VIRTIO_DEVICE_QUEUE_SIZE as u64) * 16); + queue_cfg.avail_ring = GuestAddress(u64::from(VIRTIO_DEVICE_QUEUE_SIZE) * 16); queue_cfg.used_ring = GuestAddress(2 * 4096); queue_cfg.ready = true; queue_cfg.size = VIRTIO_DEVICE_QUEUE_SIZE; } - let common_cfg_ops = virtio_pci.build_common_cfg_ops(); + let common_cfg_ops = + VirtioPciDevice::build_common_cfg_ops(Arc::new(Mutex::new(virtio_pci))); // Device status is not ok, failed to activate virtio device let status = (CONFIG_STATUS_ACKNOWLEDGE | CONFIG_STATUS_DRIVER | CONFIG_STATUS_FEATURES_OK) .as_bytes(); (common_cfg_ops.write)(status, GuestAddress(0), COMMON_STATUS_REG); - assert_eq!(virtio_pci.device_activated.load(Ordering::Relaxed), false); + assert!(!virtio_dev.lock().unwrap().device_activated()); // Device status is not ok, failed to activate virtio device let status = (CONFIG_STATUS_ACKNOWLEDGE | CONFIG_STATUS_DRIVER @@ -1537,7 +1685,7 @@ mod tests { | CONFIG_STATUS_FEATURES_OK) .as_bytes(); (common_cfg_ops.write)(status, GuestAddress(0), COMMON_STATUS_REG); - assert_eq!(virtio_pci.device_activated.load(Ordering::Relaxed), false); + assert!(!virtio_dev.lock().unwrap().device_activated()); // Status is ok, virtio device is activated. 
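// Illustrative sketch (not part of the patch): the status writes in this test differ only in
// which virtio status bits they carry. With the bit values from the virtio spec and a
// hypothetical helper name, the activation gate the test exercises looks like:
const STATUS_ACKNOWLEDGE: u32 = 0x01; // sketch-local constants, values per the virtio spec
const STATUS_DRIVER: u32 = 0x02;
const STATUS_DRIVER_OK: u32 = 0x04;
const STATUS_FEATURES_OK: u32 = 0x08;

fn ready_to_activate(status: u32) -> bool {
    // The device activates only after ACKNOWLEDGE, DRIVER, FEATURES_OK and DRIVER_OK are all set.
    let required = STATUS_ACKNOWLEDGE | STATUS_DRIVER | STATUS_FEATURES_OK | STATUS_DRIVER_OK;
    status & required == required
}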
let status = (CONFIG_STATUS_ACKNOWLEDGE | CONFIG_STATUS_DRIVER @@ -1545,41 +1693,27 @@ mod tests { | CONFIG_STATUS_FEATURES_OK) .as_bytes(); (common_cfg_ops.write)(status, GuestAddress(0), COMMON_STATUS_REG); - assert_eq!(virtio_pci.device_activated.load(Ordering::Relaxed), true); + assert!(virtio_dev.lock().unwrap().device_activated()); // If device status(not zero) is set to zero, reset the device (common_cfg_ops.write)(0_u32.as_bytes(), GuestAddress(0), COMMON_STATUS_REG); - assert_eq!(virtio_pci.device_activated.load(Ordering::Relaxed), false); + assert!(!virtio_dev.lock().unwrap().device_activated()); } #[test] fn test_multifunction() { - let virtio_dev: Arc> = - Arc::new(Mutex::new(VirtioDeviceTest::new())); - let sys_mem = AddressSpace::new(Region::init_container_region(u64::max_value())).unwrap(); - let parent_bus = Arc::new(Mutex::new(PciBus::new( - String::from("test bus"), - #[cfg(target_arch = "x86_64")] - Region::init_container_region(1 << 16), - sys_mem.root().clone(), - ))); - let mut virtio_pci = VirtioPciDevice::new( - String::from("test device"), - 24, - sys_mem, - virtio_dev, - Arc::downgrade(&parent_bus), - true, - ); + let (_, _parent_bus, mut virtio_pci) = virtio_pci_test_init(true); + let parent_bus = virtio_pci.parent_bus().unwrap(); assert!(init_multifunction( virtio_pci.multi_func, - &mut virtio_pci.config.config, - virtio_pci.devfn, - virtio_pci.parent_bus.clone() + &mut virtio_pci.base.config.config, + virtio_pci.base.devfn, + parent_bus, ) .is_ok()); - let header_type = le_read_u16(&virtio_pci.config.config, HEADER_TYPE as usize).unwrap(); - assert_eq!(header_type, HEADER_TYPE_MULTIFUNC as u16); + let header_type = + le_read_u16(&virtio_pci.base.config.config, HEADER_TYPE as usize).unwrap(); + assert_eq!(header_type, u16::from(HEADER_TYPE_MULTIFUNC)); } } diff --git a/virtio/src/vhost/kernel/mod.rs b/virtio/src/vhost/kernel/mod.rs index ccc85e957548a01f210a70f8eb521fc72fab9c02..e04b1589abbe2ff6e4c6c680e313de846921c8bb 100644 --- a/virtio/src/vhost/kernel/mod.rs +++ b/virtio/src/vhost/kernel/mod.rs @@ -10,32 +10,34 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
+#[cfg(feature = "vhost_net")] mod net; +#[cfg(feature = "vhost_vsock")] mod vsock; +#[cfg(feature = "vhost_net")] pub use net::Net; -pub use vsock::{Vsock, VsockState}; +#[cfg(feature = "vhost_vsock")] +pub use vsock::{Vsock, VsockConfig, VsockState}; use std::fs::{File, OpenOptions}; use std::os::unix::fs::OpenOptionsExt; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::sync::{Arc, Mutex}; -use address_space::{ - AddressSpace, FlatRange, GuestAddress, Listener, ListenerReqType, RegionIoEventFd, RegionType, -}; -use util::byte_code::ByteCode; -use util::loop_context::{ - read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, -}; -use vmm_sys_util::epoll::EventSet; +use anyhow::{anyhow, Context, Result}; use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ptr, ioctl_with_ref}; +use vmm_sys_util::{ioctl_io_nr, ioctl_ioc_nr, ioctl_ior_nr, ioctl_iow_nr, ioctl_iowr_nr}; -use super::super::errors::{ErrorKind, Result, ResultExt}; -use super::super::{QueueConfig, VirtioInterrupt, VirtioInterruptType}; -use super::{VhostNotify, VhostOps}; -use crate::error_chain::ChainedError; +use super::super::QueueConfig; +use super::VhostOps; +use crate::VirtioError; +use address_space::{ + AddressAttr, AddressSpace, FlatRange, GuestAddress, Listener, ListenerReqType, RegionIoEventFd, + RegionType, +}; +use util::byte_code::ByteCode; /// Refer to VHOST_VIRTIO in /// https://github.com/torvalds/linux/blob/master/include/uapi/linux/vhost.h. @@ -56,7 +58,7 @@ ioctl_iow_nr!(VHOST_VSOCK_SET_GUEST_CID, VHOST, 0x60, u64); ioctl_iow_nr!(VHOST_VSOCK_SET_RUNNING, VHOST, 0x61, i32); /// Refer to vhost_vring_file in -/// https://github.com/torvalds/linux/blob/master/include/uapi/linux/vhost.h. +/// `` #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct VhostVringFile { @@ -127,12 +129,14 @@ impl ByteCode for VhostMemory {} #[derive(Clone)] struct VhostMemInfo { regions: Arc>>, + enabled: bool, } impl VhostMemInfo { fn new() -> VhostMemInfo { VhostMemInfo { regions: Arc::new(Mutex::new(Vec::new())), + enabled: false, } } @@ -155,7 +159,9 @@ impl VhostMemInfo { fn add_mem_range(&self, fr: &FlatRange) { let guest_phys_addr = fr.addr_range.base.raw_value(); let memory_size = fr.addr_range.size; - let userspace_addr = fr.owner.get_host_address().unwrap() + fr.offset_in_region; + let userspace_addr = + // SAFETY: memory_size is range's size, so we make sure [hva, hva+size] is in ram range. + unsafe { fr.owner.get_host_address(AddressAttr::Ram).unwrap() } + fr.offset_in_region; self.regions.lock().unwrap().push(VhostMemoryRegion { guest_phys_addr, @@ -170,7 +176,9 @@ impl VhostMemInfo { let target = VhostMemoryRegion { guest_phys_addr: fr.addr_range.base.raw_value(), memory_size: fr.addr_range.size, - userspace_addr: fr.owner.get_host_address().unwrap() + fr.offset_in_region, + // SAFETY: memory_size is range's size, so we make sure [hva, hva+size] is in ram range. 
+ userspace_addr: unsafe { fr.owner.get_host_address(AddressAttr::Ram).unwrap() } + + fr.offset_in_region, flags_padding: 0_u64, }; for (index, mr) in mem_regions.iter().enumerate() { @@ -183,7 +191,7 @@ impl VhostMemInfo { return; } } - debug!("Vhost: deleting mem region failed: not matched"); + trace::vhost_delete_mem_range_failed(); } } @@ -192,12 +200,24 @@ impl Listener for VhostMemInfo { 0 } + fn enabled(&self) -> bool { + self.enabled + } + + fn enable(&mut self) { + self.enabled = true; + } + + fn disable(&mut self) { + self.enabled = false; + } + fn handle_request( &self, range: Option<&FlatRange>, _evtfd: Option<&RegionIoEventFd>, req_type: ListenerReqType, - ) -> std::result::Result<(), address_space::errors::Error> { + ) -> std::result::Result<(), anyhow::Error> { match req_type { ListenerReqType::AddRegion => { if Self::check_vhost_mem_range(range.unwrap()) { @@ -228,13 +248,15 @@ impl VhostBackend { rawfd: Option, ) -> Result { let fd = match rawfd { - Some(rawfd) => unsafe { File::from_raw_fd(rawfd) }, + Some(rawfd) => + // SAFETY: this fd was configured in cmd line. + unsafe { File::from_raw_fd(rawfd) }, None => OpenOptions::new() .read(true) .write(true) .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK) .open(path) - .chain_err(|| format!("Failed to open {} for vhost backend.", path))?, + .with_context(|| format!("Failed to open {} for vhost backend.", path))?, }; let mem_info = Arc::new(Mutex::new(VhostMemInfo::new())); mem_space.register_listener(mem_info.clone())?; @@ -251,34 +273,54 @@ impl AsRawFd for VhostBackend { impl VhostOps for VhostBackend { fn set_owner(&self) -> Result<()> { + trace::vhost_set_owner(); + // SAFETY: self.fd was created in function new() and the + // return value will be checked later. let ret = unsafe { ioctl(self, VHOST_SET_OWNER()) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_SET_OWNER".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_SET_OWNER".to_string() + ))); } Ok(()) } fn reset_owner(&self) -> Result<()> { + trace::vhost_reset_owner(); + // SAFETY: self.fd was created in function new() and the + // return value will be checked later. let ret = unsafe { ioctl(self, VHOST_RESET_OWNER()) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_RESET_OWNER".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_RESET_OWNER".to_string() + ))); } Ok(()) } fn get_features(&self) -> Result { let mut avail_features: u64 = 0; + // SAFETY: self.fd was created in function new() and the + // return value will be checked later. let ret = unsafe { ioctl_with_mut_ref(self, VHOST_GET_FEATURES(), &mut avail_features) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_GET_FEATURES".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_GET_FEATURES".to_string() + ))); } + trace::vhost_get_features(avail_features); Ok(avail_features) } fn set_features(&self, features: u64) -> Result<()> { + trace::vhost_set_features(features); + // SAFETY: self.fd was created in function new() and the + // return value will be checked later. 
let ret = unsafe { ioctl_with_ref(self, VHOST_SET_FEATURES(), &features) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_SET_FEATURES".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_SET_FEATURES".to_string() + ))); } Ok(()) } @@ -304,21 +346,31 @@ impl VhostOps for VhostBackend { .copy_from_slice(region.as_bytes()); } + trace::vhost_set_mem_table(&bytes); + // SAFETY: self.fd was created in function new() and the + // return value will be checked later. let ret = unsafe { ioctl_with_ptr(self, VHOST_SET_MEM_TABLE(), bytes.as_ptr()) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_SET_MEM_TABLE".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_SET_MEM_TABLE".to_string() + ))); } Ok(()) } fn set_vring_num(&self, queue_idx: usize, num: u16) -> Result<()> { + trace::vhost_set_vring_num(queue_idx, num); let vring_state = VhostVringState { index: queue_idx as u32, num: u32::from(num), }; + // SAFETY: self.fd was created in function new() and the + // return value will be checked later. let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_NUM(), &vring_state) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_SET_VRING_NUM".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_SET_VRING_NUM".to_string() + ))); } Ok(()) } @@ -327,27 +379,27 @@ impl VhostOps for VhostBackend { let locked_mem_info = self.mem_info.lock().unwrap(); let desc_user_addr = locked_mem_info .addr_to_host(queue_config.desc_table) - .ok_or_else(|| { - ErrorKind::Msg(format!( + .with_context(|| { + format!( "Failed to transform desc-table address {}", queue_config.desc_table.0 - )) + ) })?; let used_user_addr = locked_mem_info .addr_to_host(queue_config.used_ring) - .ok_or_else(|| { - ErrorKind::Msg(format!( + .with_context(|| { + format!( "Failed to transform used ring address {}", queue_config.used_ring.0 - )) + ) })?; let avail_user_addr = locked_mem_info .addr_to_host(queue_config.avail_ring) - .ok_or_else(|| { - ErrorKind::Msg(format!( + .with_context(|| { + format!( "Failed to transform avail ring address {}", queue_config.avail_ring.0 - )) + ) })?; let vring_addr = VhostVringAddr { @@ -359,21 +411,31 @@ impl VhostOps for VhostBackend { log_guest_addr: 0_u64, }; + trace::vhost_set_vring_addr(&vring_addr); + // SAFETY: self.fd was created in function new() and the + // return value will be checked later. let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_ADDR(), &vring_addr) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_SET_VRING_ADDR".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_SET_VRING_ADDR".to_string() + ))); } Ok(()) } fn set_vring_base(&self, queue_idx: usize, num: u16) -> Result<()> { + trace::vhost_set_vring_base(queue_idx, num); let vring_state = VhostVringState { index: queue_idx as u32, num: u32::from(num), }; + // SAFETY: self.fd was created in function new() and the + // return value will be checked later. let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_BASE(), &vring_state) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_SET_VRING_BASE".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_SET_VRING_BASE".to_string() + ))); } Ok(()) } @@ -384,120 +446,49 @@ impl VhostOps for VhostBackend { num: 0, }; + // SAFETY: self.fd was created in function new() and the + // return value will be checked later. 
let ret = unsafe { ioctl_with_ref(self, VHOST_GET_VRING_BASE(), &vring_state) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_GET_VRING_BASE".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_GET_VRING_BASE".to_string() + ))); } + trace::vhost_get_vring_base(queue_idx, vring_state.num as u16); Ok(vring_state.num as u16) } - fn set_vring_call(&self, queue_idx: usize, fd: &EventFd) -> Result<()> { + fn set_vring_call(&self, queue_idx: usize, fd: Arc) -> Result<()> { + trace::vhost_set_vring_call(queue_idx, &fd); let vring_file = VhostVringFile { index: queue_idx as u32, fd: fd.as_raw_fd(), }; + // SAFETY: self.fd was created in function new() and the + // return value will be checked later. let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_CALL(), &vring_file) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_SET_VRING_CALL".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_SET_VRING_CALL".to_string() + ))); } Ok(()) } - fn set_vring_kick(&self, queue_idx: usize, fd: &EventFd) -> Result<()> { + fn set_vring_kick(&self, queue_idx: usize, fd: Arc) -> Result<()> { + trace::vhost_set_vring_kick(queue_idx, &fd); let vring_file = VhostVringFile { index: queue_idx as u32, fd: fd.as_raw_fd(), }; + // SAFETY: self.fd was created in function new() and the + // return value will be checked later. let ret = unsafe { ioctl_with_ref(self, VHOST_SET_VRING_KICK(), &vring_file) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_SET_VRING_KICK".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_SET_VRING_KICK".to_string() + ))); } Ok(()) } } - -pub struct VhostIoHandler { - interrupt_cb: Arc, - host_notifies: Vec, - deactivate_evt: RawFd, -} - -impl VhostIoHandler { - fn deactivate_evt_handler(&mut self) -> Vec { - let mut notifiers = Vec::new(); - for host_notify in self.host_notifies.iter() { - notifiers.push(EventNotifier::new( - NotifierOperation::Delete, - host_notify.notify_evt.as_raw_fd(), - None, - EventSet::IN, - Vec::new(), - )); - } - - notifiers.push(EventNotifier::new( - NotifierOperation::Delete, - self.deactivate_evt, - None, - EventSet::IN, - Vec::new(), - )); - - notifiers - } -} - -impl EventNotifierHelper for VhostIoHandler { - fn internal_notifiers(vhost_handler: Arc>) -> Vec { - let mut notifiers = Vec::new(); - let vhost = vhost_handler.clone(); - - let handler: Box Option>> = - Box::new(move |_, fd: RawFd| { - read_fd(fd); - - let locked_vhost_handler = vhost.lock().unwrap(); - - for host_notify in locked_vhost_handler.host_notifies.iter() { - if let Err(e) = (locked_vhost_handler.interrupt_cb)( - &VirtioInterruptType::Vring, - Some(&host_notify.queue.lock().unwrap()), - ) { - error!( - "Failed to trigger interrupt for vhost device, error is {}", - e.display_chain() - ); - } - } - - None as Option> - }); - let h = Arc::new(Mutex::new(handler)); - - for host_notify in vhost_handler.lock().unwrap().host_notifies.iter() { - notifiers.push(EventNotifier::new( - NotifierOperation::AddShared, - host_notify.notify_evt.as_raw_fd(), - None, - EventSet::IN, - vec![h.clone()], - )); - } - - // Register event notifier for deactivate_evt. 
- let vhost = vhost_handler.clone(); - let handler: Box = Box::new(move |_, fd: RawFd| { - read_fd(fd); - Some(vhost.lock().unwrap().deactivate_evt_handler()) - }); - notifiers.push(EventNotifier::new( - NotifierOperation::AddShared, - vhost_handler.lock().unwrap().deactivate_evt, - None, - EventSet::IN, - vec![Arc::new(Mutex::new(handler))], - )); - - notifiers - } -} diff --git a/virtio/src/vhost/kernel/net.rs b/virtio/src/vhost/kernel/net.rs index c120d699c3f4595528100803d55ff730c13ff924..1131bac37dcf6d6ef06a499a8fee3f6b254b8ccd 100644 --- a/virtio/src/vhost/kernel/net.rs +++ b/virtio/src/vhost/kernel/net.rs @@ -10,63 +10,65 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. -use std::cmp; -use std::fs::File; -use std::io::Write; -use std::os::unix::io::AsRawFd; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::sync::atomic::Ordering; use std::sync::{Arc, Mutex}; -use address_space::AddressSpace; -use machine_manager::{config::NetworkInterfaceConfig, event_loop::EventLoop}; -use util::byte_code::ByteCode; -use util::loop_context::EventNotifierHelper; -use util::num_ops::{read_u32, write_u32}; -use util::tap::Tap; +use anyhow::{anyhow, bail, Context, Result}; use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::ioctl::ioctl_with_ref; -use super::super::super::errors::{ErrorKind, Result, ResultExt}; -use super::super::super::{ - net::{build_device_config_space, create_tap, VirtioNetConfig}, - CtrlVirtio, NetCtrlHandler, Queue, VirtioDevice, VirtioInterrupt, VIRTIO_F_ACCESS_PLATFORM, - VIRTIO_F_VERSION_1, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN, - VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4, - VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MQ, - VIRTIO_TYPE_NET, +use super::super::{VhostIoHandler, VhostNotify, VhostOps}; +use super::{VhostBackend, VhostVringFile, VHOST_NET_SET_BACKEND}; +use crate::read_config_default; +use crate::{ + device::net::{build_device_config_space, create_tap, CtrlInfo, MAC_ADDR_LEN}, + error::VirtioError, + virtio_has_feature, CtrlVirtio, NetCtrlHandler, VirtioBase, VirtioDevice, VirtioInterrupt, + VirtioNetConfig, VIRTIO_F_ACCESS_PLATFORM, VIRTIO_F_VERSION_1, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX, + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_MAC_ADDR, + VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_UFO, + VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MQ, VIRTIO_TYPE_NET, }; -use super::super::{VhostNotify, VhostOps}; -use super::{VhostBackend, VhostIoHandler, VhostVringFile, VHOST_NET_SET_BACKEND}; +use address_space::AddressSpace; +use machine_manager::config::{NetDevcfg, NetworkInterfaceConfig}; +use machine_manager::event_loop::{register_event_helper, unregister_event_helper}; +use util::byte_code::ByteCode; +use util::gen_base_func; +use util::loop_context::{create_new_eventfd, EventNotifierHelper}; +use util::tap::Tap; /// Number of virtqueues. const QUEUE_NUM_NET: usize = 2; -/// Size of each virtqueue. -const QUEUE_SIZE_NET: u16 = 256; /// Feature for vhost-net to add virtio_net_hdr for RX, and strip for TX packets. const VHOST_NET_F_VIRTIO_NET_HDR: u32 = 27; trait VhostNetBackend { /// Attach virtio net ring to a raw socket, or tap device. /// The socket must be already bound to an ethernet device, this device will be - /// used for transmit. 
Pass fd -1 to unbind from the socket and the transmit - /// device. This can be used to stop the ring (e.g. for migration). + /// used for transmit. Pass fd -1 to unbind from the socket and the transmit + /// device. This can be used to stop the ring (e.g. for migration). /// /// # Arguments /// * `queue_index` - Index of the queue to modify. /// * `fd` - EventFd that will be signaled from guest. - fn set_backend(&self, queue_index: usize, tap_file: &File) -> Result<()>; + fn set_backend(&self, queue_index: usize, fd: RawFd) -> Result<()>; } impl VhostNetBackend for VhostBackend { /// Attach virtio net ring to a raw socket, or tap device. - fn set_backend(&self, queue_index: usize, tap_file: &File) -> Result<()> { + fn set_backend(&self, queue_index: usize, fd: RawFd) -> Result<()> { let vring_file = VhostVringFile { index: queue_index as u32, - fd: tap_file.as_raw_fd(), + fd, }; + // SAFETY: self.fd was created in function new() and the return value will be checked later. let ret = unsafe { ioctl_with_ref(self, VHOST_NET_SET_BACKEND(), &vring_file) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_NET_SET_BACKEND".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_NET_SET_BACKEND".to_string() + ))); } Ok(()) } @@ -74,65 +76,92 @@ impl VhostNetBackend for VhostBackend { /// Network device structure. pub struct Net { + /// Virtio device base property. + base: VirtioBase, /// Configuration of the network device. net_cfg: NetworkInterfaceConfig, + /// Configuration of the backend netdev. + netdev_cfg: NetDevcfg, + /// Virtio net configurations. + config_space: Arc>, /// Tap device opened. taps: Option>, /// Related vhost-net kernel device. backends: Option>, - /// Bit mask of features supported by the backend. - device_features: u64, - /// Bit mask of features negotiated by the backend and the frontend. - driver_features: u64, /// Bit mask of features supported by the vhost-net kernel. vhost_features: u64, - /// Virtio net configurations. - device_config: VirtioNetConfig, /// System address space. mem_space: Arc, - /// EventFd for device deactivate. - deactivate_evt: EventFd, + /// Save irqfd used for vhost-net. + call_events: Vec>, } impl Net { - pub fn new(cfg: &NetworkInterfaceConfig, mem_space: &Arc) -> Self { + pub fn new( + net_cfg: &NetworkInterfaceConfig, + netdev_cfg: NetDevcfg, + mem_space: &Arc, + ) -> Self { + let queue_num = if net_cfg.mq { + (netdev_cfg.queues + 1) as usize + } else { + QUEUE_NUM_NET + }; + let queue_size = net_cfg.queue_size; + Net { - net_cfg: cfg.clone(), + base: VirtioBase::new(VIRTIO_TYPE_NET, queue_num, queue_size), + net_cfg: net_cfg.clone(), + netdev_cfg, + config_space: Default::default(), taps: None, backends: None, - device_features: 0_u64, - driver_features: 0_u64, vhost_features: 0_u64, - device_config: VirtioNetConfig::default(), mem_space: mem_space.clone(), - deactivate_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), + call_events: Vec::new(), } } } impl VirtioDevice for Net { - /// Realize vhost virtio network device. 
+ gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + fn realize(&mut self) -> Result<()> { - let queue_pairs = self.net_cfg.queues / 2; + let queue_pairs = self.netdev_cfg.queues / 2; let mut backends = Vec::with_capacity(queue_pairs as usize); for index in 0..queue_pairs { - let fd = if let Some(fds) = self.net_cfg.vhost_fds.as_mut() { + let fd = if let Some(fds) = self.netdev_cfg.vhost_fds.as_mut() { fds.get(index as usize).copied() } else { None }; let backend = VhostBackend::new(&self.mem_space, "/dev/vhost-net", fd) - .chain_err(|| "Failed to create backend for vhost net")?; + .with_context(|| "Failed to create backend for vhost net")?; backend .set_owner() - .chain_err(|| "Failed to set owner for vhost net")?; + .with_context(|| "Failed to set owner for vhost net")?; backends.push(backend); } - let mut vhost_features = backends[0] + let host_dev_name = match self.netdev_cfg.ifname.as_str() { + "" => None, + _ => Some(self.netdev_cfg.ifname.as_str()), + }; + + self.taps = create_tap(self.netdev_cfg.tap_fds.as_ref(), host_dev_name, queue_pairs) + .with_context(|| "Failed to create tap for vhost net")?; + self.backends = Some(backends); + + self.init_config_features()?; + + Ok(()) + } + + fn init_config_features(&mut self) -> Result<()> { + let mut vhost_features = self.backends.as_ref().unwrap()[0] .get_features() - .chain_err(|| "Failed to get features for vhost net")?; + .with_context(|| "Failed to get features for vhost net")?; vhost_features &= !(1_u64 << VHOST_NET_F_VIRTIO_NET_HDR); vhost_features &= !(1_u64 << VIRTIO_F_ACCESS_PLATFORM); @@ -145,28 +174,23 @@ impl VirtioDevice for Net { | 1 << VIRTIO_NET_F_HOST_TSO4 | 1 << VIRTIO_NET_F_HOST_UFO; + let mut locked_config = self.config_space.lock().unwrap(); + + let queue_pairs = self.netdev_cfg.queues / 2; if self.net_cfg.mq - && queue_pairs >= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN - && queue_pairs <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX + && (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN..=VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) + .contains(&queue_pairs) { device_features |= 1 << VIRTIO_NET_F_CTRL_VQ; device_features |= 1 << VIRTIO_NET_F_MQ; - self.device_config.max_virtqueue_pairs = queue_pairs; + locked_config.max_virtqueue_pairs = queue_pairs; } if let Some(mac) = &self.net_cfg.mac { - device_features |= build_device_config_space(&mut self.device_config, mac); + device_features |= build_device_config_space(&mut locked_config, mac); } - let host_dev_name = match self.net_cfg.host_dev_name.as_str() { - "" => None, - _ => Some(self.net_cfg.host_dev_name.as_str()), - }; - - self.taps = create_tap(self.net_cfg.tap_fds.as_ref(), host_dev_name, queue_pairs) - .chain_err(|| "Failed to create tap for vhost net")?; - self.backends = Some(backends); - self.device_features = device_features; + self.base.device_features = device_features; self.vhost_features = vhost_features; Ok(()) @@ -176,96 +200,66 @@ impl VirtioDevice for Net { Ok(()) } - /// Get the virtio device type, refer to Virtio Spec. - fn device_type(&self) -> u32 { - VIRTIO_TYPE_NET - } - - /// Get the count of virtio device queues. - fn queue_num(&self) -> usize { - if self.net_cfg.mq { - (self.net_cfg.queues + 1) as usize - } else { - QUEUE_NUM_NET - } - } - - /// Get the queue size of virtio device. - fn queue_size(&self) -> u16 { - QUEUE_SIZE_NET - } - - /// Get device features from host. - fn get_device_features(&self, features_select: u32) -> u32 { - read_u32(self.device_features, features_select) - } - - /// Set driver features by guest. 
- fn set_driver_features(&mut self, page: u32, value: u32) { - let mut features = write_u32(value, page); - let unsupported_features = features & !self.device_features; - if unsupported_features != 0 { - warn!( - "Received acknowledge request with unsupported feature for vhost net: 0x{:x}", - features - ); - features &= !unsupported_features; - } - self.driver_features |= features; + fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()> { + let config_space = self.config_space.lock().unwrap(); + read_config_default(config_space.as_bytes(), offset, data) } - /// Read data of config from guest. - fn read_config(&self, offset: u64, mut data: &mut [u8]) -> Result<()> { - let config_slice = self.device_config.as_bytes(); - let config_size = config_slice.len() as u64; - if offset >= config_size { - return Err(ErrorKind::DevConfigOverflow(offset, config_size).into()); - } - if let Some(end) = offset.checked_add(data.len() as u64) { - data.write_all(&config_slice[offset as usize..cmp::min(end, config_size) as usize])?; + fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { + let data_len = data.len(); + let driver_features = self.base.driver_features; + let mut config_space = self.config_space.lock().unwrap(); + let config_slice = config_space.as_mut_bytes(); + + if !virtio_has_feature(driver_features, VIRTIO_NET_F_CTRL_MAC_ADDR) + && !virtio_has_feature(driver_features, VIRTIO_F_VERSION_1) + && offset == 0 + && data_len == MAC_ADDR_LEN + && *data != config_slice[0..data_len] + { + config_slice[(offset as usize)..(offset as usize + data_len)].copy_from_slice(data); } Ok(()) } - /// Write data to config from guest. - fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { - let data_len = data.len(); - let config_slice = self.device_config.as_mut_bytes(); - let config_len = config_slice.len(); - if offset as usize + data_len > config_len { - return Err(ErrorKind::DevConfigOverflow(offset, config_len as u64).into()); + fn set_guest_notifiers(&mut self, queue_evts: &[Arc]) -> Result<()> { + for fd in queue_evts.iter() { + self.call_events.push(fd.clone()); } - config_slice[(offset as usize)..(offset as usize + data_len)].copy_from_slice(data); - Ok(()) } - /// Activate the virtio device, this function is called by vcpu thread when frontend - /// virtio driver is ready and write `DRIVER_OK` to backend. 
fn activate( &mut self, mem_space: Arc, interrupt_cb: Arc, - queues: &[Arc>], - mut queue_evts: Vec, + queue_evts: Vec>, ) -> Result<()> { + let queues = self.base.queues.clone(); let queue_num = queues.len(); - if (self.driver_features & 1 << VIRTIO_NET_F_CTRL_VQ != 0) && (queue_num % 2 != 0) { + let driver_features = self.base.driver_features; + if (driver_features & 1 << VIRTIO_NET_F_CTRL_VQ != 0) && (queue_num % 2 != 0) { let ctrl_queue = queues[queue_num - 1].clone(); - let ctrl_queue_evt = queue_evts.remove(queue_num - 1); + let ctrl_queue_evt = queue_evts[queue_num - 1].clone(); + let ctrl_info = Arc::new(Mutex::new(CtrlInfo::new(self.config_space.clone()))); let ctrl_handler = NetCtrlHandler { - ctrl: CtrlVirtio::new(ctrl_queue, ctrl_queue_evt), + ctrl: CtrlVirtio::new(ctrl_queue, ctrl_queue_evt, ctrl_info), mem_space, interrupt_cb: interrupt_cb.clone(), - driver_features: self.driver_features, + driver_features, + device_broken: self.base.broken.clone(), + taps: None, }; - EventLoop::update_event( - EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(ctrl_handler))), + let notifiers = + EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(ctrl_handler))); + register_event_helper( + notifiers, self.net_cfg.iothread.as_ref(), + &mut self.base.deactivate_evts, )?; } @@ -273,18 +267,18 @@ impl VirtioDevice for Net { for index in 0..queue_pairs { let mut host_notifies = Vec::new(); let backend = match &self.backends { - None => return Err("Failed to get backend for vhost net".into()), + None => return Err(anyhow!("Failed to get backend for vhost net")), Some(backends) => backends .get(index) - .chain_err(|| format!("Failed to get index {} vhost backend", index))?, + .with_context(|| format!("Failed to get index {} vhost backend", index))?, }; backend .set_features(self.vhost_features) - .chain_err(|| "Failed to set features for vhost net")?; + .with_context(|| "Failed to set features for vhost net")?; backend .set_mem_table() - .chain_err(|| "Failed to set mem table for vhost net")?; + .with_context(|| "Failed to set mem table for vhost net")?; for queue_index in 0..2 { let queue_mutex = queues[index * 2 + queue_index].clone(); @@ -294,7 +288,7 @@ impl VirtioDevice for Net { backend .set_vring_num(queue_index, actual_size) - .chain_err(|| { + .with_context(|| { format!( "Failed to set vring num for vhost net, index: {} size: {}", queue_index, actual_size, @@ -302,21 +296,21 @@ impl VirtioDevice for Net { })?; backend .set_vring_addr(&queue_config, queue_index, 0) - .chain_err(|| { + .with_context(|| { format!( "Failed to set vring addr for vhost net, index: {}", queue_index, ) })?; - backend.set_vring_base(queue_index, 0).chain_err(|| { + backend.set_vring_base(queue_index, 0).with_context(|| { format!( "Failed to set vring base for vhost net, index: {}", queue_index, ) })?; backend - .set_vring_kick(queue_index, &queue_evts[index * 2 + queue_index]) - .chain_err(|| { + .set_vring_kick(queue_index, queue_evts[index * 2 + queue_index].clone()) + .with_context(|| { format!( "Failed to set vring kick for vhost net, index: {}", index * 2 + queue_index, @@ -325,140 +319,139 @@ impl VirtioDevice for Net { drop(queue); - let host_notify = VhostNotify { - notify_evt: EventFd::new(libc::EFD_NONBLOCK) - .chain_err(|| ErrorKind::EventFdCreate)?, - queue: queue_mutex.clone(), + let event = if self.call_events.is_empty() { + let host_notify = VhostNotify { + notify_evt: Arc::new( + create_new_eventfd().with_context(|| VirtioError::EventFdCreate)?, + ), + queue: queue_mutex.clone(), + 
}; + let event = host_notify.notify_evt.clone(); + host_notifies.push(host_notify); + event + } else { + self.call_events[queue_index].clone() }; backend - .set_vring_call(queue_index, &host_notify.notify_evt) - .chain_err(|| { + .set_vring_call(queue_index, event) + .with_context(|| { format!( "Failed to set vring call for vhost net, index: {}", queue_index, ) })?; - host_notifies.push(host_notify); let tap = match &self.taps { None => bail!("Failed to get tap for vhost net"), Some(taps) => taps[index].clone(), }; - backend.set_backend(queue_index, &tap.file).chain_err(|| { - format!( - "Failed to set tap device for vhost net, index: {}", - queue_index, - ) - })?; + backend + .set_backend(queue_index, tap.file.as_raw_fd()) + .with_context(|| { + format!( + "Failed to set tap device for vhost net, index: {}", + queue_index, + ) + })?; } - let handler = VhostIoHandler { - interrupt_cb: interrupt_cb.clone(), - host_notifies, - deactivate_evt: self.deactivate_evt.as_raw_fd(), - }; - - EventLoop::update_event( - EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))), - None, - )?; + if self.call_events.is_empty() { + let handler = VhostIoHandler { + interrupt_cb: interrupt_cb.clone(), + host_notifies, + device_broken: self.base.broken.clone(), + }; + let notifiers = + EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))); + register_event_helper( + notifiers, + self.net_cfg.iothread.as_ref(), + &mut self.base.deactivate_evts, + )?; + } } + self.base.broken.store(false, Ordering::SeqCst); + Ok(()) } fn deactivate(&mut self) -> Result<()> { - self.deactivate_evt - .write(1) - .chain_err(|| ErrorKind::EventFdWrite)?; + unregister_event_helper( + self.net_cfg.iothread.as_ref(), + &mut self.base.deactivate_evts, + )?; + self.call_events.clear(); Ok(()) } fn reset(&mut self) -> Result<()> { - // No need to close fd manually, because rust will - // automatically cleans up variables at the end of the lifecycle. - self.backends = None; - self.taps = None; - self.device_features = 0_u64; - self.driver_features = 0_u64; - self.vhost_features = 0_u64; - self.device_config = VirtioNetConfig::default(); - - self.realize() + let queue_pairs = self.netdev_cfg.queues / 2; + for index in 0..queue_pairs as usize { + let backend = match &self.backends { + None => return Err(anyhow!("Failed to get backend for vhost net")), + Some(backends) => backends + .get(index) + .with_context(|| format!("Failed to get index {} vhost backend", index))?, + }; + + // 2 queues: rx and tx. 
+ for queue_index in 0..2 { + backend.set_backend(queue_index, -1)?; + } + } + + Ok(()) } } #[cfg(test)] mod tests { - use super::*; - use address_space::*; use std::fs::File; - const SYSTEM_SPACE_SIZE: u64 = (1024 * 1024) as u64; - - fn vhost_address_space_init() -> Arc { - let root = Region::init_container_region(1 << 36); - let sys_space = AddressSpace::new(root).unwrap(); - let host_mmap = Arc::new( - HostMemMapping::new( - GuestAddress(0), - None, - SYSTEM_SPACE_SIZE, - None, - false, - false, - false, - ) - .unwrap(), - ); - sys_space - .root() - .add_subregion( - Region::init_ram_region(host_mmap.clone()), - host_mmap.start_address().raw_value(), - ) - .unwrap(); - sys_space - } + use super::*; + use crate::tests::address_space_init; + use machine_manager::config::DEFAULT_VIRTQUEUE_SIZE; #[test] fn test_vhost_net_realize() { - let net1 = NetworkInterfaceConfig { - id: "eth1".to_string(), - host_dev_name: "tap1".to_string(), - mac: Some("1F:2C:3E:4A:5B:6D".to_string()), - vhost_type: Some("vhost-kernel".to_string()), + let netdev_cfg1 = NetDevcfg { + netdev_type: "tap".to_string(), + id: "net1".to_string(), tap_fds: Some(vec![4]), + vhost_kernel: true, vhost_fds: Some(vec![5]), - iothread: None, + ifname: "tap1".to_string(), queues: 2, + ..Default::default() + }; + let vhost_net_conf = NetworkInterfaceConfig { + id: "eth1".to_string(), + mac: Some("1F:2C:3E:4A:5B:6D".to_string()), + iothread: None, mq: false, + queue_size: DEFAULT_VIRTQUEUE_SIZE, + ..Default::default() }; - let conf = vec![net1]; - let confs = Some(conf); - let vhost_net_confs = confs.unwrap(); - let vhost_net_conf = vhost_net_confs[0].clone(); - let vhost_net_space = vhost_address_space_init(); - let mut vhost_net = Net::new(&vhost_net_conf, &vhost_net_space); + let vhost_net_space = address_space_init(); + let mut vhost_net = Net::new(&vhost_net_conf, netdev_cfg1, &vhost_net_space); // the tap_fd and vhost_fd attribute of vhost-net can't be assigned. - assert_eq!(vhost_net.realize().is_ok(), false); + assert!(vhost_net.realize().is_err()); - let net1 = NetworkInterfaceConfig { - id: "eth0".to_string(), - host_dev_name: "".to_string(), - mac: Some("1A:2B:3C:4D:5E:6F".to_string()), - vhost_type: Some("vhost-kernel".to_string()), - tap_fds: None, - vhost_fds: None, - iothread: None, + let netdev_cfg2 = NetDevcfg { + netdev_type: "tap".to_string(), + id: "net2".to_string(), + vhost_kernel: true, queues: 2, - mq: false, + ..Default::default() }; - let conf = vec![net1]; - let confs = Some(conf); - let vhost_net_confs = confs.unwrap(); - let vhost_net_conf = vhost_net_confs[0].clone(); - let mut vhost_net = Net::new(&vhost_net_conf, &vhost_net_space); + let net_cfg2 = NetworkInterfaceConfig { + id: "eth2".to_string(), + mac: Some("1A:2B:3C:4D:5E:6F".to_string()), + queue_size: DEFAULT_VIRTQUEUE_SIZE, + ..Default::default() + }; + let mut vhost_net = Net::new(&net_cfg2, netdev_cfg2, &vhost_net_space); // if fail to open vhost-net device, no need to continue. if let Err(_e) = File::open("/dev/vhost-net") { @@ -466,45 +459,45 @@ mod tests { } // without assigned value of tap_fd and vhost_fd, // vhost-net device can be realized successfully. 
- assert_eq!(vhost_net.realize().is_ok(), true); + assert!(vhost_net.realize().is_ok()); // test for get/set_driver_features - vhost_net.device_features = 0; + vhost_net.base.device_features = 0; let page: u32 = 0x0; let value: u32 = 0xff; vhost_net.set_driver_features(page, value); - let new_page = vhost_net.get_device_features(page); + assert_eq!(u64::from(vhost_net.driver_features(page)), 0_u64); + let new_page = vhost_net.device_features(page); assert_eq!(new_page, page); - vhost_net.device_features = 0xffff_ffff_ffff_ffff; + vhost_net.base.device_features = 0xffff_ffff_ffff_ffff; let page: u32 = 0x0; let value: u32 = 0xff; vhost_net.set_driver_features(page, value); - let new_page = vhost_net.get_device_features(page); + assert_eq!(u64::from(vhost_net.driver_features(page)), 0xff_u64); + let new_page = vhost_net.device_features(page); assert_ne!(new_page, page); // test for read/write_config - let device_config = vhost_net.device_config.as_bytes(); - let len = device_config.len() as u64; - + let len = vhost_net.config_space.lock().unwrap().as_bytes().len() as u64; let offset: u64 = 0; let data: Vec = vec![1; len as usize]; - assert_eq!(vhost_net.write_config(offset, &data).is_ok(), true); + assert!(vhost_net.write_config(offset, &data).is_ok()); let mut read_data: Vec = vec![0; len as usize]; - assert_eq!(vhost_net.read_config(offset, &mut read_data).is_ok(), true); - assert_eq!(read_data, data); + assert!(vhost_net.read_config(offset, &mut read_data).is_ok()); + assert_ne!(read_data, data); let offset: u64 = 1; let data: Vec = vec![1; len as usize]; - assert_eq!(vhost_net.write_config(offset, &data).is_ok(), false); + assert!(vhost_net.write_config(offset, &data).is_ok()); let offset: u64 = len + 1; let mut read_data: Vec = vec![0; len as usize]; - assert_eq!(vhost_net.read_config(offset, &mut read_data).is_ok(), false); + assert!(vhost_net.read_config(offset, &mut read_data).is_err()); let offset: u64 = len - 1; let mut read_data: Vec = vec![0; len as usize]; - assert_eq!(vhost_net.read_config(offset, &mut read_data).is_ok(), true); + assert!(vhost_net.read_config(offset, &mut read_data).is_err()); } } diff --git a/virtio/src/vhost/kernel/vsock.rs b/virtio/src/vhost/kernel/vsock.rs index 1489a97f3f5c27799880cfe740d80ae68deccf6f..b67d21aeb2f7a3c3cb8ccad11b909d6e968a6693 100644 --- a/virtio/src/vhost/kernel/vsock.rs +++ b/virtio/src/vhost/kernel/vsock.rs @@ -10,35 +10,61 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
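// Aside (illustration only): the diff below replaces the old machine_manager VsockConfig
// with a clap-derived parser, so "guest-cid" is range-checked at parse time against
// MIN_GUEST_CID..=MAX_GUEST_CID. A minimal usage sketch, reusing str_slip_to_clap() from
// machine_manager::config the same way the unit tests further down do:
use clap::Parser;
use machine_manager::config::str_slip_to_clap;

fn parse_vsock_cmdline_sketch() {
    let cfg = VsockConfig::try_parse_from(str_slip_to_clap(
        "vhost-vsock-device,id=vsock0,guest-cid=3",
        true,
        false,
    ))
    .unwrap();
    assert_eq!(cfg.guest_cid, 3);
    // Guest CIDs start at 3 (0..=2 are reserved for hypervisor and host), so guest-cid=2
    // is rejected by the value_parser before the device is ever built.
    assert!(VsockConfig::try_parse_from(str_slip_to_clap(
        "vhost-vsock-device,id=vsock0,guest-cid=2",
        true,
        false,
    ))
    .is_err());
}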
-use std::os::unix::io::{AsRawFd, RawFd}; +use std::os::unix::io::RawFd; +use std::sync::atomic::Ordering; use std::sync::{Arc, Mutex}; -use address_space::AddressSpace; +use anyhow::{anyhow, bail, Context, Result}; use byteorder::{ByteOrder, LittleEndian}; -use machine_manager::{config::VsockConfig, event_loop::EventLoop}; -use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; -use util::byte_code::ByteCode; -use util::loop_context::EventNotifierHelper; -use util::num_ops::{read_u32, write_u32}; +use clap::{ArgAction, Parser}; use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::ioctl::ioctl_with_ref; -use super::super::super::errors::{ErrorKind, Result, ResultExt}; -use super::super::super::{ - Queue, VirtioDevice, VirtioInterrupt, VirtioInterruptType, VIRTIO_TYPE_VSOCK, +use super::super::{VhostIoHandler, VhostNotify, VhostOps}; +use super::{VhostBackend, VHOST_VSOCK_SET_GUEST_CID, VHOST_VSOCK_SET_RUNNING}; +use crate::{ + Queue, VirtioBase, VirtioDevice, VirtioError, VirtioInterrupt, VirtioInterruptType, + VIRTIO_F_ACCESS_PLATFORM, VIRTIO_TYPE_VSOCK, }; -use super::super::{VhostNotify, VhostOps}; -use super::{VhostBackend, VhostIoHandler, VHOST_VSOCK_SET_GUEST_CID, VHOST_VSOCK_SET_RUNNING}; +use address_space::{AddressAttr, AddressSpace}; +use machine_manager::config::{get_pci_df, parse_bool, valid_id, DEFAULT_VIRTQUEUE_SIZE}; +use machine_manager::event_loop::{register_event_helper, unregister_event_helper}; +use migration::{DeviceStateDesc, FieldDesc, MigrationHook, MigrationManager, StateTransfer}; +use migration_derive::{ByteCode, Desc}; +use util::byte_code::ByteCode; +use util::gen_base_func; +use util::loop_context::{create_new_eventfd, EventNotifierHelper}; /// Number of virtqueues. const QUEUE_NUM_VSOCK: usize = 3; -/// Size of each virtqueue. -const QUEUE_SIZE_VSOCK: u16 = 256; /// Backend vhost-vsock device path. const VHOST_PATH: &str = "/dev/vhost-vsock"; /// Event transport reset const VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: u32 = 0; +const MAX_GUEST_CID: u64 = 4_294_967_295; +const MIN_GUEST_CID: u64 = 3; + +/// Config structure for virtio-vsock. +#[derive(Parser, Debug, Clone, Default)] +#[command(no_binary_name(true))] +pub struct VsockConfig { + #[arg(long, value_parser = ["vhost-vsock-pci", "vhost-vsock-device"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub bus: Option, + #[arg(long, value_parser = get_pci_df)] + pub addr: Option<(u8, u8)>, + #[arg(long, value_parser = parse_bool, action = ArgAction::Append)] + pub multifunction: Option, + #[arg(long, alias = "guest-cid", value_parser = clap::value_parser!(u64).range(MIN_GUEST_CID..=MAX_GUEST_CID))] + pub guest_cid: u64, + #[arg(long, alias = "vhostfd")] + pub vhost_fd: Option, +} + trait VhostVsockBackend { /// Each guest should have an unique CID which is used to route data to the guest. fn set_guest_cid(&self, cid: u64) -> Result<()>; @@ -48,18 +74,24 @@ trait VhostVsockBackend { impl VhostVsockBackend for VhostBackend { fn set_guest_cid(&self, cid: u64) -> Result<()> { + // SAFETY: self.fd was created in function new() and the return value will be checked later. 
let ret = unsafe { ioctl_with_ref(&self.fd, VHOST_VSOCK_SET_GUEST_CID(), &cid) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_VSOCK_SET_GUEST_CID".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_VSOCK_SET_GUEST_CID".to_string() + ))); } Ok(()) } fn set_running(&self, start: bool) -> Result<()> { - let on: u32 = if start { 1 } else { 0 }; + let on: u32 = u32::from(start); + // SAFETY: self.fd was created in function new() and the return value will be checked later. let ret = unsafe { ioctl_with_ref(&self.fd, VHOST_VSOCK_SET_RUNNING(), &on) }; if ret < 0 { - return Err(ErrorKind::VhostIoctl("VHOST_VSOCK_SET_RUNNING".to_string()).into()); + return Err(anyhow!(VirtioError::VhostIoctl( + "VHOST_VSOCK_SET_RUNNING".to_string() + ))); } Ok(()) } @@ -77,36 +109,44 @@ pub struct VsockState { config_space: [u8; 8], /// Last avail idx in vsock backend queue. last_avail_idx: [u16; 2], + /// Device broken status. + broken: bool, } /// Vsock device structure. pub struct Vsock { + /// Virtio device base property. + base: VirtioBase, /// Configuration of the vsock device. vsock_cfg: VsockConfig, + /// Configuration of virtio vsock. + config_space: [u8; 8], /// Related vhost-vsock kernel device. backend: Option, - /// The status of vsock. - state: VsockState, + /// Last avail idx in vsock backend queue. + last_avail_idx: [u16; 2], /// System address space. mem_space: Arc, /// Event queue for vsock. event_queue: Option>>, /// Callback to trigger interrupt. interrupt_cb: Option>, - /// EventFd for device deactivate. - deactivate_evt: EventFd, + /// Save irqfd used for vhost-vsock + call_events: Vec>, } impl Vsock { pub fn new(cfg: &VsockConfig, mem_space: &Arc) -> Self { Vsock { + base: VirtioBase::new(VIRTIO_TYPE_VSOCK, QUEUE_NUM_VSOCK, DEFAULT_VIRTQUEUE_SIZE), vsock_cfg: cfg.clone(), backend: None, - state: VsockState::default(), + config_space: Default::default(), + last_avail_idx: Default::default(), mem_space: mem_space.clone(), event_queue: None, interrupt_cb: None, - deactivate_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), + call_events: Vec::new(), } } @@ -114,30 +154,39 @@ impl Vsock { /// been interrupted. The driver shuts down established connections and the guest_cid /// configuration field is fetched again. 
fn transport_reset(&self) -> Result<()> { - let mut event_queue_locked = self.event_queue.as_ref().unwrap().lock().unwrap(); - let element = event_queue_locked - .vring - .pop_avail(&self.mem_space, self.state.driver_features) - .chain_err(|| "Failed to get avail ring element.")?; - - self.mem_space - .write_object( - &VIRTIO_VSOCK_EVENT_TRANSPORT_RESET, - element.in_iovec[0].addr, - ) - .chain_err(|| "Failed to write buf for virtio vsock event")?; - event_queue_locked - .vring - .add_used( - &self.mem_space, - element.index, - VIRTIO_VSOCK_EVENT_TRANSPORT_RESET.as_bytes().len() as u32, - ) - .chain_err(|| format!("Failed to add used ring {}", element.index))?; - - if let Some(interrupt_cb) = &self.interrupt_cb { - interrupt_cb(&VirtioInterruptType::Vring, Some(&*event_queue_locked)) - .chain_err(|| ErrorKind::EventFdWrite)?; + if let Some(evt_queue) = self.event_queue.as_ref() { + let mut event_queue_locked = evt_queue.lock().unwrap(); + let element = event_queue_locked + .vring + .pop_avail(&self.mem_space, self.base.driver_features) + .with_context(|| "Failed to get avail ring element.")?; + if element.desc_num == 0 { + return Ok(()); + } + + self.mem_space + .write_object( + &VIRTIO_VSOCK_EVENT_TRANSPORT_RESET, + element.in_iovec[0].addr, + AddressAttr::Ram, + ) + .with_context(|| "Failed to write buf for virtio vsock event")?; + event_queue_locked + .vring + .add_used( + element.index, + VIRTIO_VSOCK_EVENT_TRANSPORT_RESET.as_bytes().len() as u32, + ) + .with_context(|| format!("Failed to add used ring {}", element.index))?; + + if let Some(interrupt_cb) = &self.interrupt_cb { + interrupt_cb( + &VirtioInterruptType::Vring, + Some(&*event_queue_locked), + false, + ) + .with_context(|| VirtioError::EventFdWrite)?; + } } Ok(()) @@ -145,54 +194,31 @@ impl Vsock { } impl VirtioDevice for Vsock { - /// Realize vhost virtio vsock device. + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + fn realize(&mut self) -> Result<()> { let vhost_fd: Option = self.vsock_cfg.vhost_fd; let backend = VhostBackend::new(&self.mem_space, VHOST_PATH, vhost_fd) - .chain_err(|| "Failed to create backend for vsock")?; + .with_context(|| "Failed to create backend for vsock")?; backend .set_owner() - .chain_err(|| "Failed to set owner for vsock")?; - self.state.device_features = backend - .get_features() - .chain_err(|| "Failed to get features for vsock")?; + .with_context(|| "Failed to set owner for vsock")?; self.backend = Some(backend); - Ok(()) - } - - /// Get the virtio device type, refer to Virtio Spec. - fn device_type(&self) -> u32 { - VIRTIO_TYPE_VSOCK - } - - /// Get the count of virtio device queues. - fn queue_num(&self) -> usize { - QUEUE_NUM_VSOCK - } + self.init_config_features()?; - /// Get the queue size of virtio device. - fn queue_size(&self) -> u16 { - QUEUE_SIZE_VSOCK - } - - /// Get device features from host. - fn get_device_features(&self, features_select: u32) -> u32 { - read_u32(self.state.device_features, features_select) + Ok(()) } - /// Set driver features by guest. 
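// Aside (illustration only): the removed get_device_features()/set_driver_features() pair,
// like the VirtioBase accessors that replace it, exposes the 64-bit feature word as two
// 32-bit "pages" because virtio transports negotiate features 32 bits at a time. The split
// itself is simple (read_u32/write_u32 in util::num_ops implement the same idea):
fn feature_page(features: u64, page: u32) -> u32 {
    match page {
        0 => features as u32,         // low half, feature bits 0..31
        1 => (features >> 32) as u32, // high half, feature bits 32..63
        _ => 0,                       // out-of-range selectors read as zero
    }
}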
- fn set_driver_features(&mut self, page: u32, value: u32) { - let mut features = write_u32(value, page); - let unsupported_features = features & !self.state.device_features; - if unsupported_features != 0 { - warn!("Unsupported feature ack (Vsock): {:x}", features); - features &= !unsupported_features; - } - self.state.driver_features |= features; + fn init_config_features(&mut self) -> Result<()> { + let backend = self.backend.as_ref().unwrap(); + let features = backend + .get_features() + .with_context(|| "Failed to get features for vsock")?; + self.base.device_features = features & !(1_u64 << VIRTIO_F_ACCESS_PLATFORM); + Ok(()) } - /// Read data of config from guest. fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()> { match offset { 0 if data.len() == 8 => LittleEndian::write_u64(data, self.vsock_cfg.guest_cid), @@ -208,47 +234,44 @@ impl VirtioDevice for Vsock { Ok(()) } - /// Write data to config from guest. - fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { - let data_len = data.len(); - let config_len = self.state.config_space.len(); - if offset as usize + data_len > config_len { - return Err(ErrorKind::DevConfigOverflow(offset, config_len as u64).into()); + fn write_config(&mut self, _offset: u64, _data: &[u8]) -> Result<()> { + Ok(()) + } + + fn set_guest_notifiers(&mut self, queue_evts: &[Arc]) -> Result<()> { + for fd in queue_evts.iter() { + self.call_events.push(fd.clone()); } - self.state.config_space[(offset as usize)..(offset as usize + data_len)] - .copy_from_slice(data); Ok(()) } - /// Activate the virtio device, this function is called by vcpu thread when frontend - /// virtio driver is ready and write `DRIVER_OK` to backend. fn activate( &mut self, _: Arc, interrupt_cb: Arc, - queues: &[Arc>], - queue_evts: Vec, + queue_evts: Vec>, ) -> Result<()> { let cid = self.vsock_cfg.guest_cid; - let mut host_notifies = Vec::new(); + let queues = &self.base.queues; // The receive queue and transmit queue will be handled in vhost. let vhost_queues = queues[..2].to_vec(); + let mut host_notifies = Vec::new(); // This event queue will be handled. self.event_queue = Some(queues[2].clone()); self.interrupt_cb = Some(interrupt_cb.clone()); // Preliminary setup for vhost net. 
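// Aside (illustration only): the vsock config space read above is just the 64-bit guest CID
// stored little-endian at offset 0; 4-byte reads at offsets 0 and 4 return the low and high
// halves and anything else is rejected, which is what the read_config tests below verify.
// A simplified standalone sketch of that layout:
use byteorder::{ByteOrder, LittleEndian};

fn read_guest_cid(guest_cid: u64, offset: u64, data: &mut [u8]) -> Result<(), String> {
    match (offset, data.len()) {
        (0, 8) => LittleEndian::write_u64(data, guest_cid),
        (0, 4) => LittleEndian::write_u32(data, guest_cid as u32),
        (4, 4) => LittleEndian::write_u32(data, (guest_cid >> 32) as u32),
        (o, l) => return Err(format!("invalid vsock config read: offset {} len {}", o, l)),
    }
    Ok(())
}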
let backend = match &self.backend { - None => return Err("Failed to get backend for vsock".into()), + None => return Err(anyhow!("Failed to get backend for vsock")), Some(backend_) => backend_, }; backend - .set_features(self.state.driver_features) - .chain_err(|| "Failed to set features for vsock")?; + .set_features(self.base.driver_features) + .with_context(|| "Failed to set features for vsock")?; backend .set_mem_table() - .chain_err(|| "Failed to set mem table for vsock")?; + .with_context(|| "Failed to set mem table for vsock")?; for (queue_index, queue_mutex) in vhost_queues.iter().enumerate() { let queue = queue_mutex.lock().unwrap(); @@ -257,116 +280,122 @@ impl VirtioDevice for Vsock { backend .set_vring_num(queue_index, actual_size) - .chain_err(|| { + .with_context(|| { format!("Failed to set vring num for vsock, index: {}", queue_index) })?; backend .set_vring_addr(&queue_config, queue_index, 0) - .chain_err(|| { + .with_context(|| { format!("Failed to set vring addr for vsock, index: {}", queue_index) })?; backend - .set_vring_base(queue_index, self.state.last_avail_idx[queue_index]) - .chain_err(|| { + .set_vring_base(queue_index, self.last_avail_idx[queue_index]) + .with_context(|| { format!("Failed to set vring base for vsock, index: {}", queue_index) })?; backend - .set_vring_kick(queue_index, &queue_evts[queue_index]) - .chain_err(|| { + .set_vring_kick(queue_index, queue_evts[queue_index].clone()) + .with_context(|| { format!("Failed to set vring kick for vsock, index: {}", queue_index) })?; drop(queue); - let host_notify = VhostNotify { - notify_evt: EventFd::new(libc::EFD_NONBLOCK) - .chain_err(|| ErrorKind::EventFdCreate)?, - queue: queue_mutex.clone(), + let event = if self.call_events.is_empty() { + let host_notify = VhostNotify { + notify_evt: Arc::new( + create_new_eventfd().with_context(|| VirtioError::EventFdCreate)?, + ), + queue: queue_mutex.clone(), + }; + let event = host_notify.notify_evt.clone(); + host_notifies.push(host_notify); + event + } else { + self.call_events[queue_index].clone() }; backend - .set_vring_call(queue_index, &host_notify.notify_evt) - .chain_err(|| { + .set_vring_call(queue_index, event) + .with_context(|| { format!("Failed to set vring call for vsock, index: {}", queue_index) })?; - host_notifies.push(host_notify); } backend.set_guest_cid(cid)?; backend.set_running(true)?; - let handler = VhostIoHandler { - interrupt_cb, - host_notifies, - deactivate_evt: self.deactivate_evt.as_raw_fd(), - }; + if self.call_events.is_empty() { + let handler = VhostIoHandler { + interrupt_cb: interrupt_cb.clone(), + host_notifies, + device_broken: self.base.broken.clone(), + }; + let notifiers = EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))); + register_event_helper(notifiers, None, &mut self.base.deactivate_evts)?; + } - EventLoop::update_event( - EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))), - None, - )?; + self.base.broken.store(false, Ordering::SeqCst); Ok(()) } fn deactivate(&mut self) -> Result<()> { - self.deactivate_evt - .write(1) - .chain_err(|| ErrorKind::EventFdWrite)?; + unregister_event_helper(None, &mut self.base.deactivate_evts)?; + self.call_events.clear(); Ok(()) } fn reset(&mut self) -> Result<()> { - // No need to close fd manually, because rust will - // automatically cleans up variables at the end of the lifecycle. 
- self.backend = None; - self.state = VsockState::default(); - self.event_queue = None; - self.interrupt_cb = None; - - self.realize() + self.backend.as_ref().unwrap().set_running(false) } } impl StateTransfer for Vsock { - fn get_state_vec(&self) -> migration::errors::Result> { - let mut state = self.state; - migration::errors::ResultExt::chain_err( - self.backend.as_ref().unwrap().set_running(false), - || "Failed to set vsock backend stopping", - )?; - state.last_avail_idx[0] = self.backend.as_ref().unwrap().get_vring_base(0).unwrap(); - state.last_avail_idx[1] = self.backend.as_ref().unwrap().get_vring_base(1).unwrap(); - migration::errors::ResultExt::chain_err( - self.backend.as_ref().unwrap().set_running(true), - || "Failed to set vsock backend running", - )?; - migration::errors::ResultExt::chain_err(self.transport_reset(), || { + fn get_state_vec(&self) -> Result> { + Result::with_context(self.backend.as_ref().unwrap().set_running(false), || { + "Failed to set vsock backend stopping" + })?; + + let last_avail_idx_0 = self.backend.as_ref().unwrap().get_vring_base(0).unwrap(); + let last_avail_idx_1 = self.backend.as_ref().unwrap().get_vring_base(1).unwrap(); + let state = VsockState { + device_features: self.base.device_features, + driver_features: self.base.driver_features, + config_space: self.config_space, + last_avail_idx: [last_avail_idx_0, last_avail_idx_1], + broken: self.base.broken.load(Ordering::SeqCst), + }; + + Result::with_context(self.backend.as_ref().unwrap().set_running(true), || { + "Failed to set vsock backend running" + })?; + Result::with_context(self.transport_reset(), || { "Failed to send vsock transport reset event" })?; Ok(state.as_bytes().to_vec()) } - fn set_state_mut(&mut self, state: &[u8]) -> migration::errors::Result<()> { - self.state = *VsockState::from_bytes(state) - .ok_or(migration::errors::ErrorKind::FromBytesError("VSOCK"))?; - + fn set_state_mut(&mut self, state: &[u8]) -> Result<()> { + let state = VsockState::from_bytes(state) + .with_context(|| migration::error::MigrationError::FromBytesError("VSOCK"))?; + self.base.device_features = state.device_features; + self.base.driver_features = state.driver_features; + self.base.broken.store(state.broken, Ordering::SeqCst); + self.config_space = state.config_space; + self.last_avail_idx = state.last_avail_idx; Ok(()) } fn get_device_alias(&self) -> u64 { - if let Some(alias) = MigrationManager::get_desc_alias(&VsockState::descriptor().name) { - alias - } else { - !0 - } + MigrationManager::get_desc_alias(&VsockState::descriptor().name).unwrap_or(!0) } } impl MigrationHook for Vsock { #[cfg(target_arch = "aarch64")] - fn resume(&mut self) -> migration::errors::Result<()> { - migration::errors::ResultExt::chain_err(self.transport_reset(), || { + fn resume(&mut self) -> Result<()> { + Result::with_context(self.transport_reset(), || { "Failed to resume virtio vsock device" })?; @@ -376,25 +405,37 @@ impl MigrationHook for Vsock { #[cfg(test)] mod tests { - pub use super::super::*; - pub use super::*; - pub use address_space::*; - - fn vsock_address_space_init() -> Arc { - let root = Region::init_container_region(u64::max_value()); - let sys_mem = AddressSpace::new(root).unwrap(); - sys_mem - } + use super::*; + use crate::tests::address_space_init; + use machine_manager::config::str_slip_to_clap; fn vsock_create_instance() -> Vsock { let vsock_conf = VsockConfig { id: "test_vsock_1".to_string(), guest_cid: 3, vhost_fd: None, + ..Default::default() }; - let sys_mem = vsock_address_space_init(); - let vsock 
= Vsock::new(&vsock_conf, &sys_mem); - vsock + let sys_mem = address_space_init(); + + Vsock::new(&vsock_conf, &sys_mem) + } + + #[test] + fn test_vsock_config_cmdline_parser() { + let vsock_cmd = "vhost-vsock-device,id=test_vsock,guest-cid=3"; + let vsock_config = + VsockConfig::try_parse_from(str_slip_to_clap(vsock_cmd, true, false)).unwrap(); + assert_eq!(vsock_config.id, "test_vsock"); + assert_eq!(vsock_config.guest_cid, 3); + assert_eq!(vsock_config.vhost_fd, None); + + let vsock_cmd = "vhost-vsock-device,id=test_vsock,guest-cid=3,vhostfd=4"; + let vsock_config = + VsockConfig::try_parse_from(str_slip_to_clap(vsock_cmd, true, false)).unwrap(); + assert_eq!(vsock_config.id, "test_vsock"); + assert_eq!(vsock_config.guest_cid, 3); + assert_eq!(vsock_config.vhost_fd, Some(4)); } #[test] @@ -402,51 +443,53 @@ mod tests { // test vsock new method let mut vsock = vsock_create_instance(); - assert_eq!(vsock.state.device_features, 0); - assert_eq!(vsock.state.driver_features, 0); + assert_eq!(vsock.base.device_features, 0); + assert_eq!(vsock.base.driver_features, 0); assert!(vsock.backend.is_none()); assert_eq!(vsock.device_type(), VIRTIO_TYPE_VSOCK); assert_eq!(vsock.queue_num(), QUEUE_NUM_VSOCK); - assert_eq!(vsock.queue_size(), QUEUE_SIZE_VSOCK); + assert_eq!(vsock.queue_size_max(), DEFAULT_VIRTQUEUE_SIZE); // test vsock get_device_features - vsock.state.device_features = 0x0123_4567_89ab_cdef; - let features = vsock.get_device_features(0); + vsock.base.device_features = 0x0123_4567_89ab_cdef; + let features = vsock.device_features(0); assert_eq!(features, 0x89ab_cdef); - let features = vsock.get_device_features(1); + let features = vsock.device_features(1); assert_eq!(features, 0x0123_4567); - let features = vsock.get_device_features(3); + let features = vsock.device_features(3); assert_eq!(features, 0); // test vsock set_driver_features - vsock.state.device_features = 0x0123_4567_89ab_cdef; + vsock.base.device_features = 0x0123_4567_89ab_cdef; // check for unsupported feature vsock.set_driver_features(0, 0x7000_0000); - assert_eq!(vsock.state.device_features, 0x0123_4567_89ab_cdef); + assert_eq!(u64::from(vsock.driver_features(0)), 0_u64); + assert_eq!(vsock.base.device_features, 0x0123_4567_89ab_cdef); // check for supported feature vsock.set_driver_features(0, 0x8000_0000); - assert_eq!(vsock.state.device_features, 0x0123_4567_89ab_cdef); + assert_eq!(u64::from(vsock.driver_features(0)), 0x8000_0000_u64); + assert_eq!(vsock.base.device_features, 0x0123_4567_89ab_cdef); // test vsock read_config let mut buf: [u8; 8] = [0; 8]; - assert_eq!(vsock.read_config(0, &mut buf).is_ok(), true); + assert!(vsock.read_config(0, &mut buf).is_ok()); let value = LittleEndian::read_u64(&buf); assert_eq!(value, vsock.vsock_cfg.guest_cid); let mut buf: [u8; 4] = [0; 4]; - assert_eq!(vsock.read_config(0, &mut buf).is_ok(), true); + assert!(vsock.read_config(0, &mut buf).is_ok()); let value = LittleEndian::read_u32(&buf); assert_eq!(value, vsock.vsock_cfg.guest_cid as u32); let mut buf: [u8; 4] = [0; 4]; - assert_eq!(vsock.read_config(4, &mut buf).is_ok(), true); + assert!(vsock.read_config(4, &mut buf).is_ok()); let value = LittleEndian::read_u32(&buf); assert_eq!(value, (vsock.vsock_cfg.guest_cid >> 32) as u32); let mut buf: [u8; 4] = [0; 4]; - assert_eq!(vsock.read_config(5, &mut buf).is_err(), true); - assert_eq!(vsock.read_config(3, &mut buf).is_err(), true); + assert!(vsock.read_config(5, &mut buf).is_err()); + assert!(vsock.read_config(3, &mut buf).is_err()); } #[test] @@ -465,12 +508,9 @@ mod tests 
{ // test vsock set_guest_cid let backend = vsock.backend.unwrap(); - assert_eq!(backend.set_guest_cid(3).is_ok(), true); - assert_eq!( - backend.set_guest_cid(u32::max_value() as u64).is_ok(), - false - ); - assert_eq!(backend.set_guest_cid(2).is_ok(), false); - assert_eq!(backend.set_guest_cid(0).is_ok(), false); + assert!(backend.set_guest_cid(3).is_ok()); + assert!(backend.set_guest_cid(u64::from(u32::max_value())).is_err()); + assert!(backend.set_guest_cid(2).is_err()); + assert!(backend.set_guest_cid(0).is_err()); } } diff --git a/virtio/src/vhost/mod.rs b/virtio/src/vhost/mod.rs index 412b3ca3a2bd27b152ea3c42877aebb4c9c3c0ad..8f78ef6663530ab17b7c461eaeadd1bb2efa5496 100644 --- a/virtio/src/vhost/mod.rs +++ b/virtio/src/vhost/mod.rs @@ -11,19 +11,27 @@ // See the Mulan PSL v2 for more details. pub mod kernel; -mod user; +pub mod user; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::rc::Rc; +use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex}; +use anyhow::Result; +use log::error; +use vmm_sys_util::epoll::EventSet; use vmm_sys_util::eventfd::EventFd; -use super::errors::Result; -use super::{Queue, QueueConfig}; +use super::{Queue, QueueConfig, VirtioInterrupt, VirtioInterruptType}; +use util::loop_context::{ + read_fd, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, +}; /// Vhost vring call notify structure. pub struct VhostNotify { /// Used to register in vhost kernel, when virtio queue have io request will notify to vhost. - pub notify_evt: EventFd, + pub notify_evt: Arc, /// The related virtio queue. pub queue: Arc>, } @@ -82,14 +90,14 @@ pub trait VhostOps { /// # Arguments /// * `queue_idx` - Index of the queue to modify. /// * `fd` - EventFd to trigger. - fn set_vring_call(&self, queue_idx: usize, fd: &EventFd) -> Result<()>; + fn set_vring_call(&self, queue_idx: usize, fd: Arc) -> Result<()>; /// Set eventfd to poll for added buffers. /// /// # Arguments /// * `queue_idx` - Index of the queue to modify. /// * `fd` - EventFd that will be signaled from guest. - fn set_vring_kick(&self, queue_idx: usize, fd: &EventFd) -> Result<()>; + fn set_vring_kick(&self, queue_idx: usize, fd: Arc) -> Result<()>; /// Set the status of ring. 
/// @@ -100,3 +108,51 @@ pub trait VhostOps { Ok(()) } } + +pub struct VhostIoHandler { + interrupt_cb: Arc, + host_notifies: Vec, + device_broken: Arc, +} + +impl EventNotifierHelper for VhostIoHandler { + fn internal_notifiers(vhost_handler: Arc>) -> Vec { + let mut notifiers = Vec::new(); + + let vhost = vhost_handler.clone(); + let handler: Rc = Rc::new(move |_, fd: RawFd| { + read_fd(fd); + let locked_vhost_handler = vhost.lock().unwrap(); + if locked_vhost_handler.device_broken.load(Ordering::SeqCst) { + return None; + } + for host_notify in locked_vhost_handler.host_notifies.iter() { + if host_notify.notify_evt.as_raw_fd() != fd { + continue; + } + if let Err(e) = (locked_vhost_handler.interrupt_cb)( + &VirtioInterruptType::Vring, + Some(&host_notify.queue.lock().unwrap()), + false, + ) { + error!( + "Failed to trigger interrupt for vhost device, error is {:?}", + e + ); + } + } + None as Option> + }); + for host_notify in vhost_handler.lock().unwrap().host_notifies.iter() { + notifiers.push(EventNotifier::new( + NotifierOperation::AddShared, + host_notify.notify_evt.as_raw_fd(), + None, + EventSet::IN, + vec![handler.clone()], + )); + } + + notifiers + } +} diff --git a/virtio/src/vhost/user/block.rs b/virtio/src/vhost/user/block.rs new file mode 100644 index 0000000000000000000000000000000000000000..4e0757f9fc0e309a0b032d6f71d660dc18bee843 --- /dev/null +++ b/virtio/src/vhost/user/block.rs @@ -0,0 +1,280 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
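// Aside (illustration only): vhost-user protocol features are a plain bitmask that is only
// negotiated once VHOST_USER_F_PROTOCOL_FEATURES shows up in the device features.
// init_config_features() below intersects the backend's offer with the three bits this
// device understands before calling set_protocol_features(). The constants match the ones
// exported by client.rs in this patch:
const VHOST_USER_PROTOCOL_F_MQ: u8 = 0;
const VHOST_USER_PROTOCOL_F_CONFIG: u8 = 9;
const VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD: u8 = 12;

fn negotiate_protocol_features(backend_offer: u64) -> u64 {
    let supported = 1u64 << VHOST_USER_PROTOCOL_F_MQ
        | 1u64 << VHOST_USER_PROTOCOL_F_CONFIG
        | 1u64 << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
    supported & backend_offer
}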
+ +use std::sync::{Arc, Mutex}; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::Parser; +use vmm_sys_util::eventfd::EventFd; + +use super::client::VhostUserClient; +use crate::vhost::VhostOps; +use crate::VhostUser::client::{ + VhostBackendType, VHOST_USER_PROTOCOL_F_CONFIG, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD, + VHOST_USER_PROTOCOL_F_MQ, +}; +use crate::VhostUser::listen_guest_notifier; +use crate::VhostUser::message::VHOST_USER_F_PROTOCOL_FEATURES; +use crate::{ + check_config_space_rw, read_config_default, virtio_has_feature, VirtioBase, VirtioBlkConfig, + VirtioDevice, VirtioInterrupt, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_FLUSH, + VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, + VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_WRITE_ZEROES, VIRTIO_F_VERSION_1, VIRTIO_TYPE_BLOCK, +}; +use address_space::AddressSpace; +use machine_manager::config::{ + get_chardev_socket_path, get_pci_df, valid_block_device_virtqueue_size, valid_id, + ChardevConfig, MAX_VIRTIO_QUEUE, +}; +use machine_manager::event_loop::unregister_event_helper; +use util::byte_code::ByteCode; +use util::gen_base_func; + +#[derive(Parser, Debug, Clone, Default)] +#[command(no_binary_name(true))] +pub struct VhostUserBlkDevConfig { + #[arg(long, value_parser = ["vhost-user-blk-device", "vhost-user-blk-pci"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub bus: Option, + #[arg(long, value_parser = get_pci_df)] + pub addr: Option<(u8, u8)>, + #[arg(long, alias = "num-queues", value_parser = clap::value_parser!(u16).range(1..=MAX_VIRTIO_QUEUE as i64))] + pub num_queues: Option, + #[arg(long)] + pub chardev: String, + #[arg(long, alias = "queue-size", default_value = "256", value_parser = valid_block_device_virtqueue_size)] + pub queue_size: u16, + #[arg(long)] + pub bootindex: Option, +} + +pub struct Block { + /// Virtio device base property. + base: VirtioBase, + /// Configuration of the block device. + blk_cfg: VhostUserBlkDevConfig, + /// Configuration of the vhost user blk's socket chardev. + chardev_cfg: ChardevConfig, + /// Config space of the block device. + config_space: VirtioBlkConfig, + /// System address space. + mem_space: Arc, + /// Vhost user client + client: Option>>, + /// Whether irqfd can be used. + pub enable_irqfd: bool, + /// Vhost user protocol features. + protocol_features: u64, +} + +impl Block { + pub fn new( + cfg: &VhostUserBlkDevConfig, + chardev_cfg: ChardevConfig, + mem_space: &Arc, + ) -> Self { + let queue_num = cfg.num_queues.unwrap_or(1) as usize; + let queue_size = cfg.queue_size; + + Block { + base: VirtioBase::new(VIRTIO_TYPE_BLOCK, queue_num, queue_size), + blk_cfg: cfg.clone(), + chardev_cfg, + config_space: Default::default(), + mem_space: mem_space.clone(), + client: None, + enable_irqfd: false, + protocol_features: 0_u64, + } + } + + /// Connect with spdk and register update event. 
+ fn init_client(&mut self) -> Result<()> { + let socket_path = get_chardev_socket_path(self.chardev_cfg.clone())?; + let client = VhostUserClient::new( + &self.mem_space, + &socket_path, + self.queue_num() as u64, + VhostBackendType::TypeBlock, + ) + .with_context(|| { + "Failed to create the client which communicates with the server for vhost-user blk" + })?; + let client = Arc::new(Mutex::new(client)); + VhostUserClient::add_event(&client)?; + self.client = Some(client); + Ok(()) + } +} + +impl VirtioDevice for Block { + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + + fn realize(&mut self) -> Result<()> { + self.init_client()?; + self.init_config_features()?; + Ok(()) + } + + fn init_config_features(&mut self) -> Result<()> { + let locked_client = self.client.as_ref().unwrap().lock().unwrap(); + let features = locked_client + .get_features() + .with_context(|| "Failed to get features for vhost-user blk")?; + + if virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES) { + let protocol_features = locked_client + .get_protocol_features() + .with_context(|| "Failed to get protocol features for vhost-user blk")?; + let supported_protocol_features = 1 << VHOST_USER_PROTOCOL_F_MQ + | 1 << VHOST_USER_PROTOCOL_F_CONFIG + | 1 << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD; + self.protocol_features = supported_protocol_features & protocol_features; + locked_client + .set_protocol_features(self.protocol_features) + .with_context(|| "Failed to set protocol features for vhost-user blk")?; + + if virtio_has_feature(protocol_features, u32::from(VHOST_USER_PROTOCOL_F_CONFIG)) { + let config = locked_client + .get_virtio_blk_config() + .with_context(|| "Failed to get config for vhost-user blk")?; + self.config_space = config; + } else { + bail!( + "Failed to get config, spdk doesn't support, spdk protocol features: {:#b}", + protocol_features + ); + } + + if virtio_has_feature(protocol_features, u32::from(VHOST_USER_PROTOCOL_F_MQ)) { + let max_queue_num = locked_client + .get_max_queue_num() + .with_context(|| "Failed to get queue num for vhost-user blk")?; + if self.queue_num() > max_queue_num as usize { + bail!( + "Exceed the max queue num that spdk supported ({} queues)", + max_queue_num + ); + } + + if self.blk_cfg.num_queues.unwrap_or(1) > 1 { + self.config_space.num_queues = self.blk_cfg.num_queues.unwrap_or(1); + } + } else if self.blk_cfg.num_queues.unwrap_or(1) > 1 { + bail!( + "spdk doesn't support multi queue, spdk protocol features: {:#b}", + protocol_features + ); + } + } else { + bail!("Bad spdk feature: {:#b}", features); + } + drop(locked_client); + + self.base.device_features = 1_u64 << VIRTIO_F_VERSION_1 + | 1_u64 << VIRTIO_BLK_F_SIZE_MAX + | 1_u64 << VIRTIO_BLK_F_TOPOLOGY + | 1_u64 << VIRTIO_BLK_F_BLK_SIZE + | 1_u64 << VIRTIO_BLK_F_FLUSH + | 1_u64 << VIRTIO_BLK_F_DISCARD + | 1_u64 << VIRTIO_BLK_F_WRITE_ZEROES + | 1_u64 << VIRTIO_BLK_F_SEG_MAX + | 1_u64 << VIRTIO_BLK_F_RO; + if self.blk_cfg.num_queues.unwrap_or(1) > 1 { + self.base.device_features |= 1_u64 << VIRTIO_BLK_F_MQ; + } + self.base.device_features &= features; + + Ok(()) + } + + fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()> { + read_config_default(self.config_space.as_bytes(), offset, data) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { + check_config_space_rw(self.config_space.as_bytes(), offset, data)?; + + let offset = offset as usize; + let end = offset + data.len(); + let config_slice = self.config_space.as_mut_bytes(); + 
config_slice[offset..end].copy_from_slice(data); + + self.client + .as_ref() + .with_context(|| "Failed to get client when writing config")? + .lock() + .unwrap() + .set_virtio_blk_config(self.config_space) + .with_context(|| "Failed to set config for vhost-user blk")?; + + Ok(()) + } + + fn activate( + &mut self, + _mem_space: Arc, + interrupt_cb: Arc, + queue_evts: Vec>, + ) -> Result<()> { + let mut client = match &self.client { + Some(client) => client.lock().unwrap(), + None => return Err(anyhow!("Failed to get client for vhost-user blk")), + }; + client.features = self.base.driver_features; + client.protocol_features = self.protocol_features; + client.set_queues(&self.base.queues); + client.set_queue_evts(&queue_evts); + + if !self.enable_irqfd { + let queue_num = self.base.queues.len(); + listen_guest_notifier(&mut self.base, &mut client, None, queue_num, interrupt_cb)?; + } + + client.activate_vhost_user()?; + + Ok(()) + } + + fn deactivate(&mut self) -> Result<()> { + if let Some(client) = &self.client { + client.lock().unwrap().reset_vhost_user(false); + } + unregister_event_helper(None, &mut self.base.deactivate_evts)?; + Ok(()) + } + + fn unrealize(&mut self) -> Result<()> { + self.client + .as_ref() + .with_context(|| "Failed to get client when stopping event")? + .lock() + .unwrap() + .delete_event() + .with_context(|| "Failed to delete vhost-user blk event")?; + self.client = None; + Ok(()) + } + + fn set_guest_notifiers(&mut self, queue_evts: &[Arc]) -> Result<()> { + self.enable_irqfd = true; + match &self.client { + Some(client) => client.lock().unwrap().set_call_events(queue_evts), + None => return Err(anyhow!("Failed to get client for vhost-user blk")), + }; + + Ok(()) + } +} diff --git a/virtio/src/vhost/user/client.rs b/virtio/src/vhost/user/client.rs index 215fa375464bd1484657fc4163b7f281dcd7f86d..d4c86a780dd49b610968044d467e7009ec6df744 100644 --- a/virtio/src/vhost/user/client.rs +++ b/virtio/src/vhost/user/client.rs @@ -10,27 +10,44 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
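// Aside (illustration only): the vhost-user client shares guest RAM with the backend through
// file-backed memory, so guest physical addresses must be rewritten into the client's own
// mapping before being handed over. A simplified version of VhostUserMemInfo::addr_to_host()
// further down (the real RegionInfo also carries the file backend and mmap offset):
struct SimpleRegion {
    guest_phys_addr: u64,
    memory_size: u64,
    userspace_addr: u64,
}

fn addr_to_host(regions: &[SimpleRegion], gpa: u64) -> Option<u64> {
    for r in regions {
        // checked_add mirrors the overflow guard this patch adds to the real implementation.
        let end = r.guest_phys_addr.checked_add(r.memory_size)?;
        if gpa >= r.guest_phys_addr && gpa < end {
            return Some(r.userspace_addr + (gpa - r.guest_phys_addr));
        }
    }
    None
}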
+use std::cmp::Ordering; +use std::fs::File; use std::mem::size_of; -use std::os::unix::io::{AsRawFd, RawFd}; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; +use std::rc::Rc; +use std::slice::from_raw_parts; use std::sync::{Arc, Mutex}; +use std::time::Duration; -use address_space::{ - AddressSpace, FileBackend, FlatRange, GuestAddress, Listener, ListenerReqType, RegionIoEventFd, -}; -use machine_manager::event_loop::EventLoop; -use util::loop_context::{EventNotifier, EventNotifierHelper, NotifierOperation}; +use anyhow::{bail, Context, Result}; +use log::{debug, error, info, warn}; use vmm_sys_util::{epoll::EventSet, eventfd::EventFd}; -use super::super::super::{ - errors::{ErrorKind, Result, ResultExt}, - QueueConfig, -}; use super::super::VhostOps; use super::message::{ RegionMemInfo, VhostUserHdrFlag, VhostUserMemContext, VhostUserMemHdr, VhostUserMsgHdr, VhostUserMsgReq, VhostUserVringAddr, VhostUserVringState, }; use super::sock::VhostUserSock; +use crate::device::block::VirtioBlkConfig; +use crate::VhostUser::message::VhostUserConfig; +use crate::{virtio_has_feature, Queue, QueueConfig}; +use address_space::{ + AddressAttr, AddressSpace, FileBackend, FlatRange, GuestAddress, Listener, ListenerReqType, + RegionIoEventFd, +}; +use machine_manager::event_loop::{register_event_helper, unregister_event_helper, EventLoop}; +use util::loop_context::{ + gen_delete_notifiers, EventNotifier, EventNotifierHelper, NotifierCallback, NotifierOperation, +}; +use util::unix::do_mmap; + +/// Vhost supports multiple queue +pub const VHOST_USER_PROTOCOL_F_MQ: u8 = 0; +/// Vhost supports `VHOST_USER_SET_CONFIG` and `VHOST_USER_GET_CONFIG` msg. +pub const VHOST_USER_PROTOCOL_F_CONFIG: u8 = 9; +/// Vhost supports `VHOST_USER_SET_INFLIGHT_FD` and `VHOST_USER_GET_INFLIGHT_FD` msg. +pub const VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD: u8 = 12; struct ClientInternal { // Used to send requests to the vhost user backend in userspace. 
@@ -39,7 +56,6 @@ struct ClientInternal { max_queue_num: u64, } -#[allow(dead_code)] impl ClientInternal { fn new(sock: VhostUserSock, max_queue_num: u64) -> Self { ClientInternal { @@ -49,15 +65,24 @@ impl ClientInternal { } fn wait_ack_msg(&self, request: u32) -> Result { + self.wait_ack_msg_and_data::(request, None, &mut []) + } + + fn wait_ack_msg_and_data( + &self, + request: u32, + payload_opt: Option<&mut [u8]>, + fds: &mut [RawFd], + ) -> Result { let mut hdr = VhostUserMsgHdr::default(); let mut body: T = Default::default(); - let payload_opt: Option<&mut [u8]> = None; - - let (recv_len, _fds_num) = self + let (recv_len, fds_num) = self .sock - .recv_msg(Some(&mut hdr), Some(&mut body), payload_opt, &mut []) - .chain_err(|| "Failed to recv ack msg")?; - + .recv_msg(Some(&mut hdr), Some(&mut body), payload_opt, fds) + .with_context(|| "Failed to recv ack msg")?; + if fds_num != fds.len() { + bail!("Unexpected fds num: {}, expected: {}", fds_num, fds.len()); + } if request != hdr.request || recv_len != (size_of::() + size_of::()) || !hdr.is_reply() @@ -71,24 +96,82 @@ impl ClientInternal { } } -impl EventNotifierHelper for ClientInternal { +fn vhost_user_reconnect(client: &Arc>) { + if !client.lock().unwrap().reconnecting { + return; + } + let cloned_client = client.clone(); + let func = Box::new(move || { + vhost_user_reconnect(&cloned_client); + }); + + let dev_type = client.lock().unwrap().backend_type.to_string(); + info!("Try to reconnect vhost-user {}.", dev_type); + if let Err(_e) = client + .lock() + .unwrap() + .client + .lock() + .unwrap() + .sock + .domain + .connect() + { + // Default reconnecting time: 3s. + EventLoop::get_ctx(None) + .unwrap() + .timer_add(func, Duration::from_secs(3)); + return; + } + + client.lock().unwrap().reconnecting = false; + if let Err(e) = VhostUserClient::add_event(client) { + error!("Failed to update event for client sock, {:?}", e); + return; + } + + let mut locked_client = client.lock().unwrap(); + let protocol_features = locked_client.protocol_features; + if protocol_features != 0 { + if let Err(e) = locked_client.set_protocol_features(protocol_features) { + error!( + "Failed to set protocol features for vhost-user {}, {:?}", + dev_type, e + ); + return; + } + } + + if let Err(e) = locked_client.activate_vhost_user() { + error!("Failed to reactivate vhost-user {}, {:?}", dev_type, e); + } else { + info!("Reconnecting vhost-user {} succeed.", dev_type); + } +} + +impl EventNotifierHelper for VhostUserClient { fn internal_notifiers(client_handler: Arc>) -> Vec { let mut notifiers = Vec::new(); - let mut handlers = Vec::new(); - - let handler: Box Option>> = - Box::new(move |event, _| { - if event & EventSet::HANG_UP == EventSet::HANG_UP { - panic!("Receive the event of HANG_UP from vhost user backend"); - } else { - None - } - }); - handlers.push(Arc::new(Mutex::new(handler))); + let cloned_client = client_handler.clone(); + let handler: Rc = Rc::new(move |event, fd| { + if event & EventSet::HANG_UP == EventSet::HANG_UP { + let mut locked_client = cloned_client.lock().unwrap(); + if !locked_client.reconnecting { + locked_client.reconnecting = true; + drop(locked_client); + vhost_user_reconnect(&cloned_client); + } + Some(gen_delete_notifiers(&[fd])) + } else { + None + } + }); + let locked_client = client_handler.lock().unwrap(); notifiers.push(EventNotifier::new( NotifierOperation::AddShared, - client_handler + locked_client + .client .lock() .unwrap() .sock @@ -96,8 +179,9 @@ impl EventNotifierHelper for ClientInternal { 
.get_stream_raw_fd(), None, EventSet::HANG_UP, - handlers, + vec![handler], )); + notifiers } } @@ -111,22 +195,32 @@ struct RegionInfo { #[derive(Clone)] struct VhostUserMemInfo { regions: Arc>>, + enabled: bool, } -#[allow(dead_code)] impl VhostUserMemInfo { fn new() -> Self { VhostUserMemInfo { regions: Arc::new(Mutex::new(Vec::new())), + enabled: false, } } fn addr_to_host(&self, addr: GuestAddress) -> Option { let addr = addr.raw_value(); for reg_info in self.regions.lock().unwrap().iter() { - if addr >= reg_info.region.guest_phys_addr - && addr < reg_info.region.guest_phys_addr + reg_info.region.memory_size - { + let gpa_end = reg_info + .region + .guest_phys_addr + .checked_add(reg_info.region.memory_size) + .with_context(|| { + format!( + "Overflow when adding gpa with memory_size in region {:x?}", + reg_info.region + ) + }) + .ok()?; + if addr >= reg_info.region.guest_phys_addr && addr < gpa_end { let offset = addr - reg_info.region.guest_phys_addr; return Some(reg_info.region.userspace_addr + offset); } @@ -134,21 +228,22 @@ impl VhostUserMemInfo { None } - fn add_mem_range(&self, fr: &FlatRange) -> address_space::errors::Result<()> { + fn add_mem_range(&self, fr: &FlatRange) -> Result<()> { if fr.owner.region_type() != address_space::RegionType::Ram { return Ok(()); } let guest_phys_addr = fr.addr_range.base.raw_value(); let memory_size = fr.addr_range.size; - let host_address = match fr.owner.get_host_address() { + // SAFETY: memory_size is range's size, so we make sure [hva, hva+size] is in ram range. + let host_address = match unsafe { fr.owner.get_host_address(AddressAttr::Ram) } { Some(addr) => addr, None => bail!("Failed to get host address to add mem range for vhost user device"), }; let file_back = match fr.owner.get_file_backend() { Some(file_back_) => file_back_, _ => { - info!("It is not share memory for vhost user device"); + debug!("It is not share memory for vhost user device"); return Ok(()); } }; @@ -160,19 +255,42 @@ impl VhostUserMemInfo { mmap_offset: file_back.offset + fr.offset_in_region, }; let region_info = RegionInfo { region, file_back }; - self.regions.lock().unwrap().push(region_info); + let mut locked_regions = self.regions.lock().unwrap(); + match locked_regions.binary_search_by(|r| { + if (r.region.guest_phys_addr + r.region.memory_size - 1) < guest_phys_addr { + Ordering::Less + } else if r.region.guest_phys_addr > (guest_phys_addr + memory_size - 1) { + Ordering::Greater + } else { + Ordering::Equal + } + }) { + Ok(p) => bail!( + "New region {:?} is overlapped with region {:?}", + region_info.region, + locked_regions[p].region + ), + Err(p) => locked_regions.insert(p, region_info), + } Ok(()) } - fn delete_mem_range(&self, fr: &FlatRange) -> address_space::errors::Result<()> { + fn delete_mem_range(&self, fr: &FlatRange) -> Result<()> { if fr.owner.region_type() != address_space::RegionType::Ram { return Ok(()); } - let file_back = fr.owner.get_file_backend().unwrap(); + let file_back = match fr.owner.get_file_backend() { + None => { + debug!("fr {:?} backend is not file, ignored", fr); + return Ok(()); + } + Some(fb) => fb, + }; let mut mem_regions = self.regions.lock().unwrap(); - let host_address = match fr.owner.get_host_address() { + // SAFETY: memory_size is range's size, so we make sure [hva, hva+size] is in ram range. 
+ let host_address = match unsafe { fr.owner.get_host_address(AddressAttr::Ram) } { Some(addr) => addr, None => bail!("Failed to get host address to del mem range for vhost user device"), }; @@ -205,18 +323,34 @@ impl Listener for VhostUserMemInfo { 0 } + fn enabled(&self) -> bool { + self.enabled + } + + fn enable(&mut self) { + self.enabled = true; + } + + fn disable(&mut self) { + self.enabled = false; + } + fn handle_request( &self, range: Option<&FlatRange>, _evtfd: Option<&RegionIoEventFd>, req_type: ListenerReqType, - ) -> std::result::Result<(), address_space::errors::Error> { + ) -> std::result::Result<(), anyhow::Error> { match req_type { ListenerReqType::AddRegion => { - self.add_mem_range(range.unwrap())?; + self.add_mem_range( + range.with_context(|| "Flat range is None when adding region")?, + )?; } ListenerReqType::DeleteRegion => { - self.delete_mem_range(range.unwrap())?; + self.delete_mem_range( + range.with_context(|| "Flat range is None when deleting region")?, + )?; } _ => {} } @@ -224,18 +358,72 @@ impl Listener for VhostUserMemInfo { } } +/// Struct for set and get inflight fd request, field is defined by dpdk. +#[repr(C)] +#[derive(Debug, Default, Clone)] +pub struct VhostUserInflight { + // The size of memory area to track inflight I/O. + pub mmap_size: u64, + // The offset from the start of the supplied file descriptor. + pub mmap_offset: u64, + // The number of virtqueues. + pub queue_num: u16, + // The size of virtqueues. + pub queue_size: u16, +} + +/// Struct for saving inflight info, create this struct to save inflight info when +/// vhost client start, use this struct to set inflight fd when vhost client reconnect. +#[derive(Debug)] +struct VhostInflight { + // The inflight file. + file: Arc, + // Fd mmap addr, used for migration. 
+ _addr: u64, + inner: VhostUserInflight, +} + +#[derive(PartialEq, Eq)] +pub enum VhostBackendType { + TypeNet, + TypeBlock, + TypeFs, +} + +impl ToString for VhostBackendType { + fn to_string(&self) -> String { + match self { + VhostBackendType::TypeNet => String::from("net"), + VhostBackendType::TypeBlock => String::from("block"), + VhostBackendType::TypeFs => String::from("fs"), + } + } +} + /// Struct for communication with the vhost user backend in userspace -#[derive(Clone)] pub struct VhostUserClient { client: Arc>, mem_info: VhostUserMemInfo, + delete_evts: Vec, + queues: Vec>>, + queue_evts: Vec>, + call_events: Vec>, + pub features: u64, + reconnecting: bool, + inflight: Option, + backend_type: VhostBackendType, + pub protocol_features: u64, } -#[allow(dead_code)] impl VhostUserClient { - pub fn new(mem_space: &Arc, path: &str, max_queue_num: u64) -> Result { + pub fn new( + mem_space: &Arc, + path: &str, + max_queue_num: u64, + backend_type: VhostBackendType, + ) -> Result { let mut sock = VhostUserSock::new(path); - sock.domain.connect().chain_err(|| { + sock.domain.connect().with_context(|| { format!( "Failed to connect the socket {} for vhost user client", path @@ -245,68 +433,424 @@ impl VhostUserClient { let mem_info = VhostUserMemInfo::new(); mem_space .register_listener(Arc::new(Mutex::new(mem_info.clone()))) - .chain_err(|| "Failed to register memory for vhost user client")?; + .with_context(|| "Failed to register memory for vhost user client")?; let client = Arc::new(Mutex::new(ClientInternal::new(sock, max_queue_num))); - Ok(VhostUserClient { client, mem_info }) + Ok(VhostUserClient { + client, + mem_info, + delete_evts: Vec::new(), + queues: Vec::new(), + queue_evts: Vec::new(), + call_events: Vec::new(), + features: 0, + reconnecting: false, + inflight: None, + backend_type, + protocol_features: 0_u64, + }) } - pub fn add_event_notifier(&self) -> Result<()> { - EventLoop::update_event( - EventNotifierHelper::internal_notifiers(self.client.clone()), - None, - ) - .chain_err(|| "Failed to update event for client sock")?; + /// Save queue info used for reconnection. + pub fn set_queues(&mut self, queues: &[Arc>]) { + for queue in queues.iter() { + self.queues.push(queue.clone()); + } + } + + /// Save eventfd used for reconnection. + pub fn set_queue_evts(&mut self, queue_evts: &[Arc]) { + for evt in queue_evts.iter() { + self.queue_evts.push(evt.clone()); + } + } + + /// Save irqfd used for reconnection. + pub fn set_call_events(&mut self, call_events: &[Arc]) { + for evt in call_events.iter() { + self.call_events.push(evt.clone()); + } + } + + /// Set inflight fd, include get inflight fd from vhost and set inflight to vhost. + pub fn set_inflight(&mut self, queue_num: u16, queue_size: u16) -> Result<()> { + if self.backend_type != VhostBackendType::TypeBlock { + // Only vhost-user-blk supports inflight fd now. + return Ok(()); + } + let protocol_feature = self + .get_protocol_features() + .with_context(|| "Failed to get protocol features for vhost-user blk")?; + if virtio_has_feature( + protocol_feature, + u32::from(VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD), + ) { + if self.inflight.is_none() { + // Expect 1 fd. + let mut fds = [RawFd::default()]; + let vhost_user_inflight = self.get_inflight_fd(queue_num, queue_size, &mut fds)?; + let file = Arc::new( + // SAFETY: fds[0] create in function of get_inflight_fd. 
+ unsafe { File::from_raw_fd(fds[0]) }, + ); + let hva = do_mmap( + &Some(file.as_ref()), + vhost_user_inflight.mmap_size, + vhost_user_inflight.mmap_offset, + true, + true, + false, + )?; + let inflight = VhostInflight { + file, + _addr: hva, + inner: vhost_user_inflight, + }; + self.inflight = Some(inflight); + } + let inflight = self.inflight.as_ref().unwrap(); + self.set_inflight_fd(inflight.inner.clone(), inflight.file.as_raw_fd())?; + } else { + bail!( + "Failed to get inflight fd, spdk doesn't support, spdk protocol feature: {:#b}", + protocol_feature + ); + } + Ok(()) + } + + /// Activate device by vhost-user protocol. + pub fn activate_vhost_user(&mut self) -> Result<()> { + self.set_owner() + .with_context(|| "Failed to set owner for vhost-user")?; + + self.set_features(self.features) + .with_context(|| "Failed to set features for vhost-user")?; + + self.set_mem_table() + .with_context(|| "Failed to set mem table for vhost-user")?; + + let queue_size = self + .queues + .first() + .unwrap() + .lock() + .unwrap() + .vring + .actual_size(); + self.set_inflight(self.queues.len() as u16, queue_size)?; + // Set all vring num to notify ovs/dpdk how many queues it needs to poll + // before setting vring info. + for (queue_index, queue_mutex) in self.queues.iter().enumerate() { + let actual_size = queue_mutex.lock().unwrap().vring.actual_size(); + self.set_vring_num(queue_index, actual_size) + .with_context(|| { + format!( + "Failed to set vring num for vhost-user, index: {}, size: {}", + queue_index, actual_size, + ) + })?; + } + + for (queue_index, queue_mutex) in self.queues.iter().enumerate() { + let queue = queue_mutex.lock().unwrap(); + if !queue.vring.is_enabled() { + warn!("Queue {} is not enabled, skip it", queue_index); + continue; + } + + let queue_config = queue.vring.get_queue_config(); + self.set_vring_addr(&queue_config, queue_index, 0) + .with_context(|| { + format!( + "Failed to set vring addr for vhost-user, index: {}", + queue_index, + ) + })?; + // When spdk/ovs has been killed, stratovirt can not get the last avail + // index in spdk/ovs, it can only use used index as last avail index. + let last_avail_idx = queue.vring.get_used_idx()?; + self.set_vring_base(queue_index, last_avail_idx) + .with_context(|| { + format!( + "Failed to set vring base for vhost-user, index: {}", + queue_index, + ) + })?; + self.set_vring_kick(queue_index, self.queue_evts[queue_index].clone()) + .with_context(|| { + format!( + "Failed to set vring kick for vhost-user, index: {}", + queue_index, + ) + })?; + self.set_vring_call(queue_index, self.call_events[queue_index].clone()) + .with_context(|| { + format!( + "Failed to set vring call for vhost-user, index: {}", + queue_index, + ) + })?; + } + + if self.backend_type == VhostBackendType::TypeBlock { + // If VHOST_USER_F_PROTOCOL_FEATURES has been negotiated, it should call + // set_vring_enable to enable vring. Otherwise, the ring is enabled by default. + // Currently, only vhost-user-blk device support negotiate VHOST_USER_F_PROTOCOL_FEATURES. 
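// Aside (illustration only): when VHOST_USER_F_PROTOCOL_FEATURES has been negotiated the
// backend brings every ring up in the disabled state, so the set_vring_enable(true) calls in
// the loop below are what actually start request processing; without that feature the rings
// are enabled by default and the call must be skipped. Condensed as a predicate (bit value
// 30 comes from the vhost-user spec; the named constant lives in message.rs):
fn ring_needs_explicit_enable(driver_features: u64) -> bool {
    const VHOST_USER_F_PROTOCOL_FEATURES: u64 = 30;
    driver_features & (1u64 << VHOST_USER_F_PROTOCOL_FEATURES) != 0
}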
+ for (queue_index, queue_mutex) in self.queues.iter().enumerate() { + if !queue_mutex.lock().unwrap().is_enabled() { + continue; + } + self.set_vring_enable(queue_index, true).with_context(|| { + format!( + "Failed to set vring enable for vhost-user, index: {}", + queue_index, + ) + })?; + } + } + + Ok(()) + } + + fn reset_queues(&mut self) -> Result<()> { + for (queue_index, queue_mutex) in self.queues.iter().enumerate() { + if !queue_mutex.lock().unwrap().vring.is_enabled() { + continue; + } + self.set_vring_enable(queue_index, false) + .with_context(|| format!("Failed to set vring disable, index: {}", queue_index))?; + self.get_vring_base(queue_index) + .with_context(|| format!("Failed to get vring base, index: {}", queue_index))?; + } + Ok(()) + } + + pub fn reset_vhost_user(&mut self, reset_owner: bool) { + let dev_type = self.backend_type.to_string(); + if reset_owner { + if let Err(e) = self.reset_owner() { + warn!("Failed to reset owner for vhost-user {}: {:?}", dev_type, e); + } + } else if let Err(e) = self.reset_queues() { + warn!( + "Failed to reset queues for vhost-user {}: {:?}", + dev_type, e + ); + } + + self.queue_evts.clear(); + self.call_events.clear(); + self.queues.clear(); + } + + pub fn add_event(client: &Arc>) -> Result<()> { + let notifiers = EventNotifierHelper::internal_notifiers(client.clone()); + register_event_helper(notifiers, None, &mut client.lock().unwrap().delete_evts) + .with_context(|| "Failed to update event for client sock") + } + + /// Delete the socket event in ClientInternal. + pub fn delete_event(&mut self) -> Result<()> { + if self.reconnecting { + self.reconnecting = false; + // The socket event has been deleted before try to reconnect so let's just return. + return Ok(()); + } + unregister_event_helper(None, &mut self.delete_evts) + } + + /// Send get protocol features request to vhost. + pub fn get_protocol_features(&self) -> Result { + let request = VhostUserMsgReq::GetProtocolFeatures as u32; + let hdr = VhostUserMsgHdr::new(request, VhostUserHdrFlag::NeedReply as u32, 0); + let body_opt: Option<&u32> = None; + let payload_opt: Option<&[u8]> = None; + let client = self.client.lock().unwrap(); + client + .sock + .send_msg(Some(&hdr), body_opt, payload_opt, &[]) + .with_context(|| "Failed to send msg for getting features")?; + let features = client + .wait_ack_msg::(request) + .with_context(|| "Failed to wait ack msg for getting protocols features")?; + + Ok(features) + } + + /// Send u64 value to vhost. + fn set_value(&self, request: VhostUserMsgReq, value: u64) -> Result<()> { + let hdr = VhostUserMsgHdr::new(request as u32, 0, size_of::() as u32); + let payload_opt: Option<&[u8]> = None; + self.client + .lock() + .unwrap() + .sock + .send_msg(Some(&hdr), Some(&value), payload_opt, &[]) + .with_context(|| "Failed to send msg for setting value")?; + + Ok(()) + } + + /// Set protocol features to vhost. + pub fn set_protocol_features(&self, features: u64) -> Result<()> { + self.set_value(VhostUserMsgReq::SetProtocolFeatures, features) + } + + /// Get virtio blk config from vhost. + pub fn get_virtio_blk_config(&self) -> Result { + let request = VhostUserMsgReq::GetConfig as u32; + let config_len = size_of::>(); + let hdr = VhostUserMsgHdr::new( + request, + VhostUserHdrFlag::NeedReply as u32, + config_len as u32, + ); + let cnf = VhostUserConfig::new(0, 0, VirtioBlkConfig::default())?; + let body_opt: Option<&u32> = None; + // SAFETY: the memory is allocated by us and it has been already aligned. 
+ let payload_opt: Option<&[u8]> = Some(unsafe { + from_raw_parts( + (&cnf as *const VhostUserConfig) as *const u8, + config_len, + ) + }); + let client = self.client.lock().unwrap(); + client + .sock + .send_msg(Some(&hdr), body_opt, payload_opt, &[]) + .with_context(|| "Failed to send msg for getting config")?; + let res = client + .wait_ack_msg::>(request) + .with_context(|| "Failed to wait ack msg for getting virtio blk config")?; + Ok(res.config) + } + + /// Set virtio blk config to vhost. + pub fn set_virtio_blk_config(&self, cnf: VirtioBlkConfig) -> Result<()> { + let client = self.client.lock().unwrap(); + let request = VhostUserMsgReq::SetConfig as u32; + let config_len = size_of::>(); + let hdr = VhostUserMsgHdr::new(request, 0, config_len as u32); + let payload_opt: Option<&[u8]> = None; + let config = VhostUserConfig::new(0, 0, cnf)?; + client + .sock + .send_msg(Some(&hdr), Some(&config), payload_opt, &[]) + .with_context(|| "Failed to send msg for getting virtio blk config")?; + Ok(()) + } + + /// Get max queues number that vhost supports. + pub fn get_max_queue_num(&self) -> Result { + let request = VhostUserMsgReq::GetQueueNum as u32; + let hdr = VhostUserMsgHdr::new(request, VhostUserHdrFlag::NeedReply as u32, 0); + let body_opt: Option<&u32> = None; + let payload_opt: Option<&[u8]> = None; + let client = self.client.lock().unwrap(); + client + .sock + .send_msg(Some(&hdr), body_opt, payload_opt, &[]) + .with_context(|| "Failed to send msg for getting queue num")?; + let queue_num = client + .wait_ack_msg::(request) + .with_context(|| "Failed to wait ack msg for getting queue num")?; + Ok(queue_num) + } + /// Get inflight file info and inflight fd from vhost. + pub fn get_inflight_fd( + &self, + queue_num: u16, + queue_size: u16, + fds: &mut [RawFd], + ) -> Result { + let request = VhostUserMsgReq::GetInflightFd as u32; + let data_len = size_of::(); + let hdr = + VhostUserMsgHdr::new(request, VhostUserHdrFlag::NeedReply as u32, data_len as u32); + let inflight = VhostUserInflight { + mmap_size: 0, + mmap_offset: 0, + queue_num, + queue_size, + }; + let body_opt: Option<&u32> = None; + let payload_opt: Option<&[u8]> = Some( + // SAFETY: + // 1. inflight can be guaranteed not null. + // 2. data_len is constant. + unsafe { + from_raw_parts( + (&inflight as *const VhostUserInflight) as *const u8, + data_len, + ) + }, + ); + let client = self.client.lock().unwrap(); + client + .sock + .send_msg(Some(&hdr), body_opt, payload_opt, &[]) + .with_context(|| "Failed to send msg for getting inflight fd")?; + let res = client + .wait_ack_msg_and_data::(request, None, fds) + .with_context(|| "Failed to wait ack msg for getting inflight fd")?; + Ok(res) + } + + /// Set inflight file info and send inflight fd to vhost. 
+ pub fn set_inflight_fd(&self, inflight: VhostUserInflight, fd: RawFd) -> Result<()> { + let request = VhostUserMsgReq::SetInflightFd as u32; + let len = size_of::(); + let hdr = VhostUserMsgHdr::new(request, 0, len as u32); + let payload_opt: Option<&[u8]> = None; + self.client + .lock() + .unwrap() + .sock + .send_msg(Some(&hdr), Some(&inflight), payload_opt, &[fd]) + .with_context(|| "Failed to send msg for setting inflight fd")?; Ok(()) } } impl VhostOps for VhostUserClient { fn set_owner(&self) -> Result<()> { - let client = self.client.lock().unwrap(); + trace::vhost_set_owner(); let hdr = VhostUserMsgHdr::new(VhostUserMsgReq::SetOwner as u32, 0, 0); let body_opt: Option<&u32> = None; let payload_opt: Option<&[u8]> = None; - client + self.client + .lock() + .unwrap() .sock .send_msg(Some(&hdr), body_opt, payload_opt, &[]) - .chain_err(|| "Failed to send msg for setting owner")?; + .with_context(|| "Failed to send msg for setting owner")?; Ok(()) } fn get_features(&self) -> Result { - let client = self.client.lock().unwrap(); let request = VhostUserMsgReq::GetFeatures as u32; let hdr = VhostUserMsgHdr::new(request, VhostUserHdrFlag::NeedReply as u32, 0); let body_opt: Option<&u32> = None; let payload_opt: Option<&[u8]> = None; + let client = self.client.lock().unwrap(); client .sock .send_msg(Some(&hdr), body_opt, payload_opt, &[]) - .chain_err(|| "Failed to send msg for getting features")?; + .with_context(|| "Failed to send msg for getting features")?; let features = client .wait_ack_msg::(request) - .chain_err(|| "Failed to wait ack msg for getting features")?; + .with_context(|| "Failed to wait ack msg for getting features")?; + trace::vhost_get_features(features); Ok(features) } fn set_features(&self, features: u64) -> Result<()> { - let client = self.client.lock().unwrap(); - let hdr = VhostUserMsgHdr::new( - VhostUserMsgReq::SetFeatures as u32, - 0, - size_of::() as u32, - ); - let payload_opt: Option<&[u8]> = None; - client - .sock - .send_msg(Some(&hdr), Some(&features), payload_opt, &[]) - .chain_err(|| "Failed to send msg for setting features")?; - - Ok(()) + trace::vhost_set_features(features); + self.set_value(VhostUserMsgReq::SetFeatures, features) } fn set_mem_table(&self) -> Result<()> { @@ -322,12 +866,14 @@ impl VhostOps for VhostUserClient { memcontext.region_add(region_info.region); fds.push(region_info.file_back.file.as_raw_fd()); } + drop(mem_regions); - let client = self.client.lock().unwrap(); let len = size_of::() + num_region * size_of::(); let hdr = VhostUserMsgHdr::new(VhostUserMsgReq::SetMemTable as u32, 0, len as u32); let memhdr = VhostUserMemHdr::new(num_region as u32, 0); - client + self.client + .lock() + .unwrap() .sock .send_msg( Some(&hdr), @@ -335,16 +881,18 @@ impl VhostOps for VhostUserClient { Some(memcontext.regions.as_slice()), &fds, ) - .chain_err(|| "Failed to send msg for setting mem table")?; + .with_context(|| "Failed to send msg for setting mem table")?; + trace::vhost_set_mem_table(&memcontext.regions); Ok(()) } fn set_vring_num(&self, queue_idx: usize, num: u16) -> Result<()> { + trace::vhost_set_vring_num(queue_idx, num); let client = self.client.lock().unwrap(); if queue_idx as u64 > client.max_queue_num { bail!( - "The queue index {} is invaild {} for setting vring num", + "The queue index {} is invalid {} for setting vring num", queue_idx, client.max_queue_num ); @@ -356,17 +904,16 @@ impl VhostOps for VhostUserClient { size_of::() as u32, ); let payload_opt: Option<&[u8]> = None; - let vring_state = 
VhostUserVringState::new(queue_idx as u32, num as u32); + let vring_state = VhostUserVringState::new(queue_idx as u32, u32::from(num)); client .sock .send_msg(Some(&hdr), Some(&vring_state), payload_opt, &[]) - .chain_err(|| "Failed to send msg for setting vring num")?; + .with_context(|| "Failed to send msg for setting vring num")?; Ok(()) } fn set_vring_addr(&self, queue: &QueueConfig, index: usize, flags: u32) -> Result<()> { - let client = self.client.lock().unwrap(); let hdr = VhostUserMsgHdr::new( VhostUserMsgReq::SetVringAddr as u32, 0, @@ -376,28 +923,29 @@ impl VhostOps for VhostUserClient { let desc_user_addr = self .mem_info .addr_to_host(queue.desc_table) - .ok_or_else(|| { - ErrorKind::Msg(format!( + .with_context(|| { + format!( "Failed to transform desc-table address {}", queue.desc_table.0 - )) + ) + })?; + let used_user_addr = self + .mem_info + .addr_to_host(queue.used_ring) + .with_context(|| { + format!( + "Failed to transform used ring address {}", + queue.used_ring.0 + ) })?; - - let used_user_addr = self.mem_info.addr_to_host(queue.used_ring).ok_or_else(|| { - ErrorKind::Msg(format!( - "Failed to transform used ring address {}", - queue.used_ring.0 - )) - })?; - let avail_user_addr = self .mem_info .addr_to_host(queue.avail_ring) - .ok_or_else(|| { - ErrorKind::Msg(format!( + .with_context(|| { + format!( "Failed to transform avail ring address {}", queue.avail_ring.0 - )) + ) })?; let vring_addr = VhostUserVringAddr { index: index as u32, @@ -407,15 +955,19 @@ impl VhostOps for VhostUserClient { avail_user_addr, log_guest_addr: 0_u64, }; - client + trace::vhost_set_vring_addr(&vring_addr); + self.client + .lock() + .unwrap() .sock .send_msg(Some(&hdr), Some(&vring_addr), payload_opt, &[]) - .chain_err(|| "Failed to send msg for setting vring addr")?; + .with_context(|| "Failed to send msg for setting vring addr")?; Ok(()) } fn set_vring_base(&self, queue_idx: usize, last_avail_idx: u16) -> Result<()> { + trace::vhost_set_vring_base(queue_idx, last_avail_idx); let client = self.client.lock().unwrap(); if queue_idx as u64 > client.max_queue_num { bail!( @@ -431,16 +983,17 @@ impl VhostOps for VhostUserClient { size_of::() as u32, ); let payload_opt: Option<&[u8]> = None; - let vring_state = VhostUserVringState::new(queue_idx as u32, last_avail_idx as u32); + let vring_state = VhostUserVringState::new(queue_idx as u32, u32::from(last_avail_idx)); client .sock .send_msg(Some(&hdr), Some(&vring_state), payload_opt, &[]) - .chain_err(|| "Failed to send msg for setting vring base")?; + .with_context(|| "Failed to send msg for setting vring base")?; Ok(()) } - fn set_vring_call(&self, queue_idx: usize, fd: &EventFd) -> Result<()> { + fn set_vring_call(&self, queue_idx: usize, fd: Arc) -> Result<()> { + trace::vhost_set_vring_call(queue_idx, &fd); let client = self.client.lock().unwrap(); if queue_idx as u64 > client.max_queue_num { bail!( @@ -459,16 +1012,17 @@ impl VhostOps for VhostUserClient { client .sock .send_msg(Some(&hdr), Some(&queue_idx), payload_opt, &[fd.as_raw_fd()]) - .chain_err(|| "Failed to send msg for setting vring call")?; + .with_context(|| "Failed to send msg for setting vring call")?; Ok(()) } - fn set_vring_kick(&self, queue_idx: usize, fd: &EventFd) -> Result<()> { + fn set_vring_kick(&self, queue_idx: usize, fd: Arc) -> Result<()> { + trace::vhost_set_vring_kick(queue_idx, &fd); let client = self.client.lock().unwrap(); if queue_idx as u64 > client.max_queue_num { bail!( - "The queue index {} is invaild {} for setting vring kick", + "The queue index 
{} is invalid {} for setting vring kick", queue_idx, client.max_queue_num ); @@ -483,16 +1037,17 @@ impl VhostOps for VhostUserClient { client .sock .send_msg(Some(&hdr), Some(&queue_idx), payload_opt, &[fd.as_raw_fd()]) - .chain_err(|| "Failed to send msg for setting vring kick")?; + .with_context(|| "Failed to send msg for setting vring kick")?; Ok(()) } fn set_vring_enable(&self, queue_idx: usize, status: bool) -> Result<()> { + trace::vhost_set_vring_enable(queue_idx, status); let client = self.client.lock().unwrap(); if queue_idx as u64 > client.max_queue_num { bail!( - "The queue index {} is invaild {} for setting vring enable", + "The queue index {} is invalid {} for setting vring enable", queue_idx, client.max_queue_num ); @@ -504,20 +1059,48 @@ impl VhostOps for VhostUserClient { size_of::() as u32, ); let payload_opt: Option<&[u8]> = None; - let vring_state = VhostUserVringState::new(queue_idx as u32, status as u32); + let vring_state = VhostUserVringState::new(queue_idx as u32, u32::from(status)); client .sock .send_msg(Some(&hdr), Some(&vring_state), payload_opt, &[]) - .chain_err(|| "Failed to send msg for setting vring enable")?; + .with_context(|| "Failed to send msg for setting vring enable")?; Ok(()) } fn reset_owner(&self) -> Result<()> { - bail!("Does not support for resetting owner") + trace::vhost_reset_owner(); + let hdr = VhostUserMsgHdr::new(VhostUserMsgReq::ResetOwner as u32, 0, 0); + let body_opt: Option<&u32> = None; + let payload_opt: Option<&[u8]> = None; + let client = self.client.lock().unwrap(); + client + .sock + .send_msg(Some(&hdr), body_opt, payload_opt, &[]) + .with_context(|| "Failed to send msg for reset_owner")?; + Ok(()) } - fn get_vring_base(&self, _queue_idx: usize) -> Result { - bail!("Does not support for getting vring base") + fn get_vring_base(&self, queue_idx: usize) -> Result { + let request = VhostUserMsgReq::GetVringBase as u32; + let hdr = VhostUserMsgHdr::new( + request, + VhostUserHdrFlag::NeedReply as u32, + size_of::() as u32, + ); + + let vring_state = VhostUserVringState::new(queue_idx as u32, 0_u32); + let payload_opt: Option<&[u8]> = None; + let client = self.client.lock().unwrap(); + client + .sock + .send_msg(Some(&hdr), Some(&vring_state), payload_opt, &[]) + .with_context(|| "Failed to send msg for getting vring base")?; + let res = client + .wait_ack_msg::(request) + .with_context(|| "Failed to wait ack msg for getting vring base")?; + + trace::vhost_get_vring_base(queue_idx, res.value as u16); + Ok(res.value as u16) } } diff --git a/virtio/src/vhost/user/fs.rs b/virtio/src/vhost/user/fs.rs new file mode 100644 index 0000000000000000000000000000000000000000..f08ddd399fea540c2fd78cc140905ebfb0898218 --- /dev/null +++ b/virtio/src/vhost/user/fs.rs @@ -0,0 +1,257 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. 
+ +// The num of high priority queue +const VIRIOT_FS_HIGH_PRIO_QUEUE_NUM: usize = 1; +// The num of request queue +const VIRTIO_FS_REQ_QUEUES_NUM: usize = 1; +// The size of queue for virtio fs +const VIRTIO_FS_QUEUE_SIZE: u16 = 128; + +use std::sync::{Arc, Mutex}; + +use anyhow::{anyhow, bail, Context, Result}; +use clap::Parser; +use vmm_sys_util::eventfd::EventFd; + +use super::super::super::{VirtioDevice, VIRTIO_TYPE_FS}; +use super::super::VhostOps; +use super::{listen_guest_notifier, VhostBackendType, VhostUserClient}; +use crate::{read_config_default, VirtioBase, VirtioInterrupt}; +use address_space::AddressSpace; +use machine_manager::config::{ + get_pci_df, parse_bool, valid_id, ChardevConfig, ConfigError, SocketType, +}; +use machine_manager::event_loop::unregister_event_helper; +use util::byte_code::ByteCode; +use util::gen_base_func; + +const MAX_TAG_LENGTH: usize = 36; + +/// Config struct for `fs`. +/// Contains fs device's attr. +#[derive(Parser, Debug, Clone)] +#[command(no_binary_name(true))] +pub struct FsConfig { + #[arg(long, value_parser = ["vhost-user-fs-pci", "vhost-user-fs-device"])] + pub classtype: String, + #[arg(long, value_parser = valid_id)] + pub id: String, + #[arg(long)] + pub chardev: String, + #[arg(long, value_parser = valid_tag)] + pub tag: String, + #[arg(long)] + pub bus: Option, + #[arg(long, value_parser = get_pci_df)] + pub addr: Option<(u8, u8)>, + #[arg(long, value_parser = parse_bool)] + pub multifunction: Option, +} + +fn valid_tag(tag: &str) -> Result { + if tag.len() >= MAX_TAG_LENGTH { + return Err(anyhow!(ConfigError::StringLengthTooLong( + "fs device tag".to_string(), + MAX_TAG_LENGTH - 1, + ))); + } + Ok(tag.to_string()) +} + +#[derive(Copy, Clone)] +#[repr(C, packed)] +struct VirtioFsConfig { + tag: [u8; MAX_TAG_LENGTH], + num_request_queues: u32, +} + +impl Default for VirtioFsConfig { + fn default() -> Self { + VirtioFsConfig { + tag: [0; MAX_TAG_LENGTH], + num_request_queues: 0, + } + } +} + +impl ByteCode for VirtioFsConfig {} + +pub struct Fs { + base: VirtioBase, + fs_cfg: FsConfig, + chardev_cfg: ChardevConfig, + config_space: VirtioFsConfig, + client: Option>>, + mem_space: Arc, + enable_irqfd: bool, +} + +impl Fs { + /// The construct function of the Fs device. + /// + /// # Arguments + /// + /// `fs_cfg` - The config of this Fs device. + /// `chardev_cfg` - The config of this Fs device's chardev. + /// `mem_space` - The address space of this Fs device. + pub fn new(fs_cfg: FsConfig, chardev_cfg: ChardevConfig, mem_space: Arc) -> Self { + let queue_num = VIRIOT_FS_HIGH_PRIO_QUEUE_NUM + VIRTIO_FS_REQ_QUEUES_NUM; + let queue_size = VIRTIO_FS_QUEUE_SIZE; + + Fs { + base: VirtioBase::new(VIRTIO_TYPE_FS, queue_num, queue_size), + fs_cfg, + chardev_cfg, + config_space: VirtioFsConfig::default(), + client: None, + mem_space, + enable_irqfd: false, + } + } +} + +impl VirtioDevice for Fs { + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + + fn realize(&mut self) -> Result<()> { + let queues_num = VIRIOT_FS_HIGH_PRIO_QUEUE_NUM + VIRTIO_FS_REQ_QUEUES_NUM; + + let socket_path = match self.chardev_cfg.classtype.socket_type()? 
{ + SocketType::Unix { path } => path, + _ => bail!("Vhost-user-fs Chardev backend should be unix-socket type."), + }; + + let client = VhostUserClient::new( + &self.mem_space, + &socket_path, + queues_num as u64, + VhostBackendType::TypeFs, + ) + .with_context(|| { + "Failed to create the client which communicates with the server for virtio fs" + })?; + let client = Arc::new(Mutex::new(client)); + VhostUserClient::add_event(&client)?; + self.client = Some(client); + + self.init_config_features()?; + + Ok(()) + } + + fn init_config_features(&mut self) -> Result<()> { + let tag_bytes_vec = self.fs_cfg.tag.clone().into_bytes(); + self.config_space.tag[..tag_bytes_vec.len()].copy_from_slice(tag_bytes_vec.as_slice()); + self.config_space.num_request_queues = VIRTIO_FS_REQ_QUEUES_NUM as u32; + + let client = self.client.as_ref().unwrap(); + self.base.device_features = client + .lock() + .unwrap() + .get_features() + .with_context(|| "Failed to get features for virtio fs")?; + + Ok(()) + } + + fn read_config(&self, offset: u64, data: &mut [u8]) -> Result<()> { + read_config_default(self.config_space.as_bytes(), offset, data) + } + + fn write_config(&mut self, _offset: u64, _data: &[u8]) -> Result<()> { + Ok(()) + } + + fn activate( + &mut self, + _mem_space: Arc, + interrupt_cb: Arc, + queue_evts: Vec>, + ) -> Result<()> { + let queues = &self.base.queues; + let mut client = match &self.client { + Some(client) => client.lock().unwrap(), + None => return Err(anyhow!("Failed to get client for virtio fs")), + }; + client.features = self.base.driver_features; + client.set_queues(queues); + client.set_queue_evts(&queue_evts); + + if !self.enable_irqfd { + let queue_num = queues.len(); + listen_guest_notifier(&mut self.base, &mut client, None, queue_num, interrupt_cb)?; + } + + client.activate_vhost_user()?; + + Ok(()) + } + + fn set_guest_notifiers(&mut self, queue_evts: &[Arc]) -> Result<()> { + self.enable_irqfd = true; + match &self.client { + Some(client) => client.lock().unwrap().set_call_events(queue_evts), + None => return Err(anyhow!("Failed to get client for virtio fs")), + } + Ok(()) + } + + fn deactivate(&mut self) -> Result<()> { + if let Some(client) = &self.client { + client.lock().unwrap().reset_vhost_user(true); + } + unregister_event_helper(None, &mut self.base.deactivate_evts)?; + Ok(()) + } + + fn reset(&mut self) -> Result<()> { + self.base.device_features = 0_u64; + self.base.driver_features = 0_u64; + self.config_space = VirtioFsConfig::default(); + self.enable_irqfd = false; + + let client = match &self.client { + None => return Err(anyhow!("Failed to get client when resetting virtio fs")), + Some(client_) => client_, + }; + client + .lock() + .unwrap() + .delete_event() + .with_context(|| "Failed to delete virtio fs event")?; + self.client = None; + + self.realize() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use machine_manager::config::str_slip_to_clap; + + #[test] + fn test_vhostuserfs_cmdline_parser() { + // Test1: Right. + let fs_cmd = "vhost-user-fs-device,id=fs0,chardev=chardev0,tag=tag0"; + let fs_config = FsConfig::try_parse_from(str_slip_to_clap(fs_cmd, true, false)).unwrap(); + assert_eq!(fs_config.id, "fs0"); + assert_eq!(fs_config.chardev, "chardev0"); + assert_eq!(fs_config.tag, "tag0"); + + // Test2: Illegal value. 
+ let fs_cmd = "vhost-user-fs-device,id=fs0,chardev=chardev0,tag=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; + let result = FsConfig::try_parse_from(str_slip_to_clap(fs_cmd, true, false)); + assert!(result.is_err()); + } +} diff --git a/virtio/src/vhost/user/message.rs b/virtio/src/vhost/user/message.rs index a921c781321245a826c387d2e902d3ff7a031a67..44e847732ad93e1dd41d0b2a2b5a085ca8f3892a 100644 --- a/virtio/src/vhost/user/message.rs +++ b/virtio/src/vhost/user/message.rs @@ -10,14 +10,19 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. +use std::mem::size_of; + +use anyhow::{bail, Result}; + /// The version of the protocol StratoVirt support. pub const VHOST_USER_VERSION: u32 = 0x1; pub const VHOST_USER_MSG_MAX_SIZE: usize = 0x1000; pub const MAX_ATTACHED_FD_ENTRIES: usize = 32; +pub const VHOST_USER_F_PROTOCOL_FEATURES: u32 = 30; +pub const VHOST_USER_MAX_CONFIG_SIZE: u32 = 256; /// Type of requests sending from vhost user device to the userspace process. #[repr(u32)] -#[allow(dead_code)] #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum VhostUserMsgReq { None = 0, @@ -56,13 +61,54 @@ pub enum VhostUserMsgReq { MaxCmd = 33, } +impl From for VhostUserMsgReq { + fn from(t: u32) -> Self { + match t { + 0 => VhostUserMsgReq::None, + 1 => VhostUserMsgReq::GetFeatures, + 2 => VhostUserMsgReq::SetFeatures, + 3 => VhostUserMsgReq::SetOwner, + 4 => VhostUserMsgReq::ResetOwner, + 5 => VhostUserMsgReq::SetMemTable, + 6 => VhostUserMsgReq::SetLogBase, + 7 => VhostUserMsgReq::SetLogFd, + 8 => VhostUserMsgReq::SetVringNum, + 9 => VhostUserMsgReq::SetVringAddr, + 10 => VhostUserMsgReq::SetVringBase, + 11 => VhostUserMsgReq::GetVringBase, + 12 => VhostUserMsgReq::SetVringKick, + 13 => VhostUserMsgReq::SetVringCall, + 14 => VhostUserMsgReq::SetVringErr, + 15 => VhostUserMsgReq::GetProtocolFeatures, + 16 => VhostUserMsgReq::SetProtocolFeatures, + 17 => VhostUserMsgReq::GetQueueNum, + 18 => VhostUserMsgReq::SetVringEnable, + 19 => VhostUserMsgReq::SendRarp, + 20 => VhostUserMsgReq::NetSetMtu, + 21 => VhostUserMsgReq::SetSlaveReqFd, + 22 => VhostUserMsgReq::IotlbMsg, + 23 => VhostUserMsgReq::SetVringEndian, + 24 => VhostUserMsgReq::GetConfig, + 25 => VhostUserMsgReq::SetConfig, + 26 => VhostUserMsgReq::CreateCryptoSession, + 27 => VhostUserMsgReq::CloseCryptoSession, + 28 => VhostUserMsgReq::PostcopyAdvise, + 29 => VhostUserMsgReq::PostcopyListen, + 30 => VhostUserMsgReq::PostcopyEnd, + 31 => VhostUserMsgReq::GetInflightFd, + 32 => VhostUserMsgReq::SetInflightFd, + _ => VhostUserMsgReq::MaxCmd, + } + } +} + /// The meaning of flag bits for header of vhost user message. pub enum VhostUserHdrFlag { /// Bits[0..1] is message version number. Version = 0x3, - /// Bits[2] Mark message as reply. + /// Bits`\[`2`]` Mark message as reply. Reply = 0x4, - /// Bits[3] Sender anticipates a reply message from the peer. + /// Bits`\[`3`\]` Sender anticipates a reply message from the peer. NeedReply = 0x8, /// All valid bits. AllFlags = 0xc, @@ -70,7 +116,7 @@ pub enum VhostUserHdrFlag { ReservedBits = !0xf, } -///the struct for the header of vhost user message. +/// the struct for the header of vhost user message. #[repr(C)] pub struct VhostUserMsgHdr { /// The request id for vhost-user message @@ -81,7 +127,6 @@ pub struct VhostUserMsgHdr { pub size: u32, } -#[allow(dead_code)] impl VhostUserMsgHdr { /// Create a new instance of `VhostUserMsgHeader`. 
pub fn new(request: u32, flags: u32, size: u32) -> Self { @@ -100,7 +145,7 @@ impl VhostUserMsgHdr { } /// Check whether reply for this message is requested. - fn need_reply(&self) -> bool { + pub fn need_reply(&self) -> bool { (self.flags & VhostUserHdrFlag::NeedReply as u32) != 0 } @@ -128,9 +173,35 @@ impl Default for VhostUserMsgHdr { } } +/// Struct for get and set config to vhost user. +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +pub struct VhostUserConfig { + offset: u32, + size: u32, + flags: u32, + pub config: T, +} + +impl VhostUserConfig { + /// Create a new instance of `VhostUserConfig`. + pub fn new(offset: u32, flags: u32, config: T) -> Result { + let size = size_of::() as u32; + if size > VHOST_USER_MAX_CONFIG_SIZE { + bail!("Failed to create VhostUserConfig: exceed max config size.") + } + Ok(VhostUserConfig { + offset, + size, + flags, + config, + }) + } +} + /// Memory region information for the message of memory table. #[repr(C)] -#[derive(Copy, Clone, Debug, PartialEq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] pub struct RegionMemInfo { /// Guest physical address of the memory region. pub guest_phys_addr: u64, @@ -184,6 +255,7 @@ impl Default for VhostUserMemContext { /// The configuration for the state of virtual ring. #[repr(C)] +#[derive(Default)] pub struct VhostUserVringState { /// Index for virtual ring. pub index: u32, @@ -197,8 +269,9 @@ impl VhostUserVringState { } } -///The configuration for the address of virtual ring. +/// The configuration for the address of virtual ring. #[repr(C)] +#[derive(Debug)] pub struct VhostUserVringAddr { /// Index for virtual ring. pub index: u32, diff --git a/virtio/src/vhost/user/mod.rs b/virtio/src/vhost/user/mod.rs index 17d8dfffdd3869a3e906920ca4ff053f5fc5b1a5..68a0b166e63eecd668481ef86cfbc3400b615f00 100644 --- a/virtio/src/vhost/user/mod.rs +++ b/virtio/src/vhost/user/mod.rs @@ -10,6 +10,67 @@ // NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. // See the Mulan PSL v2 for more details. 
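An editorial usage sketch (not part of the patch) for the message.rs header changes above: `need_reply()` is now public, so a caller can confirm that a header built with the `NeedReply` flag (bit 3) actually requests an acknowledgement. Only types shown in this patch are used.

    let hdr = VhostUserMsgHdr::new(
        VhostUserMsgReq::GetFeatures as u32, // request id
        VhostUserHdrFlag::NeedReply as u32,  // bit 3: sender expects a reply
        0,                                   // GET_FEATURES carries no body
    );
    assert!(hdr.need_reply());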
+pub mod fs; + +#[cfg(feature = "vhostuser_block")] +mod block; mod client; mod message; +#[cfg(feature = "vhostuser_net")] +mod net; mod sock; + +#[cfg(feature = "vhostuser_block")] +pub use self::block::{Block, VhostUserBlkDevConfig}; +pub use self::client::*; +pub use self::fs::*; +pub use self::message::*; +#[cfg(feature = "vhostuser_net")] +pub use self::net::Net; +pub use self::sock::*; + +use std::sync::{Arc, Mutex}; + +use anyhow::Result; + +use crate::{ + vhost::{VhostIoHandler, VhostNotify}, + NotifyEventFds, VirtioBase, VirtioInterrupt, +}; + +use machine_manager::event_loop::register_event_helper; +use util::loop_context::EventNotifierHelper; + +pub fn listen_guest_notifier( + base: &mut VirtioBase, + client: &mut VhostUserClient, + ctx_name: Option<&String>, + evts_num: usize, + interrupt_cb: Arc, +) -> Result<()> { + let call_evts = NotifyEventFds::new(evts_num); + let events = &call_evts.events; + client.set_call_events(events); + + let mut host_notifies = Vec::new(); + for (queue_index, queue_mutex) in base.queues.iter().enumerate() { + if queue_index >= events.len() { + break; + } + let host_notify = VhostNotify { + notify_evt: events[queue_index].clone(), + queue: queue_mutex.clone(), + }; + host_notifies.push(host_notify); + } + + let handler = VhostIoHandler { + interrupt_cb, + host_notifies, + device_broken: base.broken.clone(), + }; + let notifiers = EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(handler))); + register_event_helper(notifiers, ctx_name, &mut base.deactivate_evts)?; + + Ok(()) +} diff --git a/virtio/src/vhost/user/net.rs b/virtio/src/vhost/user/net.rs new file mode 100644 index 0000000000000000000000000000000000000000..fc98344638fee0dc706ed85270db05dd98f7d88e --- /dev/null +++ b/virtio/src/vhost/user/net.rs @@ -0,0 +1,295 @@ +// Copyright (c) 2022 Huawei Technologies Co.,Ltd. All rights reserved. +// +// StratoVirt is licensed under Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan +// PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PSL v2 for more details. + +use std::sync::atomic::Ordering; +use std::sync::{Arc, Mutex}; + +use anyhow::{anyhow, Context, Result}; +use vmm_sys_util::eventfd::EventFd; + +use super::super::VhostOps; +use super::{listen_guest_notifier, VhostBackendType, VhostUserClient}; +use crate::{ + device::net::{build_device_config_space, CtrlInfo, MAC_ADDR_LEN}, + read_config_default, virtio_has_feature, CtrlVirtio, NetCtrlHandler, VirtioBase, VirtioDevice, + VirtioInterrupt, VirtioNetConfig, VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1, + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN, VIRTIO_NET_F_CTRL_MAC_ADDR, + VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_UFO, + VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MQ, + VIRTIO_NET_F_MRG_RXBUF, VIRTIO_TYPE_NET, +}; +use address_space::AddressSpace; +use machine_manager::config::{NetDevcfg, NetworkInterfaceConfig}; +use machine_manager::event_loop::{register_event_helper, unregister_event_helper}; +use util::byte_code::ByteCode; +use util::gen_base_func; +use util::loop_context::EventNotifierHelper; + +/// Number of virtqueues. 
+const QUEUE_NUM_NET: usize = 2; + +/// Network device structure. +pub struct Net { + /// Virtio device base property. + base: VirtioBase, + /// Configuration of the vhost user network device. + net_cfg: NetworkInterfaceConfig, + /// Configuration of the backend netdev. + netdev_cfg: NetDevcfg, + /// path of the socket chardev. + sock_path: String, + /// Virtio net configurations. + config_space: Arc>, + /// System address space. + mem_space: Arc, + /// Vhost user client. + client: Option>>, + /// Whether irqfd can be used. + enable_irqfd: bool, +} + +impl Net { + pub fn new( + net_cfg: &NetworkInterfaceConfig, + netdev_cfg: NetDevcfg, + sock_path: String, + mem_space: &Arc, + ) -> Self { + let queue_num = if net_cfg.mq { + // If support multi-queue, it should add 1 control queue. + (netdev_cfg.queues + 1) as usize + } else { + QUEUE_NUM_NET + }; + let queue_size = net_cfg.queue_size; + + Net { + base: VirtioBase::new(VIRTIO_TYPE_NET, queue_num, queue_size), + net_cfg: net_cfg.clone(), + netdev_cfg, + sock_path, + config_space: Default::default(), + mem_space: mem_space.clone(), + client: None, + enable_irqfd: false, + } + } + + fn delete_event(&mut self) -> Result<()> { + match &self.client { + Some(client) => { + client + .lock() + .unwrap() + .delete_event() + .with_context(|| "Failed to delete vhost-user net event")?; + } + None => return Err(anyhow!("Failed to get client when stopping event")), + }; + if !self.base.deactivate_evts.is_empty() { + unregister_event_helper( + self.net_cfg.iothread.as_ref(), + &mut self.base.deactivate_evts, + )?; + } + + Ok(()) + } + + fn clean_up(&mut self) -> Result<()> { + self.delete_event()?; + self.base.device_features = 0; + self.base.driver_features = 0; + self.base.broken.store(false, Ordering::SeqCst); + self.config_space = Default::default(); + self.client = None; + + Ok(()) + } +} + +impl VirtioDevice for Net { + gen_base_func!(virtio_base, virtio_base_mut, VirtioBase, base); + + fn realize(&mut self) -> Result<()> { + let client = VhostUserClient::new( + &self.mem_space, + &self.sock_path, + self.queue_num() as u64, + VhostBackendType::TypeNet, + ) + .with_context(|| { + "Failed to create the client which communicates with the server for vhost-user net" + })?; + let client = Arc::new(Mutex::new(client)); + VhostUserClient::add_event(&client)?; + self.client = Some(client); + + self.init_config_features()?; + + Ok(()) + } + + fn init_config_features(&mut self) -> Result<()> { + let client = self.client.as_ref().unwrap(); + self.base.device_features = client + .lock() + .unwrap() + .get_features() + .with_context(|| "Failed to get features for vhost-user net")?; + + let features = 1 << VIRTIO_F_VERSION_1 + | 1 << VIRTIO_NET_F_GUEST_CSUM + | 1 << VIRTIO_NET_F_GUEST_TSO4 + | 1 << VIRTIO_NET_F_GUEST_UFO + | 1 << VIRTIO_NET_F_HOST_TSO4 + | 1 << VIRTIO_NET_F_HOST_UFO + | 1 << VIRTIO_NET_F_MRG_RXBUF + | 1 << VIRTIO_F_RING_EVENT_IDX; + self.base.device_features &= features; + + let mut locked_config = self.config_space.lock().unwrap(); + + let queue_pairs = self.netdev_cfg.queues / 2; + if self.net_cfg.mq + && (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN..=VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) + .contains(&queue_pairs) + { + self.base.device_features |= 1 << VIRTIO_NET_F_CTRL_VQ; + self.base.device_features |= 1 << VIRTIO_NET_F_MQ; + locked_config.max_virtqueue_pairs = queue_pairs; + } + + if let Some(mac) = &self.net_cfg.mac { + self.base.device_features |= build_device_config_space(&mut locked_config, mac); + } + + Ok(()) + } + + fn read_config(&self, offset: u64, 
data: &mut [u8]) -> Result<()> { + let config_space = self.config_space.lock().unwrap(); + read_config_default(config_space.as_bytes(), offset, data) + } + + fn write_config(&mut self, offset: u64, data: &[u8]) -> Result<()> { + let data_len = data.len(); + let mut config_space = self.config_space.lock().unwrap(); + let driver_features = self.base.driver_features; + let config_slice = config_space.as_mut_bytes(); + + if !virtio_has_feature(driver_features, VIRTIO_NET_F_CTRL_MAC_ADDR) + && !virtio_has_feature(driver_features, VIRTIO_F_VERSION_1) + && offset == 0 + && data_len == MAC_ADDR_LEN + && *data != config_slice[0..data_len] + { + config_slice[(offset as usize)..(offset as usize + data_len)].copy_from_slice(data); + } + + Ok(()) + } + + fn activate( + &mut self, + mem_space: Arc, + interrupt_cb: Arc, + queue_evts: Vec>, + ) -> Result<()> { + let mut client = match &self.client { + Some(client) => client.lock().unwrap(), + None => return Err(anyhow!("Failed to get client for vhost-user net")), + }; + + let queues = self.base.queues.clone(); + let queue_num = queues.len(); + let mut call_fds_num = queue_num; + let driver_features = self.base.driver_features; + let has_control_queue = + (driver_features & (1 << VIRTIO_NET_F_CTRL_VQ) != 0) && (queue_num % 2 != 0); + if has_control_queue { + let ctrl_queue = queues[queue_num - 1].clone(); + let ctrl_queue_evt = queue_evts[queue_num - 1].clone(); + let ctrl_info = Arc::new(Mutex::new(CtrlInfo::new(self.config_space.clone()))); + + let ctrl_handler = NetCtrlHandler { + ctrl: CtrlVirtio::new(ctrl_queue, ctrl_queue_evt, ctrl_info), + mem_space, + interrupt_cb: interrupt_cb.clone(), + driver_features, + device_broken: self.base.broken.clone(), + taps: None, + }; + + let notifiers = + EventNotifierHelper::internal_notifiers(Arc::new(Mutex::new(ctrl_handler))); + register_event_helper( + notifiers, + self.net_cfg.iothread.as_ref(), + &mut self.base.deactivate_evts, + )?; + + call_fds_num -= 1; + client.set_queues(&queues[..(queue_num - 1)]); + client.set_queue_evts(&queue_evts[..(queue_num - 1)]); + } else { + client.set_queues(&queues); + client.set_queue_evts(&queue_evts); + } + client.features = driver_features & !(1 << VIRTIO_NET_F_MAC); + + if !self.enable_irqfd { + listen_guest_notifier( + &mut self.base, + &mut client, + self.net_cfg.iothread.as_ref(), + call_fds_num, + interrupt_cb, + )?; + } + client.activate_vhost_user()?; + self.base.broken.store(false, Ordering::SeqCst); + + Ok(()) + } + + fn set_guest_notifiers(&mut self, queue_evts: &[Arc]) -> Result<()> { + self.enable_irqfd = true; + match &self.client { + Some(client) => client.lock().unwrap().set_call_events(queue_evts), + None => return Err(anyhow!("Failed to get client for vhost-user net")), + }; + + Ok(()) + } + + fn deactivate(&mut self) -> Result<()> { + self.clean_up()?; + self.realize() + } + + fn reset(&mut self) -> Result<()> { + self.clean_up()?; + self.realize() + } + + fn unrealize(&mut self) -> Result<()> { + self.delete_event()?; + self.client = None; + + Ok(()) + } + + fn has_control_queue(&self) -> bool { + virtio_has_feature(self.base.device_features, VIRTIO_NET_F_CTRL_VQ) + } +} diff --git a/virtio/src/vhost/user/sock.rs b/virtio/src/vhost/user/sock.rs index 422e1f28aa29459c68389522ad1e263ce40531ee..a944ebdc25ae3b662981f5290a3efec998a32dd0 100644 --- a/virtio/src/vhost/user/sock.rs +++ b/virtio/src/vhost/user/sock.rs @@ -13,21 +13,23 @@ use std::mem::size_of; use std::os::unix::io::RawFd; +use anyhow::{bail, Result}; use libc::{c_void, iovec}; -use 
util::unix::UnixSock;
-use super::super::super::errors::Result;
 use super::message::{MAX_ATTACHED_FD_ENTRIES, VHOST_USER_MSG_MAX_SIZE};
+use util::unix::UnixSock;
 #[derive(Clone)]
 pub struct VhostUserSock {
     pub domain: UnixSock,
+    pub path: String,
 }
 impl VhostUserSock {
     pub fn new(path: &str) -> Self {
         VhostUserSock {
             domain: UnixSock::new(path),
+            path: path.to_string(),
         }
     }
@@ -73,9 +75,9 @@ impl VhostUserSock {
         if let Some(payload) = payload_opt {
             iovs.push(iovec {
                 iov_base: payload.as_ptr() as *const u8 as *mut c_void,
-                iov_len: payload.len() * size_of::<u8>(),
+                iov_len: std::mem::size_of_val(payload),
             });
-            total_len += payload.len() * size_of::<u8>();
+            total_len += std::mem::size_of_val(payload);
         }
         if (total_len - size_of::<VhostUserMsgHdr>()) > VHOST_USER_MSG_MAX_SIZE {
@@ -138,7 +140,7 @@ impl VhostUserSock {
         if let Some(payload) = payload_opt {
             iovs.push(iovec {
                 iov_base: payload.as_ptr() as *const u8 as *mut c_void,
-                iov_len: payload.len() * size_of::<u8>(),
+                iov_len: std::mem::size_of_val(payload),
             });
         }
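A small editorial check (not part of the patch) of the `size_of_val` refactor in the hunks above: for a `&[u8]` payload, the new call returns exactly the old `len() * size_of::<u8>()` product, so the total length accounting is unchanged.

    use std::mem::{size_of, size_of_val};

    let payload: &[u8] = &[0u8; 64];
    // size_of_val(slice) == element count * element size for slices.
    assert_eq!(size_of_val(payload), payload.len() * size_of::<u8>());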